支持在会话输入框直接进行文件/图片的拖拽和粘贴

This commit is contained in:
xintaofei
2026-03-08 10:54:06 +08:00
parent 68e2c7f989
commit 7a4cbcb73e
24 changed files with 1335 additions and 78 deletions

1
src-tauri/Cargo.lock generated
View File

@@ -751,6 +751,7 @@ checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32"
name = "codeg"
version = "0.0.15"
dependencies = [
"base64 0.22.1",
"bzip2",
"chrono",
"dirs",

View File

@@ -45,6 +45,7 @@ sea-orm = { version = "1.1", features = ["sqlx-sqlite", "runtime-tokio-rustls",
sea-orm-migration = { version = "1.1", features = ["sqlx-sqlite", "runtime-tokio-rustls"] }
toml = "0.8"
notify = "6"
base64 = "0.22"
[target.'cfg(not(any(target_os = "android", target_os = "ios")))'.dependencies]
tauri-plugin-window-state = "2"

View File

@@ -4,19 +4,21 @@ use std::sync::Arc;
use sacp::schema::McpServerStdio;
use sacp::schema::{
CancelNotification, ClientCapabilities, ContentBlock, ContentChunk, CreateTerminalRequest,
CreateTerminalResponse, FileSystemCapability, InitializeRequest, KillTerminalCommandRequest,
BlobResourceContents, CancelNotification, ClientCapabilities, ContentBlock, ContentChunk,
CreateTerminalRequest, CreateTerminalResponse, EmbeddedResource, EmbeddedResourceResource,
FileSystemCapability, ImageContent, InitializeRequest, KillTerminalCommandRequest,
KillTerminalCommandResponse, LoadSessionRequest, NewSessionRequest, NewSessionResponse,
PermissionOptionKind, Plan, PlanEntryPriority, PlanEntryStatus, PromptRequest, ProtocolVersion,
ReadTextFileRequest, ReadTextFileResponse, ReleaseTerminalRequest, ReleaseTerminalResponse,
RequestPermissionOutcome, RequestPermissionRequest, RequestPermissionResponse, ResourceLink,
SelectedPermissionOutcome, SessionConfigKind, SessionConfigOption, SessionConfigOptionCategory,
SessionConfigSelectGroup, SessionConfigSelectOption, SessionConfigSelectOptions, SessionId,
SessionModeState, SessionNotification, SessionUpdate, SetSessionConfigOptionRequest,
PermissionOptionKind, Plan, PlanEntryPriority, PlanEntryStatus, PromptRequest,
ProtocolVersion, ReadTextFileRequest, ReadTextFileResponse, ReleaseTerminalRequest,
ReleaseTerminalResponse, RequestPermissionOutcome, RequestPermissionRequest,
RequestPermissionResponse, ResourceLink, SelectedPermissionOutcome, SessionConfigKind,
SessionConfigOption, SessionConfigOptionCategory, SessionConfigSelectGroup,
SessionConfigSelectOption, SessionConfigSelectOptions, SessionId, SessionModeState,
SessionNotification, SessionUpdate, SetSessionConfigOptionRequest,
SetSessionConfigOptionResponse, SetSessionModeRequest, StopReason, TerminalExitStatus,
TerminalOutputRequest, TerminalOutputResponse, TextContent, ToolCallContent,
WaitForTerminalExitRequest, WaitForTerminalExitResponse, WriteTextFileRequest,
WriteTextFileResponse,
TerminalOutputRequest, TerminalOutputResponse, TextContent, TextResourceContents,
ToolCallContent, WaitForTerminalExitRequest, WaitForTerminalExitResponse,
WriteTextFileRequest, WriteTextFileResponse,
};
use sacp::util::MatchDispatch;
use sacp::{
@@ -32,9 +34,9 @@ use crate::acp::registry::{self, AgentDistribution};
use crate::acp::terminal_runtime::{TerminalRuntime, TerminalRuntimeError};
use crate::acp::types::{
AcpEvent, AvailableCommandInfo, ConnectionInfo, ConnectionStatus, PermissionOptionInfo,
PlanEntryInfo, PromptInputBlock, SessionConfigKindInfo, SessionConfigOptionInfo,
SessionConfigSelectGroupInfo, SessionConfigSelectInfo, SessionConfigSelectOptionInfo,
SessionModeInfo, SessionModeStateInfo,
PlanEntryInfo, PromptCapabilitiesInfo, PromptInputBlock, SessionConfigKindInfo,
SessionConfigOptionInfo, SessionConfigSelectGroupInfo, SessionConfigSelectInfo,
SessionConfigSelectOptionInfo, SessionModeInfo, SessionModeStateInfo,
};
use crate::models::agent::AgentType;
use crate::network::proxy;
@@ -448,6 +450,24 @@ fn emit_selectors_ready(connection_id: &str, app_handle: &tauri::AppHandle) {
);
}
/// Publishes the agent's prompt capabilities on the `acp://event` channel.
///
/// The three capability flags are copied out of `capabilities` into a
/// `PromptCapabilitiesInfo` and wrapped in `AcpEvent::PromptCapabilities`
/// together with `connection_id`.
fn emit_prompt_capabilities(
    connection_id: &str,
    app_handle: &tauri::AppHandle,
    capabilities: &sacp::schema::PromptCapabilities,
) {
    let prompt_capabilities = PromptCapabilitiesInfo {
        image: capabilities.image,
        audio: capabilities.audio,
        embedded_context: capabilities.embedded_context,
    };
    let event = AcpEvent::PromptCapabilities {
        connection_id: connection_id.into(),
        prompt_capabilities,
    };
    // The emit result is deliberately discarded: a failed emit is not reported to the caller.
    let _ = app_handle.emit("acp://event", event);
}
fn resolve_working_dir(working_dir: Option<&str>) -> PathBuf {
match working_dir {
Some(dir) => {
@@ -591,7 +611,12 @@ async fn run_connection(
.read_text_file(true)
.write_text_file(true)),
);
let _init_resp = cx.send_request_to(Agent, init_request).block_task().await?;
let init_resp = cx.send_request_to(Agent, init_request).block_task().await?;
emit_prompt_capabilities(
&conn_id,
&handle,
&init_resp.agent_capabilities.prompt_capabilities,
);
// Emit connected status
let _ = handle.emit(
@@ -1128,6 +1153,35 @@ fn map_prompt_blocks(blocks: Vec<PromptInputBlock>) -> Vec<ContentBlock> {
.into_iter()
.map(|block| match block {
PromptInputBlock::Text { text } => ContentBlock::Text(TextContent::new(text)),
PromptInputBlock::Image {
data,
mime_type,
uri,
} => ContentBlock::Image(ImageContent::new(data, mime_type).uri(uri)),
PromptInputBlock::Resource {
uri,
mime_type,
text,
blob,
} => {
let resource = match (text, blob) {
(Some(text_value), _) => {
let content =
TextResourceContents::new(text_value, uri.clone()).mime_type(mime_type);
EmbeddedResourceResource::TextResourceContents(content)
}
(None, Some(blob_value)) => {
let content =
BlobResourceContents::new(blob_value, uri.clone()).mime_type(mime_type);
EmbeddedResourceResource::BlobResourceContents(content)
}
(None, None) => {
let content = TextResourceContents::new("", uri.clone()).mime_type(mime_type);
EmbeddedResourceResource::TextResourceContents(content)
}
};
ContentBlock::Resource(EmbeddedResource::new(resource))
}
PromptInputBlock::ResourceLink {
uri,
name,

View File

@@ -7,6 +7,21 @@ pub enum PromptInputBlock {
Text {
text: String,
},
Image {
data: String,
mime_type: String,
#[serde(default)]
uri: Option<String>,
},
Resource {
uri: String,
#[serde(default)]
mime_type: Option<String>,
#[serde(default)]
text: Option<String>,
#[serde(default)]
blob: Option<String>,
},
ResourceLink {
uri: String,
name: String,
@@ -17,6 +32,13 @@ pub enum PromptInputBlock {
},
}
/// Serializable copy of an agent's prompt capability flags.
///
/// `emit_prompt_capabilities` fills it from `sacp::schema::PromptCapabilities`
/// and it is carried as the payload of `AcpEvent::PromptCapabilities`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct PromptCapabilitiesInfo {
/// Copied from the agent's `image` capability flag.
pub image: bool,
/// Copied from the agent's `audio` capability flag.
pub audio: bool,
/// Copied from the agent's `embedded_context` capability flag.
pub embedded_context: bool,
}
/// Events pushed from Rust backend to frontend via Tauri event system.
#[derive(Debug, Clone, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
@@ -77,6 +99,11 @@ pub enum AcpEvent {
},
/// Initial selector payloads (modes/config options) have been emitted
SelectorsReady { connection_id: String },
/// Prompt capabilities for this connection
PromptCapabilities {
connection_id: String,
prompt_capabilities: PromptCapabilitiesInfo,
},
/// Current session mode changed
ModeChanged {
connection_id: String,

View File

@@ -7,6 +7,7 @@ use std::process::{Command, Stdio};
use std::sync::{mpsc, LazyLock, Mutex};
use std::time::{Duration, Instant, UNIX_EPOCH};
use base64::Engine as _;
use notify::{EventKind, RecommendedWatcher, RecursiveMode, Watcher};
use serde::Serialize;
use tauri::Emitter;
@@ -1161,6 +1162,8 @@ const FILE_PREVIEW_MIN_BYTES: usize = 4_096;
// NOTE(review): the preview/edit limits below are consumed outside this view;
// presumably they bound the preview and edit read commands — confirm at the call sites.
const FILE_PREVIEW_MAX_BYTES: usize = 2_000_000;
const FILE_EDIT_DEFAULT_MAX_BYTES: usize = 400_000;
const FILE_EDIT_MAX_BYTES: usize = 2_000_000;
/// Size limit `read_file_base64` applies when the caller passes no `max_bytes`.
const FILE_BASE64_DEFAULT_MAX_BYTES: usize = 20_000_000;
/// Upper bound that `read_file_base64` clamps any requested `max_bytes` to.
const FILE_BASE64_MAX_BYTES: usize = 100_000_000;
// NOTE(review): presumably the permit count for `FILE_IO_SEMAPHORE` — confirm against its initializer.
const FILE_IO_MAX_CONCURRENT_OPS: usize = 8;
static FILE_IO_SEMAPHORE: LazyLock<Semaphore> =
@@ -2028,6 +2031,47 @@ pub async fn get_file_tree(
Ok(dir_children.remove(&root).unwrap_or_default())
}
/// Reads a file and returns its contents encoded with the standard base64 alphabet.
///
/// # Arguments
/// * `path` - Path of the file to read; surrounding whitespace is ignored.
/// * `max_bytes` - Optional size limit. Defaults to `FILE_BASE64_DEFAULT_MAX_BYTES`
///   and is clamped to `FILE_PREVIEW_MIN_BYTES..=FILE_BASE64_MAX_BYTES`.
///
/// # Errors
/// * invalid input - the path is empty, is not a regular file, or the file is
///   larger than the effective limit (the limit is attached as detail).
/// * not found - the path does not exist.
/// * I/O - opening, inspecting or reading the file failed.
#[tauri::command]
pub async fn read_file_base64(
    path: String,
    max_bytes: Option<usize>,
) -> Result<String, AppCommandError> {
    let trimmed = path.trim();
    if trimmed.is_empty() {
        return Err(AppCommandError::invalid_input("Path cannot be empty"));
    }
    let target = PathBuf::from(trimmed);
    let limit = max_bytes
        .unwrap_or(FILE_BASE64_DEFAULT_MAX_BYTES)
        .clamp(FILE_PREVIEW_MIN_BYTES, FILE_BASE64_MAX_BYTES);

    run_file_io(move || {
        // `exists` / `is_file` are blocking filesystem calls, so they run here with the
        // rest of the file I/O rather than directly in the async command body.
        // NOTE(review): assumes `run_file_io` is where this file schedules blocking work;
        // its definition is outside this view.
        if !target.exists() {
            return Err(AppCommandError::not_found("File does not exist"));
        }
        if !target.is_file() {
            return Err(AppCommandError::invalid_input("Path is not a file"));
        }

        let too_large = || {
            AppCommandError::invalid_input("File is too large to attach")
                .with_detail(format!("max_bytes={limit}"))
        };

        // Stat the opened handle so the size check and the read refer to the same file.
        let file = std::fs::File::open(&target).map_err(AppCommandError::io)?;
        let size = file.metadata().map_err(AppCommandError::io)?.len();
        if size > limit as u64 {
            return Err(too_large());
        }

        // Read at most `limit + 1` bytes: if the file grows after the size check, the
        // extra byte is enough to detect it without loading an unbounded amount of data.
        let mut bytes = Vec::with_capacity(usize::try_from(size).unwrap_or(0));
        let mut bounded = std::io::Read::take(file, (limit as u64).saturating_add(1));
        std::io::Read::read_to_end(&mut bounded, &mut bytes).map_err(AppCommandError::io)?;
        if bytes.len() > limit {
            return Err(too_large());
        }

        Ok(base64::engine::general_purpose::STANDARD.encode(bytes))
    })
    .await
}
#[tauri::command]
pub async fn read_file_preview(
root_path: String,

View File

@@ -206,6 +206,7 @@ pub fn run() {
folders::start_file_tree_watch,
folders::stop_file_tree_watch,
folders::get_file_tree,
folders::read_file_base64,
folders::read_file_preview,
folders::read_file_for_edit,
folders::save_file_content,

View File

@@ -16,6 +16,12 @@ pub enum ContentBlock {
Text {
text: String,
},
Image {
data: String,
mime_type: String,
#[serde(skip_serializing_if = "Option::is_none")]
uri: Option<String>,
},
ToolUse {
tool_use_id: Option<String>,
tool_name: String,

View File

@@ -501,20 +501,49 @@ impl CodexParser {
.unwrap_or("")
.to_string();
let normalized = strip_blocked_resource_mentions(&text);
let message_text = if normalized.is_empty() {
"Attached resources".to_string()
} else {
normalized
};
let mut blocks: Vec<ContentBlock> = Vec::new();
if !normalized.is_empty() {
blocks.push(ContentBlock::Text { text: normalized });
}
if let Some(images) =
payload.get("images").and_then(|v| v.as_array())
{
for image in images {
let Some(raw) = image.as_str() else {
continue;
};
let Some((mime_type, data)) = parse_data_uri_image(raw)
else {
continue;
};
blocks.push(ContentBlock::Image {
data,
mime_type,
uri: None,
});
}
}
if blocks.is_empty() {
blocks.push(ContentBlock::Text {
text: "Attached resources".to_string(),
});
}
if title.is_none() {
title = extract_codex_title_candidate(&text, true);
}
if should_skip_duplicate_user_message(&messages, &blocks, timestamp)
{
continue;
}
messages.push(UnifiedMessage {
id: format!("user-{}", messages.len()),
role: MessageRole::User,
content: vec![ContentBlock::Text { text: message_text }],
content: blocks,
timestamp,
usage: None,
duration_ms: None,
@@ -690,6 +719,40 @@ impl CodexParser {
model: None,
});
}
"message" => {
let role =
payload.get("role").and_then(|r| r.as_str()).unwrap_or("");
if role == "user" {
if let Some(blocks) =
extract_response_item_user_image_blocks(payload)
{
if should_skip_duplicate_user_message(
&messages, &blocks, timestamp,
) {
continue;
}
if title.is_none() {
if let Some(text) = first_text_block(&blocks) {
title = extract_codex_title_candidate(
text.as_str(),
true,
);
}
}
messages.push(UnifiedMessage {
id: format!("user-{}", messages.len()),
role: MessageRole::User,
content: blocks,
timestamp,
usage: None,
duration_ms: None,
model: None,
});
}
}
}
_ => {}
}
}
@@ -946,10 +1009,138 @@ fn extract_codex_text_content(payload: &serde_json::Value) -> Option<String> {
None
}
/// Splits a base64 image data URI into `(mime_type, base64_payload)`.
///
/// Returns `None` unless the trimmed input starts with `data:`, contains the
/// `;base64,` marker, has a media type beginning with `image/`, and carries a
/// non-empty payload. Both returned parts are whitespace-trimmed.
fn parse_data_uri_image(raw: &str) -> Option<(String, String)> {
    const BASE64_MARKER: &str = ";base64,";

    let rest = raw.trim().strip_prefix("data:")?;
    // Everything before the first marker is the media type, everything after is the payload.
    let (mime_part, data_part) = rest.split_once(BASE64_MARKER)?;
    let mime_type = mime_part.trim();
    let data = data_part.trim();

    if !mime_type.starts_with("image/") || data.is_empty() {
        return None;
    }
    Some((mime_type.to_owned(), data.to_owned()))
}
/// Finds the data URI of an `input_image` item and parses it with `parse_data_uri_image`.
///
/// Candidate locations are tried in order: `image_url` as a string,
/// `image_url.url` as a string, then a top-level `url` string.
fn parse_input_image_data_uri(item: &serde_json::Value) -> Option<(String, String)> {
    let image_url = item.get("image_url");
    let direct = image_url.and_then(|v| v.as_str());
    let data_uri = direct
        .or_else(|| image_url.and_then(|v| v.get("url")).and_then(|v| v.as_str()))
        .or_else(|| item.get("url").and_then(|v| v.as_str()))?;
    parse_data_uri_image(data_uri)
}
/// Returns a copy of the text of the first `ContentBlock::Text` in `blocks`, if any.
fn first_text_block(blocks: &[ContentBlock]) -> Option<String> {
    for block in blocks {
        if let ContentBlock::Text { text } = block {
            return Some(text.clone());
        }
    }
    None
}
/// Compares two block lists by length and then by their serialized JSON values.
fn blocks_equal(a: &[ContentBlock], b: &[ContentBlock]) -> bool {
    // The length check short-circuits before any serialization work is done.
    a.len() == b.len() && serde_json::to_value(a).ok() == serde_json::to_value(b).ok()
}
/// Reports whether `blocks` repeats a recent user message already in `messages`.
///
/// Some Codex logs emit the same user message through both `response_item`
/// and `event_msg`, sometimes with a non-trivial delay, so duplicates are
/// detected by content within a bounded time window around `timestamp`.
fn should_skip_duplicate_user_message(
    messages: &[UnifiedMessage],
    blocks: &[ContentBlock],
    timestamp: DateTime<Utc>,
) -> bool {
    const DUP_WINDOW_MS: i64 = 120_000;

    messages
        .iter()
        .rev()
        .filter(|msg| matches!(msg.role, MessageRole::User))
        // Scanning newest-first: stop at the first user message outside the window.
        .take_while(|msg| {
            (timestamp - msg.timestamp).num_milliseconds().abs() <= DUP_WINDOW_MS
        })
        .any(|msg| blocks_equal(&msg.content, blocks))
}
/// Builds content blocks for a `response_item` user message that carries images.
///
/// Returns `None` when `payload.content` is not an array or contains no
/// `input_image` item. Otherwise the result holds the cleaned, newline-joined
/// `input_text` parts (if non-empty) followed by one image block per parsable
/// `input_image`; when nothing usable remains, a single
/// `"Attached resources"` text block is returned instead.
fn extract_response_item_user_image_blocks(
    payload: &serde_json::Value,
) -> Option<Vec<ContentBlock>> {
    let items = payload.get("content")?.as_array()?;

    let mut image_blocks: Vec<ContentBlock> = Vec::new();
    let mut texts: Vec<&str> = Vec::new();
    let mut saw_input_image = false;

    for item in items {
        match item.get("type").and_then(|v| v.as_str()) {
            Some("input_text") => {
                if let Some(text) = item.get("text").and_then(|v| v.as_str()) {
                    // A bare `<image>` placeholder and empty strings add nothing to the text.
                    if !text.is_empty() && text.trim() != "<image>" {
                        texts.push(text);
                    }
                }
            }
            Some("input_image") => {
                // Recorded even when the data URI cannot be parsed.
                saw_input_image = true;
                if let Some((mime_type, data)) = parse_input_image_data_uri(item) {
                    image_blocks.push(ContentBlock::Image {
                        data,
                        mime_type,
                        uri: None,
                    });
                }
            }
            _ => {}
        }
    }

    if !saw_input_image {
        return None;
    }

    let text = strip_blocked_resource_mentions(&texts.join("\n"));
    let mut blocks: Vec<ContentBlock> = Vec::new();
    if !text.is_empty() {
        blocks.push(ContentBlock::Text { text });
    }
    blocks.extend(image_blocks);
    if blocks.is_empty() {
        blocks.push(ContentBlock::Text {
            text: "Attached resources".to_string(),
        });
    }
    Some(blocks)
}
fn strip_blocked_resource_mentions(input: &str) -> String {
let blocked_re = Regex::new(r"@([^\s@]+)\s*\[blocked[^\]]*\]").expect("valid blocked regex");
let image_tag_re = Regex::new(r"(?i)</?image\s*/?>").expect("valid image tag regex");
let collapsed_ws_re = Regex::new(r"[ \t]{2,}").expect("valid whitespace regex");
let text = blocked_re.replace_all(input, "").to_string();
let text = image_tag_re.replace_all(&text, "").to_string();
let text = collapsed_ws_re.replace_all(&text, " ").to_string();
text.trim().to_string()
}
@@ -1030,12 +1221,16 @@ fn group_into_turns(messages: Vec<UnifiedMessage>) -> Vec<MessageTurn> {
mod tests {
use super::extract_codex_title_candidate;
use super::extract_context_window_used_tokens_from_token_count_info;
use super::extract_response_item_user_image_blocks;
use super::extract_turn_usage_from_codex_usage;
use super::merge_codex_context_window_stats;
use super::merge_codex_total_usage_stats;
use super::resolve_codex_home_dir_from;
use super::should_skip_duplicate_user_message;
use super::strip_blocked_resource_mentions;
use super::CodexParser;
use crate::models::{SessionStats, TurnUsage};
use crate::models::{ContentBlock, MessageRole, SessionStats, TurnUsage, UnifiedMessage};
use chrono::{Duration, Utc};
use std::env;
use std::fs;
use std::path::PathBuf;
@@ -1063,6 +1258,75 @@ mod tests {
assert_eq!(got.as_deref(), Some("修复 codex 会话标题"));
}
/// Image placeholder tags and the surrounding blank lines are removed from user text.
#[test]
fn strips_image_placeholders_from_user_text() {
    let cleaned = strip_blocked_resource_mentions("这个图片里面是什么\n</image>\n<image>\n");
    assert_eq!(cleaned, "这个图片里面是什么");
}
/// A payload with text, an `<image>` placeholder and an image yields text + image blocks.
#[test]
fn extracts_response_item_input_image_blocks() {
    let payload = serde_json::json!({
        "content": [
            {"type": "input_text", "text": "这是什么东西"},
            {"type": "input_text", "text": "<image>"},
            {"type": "input_image", "image_url": "data:image/png;base64,QUJD"}
        ]
    });

    let blocks = extract_response_item_user_image_blocks(&payload).expect("blocks");
    assert_eq!(blocks.len(), 2);

    let ContentBlock::Text { text } = &blocks[0] else {
        panic!("expected text block");
    };
    assert_eq!(text, "这是什么东西");

    let ContentBlock::Image {
        data, mime_type, ..
    } = &blocks[1]
    else {
        panic!("expected image block");
    };
    assert_eq!(mime_type, "image/png");
    assert_eq!(data, "QUJD");
}
/// Identical content is a duplicate 1.2 s later but not 180 s later.
#[test]
fn skips_duplicate_user_message_within_short_window() {
    let sent_at = Utc::now();
    let text_block = ContentBlock::Text {
        text: "hello".to_string(),
    };
    let image_block = ContentBlock::Image {
        data: "QUJD".to_string(),
        mime_type: "image/png".to_string(),
        uri: None,
    };
    let content = vec![text_block, image_block];

    let history = vec![UnifiedMessage {
        id: "user-0".to_string(),
        role: MessageRole::User,
        content: content.clone(),
        timestamp: sent_at,
        usage: None,
        duration_ms: None,
        model: None,
    }];

    let shortly_after = sent_at + Duration::milliseconds(1200);
    let long_after = sent_at + Duration::seconds(180);

    assert!(should_skip_duplicate_user_message(
        &history,
        &content,
        shortly_after,
    ));
    assert!(!should_skip_duplicate_user_message(
        &history,
        &content,
        long_after,
    ));
}
#[test]
fn summary_title_skips_injected_messages_and_uses_real_prompt() {
let nanos = SystemTime::now()