diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index a682c25..76f3d1b 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -751,6 +751,7 @@ checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" name = "codeg" version = "0.0.15" dependencies = [ + "base64 0.22.1", "bzip2", "chrono", "dirs", diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index f5f112d..7e1fb1a 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -45,6 +45,7 @@ sea-orm = { version = "1.1", features = ["sqlx-sqlite", "runtime-tokio-rustls", sea-orm-migration = { version = "1.1", features = ["sqlx-sqlite", "runtime-tokio-rustls"] } toml = "0.8" notify = "6" +base64 = "0.22" [target.'cfg(not(any(target_os = "android", target_os = "ios")))'.dependencies] tauri-plugin-window-state = "2" diff --git a/src-tauri/src/acp/connection.rs b/src-tauri/src/acp/connection.rs index 36101d4..3d1f1ea 100644 --- a/src-tauri/src/acp/connection.rs +++ b/src-tauri/src/acp/connection.rs @@ -4,19 +4,21 @@ use std::sync::Arc; use sacp::schema::McpServerStdio; use sacp::schema::{ - CancelNotification, ClientCapabilities, ContentBlock, ContentChunk, CreateTerminalRequest, - CreateTerminalResponse, FileSystemCapability, InitializeRequest, KillTerminalCommandRequest, + BlobResourceContents, CancelNotification, ClientCapabilities, ContentBlock, ContentChunk, + CreateTerminalRequest, CreateTerminalResponse, EmbeddedResource, EmbeddedResourceResource, + FileSystemCapability, ImageContent, InitializeRequest, KillTerminalCommandRequest, KillTerminalCommandResponse, LoadSessionRequest, NewSessionRequest, NewSessionResponse, - PermissionOptionKind, Plan, PlanEntryPriority, PlanEntryStatus, PromptRequest, ProtocolVersion, - ReadTextFileRequest, ReadTextFileResponse, ReleaseTerminalRequest, ReleaseTerminalResponse, - RequestPermissionOutcome, RequestPermissionRequest, RequestPermissionResponse, ResourceLink, - SelectedPermissionOutcome, SessionConfigKind, SessionConfigOption, SessionConfigOptionCategory, - SessionConfigSelectGroup, SessionConfigSelectOption, SessionConfigSelectOptions, SessionId, - SessionModeState, SessionNotification, SessionUpdate, SetSessionConfigOptionRequest, + PermissionOptionKind, Plan, PlanEntryPriority, PlanEntryStatus, PromptRequest, + ProtocolVersion, ReadTextFileRequest, ReadTextFileResponse, ReleaseTerminalRequest, + ReleaseTerminalResponse, RequestPermissionOutcome, RequestPermissionRequest, + RequestPermissionResponse, ResourceLink, SelectedPermissionOutcome, SessionConfigKind, + SessionConfigOption, SessionConfigOptionCategory, SessionConfigSelectGroup, + SessionConfigSelectOption, SessionConfigSelectOptions, SessionId, SessionModeState, + SessionNotification, SessionUpdate, SetSessionConfigOptionRequest, SetSessionConfigOptionResponse, SetSessionModeRequest, StopReason, TerminalExitStatus, - TerminalOutputRequest, TerminalOutputResponse, TextContent, ToolCallContent, - WaitForTerminalExitRequest, WaitForTerminalExitResponse, WriteTextFileRequest, - WriteTextFileResponse, + TerminalOutputRequest, TerminalOutputResponse, TextContent, TextResourceContents, + ToolCallContent, WaitForTerminalExitRequest, WaitForTerminalExitResponse, + WriteTextFileRequest, WriteTextFileResponse, }; use sacp::util::MatchDispatch; use sacp::{ @@ -32,9 +34,9 @@ use crate::acp::registry::{self, AgentDistribution}; use crate::acp::terminal_runtime::{TerminalRuntime, TerminalRuntimeError}; use crate::acp::types::{ AcpEvent, AvailableCommandInfo, ConnectionInfo, ConnectionStatus, PermissionOptionInfo, - PlanEntryInfo, PromptInputBlock, SessionConfigKindInfo, SessionConfigOptionInfo, - SessionConfigSelectGroupInfo, SessionConfigSelectInfo, SessionConfigSelectOptionInfo, - SessionModeInfo, SessionModeStateInfo, + PlanEntryInfo, PromptCapabilitiesInfo, PromptInputBlock, SessionConfigKindInfo, + SessionConfigOptionInfo, SessionConfigSelectGroupInfo, SessionConfigSelectInfo, + SessionConfigSelectOptionInfo, SessionModeInfo, SessionModeStateInfo, }; use crate::models::agent::AgentType; use crate::network::proxy; @@ -448,6 +450,24 @@ fn emit_selectors_ready(connection_id: &str, app_handle: &tauri::AppHandle) { ); } +fn emit_prompt_capabilities( + connection_id: &str, + app_handle: &tauri::AppHandle, + capabilities: &sacp::schema::PromptCapabilities, +) { + let _ = app_handle.emit( + "acp://event", + AcpEvent::PromptCapabilities { + connection_id: connection_id.into(), + prompt_capabilities: PromptCapabilitiesInfo { + image: capabilities.image, + audio: capabilities.audio, + embedded_context: capabilities.embedded_context, + }, + }, + ); +} + fn resolve_working_dir(working_dir: Option<&str>) -> PathBuf { match working_dir { Some(dir) => { @@ -591,7 +611,12 @@ async fn run_connection( .read_text_file(true) .write_text_file(true)), ); - let _init_resp = cx.send_request_to(Agent, init_request).block_task().await?; + let init_resp = cx.send_request_to(Agent, init_request).block_task().await?; + emit_prompt_capabilities( + &conn_id, + &handle, + &init_resp.agent_capabilities.prompt_capabilities, + ); // Emit connected status let _ = handle.emit( @@ -1128,6 +1153,35 @@ fn map_prompt_blocks(blocks: Vec) -> Vec { .into_iter() .map(|block| match block { PromptInputBlock::Text { text } => ContentBlock::Text(TextContent::new(text)), + PromptInputBlock::Image { + data, + mime_type, + uri, + } => ContentBlock::Image(ImageContent::new(data, mime_type).uri(uri)), + PromptInputBlock::Resource { + uri, + mime_type, + text, + blob, + } => { + let resource = match (text, blob) { + (Some(text_value), _) => { + let content = + TextResourceContents::new(text_value, uri.clone()).mime_type(mime_type); + EmbeddedResourceResource::TextResourceContents(content) + } + (None, Some(blob_value)) => { + let content = + BlobResourceContents::new(blob_value, uri.clone()).mime_type(mime_type); + EmbeddedResourceResource::BlobResourceContents(content) + } + (None, None) => { + let content = TextResourceContents::new("", uri.clone()).mime_type(mime_type); + EmbeddedResourceResource::TextResourceContents(content) + } + }; + ContentBlock::Resource(EmbeddedResource::new(resource)) + } PromptInputBlock::ResourceLink { uri, name, diff --git a/src-tauri/src/acp/types.rs b/src-tauri/src/acp/types.rs index 65a6bcc..197b6ae 100644 --- a/src-tauri/src/acp/types.rs +++ b/src-tauri/src/acp/types.rs @@ -7,6 +7,21 @@ pub enum PromptInputBlock { Text { text: String, }, + Image { + data: String, + mime_type: String, + #[serde(default)] + uri: Option, + }, + Resource { + uri: String, + #[serde(default)] + mime_type: Option, + #[serde(default)] + text: Option, + #[serde(default)] + blob: Option, + }, ResourceLink { uri: String, name: String, @@ -17,6 +32,13 @@ pub enum PromptInputBlock { }, } +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct PromptCapabilitiesInfo { + pub image: bool, + pub audio: bool, + pub embedded_context: bool, +} + /// Events pushed from Rust backend to frontend via Tauri event system. #[derive(Debug, Clone, Serialize)] #[serde(tag = "type", rename_all = "snake_case")] @@ -77,6 +99,11 @@ pub enum AcpEvent { }, /// Initial selector payloads (modes/config options) have been emitted SelectorsReady { connection_id: String }, + /// Prompt capabilities for this connection + PromptCapabilities { + connection_id: String, + prompt_capabilities: PromptCapabilitiesInfo, + }, /// Current session mode changed ModeChanged { connection_id: String, diff --git a/src-tauri/src/commands/folders.rs b/src-tauri/src/commands/folders.rs index a52d079..396a8bd 100644 --- a/src-tauri/src/commands/folders.rs +++ b/src-tauri/src/commands/folders.rs @@ -7,6 +7,7 @@ use std::process::{Command, Stdio}; use std::sync::{mpsc, LazyLock, Mutex}; use std::time::{Duration, Instant, UNIX_EPOCH}; +use base64::Engine as _; use notify::{EventKind, RecommendedWatcher, RecursiveMode, Watcher}; use serde::Serialize; use tauri::Emitter; @@ -1161,6 +1162,8 @@ const FILE_PREVIEW_MIN_BYTES: usize = 4_096; const FILE_PREVIEW_MAX_BYTES: usize = 2_000_000; const FILE_EDIT_DEFAULT_MAX_BYTES: usize = 400_000; const FILE_EDIT_MAX_BYTES: usize = 2_000_000; +const FILE_BASE64_DEFAULT_MAX_BYTES: usize = 20_000_000; +const FILE_BASE64_MAX_BYTES: usize = 100_000_000; const FILE_IO_MAX_CONCURRENT_OPS: usize = 8; static FILE_IO_SEMAPHORE: LazyLock = @@ -2028,6 +2031,47 @@ pub async fn get_file_tree( Ok(dir_children.remove(&root).unwrap_or_default()) } +#[tauri::command] +pub async fn read_file_base64( + path: String, + max_bytes: Option, +) -> Result { + let trimmed = path.trim(); + if trimmed.is_empty() { + return Err(AppCommandError::invalid_input("Path cannot be empty")); + } + let target = PathBuf::from(trimmed); + if !target.exists() { + return Err(AppCommandError::not_found("File does not exist")); + } + if !target.is_file() { + return Err(AppCommandError::invalid_input("Path is not a file")); + } + + let limit = max_bytes + .unwrap_or(FILE_BASE64_DEFAULT_MAX_BYTES) + .clamp(FILE_PREVIEW_MIN_BYTES, FILE_BASE64_MAX_BYTES); + + run_file_io(move || { + let metadata = std::fs::metadata(&target).map_err(AppCommandError::io)?; + if metadata.len() > limit as u64 { + return Err( + AppCommandError::invalid_input("File is too large to attach") + .with_detail(format!("max_bytes={limit}")), + ); + } + let bytes = std::fs::read(&target).map_err(AppCommandError::io)?; + if bytes.len() > limit { + return Err( + AppCommandError::invalid_input("File is too large to attach") + .with_detail(format!("max_bytes={limit}")), + ); + } + Ok(base64::engine::general_purpose::STANDARD.encode(bytes)) + }) + .await +} + #[tauri::command] pub async fn read_file_preview( root_path: String, diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index d14b9d1..2166fcd 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -206,6 +206,7 @@ pub fn run() { folders::start_file_tree_watch, folders::stop_file_tree_watch, folders::get_file_tree, + folders::read_file_base64, folders::read_file_preview, folders::read_file_for_edit, folders::save_file_content, diff --git a/src-tauri/src/models/message.rs b/src-tauri/src/models/message.rs index d528676..774d98e 100644 --- a/src-tauri/src/models/message.rs +++ b/src-tauri/src/models/message.rs @@ -16,6 +16,12 @@ pub enum ContentBlock { Text { text: String, }, + Image { + data: String, + mime_type: String, + #[serde(skip_serializing_if = "Option::is_none")] + uri: Option, + }, ToolUse { tool_use_id: Option, tool_name: String, diff --git a/src-tauri/src/parsers/codex.rs b/src-tauri/src/parsers/codex.rs index a201866..be8da6a 100644 --- a/src-tauri/src/parsers/codex.rs +++ b/src-tauri/src/parsers/codex.rs @@ -501,20 +501,49 @@ impl CodexParser { .unwrap_or("") .to_string(); let normalized = strip_blocked_resource_mentions(&text); - let message_text = if normalized.is_empty() { - "Attached resources".to_string() - } else { - normalized - }; + let mut blocks: Vec = Vec::new(); + if !normalized.is_empty() { + blocks.push(ContentBlock::Text { text: normalized }); + } + + if let Some(images) = + payload.get("images").and_then(|v| v.as_array()) + { + for image in images { + let Some(raw) = image.as_str() else { + continue; + }; + let Some((mime_type, data)) = parse_data_uri_image(raw) + else { + continue; + }; + blocks.push(ContentBlock::Image { + data, + mime_type, + uri: None, + }); + } + } + + if blocks.is_empty() { + blocks.push(ContentBlock::Text { + text: "Attached resources".to_string(), + }); + } if title.is_none() { title = extract_codex_title_candidate(&text, true); } + if should_skip_duplicate_user_message(&messages, &blocks, timestamp) + { + continue; + } + messages.push(UnifiedMessage { id: format!("user-{}", messages.len()), role: MessageRole::User, - content: vec![ContentBlock::Text { text: message_text }], + content: blocks, timestamp, usage: None, duration_ms: None, @@ -690,6 +719,40 @@ impl CodexParser { model: None, }); } + "message" => { + let role = + payload.get("role").and_then(|r| r.as_str()).unwrap_or(""); + if role == "user" { + if let Some(blocks) = + extract_response_item_user_image_blocks(payload) + { + if should_skip_duplicate_user_message( + &messages, &blocks, timestamp, + ) { + continue; + } + + if title.is_none() { + if let Some(text) = first_text_block(&blocks) { + title = extract_codex_title_candidate( + text.as_str(), + true, + ); + } + } + + messages.push(UnifiedMessage { + id: format!("user-{}", messages.len()), + role: MessageRole::User, + content: blocks, + timestamp, + usage: None, + duration_ms: None, + model: None, + }); + } + } + } _ => {} } } @@ -946,10 +1009,138 @@ fn extract_codex_text_content(payload: &serde_json::Value) -> Option { None } +fn parse_data_uri_image(raw: &str) -> Option<(String, String)> { + let trimmed = raw.trim(); + if !trimmed.starts_with("data:") { + return None; + } + let marker = ";base64,"; + let marker_idx = trimmed.find(marker)?; + let mime_type = trimmed.get(5..marker_idx)?.trim(); + if !mime_type.starts_with("image/") { + return None; + } + let data = trimmed.get(marker_idx + marker.len()..)?.trim(); + if data.is_empty() { + return None; + } + Some((mime_type.to_string(), data.to_string())) +} + +fn parse_input_image_data_uri(item: &serde_json::Value) -> Option<(String, String)> { + let data_uri = item + .get("image_url") + .and_then(|v| v.as_str()) + .or_else(|| { + item.get("image_url") + .and_then(|v| v.get("url")) + .and_then(|v| v.as_str()) + }) + .or_else(|| item.get("url").and_then(|v| v.as_str()))?; + parse_data_uri_image(data_uri) +} + +fn first_text_block(blocks: &[ContentBlock]) -> Option { + blocks.iter().find_map(|block| match block { + ContentBlock::Text { text } => Some(text.clone()), + _ => None, + }) +} + +fn blocks_equal(a: &[ContentBlock], b: &[ContentBlock]) -> bool { + if a.len() != b.len() { + return false; + } + serde_json::to_value(a).ok() == serde_json::to_value(b).ok() +} + +fn should_skip_duplicate_user_message( + messages: &[UnifiedMessage], + blocks: &[ContentBlock], + timestamp: DateTime, +) -> bool { + // Some Codex logs emit the same user message through both `response_item` + // and `event_msg`, sometimes with a non-trivial delay. Deduplicate by + // content in a bounded recent time window. + const DUP_WINDOW_MS: i64 = 120_000; + + for msg in messages.iter().rev() { + if !matches!(msg.role, MessageRole::User) { + continue; + } + let delta_ms = (timestamp - msg.timestamp).num_milliseconds().abs(); + if delta_ms > DUP_WINDOW_MS { + break; + } + if blocks_equal(&msg.content, blocks) { + return true; + } + } + + false +} + +fn extract_response_item_user_image_blocks( + payload: &serde_json::Value, +) -> Option> { + let content = payload.get("content")?.as_array()?; + let mut blocks: Vec = Vec::new(); + let mut text_parts: Vec = Vec::new(); + let mut has_input_image = false; + + for item in content { + let item_type = item.get("type").and_then(|v| v.as_str()).unwrap_or(""); + match item_type { + "input_text" => { + let Some(text) = item.get("text").and_then(|v| v.as_str()) else { + continue; + }; + if text.trim() == "" { + continue; + } + if !text.is_empty() { + text_parts.push(text.to_string()); + } + } + "input_image" => { + has_input_image = true; + let Some((mime_type, data)) = parse_input_image_data_uri(item) else { + continue; + }; + blocks.push(ContentBlock::Image { + data, + mime_type, + uri: None, + }); + } + _ => {} + } + } + + if !has_input_image { + return None; + } + + let text = strip_blocked_resource_mentions(&text_parts.join("\n")); + if !text.is_empty() { + blocks.insert(0, ContentBlock::Text { text }); + } + + if blocks.is_empty() { + blocks.push(ContentBlock::Text { + text: "Attached resources".to_string(), + }); + } + + Some(blocks) +} + fn strip_blocked_resource_mentions(input: &str) -> String { let blocked_re = Regex::new(r"@([^\s@]+)\s*\[blocked[^\]]*\]").expect("valid blocked regex"); + let image_tag_re = Regex::new(r"(?i)").expect("valid image tag regex"); let collapsed_ws_re = Regex::new(r"[ \t]{2,}").expect("valid whitespace regex"); let text = blocked_re.replace_all(input, "").to_string(); + let text = image_tag_re.replace_all(&text, "").to_string(); let text = collapsed_ws_re.replace_all(&text, " ").to_string(); text.trim().to_string() } @@ -1030,12 +1221,16 @@ fn group_into_turns(messages: Vec) -> Vec { mod tests { use super::extract_codex_title_candidate; use super::extract_context_window_used_tokens_from_token_count_info; + use super::extract_response_item_user_image_blocks; use super::extract_turn_usage_from_codex_usage; use super::merge_codex_context_window_stats; use super::merge_codex_total_usage_stats; use super::resolve_codex_home_dir_from; + use super::should_skip_duplicate_user_message; + use super::strip_blocked_resource_mentions; use super::CodexParser; - use crate::models::{SessionStats, TurnUsage}; + use crate::models::{ContentBlock, MessageRole, SessionStats, TurnUsage, UnifiedMessage}; + use chrono::{Duration, Utc}; use std::env; use std::fs; use std::path::PathBuf; @@ -1063,6 +1258,75 @@ mod tests { assert_eq!(got.as_deref(), Some("修复 codex 会话标题")); } + #[test] + fn strips_image_placeholders_from_user_text() { + let input = "这个图片里面是什么\n\n\n"; + let got = strip_blocked_resource_mentions(input); + assert_eq!(got, "这个图片里面是什么"); + } + + #[test] + fn extracts_response_item_input_image_blocks() { + let payload = serde_json::json!({ + "content": [ + {"type": "input_text", "text": "这是什么东西"}, + {"type": "input_text", "text": ""}, + {"type": "input_image", "image_url": "data:image/png;base64,QUJD"} + ] + }); + + let blocks = extract_response_item_user_image_blocks(&payload).expect("blocks"); + assert_eq!(blocks.len(), 2); + match &blocks[0] { + ContentBlock::Text { text } => assert_eq!(text, "这是什么东西"), + _ => panic!("expected text block"), + } + match &blocks[1] { + ContentBlock::Image { + data, mime_type, .. + } => { + assert_eq!(mime_type, "image/png"); + assert_eq!(data, "QUJD"); + } + _ => panic!("expected image block"), + } + } + + #[test] + fn skips_duplicate_user_message_within_short_window() { + let now = Utc::now(); + let blocks = vec![ + ContentBlock::Text { + text: "hello".to_string(), + }, + ContentBlock::Image { + data: "QUJD".to_string(), + mime_type: "image/png".to_string(), + uri: None, + }, + ]; + let messages = vec![UnifiedMessage { + id: "user-0".to_string(), + role: MessageRole::User, + content: blocks.clone(), + timestamp: now, + usage: None, + duration_ms: None, + model: None, + }]; + + assert!(should_skip_duplicate_user_message( + &messages, + &blocks, + now + Duration::milliseconds(1200), + )); + assert!(!should_skip_duplicate_user_message( + &messages, + &blocks, + now + Duration::seconds(180), + )); + } + #[test] fn summary_title_skips_injected_messages_and_uses_real_prompt() { let nanos = SystemTime::now() diff --git a/src/components/chat/chat-input.tsx b/src/components/chat/chat-input.tsx index 8791533..d4c66e9 100644 --- a/src/components/chat/chat-input.tsx +++ b/src/components/chat/chat-input.tsx @@ -3,6 +3,7 @@ import { useTranslations } from "next-intl" import type { ConnectionStatus, + PromptCapabilitiesInfo, PromptDraft, SessionConfigOptionInfo, SessionModeInfo, @@ -12,6 +13,7 @@ import { MessageInput } from "@/components/chat/message-input" interface ChatInputProps { status: ConnectionStatus | null + promptCapabilities: PromptCapabilitiesInfo defaultPath?: string onFocus?: () => void onSend: (draft: PromptDraft, modeId?: string | null) => void @@ -30,6 +32,7 @@ interface ChatInputProps { export function ChatInput({ status, + promptCapabilities, defaultPath, onFocus, onSend, @@ -54,6 +57,7 @@ export function ChatInput({
void onConfigOptionChange?: (configId: string, valueId: string) => void availableCommands?: AvailableCommandInfo[] | null + promptCapabilities: PromptCapabilitiesInfo attachmentTabId?: string | null draftStorageKey?: string | null } -interface InputAttachment { - path: string +interface ResourceInputAttachment { + id: string + type: "resource" + kind: "link" | "embedded" uri: string name: string mimeType: string | null + text?: string | null + blob?: string | null } +interface ImageInputAttachment { + id: string + type: "image" + data: string + uri: string | null + name: string + mimeType: string +} + +type InputAttachment = ResourceInputAttachment | ImageInputAttachment + const MIME_BY_EXT: Record = { txt: "text/plain", md: "text/markdown", @@ -104,6 +124,102 @@ function toFileUri(path: string): string { return `file:///${encoded}` } +function hasDragFiles(dataTransfer: DataTransfer | null): boolean { + if (!dataTransfer?.types) return false + return Array.from(dataTransfer.types).includes("Files") +} + +function pointWithinElement( + position: { x: number; y: number }, + element: HTMLElement +): boolean { + const rect = element.getBoundingClientRect() + const dpr = window.devicePixelRatio || 1 + const candidates = [ + { x: position.x, y: position.y }, + { x: position.x / dpr, y: position.y / dpr }, + ] + return candidates.some( + (point) => + point.x >= rect.left && + point.x <= rect.right && + point.y >= rect.top && + point.y <= rect.bottom + ) +} + +function blobToBase64(blob: Blob): Promise { + return new Promise((resolve, reject) => { + const reader = new FileReader() + reader.onerror = () => { + reject(reader.error ?? new Error("Failed to read blob")) + } + reader.onload = () => { + if (typeof reader.result !== "string") { + reject(new Error("Unexpected non-string blob reader result")) + return + } + const markerIndex = reader.result.indexOf(",") + resolve( + markerIndex >= 0 ? reader.result.slice(markerIndex + 1) : reader.result + ) + } + reader.readAsDataURL(blob) + }) +} + +function getFilePath(file: File): string | null { + const withPath = file as File & { path?: string; webkitRelativePath?: string } + if (typeof withPath.path === "string" && withPath.path.trim().length > 0) { + return withPath.path + } + if ( + typeof withPath.webkitRelativePath === "string" && + withPath.webkitRelativePath.trim().length > 0 + ) { + return withPath.webkitRelativePath + } + return null +} + +const TEXT_LIKE_MIME_PREFIXES = [ + "text/", + "application/json", + "application/xml", + "application/yaml", + "application/x-yaml", + "application/toml", + "application/javascript", + "application/typescript", +] +const DRAG_DROP_IMAGE_MAX_BYTES = 20_000_000 + +function isTextLikeFile(file: File): boolean { + const mime = file.type.toLowerCase() + if (mime) { + if (TEXT_LIKE_MIME_PREFIXES.some((prefix) => mime.startsWith(prefix))) { + return true + } + } + const ext = file.name.split(".").pop()?.toLowerCase() + if (!ext) return false + return Boolean( + MIME_BY_EXT[ext]?.startsWith("text/") || + ["json", "yaml", "yml", "xml", "toml", "md", "csv"].includes(ext) + ) +} + +function buildClipboardResourceUri(name: string): string { + const normalizedName = name.trim() || "clipboard-resource" + return `clipboard://${encodeURIComponent(normalizedName)}-${crypto.randomUUID()}` +} + +function buildDataUri(base64Data: string, mimeType: string | null): string { + const safeMime = + mimeType && mimeType.trim() ? mimeType : "application/octet-stream" + return `data:${safeMime};base64,${base64Data}` +} + function SelectorLoadingChip({ label }: { label: string }) { return (
@@ -131,6 +247,7 @@ export function MessageInput({ onModeChange, onConfigOptionChange, availableCommands, + promptCapabilities, attachmentTabId, draftStorageKey, }: MessageInputProps) { @@ -142,8 +259,12 @@ export function MessageInput({ return loadMessageInputDraft(effectiveDraftStorageKey) ?? "" }) const [attachments, setAttachments] = useState([]) + const [isDragActive, setIsDragActive] = useState(false) + const containerRef = useRef(null) + const lastDomDropAtRef = useRef(0) const composingRef = useRef(false) const textRef = useRef(text) + const canAttachImages = promptCapabilities.image useEffect(() => { textRef.current = text @@ -176,6 +297,22 @@ export function MessageInput({ hasModes && Boolean(effectiveModeId) && !hasConfigOptions const showModeLoading = modeLoading && !hasConfigOptions && !showModeSelector const showConfigLoading = configOptionsLoading && !hasConfigOptions + const imageAttachments = useMemo( + () => + attachments.filter( + (attachment): attachment is ImageInputAttachment => + attachment.type === "image" + ), + [attachments] + ) + const resourceAttachments = useMemo( + () => + attachments.filter( + (attachment): attachment is ResourceInputAttachment => + attachment.type === "resource" + ), + [attachments] + ) const hasAttachments = attachments.length > 0 const hasSendableContent = text.trim().length > 0 || hasAttachments @@ -196,24 +333,292 @@ export function MessageInput({ ) }, [slashMenuOpen, slashCommands, text]) - const appendAttachments = useCallback((paths: string[]) => { - setAttachments((prev) => { - const seen = new Set(prev.map((item) => item.path)) - const next = [...prev] - for (const path of paths) { - if (typeof path !== "string" || !path || seen.has(path)) continue - seen.add(path) - next.push({ - path, - uri: toFileUri(path), - name: fileNameFromPath(path), - mimeType: mimeTypeFromPath(path), + const appendResourceLinks = useCallback( + ( + links: Array<{ + uri: string + name: string + mimeType: string | null + dedupeKey: string + }> + ) => { + if (links.length === 0) return + setAttachments((prev) => { + const seen = new Set( + prev.flatMap((item) => + item.type === "resource" && item.kind === "link" ? [item.uri] : [] + ) + ) + const next = [...prev] + for (const link of links) { + if (!link.uri || seen.has(link.dedupeKey)) continue + seen.add(link.dedupeKey) + next.push({ + id: `resource-link:${link.dedupeKey}`, + type: "resource", + kind: "link", + uri: link.uri, + name: link.name, + mimeType: link.mimeType, + }) + } + return next + }) + }, + [] + ) + + const appendResourceAttachments = useCallback( + (paths: string[]) => { + const normalized = paths + .filter( + (path): path is string => typeof path === "string" && path.length > 0 + ) + .map((path) => { + const uri = toFileUri(path) + return { + uri, + name: fileNameFromPath(path), + mimeType: mimeTypeFromPath(path), + dedupeKey: uri, + } }) + appendResourceLinks(normalized) + }, + [appendResourceLinks] + ) + + const appendEmbeddedResources = useCallback( + ( + resources: Array<{ + uri: string + name: string + mimeType: string | null + text?: string | null + blob?: string | null + }> + ) => { + if (resources.length === 0) return + setAttachments((prev) => [ + ...prev, + ...resources.map((resource) => ({ + id: `resource-embedded:${crypto.randomUUID()}`, + type: "resource" as const, + kind: "embedded" as const, + uri: resource.uri, + name: resource.name, + mimeType: resource.mimeType, + text: resource.text ?? null, + blob: resource.blob ?? null, + })), + ]) + }, + [] + ) + + const appendFilesAsResources = useCallback( + async (files: File[]) => { + if (files.length === 0) return + const pathLinks: Array<{ + uri: string + name: string + mimeType: string | null + dedupeKey: string + }> = [] + const fallbackDataLinks: Array<{ + uri: string + name: string + mimeType: string | null + dedupeKey: string + }> = [] + const embeddedResources: Array<{ + uri: string + name: string + mimeType: string | null + text?: string | null + blob?: string | null + }> = [] + + for (const file of files) { + const path = getFilePath(file) + const name = file.name || `resource-${crypto.randomUUID()}` + const mimeType = file.type || mimeTypeFromPath(name) + if (path) { + const uri = toFileUri(path) + pathLinks.push({ + uri, + name: fileNameFromPath(path), + mimeType: mimeTypeFromPath(path) ?? mimeType ?? null, + dedupeKey: uri, + }) + continue + } + + if (!promptCapabilities.embedded_context) { + const base64 = await blobToBase64(file) + const dataUri = buildDataUri(base64, mimeType ?? null) + fallbackDataLinks.push({ + uri: dataUri, + name, + mimeType: mimeType ?? null, + dedupeKey: `${name}:${file.size}:${file.lastModified}`, + }) + continue + } + + const uri = buildClipboardResourceUri(name) + if (isTextLikeFile(file)) { + const textContent = await file.text() + embeddedResources.push({ + uri, + name, + mimeType: mimeType ?? null, + text: textContent, + }) + } else { + const blobContent = await blobToBase64(file) + embeddedResources.push({ + uri, + name, + mimeType: mimeType ?? null, + blob: blobContent, + }) + } } - return next - }) + + appendResourceLinks(pathLinks) + appendResourceLinks(fallbackDataLinks) + appendEmbeddedResources(embeddedResources) + }, + [ + appendEmbeddedResources, + appendResourceLinks, + promptCapabilities.embedded_context, + ] + ) + + const appendImageAttachments = useCallback(async (files: File[]) => { + if (files.length === 0) return + const parsed = await Promise.all( + files.map(async (file, index) => { + const mimeType = + file.type && file.type.startsWith("image/") + ? file.type + : (mimeTypeFromPath(file.name) ?? "image/png") + const base64Data = await blobToBase64(file) + return { + id: `image:${Date.now()}:${index}:${crypto.randomUUID()}`, + type: "image" as const, + data: base64Data, + uri: null, + name: file.name || `image-${Date.now()}-${index + 1}`, + mimeType, + } + }) + ) + setAttachments((prev) => [...prev, ...parsed]) }, []) + const appendImagePathAttachments = useCallback( + async (paths: string[]) => { + if (paths.length === 0 || !canAttachImages) return + const settled = await Promise.allSettled( + paths.map(async (path, index) => { + const data = await readFileBase64(path, DRAG_DROP_IMAGE_MAX_BYTES) + return { + id: `image:${Date.now()}:${index}:${crypto.randomUUID()}`, + type: "image" as const, + data, + uri: toFileUri(path), + name: fileNameFromPath(path), + mimeType: mimeTypeFromPath(path) ?? "image/png", + } + }) + ) + + const parsed: ImageInputAttachment[] = [] + settled.forEach((result, index) => { + if (result.status === "fulfilled") { + parsed.push(result.value) + return + } + console.error( + `[MessageInput] drop image path failed (${paths[index]}):`, + result.reason + ) + }) + if (parsed.length === 0) return + setAttachments((prev) => [...prev, ...parsed]) + }, + [canAttachImages] + ) + + const appendPathsFromDrop = useCallback( + async (paths: string[]) => { + if (paths.length === 0) return + const normalized = paths.filter( + (path): path is string => typeof path === "string" && path.length > 0 + ) + if (normalized.length === 0) return + + const imagePaths: string[] = [] + const resourcePaths: string[] = [] + for (const path of normalized) { + const mimeType = mimeTypeFromPath(path) ?? "" + if (canAttachImages && mimeType.startsWith("image/")) { + imagePaths.push(path) + } else { + resourcePaths.push(path) + } + } + + if (imagePaths.length > 0) { + await appendImagePathAttachments(imagePaths) + } + if (resourcePaths.length > 0) { + appendResourceAttachments(resourcePaths) + } + }, + [appendImagePathAttachments, appendResourceAttachments, canAttachImages] + ) + + const appendFilesFromInput = useCallback( + async (files: File[]) => { + if (files.length === 0) return + const imageFiles: File[] = [] + const resourceFiles: File[] = [] + for (const file of files) { + const mimeType = file.type || mimeTypeFromPath(file.name) || "" + if (canAttachImages && mimeType.startsWith("image/")) { + imageFiles.push(file) + } else { + resourceFiles.push(file) + } + } + + if (imageFiles.length > 0) { + await appendImageAttachments(imageFiles) + } + if (resourceFiles.length > 0) { + await appendFilesAsResources(resourceFiles) + } + }, + [appendFilesAsResources, appendImageAttachments, canAttachImages] + ) + + const handlePaste = useCallback( + (event: React.ClipboardEvent) => { + if (disabled || isPrompting) return + const files = Array.from(event.clipboardData?.files ?? []) + if (files.length === 0) return + event.preventDefault() + void appendFilesFromInput(files).catch((error) => { + console.error("[MessageInput] paste files failed:", error) + }) + }, + [appendFilesFromInput, disabled, isPrompting] + ) + useEffect(() => { if (!showModeSelector) return if (!effectiveModeId || !onModeChange) return @@ -258,11 +663,11 @@ export function MessageInput({ }) if (!selected) return const picked = Array.isArray(selected) ? selected : [selected] - appendAttachments(picked.filter((item): item is string => !!item)) + appendResourceAttachments(picked.filter((item): item is string => !!item)) } catch (error) { console.error("[MessageInput] pick files failed:", error) } - }, [appendAttachments, defaultPath, disabled]) + }, [appendResourceAttachments, defaultPath, disabled]) useEffect(() => { if (!attachmentTabId) return @@ -271,17 +676,61 @@ export function MessageInput({ const customEvent = event as CustomEvent if (!customEvent.detail) return if (customEvent.detail.tabId !== attachmentTabId) return - appendAttachments([customEvent.detail.path]) + appendResourceAttachments([customEvent.detail.path]) } window.addEventListener(ATTACH_FILE_TO_SESSION_EVENT, handleAttachFile) return () => { window.removeEventListener(ATTACH_FILE_TO_SESSION_EVENT, handleAttachFile) } - }, [appendAttachments, attachmentTabId]) + }, [appendResourceAttachments, attachmentTabId]) - const removeAttachment = useCallback((path: string) => { - setAttachments((prev) => prev.filter((item) => item.path !== path)) + useEffect(() => { + let unlisten: (() => void) | null = null + let cancelled = false + + getCurrentWebview() + .onDragDropEvent((event) => { + const host = containerRef.current + if (!host) return + const payload = event.payload + if (payload.type === "leave") { + setIsDragActive(false) + return + } + const inside = pointWithinElement(payload.position, host) + if (payload.type === "drop") { + setIsDragActive(false) + if (Date.now() - lastDomDropAtRef.current < 250) return + if (!inside || disabled || isPrompting) return + void appendPathsFromDrop(payload.paths).catch((error) => { + console.error("[MessageInput] drag drop paths failed:", error) + }) + return + } + setIsDragActive(inside && !disabled && !isPrompting) + }) + .then((fn) => { + if (cancelled) { + fn() + } else { + unlisten = fn + } + }) + .catch(() => { + // Ignore non-Tauri environments. + }) + + return () => { + cancelled = true + if (unlisten) { + unlisten() + } + } + }, [appendPathsFromDrop, disabled, isPrompting]) + + const removeAttachment = useCallback((id: string) => { + setAttachments((prev) => prev.filter((item) => item.id !== id)) }, []) const handleSend = useCallback(() => { @@ -293,18 +742,37 @@ export function MessageInput({ blocks.push({ type: "text", text: trimmed }) } for (const attachment of attachments) { - blocks.push({ - type: "resource_link", - uri: attachment.uri, - name: attachment.name, - mime_type: attachment.mimeType, - description: null, - }) + if (attachment.type === "resource") { + if (attachment.kind === "link") { + blocks.push({ + type: "resource_link", + uri: attachment.uri, + name: attachment.name, + mime_type: attachment.mimeType, + description: null, + }) + } else { + blocks.push({ + type: "resource", + uri: attachment.uri, + mime_type: attachment.mimeType, + text: attachment.text ?? null, + blob: attachment.blob ?? null, + }) + } + } else { + blocks.push({ + type: "image", + data: attachment.data, + mime_type: attachment.mimeType, + uri: attachment.uri, + }) + } } const displayText = trimmed || - `Attached ${attachments.length} resource${attachments.length > 1 ? "s" : ""}` + `Attached ${attachments.length} attachment${attachments.length > 1 ? "s" : ""}` onSend({ blocks, displayText }, showModeSelector ? effectiveModeId : null) if (effectiveDraftStorageKey) { clearMessageInputDraft(effectiveDraftStorageKey) @@ -372,11 +840,70 @@ export function MessageInput({ ] ) + const handleContainerDragOver = useCallback( + (event: React.DragEvent) => { + if (!hasDragFiles(event.dataTransfer)) return + event.preventDefault() + if (!disabled && !isPrompting) { + setIsDragActive(true) + } + }, + [disabled, isPrompting] + ) + + const handleContainerDragLeave = useCallback( + (event: React.DragEvent) => { + const related = event.relatedTarget + if ( + related && + related instanceof Node && + event.currentTarget.contains(related) + ) { + return + } + setIsDragActive(false) + }, + [] + ) + + const handleContainerDrop = useCallback( + (event: React.DragEvent) => { + if (!hasDragFiles(event.dataTransfer)) return + event.preventDefault() + lastDomDropAtRef.current = Date.now() + setIsDragActive(false) + if (disabled || isPrompting) return + const files = Array.from(event.dataTransfer.files ?? []) + if (files.length > 0) { + void appendFilesFromInput(files).catch((error) => { + console.error("[MessageInput] drop files failed:", error) + }) + } + }, + [appendFilesFromInput, disabled, isPrompting] + ) + + const hasImageAttachments = imageAttachments.length > 0 + const hasResourceAttachments = resourceAttachments.length > 0 + const topPaddingClass = + hasImageAttachments && hasResourceAttachments + ? "pt-24" + : hasImageAttachments + ? "pt-16" + : hasResourceAttachments + ? "pt-10" + : "pt-3" const bottomPaddingClass = "pb-10" - const topPaddingClass = hasAttachments ? "pt-10" : "" + const showDragActive = isDragActive && !disabled && !isPrompting return ( -
+
{slashMenuOpen && filteredSlashCommands.length > 0 && ( (composingRef.current = true)} onCompositionEnd={() => (composingRef.current = false)} + onPaste={handlePaste} onFocus={onFocus} placeholder={resolvedPlaceholder} className={cn( "text-sm pr-12 resize-none bg-transparent", + showDragActive && "ring-1 ring-primary/40", topPaddingClass, bottomPaddingClass, className )} autoFocus={autoFocus} /> - {hasAttachments && ( -
-
- {attachments.map((attachment) => ( -
- - {attachment.name} - -
- ))} -
+ {attachment.name} + +
+ ))} +
+ )} + {hasResourceAttachments && ( +
+ {resourceAttachments.map((attachment) => ( +
+ + {attachment.name} + +
+ ))} +
+ )} +
+ )} + {showDragActive && ( +
+ {t("dropFilesToAttach")}
)}
diff --git a/src/components/chat/welcome-input-panel.tsx b/src/components/chat/welcome-input-panel.tsx index 8be580b..2ee5753 100644 --- a/src/components/chat/welcome-input-panel.tsx +++ b/src/components/chat/welcome-input-panel.tsx @@ -16,6 +16,7 @@ import { } from "@/lib/adapters/ai-elements-adapter" import { buildUserMessageTextPartsFromDraft, + extractUserImagesFromDraft, extractUserResourcesFromDraft, getPromptDraftDisplayText, } from "@/lib/prompt-draft" @@ -41,6 +42,7 @@ import { AgentPlanOverlay } from "@/components/chat/agent-plan-overlay" import { LiveTurnStats } from "@/components/message/live-turn-stats" import { TurnStats } from "@/components/message/turn-stats" import { UserResourceLinks } from "@/components/message/user-resource-links" +import { UserImageAttachments } from "@/components/message/user-image-attachments" import { ConversationShell } from "@/components/chat/conversation-shell" import { MessageThread, @@ -604,6 +606,7 @@ export function WelcomeInputPanel({ draft, sharedT("attachedResources") ), + userImages: extractUserImagesFromDraft(draft), userResources: extractUserResourcesFromDraft(draft), timestamp: new Date().toISOString(), } @@ -683,6 +686,7 @@ export function WelcomeInputPanel({ draft, sharedT("attachedResources") ), + userImages: extractUserImagesFromDraft(draft), userResources: extractUserResourcesFromDraft(draft), timestamp: new Date().toISOString(), } @@ -791,6 +795,7 @@ export function WelcomeInputPanel({ + {msg.role === "user" && msg.userImages?.length ? ( + + ) : null} {msg.role === "user" && msg.userResources?.length ? ( + {group.role === "user" && group.images.length > 0 ? ( + + ) : null} {group.role === "user" && group.resources.length > 0 ? ( ) : null} @@ -152,6 +165,9 @@ const PendingMessageGroup = memo(function PendingMessageGroup({ + {group.role === "user" && group.images.length > 0 ? ( + + ) : null} {group.role === "user" && group.resources.length > 0 ? ( ) : null} diff --git a/src/components/message/user-image-attachments.tsx b/src/components/message/user-image-attachments.tsx new file mode 100644 index 0000000..c551834 --- /dev/null +++ b/src/components/message/user-image-attachments.tsx @@ -0,0 +1,38 @@ +"use client" + +import Image from "next/image" +import type { UserImageDisplay } from "@/lib/adapters/ai-elements-adapter" + +interface UserImageAttachmentsProps { + images: UserImageDisplay[] + className?: string +} + +export function UserImageAttachments({ + images, + className, +}: UserImageAttachmentsProps) { + if (images.length === 0) return null + + return ( +
+
+ {images.map((image, index) => ( +
+ {image.name} +
+ ))} +
+
+ ) +} diff --git a/src/contexts/acp-connections-context.tsx b/src/contexts/acp-connections-context.tsx index a4f6aff..2b36a94 100644 --- a/src/contexts/acp-connections-context.tsx +++ b/src/contexts/acp-connections-context.tsx @@ -33,6 +33,7 @@ import type { SessionConfigOptionInfo, SessionModeStateInfo, FixAction, + PromptCapabilitiesInfo, PromptInputBlock, } from "@/lib/types" import { AGENT_LABELS } from "@/lib/types" @@ -80,6 +81,7 @@ export interface ConnectionState { contextKey: string agentType: AgentType status: ConnectionStatus + promptCapabilities: PromptCapabilitiesInfo selectorsReady: boolean sessionId: string | null modes: SessionModeStateInfo | null @@ -157,6 +159,11 @@ type Action = type: "SELECTORS_READY" contextKey: string } + | { + type: "PROMPT_CAPABILITIES" + contextKey: string + promptCapabilities: PromptCapabilitiesInfo + } | { type: "MODE_CHANGED"; contextKey: string; modeId: string } | { type: "PLAN_UPDATE" @@ -266,6 +273,17 @@ function sameModes( return true } +function samePromptCapabilities( + a: PromptCapabilitiesInfo, + b: PromptCapabilitiesInfo +): boolean { + return ( + a.image === b.image && + a.audio === b.audio && + a.embedded_context === b.embedded_context + ) +} + function samePlanEntries(a: PlanEntryInfo[], b: PlanEntryInfo[]): boolean { if (a === b) return true if (a.length !== b.length) return false @@ -412,6 +430,11 @@ function connectionsReducer( contextKey: action.contextKey, agentType: action.agentType, status: "connecting", + promptCapabilities: { + image: false, + audio: false, + embedded_context: false, + }, selectorsReady: false, sessionId: null, modes: null, @@ -757,6 +780,25 @@ function connectionsReducer( return next } + case "PROMPT_CAPABILITIES": { + const conn = state.get(action.contextKey) + if (!conn) return state + if ( + samePromptCapabilities( + conn.promptCapabilities, + action.promptCapabilities + ) + ) { + return state + } + const next = new Map(state) + next.set(action.contextKey, { + ...conn, + promptCapabilities: action.promptCapabilities, + }) + return next + } + case "MODE_CHANGED": { const conn = state.get(action.contextKey) if (!conn?.modes) return state @@ -1307,6 +1349,14 @@ export function AcpConnectionsProvider({ children }: { children: ReactNode }) { contextKey, }) break + case "prompt_capabilities": + flushStreamingQueue() + dispatch({ + type: "PROMPT_CAPABILITIES", + contextKey, + promptCapabilities: e.prompt_capabilities, + }) + break case "mode_changed": flushStreamingQueue() dispatch({ diff --git a/src/hooks/use-connection.ts b/src/hooks/use-connection.ts index ca7a488..c39e943 100644 --- a/src/hooks/use-connection.ts +++ b/src/hooks/use-connection.ts @@ -13,14 +13,22 @@ import type { AgentType, AvailableCommandInfo, ConnectionStatus, + PromptCapabilitiesInfo, SessionConfigOptionInfo, SessionModeStateInfo, PromptInputBlock, } from "@/lib/types" +const DEFAULT_PROMPT_CAPABILITIES: PromptCapabilitiesInfo = { + image: false, + audio: false, + embedded_context: false, +} + export interface UseConnectionReturn { connectionId: string | null status: ConnectionStatus | null + promptCapabilities: PromptCapabilitiesInfo selectorsReady: boolean sessionId: string | null modes: SessionModeStateInfo | null @@ -64,6 +72,8 @@ export function useConnection(contextKey: string): UseConnectionReturn { const connectionId = connection?.connectionId ?? null const status = connection?.status ?? null + const promptCapabilities = + connection?.promptCapabilities ?? DEFAULT_PROMPT_CAPABILITIES const selectorsReady = connection?.selectorsReady ?? false const sessionId = connection?.sessionId ?? null const modes = connection?.modes ?? null @@ -119,6 +129,7 @@ export function useConnection(contextKey: string): UseConnectionReturn { () => ({ connectionId, status, + promptCapabilities, selectorsReady, sessionId, modes, @@ -138,6 +149,7 @@ export function useConnection(contextKey: string): UseConnectionReturn { [ connectionId, status, + promptCapabilities, selectorsReady, sessionId, modes, diff --git a/src/i18n/messages/en.json b/src/i18n/messages/en.json index cd8887a..f3c867a 100644 --- a/src/i18n/messages/en.json +++ b/src/i18n/messages/en.json @@ -1123,6 +1123,7 @@ "askAnything": "Ask anything...", "removeAttachmentAria": "Remove {name}", "attachFiles": "Attach files", + "dropFilesToAttach": "Drop files to attach", "loadingSettings": "Loading settings...", "loadingMode": "Loading mode...", "cancel": "Cancel", diff --git a/src/i18n/messages/zh-CN.json b/src/i18n/messages/zh-CN.json index e7c97a8..58e1ecb 100644 --- a/src/i18n/messages/zh-CN.json +++ b/src/i18n/messages/zh-CN.json @@ -1123,6 +1123,7 @@ "askAnything": "请开始输入...", "removeAttachmentAria": "移除 {name}", "attachFiles": "附加文件", + "dropFilesToAttach": "拖拽文件到此处附加", "loadingSettings": "正在加载设置...", "loadingMode": "正在加载模式...", "cancel": "取消", diff --git a/src/i18n/messages/zh-TW.json b/src/i18n/messages/zh-TW.json index d336126..3917f21 100644 --- a/src/i18n/messages/zh-TW.json +++ b/src/i18n/messages/zh-TW.json @@ -1123,6 +1123,7 @@ "askAnything": "請開始輸入...", "removeAttachmentAria": "移除 {name}", "attachFiles": "附加檔案", + "dropFilesToAttach": "拖曳檔案到此處附加", "loadingSettings": "正在載入設定...", "loadingMode": "正在載入模式...", "cancel": "取消", diff --git a/src/lib/adapters/ai-elements-adapter.ts b/src/lib/adapters/ai-elements-adapter.ts index a41776f..5c23ec7 100644 --- a/src/lib/adapters/ai-elements-adapter.ts +++ b/src/lib/adapters/ai-elements-adapter.ts @@ -43,6 +43,13 @@ export interface UserResourceDisplay { mime_type?: string | null } +export interface UserImageDisplay { + name: string + data: string + mime_type: string + uri?: string | null +} + const BLOCKED_RESOURCE_MENTION_RE = /@([^\s@]+)\s*\[blocked[^\]]*\]/gi const MARKDOWN_LINK_RE = /\[([^\]]+)\]\(([^)]+)\)/g @@ -54,6 +61,7 @@ export interface AdaptedMessage { role: MessageRole content: AdaptedContentPart[] userResources?: UserResourceDisplay[] + userImages?: UserImageDisplay[] timestamp: string usage?: TurnUsage | null duration_ms?: number | null @@ -398,6 +406,20 @@ function addResource( resources.push(resource) } +function addImage(images: UserImageDisplay[], image: UserImageDisplay) { + const key = `${image.mime_type}:${image.data.length}:${image.data.slice(0, 64)}` + if ( + images.some( + (item) => + `${item.mime_type}:${item.data.length}:${item.data.slice(0, 64)}` === + key + ) + ) { + return + } + images.push(image) +} + export function extractUserResourcesFromText(text: string): { text: string resources: UserResourceDisplay[] @@ -480,6 +502,33 @@ function splitUserTextAndResources( return { parts: nextParts, resources } } +function deriveImageNameFromBlock( + block: Extract +): string { + if (block.uri && block.uri.trim().length > 0) { + return fileNameFromUri(block.uri) + } + const ext = block.mime_type.split("/")[1]?.split("+")[0] ?? "image" + return `image.${ext}` +} + +function extractUserImagesFromBlocks( + blocks: ContentBlock[] +): UserImageDisplay[] { + const images: UserImageDisplay[] = [] + for (const block of blocks) { + if (block.type !== "image") continue + if (!block.data || !block.mime_type) continue + addImage(images, { + name: deriveImageNameFromBlock(block), + data: block.data, + mime_type: block.mime_type, + uri: block.uri ?? null, + }) + } + return images +} + /** * Generate a stable tool call ID based on message ID and block index */ @@ -661,6 +710,8 @@ export function adaptMessageTurn( turn.role === "user" ? splitUserTextAndResources(adaptedContent, text.attachedResources) : { parts: adaptedContent, resources: [] as UserResourceDisplay[] } + const userImages = + turn.role === "user" ? extractUserImagesFromBlocks(turn.blocks) : [] return { id: turn.id, @@ -668,6 +719,7 @@ export function adaptMessageTurn( content: userSplit.parts, userResources: userSplit.resources.length > 0 ? userSplit.resources : undefined, + userImages: userImages.length > 0 ? userImages : undefined, timestamp: turn.timestamp, usage: turn.usage, duration_ms: turn.duration_ms, @@ -695,6 +747,7 @@ export interface MessageGroup { role: "user" | "assistant" | "system" parts: AdaptedContentPart[] userResources?: UserResourceDisplay[] + userImages?: UserImageDisplay[] usage?: TurnUsage | null duration_ms?: number | null model?: string | null @@ -738,6 +791,7 @@ export function groupAdaptedMessages( role: effectiveRole, parts: [...msg.content], userResources: msg.userResources, + userImages: msg.userImages, }) } else { if (currentGroup && currentGroup.role === "assistant") { diff --git a/src/lib/prompt-draft.ts b/src/lib/prompt-draft.ts index 32cd61c..7ad9cbe 100644 --- a/src/lib/prompt-draft.ts +++ b/src/lib/prompt-draft.ts @@ -1,5 +1,6 @@ import type { AdaptedContentPart, + UserImageDisplay, UserResourceDisplay, } from "@/lib/adapters/ai-elements-adapter" import type { PromptDraft, PromptInputBlock } from "@/lib/types" @@ -10,6 +11,35 @@ function isResourceLinkBlock( return block.type === "resource_link" } +function isEmbeddedResourceBlock( + block: PromptInputBlock +): block is Extract { + return block.type === "resource" +} + +function isImageBlock( + block: PromptInputBlock +): block is Extract { + return block.type === "image" +} + +function deriveResourceNameFromUri(uri: string): string { + const fallback = "resource" + const normalized = uri.trim() + if (!normalized) return fallback + const withoutQuery = normalized.split(/[?#]/, 1)[0] + const candidate = withoutQuery.split(/[\\/]/).pop() ?? "" + let decoded = "" + if (candidate) { + try { + decoded = decodeURIComponent(candidate) + } catch { + decoded = candidate + } + } + return decoded || fallback +} + export function getPromptDraftDisplayText( draft: PromptDraft, attachedResourcesFallback: string @@ -33,9 +63,40 @@ export function buildUserMessageTextPartsFromDraft( export function extractUserResourcesFromDraft( draft: PromptDraft ): UserResourceDisplay[] { - return draft.blocks.filter(isResourceLinkBlock).map((resource) => ({ + const linked = draft.blocks.filter(isResourceLinkBlock).map((resource) => ({ name: resource.name, uri: resource.uri, mime_type: resource.mime_type ?? null, })) + const embedded = draft.blocks + .filter(isEmbeddedResourceBlock) + .map((resource) => ({ + name: deriveResourceNameFromUri(resource.uri), + uri: resource.uri, + mime_type: resource.mime_type ?? null, + })) + return [...linked, ...embedded] +} + +function deriveImageName( + uri: string | null | undefined, + mimeType: string +): string { + if (uri && uri.trim().length > 0) { + const name = deriveResourceNameFromUri(uri) + if (name !== "resource") return name + } + const ext = mimeType.split("/")[1]?.split("+")[0] ?? "image" + return `image.${ext}` +} + +export function extractUserImagesFromDraft( + draft: PromptDraft +): UserImageDisplay[] { + return draft.blocks.filter(isImageBlock).map((image) => ({ + name: deriveImageName(image.uri, image.mime_type), + data: image.data, + mime_type: image.mime_type, + uri: image.uri ?? null, + })) } diff --git a/src/lib/tauri.ts b/src/lib/tauri.ts index afdc345..65da073 100644 --- a/src/lib/tauri.ts +++ b/src/lib/tauri.ts @@ -743,6 +743,13 @@ export async function stopFileTreeWatch(rootPath: string): Promise { return invoke("stop_file_tree_watch", { rootPath }) } +export async function readFileBase64( + path: string, + maxBytes?: number +): Promise { + return invoke("read_file_base64", { path, maxBytes: maxBytes ?? null }) +} + export async function readFilePreview( rootPath: string, path: string, diff --git a/src/lib/types.ts b/src/lib/types.ts index 193d81f..bfea806 100644 --- a/src/lib/types.ts +++ b/src/lib/types.ts @@ -60,6 +60,12 @@ export type MessageRole = "user" | "assistant" | "system" | "tool" export type ContentBlock = | { type: "text"; text: string } + | { + type: "image" + data: string + mime_type: string + uri?: string | null + } | { type: "tool_use" tool_use_id: string | null @@ -304,8 +310,27 @@ export type ConnectionStatus = | "disconnected" | "error" +export interface PromptCapabilitiesInfo { + image: boolean + audio: boolean + embedded_context: boolean +} + export type PromptInputBlock = | { type: "text"; text: string } + | { + type: "image" + data: string + mime_type: string + uri?: string | null + } + | { + type: "resource" + uri: string + mime_type?: string | null + text?: string | null + blob?: string | null + } | { type: "resource_link" uri: string @@ -430,6 +455,11 @@ export type AcpEvent = type: "selectors_ready" connection_id: string } + | { + type: "prompt_capabilities" + connection_id: string + prompt_capabilities: PromptCapabilitiesInfo + } | { type: "mode_changed" connection_id: string