pub mod claude; pub mod cline; pub mod codex; pub mod gemini; pub mod openclaw; pub mod opencode; use std::collections::{HashMap, HashSet}; use std::sync::OnceLock; use regex::Regex; use crate::models::{ ContentBlock, ConversationDetail, ConversationSummary, MessageTurn, SessionStats, TurnUsage, }; #[derive(Debug, thiserror::Error)] pub enum ParseError { #[error("IO error: {0}")] Io(#[from] std::io::Error), #[error("JSON parse error: {0}")] Json(#[from] serde_json::Error), #[error("Database error: {0}")] Db(#[from] sea_orm::DbErr), #[error("Conversation not found: {0}")] ConversationNotFound(String), #[error("Invalid data: {0}")] InvalidData(String), } pub trait AgentParser { fn list_conversations(&self) -> Result, ParseError>; fn get_conversation(&self, conversation_id: &str) -> Result; } /// Truncate a string to `max_len` characters, appending "..." if truncated. pub fn truncate_str(s: &str, max_len: usize) -> String { if s.chars().count() <= max_len { s.to_string() } else { let truncated: String = s.chars().take(max_len).collect(); format!("{}...", truncated) } } /// Aggregate turn-level usage and duration into a single `SessionStats`. pub fn compute_session_stats(turns: &[MessageTurn]) -> Option { let mut total_in = 0u64; let mut total_out = 0u64; let mut total_cache_create = 0u64; let mut total_cache_read = 0u64; let mut total_duration = 0u64; let mut has_data = false; for turn in turns { if let Some(ref u) = turn.usage { total_in += u.input_tokens; total_out += u.output_tokens; total_cache_create += u.cache_creation_input_tokens; total_cache_read += u.cache_read_input_tokens; has_data = true; } if let Some(d) = turn.duration_ms { total_duration += d; } } if !has_data { return None; } Some(SessionStats { total_usage: Some(TurnUsage { input_tokens: total_in, output_tokens: total_out, cache_creation_input_tokens: total_cache_create, cache_read_input_tokens: total_cache_read, }), total_tokens: Some(total_in + total_out + total_cache_create + total_cache_read), total_duration_ms: total_duration, context_window_used_tokens: None, context_window_max_tokens: None, context_window_usage_percent: None, }) } fn model_capacity_suffix_regex() -> &'static Regex { static RE: OnceLock = OnceLock::new(); RE.get_or_init(|| { Regex::new(r"(?i)\[\s*([0-9]+(?:\.[0-9]+)?)\s*([km])\s*\]\s*$") .expect("valid model capacity regex") }) } fn parse_model_capacity_suffix(model: &str) -> Option { let captures = model_capacity_suffix_regex().captures(model.trim())?; let value = captures.get(1)?.as_str().parse::().ok()?; if !value.is_finite() || value <= 0.0 { return None; } let unit = captures .get(2) .map(|m| m.as_str().to_ascii_lowercase()) .unwrap_or_default(); let multiplier = match unit.as_str() { "m" => 1_000_000.0, "k" => 1_000.0, _ => return None, }; Some((value * multiplier) as u64) } pub fn infer_context_window_max_tokens(model: Option<&str>) -> Option { let raw = model?.trim(); if raw.is_empty() { return None; } if let Some(suffixed_limit) = parse_model_capacity_suffix(raw) { return Some(suffixed_limit); } let normalized = raw .rsplit('/') .next() .unwrap_or(raw) .split(':') .next() .unwrap_or(raw) .trim() .to_ascii_lowercase(); if normalized.starts_with("claude") { return Some(200_000); } if normalized.starts_with("gemini") { return Some(1_000_000); } match normalized.as_str() { "gpt-5.2-codex" | "gpt-5.1-codex-max" | "gpt-5.1-codex-mini" | "gpt-5.2" => Some(258_000), "gpt-5.1" | "gpt-5.1-codex" | "gpt-4o" | "gpt-4o-mini" | "gpt-4-turbo" | "o1-mini" | "o1-preview" => Some(128_000), "gpt-4" => Some(8_192), "o3" | "o3-mini" | "o1" => Some(200_000), _ => { if normalized.starts_with("gpt-5") { Some(258_000) } else if normalized.starts_with("gpt-4o") || normalized.starts_with("gpt-4.1") || normalized.starts_with("gpt-4-turbo") { Some(128_000) } else if normalized.starts_with("o3") || normalized == "o1" { Some(200_000) } else if normalized.starts_with("o1-mini") || normalized.starts_with("o1-preview") { Some(128_000) } else { None } } } } pub fn latest_turn_total_usage_tokens(turns: &[MessageTurn]) -> Option { turns.iter().rev().find_map(|turn| { turn.usage.as_ref().map(|usage| { usage .input_tokens .saturating_add(usage.output_tokens) .saturating_add(usage.cache_creation_input_tokens) .saturating_add(usage.cache_read_input_tokens) }) }) } pub fn merge_context_window_stats( stats: Option, used_tokens: Option, max_tokens: Option, ) -> Option { if used_tokens.is_none() && max_tokens.is_none() { return stats; } let usage_percent = match (used_tokens, max_tokens) { (Some(used), Some(max)) if max > 0 => Some((used as f64 / max as f64) * 100.0), _ => None, }; match stats { Some(mut s) => { s.context_window_used_tokens = used_tokens; s.context_window_max_tokens = max_tokens; s.context_window_usage_percent = usage_percent; Some(s) } None => Some(SessionStats { total_usage: None, total_tokens: None, total_duration_ms: 0, context_window_used_tokens: used_tokens, context_window_max_tokens: max_tokens, context_window_usage_percent: usage_percent, }), } } /// Relocate orphaned tool_result blocks to the turn that contains their matching tool_use. /// /// After `group_into_turns` splits assistant rounds, async tool execution can cause /// a tool_result to land in a later turn than its corresponding tool_use. /// This post-processing step moves such orphaned results back. pub fn relocate_orphaned_tool_results(turns: &mut Vec) { // Build map: tool_use_id → turn index let mut tool_use_turn: HashMap = HashMap::new(); for (idx, turn) in turns.iter().enumerate() { for block in &turn.blocks { if let ContentBlock::ToolUse { tool_use_id: Some(ref id), .. } = block { tool_use_turn.insert(id.clone(), idx); } } } if tool_use_turn.is_empty() { return; } // Collect (source_turn, target_turn, block) for orphaned results let mut relocations: Vec<(usize, usize, ContentBlock)> = Vec::new(); for (turn_idx, turn) in turns.iter().enumerate() { for block in &turn.blocks { if let ContentBlock::ToolResult { tool_use_id: Some(ref id), .. } = block { if let Some(&target) = tool_use_turn.get(id) { if target != turn_idx { relocations.push((turn_idx, target, block.clone())); } } } } } if relocations.is_empty() { return; } // Build set of (turn_idx, tool_use_id) to remove let remove_set: HashMap> = { let mut map: HashMap> = HashMap::new(); for (from, _, block) in &relocations { if let ContentBlock::ToolResult { tool_use_id: Some(ref id), .. } = block { map.entry(*from).or_default().push(id.clone()); } } map }; // Remove from source turns for (&turn_idx, ids) in &remove_set { turns[turn_idx].blocks.retain(|block| { if let ContentBlock::ToolResult { tool_use_id: Some(ref id), .. } = block { !ids.contains(id) } else { true } }); } // Append to target turns for (_, target, block) in relocations { turns[target].blocks.push(block); } // Remove turns that became empty after relocation turns.retain(|turn| !turn.blocks.is_empty()); } /// Convert Read tool output from numbered-line format to `{"start_line":N,"content":"..."}`. /// /// Claude Code embeds line numbers in Read output like ` 115→content`. /// This splits on the `→` delimiter (or tab for older `cat -n` format), /// extracts the starting line number, and returns clean content. pub fn structurize_read_tool_output(turns: &mut [MessageTurn]) { let mut read_tool_ids: HashSet = HashSet::new(); for turn in turns.iter() { for block in &turn.blocks { if let ContentBlock::ToolUse { tool_use_id: Some(ref id), ref tool_name, .. } = block { let name = tool_name.to_lowercase(); if matches!( name.as_str(), "read" | "read_file" | "readfile" | "read file" | "cat" | "view" ) { read_tool_ids.insert(id.clone()); } } } } for turn in turns.iter_mut() { for block in turn.blocks.iter_mut() { let is_read_result = matches!( block, ContentBlock::ToolResult { tool_use_id: Some(ref id), .. } if read_tool_ids.contains(id) ); if !is_read_result { continue; } if let ContentBlock::ToolResult { ref mut output_preview, .. } = block { if let Some(ref text) = output_preview { if let Some(json) = strip_numbered_lines(text) { *output_preview = Some(json); } } } } } } /// Known delimiters between line number and content. const LINE_NUM_DELIMITERS: &[&str] = &["→", "\t"]; /// Try to split a line at a known delimiter, returning (line_number, content). fn split_line_number(line: &str) -> Option<(u64, &str)> { for delim in LINE_NUM_DELIMITERS { if let Some(pos) = line.find(delim) { let prefix = line[..pos].trim(); if let Ok(num) = prefix.parse::() { let content_start = pos + delim.len(); return Some((num, &line[content_start..])); } } } None } /// If most lines have a recognized line-number prefix, strip them all /// and return `{"start_line":N,"content":"clean text"}`. pub fn strip_numbered_lines(text: &str) -> Option { let raw_lines: Vec<&str> = text.lines().collect(); if raw_lines.len() < 2 { return None; } let matched = raw_lines .iter() .filter(|l| l.is_empty() || split_line_number(l).is_some()) .count(); if matched < raw_lines.len() * 4 / 5 { return None; } let mut start_line: u64 = 1; let mut first = true; let stripped: Vec<&str> = raw_lines .iter() .map(|line| { if let Some((num, content)) = split_line_number(line) { if first { start_line = num; first = false; } content } else { first = false; *line } }) .collect(); Some( serde_json::json!({ "start_line": start_line, "content": stripped.join("\n") }) .to_string(), ) } /// Resolve line numbers for `*** Update File` / `*** Add File` style patches. /// /// When a hunk header is just `@@` without `-N,M +N,M`, this reads the actual /// file from disk and matches the context lines to calculate real line numbers. /// Falls back gracefully if the file doesn't exist or context doesn't match. pub fn resolve_patch_line_numbers(turns: &mut [MessageTurn], cwd: Option<&str>) { for turn in turns.iter_mut() { for block in turn.blocks.iter_mut() { if let ContentBlock::ToolUse { ref tool_name, ref mut input_preview, .. } = block { let name = tool_name.to_lowercase(); if !matches!( name.as_str(), "apply_patch" | "edit" | "patch" | "applypatch" ) { continue; } if let Some(ref text) = input_preview { if text.contains("@@\n") || text.contains("@@\r\n") { if let Some(resolved) = resolve_patch_text(text, cwd) { *input_preview = Some(resolved); } } } } } } } /// Resolve a single patch text, replacing bare `@@` with `@@ -N,M +N,M @@`. pub fn resolve_patch_text(patch: &str, cwd: Option<&str>) -> Option { let mut output = String::with_capacity(patch.len() + 256); let mut current_file_path: Option = None; let mut file_lines: Option> = None; let mut any_resolved = false; let lines: Vec<&str> = patch.lines().collect(); let mut i = 0; while i < lines.len() { let line = lines[i]; // Detect file markers if line.starts_with("*** Update File: ") || line.starts_with("*** Add File: ") { let marker_end = if line.starts_with("*** Update File: ") { 17 } else { 14 }; let path = line[marker_end..].trim(); current_file_path = Some(path.to_string()); file_lines = load_file_lines(path, cwd); output.push_str(line); output.push('\n'); i += 1; continue; } // Detect bare @@ hunk header (no line numbers) if line == "@@" { if let (Some(ref fl), true) = (&file_lines, current_file_path.is_some()) { // Collect context lines from this hunk to find match position let hunk_lines = collect_hunk_lines(&lines, i + 1); if let Some((old_start, old_count, new_count)) = find_hunk_position(fl, &hunk_lines) { let new_start = old_start; // same start for context-based patches output.push_str(&format!( "@@ -{},{} +{},{} @@\n", old_start, old_count, new_start, new_count )); any_resolved = true; i += 1; continue; } } // Fallback: keep bare @@ output.push_str(line); output.push('\n'); i += 1; continue; } output.push_str(line); output.push('\n'); i += 1; } if any_resolved { Some(output) } else { None } } /// Load file lines from disk, trying both absolute path and cwd-relative. pub fn load_file_lines(path: &str, cwd: Option<&str>) -> Option> { use std::fs; use std::path::Path; let p = Path::new(path); if p.is_absolute() { if let Ok(content) = fs::read_to_string(p) { return Some(content.lines().map(|l| l.to_string()).collect()); } } if let Some(base) = cwd { let full = Path::new(base).join(path); if let Ok(content) = fs::read_to_string(&full) { return Some(content.lines().map(|l| l.to_string()).collect()); } } None } /// Collect lines belonging to a hunk (until next `@@` or `*** ` marker or end). fn collect_hunk_lines<'a>(lines: &'a [&'a str], start: usize) -> Vec<&'a str> { let mut result = Vec::new(); for &line in &lines[start..] { if line == "@@" || line.starts_with("*** ") { break; } result.push(line); } result } /// Find where a hunk's context lines match in the file, returning (start_line, old_count, new_count). /// `start_line` is 1-based. /// /// The file on disk may be in either pre-patch or post-patch state, and may /// have been further modified. We try three strategies in order: /// 1. Contiguous match of context+added lines (post-patch file, no further edits) /// 2. Contiguous match of context+deleted lines (pre-patch file) /// 3. Subsequence match of context-only lines (file has been further modified) fn find_hunk_position( file_lines: &[String], hunk_lines: &[&str], ) -> Option<(usize, usize, usize)> { let mut old_count = 0usize; let mut new_count = 0usize; for hl in hunk_lines { if hl.starts_with(' ') { old_count += 1; new_count += 1; } else if hl.starts_with('-') { old_count += 1; } else if hl.starts_with('+') { new_count += 1; } } // Strategy 1: contiguous match of context+added (post-patch) let new_view: Vec<&str> = hunk_lines .iter() .filter(|l| l.starts_with(' ') || l.starts_with('+')) .map(|l| &l[1..]) .collect(); if let Some(pos) = find_contiguous(file_lines, &new_view) { return Some((pos + 1, old_count, new_count)); } // Strategy 2: contiguous match of context+deleted (pre-patch) let old_view: Vec<&str> = hunk_lines .iter() .filter(|l| l.starts_with(' ') || l.starts_with('-')) .map(|l| &l[1..]) .collect(); if let Some(pos) = find_contiguous(file_lines, &old_view) { return Some((pos + 1, old_count, new_count)); } // Strategy 3: subsequence match of context-only lines (file further modified) let ctx_only: Vec<&str> = hunk_lines .iter() .filter(|l| l.starts_with(' ')) .map(|l| &l[1..]) .collect(); if let Some(pos) = find_subsequence(file_lines, &ctx_only) { return Some((pos + 1, old_count, new_count)); } None } /// Find contiguous `view` lines in `file_lines`. Returns 0-based start index. fn find_contiguous(file_lines: &[String], view: &[&str]) -> Option { if view.is_empty() || view.len() > file_lines.len() { return None; } let first = view[0]; for i in 0..=(file_lines.len() - view.len()) { if file_lines[i].as_str() != first { continue; } if view.iter().enumerate().all(|(j, v)| file_lines[i + j].as_str() == *v) { return Some(i); } } None } /// Find `needles` as an ordered subsequence in `file_lines` within a small window. /// Returns 0-based index of the first needle's position. fn find_subsequence(file_lines: &[String], needles: &[&str]) -> Option { if needles.is_empty() { return None; } let first = needles[0]; for start in 0..file_lines.len() { if file_lines[start].as_str() != first { continue; } let mut cursor = start + 1; let mut all_found = true; for &needle in &needles[1..] { // Allow up to 10 lines gap between consecutive context lines let limit = std::cmp::min(cursor + 10, file_lines.len()); match file_lines[cursor..limit] .iter() .position(|fl| fl.as_str() == needle) { Some(offset) => cursor = cursor + offset + 1, None => { all_found = false; break; } } } if all_found { return Some(start); } } None } /// Extract the last path component as the folder name. pub fn folder_name_from_path(path: &str) -> String { path.rsplit(['/', '\\']).next().unwrap_or(path).to_string() } /// Normalize a filesystem path string for tolerant cross-platform comparison. /// This intentionally does not hit the filesystem (no canonicalize), and only /// normalizes separators/casing differences that commonly break exact matching. pub fn normalize_path_for_matching(path: &str) -> String { let mut normalized = path.trim().replace('\\', "/"); #[cfg(target_os = "windows")] { if let Some(stripped) = normalized.strip_prefix("//?/") { normalized = stripped.to_string(); } normalized = normalized.to_ascii_lowercase(); } while normalized.ends_with('/') { if normalized == "/" { break; } // Keep Windows drive root such as "c:/" intact. if normalized.len() == 3 && normalized.as_bytes().get(1) == Some(&b':') && normalized.as_bytes().get(2) == Some(&b'/') { break; } normalized.pop(); } normalized } pub fn path_eq_for_matching(left: &str, right: &str) -> bool { normalize_path_for_matching(left) == normalize_path_for_matching(right) } #[cfg(test)] mod tests { use chrono::Utc; use super::{ infer_context_window_max_tokens, latest_turn_total_usage_tokens, merge_context_window_stats, path_eq_for_matching, }; use crate::models::{MessageTurn, SessionStats, TurnRole, TurnUsage}; #[test] fn infers_model_context_limits() { assert_eq!( infer_context_window_max_tokens(Some("claude-sonnet-4-6")), Some(200_000) ); assert_eq!( infer_context_window_max_tokens(Some("gemini-2.5-pro")), Some(1_000_000) ); assert_eq!( infer_context_window_max_tokens(Some("claude-sonnet-4-6 [1.5M]")), Some(1_500_000) ); assert_eq!(infer_context_window_max_tokens(Some("unknown-model")), None); } #[test] fn picks_latest_turn_usage_total_tokens() { let timestamp = Utc::now(); let turns = vec![ MessageTurn { id: "turn-0".to_string(), role: TurnRole::Assistant, blocks: vec![], timestamp, usage: Some(TurnUsage { input_tokens: 10, output_tokens: 20, cache_creation_input_tokens: 30, cache_read_input_tokens: 40, }), duration_ms: None, model: None, }, MessageTurn { id: "turn-1".to_string(), role: TurnRole::Assistant, blocks: vec![], timestamp, usage: Some(TurnUsage { input_tokens: 11, output_tokens: 21, cache_creation_input_tokens: 31, cache_read_input_tokens: 41, }), duration_ms: None, model: None, }, ]; assert_eq!(latest_turn_total_usage_tokens(&turns), Some(104)); } #[test] fn merges_context_window_stats() { let merged = merge_context_window_stats(None, Some(1500), Some(3000)) .expect("context stats should exist"); assert_eq!(merged.context_window_used_tokens, Some(1500)); assert_eq!(merged.context_window_max_tokens, Some(3000)); assert!(merged.total_usage.is_none()); let percent = merged .context_window_usage_percent .expect("usage percent should exist"); assert!((percent - 50.0).abs() < f64::EPSILON); let existing = Some(SessionStats { total_usage: Some(TurnUsage { input_tokens: 1, output_tokens: 2, cache_creation_input_tokens: 3, cache_read_input_tokens: 4, }), total_tokens: Some(10), total_duration_ms: 100, context_window_used_tokens: None, context_window_max_tokens: None, context_window_usage_percent: None, }); let merged_existing = merge_context_window_stats(existing, Some(200), Some(1000)).expect("merged"); assert_eq!(merged_existing.total_tokens, Some(10)); assert_eq!(merged_existing.context_window_used_tokens, Some(200)); assert_eq!(merged_existing.context_window_max_tokens, Some(1000)); } #[test] fn path_matching_handles_separator_differences() { assert!(path_eq_for_matching( "/Users/demo/workspace/codeg", "/Users/demo/workspace/codeg/" )); assert!(path_eq_for_matching( "C:\\Users\\demo\\workspace\\codeg", "C:/Users/demo/workspace/codeg" )); } }