Files
codeg/src-tauri/src/parsers/claude.rs
2026-03-28 14:04:19 +08:00

1359 lines
46 KiB
Rust

use std::fs;
use std::io::{BufRead, BufReader};
use std::path::PathBuf;
use std::sync::OnceLock;
use chrono::{DateTime, Utc};
use regex::Regex;
use crate::models::*;
use crate::parsers::{folder_name_from_path, truncate_str, AgentParser, ParseError};
/// Returns the lazily compiled regex that matches Claude Code's
/// system-injected XML elements (opening tag, body and closing tag).
///
/// These elements are internal metadata and must not be shown to users.
/// The `regex` crate has no backreferences, so every tag gets its own
/// alternative; `(?s)` lets `.` match across newlines.
fn system_tag_regex() -> &'static Regex {
    const PATTERN: &str = concat!(
        r"(?s)",
        r"<system-reminder>.*?</system-reminder>",
        r"|<local-command-caveat>.*?</local-command-caveat>",
        r"|<command-name>.*?</command-name>",
        r"|<command-message>.*?</command-message>",
        r"|<command-args>.*?</command-args>",
        r"|<local-command-stdout>.*?</local-command-stdout>",
        r"|<user-prompt-submit-hook>.*?</user-prompt-submit-hook>",
        r"|<task-notification>.*?</task-notification>",
        r"|<fast_mode_info>.*?</fast_mode_info>",
    );
    static COMPILED: OnceLock<Regex> = OnceLock::new();
    // Compiled once on first use, then shared for the process lifetime.
    COMPILED.get_or_init(|| Regex::new(PATTERN).unwrap())
}
/// Returns the lazily compiled, case-insensitive regex for a trailing model
/// capacity suffix such as `[1M]` or `[500k]`.
///
/// Capture group 1 is the (optionally fractional) number, group 2 the unit
/// letter (`k` or `m`).
fn model_capacity_suffix_regex() -> &'static Regex {
    const PATTERN: &str = r"(?i)\[\s*([0-9]+(?:\.[0-9]+)?)\s*([km])\s*\]\s*$";
    static COMPILED: OnceLock<Regex> = OnceLock::new();
    COMPILED.get_or_init(|| Regex::new(PATTERN).expect("valid model capacity regex"))
}
/// Removes every system-injected XML element from `text` and trims the
/// surrounding whitespace.
///
/// Returns `None` when nothing but whitespace remains.
fn strip_system_tags(text: &str) -> Option<String> {
    let without_tags = system_tag_regex().replace_all(text, "");
    match without_tags.trim() {
        "" => None,
        visible => Some(visible.to_string()),
    }
}
/// Check if a JSONL entry is a system meta message (isMeta: true).
fn is_meta_message(value: &serde_json::Value) -> bool {
value
.get("isMeta")
.and_then(|v| v.as_bool())
.unwrap_or(false)
}
/// Prefix with which Claude Code starts a context continuation summary
/// message.
const CONTEXT_CONTINUATION_PREFIX: &str =
    "This session is being continued from a previous conversation";

/// Detect Claude Code context continuation summary messages.
///
/// These are injected as "user" type but are actually system context.
/// Returns `true` when any text block starts with
/// [`CONTEXT_CONTINUATION_PREFIX`].
fn is_context_continuation(content: &[ContentBlock]) -> bool {
    content.iter().any(|block| match block {
        ContentBlock::Text { text } => text.starts_with(CONTEXT_CONTINUATION_PREFIX),
        _ => false,
    })
}

/// Check if an assistant message is a synthetic placeholder (e.g. generated by
/// Claude Code for local commands like `/context` or `/model`).
///
/// These carry `model: "<synthetic>"` and all-zero usage, so they should be
/// excluded from conversation turns and stats. Only the `message.model`
/// field is inspected here.
fn is_synthetic_assistant(value: &serde_json::Value) -> bool {
    let model = value
        .get("message")
        .and_then(|m| m.get("model"))
        .and_then(|m| m.as_str());
    model == Some("<synthetic>")
}
/// Parses a trailing capacity suffix such as `[1M]` or `[500k]` from a model
/// name into a token count.
///
/// Returns `None` when the name has no such suffix or the number is not a
/// positive finite value.
///
/// The product is rounded to the nearest integer before the cast. Plain
/// truncation loses a token to binary floating-point error for some inputs:
/// `1.005 * 1000.0` evaluates to `1004.9999999999999`.
fn parse_model_capacity_suffix(model: &str) -> Option<u64> {
    let captures = model_capacity_suffix_regex().captures(model.trim())?;
    let value: f64 = captures.get(1)?.as_str().parse().ok()?;
    if !value.is_finite() || value <= 0.0 {
        return None;
    }
    let unit = captures.get(2)?.as_str();
    let multiplier = if unit.eq_ignore_ascii_case("m") {
        1_000_000.0
    } else if unit.eq_ignore_ascii_case("k") {
        1_000.0
    } else {
        return None;
    };
    // Float-to-integer `as` casts saturate, so absurdly large values clamp to
    // `u64::MAX` instead of wrapping.
    Some((value * multiplier).round() as u64)
}
/// Determines the context window size (in tokens) for a model name.
///
/// * `None` or a blank name yields `None`.
/// * An explicit capacity suffix (e.g. `[500k]`) takes precedence.
/// * Otherwise, names starting with `claude` (ASCII case-insensitive)
///   default to 1M tokens.
/// * Any other model yields `None`.
fn claude_context_window_max_tokens_for_model(model: Option<&str>) -> Option<u64> {
    let name = model.map(str::trim).filter(|name| !name.is_empty())?;
    parse_model_capacity_suffix(name).or_else(|| {
        // Claude models default to 1M when no explicit capacity is provided.
        let is_claude = name.to_ascii_lowercase().starts_with("claude");
        is_claude.then_some(1_000_000)
    })
}
/// Computes the context window occupancy implied by one usage record:
/// input tokens plus cache-creation and cache-read input tokens
/// (saturating on overflow). Output tokens are not counted.
///
/// Returns `None` when the sum is zero.
fn claude_context_window_used_tokens_from_usage(usage: &TurnUsage) -> Option<u64> {
    let cached = usage
        .cache_creation_input_tokens
        .saturating_add(usage.cache_read_input_tokens);
    let total = usage.input_tokens.saturating_add(cached);
    (total > 0).then_some(total)
}
fn latest_claude_context_window_used_tokens(turns: &[MessageTurn]) -> Option<u64> {
turns.iter().rev().find_map(|turn| {
turn.usage
.as_ref()
.and_then(claude_context_window_used_tokens_from_usage)
})
}
/// Overlays context window figures onto already computed session stats.
///
/// * When neither `used_tokens` nor `max_tokens` is known, `stats` is
///   returned untouched.
/// * Otherwise the three context window fields are overwritten (creating an
///   otherwise empty `SessionStats` if `stats` was `None`).
/// * The usage percentage is only set when both values are known and
///   `max_tokens` is non-zero.
fn merge_claude_context_window_stats(
    stats: Option<SessionStats>,
    used_tokens: Option<u64>,
    max_tokens: Option<u64>,
) -> Option<SessionStats> {
    if used_tokens.or(max_tokens).is_none() {
        return stats;
    }

    let usage_percent = used_tokens
        .zip(max_tokens)
        .filter(|&(_, max)| max > 0)
        .map(|(used, max)| (used as f64 / max as f64) * 100.0);

    let mut merged = stats.unwrap_or_else(|| SessionStats {
        total_usage: None,
        total_tokens: None,
        total_duration_ms: 0,
        context_window_used_tokens: None,
        context_window_max_tokens: None,
        context_window_usage_percent: None,
    });
    merged.context_window_used_tokens = used_tokens;
    merged.context_window_max_tokens = max_tokens;
    merged.context_window_usage_percent = usage_percent;
    Some(merged)
}
/// Parser for Claude Code conversation logs stored as `*.jsonl` files.
pub struct ClaudeParser {
/// Directory whose sub-directories are scanned for `*.jsonl` conversation files.
base_dir: PathBuf,
}
impl ClaudeParser {
pub fn new() -> Self {
let base_dir = resolve_claude_config_dir().join("projects");
Self { base_dir }
}
fn decode_folder_path(encoded: &str) -> String {
encoded.replace('-', "/")
}
fn parse_jsonl_summary(
&self,
path: &PathBuf,
) -> Result<Option<ConversationSummary>, ParseError> {
let file = fs::File::open(path)?;
let reader = BufReader::new(file);
let mut conversation_id: Option<String> = None;
let mut cwd: Option<String> = None;
let mut git_branch: Option<String> = None;
let mut model: Option<String> = None;
let mut title: Option<String> = None;
let mut first_timestamp: Option<DateTime<Utc>> = None;
let mut last_timestamp: Option<DateTime<Utc>> = None;
let mut message_count: u32 = 0;
for line in reader.lines() {
let line = match line {
Ok(l) => l,
Err(_) => continue,
};
if line.trim().is_empty() {
continue;
}
let value: serde_json::Value = match serde_json::from_str(&line) {
Ok(v) => v,
Err(_) => continue,
};
let msg_type = value.get("type").and_then(|t| t.as_str()).unwrap_or("");
// Skip non-conversation entries
if msg_type == "file-history-snapshot" || msg_type == "progress" {
continue;
}
// Skip system meta messages (e.g. local-command-caveat injections)
if is_meta_message(&value) {
continue;
}
if conversation_id.is_none() {
conversation_id = value
.get("sessionId")
.and_then(|s| s.as_str())
.map(|s| s.to_string());
}
if cwd.is_none() {
cwd = value
.get("cwd")
.and_then(|s| s.as_str())
.map(|s| s.to_string());
}
if git_branch.is_none() {
git_branch = value
.get("gitBranch")
.and_then(|s| s.as_str())
.map(|s| s.to_string());
}
if let Some(ts_str) = value.get("timestamp").and_then(|t| t.as_str()) {
if let Ok(ts) = ts_str.parse::<DateTime<Utc>>() {
if first_timestamp.is_none() {
first_timestamp = Some(ts);
}
last_timestamp = Some(ts);
}
}
if msg_type == "user" || msg_type == "assistant" {
// Skip synthetic assistant placeholders for local commands
if msg_type == "assistant" && is_synthetic_assistant(&value) {
continue;
}
message_count += 1;
// Extract model from assistant messages
if msg_type == "assistant" && model.is_none() {
model = value
.get("message")
.and_then(|m| m.get("model"))
.and_then(|m| m.as_str())
.map(|s| s.to_string());
}
// Extract title from first user message
if msg_type == "user" && title.is_none() {
title = extract_user_text(&value).map(|t| truncate_str(&t, 100));
}
}
}
let started_at = match first_timestamp {
Some(ts) => ts,
None => return Ok(None),
};
// Use filename (without .jsonl) as ID fallback
let id = conversation_id.unwrap_or_else(|| {
path.file_stem()
.unwrap_or_default()
.to_string_lossy()
.to_string()
});
let folder_path = cwd.clone();
let folder_name = folder_path.as_ref().map(|p| folder_name_from_path(p));
Ok(Some(ConversationSummary {
id,
agent_type: AgentType::ClaudeCode,
folder_path,
folder_name,
title,
started_at,
ended_at: last_timestamp,
message_count,
model,
git_branch,
}))
}
}
/// Resolves the Claude config directory from the process environment:
/// the `CLAUDE_CONFIG_DIR` variable if set, otherwise `.claude` under the
/// user's home directory.
fn resolve_claude_config_dir() -> PathBuf {
    let env_override = std::env::var_os("CLAUDE_CONFIG_DIR");
    let home = dirs::home_dir();
    resolve_claude_config_dir_from(env_override, home)
}
/// Pure resolution logic behind `resolve_claude_config_dir`, taking its
/// inputs explicitly.
///
/// A non-empty `claude_config_dir_env` is used verbatim. Otherwise the
/// result is `<home_dir>/.claude`, or the relative path `.claude` when no
/// home directory is known.
fn resolve_claude_config_dir_from(
    claude_config_dir_env: Option<std::ffi::OsString>,
    home_dir: Option<PathBuf>,
) -> PathBuf {
    match claude_config_dir_env {
        Some(explicit) if !explicit.is_empty() => PathBuf::from(explicit),
        _ => {
            let mut fallback = home_dir.unwrap_or_default();
            fallback.push(".claude");
            fallback
        }
    }
}
impl AgentParser for ClaudeParser {
    /// Lists every conversation found in `<base_dir>/<project>/*.jsonl`,
    /// newest first (by start time).
    ///
    /// Listing is best-effort: directory entries that cannot be read,
    /// project directories that cannot be opened and files that fail to
    /// parse are skipped, so a single bad entry does not hide all other
    /// conversations. A missing base directory yields an empty list.
    ///
    /// # Errors
    ///
    /// Fails only when the base directory exists but cannot be read.
    fn list_conversations(&self) -> Result<Vec<ConversationSummary>, ParseError> {
        let mut conversations = Vec::new();
        if !self.base_dir.exists() {
            return Ok(conversations);
        }

        for entry in fs::read_dir(&self.base_dir)? {
            let Ok(entry) = entry else {
                continue;
            };
            let project_dir = entry.path();
            if !project_dir.is_dir() {
                continue;
            }
            // An unreadable project directory (e.g. permission denied) is
            // skipped like every other per-entry failure instead of aborting
            // the whole listing.
            let Ok(jsonl_files) = fs::read_dir(&project_dir) else {
                continue;
            };

            for file_entry in jsonl_files {
                let Ok(file_entry) = file_entry else {
                    continue;
                };
                let file_path = file_entry.path();
                if file_path.extension().and_then(|e| e.to_str()) != Some("jsonl") {
                    continue;
                }
                let Ok(Some(mut summary)) = self.parse_jsonl_summary(&file_path) else {
                    continue;
                };

                // If folder_path is still None, derive from directory name
                if summary.folder_path.is_none() {
                    let dir_name = project_dir
                        .file_name()
                        .unwrap_or_default()
                        .to_string_lossy()
                        .to_string();
                    let decoded = Self::decode_folder_path(&dir_name);
                    summary.folder_name = Some(folder_name_from_path(&decoded));
                    summary.folder_path = Some(decoded);
                }
                conversations.push(summary);
            }
        }

        conversations.sort_by(|a, b| b.started_at.cmp(&a.started_at));
        Ok(conversations)
    }

    /// Loads the full conversation stored as `<conversation_id>.jsonl` in
    /// any project directory under the base directory.
    ///
    /// # Errors
    ///
    /// Returns `ParseError::ConversationNotFound` when the base directory is
    /// missing or no project directory contains such a file. Also fails when
    /// the base directory cannot be read or the file cannot be opened.
    fn get_conversation(&self, conversation_id: &str) -> Result<ConversationDetail, ParseError> {
        if !self.base_dir.exists() {
            return Err(ParseError::ConversationNotFound(
                conversation_id.to_string(),
            ));
        }

        // Find the conversation file by searching all directories
        let file_name = format!("{}.jsonl", conversation_id);
        for entry in fs::read_dir(&self.base_dir)? {
            let Ok(entry) = entry else {
                continue;
            };
            let project_dir = entry.path();
            if !project_dir.is_dir() {
                continue;
            }
            let file_path = project_dir.join(&file_name);
            if file_path.exists() {
                return self.parse_conversation_detail(&file_path, conversation_id);
            }
        }

        Err(ParseError::ConversationNotFound(
            conversation_id.to_string(),
        ))
    }
}
impl ClaudeParser {
fn parse_conversation_detail(
&self,
path: &PathBuf,
conversation_id: &str,
) -> Result<ConversationDetail, ParseError> {
let file = fs::File::open(path)?;
let reader = BufReader::new(file);
let mut messages = Vec::new();
let mut cwd: Option<String> = None;
let mut git_branch: Option<String> = None;
let mut model: Option<String> = None;
let mut title: Option<String> = None;
let mut first_timestamp: Option<DateTime<Utc>> = None;
let mut last_timestamp: Option<DateTime<Utc>> = None;
for line in reader.lines() {
let line = match line {
Ok(l) => l,
Err(_) => continue,
};
if line.trim().is_empty() {
continue;
}
let value: serde_json::Value = match serde_json::from_str(&line) {
Ok(v) => v,
Err(_) => continue,
};
let msg_type = value.get("type").and_then(|t| t.as_str()).unwrap_or("");
if msg_type == "file-history-snapshot" || msg_type == "progress" {
continue;
}
// Skip system meta messages
if is_meta_message(&value) {
continue;
}
if cwd.is_none() {
cwd = value
.get("cwd")
.and_then(|s| s.as_str())
.map(|s| s.to_string());
}
if git_branch.is_none() {
git_branch = value
.get("gitBranch")
.and_then(|s| s.as_str())
.map(|s| s.to_string());
}
if let Some(ts_str) = value.get("timestamp").and_then(|t| t.as_str()) {
if let Ok(ts) = ts_str.parse::<DateTime<Utc>>() {
if first_timestamp.is_none() {
first_timestamp = Some(ts);
}
last_timestamp = Some(ts);
}
}
match msg_type {
"assistant" if is_synthetic_assistant(&value) => {
// Skip synthetic assistant placeholders for local commands
continue;
}
"user" => {
let content = extract_user_content(&value);
// Skip user messages that are empty after system tag stripping
if content.is_empty() {
continue;
}
let timestamp = parse_timestamp(&value).unwrap_or_else(Utc::now);
let uuid = value
.get("uuid")
.and_then(|u| u.as_str())
.unwrap_or("")
.to_string();
// Detect context continuation summary and treat as system message
let role = if is_context_continuation(&content) {
MessageRole::System
} else {
if title.is_none() {
if let Some(first_text) = content.iter().find_map(|c| match c {
ContentBlock::Text { text } => Some(text.clone()),
_ => None,
}) {
title = Some(truncate_str(&first_text, 100));
}
}
MessageRole::User
};
messages.push(UnifiedMessage {
id: uuid,
role,
content,
timestamp,
usage: None,
duration_ms: None,
model: None,
});
}
"assistant" => {
let timestamp = parse_timestamp(&value).unwrap_or_else(Utc::now);
let uuid = value
.get("uuid")
.and_then(|u| u.as_str())
.unwrap_or("")
.to_string();
let msg_model = value
.get("message")
.and_then(|m| m.get("model"))
.and_then(|m| m.as_str())
.map(|s| s.to_string());
if model.is_none() {
model = msg_model.clone();
}
let content = extract_assistant_content(&value);
let usage = extract_usage(&value);
messages.push(UnifiedMessage {
id: uuid,
role: MessageRole::Assistant,
content,
timestamp,
usage,
duration_ms: None,
model: msg_model,
});
}
"system" => {
let subtype = value.get("subtype").and_then(|s| s.as_str()).unwrap_or("");
if subtype == "turn_duration" {
if let Some(duration) = value.get("durationMs").and_then(|d| d.as_u64()) {
// Attach to the last assistant message
if let Some(last) = messages
.iter_mut()
.rev()
.find(|m| matches!(m.role, MessageRole::Assistant))
{
last.duration_ms = Some(duration);
}
}
}
}
"tool_use" => {
// Top-level tool_use record (Claude Code JSONL format)
let timestamp = parse_timestamp(&value).unwrap_or_else(Utc::now);
let tool_name = value
.get("tool_name")
.and_then(|n| n.as_str())
.unwrap_or("unknown")
.to_string();
let input_preview = value.get("tool_input").map(|i| i.to_string());
let synthetic_id = format!("tl-tool-{}", messages.len());
// Attach to last assistant message, or create a synthetic one
if let Some(last) = messages
.iter_mut()
.rev()
.find(|m| matches!(m.role, MessageRole::Assistant))
{
last.content.push(ContentBlock::ToolUse {
tool_use_id: Some(synthetic_id),
tool_name,
input_preview,
});
} else {
messages.push(UnifiedMessage {
id: format!("synth-assistant-{}", messages.len()),
role: MessageRole::Assistant,
content: vec![ContentBlock::ToolUse {
tool_use_id: Some(synthetic_id),
tool_name,
input_preview,
}],
timestamp,
usage: None,
duration_ms: None,
model: None,
});
}
}
"tool_result" => {
// Top-level tool_result record (Claude Code JSONL format)
let tool_output = value.get("tool_output");
let tool_name = value
.get("tool_name")
.and_then(|n| n.as_str())
.unwrap_or("");
let is_error = tool_output
.and_then(|o| o.get("exit"))
.and_then(|e| e.as_i64())
.is_some_and(|code| code != 0);
// Extract output text: prefer "preview" (read), then "output" (bash)
let output_text = tool_output
.and_then(|o| {
o.get("preview")
.or_else(|| o.get("output"))
.and_then(|v| v.as_str())
})
.map(|s| s.to_string());
// For read tools, structurize with start_line from tool_input.offset
let output_preview =
if tool_name == "read" || tool_name == "Read" {
let start_line = value
.get("tool_input")
.and_then(|i| i.get("offset"))
.and_then(|o| o.as_u64())
.map(|o| o + 1)
.unwrap_or(1);
output_text.map(|text| {
serde_json::json!({
"start_line": start_line,
"content": text
})
.to_string()
})
} else {
output_text
};
// Find matching ToolUse in the last assistant message and use its ID
let matching_id = messages
.iter()
.rev()
.find(|m| matches!(m.role, MessageRole::Assistant))
.and_then(|m| {
m.content.iter().rev().find_map(|b| {
if let ContentBlock::ToolUse {
tool_use_id: Some(ref id),
..
} = b
{
Some(id.clone())
} else {
None
}
})
});
// Append ToolResult to the same assistant message so they stay in the same turn
if let Some(last) = messages
.iter_mut()
.rev()
.find(|m| matches!(m.role, MessageRole::Assistant))
{
last.content.push(ContentBlock::ToolResult {
tool_use_id: matching_id,
output_preview,
is_error,
});
} else {
messages.push(UnifiedMessage {
id: format!("synth-result-{}", messages.len()),
role: MessageRole::Assistant,
content: vec![ContentBlock::ToolResult {
tool_use_id: matching_id,
output_preview,
is_error,
}],
timestamp: parse_timestamp(&value).unwrap_or_else(Utc::now),
usage: None,
duration_ms: None,
model: None,
});
}
}
_ => {}
}
}
let folder_path = cwd.clone();
let folder_name = folder_path.as_ref().map(|p| folder_name_from_path(p));
let mut turns = group_into_turns(messages);
super::relocate_orphaned_tool_results(&mut turns);
super::structurize_read_tool_output(&mut turns);
let context_window_used_tokens = latest_claude_context_window_used_tokens(&turns);
let context_window_max_tokens =
claude_context_window_max_tokens_for_model(model.as_deref());
let session_stats = merge_claude_context_window_stats(
super::compute_session_stats(&turns),
context_window_used_tokens,
context_window_max_tokens,
);
let summary = ConversationSummary {
id: conversation_id.to_string(),
agent_type: AgentType::ClaudeCode,
folder_path,
folder_name,
title,
started_at: first_timestamp.unwrap_or_else(Utc::now),
ended_at: last_timestamp,
message_count: turns.len() as u32,
model,
git_branch,
};
Ok(ConversationDetail {
summary,
turns,
session_stats,
})
}
}
/// Reads the record's `timestamp` string field and parses it as a UTC
/// date-time. Returns `None` when the field is missing, not a string or
/// not parseable.
fn parse_timestamp(value: &serde_json::Value) -> Option<DateTime<Utc>> {
    let raw = value.get("timestamp")?.as_str()?;
    raw.parse::<DateTime<Utc>>().ok()
}
/// Returns the first displayable text of a user record, with system tags
/// stripped.
///
/// `message.content` may be a plain string or an array of blocks. For an
/// array, the first `text` block that is non-empty after stripping wins.
fn extract_user_text(value: &serde_json::Value) -> Option<String> {
    let content = value.get("message")?.get("content")?;
    match content {
        serde_json::Value::String(text) => strip_system_tags(text),
        serde_json::Value::Array(items) => items
            .iter()
            .filter(|item| item.get("type").and_then(|t| t.as_str()) == Some("text"))
            .filter_map(|item| item.get("text").and_then(|t| t.as_str()))
            .find_map(|text| strip_system_tags(text)),
        _ => None,
    }
}
/// Converts the `message.content` of a user record into content blocks.
///
/// * String content becomes a single text block (none if empty after
///   system tag stripping).
/// * Array content is mapped block by block: `text` (stripped, dropped when
///   empty), `image`, and `tool_result` / `server_tool_result`. Other block
///   types are ignored.
fn extract_user_content(value: &serde_json::Value) -> Vec<ContentBlock> {
    let Some(content) = value.get("message").and_then(|m| m.get("content")) else {
        return Vec::new();
    };

    if let Some(text) = content.as_str() {
        return strip_system_tags(text)
            .map(|text| ContentBlock::Text { text })
            .into_iter()
            .collect();
    }

    let Some(items) = content.as_array() else {
        return Vec::new();
    };

    let mut blocks = Vec::new();
    for item in items {
        let parsed = match item.get("type").and_then(|t| t.as_str()) {
            Some("text") => item
                .get("text")
                .and_then(|t| t.as_str())
                .and_then(|text| strip_system_tags(text))
                .map(|text| ContentBlock::Text { text }),
            Some("image") => extract_claude_user_image(item),
            Some("tool_result" | "server_tool_result") => Some(ContentBlock::ToolResult {
                tool_use_id: item
                    .get("tool_use_id")
                    .and_then(|n| n.as_str())
                    .map(|s| s.to_string()),
                output_preview: extract_tool_result_text(item),
                is_error: item
                    .get("is_error")
                    .and_then(|e| e.as_bool())
                    .unwrap_or(false),
            }),
            _ => None,
        };
        blocks.extend(parsed);
    }
    blocks
}
/// Builds an image block from a user `image` content item.
///
/// Each field is looked up in the nested `source` object first and on the
/// item itself second. Non-empty `data` is required. When `data` is a
/// `data:image/...;base64,` URI, the MIME type and payload are taken from
/// it. Otherwise a MIME type starting with `image/` must be present
/// (`media_type` preferred over `mime_type`), and an optional `url` is
/// passed through.
fn extract_claude_user_image(item: &serde_json::Value) -> Option<ContentBlock> {
    /// Reads `node[key]` as a string slice, if both exist.
    fn str_at<'a>(node: Option<&'a serde_json::Value>, key: &str) -> Option<&'a str> {
        node?.get(key)?.as_str()
    }

    let source = item.get("source");
    let own = Some(item);

    let payload = str_at(source, "data")
        .or_else(|| str_at(own, "data"))
        .map(str::trim)
        .filter(|v| !v.is_empty())?;

    if let Some((mime_type, data)) = parse_data_uri_image(payload) {
        return Some(ContentBlock::Image {
            data,
            mime_type,
            uri: None,
        });
    }

    // The first MIME candidate that is present decides; there is no
    // fall-through to a later one if it turns out not to be an image type.
    let mime_type = str_at(source, "media_type")
        .or_else(|| str_at(source, "mime_type"))
        .or_else(|| str_at(own, "media_type"))
        .or_else(|| str_at(own, "mime_type"))
        .map(str::trim)
        .filter(|m| !m.is_empty() && m.starts_with("image/"))?;

    let uri = str_at(source, "url")
        .or_else(|| str_at(own, "url"))
        .map(|u| u.to_string());

    Some(ContentBlock::Image {
        data: payload.to_string(),
        mime_type: mime_type.to_string(),
        uri,
    })
}
/// Splits a base64 image data URI (`data:image/<type>;base64,<payload>`)
/// into `(mime_type, payload)`.
///
/// Surrounding whitespace of the input, the MIME type and the payload is
/// trimmed. Returns `None` when the `data:` prefix or `;base64,` marker is
/// missing, the MIME type does not start with `image/`, or the payload is
/// empty.
fn parse_data_uri_image(raw: &str) -> Option<(String, String)> {
    let body = raw.trim().strip_prefix("data:")?;
    let (mime_part, payload_part) = body.split_once(";base64,")?;

    let mime_type = mime_part.trim();
    let payload = payload_part.trim();
    if !mime_type.starts_with("image/") || payload.is_empty() {
        return None;
    }
    Some((mime_type.to_string(), payload.to_string()))
}
/// Converts the `message.content` array of an assistant record into content
/// blocks.
///
/// Handles `text`, `thinking` and `tool_use` / `server_tool_use` blocks;
/// other block types are ignored. Content that is not an array yields no
/// blocks. Assistant text is kept verbatim (no system tag stripping).
fn extract_assistant_content(value: &serde_json::Value) -> Vec<ContentBlock> {
    let Some(items) = value
        .get("message")
        .and_then(|m| m.get("content"))
        .and_then(|c| c.as_array())
    else {
        return Vec::new();
    };

    let mut blocks = Vec::new();
    for item in items {
        let parsed = match item.get("type").and_then(|t| t.as_str()) {
            Some("text") => item
                .get("text")
                .and_then(|t| t.as_str())
                .map(|text| ContentBlock::Text {
                    text: text.to_string(),
                }),
            Some("thinking") => item
                .get("thinking")
                .and_then(|t| t.as_str())
                .map(|text| ContentBlock::Thinking {
                    text: text.to_string(),
                }),
            Some("tool_use" | "server_tool_use") => Some(ContentBlock::ToolUse {
                tool_use_id: item
                    .get("id")
                    .and_then(|n| n.as_str())
                    .map(|s| s.to_string()),
                tool_name: item
                    .get("name")
                    .and_then(|n| n.as_str())
                    .unwrap_or("unknown")
                    .to_string(),
                // The tool input is kept as its serialized JSON text.
                input_preview: item.get("input").map(|i| i.to_string()),
            }),
            _ => None,
        };
        blocks.extend(parsed);
    }
    blocks
}
/// Reads the token counters from `message.usage` of an assistant record.
///
/// Returns `None` when the record has no `message.usage`. A counter that is
/// missing or not an unsigned integer is read as 0.
fn extract_usage(value: &serde_json::Value) -> Option<TurnUsage> {
    let usage = value.get("message")?.get("usage")?;
    let counter = |key: &str| usage.get(key).and_then(|v| v.as_u64()).unwrap_or(0);
    Some(TurnUsage {
        input_tokens: counter("input_tokens"),
        output_tokens: counter("output_tokens"),
        cache_creation_input_tokens: counter("cache_creation_input_tokens"),
        cache_read_input_tokens: counter("cache_read_input_tokens"),
    })
}
fn extract_tool_result_text(item: &serde_json::Value) -> Option<String> {
let content = item.get("content")?;
if let Some(text) = content.as_str() {
return Some(text.to_string());
}
if let Some(arr) = content.as_array() {
let texts: Vec<String> = arr
.iter()
.filter_map(|c| {
if c.get("type").and_then(|t| t.as_str()) == Some("text") {
c.get("text")
.and_then(|t| t.as_str())
.map(|s| s.to_string())
} else {
None
}
})
.collect();
if !texts.is_empty() {
return Some(texts.join("\n"));
}
}
None
}
/// Reports whether `msg` is a user message that consists solely of
/// tool-result blocks (at least one, and nothing else).
///
/// In Claude Code, tool results come back as "user" messages.
fn is_tool_result_only(msg: &UnifiedMessage) -> bool {
    if !matches!(msg.role, MessageRole::User) || msg.content.is_empty() {
        return false;
    }
    let has_other_block = msg
        .content
        .iter()
        .any(|block| !matches!(block, ContentBlock::ToolResult { .. }));
    !has_other_block
}
/// Groups the flat message list into conversation turns, in order.
///
/// Claude Code rule: an assistant message absorbs the tool-result-only user
/// messages that immediately follow it, forming one assistant turn. System
/// messages become system turns; every other message becomes a user turn.
/// Turn IDs are `turn-<index>`, numbered by position in the output.
fn group_into_turns(messages: Vec<UnifiedMessage>) -> Vec<MessageTurn> {
    let mut turns: Vec<MessageTurn> = Vec::new();
    let mut pending = messages.into_iter().peekable();

    while let Some(msg) = pending.next() {
        let id = format!("turn-{}", turns.len());
        let turn = match msg.role {
            MessageRole::Assistant => {
                let mut blocks = msg.content;
                // Only absorb immediately following tool-result-only user msgs
                // (stop at the next assistant message to keep turns small for virtualization)
                while let Some(follow_up) = pending.next_if(|next| is_tool_result_only(next)) {
                    blocks.extend(follow_up.content);
                }
                MessageTurn {
                    id,
                    role: TurnRole::Assistant,
                    blocks,
                    timestamp: msg.timestamp,
                    usage: msg.usage,
                    duration_ms: msg.duration_ms,
                    model: msg.model,
                }
            }
            MessageRole::System => MessageTurn {
                id,
                role: TurnRole::System,
                blocks: msg.content,
                timestamp: msg.timestamp,
                usage: None,
                duration_ms: None,
                model: None,
            },
            _ => MessageTurn {
                id,
                role: TurnRole::User,
                blocks: msg.content,
                timestamp: msg.timestamp,
                usage: None,
                duration_ms: None,
                model: None,
            },
        };
        turns.push(turn);
    }
    turns
}
#[cfg(test)]
mod tests {
    use std::io::Write;

    use super::*;
    use serde_json::json;

    /// Capacity suffixes are parsed case-insensitively, with fractions and
    /// optional whitespace; names without a suffix yield `None`.
    #[test]
    fn parses_model_capacity_suffix() {
        assert_eq!(
            parse_model_capacity_suffix("claude-sonnet-4-6[1.5M]"),
            Some(1_500_000)
        );
        assert_eq!(
            parse_model_capacity_suffix("claude-opus-4-6 [500k]"),
            Some(500_000)
        );
        assert_eq!(parse_model_capacity_suffix("claude-sonnet-4-6"), None);
    }

    /// Claude models without an explicit suffix default to the 1M window
    /// implemented by `claude_context_window_max_tokens_for_model`.
    #[test]
    fn defaults_context_limit_for_claude_models() {
        assert_eq!(
            claude_context_window_max_tokens_for_model(Some("claude-sonnet-4-6")),
            Some(1_000_000)
        );
        assert_eq!(
            claude_context_window_max_tokens_for_model(Some("custom-model-x")),
            None
        );
    }

    /// The newest turn with usage determines the used context tokens;
    /// output tokens are not part of the sum.
    #[test]
    fn uses_latest_assistant_usage_for_context_tokens() {
        let timestamp = Utc::now();
        let turns = vec![
            MessageTurn {
                id: "turn-0".to_string(),
                role: TurnRole::Assistant,
                blocks: vec![],
                timestamp,
                usage: Some(TurnUsage {
                    input_tokens: 100,
                    output_tokens: 20,
                    cache_creation_input_tokens: 30,
                    cache_read_input_tokens: 40,
                }),
                duration_ms: None,
                model: None,
            },
            MessageTurn {
                id: "turn-1".to_string(),
                role: TurnRole::Assistant,
                blocks: vec![],
                timestamp,
                usage: Some(TurnUsage {
                    input_tokens: 250,
                    output_tokens: 60,
                    cache_creation_input_tokens: 70,
                    cache_read_input_tokens: 80,
                }),
                duration_ms: None,
                model: None,
            },
        ];
        assert_eq!(
            latest_claude_context_window_used_tokens(&turns),
            Some(250 + 70 + 80)
        );
    }

    /// End-to-end: parsing a two-line session fills in the context window
    /// stats from the assistant usage and the model's default window.
    #[test]
    fn parse_detail_sets_claude_context_window_stats() {
        let path = std::env::temp_dir().join(format!(
            "codeg-claude-parser-{}.jsonl",
            uuid::Uuid::new_v4()
        ));
        let mut file = fs::File::create(&path).expect("create temp jsonl");
        writeln!(
            file,
            "{}",
            serde_json::json!({
                "type": "user",
                "sessionId": "session-test",
                "timestamp": "2026-03-01T10:00:00Z",
                "uuid": "u1",
                "cwd": "/tmp/demo",
                "gitBranch": "main",
                "message": {
                    "content": [{"type": "text", "text": "hello"}]
                }
            })
        )
        .expect("write user line");
        writeln!(
            file,
            "{}",
            serde_json::json!({
                "type": "assistant",
                "sessionId": "session-test",
                "timestamp": "2026-03-01T10:00:02Z",
                "uuid": "a1",
                "message": {
                    "model": "claude-sonnet-4-6",
                    "content": [{"type": "text", "text": "world"}],
                    "usage": {
                        "input_tokens": 1000,
                        "output_tokens": 200,
                        "cache_creation_input_tokens": 300,
                        "cache_read_input_tokens": 400
                    }
                }
            })
        )
        .expect("write assistant line");

        let parser = ClaudeParser {
            base_dir: PathBuf::new(),
        };
        let detail = parser
            .parse_conversation_detail(&path, "session-test")
            .expect("parse conversation detail");
        fs::remove_file(&path).expect("cleanup temp jsonl");

        let stats = detail.session_stats.expect("session stats");
        // 1000 input + 300 cache creation + 400 cache read
        assert_eq!(stats.context_window_used_tokens, Some(1700));
        assert_eq!(stats.context_window_max_tokens, Some(1_000_000));
        let percent = stats
            .context_window_usage_percent
            .expect("context window usage percent");
        // 1700 / 1_000_000 = 0.17 %
        assert!((percent - 0.17).abs() < 1e-9);
    }

    #[test]
    fn claude_config_dir_env_overrides_home() {
        let resolved = resolve_claude_config_dir_from(
            Some(std::ffi::OsString::from("/tmp/claude-config")),
            Some(PathBuf::from("/Users/default")),
        );
        assert_eq!(resolved, PathBuf::from("/tmp/claude-config"));
    }

    #[test]
    fn claude_config_dir_defaults_to_home_dot_claude() {
        let resolved = resolve_claude_config_dir_from(None, Some(PathBuf::from("/Users/default")));
        assert_eq!(resolved, PathBuf::from("/Users/default/.claude"));
    }

    /// A `<synthetic>` assistant record must neither appear as a turn nor
    /// influence the stats.
    #[test]
    fn synthetic_assistant_excluded_from_detail() {
        let path = std::env::temp_dir().join(format!(
            "codeg-claude-synthetic-{}.jsonl",
            uuid::Uuid::new_v4()
        ));
        let mut file = fs::File::create(&path).expect("create temp jsonl");
        // Normal user message
        writeln!(
            file,
            "{}",
            json!({
                "type": "user",
                "sessionId": "synth-test",
                "timestamp": "2026-03-01T10:00:00Z",
                "uuid": "u1",
                "cwd": "/tmp/demo",
                "message": {
                    "content": [{"type": "text", "text": "hello"}]
                }
            })
        )
        .unwrap();
        // Normal assistant message with real usage
        writeln!(
            file,
            "{}",
            json!({
                "type": "assistant",
                "sessionId": "synth-test",
                "timestamp": "2026-03-01T10:00:02Z",
                "uuid": "a1",
                "message": {
                    "model": "claude-sonnet-4-6",
                    "content": [{"type": "text", "text": "world"}],
                    "usage": {
                        "input_tokens": 1000,
                        "output_tokens": 200,
                        "cache_creation_input_tokens": 300,
                        "cache_read_input_tokens": 400
                    }
                }
            })
        )
        .unwrap();
        // Synthetic assistant from a local command like /context
        writeln!(
            file,
            "{}",
            json!({
                "type": "assistant",
                "sessionId": "synth-test",
                "timestamp": "2026-03-01T10:01:00Z",
                "uuid": "a2",
                "message": {
                    "model": "<synthetic>",
                    "content": [{"type": "text", "text": "No response requested."}],
                    "usage": {
                        "input_tokens": 0,
                        "output_tokens": 0,
                        "cache_creation_input_tokens": 0,
                        "cache_read_input_tokens": 0
                    }
                }
            })
        )
        .unwrap();

        let parser = ClaudeParser {
            base_dir: PathBuf::new(),
        };
        let detail = parser
            .parse_conversation_detail(&path, "synth-test")
            .expect("parse detail");
        fs::remove_file(&path).unwrap();

        // Should have 2 turns (user + real assistant), synthetic is excluded
        assert_eq!(detail.turns.len(), 2);
        assert!(
            !detail
                .turns
                .iter()
                .any(|t| t.blocks.iter().any(|b| matches!(
                    b,
                    ContentBlock::Text { text } if text == "No response requested."
                ))),
            "synthetic assistant content should not appear in turns"
        );

        // Stats should reflect only the real assistant usage
        let stats = detail.session_stats.expect("session stats");
        assert_eq!(stats.context_window_used_tokens, Some(1700));
        assert_eq!(stats.context_window_max_tokens, Some(1_000_000));
        let total = stats.total_tokens.expect("total tokens");
        assert_eq!(total, 1900); // 1000 + 200 + 300 + 400
    }

    #[test]
    fn extract_user_content_parses_claude_base64_image_block() {
        let value = json!({
            "message": {
                "content": [
                    {"type": "text", "text": "这个图片里面是什么"},
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/jpeg",
                            "data": "QUJDREVGRw=="
                        }
                    }
                ]
            }
        });
        let blocks = extract_user_content(&value);
        assert_eq!(blocks.len(), 2);
        assert!(matches!(&blocks[0], ContentBlock::Text { text } if text == "这个图片里面是什么"));
        assert!(matches!(
            &blocks[1],
            ContentBlock::Image { data, mime_type, uri }
                if data == "QUJDREVGRw==" && mime_type == "image/jpeg" && uri.is_none()
        ));
    }

    #[test]
    fn extract_user_content_parses_claude_data_uri_image_block() {
        let value = json!({
            "message": {
                "content": [
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "data": "data:image/png;base64,QUJD"
                        }
                    }
                ]
            }
        });
        let blocks = extract_user_content(&value);
        assert_eq!(blocks.len(), 1);
        assert!(matches!(
            &blocks[0],
            ContentBlock::Image { data, mime_type, uri }
                if data == "QUJD" && mime_type == "image/png" && uri.is_none()
        ));
    }
}