fix(acp): bypass Windows file locks when clearing binary cache

clear_agent_cache falls back to renaming the agent cache directory
to <cache_dir>/.trash/<agent_id>-<nanos>-<counter>/ when
remove_dir_all fails — typically because a running <cmd>.exe or a
Defender scan holds the file. NTFS rename succeeds on directories
whose children are locked, so Upgrade and Uninstall no longer
surface ERROR_ACCESS_DENIED on Windows.

A detached OS thread sweeps .trash/ at startup with all errors
swallowed, panics caught, and no shared state, so it cannot block
app startup or leak threads. Entries that are still locked are left
for the next launch.
This commit is contained in:
xintaofei
2026-04-25 15:56:47 +08:00
parent f0bd2a28a2
commit f264f560b1
3 changed files with 80 additions and 4 deletions

View File

@@ -1,11 +1,18 @@
use std::collections::HashSet; use std::collections::HashSet;
use std::io::Read; use std::io::Read;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicU64, Ordering};
use crate::acp::error::AcpError; use crate::acp::error::AcpError;
use crate::acp::registry; use crate::acp::registry;
use crate::models::agent::AgentType; use crate::models::agent::AgentType;
/// Process-local counter appended to rename-aside trash directory names. Guards
/// against the rare case where two `clear_agent_cache` calls land in the same
/// `SystemTime::now()` tick (Windows `GetSystemTimePreciseAsFileTime` has ~100ns
/// resolution) and would otherwise collide on the rename target.
///
/// `clear_agent_cache` reads it with `fetch_add(1, Ordering::Relaxed)`, so each
/// call in this process observes a distinct value. The counter restarts at 0 on
/// every launch; across launches only the timestamp component of the directory
/// name tells entries apart.
static TRASH_COUNTER: AtomicU64 = AtomicU64::new(0);
pub(crate) fn cache_dir() -> Result<PathBuf, AcpError> { pub(crate) fn cache_dir() -> Result<PathBuf, AcpError> {
let base = dirs::cache_dir() let base = dirs::cache_dir()
.ok_or_else(|| AcpError::DownloadFailed("cannot determine cache directory".into()))?; .ok_or_else(|| AcpError::DownloadFailed("cannot determine cache directory".into()))?;
@@ -44,14 +51,56 @@ pub(crate) fn binary_dir(agent_id: &str, version: &str) -> Result<PathBuf, AcpEr
/// Removes the on-disk cache directory for `agent_type`.
///
/// NOTE(review): this span is a side-by-side diff rendering. Lines that show
/// two statements carry the old version on the left and the new version on the
/// right; the description below refers to the new (right-hand) version.
///
/// Behavior of the new version:
/// 1. If the agent's cache directory does not exist, returns `Ok(())`.
/// 2. If `remove_dir_all` succeeds, returns `Ok(())`.
/// 3. Otherwise renames the directory to
///    `<cache_dir>/.trash/<agent_id>-<stamp>-<counter>` and then makes one
///    best-effort attempt to delete the renamed directory, ignoring failure.
///
/// # Errors
///
/// Propagates any error from `cache_dir()`, and returns
/// `AcpError::DownloadFailed("failed to clear cache: …")` when the fallback
/// rename fails.
///
/// NOTE(review): the error from the initial `remove_dir_all` is discarded, so
/// when the rename also fails the caller only sees the rename error. Consider
/// including both causes in the message.
pub fn clear_agent_cache(agent_type: AgentType) -> Result<(), AcpError> { pub fn clear_agent_cache(agent_type: AgentType) -> Result<(), AcpError> {
let agent_id = agent_cache_key(agent_type); let agent_id = agent_cache_key(agent_type);
let dir = cache_dir()?.join(agent_id); let dir = cache_dir()?.join(&agent_id);
if dir.exists() { if !dir.exists() {
std::fs::remove_dir_all(&dir) return Ok(());
.map_err(|e| AcpError::DownloadFailed(format!("failed to clear cache: {e}")))?;
} }
// Fast path: a plain recursive delete. Its error value is dropped here; any
// failure falls through to the rename-aside fallback below.
if std::fs::remove_dir_all(&dir).is_ok() {
return Ok(());
}
// Windows: a running `<cmd>.exe` (ours or anti-virus scanning it) keeps the
// file locked, so `remove_dir_all` returns ERROR_ACCESS_DENIED. NTFS allows
// renaming a directory whose children are locked because rename only
// updates the parent directory entry; the locked file's FILE_OBJECT keeps
// working under the new path. The aside is swept on next startup.
let trash_root = cache_dir()?.join(".trash");
// A failure to create `.trash` is ignored here; in that case the rename
// below is what reports the error.
let _ = std::fs::create_dir_all(&trash_root);
// Nanoseconds since the UNIX epoch; falls back to 0 if the system clock
// reads earlier than the epoch.
let stamp = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_nanos())
.unwrap_or(0);
// The per-process counter keeps names distinct even when two calls observe
// the same `stamp`.
let counter = TRASH_COUNTER.fetch_add(1, Ordering::Relaxed);
let aside = trash_root.join(format!("{agent_id}-{stamp}-{counter}"));
std::fs::rename(&dir, &aside)
.map_err(|e| AcpError::DownloadFailed(format!("failed to clear cache: {e}")))?;
// Best-effort immediate cleanup; whatever remains stays under `.trash/`.
let _ = std::fs::remove_dir_all(&aside);
Ok(()) Ok(())
} }
/// Deletes whatever the rename-aside fallback of `clear_agent_cache` left
/// under `<cache_dir>/.trash/`, on a best-effort basis.
///
/// Nothing is reported to the caller: if the cache directory cannot be
/// resolved or `.trash/` cannot be listed, the function returns immediately,
/// and a child that cannot be removed (for example because a file inside it
/// is still locked) is left in place for a later sweep.
///
/// Each child of `.trash/` is removed individually and the `.trash/`
/// directory itself is never deleted, so a concurrent `clear_agent_cache`
/// that is renaming a new entry into `.trash/` does not lose its target
/// directory.
pub fn sweep_trash() {
    let trash_root = match cache_dir() {
        Ok(root) => root.join(".trash"),
        Err(_) => return,
    };

    if let Ok(children) = std::fs::read_dir(&trash_root) {
        // Entries that fail to enumerate are skipped; removal failures are dropped.
        children.filter_map(Result::ok).for_each(|child| {
            let _ = std::fs::remove_dir_all(child.path());
        });
    }
}
fn installed_binary_path(agent_id: &str, version: &str, cmd_name: &str) -> Option<PathBuf> { fn installed_binary_path(agent_id: &str, version: &str, cmd_name: &str) -> Option<PathBuf> {
let bin_name = if cfg!(target_os = "windows") { let bin_name = if cfg!(target_os = "windows") {
format!("{cmd_name}.exe") format!("{cmd_name}.exe")

View File

@@ -29,6 +29,15 @@ fn main() {
} }
async fn async_main() { async fn async_main() {
// Sweep stale ACP binary cache trash (rename-aside fallback artifacts).
// Detached OS thread: cannot block startup, panics are caught and dropped,
// errors are silenced, no subprocesses spawned.
std::thread::spawn(|| {
let _ = std::panic::catch_unwind(|| {
codeg_lib::sweep_acp_binary_trash();
});
});
let port: u16 = std::env::var("CODEG_PORT") let port: u16 = std::env::var("CODEG_PORT")
.ok() .ok()
.and_then(|v| v.parse().ok()) .and_then(|v| v.parse().ok())

View File

@@ -17,6 +17,14 @@ mod terminal;
pub mod web; pub mod web;
pub mod workspace_state; pub mod workspace_state;
/// Crate-level entry point for clearing stale ACP binary cache trash, i.e. the
/// directories moved aside by the rename fallback in
/// `acp::binary_cache::clear_agent_cache`.
///
/// Delegates to `acp::binary_cache::sweep_trash`, which discards every error
/// it encounters, so this function returns nothing and reports nothing. The
/// sweep performs its filesystem work on the calling thread; it is intended
/// to be invoked once at startup from a detached OS thread so that startup
/// does not wait on it.
pub fn sweep_acp_binary_trash() {
    use crate::acp::binary_cache::sweep_trash;

    sweep_trash();
}
#[cfg(feature = "tauri-runtime")] #[cfg(feature = "tauri-runtime")]
mod tauri_app { mod tauri_app {
use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::atomic::{AtomicBool, Ordering};
@@ -124,6 +132,16 @@ mod tauri_app {
tauri::async_runtime::block_on(windows::load_saved_zoom(&db.conn)); tauri::async_runtime::block_on(windows::load_saved_zoom(&db.conn));
tauri::async_runtime::block_on(windows::load_saved_appearance_mode(&db.conn)); tauri::async_runtime::block_on(windows::load_saved_appearance_mode(&db.conn));
// Sweep stale ACP binary cache trash (rename-aside fallback
// artifacts). Detached OS thread: cannot block startup, panics
// are caught and dropped, errors are silenced, no subprocesses
// spawned. Anything still locked is left for next startup.
std::thread::spawn(|| {
let _ = std::panic::catch_unwind(|| {
crate::sweep_acp_binary_trash();
});
});
// Install bundled expert skills into the central store // Install bundled expert skills into the central store
// (`~/.codeg/skills/`). Runs in the background and does // (`~/.codeg/skills/`). Runs in the background and does
// not block startup; failures are logged but non-fatal. // not block startup; failures are logged but non-fatal.