use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use chrono::{DateTime, Duration, Utc};
use crate::state_store::Session;
/// File and directory names whose presence inside a directory marks that
/// directory as a project root.
///
/// `has_marker` joins each entry onto a candidate directory and checks whether
/// the resulting path exists, so entries may name either files (e.g.
/// `Cargo.toml`) or directories (e.g. `.git`).
const ROOT_MARKERS: &[&str] = &[
"Cargo.toml",
"package.json",
"go.mod",
"pyproject.toml",
"setup.py",
"pom.xml",
"build.gradle",
"build.gradle.kts",
"Gemfile",
"composer.json",
"mix.exs",
"CMakeLists.txt",
"Makefile",
"AGENTS.md",
".git",
];
/// Usage statistics accumulated for a single project root.
#[derive(Debug, Clone, Default)]
pub struct PathFrequency {
/// Number of sessions that referenced the root; `tally_frequencies` adds
/// one per session no matter how often the session mentions it.
pub count: usize,
/// Newest timestamp at which the root was referenced, or `None` when
/// nothing has been recorded yet (the `Default` value).
pub last_seen: Option<DateTime<Utc>>,
}
/// Settings consumed when tallying how often project roots are referenced.
#[derive(Debug, Clone)]
pub struct PromotionConfig {
/// Feature toggle.
/// NOTE(review): not read by any code visible in this file — presumably
/// checked by the caller; confirm.
pub enabled: bool,
/// NOTE(review): presumably the minimum `PathFrequency::count` a root needs
/// before it is promoted; not read by any code visible in this file —
/// confirm against the caller.
pub threshold: usize,
/// Look-back window in days. `tally_frequencies` ignores references whose
/// timestamp is older than `now - window_days`.
pub window_days: i64,
}
impl Default for PromotionConfig {
fn default() -> Self {
Self {
enabled: true,
threshold: 3,
window_days: 14,
}
}
}
/// Collects every path-looking string mentioned in `session`, paired with the
/// timestamp of the trajectory step or user message it came from.
///
/// Tool arguments of each trajectory step are scanned first (including nested
/// JSON values), followed by every whitespace-separated word of each user
/// message. Surrounding quote and bracket characters are stripped from each
/// hit. Duplicates are kept; callers that need uniqueness must deduplicate.
pub fn extract_paths(session: &Session) -> Vec<(String, DateTime<Utc>)> {
    let mut found = Vec::new();

    for step in session.trajectory_steps.iter() {
        collect_path_like_strings(&step.tool_args, &mut found, step.timestamp);
    }

    let from_messages = session.user_messages.iter().flat_map(|msg| {
        msg.content
            .split_whitespace()
            .filter(|word| looks_like_path(word))
            .map(move |word| (word.trim_matches(punct).to_string(), msg.timestamp))
    });
    found.extend(from_messages);

    found
}
/// Counts, per project root, how many of `sessions` referenced it within the
/// configured look-back window.
///
/// For every session the referenced paths are extracted, references older
/// than `now - config.window_days` are dropped, and each remaining path is
/// collapsed to its project root via `normalize_to_root` (paths without a
/// root are skipped). A root contributes at most one to its `count` per
/// session; `last_seen` tracks the newest in-window timestamp across all
/// sessions.
///
/// Only `config.window_days` is consulted here; `enabled` and `threshold` are
/// not read by this function.
///
/// NOTE(review): `Duration::days` and the subtraction are unchecked —
/// presumably they panic for an extreme `window_days`; confirm against the
/// chrono version in use.
pub fn tally_frequencies(
    sessions: &[Session],
    config: &PromotionConfig,
) -> HashMap<PathBuf, PathFrequency> {
    let cutoff = Utc::now() - Duration::days(config.window_days);
    let mut tallies: HashMap<PathBuf, PathFrequency> = HashMap::new();

    for session in sessions {
        // Per-session bookkeeping so a root mentioned many times in one
        // session still counts only once.
        let mut roots_in_session: HashSet<PathBuf> = HashSet::new();
        let mut newest_by_root: HashMap<PathBuf, DateTime<Utc>> = HashMap::new();

        let recent = extract_paths(session)
            .into_iter()
            .filter(|(_, seen_at)| *seen_at >= cutoff);
        for (raw, seen_at) in recent {
            if let Some(root) = normalize_to_root(Path::new(&raw)) {
                let newest = newest_by_root.entry(root.clone()).or_insert(seen_at);
                if seen_at > *newest {
                    *newest = seen_at;
                }
                roots_in_session.insert(root);
            }
        }

        for root in roots_in_session {
            let seen_at = newest_by_root[&root];
            let tally = tallies.entry(root).or_default();
            tally.count += 1;
            tally.last_seen = match tally.last_seen {
                Some(prev) if prev >= seen_at => Some(prev),
                _ => Some(seen_at),
            };
        }
    }

    tallies
}
/// Maps `path` to the nearest enclosing directory that contains a project
/// root marker.
///
/// A leading `~` is expanded first, then the path is canonicalized; if
/// canonicalization fails (for example because the path does not exist) the
/// expanded path is used as-is. The search starts at the path itself when it
/// is a directory, otherwise at its parent, and walks upward one ancestor at
/// a time.
///
/// Returns `None` when a non-directory path has no parent or when no ancestor
/// contains a marker. This function touches the filesystem.
pub fn normalize_to_root(path: &Path) -> Option<PathBuf> {
    let expanded = expand_tilde(path);
    let resolved = match std::fs::canonicalize(&expanded) {
        Ok(real) => real,
        Err(_) => expanded,
    };

    let start = if resolved.is_dir() {
        resolved
    } else {
        resolved.parent()?.to_path_buf()
    };

    // `ancestors` yields `start` itself first, then each successive parent.
    start
        .ancestors()
        .find(|dir| has_marker(dir))
        .map(Path::to_path_buf)
}
/// Reports whether `dir` directly contains at least one entry named in
/// `ROOT_MARKERS`. Stops at the first marker found.
fn has_marker(dir: &Path) -> bool {
    for marker in ROOT_MARKERS {
        if dir.join(marker).exists() {
            return true;
        }
    }
    false
}
/// Expands a leading `~` in `path` using the `HOME` environment variable.
///
/// Thin wrapper around `expand_tilde_with_home`, which holds the actual logic
/// and takes the home directory as an explicit argument.
fn expand_tilde(path: &Path) -> PathBuf {
    let home = std::env::var_os("HOME");
    expand_tilde_with_home(path, home)
}
/// Expands a leading `~` component of `path` to `home`.
///
/// * `~` alone becomes `home`.
/// * `~/rest` becomes `home/rest`.
/// * Anything else — absolute paths, relative paths, `~user/...` — is
///   returned unchanged.
/// * When `home` is `None` or empty the path is returned unchanged, so an
///   unset or blank `HOME` never silently produces a relative path.
///
/// The prefix is matched component-wise on the raw path rather than on a
/// lossily converted string, so non-UTF-8 bytes after `~/` are preserved and
/// repeated separators (`~//rest`) cannot turn the remainder into an absolute
/// path that would discard `home` when joined.
fn expand_tilde_with_home(path: &Path, home: Option<std::ffi::OsString>) -> PathBuf {
    let Some(home) = home.filter(|h| !h.is_empty()) else {
        return path.to_path_buf();
    };
    match path.strip_prefix("~") {
        // Bare `~` (or `~/`): return the home directory itself, without the
        // trailing separator that joining an empty path would append.
        Ok(rest) if rest.as_os_str().is_empty() => PathBuf::from(home),
        Ok(rest) => PathBuf::from(home).join(rest),
        // First component is not exactly `~`.
        Err(_) => path.to_path_buf(),
    }
}
/// Appends every path-looking string found anywhere inside `value` to `out`,
/// tagging each with `ts`.
///
/// Entry point for the depth-limited recursive walk; the top-level value is
/// treated as depth zero.
fn collect_path_like_strings(
    value: &serde_json::Value,
    out: &mut Vec<(String, DateTime<Utc>)>,
    ts: DateTime<Utc>,
) {
    let root_depth = 0;
    collect_path_like_strings_inner(value, out, ts, root_depth);
}
fn collect_path_like_strings_inner(
value: &serde_json::Value,
out: &mut Vec<(String, DateTime<Utc>)>,
ts: DateTime<Utc>,
depth: u32,
) {
const MAX_DEPTH: u32 = 32;
if depth > MAX_DEPTH {
return;
}
match value {
serde_json::Value::String(s) => {
if looks_like_path(s) {
out.push((s.trim_matches(punct).to_string(), ts));
}
}
serde_json::Value::Array(arr) => {
for v in arr {
collect_path_like_strings_inner(v, out, ts, depth + 1);
}
}
serde_json::Value::Object(map) => {
for v in map.values() {
collect_path_like_strings_inner(v, out, ts, depth + 1);
}
}
_ => {}
}
}
/// Heuristically decides whether `s` is a filesystem path worth tracking.
///
/// Surrounding quote and bracket characters are ignored. After trimming, the
/// candidate must be longer than three bytes and either
/// * start with `/` and contain at least two `/` characters, or
/// * start with `~/`.
///
/// Anything starting with `//` is rejected outright.
fn looks_like_path(s: &str) -> bool {
    let candidate = s.trim_matches(punct);
    if candidate.starts_with("//") {
        return false;
    }

    let long_enough = candidate.len() > 3;
    let absolute = candidate.starts_with('/') && candidate.matches('/').count() >= 2;
    let home_relative = candidate.starts_with("~/");

    long_enough && (absolute || home_relative)
}
/// Returns `true` for the quote, bracket, comma and semicolon characters that
/// are stripped from both ends of a candidate path.
fn punct(c: char) -> bool {
    const STRIPPED: [char; 11] = [
        '"', '\'', '`', ',', ';', ')', ']', '}', '(', '[', '{',
    ];
    STRIPPED.contains(&c)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::state_store::{Session, SessionId};

    /// Builds a session with id `s1` whose user messages are `msgs`, in order.
    fn make_session(msgs: Vec<(&str, DateTime<Utc>)>) -> Session {
        let mut s = Session::new("test");
        s.id = SessionId("s1".into());
        for (content, ts) in msgs {
            s.user_messages.push(crate::state_store::UserMessage {
                content: content.into(),
                timestamp: ts,
            });
        }
        s
    }

    /// Directory containing this crate's `Cargo.toml`.
    fn crate_root() -> PathBuf {
        PathBuf::from(env!("CARGO_MANIFEST_DIR"))
    }

    /// Reports whether `candidate` resolves to the crate's manifest directory.
    ///
    /// Both sides are canonicalized so the check does not depend on the name
    /// of the checkout directory or on symlinks along the way.
    fn is_crate_root(candidate: &Path) -> bool {
        match (
            std::fs::canonicalize(candidate),
            std::fs::canonicalize(crate_root()),
        ) {
            (Ok(found), Ok(expected)) => found == expected,
            _ => false,
        }
    }

    #[test]
    fn test_looks_like_path() {
        assert!(looks_like_path("/usr/local/bin"));
        assert!(looks_like_path("~/projects/foo"));
        assert!(!looks_like_path("hello world"));
        assert!(!looks_like_path("/x"));
        assert!(!looks_like_path("no-slash"));
    }

    /// Three mentions of the same root inside one session count once.
    #[test]
    fn test_tally_counts_repeated_paths() {
        let now = Utc::now();
        let root = crate_root();
        let messages = [
            format!("fix {}/src/lib.rs", root.display()),
            format!("also check {}/Cargo.toml", root.display()),
            format!("again {}", root.display()),
        ];
        let sessions = vec![make_session(
            messages.iter().map(|m| (m.as_str(), now)).collect(),
        )];
        let config = PromotionConfig {
            threshold: 1,
            ..Default::default()
        };
        let freqs = tally_frequencies(&sessions, &config);
        let freq = freqs
            .iter()
            .find(|(k, _)| is_crate_root(k))
            .map(|(_, v)| v)
            .unwrap_or_else(|| panic!("expected root in {:?}", freqs));
        assert_eq!(freq.count, 1);
    }

    /// References older than the window are ignored entirely.
    #[test]
    fn test_tally_respects_window() {
        let now = Utc::now();
        let old = now - Duration::days(30);
        let root = crate_root();
        let messages = [
            format!("work on {}/src/lib.rs", root.display()),
            format!("work on {}/Cargo.toml", root.display()),
            format!("work on {}", root.display()),
        ];
        let sessions = vec![make_session(
            messages.iter().map(|m| (m.as_str(), old)).collect(),
        )];
        let config = PromotionConfig {
            window_days: 14,
            ..Default::default()
        };
        let freqs = tally_frequencies(&sessions, &config);
        assert!(freqs.is_empty(), "expected empty freqs, got {:?}", freqs);
    }

    /// A file inside the crate collapses to the crate's manifest directory.
    #[test]
    fn test_normalize_collapses_files_to_root() {
        let file = crate_root().join("src/lib.rs");
        let root = normalize_to_root(&file).expect("should find root");
        assert!(
            is_crate_root(&root),
            "expected {:?} to be the crate root {:?}",
            root,
            crate_root()
        );
    }

    #[test]
    fn test_normalize_expands_tilde() {
        let home = std::ffi::OsString::from("/Users/test");
        assert_eq!(
            expand_tilde_with_home(Path::new("~/foo"), Some(home.clone())),
            PathBuf::from("/Users/test/foo")
        );
        assert_eq!(
            expand_tilde_with_home(Path::new("~"), Some(home.clone())),
            PathBuf::from("/Users/test")
        );
        assert_eq!(
            expand_tilde_with_home(Path::new("/etc/passwd"), Some(home.clone())),
            PathBuf::from("/etc/passwd")
        );
        assert_eq!(
            expand_tilde_with_home(Path::new("relative/path"), Some(home.clone())),
            PathBuf::from("relative/path")
        );
        assert_eq!(
            expand_tilde_with_home(Path::new("~/foo"), None),
            PathBuf::from("~/foo")
        );
    }

    /// Strings buried deeper than the recursion bound are not collected,
    /// while shallow ones still are.
    #[test]
    fn test_collect_path_like_bounds_recursion_depth() {
        // 100 wrapper objects put the string far past the depth limit.
        let mut value = serde_json::json!({"path": "/usr/local/bin"});
        for _ in 0..100 {
            value = serde_json::json!({ "nested": value });
        }
        let mut out = Vec::new();
        collect_path_like_strings(&value, &mut out, Utc::now());
        assert!(out.is_empty(), "expected no paths past depth bound");

        let shallow = serde_json::json!({
            "a": { "b": { "file": "/usr/local/bin/oxios" } }
        });
        let mut out2 = Vec::new();
        collect_path_like_strings(&shallow, &mut out2, Utc::now());
        assert_eq!(out2.len(), 1);
        assert_eq!(out2[0].0, "/usr/local/bin/oxios");
    }
}