Skip to main content

lcsa_core/
filesystem.rs

1use std::collections::BTreeMap;
2use std::path::{Path, PathBuf};
3
4use serde::{Deserialize, Serialize};
5use serde_json::Value;
6use time::{OffsetDateTime, format_description::well_known::Rfc3339};
7
8#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
9#[serde(rename_all = "snake_case")]
10pub enum PrimitiveEventKind {
11    Created,
12    Modified,
13    Deleted,
14    Renamed,
15    Accessed,
16    MetadataChanged,
17    Unknown,
18}
19
20#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
21pub struct PrimitiveEvent {
22    pub occurred_at: OffsetDateTime,
23    pub source: String,
24    pub kind: PrimitiveEventKind,
25    pub paths: Vec<PathBuf>,
26    pub is_directory: Option<bool>,
27}
28
29impl PrimitiveEvent {
30    pub fn new(
31        source: impl Into<String>,
32        kind: PrimitiveEventKind,
33        paths: Vec<PathBuf>,
34        is_directory: Option<bool>,
35        occurred_at: OffsetDateTime,
36    ) -> Self {
37        Self {
38            occurred_at,
39            source: source.into(),
40            kind,
41            paths,
42            is_directory,
43        }
44    }
45}
46
47#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
48#[serde(rename_all = "snake_case")]
49pub enum SignalAction {
50    Created,
51    Updated,
52    Deleted,
53    Renamed,
54    Accessed,
55    MetadataChanged,
56    Observed,
57}
58
59impl SignalAction {
60    pub fn as_str(self) -> &'static str {
61        match self {
62            SignalAction::Created => "created",
63            SignalAction::Updated => "updated",
64            SignalAction::Deleted => "deleted",
65            SignalAction::Renamed => "renamed",
66            SignalAction::Accessed => "accessed",
67            SignalAction::MetadataChanged => "metadata_changed",
68            SignalAction::Observed => "observed",
69        }
70    }
71}
72
73#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
74#[serde(rename_all = "snake_case")]
75pub enum EntityKind {
76    Code,
77    Document,
78    Config,
79    Data,
80    Media,
81    Directory,
82    Archive,
83    Binary,
84    Unknown,
85}
86
87impl EntityKind {
88    pub fn as_str(self) -> &'static str {
89        match self {
90            EntityKind::Code => "code",
91            EntityKind::Document => "document",
92            EntityKind::Config => "config",
93            EntityKind::Data => "data",
94            EntityKind::Media => "media",
95            EntityKind::Directory => "directory",
96            EntityKind::Archive => "archive",
97            EntityKind::Binary => "binary",
98            EntityKind::Unknown => "unknown",
99        }
100    }
101}
102
103#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
104pub struct SemanticSignal {
105    pub version: String,
106    pub occurred_at: String,
107    pub source: String,
108    pub action: SignalAction,
109    pub entity_kind: EntityKind,
110    pub summary: String,
111    pub confidence: f32,
112    pub paths: Vec<String>,
113    pub tags: Vec<String>,
114    pub metadata: BTreeMap<String, Value>,
115}
116
117impl SemanticSignal {
118    pub fn event_name(&self) -> String {
119        format!("{}.{}", self.entity_kind.as_str(), self.action.as_str())
120    }
121}
122
123pub fn normalize_event(event: &PrimitiveEvent) -> SemanticSignal {
124    let path_kind = infer_entity_kind(
125        event.paths.first().map(PathBuf::as_path),
126        event.is_directory.unwrap_or(false),
127    );
128
129    let action = match event.kind {
130        PrimitiveEventKind::Created => SignalAction::Created,
131        PrimitiveEventKind::Modified => SignalAction::Updated,
132        PrimitiveEventKind::Deleted => SignalAction::Deleted,
133        PrimitiveEventKind::Renamed => SignalAction::Renamed,
134        PrimitiveEventKind::Accessed => SignalAction::Accessed,
135        PrimitiveEventKind::MetadataChanged => SignalAction::MetadataChanged,
136        PrimitiveEventKind::Unknown => SignalAction::Observed,
137    };
138
139    let entity_kind = if matches!(event.is_directory, Some(true)) {
140        EntityKind::Directory
141    } else {
142        path_kind
143    };
144
145    let paths = event
146        .paths
147        .iter()
148        .map(|path| normalize_path(path))
149        .collect::<Vec<_>>();
150
151    let primary_path = paths
152        .first()
153        .cloned()
154        .unwrap_or_else(|| "<unknown>".to_string());
155
156    let summary = summarize(action, entity_kind, &paths);
157    let confidence = confidence_for(entity_kind, action);
158
159    let mut tags = Vec::new();
160    if let Some(path) = event.paths.first() {
161        if let Some(ext) = path.extension().and_then(|value| value.to_str()) {
162            tags.push(format!("ext:{}", ext.to_ascii_lowercase()));
163        }
164
165        if let Some(topdir) = top_level_component(path) {
166            tags.push(format!("topdir:{}", topdir));
167        }
168
169        if is_hidden(path) {
170            tags.push("hidden:true".to_string());
171        }
172    }
173
174    tags.push(format!("event:{}", action.as_str()));
175    tags.push(format!("kind:{}", entity_kind.as_str()));
176
177    let mut metadata = BTreeMap::new();
178    metadata.insert(
179        "event_name".to_string(),
180        Value::String(format!("{}.{}", entity_kind.as_str(), action.as_str())),
181    );
182    metadata.insert("path_count".to_string(), Value::from(paths.len() as u64));
183    metadata.insert("primary_path".to_string(), Value::String(primary_path));
184
185    if let Some(ext) = event
186        .paths
187        .first()
188        .and_then(|path| path.extension())
189        .and_then(|value| value.to_str())
190    {
191        metadata.insert(
192            "extension".to_string(),
193            Value::String(ext.to_ascii_lowercase()),
194        );
195    }
196
197    if action == SignalAction::Renamed && paths.len() >= 2 {
198        metadata.insert("from_path".to_string(), Value::String(paths[0].clone()));
199        metadata.insert("to_path".to_string(), Value::String(paths[1].clone()));
200    }
201
202    if let Some(is_directory) = event.is_directory {
203        metadata.insert("is_directory".to_string(), Value::Bool(is_directory));
204    }
205
206    SemanticSignal {
207        version: "0.1".to_string(),
208        occurred_at: event
209            .occurred_at
210            .format(&Rfc3339)
211            .unwrap_or_else(|_| event.occurred_at.unix_timestamp().to_string()),
212        source: event.source.clone(),
213        action,
214        entity_kind,
215        summary,
216        confidence,
217        paths,
218        tags,
219        metadata,
220    }
221}
222
223pub fn infer_entity_kind(path: Option<&Path>, is_directory: bool) -> EntityKind {
224    if is_directory {
225        return EntityKind::Directory;
226    }
227
228    let Some(path) = path else {
229        return EntityKind::Unknown;
230    };
231
232    let ext = path
233        .extension()
234        .and_then(|value| value.to_str())
235        .map(|value| value.to_ascii_lowercase());
236
237    match ext.as_deref() {
238        Some(
239            "rs" | "py" | "js" | "jsx" | "ts" | "tsx" | "go" | "java" | "kt" | "c" | "cc" | "cpp"
240            | "h" | "hpp" | "cs" | "rb" | "php" | "swift" | "scala" | "sql" | "ipynb",
241        ) => EntityKind::Code,
242        Some("md" | "txt" | "pdf" | "doc" | "docx" | "rtf" | "odt" | "pages" | "rst") => {
243            EntityKind::Document
244        }
245        Some("toml" | "yaml" | "yml" | "ini" | "env" | "conf" | "cfg" | "xml") => {
246            EntityKind::Config
247        }
248        Some("json" | "csv" | "tsv" | "parquet" | "feather" | "sqlite" | "db") => EntityKind::Data,
249        Some(
250            "png" | "jpg" | "jpeg" | "gif" | "webp" | "svg" | "mp4" | "mov" | "mp3" | "wav"
251            | "flac",
252        ) => EntityKind::Media,
253        Some("zip" | "tar" | "gz" | "bz2" | "xz" | "7z") => EntityKind::Archive,
254        Some("bin" | "exe" | "so" | "dylib" | "dll") => EntityKind::Binary,
255        _ => infer_from_name(path),
256    }
257}
258
259fn infer_from_name(path: &Path) -> EntityKind {
260    let name = path
261        .file_name()
262        .and_then(|value| value.to_str())
263        .map(|value| value.to_ascii_lowercase())
264        .unwrap_or_default();
265
266    if matches!(
267        name.as_str(),
268        "cargo.toml"
269            | "cargo.lock"
270            | "package.json"
271            | "package-lock.json"
272            | "pnpm-lock.yaml"
273            | "dockerfile"
274            | ".env"
275            | ".gitignore"
276            | "makefile"
277    ) {
278        return EntityKind::Config;
279    }
280
281    if name == "readme" || name.starts_with("readme.") || name.starts_with("license") {
282        return EntityKind::Document;
283    }
284
285    EntityKind::Unknown
286}
287
288fn summarize(action: SignalAction, entity_kind: EntityKind, paths: &[String]) -> String {
289    let noun = match entity_kind {
290        EntityKind::Code => "Code file",
291        EntityKind::Document => "Document",
292        EntityKind::Config => "Config file",
293        EntityKind::Data => "Data file",
294        EntityKind::Media => "Media asset",
295        EntityKind::Directory => "Directory",
296        EntityKind::Archive => "Archive",
297        EntityKind::Binary => "Binary artifact",
298        EntityKind::Unknown => "File",
299    };
300
301    match action {
302        SignalAction::Renamed if paths.len() >= 2 => {
303            format!("{} renamed: {} -> {}", noun, paths[0], paths[1])
304        }
305        SignalAction::Created => format!("{} created: {}", noun, first_or_unknown(paths)),
306        SignalAction::Updated => format!("{} updated: {}", noun, first_or_unknown(paths)),
307        SignalAction::Deleted => format!("{} deleted: {}", noun, first_or_unknown(paths)),
308        SignalAction::Accessed => format!("{} accessed: {}", noun, first_or_unknown(paths)),
309        SignalAction::MetadataChanged => {
310            format!("{} metadata changed: {}", noun, first_or_unknown(paths))
311        }
312        SignalAction::Observed => format!("{} observed: {}", noun, first_or_unknown(paths)),
313        SignalAction::Renamed => format!("{} renamed", noun),
314    }
315}
316
/// Returns the first path as a string slice, or the placeholder
/// `"<unknown>"` when the slice is empty.
fn first_or_unknown(paths: &[String]) -> &str {
    match paths.first() {
        Some(first) => first.as_str(),
        None => "<unknown>",
    }
}
320
321fn confidence_for(entity_kind: EntityKind, action: SignalAction) -> f32 {
322    let entity_score: f32 = match entity_kind {
323        EntityKind::Unknown => 0.65,
324        EntityKind::Directory => 0.92,
325        EntityKind::Config => 0.97,
326        EntityKind::Code => 0.98,
327        _ => 0.95,
328    };
329
330    let action_adjustment: f32 = match action {
331        SignalAction::Observed => -0.12,
332        SignalAction::MetadataChanged => -0.06,
333        _ => 0.0,
334    };
335
336    (entity_score + action_adjustment).clamp(0.0, 1.0)
337}
338
/// Renders a path as text with every backslash replaced by a forward slash.
///
/// Invalid UTF-8 is converted lossily, and an empty path is rendered as
/// `"."`.
fn normalize_path(path: &Path) -> String {
    let slashed: String = path
        .to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect();

    if slashed.is_empty() {
        return String::from(".");
    }
    slashed
}
343
/// Returns the first component of `path` as text, or `None` for an empty
/// path.
///
/// NOTE(review): the first component is returned whatever its kind, so a
/// path that starts with a root, `.` or `..` yields that marker rather
/// than a directory name — confirm callers expect this.
fn top_level_component(path: &Path) -> Option<String> {
    let leading = path.components().next()?;
    Some(leading.as_os_str().to_string_lossy().into_owned())
}
349
/// Reports whether any named segment of `path` starts with a dot
/// (for example `.env` or `project/.git/config`).
///
/// Only ordinary name components are inspected. The navigation components
/// `.` and `..` are never treated as hidden, so a relative path such as
/// `../src/lib.rs` is not flagged merely because it climbs a directory.
/// Names that are not valid UTF-8 are converted lossily before the check,
/// so they still count when they begin with a dot.
fn is_hidden(path: &Path) -> bool {
    path.components().any(|component| match component {
        // A `Normal` component is never "." or "..", so a leading dot here
        // always belongs to an actual file or directory name.
        std::path::Component::Normal(name) => name.to_string_lossy().starts_with('.'),
        _ => false,
    })
}
359
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;
    use time::macros::datetime;

    /// Builds a non-directory event from the `"filesystem"` source with a
    /// fixed timestamp, so each test only states what it varies.
    fn file_event(kind: PrimitiveEventKind, raw_paths: &[&str]) -> PrimitiveEvent {
        PrimitiveEvent::new(
            "filesystem",
            kind,
            raw_paths.iter().map(|raw| PathBuf::from(*raw)).collect(),
            Some(false),
            datetime!(2026-03-22 10:12:05 UTC),
        )
    }

    #[test]
    fn classifies_rust_file_as_code() {
        assert_eq!(
            infer_entity_kind(Some(Path::new("src/lib.rs")), false),
            EntityKind::Code
        );
    }

    #[test]
    fn classifies_readme_without_extension_as_document() {
        assert_eq!(
            infer_entity_kind(Some(Path::new("README")), false),
            EntityKind::Document
        );
    }

    #[test]
    fn emits_rename_signal_with_both_paths() {
        let signal = normalize_event(&file_event(
            PrimitiveEventKind::Renamed,
            &["notes/todo.md", "notes/done.md"],
        ));

        assert_eq!(signal.entity_kind, EntityKind::Document);
        assert_eq!(signal.action, SignalAction::Renamed);
        assert_eq!(signal.event_name(), "document.renamed");
        assert_eq!(
            signal.summary,
            "Document renamed: notes/todo.md -> notes/done.md"
        );

        // Both ends of the rename must be recorded in the metadata.
        let expected_from = Value::String("notes/todo.md".to_string());
        let expected_to = Value::String("notes/done.md".to_string());
        assert_eq!(signal.metadata.get("from_path"), Some(&expected_from));
        assert_eq!(signal.metadata.get("to_path"), Some(&expected_to));
    }

    #[test]
    fn tags_hidden_config_file() {
        let signal = normalize_event(&file_event(PrimitiveEventKind::Modified, &[".env"]));

        assert_eq!(signal.entity_kind, EntityKind::Config);
        assert!(signal.tags.contains(&"hidden:true".to_string()));
        assert!(signal.tags.contains(&"event:updated".to_string()));
    }
}