1use std::collections::BTreeMap;
2use std::path::{Path, PathBuf};
3
4use serde::{Deserialize, Serialize};
5use serde_json::Value;
6use time::{OffsetDateTime, format_description::well_known::Rfc3339};
7
8#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
9#[serde(rename_all = "snake_case")]
10pub enum PrimitiveEventKind {
11 Created,
12 Modified,
13 Deleted,
14 Renamed,
15 Accessed,
16 MetadataChanged,
17 Unknown,
18}
19
20#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
21pub struct PrimitiveEvent {
22 pub occurred_at: OffsetDateTime,
23 pub source: String,
24 pub kind: PrimitiveEventKind,
25 pub paths: Vec<PathBuf>,
26 pub is_directory: Option<bool>,
27}
28
29impl PrimitiveEvent {
30 pub fn new(
31 source: impl Into<String>,
32 kind: PrimitiveEventKind,
33 paths: Vec<PathBuf>,
34 is_directory: Option<bool>,
35 occurred_at: OffsetDateTime,
36 ) -> Self {
37 Self {
38 occurred_at,
39 source: source.into(),
40 kind,
41 paths,
42 is_directory,
43 }
44 }
45}
46
47#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
48#[serde(rename_all = "snake_case")]
49pub enum SignalAction {
50 Created,
51 Updated,
52 Deleted,
53 Renamed,
54 Accessed,
55 MetadataChanged,
56 Observed,
57}
58
59impl SignalAction {
60 pub fn as_str(self) -> &'static str {
61 match self {
62 SignalAction::Created => "created",
63 SignalAction::Updated => "updated",
64 SignalAction::Deleted => "deleted",
65 SignalAction::Renamed => "renamed",
66 SignalAction::Accessed => "accessed",
67 SignalAction::MetadataChanged => "metadata_changed",
68 SignalAction::Observed => "observed",
69 }
70 }
71}
72
73#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
74#[serde(rename_all = "snake_case")]
75pub enum EntityKind {
76 Code,
77 Document,
78 Config,
79 Data,
80 Media,
81 Directory,
82 Archive,
83 Binary,
84 Unknown,
85}
86
87impl EntityKind {
88 pub fn as_str(self) -> &'static str {
89 match self {
90 EntityKind::Code => "code",
91 EntityKind::Document => "document",
92 EntityKind::Config => "config",
93 EntityKind::Data => "data",
94 EntityKind::Media => "media",
95 EntityKind::Directory => "directory",
96 EntityKind::Archive => "archive",
97 EntityKind::Binary => "binary",
98 EntityKind::Unknown => "unknown",
99 }
100 }
101}
102
103#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
104pub struct SemanticSignal {
105 pub version: String,
106 pub occurred_at: String,
107 pub source: String,
108 pub action: SignalAction,
109 pub entity_kind: EntityKind,
110 pub summary: String,
111 pub confidence: f32,
112 pub paths: Vec<String>,
113 pub tags: Vec<String>,
114 pub metadata: BTreeMap<String, Value>,
115}
116
117impl SemanticSignal {
118 pub fn event_name(&self) -> String {
119 format!("{}.{}", self.entity_kind.as_str(), self.action.as_str())
120 }
121}
122
123pub fn normalize_event(event: &PrimitiveEvent) -> SemanticSignal {
124 let path_kind = infer_entity_kind(
125 event.paths.first().map(PathBuf::as_path),
126 event.is_directory.unwrap_or(false),
127 );
128
129 let action = match event.kind {
130 PrimitiveEventKind::Created => SignalAction::Created,
131 PrimitiveEventKind::Modified => SignalAction::Updated,
132 PrimitiveEventKind::Deleted => SignalAction::Deleted,
133 PrimitiveEventKind::Renamed => SignalAction::Renamed,
134 PrimitiveEventKind::Accessed => SignalAction::Accessed,
135 PrimitiveEventKind::MetadataChanged => SignalAction::MetadataChanged,
136 PrimitiveEventKind::Unknown => SignalAction::Observed,
137 };
138
139 let entity_kind = if matches!(event.is_directory, Some(true)) {
140 EntityKind::Directory
141 } else {
142 path_kind
143 };
144
145 let paths = event
146 .paths
147 .iter()
148 .map(|path| normalize_path(path))
149 .collect::<Vec<_>>();
150
151 let primary_path = paths
152 .first()
153 .cloned()
154 .unwrap_or_else(|| "<unknown>".to_string());
155
156 let summary = summarize(action, entity_kind, &paths);
157 let confidence = confidence_for(entity_kind, action);
158
159 let mut tags = Vec::new();
160 if let Some(path) = event.paths.first() {
161 if let Some(ext) = path.extension().and_then(|value| value.to_str()) {
162 tags.push(format!("ext:{}", ext.to_ascii_lowercase()));
163 }
164
165 if let Some(topdir) = top_level_component(path) {
166 tags.push(format!("topdir:{}", topdir));
167 }
168
169 if is_hidden(path) {
170 tags.push("hidden:true".to_string());
171 }
172 }
173
174 tags.push(format!("event:{}", action.as_str()));
175 tags.push(format!("kind:{}", entity_kind.as_str()));
176
177 let mut metadata = BTreeMap::new();
178 metadata.insert(
179 "event_name".to_string(),
180 Value::String(format!("{}.{}", entity_kind.as_str(), action.as_str())),
181 );
182 metadata.insert("path_count".to_string(), Value::from(paths.len() as u64));
183 metadata.insert("primary_path".to_string(), Value::String(primary_path));
184
185 if let Some(ext) = event
186 .paths
187 .first()
188 .and_then(|path| path.extension())
189 .and_then(|value| value.to_str())
190 {
191 metadata.insert(
192 "extension".to_string(),
193 Value::String(ext.to_ascii_lowercase()),
194 );
195 }
196
197 if action == SignalAction::Renamed && paths.len() >= 2 {
198 metadata.insert("from_path".to_string(), Value::String(paths[0].clone()));
199 metadata.insert("to_path".to_string(), Value::String(paths[1].clone()));
200 }
201
202 if let Some(is_directory) = event.is_directory {
203 metadata.insert("is_directory".to_string(), Value::Bool(is_directory));
204 }
205
206 SemanticSignal {
207 version: "0.1".to_string(),
208 occurred_at: event
209 .occurred_at
210 .format(&Rfc3339)
211 .unwrap_or_else(|_| event.occurred_at.unix_timestamp().to_string()),
212 source: event.source.clone(),
213 action,
214 entity_kind,
215 summary,
216 confidence,
217 paths,
218 tags,
219 metadata,
220 }
221}
222
223pub fn infer_entity_kind(path: Option<&Path>, is_directory: bool) -> EntityKind {
224 if is_directory {
225 return EntityKind::Directory;
226 }
227
228 let Some(path) = path else {
229 return EntityKind::Unknown;
230 };
231
232 let ext = path
233 .extension()
234 .and_then(|value| value.to_str())
235 .map(|value| value.to_ascii_lowercase());
236
237 match ext.as_deref() {
238 Some(
239 "rs" | "py" | "js" | "jsx" | "ts" | "tsx" | "go" | "java" | "kt" | "c" | "cc" | "cpp"
240 | "h" | "hpp" | "cs" | "rb" | "php" | "swift" | "scala" | "sql" | "ipynb",
241 ) => EntityKind::Code,
242 Some("md" | "txt" | "pdf" | "doc" | "docx" | "rtf" | "odt" | "pages" | "rst") => {
243 EntityKind::Document
244 }
245 Some("toml" | "yaml" | "yml" | "ini" | "env" | "conf" | "cfg" | "xml") => {
246 EntityKind::Config
247 }
248 Some("json" | "csv" | "tsv" | "parquet" | "feather" | "sqlite" | "db") => EntityKind::Data,
249 Some(
250 "png" | "jpg" | "jpeg" | "gif" | "webp" | "svg" | "mp4" | "mov" | "mp3" | "wav"
251 | "flac",
252 ) => EntityKind::Media,
253 Some("zip" | "tar" | "gz" | "bz2" | "xz" | "7z") => EntityKind::Archive,
254 Some("bin" | "exe" | "so" | "dylib" | "dll") => EntityKind::Binary,
255 _ => infer_from_name(path),
256 }
257}
258
259fn infer_from_name(path: &Path) -> EntityKind {
260 let name = path
261 .file_name()
262 .and_then(|value| value.to_str())
263 .map(|value| value.to_ascii_lowercase())
264 .unwrap_or_default();
265
266 if matches!(
267 name.as_str(),
268 "cargo.toml"
269 | "cargo.lock"
270 | "package.json"
271 | "package-lock.json"
272 | "pnpm-lock.yaml"
273 | "dockerfile"
274 | ".env"
275 | ".gitignore"
276 | "makefile"
277 ) {
278 return EntityKind::Config;
279 }
280
281 if name == "readme" || name.starts_with("readme.") || name.starts_with("license") {
282 return EntityKind::Document;
283 }
284
285 EntityKind::Unknown
286}
287
288fn summarize(action: SignalAction, entity_kind: EntityKind, paths: &[String]) -> String {
289 let noun = match entity_kind {
290 EntityKind::Code => "Code file",
291 EntityKind::Document => "Document",
292 EntityKind::Config => "Config file",
293 EntityKind::Data => "Data file",
294 EntityKind::Media => "Media asset",
295 EntityKind::Directory => "Directory",
296 EntityKind::Archive => "Archive",
297 EntityKind::Binary => "Binary artifact",
298 EntityKind::Unknown => "File",
299 };
300
301 match action {
302 SignalAction::Renamed if paths.len() >= 2 => {
303 format!("{} renamed: {} -> {}", noun, paths[0], paths[1])
304 }
305 SignalAction::Created => format!("{} created: {}", noun, first_or_unknown(paths)),
306 SignalAction::Updated => format!("{} updated: {}", noun, first_or_unknown(paths)),
307 SignalAction::Deleted => format!("{} deleted: {}", noun, first_or_unknown(paths)),
308 SignalAction::Accessed => format!("{} accessed: {}", noun, first_or_unknown(paths)),
309 SignalAction::MetadataChanged => {
310 format!("{} metadata changed: {}", noun, first_or_unknown(paths))
311 }
312 SignalAction::Observed => format!("{} observed: {}", noun, first_or_unknown(paths)),
313 SignalAction::Renamed => format!("{} renamed", noun),
314 }
315}
316
317fn first_or_unknown(paths: &[String]) -> &str {
318 paths.first().map(String::as_str).unwrap_or("<unknown>")
319}
320
321fn confidence_for(entity_kind: EntityKind, action: SignalAction) -> f32 {
322 let entity_score: f32 = match entity_kind {
323 EntityKind::Unknown => 0.65,
324 EntityKind::Directory => 0.92,
325 EntityKind::Config => 0.97,
326 EntityKind::Code => 0.98,
327 _ => 0.95,
328 };
329
330 let action_adjustment: f32 = match action {
331 SignalAction::Observed => -0.12,
332 SignalAction::MetadataChanged => -0.06,
333 _ => 0.0,
334 };
335
336 (entity_score + action_adjustment).clamp(0.0, 1.0)
337}
338
339fn normalize_path(path: &Path) -> String {
340 let raw = path.to_string_lossy().replace('\\', "/");
341 if raw.is_empty() { ".".to_string() } else { raw }
342}
343
344fn top_level_component(path: &Path) -> Option<String> {
345 path.components()
346 .next()
347 .map(|component| component.as_os_str().to_string_lossy().to_string())
348}
349
350fn is_hidden(path: &Path) -> bool {
351 path.components().any(|component| {
352 component
353 .as_os_str()
354 .to_str()
355 .map(|segment| segment.starts_with('.') && segment.len() > 1)
356 .unwrap_or(false)
357 })
358}
359
360#[cfg(test)]
361mod tests {
362 use super::*;
363 use std::path::PathBuf;
364 use time::macros::datetime;
365
366 #[test]
367 fn classifies_rust_file_as_code() {
368 let kind = infer_entity_kind(Some(Path::new("src/lib.rs")), false);
369 assert_eq!(kind, EntityKind::Code);
370 }
371
372 #[test]
373 fn classifies_readme_without_extension_as_document() {
374 let kind = infer_entity_kind(Some(Path::new("README")), false);
375 assert_eq!(kind, EntityKind::Document);
376 }
377
378 #[test]
379 fn emits_rename_signal_with_both_paths() {
380 let event = PrimitiveEvent::new(
381 "filesystem",
382 PrimitiveEventKind::Renamed,
383 vec![
384 PathBuf::from("notes/todo.md"),
385 PathBuf::from("notes/done.md"),
386 ],
387 Some(false),
388 datetime!(2026-03-22 10:12:05 UTC),
389 );
390
391 let signal = normalize_event(&event);
392
393 assert_eq!(signal.entity_kind, EntityKind::Document);
394 assert_eq!(signal.action, SignalAction::Renamed);
395 assert_eq!(signal.event_name(), "document.renamed");
396 assert_eq!(
397 signal.summary,
398 "Document renamed: notes/todo.md -> notes/done.md"
399 );
400 assert_eq!(
401 signal.metadata.get("from_path"),
402 Some(&Value::String("notes/todo.md".to_string()))
403 );
404 assert_eq!(
405 signal.metadata.get("to_path"),
406 Some(&Value::String("notes/done.md".to_string()))
407 );
408 }
409
410 #[test]
411 fn tags_hidden_config_file() {
412 let event = PrimitiveEvent::new(
413 "filesystem",
414 PrimitiveEventKind::Modified,
415 vec![PathBuf::from(".env")],
416 Some(false),
417 datetime!(2026-03-22 10:12:05 UTC),
418 );
419
420 let signal = normalize_event(&event);
421
422 assert_eq!(signal.entity_kind, EntityKind::Config);
423 assert!(signal.tags.iter().any(|tag| tag == "hidden:true"));
424 assert!(signal.tags.iter().any(|tag| tag == "event:updated"));
425 }
426}