Skip to main content

aicx_parser/
segmentation.rs

1//! Semantic segmentation for canonical store ownership.
2//!
3//! Reconstructs repository-scoped session segments from content signals rather
4//! than weak source-side identifiers.
5//!
6//! Vibecrafted with AI Agents by VetCoders (c)2026 VetCoders
7
8use crate::timeline::{Kind, RepoIdentity, SemanticSegment, SourceTier, TimelineEntry};
9use regex::Regex;
10use serde::{Deserialize, Serialize};
11use std::collections::HashMap;
12use std::path::{Path, PathBuf};
13use std::process::Command;
14
15// ============================================================================
16// Source trust model
17// ============================================================================
18
19/// A repo identity paired with the trust tier of the signal that produced it.
20#[derive(Debug, Clone, PartialEq, Eq)]
21pub struct TieredIdentity {
22    pub identity: RepoIdentity,
23    pub tier: SourceTier,
24}
25
26// ============================================================================
27// Gemini projectHash registry
28// ============================================================================
29
30/// Registry mapping Gemini `projectHash` values to known repo roots.
31///
32/// The mapping lives in `~/.aicx/gemini-project-map.json` and must be
33/// maintained by the user or by `aicx init`. A projectHash that is not
34/// in this file cannot resolve to a repo — it stays Opaque.
35#[derive(Debug, Clone, Default, Serialize, Deserialize)]
36pub struct ProjectHashRegistry {
37    /// Maps `projectHash` (hex string) → absolute path to project root.
38    #[serde(default)]
39    pub mappings: HashMap<String, String>,
40}
41
42impl ProjectHashRegistry {
43    /// Load from the default location (`~/.aicx/gemini-project-map.json`).
44    /// Returns an empty registry if the file doesn't exist or can't be parsed.
45    pub fn load_default() -> Self {
46        let Some(home) = std::env::var_os("HOME").map(PathBuf::from) else {
47            return Self::default();
48        };
49        let path = home.join(".aicx").join("gemini-project-map.json");
50        Self::load_from(&path)
51    }
52
53    /// Load from a specific path.
54    pub fn load_from(path: &Path) -> Self {
55        std::fs::read_to_string(path)
56            .ok()
57            .and_then(|content| serde_json::from_str(&content).ok())
58            .unwrap_or_default()
59    }
60
61    /// Resolve a projectHash to a `TieredIdentity` by looking up the mapped
62    /// path and then inferring repo identity from that path.
63    pub fn resolve(&self, project_hash: &str) -> Option<TieredIdentity> {
64        let root_path = self.mappings.get(project_hash)?;
65        let path = PathBuf::from(root_path);
66        let identity = infer_repo_identity_from_path(&path)?;
67        Some(TieredIdentity {
68            identity,
69            tier: SourceTier::Secondary,
70        })
71    }
72}
73
74pub fn semantic_segments(entries: &[TimelineEntry]) -> Vec<SemanticSegment> {
75    semantic_segments_with_registry(entries, &ProjectHashRegistry::default())
76}
77
78pub fn semantic_segments_with_registry(
79    entries: &[TimelineEntry],
80    registry: &ProjectHashRegistry,
81) -> Vec<SemanticSegment> {
82    let mut sessions: HashMap<(String, String), Vec<TimelineEntry>> = HashMap::new();
83    for entry in entries {
84        sessions
85            .entry((entry.agent.clone(), entry.session_id.clone()))
86            .or_default()
87            .push(entry.clone());
88    }
89
90    let mut ordered = Vec::new();
91
92    for ((agent, session_id), mut session_entries) in sessions {
93        session_entries.sort_by_key(|left| left.timestamp);
94
95        let mut current_tiered: Option<TieredIdentity> = None;
96        let mut current_entries: Vec<TimelineEntry> = Vec::new();
97
98        for entry in session_entries {
99            let explicit = infer_tiered_identity_from_entry(&entry, registry);
100
101            let explicit_repo = explicit.as_ref().map(|t| &t.identity);
102            let current_repo = current_tiered.as_ref().map(|t| &t.identity);
103
104            let split_for_first_truth =
105                !current_entries.is_empty() && current_repo.is_none() && explicit_repo.is_some();
106            let split_for_context_switch = !current_entries.is_empty()
107                && explicit_repo
108                    .zip(current_repo)
109                    .is_some_and(|(next_repo, active_repo)| next_repo != active_repo);
110
111            if split_for_first_truth || split_for_context_switch {
112                let tier = current_tiered.as_ref().map(|t| t.tier);
113                ordered.push(build_segment(
114                    current_tiered.take().map(|t| t.identity),
115                    tier,
116                    &agent,
117                    &session_id,
118                    std::mem::take(&mut current_entries),
119                ));
120            }
121
122            if current_entries.is_empty() {
123                current_tiered = explicit.clone();
124            }
125
126            if current_tiered.is_none() && explicit.is_some() {
127                current_tiered = explicit.clone();
128            }
129
130            current_entries.push(entry);
131        }
132
133        if !current_entries.is_empty() {
134            let tier = current_tiered.as_ref().map(|t| t.tier);
135            ordered.push(build_segment(
136                current_tiered.map(|t| t.identity),
137                tier,
138                &agent,
139                &session_id,
140                current_entries,
141            ));
142        }
143    }
144
145    ordered.sort_by(|left, right| {
146        left.entries
147            .first()
148            .map(|entry| entry.timestamp)
149            .cmp(&right.entries.first().map(|entry| entry.timestamp))
150            .then_with(|| left.agent.cmp(&right.agent))
151            .then_with(|| left.session_id.cmp(&right.session_id))
152    });
153
154    ordered
155}
156
157pub fn infer_repo_identity_from_entry(entry: &TimelineEntry) -> Option<RepoIdentity> {
158    infer_tiered_identity_from_entry(entry, &ProjectHashRegistry::default()).map(|t| t.identity)
159}
160
161/// Infer repo identity with explicit trust tier from all available signals.
162///
163/// Signal precedence (highest to lowest):
164/// 1. Remote-like URL in message text -> Primary
165/// 2. Path in message text that resolves via git remote -> Primary
166/// 3. Path in message text via known layout -> Fallback
167/// 4. CWD that resolves via local git + remote -> Primary
168/// 5. CWD that resolves via local git + known layout -> Secondary
169/// 6. CWD via known layout (no .git) -> Fallback
170/// 7. ProjectHash resolved through registry -> Secondary
171/// 8. Pure hex hash CWD / opaque -> Opaque (returns None)
172pub fn infer_tiered_identity_from_entry(
173    entry: &TimelineEntry,
174    registry: &ProjectHashRegistry,
175) -> Option<TieredIdentity> {
176    if let Some(tiered) = infer_tiered_identity_from_text(&entry.message) {
177        return Some(tiered);
178    }
179
180    if let Some(tiered) = infer_tiered_identity_from_cwd(entry.cwd.as_deref()) {
181        return Some(tiered);
182    }
183
184    // Last resort: try projectHash registry for Gemini sessions.
185    // The cwd field for Gemini sessions is often the projectHash itself.
186    if let Some(cwd) = entry.cwd.as_deref()
187        && looks_like_weak_source_identifier(cwd)
188    {
189        return registry.resolve(cwd);
190    }
191
192    None
193}
194
195/// Classify a raw CWD string into a source tier without resolving identity.
196pub fn classify_cwd_tier(cwd: Option<&str>) -> SourceTier {
197    let Some(raw) = cwd else {
198        return SourceTier::Opaque;
199    };
200    let trimmed = raw.trim();
201    if trimmed.is_empty() {
202        return SourceTier::Opaque;
203    }
204    if looks_like_weak_source_identifier(trimmed) {
205        return SourceTier::Opaque;
206    }
207    let path = expand_home(trimmed);
208    if discover_git_root(&path).is_some() {
209        return SourceTier::Secondary;
210    }
211    if infer_repo_identity_from_known_layout(&path).is_some() {
212        return SourceTier::Fallback;
213    }
214    SourceTier::Opaque
215}
216
217fn build_segment(
218    repo: Option<RepoIdentity>,
219    source_tier: Option<SourceTier>,
220    agent: &str,
221    session_id: &str,
222    entries: Vec<TimelineEntry>,
223) -> SemanticSegment {
224    let kind = classify_segment_kind(&entries);
225    SemanticSegment {
226        repo,
227        source_tier,
228        kind,
229        agent: agent.to_string(),
230        session_id: session_id.to_string(),
231        entries,
232    }
233}
234
235fn classify_segment_kind(entries: &[TimelineEntry]) -> Kind {
236    if entries.is_empty() {
237        return Kind::Other;
238    }
239
240    let has_conversation = entries
241        .iter()
242        .any(|entry| entry.role == "user" || entry.role == "assistant");
243
244    let report_score = entries
245        .iter()
246        .map(|entry| classify_report_signal(entry.message.as_str()))
247        .sum::<u8>();
248    let plan_score = entries
249        .iter()
250        .map(|entry| classify_plan_signal(entry.message.as_str()))
251        .sum::<u8>();
252
253    if report_score >= 2 && report_score > plan_score && !has_conversation {
254        Kind::Reports
255    } else if plan_score >= 2 && plan_score >= report_score {
256        Kind::Plans
257    } else if has_conversation {
258        Kind::Conversations
259    } else if report_score > 0 {
260        Kind::Reports
261    } else {
262        Kind::Other
263    }
264}
265
/// Counts how many distinct plan markers occur in `message`, matching ASCII
/// case-insensitively. Each marker counts at most once, so the result is 0..=6.
fn classify_plan_signal(message: &str) -> u8 {
    const PLAN_MARKERS: [&str; 6] = [
        "goal:",
        "acceptance:",
        "test gate:",
        "- [ ]",
        "plan:",
        "migration plan",
    ];

    let haystack = message.to_ascii_lowercase();
    PLAN_MARKERS
        .iter()
        .map(|marker| u8::from(haystack.contains(*marker)))
        .sum()
}
275
/// Counts how many distinct report markers occur in `message`, matching ASCII
/// case-insensitively. Each marker counts at most once, so the result is 0..=5.
fn classify_report_signal(message: &str) -> u8 {
    const REPORT_MARKERS: [&str; 5] = [
        "recovery report",
        "audit report",
        "coverage report",
        "status report",
        "summary",
    ];

    let haystack = message.to_ascii_lowercase();
    REPORT_MARKERS
        .iter()
        .map(|marker| u8::from(haystack.contains(*marker)))
        .sum()
}
284
285fn infer_repo_identity_from_path(path: &Path) -> Option<RepoIdentity> {
286    if let Some(repo) = infer_repo_identity_from_local_git(path) {
287        return Some(repo);
288    }
289
290    infer_repo_identity_from_known_layout(path)
291}
292
293// ── Tiered inference helpers ──────────────────────────────────────────────
294
295fn infer_tiered_identity_from_text(text: &str) -> Option<TieredIdentity> {
296    // Remote-like URL → Primary (strongest text signal)
297    if let Some(identity) = infer_repo_identity_from_remote_like(text) {
298        return Some(TieredIdentity {
299            identity,
300            tier: SourceTier::Primary,
301        });
302    }
303
304    // Path in text → tier depends on how it resolves
305    let path_re = Regex::new(r"(/[A-Za-z0-9._~\-]+(?:/[A-Za-z0-9._~\-]+)+)").ok()?;
306    for capture in path_re.captures_iter(text) {
307        let raw = capture.get(1)?.as_str();
308        let path = PathBuf::from(raw);
309        if let Some(tiered) = infer_tiered_identity_from_path(&path) {
310            return Some(tiered);
311        }
312    }
313
314    None
315}
316
317fn infer_tiered_identity_from_cwd(cwd: Option<&str>) -> Option<TieredIdentity> {
318    let cwd = cwd?.trim();
319    if cwd.is_empty() || looks_like_weak_source_identifier(cwd) {
320        return None;
321    }
322
323    // Remote-like CWD → Primary
324    if let Some(identity) = infer_repo_identity_from_remote_like(cwd) {
325        return Some(TieredIdentity {
326            identity,
327            tier: SourceTier::Primary,
328        });
329    }
330
331    let path = expand_home(cwd);
332    infer_tiered_identity_from_path(&path)
333}
334
335fn infer_tiered_identity_from_path(path: &Path) -> Option<TieredIdentity> {
336    // Local git with remote → Primary
337    if let Some(repo_root) = discover_git_root(path) {
338        if let Some(identity) = infer_repo_identity_from_git_remote(&repo_root) {
339            return Some(TieredIdentity {
340                identity,
341                tier: SourceTier::Primary,
342            });
343        }
344        // Local git with known layout → Secondary
345        if let Some(identity) = infer_repo_identity_from_known_layout(&repo_root) {
346            return Some(TieredIdentity {
347                identity,
348                tier: SourceTier::Secondary,
349            });
350        }
351        // Local git, basename only → Secondary
352        if let Some(name) = repo_root.file_name() {
353            return Some(TieredIdentity {
354                identity: RepoIdentity {
355                    organization: "local".to_string(),
356                    repository: name.to_string_lossy().to_string(),
357                },
358                tier: SourceTier::Secondary,
359            });
360        }
361    }
362
363    // Known layout without .git → Fallback
364    if let Some(identity) = infer_repo_identity_from_known_layout(path) {
365        return Some(TieredIdentity {
366            identity,
367            tier: SourceTier::Fallback,
368        });
369    }
370
371    None
372}
373
374fn infer_repo_identity_from_local_git(path: &Path) -> Option<RepoIdentity> {
375    let repo_root = discover_git_root(path)?;
376    infer_repo_identity_from_git_remote(&repo_root)
377        .or_else(|| infer_repo_identity_from_known_layout(&repo_root))
378        .or_else(|| {
379            repo_root.file_name().map(|name| RepoIdentity {
380                organization: "local".to_string(),
381                repository: name.to_string_lossy().to_string(),
382            })
383        })
384}
385
/// Finds the nearest directory at or above `path` that contains a `.git`
/// entry.
///
/// When `path` is an existing file the search starts at its parent directory;
/// otherwise it starts at `path` itself.
fn discover_git_root(path: &Path) -> Option<PathBuf> {
    let start = if path.is_file() { path.parent()? } else { path };

    for dir in start.ancestors() {
        if dir.join(".git").exists() {
            return Some(dir.to_path_buf());
        }
    }
    None
}
397
398fn infer_repo_identity_from_git_remote(repo_root: &Path) -> Option<RepoIdentity> {
399    let output = Command::new("git")
400        .arg("-C")
401        .arg(repo_root)
402        .args(["remote", "get-url", "origin"])
403        .output()
404        .ok()?;
405
406    if !output.status.success() {
407        return None;
408    }
409
410    let remote = String::from_utf8_lossy(&output.stdout);
411    infer_repo_identity_from_remote_like(remote.trim())
412}
413
414fn infer_repo_identity_from_known_layout(path: &Path) -> Option<RepoIdentity> {
415    let components: Vec<String> = path
416        .components()
417        .map(|component| component.as_os_str().to_string_lossy().to_string())
418        .collect();
419
420    for marker in ["hosted", "repos", "repositories", "github", "git"] {
421        let marker_index = components
422            .iter()
423            .position(|component| component == marker)?;
424        if components.len() > marker_index + 2 {
425            let organization = components[marker_index + 1].clone();
426            let repository = components[marker_index + 2].clone();
427            if is_probably_repo_name(&organization) && is_probably_repo_name(&repository) {
428                return Some(RepoIdentity {
429                    organization,
430                    repository,
431                });
432            }
433        }
434    }
435
436    None
437}
438
439fn infer_repo_identity_from_remote_like(raw: &str) -> Option<RepoIdentity> {
440    for token in raw.split_whitespace() {
441        let trimmed = token
442            .trim_matches(|ch: char| matches!(ch, '"' | '\'' | ',' | '.' | ')' | '(' | '[' | ']'));
443        for prefix in [
444            "https://github.com/",
445            "http://github.com/",
446            "https://gitlab.com/",
447            "http://gitlab.com/",
448            "git@github.com:",
449            "git@gitlab.com:",
450        ] {
451            if let Some(rest) = trimmed.strip_prefix(prefix)
452                && let Some(repo) = repo_identity_from_remote_path(rest)
453            {
454                return Some(repo);
455            }
456        }
457    }
458
459    None
460}
461
462fn repo_identity_from_remote_path(path: &str) -> Option<RepoIdentity> {
463    let mut parts = path.split('/');
464    let organization = parts.next()?.trim();
465    let repository = parts.next()?.trim().trim_end_matches(".git");
466
467    if is_probably_repo_name(organization) && is_probably_repo_name(repository) {
468        return Some(RepoIdentity {
469            organization: organization.to_string(),
470            repository: repository.to_string(),
471        });
472    }
473
474    Some(RepoIdentity {
475        organization: "local".to_string(),
476        repository: local_repo_fallback(repository),
477    })
478}
479
480fn local_repo_fallback(repository: &str) -> String {
481    if is_probably_repo_name(repository) {
482        repository.to_string()
483    } else {
484        "unknown".to_string()
485    }
486}
487
/// Reports whether `raw`, ignoring surrounding whitespace, is a bare hex
/// string of at least 16 characters.
///
/// Requiring every character to be a hex digit already excludes anything
/// containing `/` or `:`, so paths and URLs never match.
fn looks_like_weak_source_identifier(raw: &str) -> bool {
    let candidate = raw.trim();
    if candidate.len() < 16 {
        return false;
    }
    candidate.bytes().all(|byte| byte.is_ascii_hexdigit())
}
495
/// Expands a leading home-directory shorthand using the `HOME` environment
/// variable.
///
/// Both a bare `~` and a `~/…` prefix are expanded. Every other input,
/// including `~user/…`, is returned unchanged, as is any input when `HOME` is
/// not set.
fn expand_home(raw: &str) -> PathBuf {
    let home = || std::env::var_os("HOME").map(PathBuf::from);

    let expanded = if raw == "~" {
        home()
    } else {
        raw.strip_prefix("~/")
            .and_then(|rest| home().map(|dir| dir.join(rest)))
    };

    expanded.unwrap_or_else(|| PathBuf::from(raw))
}
505
/// Heuristic check that `value` could be a real organization or repository
/// name rather than a placeholder, path fragment, or generic directory.
///
/// A name is accepted when it:
/// * is 1 to 64 bytes long,
/// * starts with an ASCII letter or digit,
/// * continues with only ASCII letters, digits, `.`, `-`, or `_`,
/// * is not one of the reserved generic names (compared ASCII
///   case-insensitively), and
/// * does not consist of dots for more than half of its characters.
fn is_probably_repo_name(value: &str) -> bool {
    const RESERVED: [&str; 12] = [
        ".",
        "..",
        "...",
        "local",
        "tmp",
        "temp",
        "src",
        "app",
        "lib",
        "docs",
        "workspace",
        "workspaces",
    ];

    if value.is_empty() || value.len() > 64 {
        return false;
    }

    // The first character must be alphanumeric; later ones may also be one of
    // the three permitted punctuation characters.
    let well_formed = value.char_indices().all(|(offset, ch)| {
        ch.is_ascii_alphanumeric() || (offset > 0 && matches!(ch, '.' | '-' | '_'))
    });
    if !well_formed {
        return false;
    }

    if RESERVED
        .iter()
        .any(|reserved| value.eq_ignore_ascii_case(reserved))
    {
        return false;
    }

    let dots = value.chars().filter(|&ch| ch == '.').count();
    dots <= value.chars().count() / 2
}
547
548#[cfg(test)]
549mod tests {
550    use super::*;
551    use chrono::{TimeZone, Utc};
552    use std::fs;
553
554    fn entry(
555        ts: (i32, u32, u32, u32, u32, u32),
556        session_id: &str,
557        role: &str,
558        message: &str,
559        cwd: Option<&str>,
560    ) -> TimelineEntry {
561        TimelineEntry {
562            timestamp: Utc
563                .with_ymd_and_hms(ts.0, ts.1, ts.2, ts.3, ts.4, ts.5)
564                .unwrap(),
565            agent: "claude".to_string(),
566            session_id: session_id.to_string(),
567            role: role.to_string(),
568            message: message.to_string(),
569            branch: None,
570            cwd: cwd.map(ToOwned::to_owned),
571            frame_kind: None,
572        }
573    }
574
575    fn mk_tmp_dir(name: &str) -> PathBuf {
576        std::env::temp_dir().join(format!(
577            "ai-contexters-segmentation-{name}-{}-{}",
578            std::process::id(),
579            Utc::now().timestamp_nanos_opt().unwrap_or_default()
580        ))
581    }
582
583    #[test]
584    fn repo_signal_segmentation_splits_one_session_across_multiple_repositories() {
585        let entries = vec![
586            entry(
587                (2026, 3, 21, 9, 0, 0),
588                "sess-1",
589                "user",
590                "Please inspect https://github.com/VetCoders/ai-contexters before editing.",
591                None,
592            ),
593            entry(
594                (2026, 3, 21, 9, 1, 0),
595                "sess-1",
596                "assistant",
597                "I found the store seam in ai-contexters.",
598                None,
599            ),
600            entry(
601                (2026, 3, 21, 9, 2, 0),
602                "sess-1",
603                "user",
604                "Switch now to https://github.com/VetCoders/loctree and review the scanner.",
605                None,
606            ),
607            entry(
608                (2026, 3, 21, 9, 3, 0),
609                "sess-1",
610                "assistant",
611                "I am reviewing loctree next.",
612                None,
613            ),
614        ];
615
616        let segments = semantic_segments(&entries);
617        assert_eq!(segments.len(), 2);
618        assert_eq!(segments[0].project_label(), "VetCoders/ai-contexters");
619        assert_eq!(segments[1].project_label(), "VetCoders/loctree");
620    }
621
622    #[test]
623    fn repo_signal_segmentation_keeps_unknown_prefix_honest() {
624        let entries = vec![
625            entry(
626                (2026, 3, 21, 9, 0, 0),
627                "sess-2",
628                "user",
629                "Need a migration plan but I have not named the repo yet.",
630                None,
631            ),
632            entry(
633                (2026, 3, 21, 9, 1, 0),
634                "sess-2",
635                "assistant",
636                "Drafting a migration plan with acceptance criteria.",
637                None,
638            ),
639            entry(
640                (2026, 3, 21, 9, 2, 0),
641                "sess-2",
642                "user",
643                "The actual repo is https://github.com/VetCoders/ai-contexters.",
644                None,
645            ),
646        ];
647
648        let segments = semantic_segments(&entries);
649        assert_eq!(segments.len(), 2);
650        assert!(segments[0].repo.is_none());
651        assert_eq!(segments[0].kind, Kind::Plans);
652        assert_eq!(segments[1].project_label(), "VetCoders/ai-contexters");
653    }
654
655    #[test]
656    fn repo_signal_segmentation_ignores_gemini_hash_like_cwd() {
657        let entry = entry(
658            (2026, 3, 21, 9, 0, 0),
659            "sess-3",
660            "user",
661            "No trustworthy repo here.",
662            Some("57cfd37b3a72d995c4f2d018ebf9d5a2"),
663        );
664
665        assert!(infer_repo_identity_from_entry(&entry).is_none());
666        let segments = semantic_segments(&[entry]);
667        assert_eq!(segments.len(), 1);
668        assert!(segments[0].repo.is_none());
669    }
670
671    #[test]
672    fn repo_signal_segmentation_uses_local_git_remote_when_available() {
673        let root = mk_tmp_dir("git-remote");
674        let repo = root.join("hosted").join("VetCoders").join("ai-contexters");
675        fs::create_dir_all(&repo).unwrap();
676
677        Command::new("git")
678            .arg("init")
679            .arg(&repo)
680            .output()
681            .expect("git init should run");
682        Command::new("git")
683            .arg("-C")
684            .arg(&repo)
685            .args([
686                "remote",
687                "add",
688                "origin",
689                "git@github.com:VetCoders/ai-contexters.git",
690            ])
691            .output()
692            .expect("git remote add should run");
693
694        let entry = entry(
695            (2026, 3, 21, 9, 0, 0),
696            "sess-4",
697            "user",
698            "Inspect the repo on disk.",
699            Some(repo.to_string_lossy().as_ref()),
700        );
701
702        let repo_identity = infer_repo_identity_from_entry(&entry).expect("repo identity");
703        assert_eq!(repo_identity.slug(), "VetCoders/ai-contexters");
704
705        let _ = fs::remove_dir_all(&root);
706    }
707
708    // ================================================================
709    // Source tier tests
710    // ================================================================
711
712    #[test]
713    fn source_tier_github_url_is_primary() {
714        let e = entry(
715            (2026, 3, 22, 10, 0, 0),
716            "sess-tier",
717            "user",
718            "Check https://github.com/VetCoders/ai-contexters for updates.",
719            None,
720        );
721        let tiered = infer_tiered_identity_from_entry(&e, &ProjectHashRegistry::default())
722            .expect("should resolve");
723        assert_eq!(tiered.tier, SourceTier::Primary);
724        assert_eq!(tiered.identity.slug(), "VetCoders/ai-contexters");
725        assert!(tiered.tier.is_assertable());
726    }
727
728    #[test]
729    fn rejects_template_literals() {
730        assert!(!is_probably_repo_name("{target_owner}"));
731        assert!(!is_probably_repo_name("<YOUR_USERNAME>"));
732        assert!(!is_probably_repo_name("${RELEASE_REPO}"));
733        assert!(!is_probably_repo_name("$REPO"));
734        assert!(!is_probably_repo_name("{org}"));
735    }
736
737    #[test]
738    fn rejects_dot_only_and_traversal_strings() {
739        assert!(!is_probably_repo_name("..."));
740        assert!(!is_probably_repo_name(".."));
741        assert!(!is_probably_repo_name("."));
742        assert!(!is_probably_repo_name(".../"));
743        assert!(!is_probably_repo_name("..hidden"));
744    }
745
746    #[test]
747    fn rejects_control_chars_and_separators() {
748        assert!(!is_probably_repo_name("foo/bar"));
749        assert!(!is_probably_repo_name("foo\\bar"));
750        assert!(!is_probably_repo_name("foo\nbar"));
751        assert!(!is_probably_repo_name("foo bar"));
752        assert!(!is_probably_repo_name(""));
753    }
754
755    #[test]
756    fn accepts_real_repo_names() {
757        assert!(is_probably_repo_name("vibecrafted"));
758        assert!(is_probably_repo_name("rust-memex"));
759        assert!(is_probably_repo_name("ai-contexters"));
760        assert!(is_probably_repo_name("vc-runtime"));
761        assert!(is_probably_repo_name("CodeScribe"));
762        assert!(is_probably_repo_name("starship"));
763        assert!(is_probably_repo_name("01mf02"));
764        assert!(is_probably_repo_name("a"));
765    }
766
767    #[test]
768    fn fallback_routes_invalid_remote_owner_to_local_bucket() {
769        let e = entry(
770            (2026, 3, 22, 10, 0, 0),
771            "sess-local-fallback",
772            "user",
773            "Clone https://github.com/{target_owner}/vibecrafted.git before release.",
774            None,
775        );
776
777        let tiered = infer_tiered_identity_from_entry(&e, &ProjectHashRegistry::default())
778            .expect("malformed remote should resolve to local fallback");
779        assert_eq!(tiered.identity.slug(), "local/vibecrafted");
780        assert!(tiered.tier.is_assertable());
781
782        let segments = semantic_segments(&[e]);
783        assert_eq!(segments.len(), 1);
784        assert_eq!(segments[0].project_label(), "local/vibecrafted");
785        assert_ne!(segments[0].project_label(), "{target_owner}/vibecrafted");
786    }
787
788    #[test]
789    fn fallback_routes_invalid_remote_repo_to_unknown_local_bucket() {
790        let identity = infer_repo_identity_from_remote_like(
791            "https://github.com/VetCoders/${RELEASE_REPO}.git",
792        )
793        .expect("malformed repository should resolve to local unknown fallback");
794
795        assert_eq!(identity.slug(), "local/unknown");
796    }
797
798    #[test]
799    fn source_tier_git_remote_cwd_is_primary() {
800        let root = mk_tmp_dir("tier-git-remote");
801        let repo = root.join("hosted").join("VetCoders").join("loctree");
802        fs::create_dir_all(&repo).unwrap();
803
804        Command::new("git")
805            .arg("init")
806            .arg(&repo)
807            .output()
808            .expect("git init");
809        Command::new("git")
810            .arg("-C")
811            .arg(&repo)
812            .args([
813                "remote",
814                "add",
815                "origin",
816                "git@github.com:VetCoders/loctree.git",
817            ])
818            .output()
819            .expect("git remote add");
820
821        let e = entry(
822            (2026, 3, 22, 10, 0, 0),
823            "sess-tier-git",
824            "user",
825            "Working in the repo.",
826            Some(repo.to_string_lossy().as_ref()),
827        );
828
829        let tiered = infer_tiered_identity_from_entry(&e, &ProjectHashRegistry::default())
830            .expect("should resolve");
831        assert_eq!(tiered.tier, SourceTier::Primary);
832        assert_eq!(tiered.identity.slug(), "VetCoders/loctree");
833
834        let _ = fs::remove_dir_all(&root);
835    }
836
837    #[test]
838    fn source_tier_known_layout_without_git_is_fallback() {
839        let e = entry(
840            (2026, 3, 22, 10, 0, 0),
841            "sess-tier-layout",
842            "user",
843            "Working at /nonexistent/hosted/SomeOrg/SomeRepo",
844            None,
845        );
846        let tiered = infer_tiered_identity_from_entry(&e, &ProjectHashRegistry::default());
847        // Path in message text resolved via known layout (no .git) → Fallback
848        if let Some(t) = tiered {
849            assert_eq!(t.tier, SourceTier::Fallback);
850            assert!(!t.tier.is_assertable());
851        }
852        // It's also OK if it returns None (path doesn't exist on disk)
853    }
854
855    #[test]
856    fn source_tier_hex_hash_cwd_is_opaque_without_registry() {
857        let e = entry(
858            (2026, 3, 22, 10, 0, 0),
859            "sess-tier-hash",
860            "user",
861            "Hello from Gemini.",
862            Some("fef6ad02174d592d21e7f8a6143564388027ec0c"),
863        );
864        let tiered = infer_tiered_identity_from_entry(&e, &ProjectHashRegistry::default());
865        assert!(
866            tiered.is_none(),
867            "hex hash without registry must not resolve"
868        );
869    }
870
871    #[test]
872    fn source_tier_hex_hash_resolves_through_registry() {
873        let root = mk_tmp_dir("tier-registry");
874        let repo = root.join("hosted").join("VetCoders").join("ai-contexters");
875        fs::create_dir_all(&repo).unwrap();
876
877        Command::new("git")
878            .arg("init")
879            .arg(&repo)
880            .output()
881            .expect("git init");
882        Command::new("git")
883            .arg("-C")
884            .arg(&repo)
885            .args([
886                "remote",
887                "add",
888                "origin",
889                "git@github.com:VetCoders/ai-contexters.git",
890            ])
891            .output()
892            .expect("git remote add");
893
894        let mut registry = ProjectHashRegistry::default();
895        registry.mappings.insert(
896            "fef6ad02174d592d21e7f8a6143564388027ec0c".to_string(),
897            repo.to_string_lossy().to_string(),
898        );
899
900        let e = entry(
901            (2026, 3, 22, 10, 0, 0),
902            "sess-tier-reg",
903            "user",
904            "Hello from Gemini.",
905            Some("fef6ad02174d592d21e7f8a6143564388027ec0c"),
906        );
907
908        let tiered =
909            infer_tiered_identity_from_entry(&e, &registry).expect("registry should resolve");
910        assert_eq!(tiered.tier, SourceTier::Secondary);
911        assert_eq!(tiered.identity.slug(), "VetCoders/ai-contexters");
912        assert!(tiered.tier.is_assertable());
913
914        let _ = fs::remove_dir_all(&root);
915    }
916
917    #[test]
918    fn source_tier_registry_with_unknown_hash_returns_none() {
919        let registry = ProjectHashRegistry::default();
920        let e = entry(
921            (2026, 3, 22, 10, 0, 0),
922            "sess-tier-unknown",
923            "user",
924            "Hello from Gemini.",
925            Some("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
926        );
927        let tiered = infer_tiered_identity_from_entry(&e, &registry);
928        assert!(
929            tiered.is_none(),
930            "unknown hash must not resolve even with empty registry"
931        );
932    }
933
934    #[test]
935    fn source_tier_classify_cwd_empty_is_opaque() {
936        assert_eq!(classify_cwd_tier(None), SourceTier::Opaque);
937        assert_eq!(classify_cwd_tier(Some("")), SourceTier::Opaque);
938    }
939
940    #[test]
941    fn source_tier_classify_cwd_hex_is_opaque() {
942        assert_eq!(
943            classify_cwd_tier(Some("57cfd37b3a72d995c4f2d018ebf9d5a2")),
944            SourceTier::Opaque
945        );
946    }
947
948    #[test]
949    fn segments_carry_source_tier() {
950        let entries = vec![
951            entry(
952                (2026, 3, 22, 10, 0, 0),
953                "sess-st",
954                "user",
955                "Check https://github.com/VetCoders/ai-contexters",
956                None,
957            ),
958            entry(
959                (2026, 3, 22, 10, 1, 0),
960                "sess-st",
961                "assistant",
962                "Reviewing now.",
963                None,
964            ),
965        ];
966
967        let segments = semantic_segments(&entries);
968        assert_eq!(segments.len(), 1);
969        assert_eq!(segments[0].source_tier, Some(SourceTier::Primary));
970        assert!(segments[0].has_assertable_identity());
971    }
972
973    #[test]
974    fn segments_without_repo_have_no_tier() {
975        let entries = vec![entry(
976            (2026, 3, 22, 10, 0, 0),
977            "sess-none",
978            "user",
979            "Just chatting, no repo context.",
980            None,
981        )];
982
983        let segments = semantic_segments(&entries);
984        assert_eq!(segments.len(), 1);
985        assert!(segments[0].repo.is_none());
986        assert!(segments[0].source_tier.is_none());
987        assert!(!segments[0].has_assertable_identity());
988    }
989
990    #[test]
991    fn segments_opaque_cwd_routes_to_non_repo() {
992        let entries = vec![entry(
993            (2026, 3, 22, 10, 0, 0),
994            "sess-opaque",
995            "user",
996            "Gemini session with opaque hash only.",
997            Some("fef6ad02174d592d21e7f8a6143564388027ec0c"),
998        )];
999
1000        let segments = semantic_segments(&entries);
1001        assert_eq!(segments.len(), 1);
1002        assert!(segments[0].repo.is_none());
1003        assert_eq!(segments[0].project_label(), "non-repository-contexts");
1004    }
1005
1006    #[test]
1007    fn segments_opaque_cwd_resolves_with_registry() {
1008        let root = mk_tmp_dir("seg-registry");
1009        let repo = root.join("hosted").join("VetCoders").join("ai-contexters");
1010        fs::create_dir_all(&repo).unwrap();
1011
1012        Command::new("git")
1013            .arg("init")
1014            .arg(&repo)
1015            .output()
1016            .expect("git init");
1017        Command::new("git")
1018            .arg("-C")
1019            .arg(&repo)
1020            .args([
1021                "remote",
1022                "add",
1023                "origin",
1024                "git@github.com:VetCoders/ai-contexters.git",
1025            ])
1026            .output()
1027            .expect("git remote add");
1028
1029        let mut registry = ProjectHashRegistry::default();
1030        registry.mappings.insert(
1031            "fef6ad02174d592d21e7f8a6143564388027ec0c".to_string(),
1032            repo.to_string_lossy().to_string(),
1033        );
1034
1035        let entries = vec![entry(
1036            (2026, 3, 22, 10, 0, 0),
1037            "sess-reg",
1038            "user",
1039            "Gemini session with mapped hash.",
1040            Some("fef6ad02174d592d21e7f8a6143564388027ec0c"),
1041        )];
1042
1043        let segments = semantic_segments_with_registry(&entries, &registry);
1044        assert_eq!(segments.len(), 1);
1045        assert!(segments[0].repo.is_some());
1046        assert_eq!(segments[0].source_tier, Some(SourceTier::Secondary));
1047        assert_eq!(segments[0].project_label(), "VetCoders/ai-contexters");
1048
1049        let _ = fs::remove_dir_all(&root);
1050    }
1051
1052    #[test]
1053    fn project_hash_registry_roundtrip() {
1054        let root = mk_tmp_dir("registry-roundtrip");
1055        fs::create_dir_all(&root).unwrap();
1056        let path = root.join("gemini-project-map.json");
1057
1058        let mut registry = ProjectHashRegistry::default();
1059        registry.mappings.insert(
1060            "abc123".to_string(),
1061            "/home/user/repos/my-project".to_string(),
1062        );
1063
1064        let json = serde_json::to_string_pretty(&registry).unwrap();
1065        fs::write(&path, &json).unwrap();
1066
1067        let loaded = ProjectHashRegistry::load_from(&path);
1068        assert_eq!(loaded.mappings.len(), 1);
1069        assert_eq!(
1070            loaded.mappings.get("abc123").map(String::as_str),
1071            Some("/home/user/repos/my-project")
1072        );
1073
1074        let _ = fs::remove_dir_all(&root);
1075    }
1076
1077    #[test]
1078    fn project_hash_registry_missing_file_returns_empty() {
1079        let registry = ProjectHashRegistry::load_from(Path::new("/nonexistent/path.json"));
1080        assert!(registry.mappings.is_empty());
1081    }
1082
1083    #[test]
1084    fn source_tier_ordering() {
1085        assert!(SourceTier::Primary < SourceTier::Secondary);
1086        assert!(SourceTier::Secondary < SourceTier::Fallback);
1087        assert!(SourceTier::Fallback < SourceTier::Opaque);
1088    }
1089
1090    #[test]
1091    fn source_tier_assertable_boundaries() {
1092        assert!(SourceTier::Primary.is_assertable());
1093        assert!(SourceTier::Secondary.is_assertable());
1094        assert!(!SourceTier::Fallback.is_assertable());
1095        assert!(!SourceTier::Opaque.is_assertable());
1096    }
1097}