// ccboard_core/analytics/discover.rs
1//! Pattern discovery from session history
2//!
3//! Analyzes JSONL session files to surface recurring user patterns
4//! worth extracting as Claude.md rules, skills, or slash commands.
5//!
6//! Algorithm: n-gram extraction → frequency filtering → deduplication →
7//! Jaccard clustering → category assignment by 20%/5% session thresholds.
8
9use std::collections::{HashMap, HashSet};
10use std::io::{BufRead, BufReader};
11use std::path::{Path, PathBuf};
12use std::sync::Arc;
13
14use serde::{Deserialize, Serialize};
15use tokio::sync::Semaphore;
16
/// Configuration for a discover run.
#[derive(Debug, Clone)]
pub struct DiscoverConfig {
    /// Number of past days to analyze (default: 90).
    /// Sessions are filtered by file mtime against this cutoff.
    pub since_days: u32,
    /// Minimum occurrences to surface a pattern (default: 3).
    /// Also used as the minimum number of distinct sessions a
    /// suggestion must span before it is emitted.
    pub min_count: usize,
    /// Maximum suggestions to return (default: 20)
    pub top: usize,
    /// Search all projects (false = current project directory only)
    pub all_projects: bool,
}

impl Default for DiscoverConfig {
    // Defaults match the per-field values documented above.
    fn default() -> Self {
        Self {
            since_days: 90,
            min_count: 3,
            top: 20,
            all_projects: false,
        }
    }
}
40
/// Category of a discovered suggestion, assigned from the percentage of
/// sessions that contain the pattern (see `discover_patterns`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum SuggestionCategory {
    /// Broad behavioral rule — active in every session (>20% of sessions)
    ClaudeMdRule,
    /// Specialized expertise loaded on demand (5–20% of sessions)
    Skill,
    /// Repeatable workflow with clear inputs/outputs (<5% of sessions)
    Command,
}
51
52impl SuggestionCategory {
53    pub fn as_str(&self) -> &'static str {
54        match self {
55            Self::ClaudeMdRule => "CLAUDE.MD RULE",
56            Self::Skill => "SKILL",
57            Self::Command => "COMMAND",
58        }
59    }
60
61    pub fn icon(&self) -> &'static str {
62        match self {
63            Self::ClaudeMdRule => "📋",
64            Self::Skill => "🧩",
65            Self::Command => "⚡",
66        }
67    }
68}
69
/// A single pattern suggestion produced by the discover algorithm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiscoverSuggestion {
    /// Human-readable pattern phrase (representative n-gram, space-joined)
    pub pattern: String,
    /// Total occurrences across all sessions (summed over the whole cluster)
    pub count: usize,
    /// Number of distinct sessions containing this pattern
    pub session_count: usize,
    /// Number of distinct projects containing this pattern
    pub project_count: usize,
    /// True when pattern appears in 2+ projects
    pub cross_project: bool,
    /// Category determined by session percentage thresholds
    pub category: SuggestionCategory,
    /// Composite score: session_pct * cross_project_bonus (1.5x when
    /// cross-project), rounded to 4 decimal places
    pub score: f64,
    /// Up to 2 example session IDs
    pub example_sessions: Vec<String>,
}
90
91// ─────────────────────────────────────────────────────────────────────────────
92// Stop words & system injection markers
93// ─────────────────────────────────────────────────────────────────────────────
94
/// English stop words plus generic dev verbs ("add", "use", "make", …)
/// removed during normalization so n-grams are built from content words.
/// All entries are lowercase; tokens are lowercased before lookup.
/// (Previous revision listed "can" and "also" twice — duplicates removed.)
static STOP_WORDS: &[&str] = &[
    "a", "an", "the", "and", "or", "but", "in", "on", "at", "to", "for", "of", "with", "by",
    "from", "is", "it", "its", "be", "as", "was", "are", "were", "been", "have", "has", "had",
    "do", "does", "did", "will", "would", "could", "should", "may", "might", "can", "shall",
    "this", "that", "these", "those", "i", "you", "we", "they", "he", "she", "my", "your", "our",
    "their", "his", "her", "me", "us", "them", "so", "if", "then", "than", "when", "what", "how",
    "why", "where", "who", "which", "not", "no", "also", "just", "now", "up", "out", "about",
    "into", "after", "before", "all", "any", "some", "more", "new", "add", "use", "make", "get",
    "go", "run", "see", "here", "there", "need", "want", "please", "ok", "okay", "yes", "yeah",
    "let", "help", "look", "check", "same", "like", "very", "much", "only", "other",
    "each", "file", "code", "create", "update", "change", "think", "know", "give", "take", "put",
    "keep",
];
108
/// Lowercase substrings identifying system-injected text (continuation
/// summaries, plan-mode notices, tool scaffolding) rather than genuine
/// user input. `is_system_injection` lowercases the probe text before
/// matching, so every entry here must be lowercase.
static SYSTEM_INJECTION_MARKERS: &[&str] = &[
    "this session is being continued",
    "read the full transcript",
    "context summary below covers",
    "exact snippets error messages content",
    "exiting plan mode",
    "task tools haven",
    "teamcreate tool team parallelize",
];
118
119fn is_stop_word(token: &str) -> bool {
120    STOP_WORDS.contains(&token)
121}
122
123fn is_system_injection(text: &str) -> bool {
124    let lower = text.to_lowercase();
125    SYSTEM_INJECTION_MARKERS
126        .iter()
127        .any(|marker| lower.contains(marker))
128}
129
130// ─────────────────────────────────────────────────────────────────────────────
131// Text normalization & n-gram extraction
132// ─────────────────────────────────────────────────────────────────────────────
133
134/// Normalize text: lowercase, strip punctuation (keep hyphens), tokenize,
135/// remove stop words, and drop tokens with length <= 2.
136pub fn normalize_text(text: &str) -> Vec<String> {
137    let lower = text.to_lowercase();
138    // Replace chars that are not alphanumeric or hyphens with spaces
139    let clean: String = lower
140        .chars()
141        .map(|c| {
142            if c.is_alphanumeric() || c == '-' {
143                c
144            } else {
145                ' '
146            }
147        })
148        .collect();
149
150    clean
151        .split_whitespace()
152        .filter(|t| t.len() > 2 && !is_stop_word(t))
153        .map(|t| t.to_string())
154        .collect()
155}
156
/// Extract all contiguous n-grams of length `n` from a token list.
///
/// Returns an empty vector when `n == 0` (a zero-length n-gram is
/// meaningless) or when there are fewer than `n` tokens.
pub fn extract_ngrams(tokens: &[String], n: usize) -> Vec<Vec<String>> {
    if n == 0 || tokens.len() < n {
        return Vec::new();
    }
    // `windows` yields every contiguous subslice of length `n`,
    // replacing the manual index-slice loop.
    tokens.windows(n).map(|window| window.to_vec()).collect()
}
166
/// Jaccard similarity |A∩B| / |A∪B| between two token lists (set-based).
///
/// Returns 0.0 if either list is empty. Otherwise both sets are
/// non-empty, so the union is non-empty and the division is always
/// well-defined (the old `union == 0` branch was dead code).
pub fn jaccard_overlap(a: &[String], b: &[String]) -> f64 {
    if a.is_empty() || b.is_empty() {
        return 0.0;
    }
    let set_a: HashSet<&str> = a.iter().map(String::as_str).collect();
    let set_b: HashSet<&str> = b.iter().map(String::as_str).collect();
    let intersection = set_a.intersection(&set_b).count() as f64;
    let union = set_a.union(&set_b).count() as f64;
    intersection / union
}
182
183// ─────────────────────────────────────────────────────────────────────────────
184// Session data extraction
185// ─────────────────────────────────────────────────────────────────────────────
186
/// Collected data for a single session
#[derive(Debug, Clone)]
pub struct SessionData {
    /// Session identifier (the JSONL file stem)
    pub session_id: String,
    /// Project directory name the session belongs to
    pub project: String,
    /// Filtered user messages extracted from the session's JSONL file
    pub messages: Vec<String>,
}
194
195/// Parse a JSONL entry to extract the message content if it is a real user message.
196fn extract_user_content(line: &str) -> Option<String> {
197    #[derive(Deserialize)]
198    struct MessageContent {
199        #[serde(rename = "type")]
200        msg_type: Option<String>,
201        content: Option<serde_json::Value>,
202    }
203    #[derive(Deserialize)]
204    struct Entry {
205        #[serde(rename = "type")]
206        entry_type: Option<String>,
207        message: Option<MessageContent>,
208    }
209
210    let entry: Entry = serde_json::from_str(line).ok()?;
211
212    if entry.entry_type.as_deref() != Some("user") {
213        return None;
214    }
215
216    let message = entry.message?;
217    // Must be a user role message
218    if message.msg_type.as_deref() != Some("human") && message.msg_type.is_some() {
219        // Some entries have no type on the inner message — still accept them
220        // when they have a string content
221    }
222
223    let content = message.content?;
224
225    // Content must be a plain string (arrays = tool_results)
226    let text = match &content {
227        serde_json::Value::String(s) => s.clone(),
228        _ => return None,
229    };
230
231    let trimmed = text.trim();
232
233    // Filter: must not start with XML
234    if trimmed.starts_with('<') {
235        return None;
236    }
237
238    // Filter: length bounds (10..=800)
239    let len = trimmed.len();
240    if !(10..=800).contains(&len) {
241        return None;
242    }
243
244    // Filter: system injection
245    if is_system_injection(trimmed) {
246        return None;
247    }
248
249    Some(trimmed.to_string())
250}
251
252/// Extract all real user messages from a single JSONL file.
253///
254/// Uses streaming BufReader — never loads the whole file.
255fn extract_all_user_messages(path: &Path) -> Vec<String> {
256    let file = match std::fs::File::open(path) {
257        Ok(f) => f,
258        Err(_) => return vec![],
259    };
260
261    let reader = BufReader::new(file);
262    let mut messages = Vec::new();
263
264    for line in reader.lines() {
265        let line = match line {
266            Ok(l) => l,
267            Err(_) => continue,
268        };
269        if line.trim().is_empty() {
270            continue;
271        }
272        if let Some(msg) = extract_user_content(&line) {
273            messages.push(msg);
274        }
275    }
276
277    messages
278}
279
280// ─────────────────────────────────────────────────────────────────────────────
281// Session collection (async, bounded concurrency)
282// ─────────────────────────────────────────────────────────────────────────────
283
/// Collect session data from all project directories under `projects_dir`.
///
/// - Respects `since_days` cutoff via file mtime
/// - Skips sessions whose filename starts with `agent-`
/// - Uses a semaphore to bound concurrent file I/O to 32
/// - Drops sessions that yield zero extracted user messages
pub async fn collect_sessions_data(
    projects_dir: &Path,
    since_days: u32,
    filter_project: Option<&str>,
) -> Vec<SessionData> {
    if !projects_dir.exists() {
        return vec![];
    }

    // Build list of project dirs, optionally restricted by a substring
    // match on the directory name when `filter_project` is given.
    let project_dirs: Vec<PathBuf> = match std::fs::read_dir(projects_dir) {
        Ok(entries) => entries
            .filter_map(|e| e.ok())
            .filter(|e| e.path().is_dir())
            .filter(|e| {
                if let Some(proj) = filter_project {
                    e.file_name().to_string_lossy().contains(proj)
                } else {
                    true
                }
            })
            .map(|e| e.path())
            .collect(),
        Err(_) => return vec![],
    };

    // Cutoff expressed as seconds since the Unix epoch so it can be
    // compared directly against file mtimes below.
    let cutoff = chrono::Utc::now() - chrono::Duration::days(since_days as i64);
    let cutoff_secs = cutoff.timestamp() as u64;

    // Bounds how many files are being read/parsed at once; each spawned
    // task holds one permit for its whole lifetime.
    let semaphore = Arc::new(Semaphore::new(32));
    let mut handles = Vec::new();

    for project_dir in project_dirs {
        let project_name = project_dir
            .file_name()
            .unwrap_or_default()
            .to_string_lossy()
            .to_string();

        // Enumerate JSONL files
        let jsonl_files: Vec<PathBuf> = match std::fs::read_dir(&project_dir) {
            Ok(entries) => entries
                .filter_map(|e| e.ok())
                .map(|e| e.path())
                .filter(|p| p.extension().map(|ext| ext == "jsonl").unwrap_or(false))
                .collect(),
            Err(_) => continue,
        };

        for filepath in jsonl_files {
            // Session ID is the file stem (filename minus ".jsonl").
            let session_id = filepath
                .file_stem()
                .unwrap_or_default()
                .to_string_lossy()
                .to_string();

            // Skip agent sessions
            if session_id.starts_with("agent-") {
                continue;
            }

            // Date filter via mtime. An unreadable modified-time maps to 0,
            // which fails the cutoff check below, so such files are skipped.
            let mtime = match filepath.metadata() {
                Ok(m) => m
                    .modified()
                    .ok()
                    .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
                    .map(|d| d.as_secs())
                    .unwrap_or(0),
                Err(_) => continue,
            };

            if mtime < cutoff_secs {
                continue;
            }

            let sem = Arc::clone(&semaphore);
            let filepath_clone = filepath.clone();
            let project_name_clone = project_name.clone();
            let session_id_clone = session_id.clone();

            let handle = tokio::spawn(async move {
                // Permit is held across the blocking parse below, so the
                // semaphore effectively caps concurrent file reads at 32.
                let _permit = sem.acquire().await.ok()?;
                // CPU-bound work: spawn_blocking for file I/O + parsing
                let path = filepath_clone.clone();
                let messages =
                    tokio::task::spawn_blocking(move || extract_all_user_messages(&path))
                        .await
                        .ok()?;

                if messages.is_empty() {
                    return None;
                }

                Some(SessionData {
                    session_id: session_id_clone,
                    project: project_name_clone,
                    messages,
                })
            });

            handles.push(handle);
        }
    }

    // Await all tasks; tasks that panicked or returned None are dropped.
    let mut result = Vec::new();
    for handle in handles {
        if let Ok(Some(data)) = handle.await {
            result.push(data);
        }
    }

    result
}
403
404// ─────────────────────────────────────────────────────────────────────────────
405// Core algorithm
406// ─────────────────────────────────────────────────────────────────────────────
407
/// Occurrence of an n-gram in a specific session/project.
/// One record is pushed per hit, so repeats within a session are kept
/// (for total counts) while distinct session/project stats are derived
/// later via sets.
#[derive(Debug, Clone)]
struct NgramOccurrence {
    /// Session the n-gram was seen in (may repeat across occurrences)
    session_id: String,
    /// Project directory name of that session
    project: String,
}
414
415/// Run the full discovery algorithm on pre-collected session data.
416pub fn discover_patterns(
417    sessions_data: &[SessionData],
418    min_count: usize,
419    top: usize,
420) -> Vec<DiscoverSuggestion> {
421    let total_sessions = sessions_data.len();
422    if total_sessions == 0 {
423        return vec![];
424    }
425
426    // ── Step 1: build n-gram index ────────────────────────────────────────────
427    let mut ngram_index: HashMap<Vec<String>, Vec<NgramOccurrence>> = HashMap::new();
428
429    for sd in sessions_data {
430        for msg in &sd.messages {
431            let tokens = normalize_text(msg);
432            if tokens.len() < 3 {
433                continue;
434            }
435
436            for n in 3..=6usize {
437                for ngram in extract_ngrams(&tokens, n) {
438                    ngram_index.entry(ngram).or_default().push(NgramOccurrence {
439                        session_id: sd.session_id.clone(),
440                        project: sd.project.clone(),
441                    });
442                }
443            }
444        }
445    }
446
447    // ── Step 2: frequency filter ──────────────────────────────────────────────
448    let frequent: HashMap<Vec<String>, Vec<NgramOccurrence>> = ngram_index
449        .into_iter()
450        .filter(|(_, occs)| occs.len() >= min_count)
451        .collect();
452
453    // ── Step 3: deduplication — prefer longer n-grams ────────────────────────
454    let mut sorted_ngrams: Vec<(Vec<String>, Vec<NgramOccurrence>)> =
455        frequent.into_iter().collect();
456
457    // Sort by (len DESC, count DESC)
458    sorted_ngrams.sort_by(|a, b| b.0.len().cmp(&a.0.len()).then(b.1.len().cmp(&a.1.len())));
459
460    let mut kept: Vec<(Vec<String>, Vec<NgramOccurrence>)> = Vec::new();
461    let mut subsumed: HashSet<Vec<String>> = HashSet::new();
462
463    for (ngram, occs) in sorted_ngrams {
464        if subsumed.contains(&ngram) {
465            continue;
466        }
467        // Mark all sub-ngrams (length 3..len) as subsumed
468        for sub_n in 3..ngram.len() {
469            let end = ngram.len() - sub_n + 1;
470            for i in 0..end {
471                let sub = ngram[i..i + sub_n].to_vec();
472                subsumed.insert(sub);
473            }
474        }
475        kept.push((ngram, occs));
476    }
477
478    // ── Step 4: Jaccard clustering ────────────────────────────────────────────
479    let mut clusters: Vec<Vec<usize>> = Vec::new();
480    let mut assigned: HashSet<usize> = HashSet::new();
481
482    for i in 0..kept.len() {
483        if assigned.contains(&i) {
484            continue;
485        }
486        let mut cluster = vec![i];
487        for j in (i + 1)..kept.len() {
488            if assigned.contains(&j) {
489                continue;
490            }
491            let overlap = jaccard_overlap(&kept[i].0, &kept[j].0);
492            if overlap > 0.6 {
493                cluster.push(j);
494                assigned.insert(j);
495            }
496        }
497        clusters.push(cluster);
498        assigned.insert(i);
499    }
500
501    // ── Step 5: build suggestions per cluster ─────────────────────────────────
502    let mut suggestions = Vec::new();
503
504    for cluster in &clusters {
505        // Representative: longest ngram with most occurrences
506        let best_idx = *cluster
507            .iter()
508            .max_by_key(|&&i| (kept[i].0.len(), kept[i].1.len()))
509            .unwrap();
510
511        let (ref ngram, _) = kept[best_idx];
512
513        // Aggregate across cluster members
514        let mut all_occurrences: Vec<&NgramOccurrence> = Vec::new();
515        for &idx in cluster {
516            all_occurrences.extend(kept[idx].1.iter());
517        }
518
519        let distinct_sessions: HashSet<&str> = all_occurrences
520            .iter()
521            .map(|o| o.session_id.as_str())
522            .collect();
523        let distinct_projects: HashSet<&str> =
524            all_occurrences.iter().map(|o| o.project.as_str()).collect();
525
526        let count = all_occurrences.len();
527        let session_count = distinct_sessions.len();
528        let project_count = distinct_projects.len();
529        let cross_project = project_count >= 2;
530
531        if session_count < min_count {
532            continue;
533        }
534
535        let session_pct = session_count as f64 / total_sessions as f64;
536
537        let category = if session_pct > 0.20 {
538            SuggestionCategory::ClaudeMdRule
539        } else if session_pct >= 0.05 {
540            SuggestionCategory::Skill
541        } else {
542            SuggestionCategory::Command
543        };
544
545        let score = session_pct * if cross_project { 1.5 } else { 1.0 };
546
547        // Up to 2 example session IDs
548        let mut example_sessions: Vec<String> = distinct_sessions
549            .iter()
550            .take(2)
551            .map(|s| s.to_string())
552            .collect();
553        example_sessions.sort(); // deterministic order
554
555        let pattern = ngram.join(" ");
556
557        suggestions.push(DiscoverSuggestion {
558            pattern,
559            count,
560            session_count,
561            project_count,
562            cross_project,
563            category,
564            score: (score * 10000.0).round() / 10000.0,
565            example_sessions,
566        });
567    }
568
569    // ── Step 6: sort by score DESC, take top N ────────────────────────────────
570    suggestions.sort_by(|a, b| {
571        b.score
572            .partial_cmp(&a.score)
573            .unwrap_or(std::cmp::Ordering::Equal)
574    });
575    suggestions.truncate(top);
576    suggestions
577}
578
579// ─────────────────────────────────────────────────────────────────────────────
580// Public entry point
581// ─────────────────────────────────────────────────────────────────────────────
582
583/// Run pattern discovery against the Claude projects directory.
584///
585/// Returns `(suggestions, total_sessions, total_projects)`.
586pub async fn run_discover(
587    claude_home: &Path,
588    config: &DiscoverConfig,
589    filter_project: Option<&str>,
590) -> anyhow::Result<(Vec<DiscoverSuggestion>, usize, usize)> {
591    let projects_dir = claude_home.join("projects");
592
593    eprint!("\rScanning sessions...");
594    let sessions_data =
595        collect_sessions_data(&projects_dir, config.since_days, filter_project).await;
596
597    let total_sessions = sessions_data.len();
598    let total_projects: HashSet<&str> = sessions_data.iter().map(|s| s.project.as_str()).collect();
599    let total_projects_count = total_projects.len();
600
601    if total_sessions == 0 {
602        eprintln!();
603        return Ok((vec![], 0, 0));
604    }
605
606    eprint!(
607        "\rAnalyzing {} sessions across {} project(s)...    ",
608        total_sessions, total_projects_count
609    );
610
611    // CPU-bound: run in blocking thread
612    let sessions_data_clone = sessions_data;
613    let min_count = config.min_count;
614    let top = config.top;
615
616    let suggestions = tokio::task::spawn_blocking(move || {
617        discover_patterns(&sessions_data_clone, min_count, top)
618    })
619    .await
620    .map_err(|e| anyhow::anyhow!("spawn_blocking failed: {}", e))?;
621
622    eprintln!();
623
624    Ok((suggestions, total_sessions, total_projects_count))
625}
626
627// ─────────────────────────────────────────────────────────────────────────────
628// Tests
629// ─────────────────────────────────────────────────────────────────────────────
630
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_normalize_text() {
        let tokens = normalize_text("Add unit tests for the authentication flow!");
        // Content words survive normalization…
        assert!(tokens.contains(&"authentication".to_string()));
        assert!(tokens.contains(&"flow".to_string()));
        assert!(tokens.contains(&"tests".to_string()));
        // …while stop words are removed ("add" is on the stop list too).
        assert!(!tokens.contains(&"the".to_string()));
        assert!(!tokens.contains(&"for".to_string()));
        assert!(!tokens.contains(&"add".to_string()));
    }

    #[test]
    fn test_normalize_strips_punctuation() {
        let tokens = normalize_text("Write tests: before implementation.");
        // Punctuation stripped; non-stop content words survive.
        // (A previous revision asserted `contains(x) || !contains(x)` here —
        // a tautology that tested nothing.)
        assert!(tokens.contains(&"write".to_string()));
        assert!(tokens.contains(&"tests".to_string()));
        assert!(tokens.contains(&"implementation".to_string()));
        // "before" is a stop word — confirm it's filtered.
        assert!(!tokens.contains(&"before".to_string()));
    }

    #[test]
    fn test_ngram_extraction() {
        let tokens: Vec<String> = vec![
            "write".into(),
            "tests".into(),
            "authentication".into(),
            "flow".into(),
            "security".into(),
        ];

        let trigrams = extract_ngrams(&tokens, 3);
        assert_eq!(trigrams.len(), 3);
        assert_eq!(trigrams[0], vec!["write", "tests", "authentication"]);

        let six_grams = extract_ngrams(&tokens, 6);
        assert!(six_grams.is_empty(), "tokens shorter than 6 → no 6-grams");

        let tokens6: Vec<String> = vec![
            "a".into(),
            "b".into(),
            "c".into(),
            "d".into(),
            "e".into(),
            "f".into(),
        ];
        let six_grams2 = extract_ngrams(&tokens6, 6);
        assert_eq!(six_grams2.len(), 1);
    }

    #[test]
    fn test_jaccard_overlap() {
        let a: Vec<String> = vec!["write".into(), "tests".into(), "first".into()];
        let b: Vec<String> = vec!["write".into(), "tests".into(), "auth".into()];
        let overlap = jaccard_overlap(&a, &b);
        // intersection=2 ("write","tests"), union=4
        assert!((overlap - 0.5).abs() < 1e-9);

        let identical: Vec<String> = vec!["foo".into(), "bar".into()];
        assert!((jaccard_overlap(&identical, &identical) - 1.0).abs() < 1e-9);

        let empty: Vec<String> = vec![];
        assert_eq!(jaccard_overlap(&empty, &a), 0.0);
    }

    #[test]
    fn test_category_threshold() {
        // 22 of 100 sessions share the pattern → 22% > 20% → ClaudeMdRule.
        // (Builds owned Strings instead of the previous `.leak()` hack.)
        let sessions_data: Vec<SessionData> = (0..100)
            .map(|i| {
                let msg = if i < 22 {
                    "security review authentication flow handling properly".to_string()
                } else {
                    format!("some other message number {}", i)
                };
                SessionData {
                    session_id: format!("session-{:04}", i),
                    project: "proj-a".to_string(),
                    messages: vec![msg],
                }
            })
            .collect();

        let suggestions = discover_patterns(&sessions_data, 3, 20);
        // Find suggestion containing "security"
        let security = suggestions.iter().find(|s| s.pattern.contains("security"));
        if let Some(s) = security {
            assert_eq!(
                s.category,
                SuggestionCategory::ClaudeMdRule,
                "22/100 = 22% > 20% → ClaudeMdRule"
            );
        }
    }

    #[test]
    fn test_cross_project_bonus() {
        // The same pattern in 2 projects earns the 1.5x score multiplier.
        let sessions_data: Vec<SessionData> = (0..10)
            .map(|i| SessionData {
                session_id: format!("s{}", i),
                project: if i < 5 { "proj-a" } else { "proj-b" }.to_string(),
                messages: vec!["deploy staging environment pipeline testing".to_string()],
            })
            .collect();

        let suggestions = discover_patterns(&sessions_data, 3, 20);
        assert!(!suggestions.is_empty(), "should find patterns");

        let first = &suggestions[0];
        assert!(first.cross_project, "pattern appears in 2 projects");
        // score = session_pct * 1.5, so it must strictly exceed the
        // un-bonused session percentage (session_count / total_sessions).
        assert!(first.score > first.session_count as f64 / 10.0);
    }
}