Skip to main content

tldr_cli/commands/patterns/
temporal.rs

1//! Temporal Command - Temporal Constraint Mining
2//!
3//! Mines temporal constraints (method call sequences) from a codebase.
4//!
5//! # Algorithm
6//!
7//! 1. Extract method call sequences from each function
8//! 2. Build frequency table of (before, after) pairs (bigrams)
9//! 3. Calculate confidence: count(A->B) / count(A)
10//! 4. Filter by min_support and min_confidence
11//! 5. Optionally mine trigrams (3-method sequences)
12//!
13//! # TIGER Mitigations
14//!
15//! - **T05**: MAX_TRIGRAMS=10000 with BinaryHeap top-K selection
16//! - **E03**: --timeout flag (default 60s)
17//!
18//! # Example
19//!
20//! ```bash
21//! # Mine constraints from a directory
22//! tldr temporal src/ --min-support 2 --min-confidence 0.5
23//!
24//! # Filter for specific method
25//! tldr temporal src/ --query open
26//!
27//! # Include trigram patterns
28//! tldr temporal src/ --include-trigrams
29//! ```
30
31use std::cmp::Reverse;
32use std::collections::{BinaryHeap, HashMap};
33use std::path::{Path, PathBuf};
34use std::time::{Duration, Instant};
35
36use clap::Args;
37use tree_sitter::{Node, Parser};
38
39use tldr_core::types::Language;
40
41use crate::output::OutputFormat as GlobalOutputFormat;
42
43use super::error::{PatternsError, PatternsResult};
44use super::types::{
45    OutputFormat, TemporalConstraint, TemporalExample, TemporalMetadata, TemporalReport, Trigram,
46};
47use super::validation::{
48    check_directory_file_count, read_file_safe, validate_directory_path, validate_file_path,
49    validate_file_path_in_project, MAX_TRIGRAMS,
50};
51
52// =============================================================================
53// CLI Arguments
54// =============================================================================
55
56/// Mine temporal constraints (method call sequences) from a codebase.
57#[derive(Debug, Args)]
58pub struct TemporalArgs {
59    /// Directory or file to analyze
60    pub path: PathBuf,
61
62    /// Minimum occurrences for a pattern
63    #[arg(long, default_value = "2")]
64    pub min_support: u32,
65
66    /// Minimum confidence threshold (0.0-1.0)
67    #[arg(long, default_value = "0.5")]
68    pub min_confidence: f64,
69
70    /// Filter for specific method
71    #[arg(long)]
72    pub query: Option<String>,
73
74    /// Source language (only 'python' supported)
75    #[arg(long = "source-lang", default_value = "python")]
76    pub source_lang: String,
77
78    /// Maximum files to analyze
79    #[arg(long, default_value = "1000")]
80    pub max_files: u32,
81
82    /// Mine 3-method sequences
83    #[arg(long)]
84    pub include_trigrams: bool,
85
86    /// Number of examples per constraint
87    #[arg(long, default_value = "3")]
88    pub include_examples: u32,
89
90    /// Output format (json or text). Prefer global --format/-f flag.
91    #[arg(
92        long = "output",
93        short = 'o',
94        hide = true,
95        default_value = "json",
96        value_enum
97    )]
98    pub output_format: OutputFormat,
99
100    /// Timeout in seconds (E03 mitigation)
101    #[arg(long, default_value = "60")]
102    pub timeout: u64,
103
104    /// Project root for path validation (optional)
105    #[arg(long)]
106    pub project_root: Option<PathBuf>,
107
108    /// Language filter (auto-detected if omitted)
109    #[arg(long, short = 'l')]
110    pub lang: Option<Language>,
111}
112
113impl TemporalArgs {
114    /// Run the temporal analysis command
115    pub fn run(&self, global_format: GlobalOutputFormat) -> anyhow::Result<()> {
116        run(self.clone(), global_format)
117    }
118}
119
120impl Clone for TemporalArgs {
121    fn clone(&self) -> Self {
122        Self {
123            path: self.path.clone(),
124            min_support: self.min_support,
125            min_confidence: self.min_confidence,
126            query: self.query.clone(),
127            source_lang: self.source_lang.clone(),
128            max_files: self.max_files,
129            include_trigrams: self.include_trigrams,
130            include_examples: self.include_examples,
131            output_format: self.output_format,
132            timeout: self.timeout,
133            project_root: self.project_root.clone(),
134            lang: self.lang,
135        }
136    }
137}
138
139// =============================================================================
140// Sequence Extraction
141// =============================================================================
142
143/// Extractor for method call sequences from source code.
144#[derive(Debug, Default)]
145pub struct SequenceExtractor {
146    /// Current function being analyzed
147    current_function: String,
148    /// Extracted sequences: object_key -> list of method names
149    sequences: HashMap<String, Vec<String>>,
150    /// Variable assignments: variable -> assigned from (for tracking objects)
151    var_assignments: HashMap<String, String>,
152    /// Current line number
153    current_line: u32,
154}
155
156impl SequenceExtractor {
157    /// Create a new sequence extractor for a file
158    pub fn new() -> Self {
159        Self::default()
160    }
161
162    /// Extract sequences from a function node
163    pub fn extract_function(&mut self, func_node: Node, source: &[u8]) {
164        // Get function name
165        let func_name = self.get_function_name(func_node, source);
166        if func_name.is_empty() {
167            return;
168        }
169        self.current_function = func_name;
170        self.var_assignments.clear();
171
172        // Walk the function body and extract call sequences
173        self.extract_calls_recursive(func_node, source, 0);
174    }
175
176    /// Recursively extract method calls from AST nodes
177    fn extract_calls_recursive(&mut self, node: Node, source: &[u8], depth: usize) {
178        // Prevent stack overflow
179        if depth > 100 {
180            return;
181        }
182
183        self.current_line = node.start_position().row as u32 + 1;
184
185        match node.kind() {
186            // Track assignments: x = open(...) or x = something.method()
187            "assignment" => {
188                self.handle_assignment(node, source);
189            }
190
191            // Track method calls: x.read(), x.close(), etc.
192            "call" => {
193                self.handle_call(node, source);
194            }
195
196            // Track with statements: with open(...) as f
197            "with_statement" => {
198                self.handle_with_statement(node, source);
199            }
200
201            _ => {}
202        }
203
204        // Recurse into children
205        let mut cursor = node.walk();
206        for child in node.children(&mut cursor) {
207            self.extract_calls_recursive(child, source, depth + 1);
208        }
209    }
210
211    /// Handle an assignment statement
212    fn handle_assignment(&mut self, node: Node, source: &[u8]) {
213        // Get the left side (variable name)
214        let var_name = if let Some(left) = node.child_by_field_name("left") {
215            self.node_text(left, source).to_string()
216        } else {
217            // Try to find pattern targets (for simple assignments)
218            let mut var = String::new();
219            for child in node.children(&mut node.walk()) {
220                if child.kind() == "identifier" {
221                    var = self.node_text(child, source).to_string();
222                    break;
223                }
224            }
225            var
226        };
227
228        if var_name.is_empty() {
229            return;
230        }
231
232        // Get the right side (value)
233        if let Some(right) = node.child_by_field_name("right") {
234            // Check if it's a call expression
235            if right.kind() == "call" {
236                let call_name = self.extract_call_name(right, source);
237                if !call_name.is_empty() {
238                    // Track the assignment: var_name was assigned from call_name
239                    self.var_assignments
240                        .insert(var_name.clone(), call_name.clone());
241
242                    // Add to sequence: func:var -> [constructor_call]
243                    let key = format!("{}:{}", self.current_function, var_name);
244                    self.sequences.entry(key).or_default().push(call_name);
245                }
246            }
247        }
248    }
249
250    /// Handle a call expression
251    fn handle_call(&mut self, node: Node, source: &[u8]) {
252        // Extract the call structure: object.method() or function()
253        if let Some(func) = node.child_by_field_name("function") {
254            if func.kind() == "attribute" {
255                // Method call: obj.method()
256                if let Some(obj) = func.child_by_field_name("object") {
257                    let obj_name = self.node_text(obj, source).to_string();
258                    if let Some(method) = func.child_by_field_name("attribute") {
259                        let method_name = self.node_text(method, source).to_string();
260
261                        // Add to sequence for this object
262                        let key = format!("{}:{}", self.current_function, obj_name);
263                        self.sequences.entry(key).or_default().push(method_name);
264                    }
265                }
266            }
267        }
268    }
269
270    /// Handle a with statement
271    fn handle_with_statement(&mut self, node: Node, source: &[u8]) {
272        // Extract: with open(path) as f
273        for child in node.children(&mut node.walk()) {
274            if child.kind() == "with_clause" {
275                for item in child.children(&mut child.walk()) {
276                    if item.kind() == "with_item" {
277                        // Get the expression (open(...))
278                        let mut call_name = String::new();
279                        let mut var_name = String::new();
280
281                        for part in item.children(&mut item.walk()) {
282                            if part.kind() == "call" {
283                                call_name = self.extract_call_name(part, source);
284                            } else if part.kind() == "as_pattern" || part.kind() == "identifier" {
285                                // Get the alias
286                                if part.kind() == "identifier" {
287                                    var_name = self.node_text(part, source).to_string();
288                                } else {
289                                    for as_child in part.children(&mut part.walk()) {
290                                        if as_child.kind() == "identifier" {
291                                            var_name = self.node_text(as_child, source).to_string();
292                                            break;
293                                        }
294                                    }
295                                }
296                            }
297                        }
298
299                        if !call_name.is_empty() && !var_name.is_empty() {
300                            let key = format!("{}:{}", self.current_function, var_name);
301                            self.sequences
302                                .entry(key.clone())
303                                .or_default()
304                                .push(call_name);
305                            // with statement implies automatic close
306                            self.sequences
307                                .entry(key)
308                                .or_default()
309                                .push("__exit__".to_string());
310                        }
311                    }
312                }
313            }
314        }
315    }
316
317    /// Extract the call name from a call node
318    fn extract_call_name(&self, node: Node, source: &[u8]) -> String {
319        if let Some(func) = node.child_by_field_name("function") {
320            return self.extract_name_from_expr(func, source);
321        }
322
323        // Fallback: iterate children
324        for child in node.children(&mut node.walk()) {
325            match child.kind() {
326                "identifier" => return self.node_text(child, source).to_string(),
327                "attribute" => return self.extract_name_from_expr(child, source),
328                _ => continue,
329            }
330        }
331        String::new()
332    }
333
334    /// Extract a dotted name from an expression
335    fn extract_name_from_expr(&self, node: Node, source: &[u8]) -> String {
336        match node.kind() {
337            "identifier" => self.node_text(node, source).to_string(),
338            "attribute" => {
339                // Get just the last part (method name)
340                if let Some(attr) = node.child_by_field_name("attribute") {
341                    self.node_text(attr, source).to_string()
342                } else {
343                    String::new()
344                }
345            }
346            _ => self.node_text(node, source).to_string(),
347        }
348    }
349
350    /// Get function name from a function definition
351    fn get_function_name(&self, node: Node, source: &[u8]) -> String {
352        for child in node.children(&mut node.walk()) {
353            if child.kind() == "identifier" {
354                return self.node_text(child, source).to_string();
355            }
356        }
357        String::new()
358    }
359
360    /// Get text for a node
361    fn node_text<'a>(&self, node: Node, source: &'a [u8]) -> &'a str {
362        node.utf8_text(source).unwrap_or("")
363    }
364
365    /// Get extracted sequences
366    pub fn get_sequences(&self) -> &HashMap<String, Vec<String>> {
367        &self.sequences
368    }
369}
370
371/// Extract method call sequences from source code
372pub fn extract_sequences(source: &str) -> HashMap<String, Vec<String>> {
373    let mut extractor = SequenceExtractor::new();
374
375    // Parse with tree-sitter
376    let mut parser = match get_python_parser() {
377        Ok(p) => p,
378        Err(_) => return HashMap::new(),
379    };
380
381    let tree = match parser.parse(source, None) {
382        Some(t) => t,
383        None => return HashMap::new(),
384    };
385
386    let root = tree.root_node();
387    let source_bytes = source.as_bytes();
388
389    // Find all function definitions and extract sequences
390    extract_functions_recursive(root, source_bytes, &mut extractor);
391
392    extractor.sequences
393}
394
395/// Recursively find function definitions and extract sequences
396fn extract_functions_recursive(node: Node, source: &[u8], extractor: &mut SequenceExtractor) {
397    match node.kind() {
398        "function_definition" | "async_function_definition" => {
399            extractor.extract_function(node, source);
400        }
401        _ => {}
402    }
403
404    // Recurse into children
405    let mut cursor = node.walk();
406    for child in node.children(&mut cursor) {
407        extract_functions_recursive(child, source, extractor);
408    }
409}
410
411// =============================================================================
412// Bigram Mining
413// =============================================================================
414
415/// Counter for bigrams with example tracking
416#[derive(Debug, Default)]
417pub struct BigramCounter {
418    /// Bigram counts: (before, after) -> count
419    pub counts: HashMap<(String, String), u32>,
420    /// Before counts: method -> count of times it's followed by something
421    pub before_counts: HashMap<String, u32>,
422    /// Example locations: (before, after) -> list of (file, line)
423    pub examples: HashMap<(String, String), Vec<TemporalExample>>,
424}
425
426impl BigramCounter {
427    /// Create a new bigram counter
428    pub fn new() -> Self {
429        Self::default()
430    }
431
432    /// Add sequences from extraction
433    pub fn add_sequences(&mut self, sequences: &HashMap<String, Vec<String>>, file: &str) {
434        for calls in sequences.values() {
435            // Parse function name from key (func:var)
436            let line = 1u32; // Would need more tracking for accurate line numbers
437
438            for i in 0..calls.len().saturating_sub(1) {
439                let before = &calls[i];
440                let after = &calls[i + 1];
441
442                // Skip self-loops
443                if before == after {
444                    continue;
445                }
446
447                let pair = (before.clone(), after.clone());
448
449                // Increment bigram count
450                *self.counts.entry(pair.clone()).or_default() += 1;
451
452                // Increment before count
453                *self.before_counts.entry(before.clone()).or_default() += 1;
454
455                // Add example
456                self.examples
457                    .entry(pair)
458                    .or_default()
459                    .push(TemporalExample {
460                        file: file.to_string(),
461                        line,
462                    });
463            }
464        }
465    }
466}
467
468/// Mine bigram constraints from sequences
469pub fn mine_bigrams(
470    sequences: &HashMap<String, Vec<String>>,
471    file: &str,
472    args: &TemporalArgs,
473) -> (BigramCounter, Vec<TemporalConstraint>) {
474    let mut counter = BigramCounter::new();
475    counter.add_sequences(sequences, file);
476
477    let mut constraints = Vec::new();
478
479    for ((before, after), count) in &counter.counts {
480        // Filter by min_support
481        if *count < args.min_support {
482            continue;
483        }
484
485        // Calculate confidence
486        let before_total = *counter.before_counts.get(before).unwrap_or(&1);
487        let confidence = (*count as f64) / (before_total as f64);
488
489        // Filter by min_confidence
490        if confidence < args.min_confidence {
491            continue;
492        }
493
494        // Get examples (limited)
495        let examples = counter
496            .examples
497            .get(&(before.clone(), after.clone()))
498            .map(|ex| {
499                ex.iter()
500                    .take(args.include_examples as usize)
501                    .cloned()
502                    .collect()
503            })
504            .unwrap_or_default();
505
506        constraints.push(TemporalConstraint {
507            before: before.clone(),
508            after: after.clone(),
509            support: *count,
510            confidence,
511            examples,
512        });
513    }
514
515    // Sort by confidence (descending), then support (descending)
516    constraints.sort_by(|a, b| {
517        b.confidence
518            .partial_cmp(&a.confidence)
519            .unwrap_or(std::cmp::Ordering::Equal)
520            .then_with(|| b.support.cmp(&a.support))
521    });
522
523    (counter, constraints)
524}
525
526// =============================================================================
527// Trigram Mining (TIGER-05: MAX_TRIGRAMS limit)
528// =============================================================================
529
530/// Mine trigram patterns with MAX_TRIGRAMS limit (TIGER-05)
531pub fn mine_trigrams(
532    sequences: &HashMap<String, Vec<String>>,
533    args: &TemporalArgs,
534) -> Vec<Trigram> {
535    // Count trigrams
536    let mut trigram_counts: HashMap<(String, String, String), u32> = HashMap::new();
537    let mut bigram_follows: HashMap<(String, String), u32> = HashMap::new();
538
539    for calls in sequences.values() {
540        for i in 0..calls.len().saturating_sub(2) {
541            let a = &calls[i];
542            let b = &calls[i + 1];
543            let c = &calls[i + 2];
544
545            // Skip if any self-loops
546            if a == b || b == c {
547                continue;
548            }
549
550            *trigram_counts
551                .entry((a.clone(), b.clone(), c.clone()))
552                .or_default() += 1;
553
554            // Count bigram follows
555            if a != b {
556                *bigram_follows.entry((a.clone(), b.clone())).or_default() += 1;
557            }
558        }
559    }
560
561    // TIGER-05: Use BinaryHeap for top-K selection to limit memory
562    // We use a min-heap of size MAX_TRIGRAMS, keeping the largest support values
563    let mut heap: BinaryHeap<Reverse<(u32, String, String, String)>> = BinaryHeap::new();
564
565    for ((a, b, c), count) in &trigram_counts {
566        if *count < args.min_support {
567            continue;
568        }
569
570        // Calculate confidence
571        let bigram_total = *bigram_follows.get(&(a.clone(), b.clone())).unwrap_or(&1);
572        let confidence = (*count as f64) / (bigram_total as f64);
573
574        if confidence < args.min_confidence {
575            continue;
576        }
577
578        // Add to heap with support as priority
579        if heap.len() < MAX_TRIGRAMS {
580            heap.push(Reverse((*count, a.clone(), b.clone(), c.clone())));
581        } else if let Some(&Reverse((min_support, _, _, _))) = heap.peek() {
582            if *count > min_support {
583                heap.pop();
584                heap.push(Reverse((*count, a.clone(), b.clone(), c.clone())));
585            }
586        }
587    }
588
589    // Convert heap to sorted vector
590    let mut trigrams: Vec<Trigram> = heap
591        .into_iter()
592        .map(|Reverse((support, a, b, c))| {
593            let bigram_total = *bigram_follows.get(&(a.clone(), b.clone())).unwrap_or(&1);
594            let confidence = (support as f64) / (bigram_total as f64);
595
596            Trigram {
597                sequence: [a, b, c],
598                support,
599                confidence,
600            }
601        })
602        .collect();
603
604    // Sort by confidence (descending), then support (descending)
605    trigrams.sort_by(|a, b| {
606        b.confidence
607            .partial_cmp(&a.confidence)
608            .unwrap_or(std::cmp::Ordering::Equal)
609            .then_with(|| b.support.cmp(&a.support))
610    });
611
612    trigrams
613}
614
615// =============================================================================
616// Query Filtering
617// =============================================================================
618
619/// Filter constraints by query string
620pub fn filter_by_query(
621    constraints: Vec<TemporalConstraint>,
622    query: &str,
623) -> Vec<TemporalConstraint> {
624    constraints
625        .into_iter()
626        .filter(|c| c.before.contains(query) || c.after.contains(query))
627        .collect()
628}
629
630/// Filter trigrams by query string
631pub fn filter_trigrams_by_query(trigrams: Vec<Trigram>, query: &str) -> Vec<Trigram> {
632    trigrams
633        .into_iter()
634        .filter(|t| t.sequence.iter().any(|s| s.contains(query)))
635        .collect()
636}
637
638// =============================================================================
639// Tree-sitter Parser
640// =============================================================================
641
642/// Initialize tree-sitter parser for Python
643fn get_python_parser() -> PatternsResult<Parser> {
644    let mut parser = Parser::new();
645    let language = tree_sitter_python::LANGUAGE;
646    parser.set_language(&language.into()).map_err(|e| {
647        PatternsError::parse_error(PathBuf::new(), format!("Failed to set language: {}", e))
648    })?;
649    Ok(parser)
650}
651
652// =============================================================================
653// File Analysis
654// =============================================================================
655
656type TemporalFileAnalysis = (HashMap<String, Vec<String>>, Vec<TemporalConstraint>);
657
658/// Analyze temporal constraints for a single file
659fn analyze_temporal_file(
660    path: &Path,
661    args: &TemporalArgs,
662) -> PatternsResult<TemporalFileAnalysis> {
663    // Validate path
664    let canonical = if let Some(ref root) = args.project_root {
665        validate_file_path_in_project(path, root)?
666    } else {
667        validate_file_path(path)?
668    };
669
670    // Read source
671    let source = read_file_safe(&canonical)?;
672    let file_path_str = canonical.to_string_lossy().to_string();
673
674    // Extract sequences
675    let sequences = extract_sequences(&source);
676
677    // Mine bigrams
678    let (_, constraints) = mine_bigrams(&sequences, &file_path_str, args);
679
680    Ok((sequences, constraints))
681}
682
683/// Analyze temporal constraints for a directory
684fn analyze_temporal_directory(
685    path: &Path,
686    args: &TemporalArgs,
687    start_time: Instant,
688) -> PatternsResult<TemporalReport> {
689    let canonical = validate_directory_path(path)?;
690    let timeout = Duration::from_secs(args.timeout);
691
692    let mut all_sequences: HashMap<String, Vec<String>> = HashMap::new();
693    let mut all_examples: HashMap<(String, String), Vec<TemporalExample>> = HashMap::new();
694    let mut bigram_counts: HashMap<(String, String), u32> = HashMap::new();
695    let mut before_counts: HashMap<String, u32> = HashMap::new();
696    let mut files_analyzed = 0u32;
697
698    // Walk directory
699    for entry in walkdir::WalkDir::new(&canonical)
700        .follow_links(false)
701        .into_iter()
702        .filter_map(|e| e.ok())
703    {
704        // Check timeout (E03 mitigation)
705        if start_time.elapsed() > timeout {
706            break;
707        }
708
709        let entry_path = entry.path();
710
711        // Skip non-Python files
712        if entry_path.extension().is_none_or(|ext| ext != "py") {
713            continue;
714        }
715
716        // Check file count limit
717        files_analyzed += 1;
718        if files_analyzed > args.max_files {
719            break;
720        }
721        check_directory_file_count(files_analyzed as usize)?;
722
723        // Analyze file
724        let file_path_str = entry_path.to_string_lossy().to_string();
725        if let Ok(source) = read_file_safe(entry_path) {
726            let sequences = extract_sequences(&source);
727
728            // Aggregate sequences
729            for (key, calls) in &sequences {
730                all_sequences
731                    .entry(key.clone())
732                    .or_default()
733                    .extend(calls.clone());
734
735                // Count bigrams
736                for i in 0..calls.len().saturating_sub(1) {
737                    let before = &calls[i];
738                    let after = &calls[i + 1];
739
740                    if before == after {
741                        continue;
742                    }
743
744                    let pair = (before.clone(), after.clone());
745                    *bigram_counts.entry(pair.clone()).or_default() += 1;
746                    *before_counts.entry(before.clone()).or_default() += 1;
747
748                    // Track examples
749                    let examples = all_examples.entry(pair).or_default();
750                    if examples.len() < args.include_examples as usize {
751                        examples.push(TemporalExample {
752                            file: file_path_str.clone(),
753                            line: 1, // Would need better line tracking
754                        });
755                    }
756                }
757            }
758        }
759    }
760
761    // Build constraints from aggregated data
762    let mut constraints = Vec::new();
763
764    for ((before, after), count) in &bigram_counts {
765        if *count < args.min_support {
766            continue;
767        }
768
769        let before_total = *before_counts.get(before).unwrap_or(&1);
770        let confidence = (*count as f64) / (before_total as f64);
771
772        if confidence < args.min_confidence {
773            continue;
774        }
775
776        let examples = all_examples
777            .get(&(before.clone(), after.clone()))
778            .cloned()
779            .unwrap_or_default();
780
781        constraints.push(TemporalConstraint {
782            before: before.clone(),
783            after: after.clone(),
784            support: *count,
785            confidence,
786            examples,
787        });
788    }
789
790    // Sort by confidence, then support
791    constraints.sort_by(|a, b| {
792        b.confidence
793            .partial_cmp(&a.confidence)
794            .unwrap_or(std::cmp::Ordering::Equal)
795            .then_with(|| b.support.cmp(&a.support))
796    });
797
798    // Apply query filter if specified
799    if let Some(ref query) = args.query {
800        constraints = filter_by_query(constraints, query);
801    }
802
803    // Mine trigrams if requested
804    let trigrams = if args.include_trigrams {
805        let mut trigrams = mine_trigrams(&all_sequences, args);
806        if let Some(ref query) = args.query {
807            trigrams = filter_trigrams_by_query(trigrams, query);
808        }
809        trigrams
810    } else {
811        Vec::new()
812    };
813
814    let sequences_extracted: u32 = all_sequences.values().map(|v| v.len() as u32).sum();
815
816    Ok(TemporalReport {
817        constraints,
818        trigrams,
819        metadata: TemporalMetadata {
820            files_analyzed,
821            sequences_extracted,
822            min_support: args.min_support,
823            min_confidence: args.min_confidence,
824        },
825    })
826}
827
828// =============================================================================
829// Text Formatting
830// =============================================================================
831
832/// Format a temporal report as human-readable text
833pub fn format_temporal_text(report: &TemporalReport) -> String {
834    let mut lines = Vec::new();
835
836    lines.push("Temporal Constraints".to_string());
837    lines.push("=".repeat(40));
838    lines.push(String::new());
839
840    if report.constraints.is_empty() {
841        lines.push("No constraints found matching criteria.".to_string());
842    } else {
843        lines.push(format!("Found {} constraints:", report.constraints.len()));
844        lines.push(String::new());
845
846        for constraint in &report.constraints {
847            lines.push(format!("  {} -> {}", constraint.before, constraint.after));
848            lines.push(format!(
849                "    support: {}, confidence: {:.2}",
850                constraint.support, constraint.confidence
851            ));
852
853            if !constraint.examples.is_empty() {
854                lines.push("    examples:".to_string());
855                for example in &constraint.examples {
856                    lines.push(format!("      - {}:{}", example.file, example.line));
857                }
858            }
859            lines.push(String::new());
860        }
861    }
862
863    if !report.trigrams.is_empty() {
864        lines.push(String::new());
865        lines.push("Trigrams".to_string());
866        lines.push("-".repeat(40));
867        lines.push(String::new());
868
869        for trigram in &report.trigrams {
870            lines.push(format!(
871                "  {} -> {} -> {}",
872                trigram.sequence[0], trigram.sequence[1], trigram.sequence[2]
873            ));
874            lines.push(format!(
875                "    support: {}, confidence: {:.2}",
876                trigram.support, trigram.confidence
877            ));
878            lines.push(String::new());
879        }
880    }
881
882    lines.push(String::new());
883    lines.push("Metadata".to_string());
884    lines.push("-".repeat(40));
885    lines.push(format!(
886        "  Files analyzed: {}",
887        report.metadata.files_analyzed
888    ));
889    lines.push(format!(
890        "  Sequences extracted: {}",
891        report.metadata.sequences_extracted
892    ));
893    lines.push(format!("  Min support: {}", report.metadata.min_support));
894    lines.push(format!(
895        "  Min confidence: {:.2}",
896        report.metadata.min_confidence
897    ));
898
899    lines.join("\n")
900}
901
902// =============================================================================
903// Entry Point
904// =============================================================================
905
906/// Execute the temporal command
907pub fn run(args: TemporalArgs, global_format: GlobalOutputFormat) -> anyhow::Result<()> {
908    let start_time = Instant::now();
909    let path = &args.path;
910
911    // Validate language
912    if args.source_lang.to_lowercase() != "python" && args.source_lang.to_lowercase() != "auto" {
913        return Err(PatternsError::UnsupportedLanguage {
914            language: args.source_lang.clone(),
915        }
916        .into());
917    }
918
919    let report = if path.is_dir() {
920        analyze_temporal_directory(path, &args, start_time)?
921    } else {
922        let (sequences, mut constraints) = analyze_temporal_file(path, &args)?;
923
924        // Apply query filter if specified
925        if let Some(ref query) = args.query {
926            constraints = filter_by_query(constraints, query);
927        }
928
929        // Mine trigrams if requested
930        let trigrams = if args.include_trigrams {
931            let mut trigrams = mine_trigrams(&sequences, &args);
932            if let Some(ref query) = args.query {
933                trigrams = filter_trigrams_by_query(trigrams, query);
934            }
935            trigrams
936        } else {
937            Vec::new()
938        };
939
940        let sequences_extracted: u32 = sequences.values().map(|v| v.len() as u32).sum();
941
942        TemporalReport {
943            constraints,
944            trigrams,
945            metadata: TemporalMetadata {
946                files_analyzed: 1,
947                sequences_extracted,
948                min_support: args.min_support,
949                min_confidence: args.min_confidence,
950            },
951        }
952    };
953
954    // Resolve format: global -f flag takes priority over hidden --output-format
955    let use_text = matches!(global_format, GlobalOutputFormat::Text)
956        || matches!(args.output_format, OutputFormat::Text);
957
958    // Check if no constraints found -> exit code 2
959    if report.constraints.is_empty() && report.trigrams.is_empty() {
960        if use_text {
961            println!("{}", format_temporal_text(&report));
962        } else {
963            let json = serde_json::to_string_pretty(&report)?;
964            println!("{}", json);
965        }
966        std::process::exit(2);
967    }
968
969    if use_text {
970        println!("{}", format_temporal_text(&report));
971    } else {
972        let json = serde_json::to_string_pretty(&report)?;
973        println!("{}", json);
974    }
975
976    Ok(())
977}
978
979// =============================================================================
980// Tests
981// =============================================================================
982
#[cfg(test)]
mod tests {
    use super::*;

    /// Baseline arguments shared by the tests; each test overrides only the fields it cares
    /// about via struct-update syntax.
    fn base_args() -> TemporalArgs {
        TemporalArgs {
            path: PathBuf::new(),
            min_support: 1,
            min_confidence: 0.0,
            query: None,
            source_lang: "python".to_string(),
            max_files: 1000,
            include_trigrams: false,
            include_examples: 3,
            output_format: OutputFormat::Json,
            timeout: 60,
            project_root: None,
            lang: None,
        }
    }

    /// Owned call names built from string literals.
    fn calls(names: &[&str]) -> Vec<String> {
        names.iter().map(|name| name.to_string()).collect()
    }

    /// A single `func:f` sequence of `open`, `read`, `close`.
    fn open_read_close() -> HashMap<String, Vec<String>> {
        let mut sequences = HashMap::new();
        sequences.insert("func:f".to_string(), calls(&["open", "read", "close"]));
        sequences
    }

    /// A constraint without examples.
    fn constraint(before: &str, after: &str, support: u32, confidence: f64) -> TemporalConstraint {
        TemporalConstraint {
            before: before.to_string(),
            after: after.to_string(),
            support: support as _,
            confidence: confidence as _,
            examples: vec![],
        }
    }

    #[test]
    fn test_extract_sequences_simple() {
        let code = r#"
def read_config(path):
    f = open(path)
    content = f.read()
    f.close()
    return content
"#;
        let sequences = extract_sequences(code);

        // Some key must refer to the variable `f`.
        assert!(
            sequences.keys().any(|key| key.contains(":f")),
            "Should extract sequence for variable f"
        );
    }

    #[test]
    fn test_bigram_counter() {
        let sequences = open_read_close();

        let mut counter = BigramCounter::new();
        counter.add_sequences(&sequences, "test.py");

        let open_read = ("open".to_string(), "read".to_string());
        let read_close = ("read".to_string(), "close".to_string());

        assert_eq!(counter.counts.get(&open_read), Some(&1));
        assert_eq!(counter.counts.get(&read_close), Some(&1));
    }

    #[test]
    fn test_mine_bigrams_filter() {
        let sequences = open_read_close();
        let args = base_args();

        let (_, constraints) = mine_bigrams(&sequences, "test.py", &args);

        assert!(!constraints.is_empty(), "Should find bigram constraints");
    }

    #[test]
    fn test_filter_by_query() {
        let constraints = vec![
            constraint("open", "read", 5, 0.8),
            constraint("acquire", "release", 3, 0.9),
        ];

        let filtered = filter_by_query(constraints, "open");

        assert_eq!(filtered.len(), 1);
        assert_eq!(filtered[0].before, "open");
    }

    #[test]
    fn test_mine_trigrams_limit() {
        // One long sequence of distinct method names, so many trigrams are generated.
        let mut sequences = HashMap::new();
        let method_names: Vec<String> = (0..100).map(|i| format!("method{}", i)).collect();
        sequences.insert("func:obj".to_string(), method_names);

        let args = TemporalArgs {
            include_trigrams: true,
            ..base_args()
        };

        let trigrams = mine_trigrams(&sequences, &args);

        // Should respect MAX_TRIGRAMS limit
        assert!(trigrams.len() <= MAX_TRIGRAMS);
    }

    #[test]
    fn test_format_temporal_text() {
        let open_close = TemporalConstraint {
            before: "open".to_string(),
            after: "close".to_string(),
            support: 10,
            confidence: 0.95,
            examples: vec![TemporalExample {
                file: "test.py".to_string(),
                line: 5,
            }],
        };
        let report = TemporalReport {
            constraints: vec![open_close],
            trigrams: vec![],
            metadata: TemporalMetadata {
                files_analyzed: 1,
                sequences_extracted: 5,
                min_support: 2,
                min_confidence: 0.5,
            },
        };

        let text = format_temporal_text(&report);

        assert!(text.contains("open -> close"));
        assert!(text.contains("support: 10"));
        assert!(text.contains("confidence: 0.95"));
    }

    #[test]
    fn test_temporal_args_lang_flag() {
        use tldr_core::types::Language;

        // Verify TemporalArgs has a lang field of type Option<Language>
        let args = TemporalArgs {
            path: PathBuf::from("src/"),
            min_support: 2,
            min_confidence: 0.5,
            lang: Some(Language::Python),
            ..base_args()
        };
        assert_eq!(args.lang, Some(Language::Python));

        // Also test None case (auto-detect)
        let args_auto = TemporalArgs {
            path: PathBuf::from("src/"),
            min_support: 2,
            min_confidence: 0.5,
            lang: None,
            ..base_args()
        };
        assert_eq!(args_auto.lang, None);
    }
}