1use std::cmp::Reverse;
32use std::collections::{BinaryHeap, HashMap};
33use std::path::{Path, PathBuf};
34use std::time::{Duration, Instant};
35
36use clap::Args;
37use tree_sitter::{Node, Parser};
38
39use tldr_core::callgraph::{
40 build_project_call_graph_v2, extract_calls_for_language, BuildConfig, CallSite,
41};
42use tldr_core::types::Language;
43
44use crate::output::OutputFormat as GlobalOutputFormat;
45
46use super::error::{PatternsError, PatternsResult};
47use super::types::{
48 OutputFormat, TemporalConstraint, TemporalExample, TemporalMetadata, TemporalReport, Trigram,
49};
50use super::validation::{
51 check_directory_file_count, read_file_safe, validate_directory_path, validate_file_path,
52 validate_file_path_in_project, MAX_TRIGRAMS,
53};
54
55#[derive(Debug, Args)]
61pub struct TemporalArgs {
62 pub path: PathBuf,
64
65 #[arg(long, default_value = "2")]
67 pub min_support: u32,
68
69 #[arg(long, default_value = "0.5")]
71 pub min_confidence: f64,
72
73 #[arg(long)]
75 pub query: Option<String>,
76
77 #[arg(long = "source-lang", default_value = "python")]
80 pub source_lang: String,
81
82 #[arg(long, default_value = "1000")]
84 pub max_files: u32,
85
86 #[arg(long)]
88 pub include_trigrams: bool,
89
90 #[arg(long, default_value = "3")]
92 pub include_examples: u32,
93
94 #[arg(
96 long = "output",
97 short = 'o',
98 hide = true,
99 default_value = "json",
100 value_enum
101 )]
102 pub output_format: OutputFormat,
103
104 #[arg(long, default_value = "60")]
106 pub timeout: u64,
107
108 #[arg(long)]
110 pub project_root: Option<PathBuf>,
111
112 #[arg(long, short = 'l')]
114 pub lang: Option<Language>,
115}
116
117impl TemporalArgs {
118 pub fn run(&self, global_format: GlobalOutputFormat) -> anyhow::Result<()> {
120 run(self.clone(), global_format)
121 }
122}
123
124impl Clone for TemporalArgs {
125 fn clone(&self) -> Self {
126 Self {
127 path: self.path.clone(),
128 min_support: self.min_support,
129 min_confidence: self.min_confidence,
130 query: self.query.clone(),
131 source_lang: self.source_lang.clone(),
132 max_files: self.max_files,
133 include_trigrams: self.include_trigrams,
134 include_examples: self.include_examples,
135 output_format: self.output_format,
136 timeout: self.timeout,
137 project_root: self.project_root.clone(),
138 lang: self.lang,
139 }
140 }
141}
142
143#[derive(Debug, Default)]
149pub struct SequenceExtractor {
150 current_function: String,
152 sequences: HashMap<String, Vec<String>>,
154 var_assignments: HashMap<String, String>,
156 current_line: u32,
158}
159
160impl SequenceExtractor {
161 pub fn new() -> Self {
163 Self::default()
164 }
165
166 pub fn extract_function(&mut self, func_node: Node, source: &[u8]) {
168 let func_name = self.get_function_name(func_node, source);
170 if func_name.is_empty() {
171 return;
172 }
173 self.current_function = func_name;
174 self.var_assignments.clear();
175
176 self.extract_calls_recursive(func_node, source, 0);
178 }
179
180 fn extract_calls_recursive(&mut self, node: Node, source: &[u8], depth: usize) {
182 if depth > 100 {
184 return;
185 }
186
187 self.current_line = node.start_position().row as u32 + 1;
188
189 match node.kind() {
190 "assignment" => {
192 self.handle_assignment(node, source);
193 }
194
195 "call" => {
197 self.handle_call(node, source);
198 }
199
200 "with_statement" => {
202 self.handle_with_statement(node, source);
203 }
204
205 _ => {}
206 }
207
208 let mut cursor = node.walk();
210 for child in node.children(&mut cursor) {
211 self.extract_calls_recursive(child, source, depth + 1);
212 }
213 }
214
215 fn handle_assignment(&mut self, node: Node, source: &[u8]) {
217 let var_name = if let Some(left) = node.child_by_field_name("left") {
219 self.node_text(left, source).to_string()
220 } else {
221 let mut var = String::new();
223 for child in node.children(&mut node.walk()) {
224 if child.kind() == "identifier" {
225 var = self.node_text(child, source).to_string();
226 break;
227 }
228 }
229 var
230 };
231
232 if var_name.is_empty() {
233 return;
234 }
235
236 if let Some(right) = node.child_by_field_name("right") {
238 if right.kind() == "call" {
240 let call_name = self.extract_call_name(right, source);
241 if !call_name.is_empty() {
242 self.var_assignments
244 .insert(var_name.clone(), call_name.clone());
245
246 let key = format!("{}:{}", self.current_function, var_name);
248 self.sequences.entry(key).or_default().push(call_name);
249 }
250 }
251 }
252 }
253
254 fn handle_call(&mut self, node: Node, source: &[u8]) {
256 if let Some(func) = node.child_by_field_name("function") {
258 if func.kind() == "attribute" {
259 if let Some(obj) = func.child_by_field_name("object") {
261 let obj_name = self.node_text(obj, source).to_string();
262 if let Some(method) = func.child_by_field_name("attribute") {
263 let method_name = self.node_text(method, source).to_string();
264
265 let key = format!("{}:{}", self.current_function, obj_name);
267 self.sequences.entry(key).or_default().push(method_name);
268 }
269 }
270 }
271 }
272 }
273
274 fn handle_with_statement(&mut self, node: Node, source: &[u8]) {
276 for child in node.children(&mut node.walk()) {
278 if child.kind() == "with_clause" {
279 for item in child.children(&mut child.walk()) {
280 if item.kind() == "with_item" {
281 let mut call_name = String::new();
283 let mut var_name = String::new();
284
285 for part in item.children(&mut item.walk()) {
286 if part.kind() == "call" {
287 call_name = self.extract_call_name(part, source);
288 } else if part.kind() == "as_pattern" || part.kind() == "identifier" {
289 if part.kind() == "identifier" {
291 var_name = self.node_text(part, source).to_string();
292 } else {
293 for as_child in part.children(&mut part.walk()) {
294 if as_child.kind() == "identifier" {
295 var_name = self.node_text(as_child, source).to_string();
296 break;
297 }
298 }
299 }
300 }
301 }
302
303 if !call_name.is_empty() && !var_name.is_empty() {
304 let key = format!("{}:{}", self.current_function, var_name);
305 self.sequences
306 .entry(key.clone())
307 .or_default()
308 .push(call_name);
309 self.sequences
311 .entry(key)
312 .or_default()
313 .push("__exit__".to_string());
314 }
315 }
316 }
317 }
318 }
319 }
320
321 fn extract_call_name(&self, node: Node, source: &[u8]) -> String {
323 if let Some(func) = node.child_by_field_name("function") {
324 return self.extract_name_from_expr(func, source);
325 }
326
327 for child in node.children(&mut node.walk()) {
329 match child.kind() {
330 "identifier" => return self.node_text(child, source).to_string(),
331 "attribute" => return self.extract_name_from_expr(child, source),
332 _ => continue,
333 }
334 }
335 String::new()
336 }
337
338 fn extract_name_from_expr(&self, node: Node, source: &[u8]) -> String {
340 match node.kind() {
341 "identifier" => self.node_text(node, source).to_string(),
342 "attribute" => {
343 if let Some(attr) = node.child_by_field_name("attribute") {
345 self.node_text(attr, source).to_string()
346 } else {
347 String::new()
348 }
349 }
350 _ => self.node_text(node, source).to_string(),
351 }
352 }
353
354 fn get_function_name(&self, node: Node, source: &[u8]) -> String {
356 for child in node.children(&mut node.walk()) {
357 if child.kind() == "identifier" {
358 return self.node_text(child, source).to_string();
359 }
360 }
361 String::new()
362 }
363
364 fn node_text<'a>(&self, node: Node, source: &'a [u8]) -> &'a str {
366 node.utf8_text(source).unwrap_or("")
367 }
368
369 pub fn get_sequences(&self) -> &HashMap<String, Vec<String>> {
371 &self.sequences
372 }
373}
374
375pub fn extract_sequences(source: &str) -> HashMap<String, Vec<String>> {
377 let mut extractor = SequenceExtractor::new();
378
379 let mut parser = match get_python_parser() {
381 Ok(p) => p,
382 Err(_) => return HashMap::new(),
383 };
384
385 let tree = match parser.parse(source, None) {
386 Some(t) => t,
387 None => return HashMap::new(),
388 };
389
390 let root = tree.root_node();
391 let source_bytes = source.as_bytes();
392
393 extract_functions_recursive(root, source_bytes, &mut extractor);
395
396 extractor.sequences
397}
398
399fn extract_functions_recursive(node: Node, source: &[u8], extractor: &mut SequenceExtractor) {
401 match node.kind() {
402 "function_definition" | "async_function_definition" => {
403 extractor.extract_function(node, source);
404 }
405 _ => {}
406 }
407
408 let mut cursor = node.walk();
410 for child in node.children(&mut cursor) {
411 extract_functions_recursive(child, source, extractor);
412 }
413}
414
415fn sequences_from_callsite_map(
442 file_key: &str,
443 calls_by_func: &HashMap<String, Vec<CallSite>>,
444) -> HashMap<String, Vec<String>> {
445 let mut out: HashMap<String, Vec<String>> = HashMap::new();
446 for (caller, sites) in calls_by_func {
447 if sites.is_empty() {
448 continue;
449 }
450 let mut ordered = sites.clone();
453 ordered.sort_by_key(|s| s.line.unwrap_or(u32::MAX));
454
455 let names: Vec<String> = ordered
456 .into_iter()
457 .map(|s| s.target)
458 .filter(|t| !t.is_empty())
459 .collect();
460
461 if names.is_empty() {
462 continue;
463 }
464 let key = format!("{}::{}", file_key, caller);
465 out.insert(key, names);
466 }
467 out
468}
469
470struct FileSequences {
474 sequences: HashMap<String, Vec<String>>,
475 first_line: HashMap<(String, String, String), u32>,
478}
479
480fn extract_sequences_for_file(
498 path: &Path,
499 source: &str,
500 language: Language,
501) -> PatternsResult<FileSequences> {
502 let file_key = path.to_string_lossy().to_string();
503
504 let mut sequences: HashMap<String, Vec<String>> = HashMap::new();
505 let mut first_line: HashMap<(String, String, String), u32> = HashMap::new();
506
507 if language == Language::Python {
509 let legacy = extract_sequences(source);
510 for (k, v) in legacy {
511 sequences.entry(k).or_default().extend(v);
513 }
514 }
515
516 let lang_str = language.as_str();
519 let calls_by_func = match extract_calls_for_language(lang_str, path, source) {
520 Ok(map) => map,
521 Err(_) => {
522 return Ok(FileSequences {
528 sequences,
529 first_line,
530 });
531 }
532 };
533
534 let scoped = sequences_from_callsite_map(&file_key, &calls_by_func);
535 for (k, v) in scoped {
536 sequences.entry(k).or_default().extend(v);
537 }
538
539 for (caller, sites) in &calls_by_func {
542 let mut ordered = sites.clone();
543 ordered.sort_by_key(|s| s.line.unwrap_or(u32::MAX));
544 for pair in ordered.windows(2) {
545 let before = pair[0].target.clone();
546 let after = pair[1].target.clone();
547 if before.is_empty() || after.is_empty() || before == after {
548 continue;
549 }
550 let line = pair[1].line.unwrap_or(1);
551 first_line
552 .entry((caller.clone(), before, after))
553 .or_insert(line);
554 }
555 }
556
557 Ok(FileSequences {
558 sequences,
559 first_line,
560 })
561}
562
563fn resolve_directory_language(path: &Path, args: &TemporalArgs) -> Option<Language> {
566 if let Some(lang) = args.lang {
567 return Some(lang);
568 }
569 Language::from_directory(path)
570}
571
572fn per_caller_first_line(
577 calls_by_func: &HashMap<String, Vec<CallSite>>,
578) -> HashMap<(String, String, String), u32> {
579 let mut first_line: HashMap<(String, String, String), u32> = HashMap::new();
580 for (caller, sites) in calls_by_func {
581 let mut ordered = sites.clone();
582 ordered.sort_by_key(|s| s.line.unwrap_or(u32::MAX));
583 for pair in ordered.windows(2) {
584 let before = pair[0].target.clone();
585 let after = pair[1].target.clone();
586 if before.is_empty() || after.is_empty() || before == after {
587 continue;
588 }
589 let line = pair[1].line.unwrap_or(1);
590 first_line
591 .entry((caller.clone(), before, after))
592 .or_insert(line);
593 }
594 }
595 first_line
596}
597
598#[allow(clippy::too_many_arguments)]
602fn aggregate_file_sequences(
603 file_sequences: &HashMap<String, Vec<String>>,
604 file_path_str: &str,
605 first_line: &HashMap<(String, String, String), u32>,
606 all_sequences: &mut HashMap<String, Vec<String>>,
607 bigram_counts: &mut HashMap<(String, String), u32>,
608 before_counts: &mut HashMap<String, u32>,
609 all_examples: &mut HashMap<(String, String), Vec<TemporalExample>>,
610 args: &TemporalArgs,
611) {
612 for (key, calls) in file_sequences {
613 all_sequences
614 .entry(key.clone())
615 .or_default()
616 .extend(calls.clone());
617
618 let caller_for_lookup = key
624 .rsplit_once("::")
625 .map(|(_, c)| c.to_string())
626 .unwrap_or_default();
627
628 for i in 0..calls.len().saturating_sub(1) {
629 let before = &calls[i];
630 let after = &calls[i + 1];
631
632 if before == after {
633 continue;
634 }
635
636 let pair = (before.clone(), after.clone());
637 *bigram_counts.entry(pair.clone()).or_default() += 1;
638 *before_counts.entry(before.clone()).or_default() += 1;
639
640 let examples = all_examples.entry(pair).or_default();
642 if examples.len() < args.include_examples as usize {
643 let line = first_line
644 .get(&(caller_for_lookup.clone(), before.clone(), after.clone()))
645 .copied()
646 .unwrap_or(1);
647 examples.push(TemporalExample {
648 file: file_path_str.to_string(),
649 line,
650 });
651 }
652 }
653 }
654}
655
656#[derive(Debug, Default)]
662pub struct BigramCounter {
663 pub counts: HashMap<(String, String), u32>,
665 pub before_counts: HashMap<String, u32>,
667 pub examples: HashMap<(String, String), Vec<TemporalExample>>,
669}
670
671impl BigramCounter {
672 pub fn new() -> Self {
674 Self::default()
675 }
676
677 pub fn add_sequences(&mut self, sequences: &HashMap<String, Vec<String>>, file: &str) {
679 for calls in sequences.values() {
680 let line = 1u32; for i in 0..calls.len().saturating_sub(1) {
684 let before = &calls[i];
685 let after = &calls[i + 1];
686
687 if before == after {
689 continue;
690 }
691
692 let pair = (before.clone(), after.clone());
693
694 *self.counts.entry(pair.clone()).or_default() += 1;
696
697 *self.before_counts.entry(before.clone()).or_default() += 1;
699
700 self.examples
702 .entry(pair)
703 .or_default()
704 .push(TemporalExample {
705 file: file.to_string(),
706 line,
707 });
708 }
709 }
710 }
711}
712
713pub fn mine_bigrams(
715 sequences: &HashMap<String, Vec<String>>,
716 file: &str,
717 args: &TemporalArgs,
718) -> (BigramCounter, Vec<TemporalConstraint>) {
719 let mut counter = BigramCounter::new();
720 counter.add_sequences(sequences, file);
721
722 let mut constraints = Vec::new();
723
724 for ((before, after), count) in &counter.counts {
725 if *count < args.min_support {
727 continue;
728 }
729
730 let before_total = *counter.before_counts.get(before).unwrap_or(&1);
732 let confidence = (*count as f64) / (before_total as f64);
733
734 if confidence < args.min_confidence {
736 continue;
737 }
738
739 let examples = counter
741 .examples
742 .get(&(before.clone(), after.clone()))
743 .map(|ex| {
744 ex.iter()
745 .take(args.include_examples as usize)
746 .cloned()
747 .collect()
748 })
749 .unwrap_or_default();
750
751 constraints.push(TemporalConstraint {
752 before: before.clone(),
753 after: after.clone(),
754 support: *count,
755 confidence,
756 examples,
757 });
758 }
759
760 constraints.sort_by(|a, b| {
762 b.confidence
763 .partial_cmp(&a.confidence)
764 .unwrap_or(std::cmp::Ordering::Equal)
765 .then_with(|| b.support.cmp(&a.support))
766 });
767
768 (counter, constraints)
769}
770
771pub fn mine_trigrams(
777 sequences: &HashMap<String, Vec<String>>,
778 args: &TemporalArgs,
779) -> Vec<Trigram> {
780 let mut trigram_counts: HashMap<(String, String, String), u32> = HashMap::new();
782 let mut bigram_follows: HashMap<(String, String), u32> = HashMap::new();
783
784 for calls in sequences.values() {
785 for i in 0..calls.len().saturating_sub(2) {
786 let a = &calls[i];
787 let b = &calls[i + 1];
788 let c = &calls[i + 2];
789
790 if a == b || b == c {
792 continue;
793 }
794
795 *trigram_counts
796 .entry((a.clone(), b.clone(), c.clone()))
797 .or_default() += 1;
798
799 if a != b {
801 *bigram_follows.entry((a.clone(), b.clone())).or_default() += 1;
802 }
803 }
804 }
805
806 let mut heap: BinaryHeap<Reverse<(u32, String, String, String)>> = BinaryHeap::new();
809
810 for ((a, b, c), count) in &trigram_counts {
811 if *count < args.min_support {
812 continue;
813 }
814
815 let bigram_total = *bigram_follows.get(&(a.clone(), b.clone())).unwrap_or(&1);
817 let confidence = (*count as f64) / (bigram_total as f64);
818
819 if confidence < args.min_confidence {
820 continue;
821 }
822
823 if heap.len() < MAX_TRIGRAMS {
825 heap.push(Reverse((*count, a.clone(), b.clone(), c.clone())));
826 } else if let Some(&Reverse((min_support, _, _, _))) = heap.peek() {
827 if *count > min_support {
828 heap.pop();
829 heap.push(Reverse((*count, a.clone(), b.clone(), c.clone())));
830 }
831 }
832 }
833
834 let mut trigrams: Vec<Trigram> = heap
836 .into_iter()
837 .map(|Reverse((support, a, b, c))| {
838 let bigram_total = *bigram_follows.get(&(a.clone(), b.clone())).unwrap_or(&1);
839 let confidence = (support as f64) / (bigram_total as f64);
840
841 Trigram {
842 sequence: [a, b, c],
843 support,
844 confidence,
845 }
846 })
847 .collect();
848
849 trigrams.sort_by(|a, b| {
851 b.confidence
852 .partial_cmp(&a.confidence)
853 .unwrap_or(std::cmp::Ordering::Equal)
854 .then_with(|| b.support.cmp(&a.support))
855 });
856
857 trigrams
858}
859
860pub fn filter_by_query(
866 constraints: Vec<TemporalConstraint>,
867 query: &str,
868) -> Vec<TemporalConstraint> {
869 constraints
870 .into_iter()
871 .filter(|c| c.before.contains(query) || c.after.contains(query))
872 .collect()
873}
874
875pub fn filter_trigrams_by_query(trigrams: Vec<Trigram>, query: &str) -> Vec<Trigram> {
877 trigrams
878 .into_iter()
879 .filter(|t| t.sequence.iter().any(|s| s.contains(query)))
880 .collect()
881}
882
883fn get_python_parser() -> PatternsResult<Parser> {
889 let mut parser = Parser::new();
890 let language = tree_sitter_python::LANGUAGE;
891 parser.set_language(&language.into()).map_err(|e| {
892 PatternsError::parse_error(PathBuf::new(), format!("Failed to set language: {}", e))
893 })?;
894 Ok(parser)
895}
896
897type TemporalFileAnalysis = (HashMap<String, Vec<String>>, Vec<TemporalConstraint>);
902
903fn analyze_temporal_file(path: &Path, args: &TemporalArgs) -> PatternsResult<TemporalFileAnalysis> {
905 let canonical = if let Some(ref root) = args.project_root {
907 validate_file_path_in_project(path, root)?
908 } else {
909 validate_file_path(path)?
910 };
911
912 let source = read_file_safe(&canonical)?;
914 let file_path_str = canonical.to_string_lossy().to_string();
915
916 let language = args
920 .lang
921 .or_else(|| Language::from_path(&canonical))
922 .unwrap_or(Language::Python);
923
924 let file_seqs = extract_sequences_for_file(&canonical, &source, language)?;
926 let sequences = file_seqs.sequences;
927
928 let (_, constraints) = mine_bigrams(&sequences, &file_path_str, args);
930
931 Ok((sequences, constraints))
932}
933
934fn analyze_temporal_directory(
936 path: &Path,
937 args: &TemporalArgs,
938 start_time: Instant,
939) -> PatternsResult<TemporalReport> {
940 let canonical = validate_directory_path(path)?;
941 let timeout = Duration::from_secs(args.timeout);
942
943 let mut all_sequences: HashMap<String, Vec<String>> = HashMap::new();
944 let mut all_examples: HashMap<(String, String), Vec<TemporalExample>> = HashMap::new();
945 let mut bigram_counts: HashMap<(String, String), u32> = HashMap::new();
946 let mut before_counts: HashMap<String, u32> = HashMap::new();
947 let mut files_analyzed = 0u32;
948
949 let resolved_lang = resolve_directory_language(&canonical, args);
954
955 let use_project_builder = matches!(resolved_lang, Some(Language::Ocaml));
961
962 if use_project_builder {
963 let lang = resolved_lang.expect("checked above");
967 let mut config = BuildConfig {
968 language: lang.as_str().to_string(),
969 respect_ignore: false,
970 ..Default::default()
971 };
972 config.use_type_resolution = false;
973 match build_project_call_graph_v2(&canonical, config) {
974 Ok(ir) => {
975 for (file_path, file_ir) in &ir.files {
976 if start_time.elapsed() > timeout {
977 break;
978 }
979 files_analyzed += 1;
980 if files_analyzed > args.max_files {
981 break;
982 }
983 check_directory_file_count(files_analyzed as usize)?;
984
985 let abs_path = if file_path.is_absolute() {
987 file_path.clone()
988 } else {
989 canonical.join(file_path)
990 };
991 let file_key = abs_path.to_string_lossy().to_string();
992 let scoped = sequences_from_callsite_map(&file_key, &file_ir.calls);
993
994 aggregate_file_sequences(
995 &scoped,
996 &file_key,
997 &per_caller_first_line(&file_ir.calls),
998 &mut all_sequences,
999 &mut bigram_counts,
1000 &mut before_counts,
1001 &mut all_examples,
1002 args,
1003 );
1004 }
1005 }
1006 Err(_) => {
1007 }
1012 }
1013 } else {
1014 for entry in tldr_core::walker::walk_project(&canonical) {
1016 if start_time.elapsed() > timeout {
1018 break;
1019 }
1020
1021 let entry_path = entry.path();
1022
1023 let entry_lang = match Language::from_path(entry_path) {
1029 Some(lang) => lang,
1030 None => continue,
1031 };
1032 if let Some(forced) = args.lang {
1033 if forced != entry_lang {
1034 continue;
1035 }
1036 } else if let Some(project_lang) = resolved_lang {
1037 if project_lang != entry_lang {
1038 continue;
1039 }
1040 }
1041
1042 files_analyzed += 1;
1044 if files_analyzed > args.max_files {
1045 break;
1046 }
1047 check_directory_file_count(files_analyzed as usize)?;
1048
1049 let file_path_str = entry_path.to_string_lossy().to_string();
1051 if let Ok(source) = read_file_safe(entry_path) {
1052 let file_seqs = match extract_sequences_for_file(entry_path, &source, entry_lang) {
1053 Ok(s) => s,
1054 Err(_) => continue,
1055 };
1056
1057 aggregate_file_sequences(
1058 &file_seqs.sequences,
1059 &file_path_str,
1060 &file_seqs.first_line,
1061 &mut all_sequences,
1062 &mut bigram_counts,
1063 &mut before_counts,
1064 &mut all_examples,
1065 args,
1066 );
1067 }
1068 }
1069 }
1070
1071 let mut constraints = Vec::new();
1073
1074 for ((before, after), count) in &bigram_counts {
1075 if *count < args.min_support {
1076 continue;
1077 }
1078
1079 let before_total = *before_counts.get(before).unwrap_or(&1);
1080 let confidence = (*count as f64) / (before_total as f64);
1081
1082 if confidence < args.min_confidence {
1083 continue;
1084 }
1085
1086 let examples = all_examples
1087 .get(&(before.clone(), after.clone()))
1088 .cloned()
1089 .unwrap_or_default();
1090
1091 constraints.push(TemporalConstraint {
1092 before: before.clone(),
1093 after: after.clone(),
1094 support: *count,
1095 confidence,
1096 examples,
1097 });
1098 }
1099
1100 constraints.sort_by(|a, b| {
1102 b.confidence
1103 .partial_cmp(&a.confidence)
1104 .unwrap_or(std::cmp::Ordering::Equal)
1105 .then_with(|| b.support.cmp(&a.support))
1106 });
1107
1108 if let Some(ref query) = args.query {
1110 constraints = filter_by_query(constraints, query);
1111 }
1112
1113 let trigrams = if args.include_trigrams {
1115 let mut trigrams = mine_trigrams(&all_sequences, args);
1116 if let Some(ref query) = args.query {
1117 trigrams = filter_trigrams_by_query(trigrams, query);
1118 }
1119 trigrams
1120 } else {
1121 Vec::new()
1122 };
1123
1124 let sequences_extracted: u32 = all_sequences.values().map(|v| v.len() as u32).sum();
1125
1126 Ok(TemporalReport {
1127 constraints,
1128 trigrams,
1129 metadata: TemporalMetadata {
1130 files_analyzed,
1131 sequences_extracted,
1132 min_support: args.min_support,
1133 min_confidence: args.min_confidence,
1134 },
1135 })
1136}
1137
1138pub fn format_temporal_text(report: &TemporalReport) -> String {
1144 let mut lines = Vec::new();
1145
1146 lines.push("Temporal Constraints".to_string());
1147 lines.push("=".repeat(40));
1148 lines.push(String::new());
1149
1150 if report.constraints.is_empty() {
1151 lines.push("No constraints found matching criteria.".to_string());
1152 } else {
1153 lines.push(format!("Found {} constraints:", report.constraints.len()));
1154 lines.push(String::new());
1155
1156 for constraint in &report.constraints {
1157 lines.push(format!(" {} -> {}", constraint.before, constraint.after));
1158 lines.push(format!(
1159 " support: {}, confidence: {:.2}",
1160 constraint.support, constraint.confidence
1161 ));
1162
1163 if !constraint.examples.is_empty() {
1164 lines.push(" examples:".to_string());
1165 for example in &constraint.examples {
1166 lines.push(format!(" - {}:{}", example.file, example.line));
1167 }
1168 }
1169 lines.push(String::new());
1170 }
1171 }
1172
1173 if !report.trigrams.is_empty() {
1174 lines.push(String::new());
1175 lines.push("Trigrams".to_string());
1176 lines.push("-".repeat(40));
1177 lines.push(String::new());
1178
1179 for trigram in &report.trigrams {
1180 lines.push(format!(
1181 " {} -> {} -> {}",
1182 trigram.sequence[0], trigram.sequence[1], trigram.sequence[2]
1183 ));
1184 lines.push(format!(
1185 " support: {}, confidence: {:.2}",
1186 trigram.support, trigram.confidence
1187 ));
1188 lines.push(String::new());
1189 }
1190 }
1191
1192 lines.push(String::new());
1193 lines.push("Metadata".to_string());
1194 lines.push("-".repeat(40));
1195 lines.push(format!(
1196 " Files analyzed: {}",
1197 report.metadata.files_analyzed
1198 ));
1199 lines.push(format!(
1200 " Sequences extracted: {}",
1201 report.metadata.sequences_extracted
1202 ));
1203 lines.push(format!(" Min support: {}", report.metadata.min_support));
1204 lines.push(format!(
1205 " Min confidence: {:.2}",
1206 report.metadata.min_confidence
1207 ));
1208
1209 lines.join("\n")
1210}
1211
1212pub fn run(args: TemporalArgs, global_format: GlobalOutputFormat) -> anyhow::Result<()> {
1218 let start_time = Instant::now();
1219 let path = &args.path;
1220
1221 let source_lang_norm = args.source_lang.to_lowercase();
1227 if source_lang_norm != "auto" && source_lang_norm.parse::<Language>().is_err() {
1228 return Err(PatternsError::UnsupportedLanguage {
1229 language: args.source_lang.clone(),
1230 }
1231 .into());
1232 }
1233
1234 let report = if path.is_dir() {
1235 analyze_temporal_directory(path, &args, start_time)?
1236 } else {
1237 let (sequences, mut constraints) = analyze_temporal_file(path, &args)?;
1238
1239 if let Some(ref query) = args.query {
1241 constraints = filter_by_query(constraints, query);
1242 }
1243
1244 let trigrams = if args.include_trigrams {
1246 let mut trigrams = mine_trigrams(&sequences, &args);
1247 if let Some(ref query) = args.query {
1248 trigrams = filter_trigrams_by_query(trigrams, query);
1249 }
1250 trigrams
1251 } else {
1252 Vec::new()
1253 };
1254
1255 let sequences_extracted: u32 = sequences.values().map(|v| v.len() as u32).sum();
1256
1257 TemporalReport {
1258 constraints,
1259 trigrams,
1260 metadata: TemporalMetadata {
1261 files_analyzed: 1,
1262 sequences_extracted,
1263 min_support: args.min_support,
1264 min_confidence: args.min_confidence,
1265 },
1266 }
1267 };
1268
1269 let use_text = matches!(global_format, GlobalOutputFormat::Text)
1271 || matches!(args.output_format, OutputFormat::Text);
1272
1273 if report.constraints.is_empty() && report.trigrams.is_empty() {
1275 if use_text {
1276 println!("{}", format_temporal_text(&report));
1277 } else {
1278 let json = serde_json::to_string_pretty(&report)?;
1279 println!("{}", json);
1280 }
1281 std::process::exit(2);
1282 }
1283
1284 if use_text {
1285 println!("{}", format_temporal_text(&report));
1286 } else {
1287 let json = serde_json::to_string_pretty(&report)?;
1288 println!("{}", json);
1289 }
1290
1291 Ok(())
1292}
1293
1294#[cfg(test)]
1299mod tests {
1300 use super::*;
1301
1302 #[test]
1303 fn test_extract_sequences_simple() {
1304 let code = r#"
1305def read_config(path):
1306 f = open(path)
1307 content = f.read()
1308 f.close()
1309 return content
1310"#;
1311 let sequences = extract_sequences(code);
1312
1313 let has_f_sequence = sequences.keys().any(|k| k.contains(":f"));
1315 assert!(has_f_sequence, "Should extract sequence for variable f");
1316 }
1317
1318 #[test]
1319 fn test_bigram_counter() {
1320 let mut sequences = HashMap::new();
1321 sequences.insert(
1322 "func:f".to_string(),
1323 vec!["open".to_string(), "read".to_string(), "close".to_string()],
1324 );
1325
1326 let mut counter = BigramCounter::new();
1327 counter.add_sequences(&sequences, "test.py");
1328
1329 assert_eq!(
1330 counter
1331 .counts
1332 .get(&("open".to_string(), "read".to_string())),
1333 Some(&1)
1334 );
1335 assert_eq!(
1336 counter
1337 .counts
1338 .get(&("read".to_string(), "close".to_string())),
1339 Some(&1)
1340 );
1341 }
1342
1343 #[test]
1344 fn test_mine_bigrams_filter() {
1345 let mut sequences = HashMap::new();
1346 sequences.insert(
1347 "func:f".to_string(),
1348 vec!["open".to_string(), "read".to_string(), "close".to_string()],
1349 );
1350
1351 let args = TemporalArgs {
1352 path: PathBuf::new(),
1353 min_support: 1,
1354 min_confidence: 0.0,
1355 query: None,
1356 source_lang: "python".to_string(),
1357 max_files: 1000,
1358 include_trigrams: false,
1359 include_examples: 3,
1360 output_format: OutputFormat::Json,
1361 timeout: 60,
1362 project_root: None,
1363 lang: None,
1364 };
1365
1366 let (_, constraints) = mine_bigrams(&sequences, "test.py", &args);
1367
1368 assert!(!constraints.is_empty(), "Should find bigram constraints");
1369 }
1370
1371 #[test]
1372 fn test_filter_by_query() {
1373 let constraints = vec![
1374 TemporalConstraint {
1375 before: "open".to_string(),
1376 after: "read".to_string(),
1377 support: 5,
1378 confidence: 0.8,
1379 examples: vec![],
1380 },
1381 TemporalConstraint {
1382 before: "acquire".to_string(),
1383 after: "release".to_string(),
1384 support: 3,
1385 confidence: 0.9,
1386 examples: vec![],
1387 },
1388 ];
1389
1390 let filtered = filter_by_query(constraints, "open");
1391 assert_eq!(filtered.len(), 1);
1392 assert_eq!(filtered[0].before, "open");
1393 }
1394
1395 #[test]
1396 fn test_mine_trigrams_limit() {
1397 let mut sequences = HashMap::new();
1399 let calls: Vec<String> = (0..100).map(|i| format!("method{}", i)).collect();
1400 sequences.insert("func:obj".to_string(), calls);
1401
1402 let args = TemporalArgs {
1403 path: PathBuf::new(),
1404 min_support: 1,
1405 min_confidence: 0.0,
1406 query: None,
1407 source_lang: "python".to_string(),
1408 max_files: 1000,
1409 include_trigrams: true,
1410 include_examples: 3,
1411 output_format: OutputFormat::Json,
1412 timeout: 60,
1413 project_root: None,
1414 lang: None,
1415 };
1416
1417 let trigrams = mine_trigrams(&sequences, &args);
1418
1419 assert!(trigrams.len() <= MAX_TRIGRAMS);
1421 }
1422
1423 #[test]
1424 fn test_format_temporal_text() {
1425 let report = TemporalReport {
1426 constraints: vec![TemporalConstraint {
1427 before: "open".to_string(),
1428 after: "close".to_string(),
1429 support: 10,
1430 confidence: 0.95,
1431 examples: vec![TemporalExample {
1432 file: "test.py".to_string(),
1433 line: 5,
1434 }],
1435 }],
1436 trigrams: vec![],
1437 metadata: TemporalMetadata {
1438 files_analyzed: 1,
1439 sequences_extracted: 5,
1440 min_support: 2,
1441 min_confidence: 0.5,
1442 },
1443 };
1444
1445 let text = format_temporal_text(&report);
1446 assert!(text.contains("open -> close"));
1447 assert!(text.contains("support: 10"));
1448 assert!(text.contains("confidence: 0.95"));
1449 }
1450
/// Checks that `TemporalArgs::lang` holds exactly the value it was constructed
/// with, both for an explicit language and for `None`.
#[test]
fn test_temporal_args_lang_flag() {
    use tldr_core::types::Language;

    // Both cases share every field except `lang`, so they are built through a
    // single closure that only varies that field.
    let build_args = |lang: Option<Language>| TemporalArgs {
        path: PathBuf::from("src/"),
        min_support: 2,
        min_confidence: 0.5,
        query: None,
        source_lang: "python".to_string(),
        max_files: 1000,
        include_trigrams: false,
        include_examples: 3,
        output_format: OutputFormat::Json,
        timeout: 60,
        project_root: None,
        lang,
    };

    let args = build_args(Some(Language::Python));
    assert_eq!(args.lang, Some(Language::Python));

    let args_auto = build_args(None);
    assert_eq!(args_auto.lang, None);
}
1489
1490 use std::io::Write;
1501
1502 fn extract_for_lang(extension: &str, source: &str, language: Language) -> Vec<Vec<String>> {
1505 let mut tmp = tempfile::Builder::new()
1506 .suffix(&format!(".{}", extension))
1507 .tempfile()
1508 .expect("tempfile");
1509 tmp.write_all(source.as_bytes()).expect("write source");
1510 let path = tmp.path().to_path_buf();
1511 let file_seqs = extract_sequences_for_file(&path, source, language)
1512 .expect("extract_sequences_for_file");
1513 file_seqs.sequences.into_values().collect()
1514 }
1515
/// Asserts that at least one sequence in `seqs` contains `helper` immediately
/// followed by `b_util`.
///
/// `language_label` is only used to tag the panic message.
///
/// # Panics
///
/// Panics, printing the label and all sequences, when no sequence contains
/// the adjacent pair.
fn assert_helper_then_b_util(seqs: &[Vec<String>], language_label: &str) {
    // Pair every call with its direct successor and look for the bigram.
    let found = seqs.iter().any(|calls| {
        calls
            .iter()
            .zip(calls.iter().skip(1))
            .any(|(first, second)| first == "helper" && second == "b_util")
    });
    assert!(
        found,
        "[{}] expected `helper -> b_util` bigram, got: {:?}",
        language_label, seqs
    );
}
1529
/// TypeScript fixture: `main` calls `helper()` and then `b_util()`; the
/// extracted sequences must contain that bigram.
#[test]
fn test_extract_sequences_typescript() {
    // One literal per fixture line; the concatenation is the exact source text.
    let ts_source = concat!(
        "function helper(): number { return 1; }\n",
        "function b_util(): number { return 2; }\n",
        "function main(): void {\n",
        " helper();\n",
        " b_util();\n",
        "}\n",
    );
    let sequences = extract_for_lang("ts", ts_source, Language::TypeScript);
    assert_helper_then_b_util(&sequences, "typescript");
}
1544
/// Java fixture: `main` calls `helper()` and then `bUtil()`; the extracted
/// sequences must contain that bigram.
///
/// The second method is named `bUtil` here, so the shared
/// `assert_helper_then_b_util` check (which looks for `b_util`) does not
/// apply and the bigram is verified inline.
#[test]
fn test_extract_sequences_java() {
    let source = "\
class Main {
 public static int helper() { return 1; }
 public static int bUtil() { return 2; }
 public static void main(String[] args) {
 helper();
 bUtil();
 }
}
";
    // Reuse the shared tempfile + extraction helper instead of repeating the
    // same setup inline, keeping this test consistent with its siblings.
    let seqs = extract_for_lang("java", source, Language::Java);
    let found = seqs
        .iter()
        .any(|seq| seq.windows(2).any(|w| w[0] == "helper" && w[1] == "bUtil"));
    assert!(
        found,
        "[java] expected `helper -> bUtil` bigram, got: {:?}",
        seqs
    );
}
1575
/// Go fixture: `main` calls `helper()` and then `b_util()`; the extracted
/// sequences must contain that bigram.
#[test]
fn test_extract_sequences_go() {
    // One literal per fixture line; the concatenation is the exact source text.
    let go_source = concat!(
        "package main\n",
        "\n",
        "func helper() int { return 1 }\n",
        "func b_util() int { return 2 }\n",
        "func main() {\n",
        " helper()\n",
        " b_util()\n",
        "}\n",
    );
    let sequences = extract_for_lang("go", go_source, Language::Go);
    assert_helper_then_b_util(&sequences, "go");
}
1592
/// Rust fixture: `main` calls `helper()` and then `b_util()`; the extracted
/// sequences must contain that bigram.
#[test]
fn test_extract_sequences_rust() {
    // One literal per fixture line; the concatenation is the exact source text.
    let rs_source = concat!(
        "fn helper() -> i32 { 1 }\n",
        "fn b_util() -> i32 { 2 }\n",
        "fn main() {\n",
        " let _ = helper();\n",
        " let _ = b_util();\n",
        "}\n",
    );
    let sequences = extract_for_lang("rs", rs_source, Language::Rust);
    assert_helper_then_b_util(&sequences, "rust");
}
1607
/// Python fixture with plain function calls: `main` calls `helper()` and then
/// `b_util()`; the extracted sequences must contain that bigram.
#[test]
fn test_extract_sequences_python_via_generalized_path() {
    // One literal per fixture line; the concatenation is the exact source text.
    let py_source = concat!(
        "def helper():\n",
        " return 1\n",
        "\n",
        "def b_util():\n",
        " return 2\n",
        "\n",
        "def main():\n",
        " helper()\n",
        " b_util()\n",
    );
    let sequences = extract_for_lang("py", py_source, Language::Python);
    assert_helper_then_b_util(&sequences, "python");
}
1626
/// Python fixture that opens a file into `f`, then calls `f.read()` and
/// `f.close()`; some extracted sequence must contain `open` directly followed
/// by `read`.
///
/// The extraction is done inline (not via `extract_for_lang`) so the failure
/// message can print the full keyed sequence map.
#[test]
fn test_extract_sequences_python_legacy_receiver_aware() {
    // One literal per fixture line; the concatenation is the exact source text.
    let py_source = concat!(
        "def read_config(path):\n",
        " f = open(path)\n",
        " content = f.read()\n",
        " f.close()\n",
        " return content\n",
    );

    let mut scratch = tempfile::Builder::new().suffix(".py").tempfile().unwrap();
    scratch.write_all(py_source.as_bytes()).unwrap();
    let scratch_path = scratch.path().to_path_buf();

    let extracted =
        extract_sequences_for_file(&scratch_path, py_source, Language::Python).expect("extract");

    let has_open_read = extracted.sequences.values().any(|calls| {
        calls
            .windows(2)
            .any(|pair| pair[0] == "open" && pair[1] == "read")
    });
    assert!(
        has_open_read,
        "python legacy: expected `open -> read` bigram for receiver f, got: {:?}",
        extracted.sequences
    );
}
1655
/// Feeds `sequences_from_callsite_map` two call sites for `main` given in
/// reverse order (`Some(8)` before `Some(7)`) and expects the resulting
/// sequence under the key `/tmp/foo::main` to be `helper`, then `b_util`.
#[test]
fn test_sequences_from_callsite_map_orders_by_line() {
    use tldr_core::callgraph::CallSite;

    // Deliberately out of order: the site tagged 8 comes before the one tagged 7.
    let unsorted_sites = vec![
        CallSite::direct("main".to_string(), "b_util".to_string(), Some(8)),
        CallSite::direct("main".to_string(), "helper".to_string(), Some(7)),
    ];
    let calls: HashMap<String, Vec<CallSite>> =
        HashMap::from([("main".to_string(), unsorted_sites)]);

    let out = sequences_from_callsite_map("/tmp/foo", &calls);
    let main_seq = out.get("/tmp/foo::main").expect("main sequence");

    let expected = vec!["helper".to_string(), "b_util".to_string()];
    assert_eq!(
        main_seq, &expected,
        "calls must be ordered by line ascending (sequences_from_callsite_map)"
    );
}
1679}