use super::*;
use std::path::PathBuf;
/// Runs pipeline-pattern extraction over `content` using an extractor built
/// from the default `EntropyConfig`, attributing the input to the fixed path
/// `pipeline.ruchy`.
///
/// Returns the collection that was handed to the extractor.
///
/// # Panics
///
/// Panics if `extract_ruchy_pipeline_patterns` returns an error.
fn run_pipeline(content: &str) -> PatternCollection {
    let engine = PatternExtractor::new(EntropyConfig::default());
    let source_path = PathBuf::from("pipeline.ruchy");
    let mut found = PatternCollection::new();

    let outcome = engine.extract_ruchy_pipeline_patterns(&source_path, content, &mut found);
    outcome.expect("extract_ruchy_pipeline_patterns should not error");

    found
}
/// Four `|>` operators on a single line are expected to yield exactly one
/// `DataTransformation` pattern carrying one location per operator.
#[test]
fn four_pipeline_ops_create_data_transformation_pattern() {
    let collection = run_pipeline("x |> a() |> b() |> c() |> d()");
    assert_eq!(
        collection.patterns.len(),
        1,
        "4 pipeline ops should produce exactly one pattern"
    );

    let found = collection.patterns.values().next().unwrap();
    assert_eq!(found.pattern_type, PatternType::DataTransformation);
    assert_eq!(found.frequency, 4);
    assert_eq!(found.estimated_loc, 8, "estimated_loc = matches.len() * 2");
    assert_eq!(found.locations.len(), 4);

    // Each recorded location must name the helper's fixed path and column 1.
    let expected_file = PathBuf::from("pipeline.ruchy");
    found.locations.iter().for_each(|site| {
        assert_eq!(site.file, expected_file);
        assert_eq!(site.column, 1);
    });
}
/// Three `|>` operators are expected to leave the collection empty.
#[test]
fn three_pipeline_ops_produce_no_pattern() {
    let collection = run_pipeline("x |> a() |> b() |> c()");
    let nothing_extracted = collection.patterns.is_empty();
    assert!(
        nothing_extracted,
        "3 pipeline ops should not trigger pattern extraction"
    );
}
/// Input containing no `|>` operator at all is expected to leave the
/// collection empty.
#[test]
fn zero_pipeline_ops_produce_no_pattern() {
    let collection = run_pipeline("let x = foo(bar, baz); // no pipelines here");

    assert!(collection.patterns.is_empty());
}
/// With twenty `|>` operators the test expects a single pattern whose
/// `frequency` and `estimated_loc` reflect all twenty matches while the
/// recorded `locations` stop at sixteen entries.
#[test]
fn twenty_pipeline_ops_truncate_locations_at_sixteen() {
    // Build "seed |> f0() |> f1() ... |> f19()".
    let mut content = String::from("seed");
    for i in 0..20 {
        content.push_str(" |> f");
        content.push_str(&i.to_string());
        content.push_str("()");
    }

    let collection = run_pipeline(&content);
    assert_eq!(collection.patterns.len(), 1);

    let found = collection.patterns.values().next().unwrap();
    assert_eq!(found.frequency, 20, "frequency = total matches, not capped");
    assert_eq!(
        found.locations.len(),
        16,
        "locations capped by `if i >= 15 {{ break; }}` after push"
    );
    assert_eq!(found.estimated_loc, 40, "estimated_loc = 20 * 2");
}
/// Extracting the same content under two different file paths is expected to
/// produce different keys in the resulting pattern maps.
#[test]
fn pattern_hash_is_file_path_scoped() {
    let extractor = PatternExtractor::new(EntropyConfig::default());
    let content = "x |> a() |> b() |> c() |> d()";

    let alpha_path = PathBuf::from("alpha.ruchy");
    let beta_path = PathBuf::from("beta.ruchy");
    let mut alpha = PatternCollection::new();
    let mut beta = PatternCollection::new();

    extractor
        .extract_ruchy_pipeline_patterns(&alpha_path, content, &mut alpha)
        .unwrap();
    extractor
        .extract_ruchy_pipeline_patterns(&beta_path, content, &mut beta)
        .unwrap();

    // Compare the first (only expected) key from each collection.
    let alpha_key = alpha.patterns.keys().next().unwrap().clone();
    let beta_key = beta.patterns.keys().next().unwrap().clone();
    assert_ne!(alpha_key, beta_key, "pattern hash should vary by file path");
}
/// The pattern's `example_code` is expected to contain the text of the first
/// `|>` match, even when the line starts with unrelated leading text.
#[test]
fn example_code_captured_around_first_match() {
    let collection =
        run_pipeline("leading_padding_here x |> transform_alpha() |> b() |> c() |> d()");

    let example = &collection.patterns.values().next().unwrap().example_code;
    assert!(
        example.contains("|> transform_alpha("),
        "example_code should include first match: got {:?}",
        example
    );
}
/// With one `|>` per line after an initial `seed` line, the recorded
/// locations are expected to carry line numbers 2 through 5.
#[test]
fn location_line_numbers_reflect_newline_positions() {
    let collection = run_pipeline("seed\n |> a()\n |> b()\n |> c()\n |> d()");
    let found = collection.patterns.values().next().unwrap();

    let mut observed: Vec<usize> = Vec::with_capacity(found.locations.len());
    observed.extend(found.locations.iter().map(|site| site.line));

    assert_eq!(observed, [2, 3, 4, 5]);
}
/// Three matches with three different texts (`|>a`, `|>b`, `|>c`) are
/// expected to score 1.0.
#[test]
fn test_calculate_pipeline_variation_score_all_distinct() {
    use regex::Regex;

    let extractor = PatternExtractor::new(EntropyConfig::default());
    let content = "|>a |>b |>c";

    // Each match is the operator plus the single word character after it.
    let operator_re = Regex::new(r"\|>\w").unwrap();
    let hits = operator_re.find_iter(content).collect::<Vec<_>>();
    assert_eq!(hits.len(), 3);

    let score = extractor.calculate_pipeline_variation_score(&hits, content);
    let deviation = (score - 1.0).abs();
    assert!(deviation < f64::EPSILON, "got {score}");
}
/// Three matches that all have the same text (`|>a`) are expected to score
/// one third.
#[test]
fn test_calculate_pipeline_variation_score_all_identical() {
    use regex::Regex;

    let extractor = PatternExtractor::new(EntropyConfig::default());
    let content = "|>a |>a |>a";

    // Each match is the operator plus the single word character after it.
    let operator_re = Regex::new(r"\|>\w").unwrap();
    let hits = operator_re.find_iter(content).collect::<Vec<_>>();
    assert_eq!(hits.len(), 3);

    let score = extractor.calculate_pipeline_variation_score(&hits, content);
    let expected = 1.0 / 3.0;
    let deviation = (score - expected).abs();
    assert!(deviation < 1e-9, "got {score}");
}
/// Four matches made of two different texts (`|>a` and `|>b`, each twice)
/// are expected to score 0.5.
#[test]
fn test_calculate_pipeline_variation_score_partial() {
    use regex::Regex;

    let extractor = PatternExtractor::new(EntropyConfig::default());
    let content = "|>a |>b |>a |>b";

    // Each match is the operator plus the single word character after it.
    let operator_re = Regex::new(r"\|>\w").unwrap();
    let hits = operator_re.find_iter(content).collect::<Vec<_>>();
    assert_eq!(hits.len(), 4);

    let score = extractor.calculate_pipeline_variation_score(&hits, content);
    let deviation = (score - 0.5).abs();
    assert!(deviation < 1e-9, "got {score}");
}