#[cfg_attr(coverage_nightly, coverage(off))]
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_large_file_detection() {
    // Every generated line is 100 `a` bytes followed by a newline (101 bytes).
    let line = format!("{}\n", "a".repeat(100));
    let classifier = FileClassifier::default();

    // 4000 lines: expected to be parsed.
    let small_content = line.repeat(4000);
    let decision = classifier.should_parse(Path::new("small.js"), small_content.as_bytes());
    assert_eq!(decision, ParseDecision::Parse);

    // 6000 lines: expected to be skipped as a large file.
    let large_content = line.repeat(6000);
    let decision = classifier.should_parse(Path::new("large.js"), large_content.as_bytes());
    assert_eq!(decision, ParseDecision::Skip(SkipReason::LargeFile));

    // As many whole lines as fit without exceeding LARGE_FILE_THRESHOLD bytes:
    // a file at (or just under) the threshold must still be parsed.
    let threshold_content = line.repeat(LARGE_FILE_THRESHOLD / 101);
    let decision =
        classifier.should_parse(Path::new("threshold.js"), threshold_content.as_bytes());
    assert_eq!(decision, ParseDecision::Parse);
}
#[test]
fn test_include_large_files_flag() {
    let classifier = FileClassifier::default();

    // 6000 lines of 100 `a` bytes plus a newline each.
    let large_content = format!("{}\n", "a".repeat(100)).repeat(6000);
    let large_content_bytes = large_content.as_bytes();

    // The same input is classified twice; only the boolean option differs.
    for (include_large, expected) in [
        (false, ParseDecision::Skip(SkipReason::LargeFile)),
        (true, ParseDecision::Parse),
    ] {
        let decision = classifier.should_parse_with_options(
            Path::new("large.js"),
            large_content_bytes,
            include_large,
        );
        assert_eq!(decision, expected);
    }
}
#[test]
fn test_very_large_files_still_skipped() {
    // 2_000_000 bytes of `a`, classified with the boolean option set to `true`:
    // the file must still be rejected, with the FileTooLarge reason.
    let huge = "a".repeat(2_000_000).into_bytes();
    let classifier = FileClassifier::default();

    let decision = classifier.should_parse_with_options(Path::new("huge.js"), &huge, true);

    assert_eq!(decision, ParseDecision::Skip(SkipReason::FileTooLarge));
}
#[test]
fn test_skip_reason_priorities() {
    let classifier = FileClassifier::default();

    // Zero-length input is reported as an empty file.
    let decision = classifier.should_parse_with_options(Path::new("empty.js"), b"", false);
    assert_eq!(decision, ParseDecision::Skip(SkipReason::EmptyFile));

    // 600_000 bytes under a `target/` path: the expected reason is LargeFile,
    // i.e. the size-based reason is the one reported for this input.
    let oversized = "a".repeat(600_000).into_bytes();
    let artifact_path = Path::new("target/debug/deps/lib.rlib");
    let decision = classifier.should_parse_with_options(artifact_path, &oversized, false);
    assert_eq!(decision, ParseDecision::Skip(SkipReason::LargeFile));
}
#[test]
fn test_minified_vs_large_file_detection() {
    let classifier = FileClassifier::default();

    // Ordinary-looking source made of short lines. Keep appending functions
    // until the input is strictly larger than LARGE_FILE_THRESHOLD so the
    // large-file assertion below always executes. A fixed count such as
    // 10_000 functions produces only 387_780 bytes, which may never reach the
    // threshold and would leave the assertion unexercised.
    let mut large_normal = String::with_capacity(LARGE_FILE_THRESHOLD + 64);
    let mut i = 0usize;
    while large_normal.len() <= LARGE_FILE_THRESHOLD {
        large_normal.push_str(&format!("function test{} () {{\n return {};\n}}\n", i, i));
        i += 1;
    }
    let decision = classifier.should_parse(Path::new("large_normal.js"), large_normal.as_bytes());
    assert_eq!(decision, ParseDecision::Skip(SkipReason::LargeFile));

    // A small file consisting of one 11_000-byte line must be reported with
    // the line-length reason instead.
    let minified = "a".repeat(11_000);
    let decision = classifier.should_parse(Path::new("minified.js"), minified.as_bytes());
    assert_eq!(decision, ParseDecision::Skip(SkipReason::LineTooLong));
}
#[test]
fn test_vendor_detection_determinism() {
    let classifier = FileClassifier::default();

    // (path, content) samples; the index of each entry is referenced by the
    // expectations at the end of the test.
    let test_files: [(&str, &[u8]); 5] = [
        // 0
        ("vendor/jquery.min.js", b"!function(e,t){var n=e.jQuery}"),
        // 1
        ("src/main.rs", b"fn main() {\n println!(\"Hello\");\n}"),
        // 2
        (
            "assets/vendor/d3.min.js",
            b"/*! For license information please see d3.min.js.LICENSE.txt */",
        ),
        // 3
        (
            "node_modules/react/index.js",
            b"'use strict';\n\nmodule.exports = require('./lib/React');",
        ),
        // 4
        (
            "target/debug/build/htmlServer-abc123/out/rules.rs",
            b"// Auto-generated code\npub enum TreeBuilderStep {\n A,\n B,\n}",
        ),
    ];

    // Classifies every sample once, in order.
    let classify_all = || {
        test_files
            .iter()
            .map(|&(path, content)| classifier.should_parse(Path::new(path), content))
            .collect::<Vec<_>>()
    };

    // 100 runs in total: the first is the baseline, the other 99 must match it.
    let decisions = classify_all();
    for _ in 1..100 {
        assert!(classify_all() == decisions);
    }

    for vendor_idx in [0, 2, 3] {
        assert!(matches!(
            decisions[vendor_idx],
            ParseDecision::Skip(SkipReason::VendorDirectory)
        ));
    }
    assert!(matches!(decisions[1], ParseDecision::Parse));
    assert!(matches!(
        decisions[4],
        ParseDecision::Skip(SkipReason::BuildArtifact)
    ));
}
#[test]
fn test_performance_on_large_files() {
    // 1_000_000 bytes of `a` with no newline.
    let payload = "a".repeat(1_000_000).into_bytes();
    let classifier = FileClassifier::default();

    // Only the classification call itself is timed.
    let timer = Instant::now();
    let decision = classifier.should_parse(Path::new("large.min.js"), &payload);
    let elapsed = timer.elapsed();

    // Any skip reason is accepted here.
    assert!(matches!(decision, ParseDecision::Skip(_)));
    // NOTE(review): this is a wall-clock bound of one millisecond; it may be
    // sensitive to machine load and build profile — confirm it is stable in CI.
    assert!(elapsed < std::time::Duration::from_millis(1));
}
#[test]
fn test_entropy_calculation() {
    // A single repeated byte value: entropy must be below 1.0.
    assert!(calculate_shannon_entropy(b"aaaaaaaaaa") < 1.0);

    // Twenty distinct byte values, each occurring once: entropy must exceed 3.0.
    assert!(calculate_shannon_entropy(b"a1b2c3d4e5f6g7h8i9j0") > 3.0);

    // A minified-JavaScript-style sample: entropy must exceed 4.0.
    let minified_entropy = calculate_shannon_entropy(
        b"!function(e,t){var n,r,i,o,a,s,u,c,l,f,d,p,h,m,v,g,y,b,_,w,x,k,C,S,E,T,A,O,j,N,D,P,L,q,R,M,I,F,B,H,U,z,W,V,$,G,Q,K,X,Y,J,Z,ee,te,ne,re,ie,oe,ae,se,ue,ce,le",
    );
    assert!(minified_entropy > 4.0);
}
#[test]
fn test_binary_detection() {
    let classifier = FileClassifier::default();

    // Plain ASCII text must not be flagged as binary.
    assert!(!classifier.is_binary(b"Hello, world!\nThis is a text file."));

    // Content containing NUL bytes must be flagged.
    assert!(classifier.is_binary(b"PNG\x00\x00\x00\rIHDR"));

    // Byte values 1 through 10 (no NUL) must be flagged as well.
    let low_bytes = [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10];
    assert!(classifier.is_binary(&low_bytes));
}
#[test]
fn test_line_length_detection() {
    let classifier = FileClassifier::default();

    // A short, ordinary Rust snippet is parsed.
    let decision = classifier.should_parse(
        Path::new("main.rs"),
        b"fn main() {\n println!(\"Hello\");\n}",
    );
    assert_eq!(decision, ParseDecision::Parse);

    // One line embedding a 15_000-character string literal is skipped with
    // the line-length reason.
    let payload = "a".repeat(15_000);
    let long_line = format!("const DATA = \"{payload}\";\n");
    let decision = classifier.should_parse(Path::new("data.js"), long_line.as_bytes());
    assert_eq!(decision, ParseDecision::Skip(SkipReason::LineTooLong));
}
#[test]
fn test_rust_target_directory_filtering() {
    let classifier = FileClassifier::default();

    // Paths below `target/` must be skipped with the BuildArtifact reason.
    let artifact_content = b"// Auto-generated code or compiled artifact";
    [
        "target/debug/build/htmlserver-abc123/out/rules.rs",
        "target/release/build/htmlserver-abc123/out/rules.rs",
        "target/debug/deps/libhtml5ever-xyz.rlib",
        "target/release/deps/libhtml5ever-xyz.rlib",
        "target/debug/build/proc-macro2-def456/out/generated.rs",
        "target/thumbv7em-none-eabihf/release/libcore.rlib",
    ]
    .into_iter()
    .for_each(|path| {
        let decision = classifier.should_parse(Path::new(path), artifact_content);
        assert!(
            matches!(decision, ParseDecision::Skip(SkipReason::BuildArtifact)),
            "Failed to filter target directory path: {path}"
        );
    });

    // Regular source, test and example files must still be parsed.
    let source_content = b"fn main() {\n println!(\"Hello\");\n}";
    [
        "src/main.rs",
        "src/lib.rs",
        "tests/integration_test.rs",
        "examples/demo.rs",
    ]
    .into_iter()
    .for_each(|path| {
        let decision = classifier.should_parse(Path::new(path), source_content);
        assert!(
            matches!(decision, ParseDecision::Parse),
            "Incorrectly filtered source file: {path} -> {decision:?}"
        );
    });
}
#[test]
fn test_additional_build_artifacts() {
    let classifier = FileClassifier::default();
    // Every path in this test is classified with the same small content.
    let classify = |path: &str| classifier.should_parse(Path::new(path), b"// Some content");

    // Paths containing a `.gradle` component: expected BuildArtifact.
    for path in [
        ".gradle/caches/transforms-3/abc123/transformed/classes.jar",
        "frontend/.gradle/build/outputs/apk/debug/app-debug.apk",
    ] {
        assert!(
            matches!(classify(path), ParseDecision::Skip(SkipReason::BuildArtifact)),
            "Failed to filter build artifact: {path}"
        );
    }

    // Paths containing a `node_modules` component (nested and absolute):
    // expected VendorDirectory.
    for path in [
        "backend/node_modules/@babel/core/lib/index.js",
        "/home/user/project/node_modules/lodash/index.js",
    ] {
        assert!(
            matches!(classify(path), ParseDecision::Skip(SkipReason::VendorDirectory)),
            "Failed to filter vendor artifact: {path}"
        );
    }
}
#[test]
fn test_debug_reporter() {
    let vendor_path = Path::new("vendor/lib.js");
    let source_path = Path::new("src/main.rs");
    let mut reporter = DebugReporter::new(None);

    // One skipped file and one parsed file; the parsed file additionally gets
    // a parse result with a 25 ms duration and `None` as its last argument.
    reporter.record_decision(
        vendor_path,
        &ParseDecision::Skip(SkipReason::VendorDirectory),
    );
    reporter.record_decision(source_path, &ParseDecision::Parse);
    reporter.record_parse_result(source_path, std::time::Duration::from_millis(25), None);

    let report = reporter.generate_report().unwrap();

    // Summary counters reflect the two recorded decisions.
    let summary = &report.summary;
    assert_eq!(summary.total_files, 2);
    assert_eq!(summary.parsed_files, 1);
    assert_eq!(summary.skipped_files, 1);
    assert_eq!(summary.parse_errors, 0);

    // Skip reasons are tallied under the reason's name.
    assert_eq!(report.skip_reasons.get("VendorDirectory"), Some(&1));
}
}
#[cfg_attr(coverage_nightly, coverage(off))]
#[cfg(test)]
mod property_tests {
    use proptest::prelude::*;

    proptest! {
        // Placeholder property: accepts any string matching `.*` and asserts
        // nothing about it beyond the test body completing.
        #[test]
        fn basic_property_stability(_input in ".*") {
            prop_assert!(true);
        }

        // Values are drawn from the half-open range 0..1000, so each one is
        // at most 1000.
        #[test]
        fn module_consistency_check(value in 0u32..1000) {
            prop_assert!(value <= 1000);
        }
    }
}