use std::collections::BTreeSet;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::sync::{Mutex, OnceLock};
/// One matching line, in the common shape both search backends are converted to
/// so that their result sets can be compared directly.
///
/// The derived `Ord` compares fields in declaration order (path, then line
/// number, then content); that is the iteration order of the `BTreeSet`s built
/// from these values, so do not reorder the fields casually.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct OracleMatch {
/// File path as text; `parse_rg_output` stores it relative to the corpus root
/// whenever that prefix can be stripped.
path: String,
/// Line number as reported by the backend.
line_number: u32,
/// Raw bytes of the matched line after `normalize_oracle_line_content`.
line_content: Vec<u8>,
}
/// Removes at most one trailing carriage return from `line_content`.
///
/// Lines taken from CRLF files keep their `\r` once the `\n` has been split
/// off; dropping it lets both backends be compared byte-for-byte. Only a single
/// `\r` is removed, and bytes elsewhere in the line are left untouched.
fn normalize_oracle_line_content(mut line_content: Vec<u8>) -> Vec<u8> {
    if line_content.ends_with(b"\r") {
        let trimmed_len = line_content.len() - 1;
        line_content.truncate(trimmed_len);
    }
    line_content
}
/// Exact first line that `rg --version` must print; `assert_rg_version_pinned`
/// compares against it so the oracle's behavior cannot drift with the installed
/// ripgrep.
const EXPECTED_RG_VERSION: &str = "ripgrep 15.1.0";
/// Runs ripgrep over `corpus` for `pattern` and returns every reported match.
///
/// `extra_flags` are inserted before the `--` separator, so `pattern` is never
/// interpreted as an option even if it starts with a dash.
///
/// The invocation is made hermetic on purpose:
/// * `--no-config` keeps a developer's personal ripgrep configuration
///   (`RIPGREP_CONFIG_PATH`) from silently adding flags such as `--hidden` or
///   `--smart-case` that would change what the oracle reports.
/// * `--with-filename` forces the `path:line:content` shape that
///   `parse_rg_output` expects, instead of relying on ripgrep deciding to print
///   file names because the target happens to be a directory.
///
/// # Panics
///
/// Panics if `rg` cannot be spawned, or if it exits with a status other than
/// 0 (matches found) or 1 (ripgrep's documented "no matches" status).
fn rg_matches(corpus: &Path, pattern: &str, extra_flags: &[&str]) -> BTreeSet<OracleMatch> {
    let mut cmd = Command::new("rg");
    cmd.arg("--no-config")
        .arg("--with-filename")
        .arg("--line-number")
        .arg("--no-heading")
        .arg("--color=never")
        .args(extra_flags)
        .arg("--")
        .arg(pattern)
        .arg(corpus);

    let output = cmd.output().expect("rg invocation failed");

    // Exit code 1 only means "nothing matched"; anything else is a real error.
    if !output.status.success() && output.status.code() != Some(1) {
        panic!(
            "rg failed with status {:?}: {}",
            output.status,
            String::from_utf8_lossy(&output.stderr)
        );
    }

    parse_rg_output(&output.stdout, corpus)
}
/// Parses ripgrep's `path:line:content` output into a set of [`OracleMatch`]es.
///
/// * Empty lines and lines that do not have the `path:line:content` shape (no
///   delimiter, non-numeric line number, missing content) are skipped.
/// * Paths are stored relative to `corpus` when they start with it, otherwise
///   unchanged. Non-UTF-8 path bytes are converted lossily.
/// * Content is passed through `normalize_oracle_line_content`; colons inside
///   the content are preserved.
///
/// The corpus root itself may contain `:` (for example a Windows drive letter
/// such as `C:\…`). The path/line delimiter is therefore searched for only
/// *after* the corpus prefix; otherwise the first colon of the root would be
/// taken for the delimiter and every line would be dropped as malformed.
/// A colon in the part of the path *below* the corpus root is still ambiguous
/// in this output format and is not handled.
fn parse_rg_output(stdout: &[u8], corpus: &Path) -> BTreeSet<OracleMatch> {
    let corpus_text = corpus.to_string_lossy();
    let corpus_prefix = corpus_text.as_bytes();

    stdout
        .split(|&b| b == b'\n')
        .filter(|line| !line.is_empty())
        .filter_map(|line| {
            // Colons that belong to the corpus root are not delimiters.
            let search_from = if line.starts_with(corpus_prefix) {
                corpus_prefix.len()
            } else {
                0
            };
            let path_end =
                search_from + line[search_from..].iter().position(|&b| b == b':')?;
            let path_bytes = &line[..path_end];

            // Split only once more so colons in the matched text stay intact.
            let mut fields = line[path_end + 1..].splitn(2, |&b| b == b':');
            let line_number: u32 = std::str::from_utf8(fields.next()?)
                .ok()?
                .parse()
                .ok()?;
            let line_content = normalize_oracle_line_content(fields.next()?.to_vec());

            let abs = PathBuf::from(String::from_utf8_lossy(path_bytes).into_owned());
            let path = abs
                .strip_prefix(corpus)
                .unwrap_or(&abs)
                .to_string_lossy()
                .into_owned();

            Some(OracleMatch {
                path,
                line_number,
                line_content,
            })
        })
        .collect()
}
/// Reports whether an `rg` executable can be spawned from `PATH`.
///
/// Only the ability to launch the process is checked; the exit status of
/// `rg --version` is not inspected here.
fn rg_available() -> bool {
    let probe = Command::new("rg").arg("--version").output();
    probe.is_ok()
}
/// Asserts that the installed ripgrep is exactly the pinned oracle version.
///
/// Runs `rg --version` and compares the first line of its output with
/// `EXPECTED_RG_VERSION`.
///
/// # Panics
///
/// Panics if `rg` cannot be run, exits unsuccessfully, prints non-UTF-8 output,
/// or reports a different version line.
fn assert_rg_version_pinned() {
    let mut probe = Command::new("rg");
    probe.arg("--version");
    let output = probe.output().expect("rg --version failed");

    assert!(
        output.status.success(),
        "rg --version failed with status {:?}",
        output.status
    );

    let stdout = String::from_utf8(output.stdout).expect("rg --version output is not UTF-8");
    // An empty output compares as "" and fails the equality check below.
    let first_line = stdout.lines().next().unwrap_or_default();
    assert_eq!(
        first_line, EXPECTED_RG_VERSION,
        "correctness oracle pinned to {EXPECTED_RG_VERSION}, found {first_line}"
    );
}
/// Returns the fixture corpus directory, `tests/fixtures/corpus` under the
/// directory named by the `CARGO_MANIFEST_DIR` environment variable.
///
/// # Panics
///
/// Panics if `CARGO_MANIFEST_DIR` is not set (or is not valid Unicode).
fn corpus_path() -> PathBuf {
    let mut path = PathBuf::from(
        std::env::var("CARGO_MANIFEST_DIR")
            .expect("CARGO_MANIFEST_DIR not set; run via cargo test"),
    );
    path.push("tests/fixtures/corpus");
    path
}
/// Returns the process-wide mutex the tests in this file take before doing
/// their work.
///
/// The mutex is created on first use and the same instance is returned on every
/// call. It carries no data; it exists purely for mutual exclusion.
fn correctness_build_lock() -> &'static Mutex<()> {
    static BUILD_LOCK: OnceLock<Mutex<()>> = OnceLock::new();
    BUILD_LOCK.get_or_init(Mutex::default)
}
use syntext::index::Index;
use syntext::{Config, SearchOptions};
/// Builds a fresh index of `corpus` inside a new temporary directory.
///
/// Returns the temporary directory together with the index; the caller must
/// keep the `TempDir` alive for as long as the index is used, because dropping
/// it removes the index directory.
///
/// # Panics
///
/// Panics if the temporary directory cannot be created or the build fails.
fn build_test_index(corpus: &Path) -> (tempfile::TempDir, Index) {
    let tmp = tempfile::TempDir::new().expect("tempdir");
    let index = Index::build(Config {
        index_dir: tmp.path().to_path_buf(),
        repo_root: corpus.to_path_buf(),
        ..Config::default()
    })
    .expect("Index::build failed");
    (tmp, index)
}
/// Searches `index` for `pattern` and converts the results into the same
/// [`OracleMatch`] representation used for ripgrep output.
///
/// `path_glob` is mapped onto the search options as follows:
/// * a glob starting with `*.` becomes a `file_type` filter holding the text
///   after the leading `*.` prefix(es);
/// * any other glob is passed through unchanged as `path_filter`;
/// * `None` sets neither filter.
///
/// `_corpus` is accepted for call-site symmetry with `rg_matches` and is not
/// used.
///
/// # Panics
///
/// Panics if the search itself returns an error.
fn syntext_matches(
    index: &Index,
    _corpus: &Path,
    pattern: &str,
    case_insensitive: bool,
    path_glob: Option<&str>,
) -> BTreeSet<OracleMatch> {
    // Exactly one of the two filters is populated, depending on the glob shape.
    let file_type = path_glob
        .filter(|glob| glob.starts_with("*."))
        .map(|glob| glob.trim_start_matches("*.").to_string());
    let path_filter = path_glob
        .filter(|glob| !glob.starts_with("*."))
        .map(|glob| glob.to_string());

    let opts = SearchOptions {
        case_insensitive,
        path_filter,
        file_type,
        ..SearchOptions::default()
    };

    let mut found = BTreeSet::new();
    for hit in index.search(pattern, &opts).expect("search failed") {
        found.insert(OracleMatch {
            path: hit.path.to_string_lossy().into_owned(),
            line_number: hit.line_number,
            line_content: normalize_oracle_line_content(hit.line_content),
        });
    }
    found
}
/// Asserts that every match ripgrep reported is also present in the syntext
/// result (syntext may contain additional matches).
///
/// `corpus` and `pattern` are used only in the failure message.
///
/// # Panics
///
/// Panics with a summary of the missing matches when `rg_result` contains an
/// entry that `syntext_result` lacks.
// NOTE(review): the allow looks unnecessary while tests in this file call the
// helper; presumably kept so removing those tests does not produce a warning.
#[allow(dead_code)]
fn assert_no_false_negatives(
    corpus: &Path,
    rg_result: &BTreeSet<OracleMatch>,
    syntext_result: &BTreeSet<OracleMatch>,
    pattern: &str,
) {
    let missed: Vec<&OracleMatch> = rg_result
        .iter()
        .filter(|candidate| !syntext_result.contains(*candidate))
        .collect();

    if !missed.is_empty() {
        panic!(
            "syntext missed {} matches for pattern {:?} on corpus {:?}.\n\
             First missed: {:?}\n\
             rg found {} total, syntext found {}.",
            missed.len(),
            pattern,
            corpus,
            missed.first(),
            rg_result.len(),
            syntext_result.len(),
        );
    }
}
/// Asserts that syntext and ripgrep produced exactly the same set of matches.
///
/// `corpus` and `pattern` are used only in the failure message.
///
/// # Panics
///
/// Panics (via `assert_eq!`, so both sets are printed) when the sets differ in
/// either direction.
// NOTE(review): the allow looks unnecessary while tests in this file call the
// helper; presumably kept so removing those tests does not produce a warning.
#[allow(dead_code)]
fn assert_exact_match(
    corpus: &Path,
    rg_result: &BTreeSet<OracleMatch>,
    syntext_result: &BTreeSet<OracleMatch>,
    pattern: &str,
) {
    assert_eq!(
        rg_result,
        syntext_result,
        "syntext results differ from rg for pattern {:?} on corpus {:?}",
        pattern,
        corpus
    );
}
/// A plain literal (`parse_query`) must yield exactly the same matches from
/// syntext as from ripgrep. Skipped when `rg` is not on `PATH`.
#[test]
fn literal_parse_query() {
    // The mutex guards no data, so a lock poisoned by an earlier failing test
    // is safe to reuse; unwrapping would fail this test with an unrelated
    // PoisonError and bury the real failure.
    let _guard = correctness_build_lock()
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    if !rg_available() {
        eprintln!("SKIP: rg not on PATH");
        return;
    }
    assert_rg_version_pinned();

    let corpus = corpus_path();
    let pattern = "parse_query";
    let rg_result = rg_matches(&corpus, pattern, &[]);
    // Guard against the fixture being edited into a vacuous test.
    assert!(
        rg_result.len() >= 3,
        "fixture invariant: parse_query must appear in >=3 files, got {}",
        rg_result.len()
    );

    let (_tmp, index) = build_test_index(&corpus);
    let syntext_result = syntext_matches(&index, &corpus, pattern, false, None);
    assert_exact_match(&corpus, &rg_result, &syntext_result, pattern);
    drop(index);
}
/// A second plain literal (`process_batch`) must match ripgrep exactly.
/// Skipped when `rg` is not on `PATH`.
#[test]
fn literal_process_batch() {
    // The mutex guards no data, so a lock poisoned by an earlier failing test
    // is safe to reuse; unwrapping would fail this test with an unrelated
    // PoisonError and bury the real failure.
    let _guard = correctness_build_lock()
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    if !rg_available() {
        eprintln!("SKIP: rg not on PATH");
        return;
    }
    assert_rg_version_pinned();

    let corpus = corpus_path();
    let pattern = "process_batch";
    let rg_result = rg_matches(&corpus, pattern, &[]);
    // Guard against the fixture being edited into a vacuous test.
    assert!(
        rg_result.len() >= 2,
        "fixture invariant: process_batch must appear in >=2 files, got {}",
        rg_result.len()
    );

    let (_tmp, index) = build_test_index(&corpus);
    let syntext_result = syntext_matches(&index, &corpus, pattern, false, None);
    assert_exact_match(&corpus, &rg_result, &syntext_result, pattern);
    drop(index);
}
/// A literal containing a regex metacharacter must match ripgrep exactly.
///
/// ripgrep is given the raw text `parse_query(` with `-F` (fixed string), while
/// syntext receives the equivalent escaped regex `parse_query\(`.
/// Skipped when `rg` is not on `PATH`.
#[test]
fn literal_with_punctuation() {
    // The mutex guards no data, so a lock poisoned by an earlier failing test
    // is safe to reuse; unwrapping would fail this test with an unrelated
    // PoisonError and bury the real failure.
    let _guard = correctness_build_lock()
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    if !rg_available() {
        eprintln!("SKIP: rg not on PATH");
        return;
    }
    assert_rg_version_pinned();

    let corpus = corpus_path();
    let rg_result = rg_matches(&corpus, "parse_query(", &["-F"]);
    assert!(
        !rg_result.is_empty(),
        "fixture invariant: parse_query( must appear in at least 1 file"
    );

    let (_tmp, index) = build_test_index(&corpus);
    let syntext_result = syntext_matches(&index, &corpus, r"parse_query\(", false, None);
    assert_exact_match(&corpus, &rg_result, &syntext_result, "parse_query(");
    drop(index);
}
/// A top-level alternation (`parse_query|process_batch`) must match ripgrep
/// exactly. Skipped when `rg` is not on `PATH`.
#[test]
fn regex_alternation() {
    // The mutex guards no data, so a lock poisoned by an earlier failing test
    // is safe to reuse; unwrapping would fail this test with an unrelated
    // PoisonError and bury the real failure.
    let _guard = correctness_build_lock()
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    if !rg_available() {
        eprintln!("SKIP: rg not on PATH");
        return;
    }
    assert_rg_version_pinned();

    let corpus = corpus_path();
    let pattern = "parse_query|process_batch";
    let rg_result = rg_matches(&corpus, pattern, &[]);
    assert!(
        !rg_result.is_empty(),
        "fixture invariant: alternation must match at least one file"
    );

    let (_tmp, index) = build_test_index(&corpus);
    let syntext_result = syntext_matches(&index, &corpus, pattern, false, None);
    assert_exact_match(&corpus, &rg_result, &syntext_result, pattern);
    drop(index);
}
/// A pattern ending in a character class (`parse_quer[yi]`) must match ripgrep
/// exactly. Skipped when `rg` is not on `PATH`.
#[test]
fn regex_character_class() {
    // The mutex guards no data, so a lock poisoned by an earlier failing test
    // is safe to reuse; unwrapping would fail this test with an unrelated
    // PoisonError and bury the real failure.
    let _guard = correctness_build_lock()
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    if !rg_available() {
        eprintln!("SKIP: rg not on PATH");
        return;
    }
    assert_rg_version_pinned();

    let corpus = corpus_path();
    let pattern = "parse_quer[yi]";
    let rg_result = rg_matches(&corpus, pattern, &[]);
    assert!(
        !rg_result.is_empty(),
        "fixture invariant: character class must match at least 1 file"
    );

    let (_tmp, index) = build_test_index(&corpus);
    let syntext_result = syntext_matches(&index, &corpus, pattern, false, None);
    assert_exact_match(&corpus, &rg_result, &syntext_result, pattern);
    drop(index);
}
/// A repeated group (`(fn_parse_filter_query)+`) must match ripgrep exactly.
/// Skipped when `rg` is not on `PATH`.
#[test]
fn indexed_regex_repetition() {
    // The mutex guards no data, so a lock poisoned by an earlier failing test
    // is safe to reuse; unwrapping would fail this test with an unrelated
    // PoisonError and bury the real failure.
    let _guard = correctness_build_lock()
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    if !rg_available() {
        eprintln!("SKIP: rg not on PATH");
        return;
    }
    assert_rg_version_pinned();

    let corpus = corpus_path();
    let pattern = "(fn_parse_filter_query)+";
    let rg_result = rg_matches(&corpus, pattern, &[]);
    assert!(
        !rg_result.is_empty(),
        "fixture invariant: repetition pattern must match at least 1 file"
    );

    let (_tmp, index) = build_test_index(&corpus);
    let syntext_result = syntext_matches(&index, &corpus, pattern, false, None);
    assert_exact_match(&corpus, &rg_result, &syntext_result, pattern);
    drop(index);
}
/// A case-insensitive search for `ParseQuery` must match `rg -i` exactly.
///
/// The case-sensitive ripgrep run is only used as a sanity check that ignoring
/// case never yields fewer matches. Skipped when `rg` is not on `PATH`.
#[test]
fn case_insensitive_literal() {
    // The mutex guards no data, so a lock poisoned by an earlier failing test
    // is safe to reuse; unwrapping would fail this test with an unrelated
    // PoisonError and bury the real failure.
    let _guard = correctness_build_lock()
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    if !rg_available() {
        eprintln!("SKIP: rg not on PATH");
        return;
    }
    assert_rg_version_pinned();

    let corpus = corpus_path();
    let pattern = "ParseQuery";
    let rg_ci = rg_matches(&corpus, pattern, &["-i"]);
    let rg_cs = rg_matches(&corpus, pattern, &[]);
    assert!(
        rg_ci.len() >= rg_cs.len(),
        "case-insensitive must find at least as many matches as case-sensitive"
    );
    assert!(
        !rg_ci.is_empty(),
        "fixture invariant: case-insensitive ParseQuery must match at least 1 file"
    );

    let (_tmp, index) = build_test_index(&corpus);
    let syntext_result = syntext_matches(&index, &corpus, pattern, true, None);
    assert_exact_match(
        &corpus,
        &rg_ci,
        &syntext_result,
        "ParseQuery (case-insensitive)",
    );
    drop(index);
}
/// A sentinel string absent from the fixtures must produce no results from
/// either backend. Skipped when `rg` is not on `PATH`.
#[test]
fn no_match_pattern() {
    // The mutex guards no data, so a lock poisoned by an earlier failing test
    // is safe to reuse; unwrapping would fail this test with an unrelated
    // PoisonError and bury the real failure.
    let _guard = correctness_build_lock()
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    if !rg_available() {
        eprintln!("SKIP: rg not on PATH");
        return;
    }
    assert_rg_version_pinned();

    let corpus = corpus_path();
    let pattern = "xyzzy_no_match_sentinel_42";
    let rg_result = rg_matches(&corpus, pattern, &[]);
    assert!(
        rg_result.is_empty(),
        "sentinel pattern must not appear in any fixture file"
    );

    let (_tmp, index) = build_test_index(&corpus);
    let syntext_result = syntext_matches(&index, &corpus, pattern, false, None);
    assert!(
        syntext_result.is_empty(),
        "syntext must return empty for no-match sentinel, got {:?}",
        syntext_result
    );
    drop(index);
}
/// A non-ASCII literal (`café`) must match ripgrep exactly.
/// Skipped when `rg` is not on `PATH`.
#[test]
fn unicode_identifier() {
    // The mutex guards no data, so a lock poisoned by an earlier failing test
    // is safe to reuse; unwrapping would fail this test with an unrelated
    // PoisonError and bury the real failure.
    let _guard = correctness_build_lock()
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    if !rg_available() {
        eprintln!("SKIP: rg not on PATH");
        return;
    }
    assert_rg_version_pinned();

    let corpus = corpus_path();
    let pattern = "café";
    let rg_result = rg_matches(&corpus, pattern, &[]);
    assert!(
        !rg_result.is_empty(),
        "fixture invariant: unicode_identifiers.py must contain 'café'"
    );

    let (_tmp, index) = build_test_index(&corpus);
    let syntext_result = syntext_matches(&index, &corpus, pattern, false, None);
    assert_exact_match(&corpus, &rg_result, &syntext_result, pattern);
    drop(index);
}
/// A pattern with an optional prefix group (`(foo)?bar`) must match ripgrep
/// exactly. Skipped when `rg` is not on `PATH`.
///
/// Two extra ripgrep runs (`\bbar\b` and `foobar`) whose results were only
/// discarded have been removed; they asserted nothing and just spawned
/// processes.
#[test]
fn optional_prefix_pattern() {
    // The mutex guards no data, so a lock poisoned by an earlier failing test
    // is safe to reuse; unwrapping would fail this test with an unrelated
    // PoisonError and bury the real failure.
    let _guard = correctness_build_lock()
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    if !rg_available() {
        eprintln!("SKIP: rg not on PATH");
        return;
    }
    assert_rg_version_pinned();

    let corpus = corpus_path();
    let pattern = "(foo)?bar";
    let rg_result = rg_matches(&corpus, pattern, &[]);

    let (_tmp, index) = build_test_index(&corpus);
    let syntext_result = syntext_matches(&index, &corpus, pattern, false, None);
    assert_exact_match(&corpus, &rg_result, &syntext_result, pattern);
    drop(index);
}
/// A pattern with an unbounded gap (`parse.*batch`) must match ripgrep exactly.
///
/// NOTE(review): going by the test name, this presumably exercises the path
/// where the index cannot narrow candidates and syntext scans instead; confirm
/// against the query planner. Skipped when `rg` is not on `PATH`.
#[test]
fn dot_star_fallback_to_scan() {
    // The mutex guards no data, so a lock poisoned by an earlier failing test
    // is safe to reuse; unwrapping would fail this test with an unrelated
    // PoisonError and bury the real failure.
    let _guard = correctness_build_lock()
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    if !rg_available() {
        eprintln!("SKIP: rg not on PATH");
        return;
    }
    assert_rg_version_pinned();

    let corpus = corpus_path();
    let pattern = "parse.*batch";
    let rg_result = rg_matches(&corpus, pattern, &[]);
    assert!(
        !rg_result.is_empty(),
        "fixture invariant: parse.*batch must match at least 1 file \
         (long_line.txt has 'parse_query' and 'process_batch' on the same line)"
    );

    let (_tmp, index) = build_test_index(&corpus);
    let syntext_result = syntext_matches(&index, &corpus, pattern, false, None);
    assert_exact_match(&corpus, &rg_result, &syntext_result, pattern);
    drop(index);
}
/// Restricting the search to `*.py` must return only `.py` files and must not
/// miss anything `rg --glob=*.py` finds.
///
/// Unlike most tests here this checks for false negatives only, not for an
/// exact match with ripgrep. Skipped when `rg` is not on `PATH`.
#[test]
fn path_filter_py_only() {
    // The mutex guards no data, so a lock poisoned by an earlier failing test
    // is safe to reuse; unwrapping would fail this test with an unrelated
    // PoisonError and bury the real failure.
    let _guard = correctness_build_lock()
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    if !rg_available() {
        eprintln!("SKIP: rg not on PATH");
        return;
    }
    assert_rg_version_pinned();

    let corpus = corpus_path();
    let pattern = "parse_query";
    let rg_all = rg_matches(&corpus, pattern, &[]);
    let rg_py = rg_matches(&corpus, pattern, &["--glob=*.py"]);
    // The filter must actually exclude something, or the test proves nothing.
    assert!(
        rg_py.len() < rg_all.len(),
        "py filter must return fewer results than unfiltered"
    );
    for m in &rg_py {
        assert!(
            m.path.ends_with(".py"),
            "path filter returned non-.py file: {}",
            m.path
        );
    }

    let (_tmp, index) = build_test_index(&corpus);
    let syntext_result = syntext_matches(&index, &corpus, pattern, false, Some("*.py"));
    for m in &syntext_result {
        assert!(
            m.path.ends_with(".py"),
            "syntext path filter returned non-.py file: {}",
            m.path
        );
    }
    assert_no_false_negatives(
        &corpus,
        &rg_py,
        &syntext_result,
        "parse_query (*.py filter)",
    );
    drop(index);
}
/// Neither backend may return matches from a `build/` directory, which the
/// fixture corpus is expected to gitignore.
///
/// Only the absence of `build/` paths is checked; the two result sets are not
/// compared with each other. Skipped when `rg` is not on `PATH`.
#[test]
fn gitignore_excludes_build_dir() {
    // The mutex guards no data, so a lock poisoned by an earlier failing test
    // is safe to reuse; unwrapping would fail this test with an unrelated
    // PoisonError and bury the real failure.
    let _guard = correctness_build_lock()
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    if !rg_available() {
        eprintln!("SKIP: rg not on PATH");
        return;
    }
    assert_rg_version_pinned();

    let corpus = corpus_path();
    let pattern = "parse_query";
    let rg_result = rg_matches(&corpus, pattern, &[]);
    for m in &rg_result {
        assert!(
            !m.path.starts_with("build/") && !m.path.contains("/build/"),
            "rg returned gitignored file: {}",
            m.path
        );
    }

    let (_tmp, index) = build_test_index(&corpus);
    let syntext_result = syntext_matches(&index, &corpus, pattern, false, None);
    for m in &syntext_result {
        assert!(
            !m.path.starts_with("build/") && !m.path.contains("/build/"),
            "syntext returned gitignored file: {}",
            m.path
        );
    }
    drop(index);
}
/// A file that grows past `max_file_size` after indexing must yield no results
/// when the index is reopened.
///
/// The file is indexed while small (baseline: one match), then rewritten with
/// 2048 extra bytes against a 1024-byte limit; the reopened index must skip it
/// rather than report the match. Does not need ripgrep.
#[test]
fn oversized_file_after_growth_returns_no_results() {
    // The mutex guards no data, so a lock poisoned by an earlier failing test
    // is safe to reuse; unwrapping would fail this test with an unrelated
    // PoisonError and bury the real failure.
    let _guard = correctness_build_lock()
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);

    let repo = tempfile::TempDir::new().unwrap();
    let index_dir = tempfile::TempDir::new().unwrap();
    let file = repo.path().join("big.rs");
    std::fs::write(&file, "fn unique_sentinel_xyzzy() {}\n").unwrap();

    let config = Config {
        index_dir: index_dir.path().to_path_buf(),
        repo_root: repo.path().to_path_buf(),
        max_file_size: 1024,
        ..Config::default()
    };
    let index = Index::build(config.clone()).unwrap();
    let opts = SearchOptions::default();
    let results = index.search("unique_sentinel_xyzzy", &opts).unwrap();
    assert_eq!(
        results.len(),
        1,
        "baseline: must find match in original file"
    );
    drop(index);

    // Keep the matching line but push the file over the size limit.
    let mut bloated = String::from("fn unique_sentinel_xyzzy() {}\n");
    bloated.push_str(&"x".repeat(2048));
    std::fs::write(&file, &bloated).unwrap();

    let index2 = Index::open(config).unwrap();
    let results2 = index2.search("unique_sentinel_xyzzy", &opts).unwrap();
    assert_eq!(
        results2.len(),
        0,
        "oversized file must be skipped entirely, not verified against truncated content"
    );
    drop(index2);
}
/// A file replaced after indexing by a symlink pointing outside the repository
/// must not produce search results.
///
/// The index is built over a regular `data.rs`; the file is then swapped for a
/// symlink to a file with identical content in a separate directory, and the
/// reopened index must return nothing for it. Unix only; does not need ripgrep.
#[cfg(unix)]
#[test]
fn search_does_not_follow_symlink_outside_repo() {
    use std::os::unix::fs::symlink;

    // The mutex guards no data, so a lock poisoned by an earlier failing test
    // is safe to reuse; unwrapping would fail this test with an unrelated
    // PoisonError and bury the real failure.
    let _guard = correctness_build_lock()
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);

    let repo = tempfile::TempDir::new().unwrap();
    let index_dir = tempfile::TempDir::new().unwrap();
    let outside = tempfile::TempDir::new().unwrap();
    let real_file = repo.path().join("data.rs");
    std::fs::write(&real_file, "fn real_content() {}\n").unwrap();

    let config = Config {
        index_dir: index_dir.path().to_path_buf(),
        repo_root: repo.path().to_path_buf(),
        ..Config::default()
    };
    // Build while the path is still a regular file, then release the index.
    drop(Index::build(config.clone()).unwrap());

    // Same content, but stored outside the repository root.
    let secret = outside.path().join("secret.txt");
    std::fs::write(&secret, "fn real_content() {}\n").unwrap();
    std::fs::remove_file(&real_file).unwrap();
    symlink(&secret, &real_file).unwrap();

    let index2 = Index::open(config).unwrap();
    let opts = SearchOptions::default();
    let results = index2.search("real_content", &opts).unwrap();
    assert!(
        results.is_empty(),
        "search should not follow symlinks that escape repo root"
    );
}
/// Two consecutive ripgrep runs with the same arguments must produce the same
/// result set, otherwise it is useless as an oracle.
/// Skipped when `rg` is not on `PATH`.
#[test]
fn oracle_is_deterministic() {
    // The mutex guards no data, so a lock poisoned by an earlier failing test
    // is safe to reuse; unwrapping would fail this test with an unrelated
    // PoisonError and bury the real failure.
    let _guard = correctness_build_lock()
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    if !rg_available() {
        eprintln!("SKIP: rg not on PATH");
        return;
    }
    assert_rg_version_pinned();

    let corpus = corpus_path();
    let pattern = "parse_query";
    let run1 = rg_matches(&corpus, pattern, &[]);
    let run2 = rg_matches(&corpus, pattern, &[]);
    assert_eq!(
        run1, run2,
        "rg produced different results on two consecutive runs"
    );
}