use std::cmp::Ordering;
use std::collections::{BTreeMap, BTreeSet};
use std::fmt::Write as _;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicU64, Ordering as AtomicOrdering};
use regex::Regex;
use serde::{Deserialize, Serialize};
use crate::model::{Chunk, MatchLine, SearchResult};
use crate::ranking::penalties::file_path_penalty;
use super::{hash, path_string, AsrError, AsrResult, IndexStateRecord, RepoRecord};
/// On-disk shard format version; `validate_manifest` rejects manifests that carry any other value.
const FORMAT_VERSION: u32 = 1;
/// Query prefix that routes a query to regex search instead of literal search.
const REGEX_PREFIX: &str = "re:";
/// Upper bound on the number of match lines collected for a single chunk.
const MAX_MATCH_LINES: usize = 8;
/// Base score of every exact-shard hit; per-match bonuses are added on top of it.
const PERSISTENT_EXACT_SCORE_BASE: f64 = 20_000.0;
/// Three consecutive content bytes; the key type of the postings index.
type Trigram = [u8; 3];
/// Serializable description of an exact shard and of the outcome of loading or building it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExactShardSummary {
/// `"ready"` for summaries derived from a manifest, `"failed"` for `error_summary`.
pub status: String,
/// Shard format version the summary refers to.
pub format_version: u32,
/// Shard directory, rendered with `path_string`.
pub path: String,
/// Location of the manifest file inside the shard directory.
pub manifest_path: String,
/// Location of the chunk file inside the shard directory.
pub chunks_path: String,
/// Location of the postings file inside the shard directory.
pub postings_path: String,
/// Content hash copied from the manifest; empty for failure summaries.
pub content_hash: String,
/// Index hash copied from the manifest; empty for failure summaries.
pub index_hash: String,
/// Number of chunks in the shard (0 for failure summaries).
pub chunk_count: usize,
/// Number of distinct trigram keys in the postings (0 for failure summaries).
pub posting_count: usize,
/// `true` when the shard had to be rebuilt because reading the stored one failed.
pub rebuilt: bool,
/// Why the shard was rebuilt or why it failed; left out of the serialized form when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub rebuild_reason: Option<String>,
}
/// Manifest persisted next to the shard data; it ties the shard to one repository / index state
/// and records the hashes that `read_ready` re-verifies before trusting the files.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct ExactShardManifest {
/// Shard format version at write time.
format_version: u32,
// Repository identity, copied from the `RepoRecord` at write time.
repo_name: String,
source_type: String,
git_root: String,
// Index state the shard was built for, copied from the `IndexStateRecord` at write time.
head_commit: Option<String>,
dirty: bool,
modified: bool,
untracked: bool,
worktree_fingerprint: String,
/// Hash over the chunk contents (`content_hash_for_chunks`).
content_hash: String,
/// Hash of the serialized chunk file text.
chunks_hash: String,
/// Hash of the serialized postings file text.
postings_hash: String,
/// Combined hash over the three hashes above (`index_hash`).
index_hash: String,
/// Number of chunks written to the chunk file.
chunk_count: usize,
/// Number of distinct trigram keys written to the postings file.
posting_count: usize,
/// Copied from the index state's `indexed_at` value.
generated_at: String,
}
/// In-memory view of a loaded or freshly built exact shard.
#[derive(Debug, Clone)]
pub(crate) struct ExactShardSnapshot {
/// Chunks of the shard; posting values are indices into this vector.
pub chunks: Vec<Chunk>,
/// Summary describing where the shard lives and how it was obtained.
pub summary: ExactShardSummary,
/// Trigram -> ascending list of chunk indices whose content contains that trigram.
postings: BTreeMap<Trigram, Vec<usize>>,
}
/// Returns the base score of exact-shard hits; every result produced by this module scores at
/// least this much because match bonuses are only ever added to it.
pub(crate) fn persistent_exact_score_floor() -> f64 {
PERSISTENT_EXACT_SCORE_BASE
}
/// Checks that `query` can be served by the exact shard.
///
/// Queries without the `re:` prefix are always accepted. A regex query must carry a non-blank
/// pattern, must compile, and must contain at least one mandatory literal so that the trigram
/// index can narrow the candidate set.
///
/// # Errors
///
/// * `invalid_query` - nothing but whitespace follows the prefix.
/// * `invalid_regex` - the pattern does not compile.
/// * `query_too_broad` - no mandatory literal could be extracted from the pattern.
pub(crate) fn validate_asr_query(query: &str) -> AsrResult<()> {
    let pattern = match query.trim().strip_prefix(REGEX_PREFIX) {
        Some(pattern) => pattern,
        None => return Ok(()),
    };

    if pattern.trim().is_empty() {
        return Err(AsrError::new(
            "invalid_query",
            "Regex query must include a non-empty pattern after re:",
        ));
    }

    // Only the compile outcome matters here; the compiled regex itself is discarded.
    Regex::new(pattern).map_err(|err| {
        AsrError::new("invalid_regex", format!("Regex query is invalid: {err}"))
    })?;

    let has_mandatory_literal = matches!(
        mandatory_literals_for_safe_regex(pattern),
        Some(literals) if !literals.is_empty()
    );
    if has_mandatory_literal {
        Ok(())
    } else {
        Err(AsrError::new(
            "query_too_broad",
            "Regex query must contain at least one mandatory literal of 3 bytes or more; ASR does not run broad whole-repository regex scans",
        ))
    }
}
/// Builds the exact shard for `chunks`, writes it below `exact_root`, and returns its summary.
///
/// The summary is never marked as rebuilt and carries no rebuild reason.
///
/// # Errors
///
/// Propagates every error reported by `build_and_write`.
pub(crate) fn write_ready(
    exact_root: &Path,
    repo: &RepoRecord,
    state: &IndexStateRecord,
    chunks: &[Chunk],
) -> AsrResult<ExactShardSummary> {
    build_and_write(exact_root, repo, state, chunks, false, None).map(|snapshot| snapshot.summary)
}
/// Returns a usable shard snapshot, rebuilding the shard when the stored one cannot be used.
///
/// The stored shard is loaded first. Any failure to do so triggers a rebuild from `chunks`; the
/// resulting summary is flagged as rebuilt and records the failure as `"<code>: <message>"`.
///
/// # Errors
///
/// Returns the error of the rebuild when that fails as well.
pub(crate) fn ensure_ready(
    exact_root: &Path,
    repo: &RepoRecord,
    state: &IndexStateRecord,
    chunks: &[Chunk],
) -> AsrResult<ExactShardSnapshot> {
    read_ready(exact_root, repo, state).or_else(|err| {
        let reason = format!("{}: {}", err.code, err.message);
        build_and_write(exact_root, repo, state, chunks, true, Some(reason))
    })
}
pub(crate) fn error_summary(
exact_root: &Path,
repo_name: &str,
message: &str,
) -> ExactShardSummary {
let paths = shard_paths(exact_root, repo_name);
ExactShardSummary {
status: "failed".to_string(),
format_version: FORMAT_VERSION,
path: path_string(&paths.dir),
manifest_path: path_string(&paths.manifest),
chunks_path: path_string(&paths.chunks),
postings_path: path_string(&paths.postings),
content_hash: String::new(),
index_hash: String::new(),
chunk_count: 0,
posting_count: 0,
rebuilt: false,
rebuild_reason: Some(message.to_string()),
}
}
impl ExactShardSnapshot {
    /// Searches the shard for `query` and returns at most `top_k` ranked results.
    ///
    /// The query is trimmed first. A query starting with `re:` is treated as a regular
    /// expression, anything else as a literal substring. A blank query or `top_k == 0` yields
    /// no results.
    pub(crate) fn search(&self, query: &str, top_k: usize) -> Vec<SearchResult> {
        let query = query.trim();
        if query.is_empty() || top_k == 0 {
            return Vec::new();
        }
        match query.strip_prefix(REGEX_PREFIX) {
            Some(pattern) => self.search_regex(pattern, top_k),
            None => self.search_literal(query, top_k),
        }
    }

    /// Literal substring search.
    ///
    /// Needles shorter than three bytes have no trigrams and therefore never match. Each hit
    /// scores the base score plus the number of reported match lines.
    fn search_literal(&self, needle: &str, top_k: usize) -> Vec<SearchResult> {
        let required = unique_trigrams(needle.as_bytes());
        if required.is_empty() {
            return Vec::new();
        }
        let results = self
            .candidate_chunks(&required)
            .into_iter()
            .filter_map(|idx| self.chunks.get(idx))
            // The trigram filter only narrows the candidates; confirm on the real content.
            .filter(|chunk| chunk.content.contains(needle))
            .map(|chunk| {
                let match_lines = literal_match_lines(chunk, needle);
                SearchResult {
                    chunk: chunk.clone(),
                    score: PERSISTENT_EXACT_SCORE_BASE + match_lines.len() as f64,
                    match_lines,
                }
            })
            .collect();
        sort_and_limit(results, top_k)
    }

    /// Regex search, pre-filtered by the trigrams of the pattern's mandatory literals.
    ///
    /// Returns no results when the pattern has no mandatory literal or does not compile. Each
    /// hit scores the base score plus 100 plus the number of reported match lines.
    fn search_regex(&self, pattern: &str, top_k: usize) -> Vec<SearchResult> {
        let literals = match mandatory_literals_for_safe_regex(pattern) {
            Some(literals) if !literals.is_empty() => literals,
            _ => return Vec::new(),
        };
        let mut required: Vec<Trigram> = literals
            .iter()
            .flat_map(|literal| unique_trigrams(literal.as_bytes()))
            .collect();
        required.sort_unstable();
        required.dedup();
        if required.is_empty() {
            return Vec::new();
        }
        let regex = match Regex::new(pattern) {
            Ok(regex) => regex,
            Err(_) => return Vec::new(),
        };
        let results = self
            .candidate_chunks(&required)
            .into_iter()
            .filter_map(|idx| self.chunks.get(idx))
            .filter(|chunk| regex.is_match(&chunk.content))
            .map(|chunk| {
                let match_lines = regex_match_lines(chunk, &regex);
                SearchResult {
                    chunk: chunk.clone(),
                    score: PERSISTENT_EXACT_SCORE_BASE + 100.0 + match_lines.len() as f64,
                    match_lines,
                }
            })
            .collect();
        sort_and_limit(results, top_k)
    }

    /// Returns the indices of the chunks that contain every trigram in `required`.
    ///
    /// The result is empty when `required` is empty or when any trigram has no posting list.
    fn candidate_chunks(&self, required: &[Trigram]) -> Vec<usize> {
        let mut lists: Vec<&[usize]> = Vec::with_capacity(required.len());
        for trigram in required {
            let Some(list) = self.postings.get(trigram) else {
                // A trigram that occurs nowhere rules out every chunk.
                return Vec::new();
            };
            lists.push(list.as_slice());
        }
        // Start from the shortest list so the running intersection stays as small as possible.
        lists.sort_by_key(|list| list.len());
        let Some((shortest, rest)) = lists.split_first() else {
            return Vec::new();
        };
        let mut current = shortest.to_vec();
        for list in rest {
            current = intersect_sorted(&current, list);
            if current.is_empty() {
                break;
            }
        }
        current
    }
}
/// Loads the stored shard of `repo` and verifies it against the manifest and the current index
/// state.
///
/// The checks run in a fixed order and the first failure is returned: manifest readable
/// (`exact_shard_missing`), manifest parses, manifest matches `repo` / `state`
/// (see `validate_manifest`), then hash and count checks for the chunk file, the chunk contents,
/// the postings file and finally the combined index hash (all `exact_shard_corrupt`).
fn read_ready(
exact_root: &Path,
repo: &RepoRecord,
state: &IndexStateRecord,
) -> AsrResult<ExactShardSnapshot> {
let paths = shard_paths(exact_root, &repo.name);
// An unreadable manifest is reported as a missing shard, not as corruption.
let manifest_text = fs::read_to_string(&paths.manifest).map_err(|err| {
AsrError::with_path(
"exact_shard_missing",
format!("Unable to read exact shard manifest: {err}"),
path_string(&paths.manifest),
)
})?;
let manifest: ExactShardManifest = serde_json::from_str(&manifest_text).map_err(|err| {
AsrError::with_path(
"exact_shard_corrupt",
format!("Exact shard manifest is invalid JSON: {err}"),
path_string(&paths.manifest),
)
})?;
// Reject stale or version-mismatched shards before touching the data files.
validate_manifest(repo, state, &manifest, &paths)?;
let chunks_text = fs::read_to_string(&paths.chunks).map_err(|err| {
AsrError::with_path(
"exact_shard_corrupt",
format!("Unable to read exact shard chunks: {err}"),
path_string(&paths.chunks),
)
})?;
// The hash of the raw file text is checked before the text is parsed.
let actual_chunks_hash = hash_text(&chunks_text);
if actual_chunks_hash != manifest.chunks_hash {
return Err(AsrError::with_path(
"exact_shard_corrupt",
"Exact shard chunks hash does not match manifest",
path_string(&paths.chunks),
));
}
let chunks = parse_chunks(&paths.chunks, &chunks_text)?;
if chunks.len() != manifest.chunk_count {
return Err(AsrError::with_path(
"exact_shard_corrupt",
"Exact shard chunk count does not match manifest",
path_string(&paths.chunks),
));
}
// Second, content-level hash computed from the parsed chunks.
let actual_content_hash = content_hash_for_chunks(&chunks);
if actual_content_hash != manifest.content_hash {
return Err(AsrError::with_path(
"exact_shard_corrupt",
"Exact shard chunk content hash does not match manifest",
path_string(&paths.chunks),
));
}
let postings_text = fs::read_to_string(&paths.postings).map_err(|err| {
AsrError::with_path(
"exact_shard_corrupt",
format!("Unable to read exact shard postings: {err}"),
path_string(&paths.postings),
)
})?;
let postings_hash = hash_text(&postings_text);
if postings_hash != manifest.postings_hash {
return Err(AsrError::with_path(
"exact_shard_corrupt",
"Exact shard postings hash does not match manifest",
path_string(&paths.postings),
));
}
// Parsing also rejects posting indices that do not address one of the loaded chunks.
let postings = parse_postings(&postings_text, chunks.len())?;
if postings.len() != manifest.posting_count {
return Err(AsrError::with_path(
"exact_shard_corrupt",
"Exact shard posting count does not match manifest",
path_string(&paths.postings),
));
}
// The local binding shadows the `index_hash` function only after the call has been made.
let index_hash = index_hash(&actual_content_hash, &actual_chunks_hash, &postings_hash);
if index_hash != manifest.index_hash {
return Err(AsrError::with_path(
"exact_shard_corrupt",
"Exact shard index hash does not match manifest",
path_string(&paths.manifest),
));
}
Ok(ExactShardSnapshot {
chunks,
postings,
// A shard that was read successfully is never reported as rebuilt.
summary: summary_from_manifest(&paths, &manifest, false, None),
})
}
/// Builds the shard for `chunks`, writes its three files, and returns the in-memory snapshot.
///
/// `rebuilt` and `rebuild_reason` are only passed through into the returned summary.
///
/// Fails with `exact_shard_empty` when `chunks` is empty, with `repo_index_corrupt` when the
/// content hash of the chunks differs from `state.content_hash`, and with
/// `exact_shard_write_failed` when a directory, the manifest serialization or a file write fails.
fn build_and_write(
exact_root: &Path,
repo: &RepoRecord,
state: &IndexStateRecord,
chunks: &[Chunk],
rebuilt: bool,
rebuild_reason: Option<String>,
) -> AsrResult<ExactShardSnapshot> {
if chunks.is_empty() {
return Err(AsrError::new(
"exact_shard_empty",
"Cannot write exact shard without chunks",
));
}
// The root and the shard directory are created separately so each failure names its own path.
fs::create_dir_all(exact_root).map_err(|err| {
AsrError::with_path(
"exact_shard_write_failed",
format!("Failed to create exact shard root: {err}"),
path_string(exact_root),
)
})?;
let paths = shard_paths(exact_root, &repo.name);
fs::create_dir_all(&paths.dir).map_err(|err| {
AsrError::with_path(
"exact_shard_write_failed",
format!("Failed to create exact shard directory: {err}"),
path_string(&paths.dir),
)
})?;
// Chunks are put into canonical order before serializing and hashing.
let canonical_chunks = canonical_chunks(chunks);
let chunks_text = chunks_to_jsonl(&canonical_chunks)?;
let chunks_hash = hash_text(&chunks_text);
let content_hash = content_hash_for_chunks(&canonical_chunks);
// Refuse to persist a shard whose content disagrees with the recorded index state.
if state.content_hash.as_deref() != Some(content_hash.as_str()) {
return Err(AsrError::new(
"repo_index_corrupt",
"Cannot write exact shard because chunk hash differs from index state",
));
}
let postings = build_postings(&canonical_chunks);
let postings_text = postings_to_text(&postings);
let postings_hash = hash_text(&postings_text);
let index_hash = index_hash(&content_hash, &chunks_hash, &postings_hash);
let manifest = ExactShardManifest {
format_version: FORMAT_VERSION,
repo_name: repo.name.clone(),
source_type: repo.source_type.clone(),
git_root: repo.git_root.clone(),
head_commit: state.head_commit.clone(),
dirty: state.dirty,
modified: state.modified,
untracked: state.untracked,
worktree_fingerprint: state.worktree_fingerprint.clone(),
content_hash,
chunks_hash,
postings_hash,
index_hash,
chunk_count: canonical_chunks.len(),
posting_count: postings.len(),
generated_at: state.indexed_at.clone(),
};
let manifest_text = serde_json::to_string_pretty(&manifest).map_err(|err| {
AsrError::new(
"exact_shard_write_failed",
format!("Failed to serialize exact shard manifest: {err}"),
)
})?;
// The manifest goes last: the data files it describes are already in place when it appears.
write_file_atomically(&paths.chunks, &chunks_text)?;
write_file_atomically(&paths.postings, &postings_text)?;
write_file_atomically(&paths.manifest, &manifest_text)?;
Ok(ExactShardSnapshot {
chunks: canonical_chunks,
postings,
summary: summary_from_manifest(&paths, &manifest, rebuilt, rebuild_reason),
})
}
/// Verifies that a stored manifest belongs to `repo` in its current `state`.
///
/// # Errors
///
/// * `exact_shard_version_mismatch` - the manifest uses a different format version.
/// * `exact_shard_stale` - any repository or index-state field, including the content hash,
///   differs from the current values.
fn validate_manifest(
    repo: &RepoRecord,
    state: &IndexStateRecord,
    manifest: &ExactShardManifest,
    paths: &ShardPaths,
) -> AsrResult<()> {
    if manifest.format_version != FORMAT_VERSION {
        return Err(AsrError::with_path(
            "exact_shard_version_mismatch",
            format!(
                "Exact shard format version {} is not supported by ASR format {}",
                manifest.format_version, FORMAT_VERSION
            ),
            path_string(&paths.manifest),
        ));
    }

    let same_repo = manifest.repo_name == repo.name
        && manifest.source_type == repo.source_type
        && manifest.git_root == repo.git_root;
    let same_state = manifest.head_commit == state.head_commit
        && manifest.dirty == state.dirty
        && manifest.modified == state.modified
        && manifest.untracked == state.untracked
        && manifest.worktree_fingerprint == state.worktree_fingerprint
        && state.content_hash.as_deref() == Some(manifest.content_hash.as_str());

    if same_repo && same_state {
        return Ok(());
    }
    Err(AsrError::with_path(
        "exact_shard_stale",
        "Exact shard manifest does not match current ASR index state",
        path_string(&paths.manifest),
    ))
}
fn summary_from_manifest(
paths: &ShardPaths,
manifest: &ExactShardManifest,
rebuilt: bool,
rebuild_reason: Option<String>,
) -> ExactShardSummary {
ExactShardSummary {
status: "ready".to_string(),
format_version: manifest.format_version,
path: path_string(&paths.dir),
manifest_path: path_string(&paths.manifest),
chunks_path: path_string(&paths.chunks),
postings_path: path_string(&paths.postings),
content_hash: manifest.content_hash.clone(),
index_hash: manifest.index_hash.clone(),
chunk_count: manifest.chunk_count,
posting_count: manifest.posting_count,
rebuilt,
rebuild_reason,
}
}
/// File-system locations of one repository's exact shard.
#[derive(Debug, Clone)]
struct ShardPaths {
/// Directory that holds the three shard files.
dir: PathBuf,
/// `manifest.json` inside `dir`.
manifest: PathBuf,
/// `chunks.jsonl` inside `dir`.
chunks: PathBuf,
/// `postings.tsv` inside `dir`.
postings: PathBuf,
}
/// Computes the shard directory and file paths for `repo_name` below `exact_root`.
///
/// The directory name is the sanitized repository name (see `repo_key`).
fn shard_paths(exact_root: &Path, repo_name: &str) -> ShardPaths {
    let dir = exact_root.join(repo_key(repo_name));
    let manifest = dir.join("manifest.json");
    let chunks = dir.join("chunks.jsonl");
    let postings = dir.join("postings.tsv");
    ShardPaths {
        dir,
        manifest,
        chunks,
        postings,
    }
}
/// Turns a repository name into a single, safe directory name.
///
/// ASCII letters, digits, `-`, `_` and `.` are kept; every other character becomes `_`.
/// A name that is empty or consists only of dots (`""`, `"."`, `".."`, ...) would resolve to
/// the shard root itself or to one of its ancestors once joined onto a path, so its dots are
/// replaced by underscores as well and the empty name maps to `"_"`.
fn repo_key(repo_name: &str) -> String {
    let key: String = repo_name
        .chars()
        .map(|ch| {
            if ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.') {
                ch
            } else {
                '_'
            }
        })
        .collect();
    // `all` is true for the empty string too, which covers the empty-name case.
    if key.chars().all(|ch| ch == '.') {
        return "_".repeat(key.len().max(1));
    }
    key
}
/// Returns a copy of `chunks` in canonical order.
///
/// Chunks are ordered by file path, then start line, then end line, then content, using a
/// stable sort, so the same set of chunks always serializes the same way.
fn canonical_chunks(chunks: &[Chunk]) -> Vec<Chunk> {
    let mut ordered = chunks.to_vec();
    ordered.sort_by(|a, b| {
        // Tuples compare lexicographically, field by field.
        let key_a = (&a.file_path, &a.start_line, &a.end_line, &a.content);
        let key_b = (&b.file_path, &b.start_line, &b.end_line, &b.content);
        key_a.cmp(&key_b)
    });
    ordered
}
fn chunks_to_jsonl(chunks: &[Chunk]) -> AsrResult<String> {
let mut out = String::new();
for chunk in chunks {
let line = serde_json::to_string(chunk).map_err(|err| {
AsrError::new(
"exact_shard_write_failed",
format!("Failed to serialize exact shard chunk: {err}"),
)
})?;
out.push_str(&line);
out.push('\n');
}
Ok(out)
}
/// Parses the JSON Lines text of a chunk file.
///
/// Blank lines are skipped. `path` is used only to label errors.
///
/// # Errors
///
/// Returns `exact_shard_corrupt` with the 1-based line number of the first line that is not a
/// valid chunk document.
fn parse_chunks(path: &Path, text: &str) -> AsrResult<Vec<Chunk>> {
    text.lines()
        .enumerate()
        .filter(|(_, line)| !line.trim().is_empty())
        .map(|(row, line)| {
            serde_json::from_str::<Chunk>(line).map_err(|err| {
                AsrError::with_path(
                    "exact_shard_corrupt",
                    format!(
                        "Invalid exact shard chunk JSON on line {}: {err}",
                        row + 1
                    ),
                    path_string(path),
                )
            })
        })
        .collect()
}
/// Builds the trigram index: each trigram maps to the ascending, duplicate-free list of indices
/// of the chunks whose content contains it.
fn build_postings(chunks: &[Chunk]) -> BTreeMap<Trigram, Vec<usize>> {
    let mut members_by_trigram: BTreeMap<Trigram, BTreeSet<usize>> = BTreeMap::new();
    chunks
        .iter()
        .enumerate()
        .flat_map(|(chunk_index, chunk)| {
            unique_trigrams(chunk.content.as_bytes())
                .into_iter()
                .map(move |trigram| (trigram, chunk_index))
        })
        .for_each(|(trigram, chunk_index)| {
            members_by_trigram
                .entry(trigram)
                .or_default()
                .insert(chunk_index);
        });

    let mut postings = BTreeMap::new();
    for (trigram, members) in members_by_trigram {
        postings.insert(trigram, members.into_iter().collect::<Vec<usize>>());
    }
    postings
}
/// Serializes the postings as text, one row per trigram in key order:
/// six hex digits, a tab, the comma-separated chunk indices, and a newline.
fn postings_to_text(postings: &BTreeMap<Trigram, Vec<usize>>) -> String {
    let mut text = String::new();
    for (trigram, members) in postings {
        text.push_str(&trigram_hex(*trigram));
        text.push('\t');
        let mut remaining = members.iter();
        // Writing into a `String` cannot fail, so the `fmt::Result` is ignored.
        if let Some(first) = remaining.next() {
            let _ = write!(text, "{first}");
        }
        for member in remaining {
            let _ = write!(text, ",{member}");
        }
        text.push('\n');
    }
    text
}
/// Parses the text form of the postings (see `postings_to_text`).
///
/// Blank lines are skipped and empty index fields are ignored. The indices of every row are
/// sorted and deduplicated; when a trigram key occurs more than once, the last row wins.
///
/// # Errors
///
/// Returns `exact_shard_corrupt` (with the 1-based line number) for a row without a tab, an
/// invalid trigram key, an index that is not a number, or an index `>= chunk_count`.
fn parse_postings(text: &str, chunk_count: usize) -> AsrResult<BTreeMap<Trigram, Vec<usize>>> {
    let corrupt = |message: String| AsrError::new("exact_shard_corrupt", message);
    let mut postings = BTreeMap::new();

    for (row, line) in text.lines().enumerate() {
        if line.trim().is_empty() {
            continue;
        }
        let line_no = row + 1;

        let (hex, values) = line
            .split_once('\t')
            .ok_or_else(|| corrupt(format!("Invalid posting row on line {}", line_no)))?;
        let trigram = trigram_from_hex(hex)
            .ok_or_else(|| corrupt(format!("Invalid trigram key on line {}", line_no)))?;

        let mut indices = values
            .split(',')
            .filter(|field| !field.is_empty())
            .map(|field| {
                let index = field.parse::<usize>().map_err(|err| {
                    corrupt(format!(
                        "Invalid posting index on line {}: {err}",
                        line_no
                    ))
                })?;
                if index < chunk_count {
                    Ok(index)
                } else {
                    Err(corrupt(format!(
                        "Posting index out of bounds on line {}",
                        line_no
                    )))
                }
            })
            .collect::<AsrResult<Vec<usize>>>()?;
        indices.sort_unstable();
        indices.dedup();
        postings.insert(trigram, indices);
    }

    Ok(postings)
}
/// Process-wide sequence number that keeps temporary file names of concurrent writes apart.
static WRITE_COUNTER: AtomicU64 = AtomicU64::new(0);

/// Writes `text` to `path` by writing a sibling temporary file and renaming it over `path`.
///
/// The temporary file lives in the same directory and is named
/// `.<file name>.<process id>.<sequence>.tmp`. `fs::rename` replaces an existing destination,
/// so the destination is never removed beforehand; deleting it first would leave a window in
/// which no file exists at `path`. The temporary file is removed (best effort) whenever the
/// write or the rename fails.
///
/// # Errors
///
/// Returns `exact_shard_write_failed` when the temporary file cannot be written or the rename
/// fails.
fn write_file_atomically(path: &Path, text: &str) -> AsrResult<()> {
    let file_name = path
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or("exact-shard-file");
    // Only uniqueness of the counter value matters, so relaxed ordering is sufficient.
    let seq = WRITE_COUNTER.fetch_add(1, AtomicOrdering::Relaxed);
    let tmp = path.with_file_name(format!(".{file_name}.{}.{seq}.tmp", std::process::id()));

    if let Err(err) = fs::write(&tmp, text) {
        // A failed write may leave a partial temporary file behind; do not let it pile up.
        let _ = fs::remove_file(&tmp);
        return Err(AsrError::with_path(
            "exact_shard_write_failed",
            format!("Failed to write exact shard file: {err}"),
            path_string(&tmp),
        ));
    }

    fs::rename(&tmp, path).map_err(|err| {
        let _ = fs::remove_file(&tmp);
        AsrError::with_path(
            "exact_shard_write_failed",
            format!("Failed to commit exact shard file atomically: {err}"),
            path_string(path),
        )
    })
}
/// Renders a trigram as six lowercase hex digits, two per byte, in byte order.
fn trigram_hex(trigram: Trigram) -> String {
    trigram
        .iter()
        .map(|byte| format!("{:02x}", byte))
        .collect::<String>()
}
/// Parses a trigram key written as exactly six hex digits (upper or lower case).
///
/// Returns `None` for any other input. Every byte is checked to be an ASCII hex digit before
/// anything is decoded: this keeps multi-byte UTF-8 input from being sliced in the middle of a
/// character, and rejects the leading `+` sign that `u8::from_str_radix` would accept.
fn trigram_from_hex(value: &str) -> Option<Trigram> {
    let digits = value.as_bytes();
    if digits.len() != 6 || !digits.iter().all(u8::is_ascii_hexdigit) {
        return None;
    }
    let mut trigram = [0u8; 3];
    for (slot, pair) in trigram.iter_mut().zip(digits.chunks_exact(2)) {
        // Each pair consists of two ASCII hex digits, so both conversions succeed.
        let pair_text = std::str::from_utf8(pair).ok()?;
        *slot = u8::from_str_radix(pair_text, 16).ok()?;
    }
    Some(trigram)
}
fn trigrams(bytes: &[u8]) -> impl Iterator<Item = Trigram> + '_ {
bytes
.windows(3)
.map(|window| [window[0], window[1], window[2]])
}
/// Returns the distinct trigrams of `bytes` in ascending order.
///
/// Inputs shorter than three bytes have no trigrams and produce an empty vector.
fn unique_trigrams(bytes: &[u8]) -> Vec<Trigram> {
    // `trigrams` yields nothing for short inputs, so no separate length check is needed.
    let mut distinct: Vec<Trigram> = trigrams(bytes).collect();
    distinct.sort_unstable();
    distinct.dedup();
    distinct
}
/// Intersects two ascending index lists with a single merge pass.
///
/// A value is emitted once per pairing of equal heads, so for duplicate-free inputs the result
/// is the duplicate-free, ascending set intersection.
fn intersect_sorted(left: &[usize], right: &[usize]) -> Vec<usize> {
    let mut shared = Vec::new();
    let (mut a, mut b) = (left, right);
    while let (Some(&head_a), Some(&head_b)) = (a.first(), b.first()) {
        match head_a.cmp(&head_b) {
            Ordering::Less => a = &a[1..],
            Ordering::Greater => b = &b[1..],
            Ordering::Equal => {
                shared.push(head_a);
                a = &a[1..];
                b = &b[1..];
            }
        }
    }
    shared
}
/// Collects up to `MAX_MATCH_LINES` lines of `chunk` that contain `needle`.
///
/// Each entry carries the line number (chunk start line plus the line's offset) and the
/// trimmed line text. When no single line contains the needle - for example because the needle
/// itself spans a line break - one entry is reported for the line on which the first
/// occurrence in the whole content starts.
fn literal_match_lines(chunk: &Chunk, needle: &str) -> Vec<MatchLine> {
    let per_line: Vec<MatchLine> = chunk
        .content
        .lines()
        .enumerate()
        .filter(|(_, text)| text.contains(needle))
        .take(MAX_MATCH_LINES)
        .map(|(offset, text)| MatchLine {
            line: chunk.start_line + offset,
            content: text.trim().to_string(),
        })
        .collect();
    if !per_line.is_empty() {
        return per_line;
    }

    chunk
        .content
        .find(needle)
        .map(|at| MatchLine {
            line: byte_to_line(&chunk.content, at, chunk.start_line),
            content: preview_at(&chunk.content, at),
        })
        .into_iter()
        .collect()
}
/// Reports the first `MAX_MATCH_LINES` matches of `regex` in `chunk`.
///
/// There is one entry per match, not per line: a line that contains several matches appears
/// several times. Each entry holds the line number of the match start and that line's trimmed
/// text.
fn regex_match_lines(chunk: &Chunk, regex: &Regex) -> Vec<MatchLine> {
    let content = &chunk.content;
    regex
        .find_iter(content)
        .take(MAX_MATCH_LINES)
        .map(|found| {
            let at = found.start();
            MatchLine {
                line: byte_to_line(content, at, chunk.start_line),
                content: preview_at(content, at),
            }
        })
        .collect()
}
/// Converts a byte offset into a line number.
///
/// The result is `start_line` plus the number of `\n` characters before `byte_idx`.
///
/// # Panics
///
/// Panics when `byte_idx` is past the end of `content` or not on a character boundary.
fn byte_to_line(content: &str, byte_idx: usize, start_line: usize) -> usize {
    let preceding = &content[..byte_idx];
    let newlines_before = preceding.matches('\n').count();
    start_line + newlines_before
}
/// Returns the trimmed text of the line that contains byte offset `byte_idx`.
///
/// # Panics
///
/// Panics when `byte_idx` is past the end of `content` or not on a character boundary.
fn preview_at(content: &str, byte_idx: usize) -> String {
    let (head, tail) = content.split_at(byte_idx);
    // The line starts right after the last newline before the offset, or at the very start.
    let line_start = match head.rfind('\n') {
        Some(newline) => newline + 1,
        None => 0,
    };
    // The line ends at the first newline at or after the offset, or at the very end.
    let line_end = match tail.find('\n') {
        Some(newline) => byte_idx + newline,
        None => content.len(),
    };
    content[line_start..line_end].trim().to_string()
}
/// Extracts the literal fragments that every match of `pattern` must contain.
///
/// This is a conservative scan of the pattern text, not a regex parser. It reads runs of
/// ordinary characters as literals and cuts a run at anything that is not plain text.
///
/// * `None` is returned when the pattern uses a construct under which a literal may be
///   optional or repeated zero times (`|`, `?`, `*`, `{`), ends in a dangling backslash, or
///   leaves a character class unclosed.
/// * Otherwise the fragments of three bytes or more are returned sorted and deduplicated; the
///   list may be empty.
///
/// Character classes are skipped as a whole. Nested classes (`[a[bc]]`, `[[:alpha:]]`) and a
/// `]` placed directly after `[` or `[^` do not end the class early, so text inside or right
/// after a class is never mistaken for a mandatory literal. The operands of `\x`, `\u`, `\U`,
/// `\p` and `\P` are skipped for the same reason.
fn mandatory_literals_for_safe_regex(pattern: &str) -> Option<Vec<String>> {
    /// Position of the scanner relative to the opening bracket of the current class.
    #[derive(Clone, Copy)]
    enum ClassPos {
        /// Directly after `[`.
        AfterOpen,
        /// Directly after `[^`.
        AfterNegation,
        /// Anywhere later in the class.
        Body,
    }

    let mut literals = Vec::new();
    let mut current = String::new();
    let mut chars = pattern.chars().peekable();
    // Nesting depth of bracketed classes; 0 means the scanner is outside of any class.
    let mut class_depth = 0usize;
    let mut class_pos = ClassPos::Body;

    while let Some(ch) = chars.next() {
        if class_depth > 0 {
            class_pos = match (ch, class_pos) {
                ('\\', _) => {
                    // An escaped character can never open or close a class.
                    chars.next();
                    ClassPos::Body
                }
                ('[', _) => {
                    class_depth += 1;
                    ClassPos::AfterOpen
                }
                ('^', ClassPos::AfterOpen) => ClassPos::AfterNegation,
                // A `]` in first position is a class member, not the closing bracket.
                (']', ClassPos::AfterOpen) | (']', ClassPos::AfterNegation) => ClassPos::Body,
                (']', ClassPos::Body) => {
                    class_depth -= 1;
                    ClassPos::Body
                }
                _ => ClassPos::Body,
            };
            continue;
        }
        match ch {
            '[' => {
                flush_literal(&mut literals, &mut current);
                class_depth = 1;
                class_pos = ClassPos::AfterOpen;
            }
            '\\' => {
                let escaped = chars.next()?;
                if escaped.is_ascii_alphanumeric() {
                    // Alphanumeric escapes are classes, anchors or code points, never plain text.
                    flush_literal(&mut literals, &mut current);
                    let operand_len = match escaped {
                        'x' => 2,
                        'u' => 4,
                        'U' => 8,
                        'p' | 'P' => 1,
                        _ => 0,
                    };
                    // The braced operand form is left to the `{` rule below, which rejects it.
                    if chars.peek() != Some(&'{') {
                        for _ in 0..operand_len {
                            chars.next();
                        }
                    }
                } else {
                    // Escaped punctuation stands for itself.
                    current.push(escaped);
                }
            }
            '|' | '?' | '*' | '{' => return None,
            '.' | '^' | '$' | '(' | ')' | '+' => {
                flush_literal(&mut literals, &mut current);
            }
            _ => current.push(ch),
        }
    }

    if class_depth > 0 {
        return None;
    }
    flush_literal(&mut literals, &mut current);
    literals.sort();
    literals.dedup();
    Some(literals)
}

/// Ends the literal run in `current`: runs of three bytes or more are moved into `literals`,
/// shorter runs are discarded. `current` is empty afterwards in both cases.
fn flush_literal(literals: &mut Vec<String>, current: &mut String) {
    if current.len() >= 3 {
        literals.push(std::mem::take(current));
    } else {
        current.clear();
    }
}
/// Ranks `results` and keeps the best `top_k`.
///
/// Order: penalty-adjusted score (highest first), raw score (highest first), then file path,
/// start line and end line ascending. Scores that cannot be compared are treated as equal.
fn sort_and_limit(mut results: Vec<SearchResult>, top_k: usize) -> Vec<SearchResult> {
    // Descending float comparison: the larger value sorts first.
    let descending = |a: f64, b: f64| b.partial_cmp(&a).unwrap_or(Ordering::Equal);

    results.sort_by(|left, right| {
        let (left_chunk, right_chunk) = (&left.chunk, &right.chunk);
        descending(exact_rank_score(left), exact_rank_score(right))
            .then_with(|| descending(left.score, right.score))
            .then_with(|| left_chunk.file_path.cmp(&right_chunk.file_path))
            .then_with(|| left_chunk.start_line.cmp(&right_chunk.start_line))
            .then_with(|| left_chunk.end_line.cmp(&right_chunk.end_line))
    });
    results.truncate(top_k);
    results
}
/// Score used for ranking: the result's raw score multiplied by the penalty factor that
/// `file_path_penalty` returns for the chunk's file path.
fn exact_rank_score(result: &SearchResult) -> f64 {
result.score * file_path_penalty(&result.chunk.file_path)
}
/// Local shorthand that forwards to `hash::content_hash_for_chunks`.
fn content_hash_for_chunks(chunks: &[Chunk]) -> String {
hash::content_hash_for_chunks(chunks)
}
/// Combines the three shard hashes into one index hash.
///
/// Starting from `hash::FNV_OFFSET`, the content hash, the chunk-file hash and the
/// postings-file hash are fed in, in that order; the result is rendered as 16 zero-padded
/// lowercase hex digits.
fn index_hash(content_hash: &str, chunks_hash: &str, postings_hash: &str) -> String {
    let mut digest = hash::FNV_OFFSET;
    for part in &[content_hash, chunks_hash, postings_hash] {
        hash::update_hash(&mut digest, part.as_bytes());
    }
    format!("{:016x}", digest)
}
/// Local shorthand that forwards to `hash::hash_text`.
fn hash_text(text: &str) -> String {
hash::hash_text(text)
}
// Unit tests for the mandatory-literal extraction that guards regex queries.
#[cfg(test)]
mod tests {
use super::*;
// An escaped `{` is plain text and must not trigger the quantifier rejection.
#[test]
fn escaped_brace_is_accepted_as_literal() {
let result = mandatory_literals_for_safe_regex("HashMap\\{");
assert!(
result.is_some(),
"escaped brace should be treated as a literal"
);
let lits = result.unwrap();
assert!(
lits.iter().any(|l| l.contains("HashMap")),
"HashMap should be a mandatory literal"
);
}
// A counted repetition is rejected as a whole.
#[test]
fn unescaped_brace_is_rejected() {
assert!(mandatory_literals_for_safe_regex("a{3}").is_none());
}
// With alternation no single literal is mandatory.
#[test]
fn alternation_is_rejected() {
assert!(mandatory_literals_for_safe_regex("foo|bar").is_none());
}
// `*` is rejected even when a literal precedes it.
#[test]
fn star_quantifier_is_rejected() {
assert!(mandatory_literals_for_safe_regex("foo.*").is_none());
}
// An escaped `*` is plain text.
#[test]
fn escaped_star_is_accepted() {
let result = mandatory_literals_for_safe_regex("retry\\*");
assert!(result.is_some(), "escaped star should be a literal");
}
// A pattern without metacharacters yields at least one fragment.
#[test]
fn plain_literal_returns_fragments() {
let result = mandatory_literals_for_safe_regex("async fn main");
assert!(result.is_some());
let lits = result.unwrap();
assert!(!lits.is_empty());
}
// Fragments shorter than three bytes are dropped, leaving an empty (but `Some`) list.
#[test]
fn short_pattern_below_trigram_threshold_returns_empty_vec() {
let result = mandatory_literals_for_safe_regex("fn");
assert_eq!(result, Some(vec![]));
}
}