use std::collections::HashMap;
use std::io::{BufReader, Read, Write};
use std::path::{Path, PathBuf};
use anyhow::{Context, Result};
use flate2::read::GzDecoder;
use indicatif::{ProgressBar, ProgressStyle};
use tantivy::collector::TopDocs;
use tantivy::query::{BooleanQuery, FuzzyTermQuery, Occur, Query, TermQuery};
use tantivy::schema::{
IndexRecordOption, NamedFieldDocument, OwnedValue, Schema, STORED, STRING, TEXT,
};
use tantivy::{doc, Document, Index, TantivyDocument, Term};
/// URL of the gzip-compressed Nexus index that `download_gz` fetches.
const NEXUS_INDEX_URL: &str =
"https://repo1.maven.org/maven2/.index/nexus-maven-repository-index.gz";
/// URL of the companion properties file; `fetch_timestamp_ms` scans it for
/// the `nexus.index.timestamp=` line.
const NEXUS_PROPS_URL: &str =
"https://repo1.maven.org/maven2/.index/nexus-maven-repository-index.properties";
/// Memory budget handed to the Tantivy index writer (256 MiB).
const WRITER_HEAP_BYTES: usize = 256 * 1024 * 1024;
/// Index age, in days, above which `ensure_index` prints a refresh hint.
const INDEX_STALENESS_DAYS: i64 = 30;
/// Field-count value that `read_next_record` treats as end of stream.
const NEXUS_EOF_MARKER: i32 = i32::MIN;
/// An opened artifact index: the reader plus the schema and the field
/// handles resolved from it by `open_index`.
pub struct IndexHandle {
/// Reader used to obtain searchers.
pub reader: tantivy::IndexReader,
/// Schema of the opened index; used to turn stored documents into named fields.
pub schema: Schema,
/// Handle of the `coord` field (stored, indexed as a single raw term).
#[allow(dead_code)]
pub f_coord: tantivy::schema::Field,
/// Handle of the `coord_text` field (tokenized, not stored).
pub f_coord_text: tantivy::schema::Field,
/// Handle of the `name` field (tokenized and stored).
pub f_name: tantivy::schema::Field,
/// Handle of the `description` field (tokenized and stored).
pub f_description: tantivy::schema::Field,
/// Handle of the `version` field (stored, indexed as a single raw term).
#[allow(dead_code)]
pub f_version: tantivy::schema::Field,
}
/// One search hit, built by `search` from the stored fields of a document.
#[derive(Debug, Clone)]
pub struct ArtifactRecord {
/// Stored `coord` value; the index builder writes it as `group:artifact`.
pub coord: String,
/// Stored `name` value (empty when the document has none).
pub name: String,
/// Stored `description` value (empty when the document has none).
pub description: String,
/// Stored `version` value (empty when the document has none).
pub version: String,
}
/// Sidecar metadata serialized as JSON next to the index (see `meta_path`).
#[derive(serde::Serialize, serde::Deserialize)]
struct IndexMeta {
/// Index timestamp in milliseconds since the Unix epoch; the builder stores
/// 0 when the timestamp could not be fetched.
index_timestamp_ms: i64,
/// Number of artifacts written to the index.
artifact_count: u64,
}
impl IndexMeta {
    /// Returns the whole number of days between `index_timestamp_ms` and the
    /// current system time.
    ///
    /// If the system clock reads earlier than the Unix epoch, "now" is taken
    /// to be 0 ms.
    fn age_days(&self) -> i64 {
        const MS_PER_DAY: i64 = 1_000 * 60 * 60 * 24;
        let since_epoch = std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH);
        let now_ms = match since_epoch {
            Ok(elapsed) => elapsed.as_millis() as i64,
            Err(_) => 0,
        };
        (now_ms - self.index_timestamp_ms) / MS_PER_DAY
    }
}
/// The winning (highest-version) record kept per `group:artifact` coordinate
/// while parsing the Nexus index.
struct BestRecord {
/// Version string of the retained record.
version: String,
/// Artifact display name taken from the Nexus `n` field.
name: String,
/// Artifact description taken from the Nexus `d` field.
description: String,
}
/// Fields of interest collected from one Nexus index record; every field
/// defaults to the empty string when the record does not carry it.
#[derive(Default)]
struct NexusFields {
/// Value of the `g` field.
group_id: String,
/// Value of the `a` field.
artifact_id: String,
/// Value of the `v` field.
version: String,
/// Value of the `p` field.
packaging: String,
/// Value of the `l` field.
classifier: String,
/// Value of the `n` field.
artifact_name: String,
/// Value of the `d` field.
description: String,
/// Value of the `u` field: a `|`-separated coordinate string that
/// `fill_from_unified_coord` splits into the fields above.
unified_coord: String,
}
/// Directory that holds the installed Tantivy index (`artifact-index` under
/// the curie home directory).
pub fn index_dir() -> PathBuf {
curie_home().join("artifact-index")
}
/// Scratch directory in which a new index is built before being installed.
fn index_tmp_dir() -> PathBuf {
curie_home().join("artifact-index.tmp")
}
/// Path of the cached, fully downloaded Nexus index archive.
fn gz_cache_path() -> PathBuf {
curie_home().join("nexus-index.gz")
}
/// Path the download is written to before being renamed to `gz_cache_path`.
fn gz_tmp_path() -> PathBuf {
curie_home().join("nexus-index.gz.tmp")
}
/// Path of the JSON file holding the serialized `IndexMeta`.
fn meta_path() -> PathBuf {
curie_home().join("artifact-index.meta.json")
}
/// Returns the `.curie` directory under the user's home directory, falling
/// back to `./.curie` when no home directory can be determined.
fn curie_home() -> PathBuf {
    let base = match dirs::home_dir() {
        Some(home) => home,
        None => PathBuf::from("."),
    };
    base.join(".curie")
}
/// Loads the index metadata file, returning `None` when it is missing,
/// unreadable, or not valid JSON for `IndexMeta`.
fn read_meta() -> Option<IndexMeta> {
    std::fs::read_to_string(meta_path())
        .ok()
        .and_then(|json| serde_json::from_str(&json).ok())
}
/// Reports whether `path` contains no directory entries.
///
/// A path that cannot be listed (missing, not a directory, permission
/// error) is also reported as empty.
fn is_dir_empty(path: &std::path::Path) -> bool {
    match std::fs::read_dir(path) {
        Ok(mut entries) => entries.next().is_none(),
        Err(_) => true,
    }
}
/// Reports whether a cached index archive exists and is larger than 1024
/// bytes; any metadata error counts as "not present".
fn gz_present() -> bool {
    match std::fs::metadata(gz_cache_path()) {
        Ok(meta) => meta.len() > 1024,
        Err(_) => false,
    }
}
/// Private alias for `index_exists`, used by `ensure_index`.
fn index_present() -> bool {
index_exists()
}
/// Reports whether the index directory exists and contains at least one entry.
pub fn index_exists() -> bool {
    let dir = index_dir();
    if !dir.exists() {
        return false;
    }
    !is_dir_empty(&dir)
}
/// Builds the index schema and returns it together with its field handles,
/// in the order `(schema, coord, coord_text, name, description, version)`.
///
/// `coord` and `version` are stored raw terms, `coord_text` is tokenized
/// but not stored, and `name` / `description` are tokenized and stored.
fn make_schema() -> (
    Schema,
    tantivy::schema::Field,
    tantivy::schema::Field,
    tantivy::schema::Field,
    tantivy::schema::Field,
    tantivy::schema::Field,
) {
    let mut builder = Schema::builder();
    // Fields are registered in the same order they are returned.
    let coord = builder.add_text_field("coord", STRING | STORED);
    let coord_text = builder.add_text_field("coord_text", TEXT);
    let name = builder.add_text_field("name", TEXT | STORED);
    let description = builder.add_text_field("description", TEXT | STORED);
    let version = builder.add_text_field("version", STRING | STORED);
    let schema = builder.build();
    (schema, coord, coord_text, name, description, version)
}
/// Makes sure a usable artifact index exists on disk and opens it.
///
/// * `force_refresh` — rebuild the index; re-download the archive first
///   unless `offline` is set.
/// * `offline` — never download; fail if the required cached data is missing.
///
/// When the index already exists and no refresh is requested, a hint is
/// printed if the recorded index timestamp is older than
/// `INDEX_STALENESS_DAYS`.
///
/// # Errors
/// Fails when the home directory cannot be created, when required data is
/// missing in offline mode, or when download, build or open fails.
pub fn ensure_index(force_refresh: bool, offline: bool) -> Result<IndexHandle> {
    let home = curie_home();
    std::fs::create_dir_all(&home).context("failed to create ~/.curie")?;
    if force_refresh {
        if !offline {
            download_gz()?;
        } else if !gz_present() {
            anyhow::bail!(
                "No cached index file at {}.\n\
                 Run `curie add --refresh-index` without --offline to download it.",
                gz_cache_path().display()
            );
        }
        build_index_from_gz()?;
    } else if !index_present() {
        if !gz_present() {
            if offline {
                anyhow::bail!(
                    "No artifact index found. Run `curie add` without --offline to download it."
                );
            }
            download_gz()?;
        } else {
            eprintln!(" Rebuilding index from cached download…");
        }
        build_index_from_gz()?;
    } else if let Some(meta) = read_meta() {
        // A timestamp of 0 means "unknown" (the builder stores 0 when the
        // timestamp could not be fetched). Computing an age from it would
        // report the index as tens of thousands of days old.
        if meta.index_timestamp_ms > 0 {
            let age = meta.age_days();
            if age > INDEX_STALENESS_DAYS {
                eprintln!(
                    " Index is {} days old. Run `curie add --refresh-index` to update.",
                    age
                );
            }
        }
    }
    open_index()
}
/// Searches the index and returns up to `limit` matching artifacts.
///
/// The query is lower-cased and split on every non-alphanumeric character,
/// so inputs such as `jackson-databind` or `com.google.guava:guava` yield
/// the same terms the tokenized fields were indexed with. Every token must
/// match in at least one of `coord_text`, `name` or `description`; the last
/// token is matched as a prefix so partially typed words still hit.
///
/// Returns an empty vector for a query without tokens or a `limit` of 0.
///
/// # Errors
/// Fails when the search itself or the retrieval of a stored document fails.
pub fn search(handle: &IndexHandle, query_str: &str, limit: usize) -> Result<Vec<ArtifactRecord>> {
    // `TopDocs::with_limit` panics on a zero limit.
    if limit == 0 {
        return Ok(Vec::new());
    }
    let lowered = query_str.to_lowercase();
    let tokens: Vec<&str> = lowered
        .split(|c: char| !c.is_alphanumeric())
        .filter(|token| !token.is_empty())
        .collect();
    if tokens.is_empty() {
        return Ok(Vec::new());
    }
    let searcher = handle.reader.searcher();
    let search_fields = [handle.f_coord_text, handle.f_name, handle.f_description];
    let last_index = tokens.len() - 1;
    let mut token_clauses: Vec<(Occur, Box<dyn Query>)> = Vec::with_capacity(tokens.len());
    for (i, token) in tokens.iter().enumerate() {
        let mut field_clauses: Vec<(Occur, Box<dyn Query>)> =
            Vec::with_capacity(search_fields.len());
        for &field in &search_fields {
            let term = Term::from_field_text(field, token);
            let clause: Box<dyn Query> = if i == last_index {
                // Distance 0 + prefix: exact prefix match on the final token.
                Box::new(FuzzyTermQuery::new_prefix(term, 0, true))
            } else {
                Box::new(TermQuery::new(term, IndexRecordOption::Basic))
            };
            field_clauses.push((Occur::Should, clause));
        }
        token_clauses.push((Occur::Must, Box::new(BooleanQuery::new(field_clauses))));
    }
    let query = BooleanQuery::new(token_clauses);
    let top_docs = searcher
        .search(&query, &TopDocs::with_limit(limit))
        .context("search failed")?;
    let mut results = Vec::with_capacity(top_docs.len());
    for (_, addr) in top_docs {
        let doc: TantivyDocument = searcher.doc(addr).context("doc retrieval failed")?;
        let named: NamedFieldDocument = doc.to_named_doc(&handle.schema);
        // First string value stored under `key`, or "" when absent.
        let get = |key: &str| -> String {
            named
                .0
                .get(key)
                .and_then(|values| values.first())
                .and_then(|value| match value {
                    OwnedValue::Str(s) => Some(s.clone()),
                    _ => None,
                })
                .unwrap_or_default()
        };
        results.push(ArtifactRecord {
            coord: get("coord"),
            name: get("name"),
            description: get("description"),
            version: get("version"),
        });
    }
    Ok(results)
}
/// Returns the number of documents visible to a fresh searcher on `handle`.
pub fn total_count(handle: &IndexHandle) -> u64 {
handle.reader.searcher().num_docs()
}
/// Opens the on-disk index in `index_dir()` and resolves the field handles
/// the rest of the module needs.
///
/// # Errors
/// Fails when the index cannot be opened, when the stored schema lacks one
/// of the expected fields, or when the reader cannot be built.
pub fn open_index() -> Result<IndexHandle> {
    let location = index_dir();
    let index = Index::open_in_dir(&location)
        .with_context(|| format!("failed to open index at {}", location.display()))?;
    let schema = index.schema();
    let resolve = |field_name: &str| -> Result<tantivy::schema::Field> {
        schema
            .get_field(field_name)
            .ok()
            .ok_or_else(|| anyhow::anyhow!("index schema missing field '{}'", field_name))
    };
    let f_coord = resolve("coord")?;
    let f_coord_text = resolve("coord_text")?;
    let f_name = resolve("name")?;
    let f_description = resolve("description")?;
    let f_version = resolve("version")?;
    // The reader is configured with a manual reload policy.
    let reader = index
        .reader_builder()
        .reload_policy(tantivy::ReloadPolicy::Manual)
        .try_into()
        .context("failed to build index reader")?;
    Ok(IndexHandle {
        reader,
        schema,
        f_coord,
        f_coord_text,
        f_name,
        f_description,
        f_version,
    })
}
/// Fetches the index properties file and returns the value of its first
/// `nexus.index.timestamp=` line parsed as an `i64`.
///
/// Returns `None` on any HTTP failure, when no such line exists, or when
/// the first such line does not parse.
fn fetch_timestamp_ms() -> Option<i64> {
    let client = reqwest::blocking::Client::builder()
        .user_agent("curie/0.1")
        .timeout(std::time::Duration::from_secs(15))
        .build()
        .ok()?;
    let body = client.get(NEXUS_PROPS_URL).send().ok()?.text().ok()?;
    // Only the first matching line is considered, even if it fails to parse.
    body.lines()
        .find_map(|line| line.strip_prefix("nexus.index.timestamp="))
        .and_then(|raw| raw.trim().parse::<i64>().ok())
}
/// Downloads the Nexus index archive into the cache.
///
/// The body is streamed to a temporary file and renamed over the cache path
/// only after the whole transfer succeeded, so an interrupted download does
/// not replace an existing cached archive.
///
/// # Errors
/// Fails when the home directory cannot be created, the request cannot be
/// sent, the server answers with a non-success status, or any file
/// operation fails.
fn download_gz() -> Result<()> {
    let home = curie_home();
    std::fs::create_dir_all(&home).context("failed to create ~/.curie")?;
    let gz = gz_cache_path();
    let tmp = gz_tmp_path();
    if tmp.exists() {
        // Best effort: `File::create` below truncates a leftover file anyway.
        let _ = std::fs::remove_file(&tmp);
    }
    eprintln!(" Downloading Maven Central artifact index (this only happens once)…");
    let client = reqwest::blocking::Client::builder()
        .user_agent("curie/0.1")
        .timeout(std::time::Duration::from_secs(600))
        .build()
        .context("failed to build HTTP client")?;
    // Reject 4xx/5xx responses: otherwise an error page would be stored as
    // the cached index and break every later rebuild.
    let mut response = client
        .get(NEXUS_INDEX_URL)
        .send()
        .context("failed to connect to Maven Central")?
        .error_for_status()
        .context("Maven Central rejected the index download")?;
    let content_length = response.content_length();
    let pb = make_download_bar(content_length);
    let mut file =
        std::fs::File::create(&tmp).with_context(|| format!("failed to create {}", tmp.display()))?;
    let mut pr = ProgressReader { inner: &mut response, pb: pb.clone() };
    std::io::copy(&mut pr, &mut file).context("download failed")?;
    file.flush().context("failed to flush gz file")?;
    // Close the handle before renaming.
    drop(file);
    std::fs::rename(&tmp, &gz).context("failed to finalise download")?;
    pb.finish_with_message("Downloaded");
    Ok(())
}
/// Rebuilds the Tantivy index and its metadata file from the cached archive.
///
/// The index timestamp is fetched from the remote properties file and
/// recorded as 0 when that fails.
///
/// # Errors
/// Fails when the cached archive is missing or when parsing, indexing or
/// writing the metadata fails.
fn build_index_from_gz() -> Result<()> {
    let archive = gz_cache_path();
    if !archive.exists() {
        anyhow::bail!(
            "Cached index file not found at {}. Re-run without --offline.",
            archive.display()
        );
    }
    let timestamp_ms = fetch_timestamp_ms().unwrap_or(0);
    // The bar tracks compressed bytes consumed; size 0 selects a spinner.
    let archive_bytes = match std::fs::metadata(&archive) {
        Ok(meta) => meta.len(),
        Err(_) => 0,
    };
    let bar = make_parse_bar(archive_bytes);
    let artifacts = parse_gz_to_artifact_map(&archive, &bar)?;
    let total = artifacts.len() as u64;
    bar.finish_with_message(format!("{} artifacts", total));
    write_artifacts_to_tantivy(&artifacts)?;
    save_index_metadata(timestamp_ms, total)?;
    eprintln!(" Artifact index ready ({} artifacts).", total);
    Ok(())
}
// DECOMPRESS_CHUNK_BYTES: size (512 KiB) of the buffer the decompressor thread reads into.
// CHANNEL_BUFFER_CHUNKS: capacity, in chunks, of the bounded channel between
// the decompressor thread and the parser.
const DECOMPRESS_CHUNK_BYTES: usize = 512 * 1024; const CHANNEL_BUFFER_CHUNKS: usize = 4;
/// Parses the gzip-compressed Nexus index at `gz_path` into a map from
/// `group:artifact` to the best record found for that coordinate.
///
/// Decompression runs on a spawned thread that sends chunks through a
/// bounded channel; record parsing runs on the calling thread.
/// `parse_progress` is shared with the decompressor, which advances it by
/// the compressed bytes it reads.
///
/// # Errors
/// Fails when the file cannot be opened, when the decompressor thread
/// panics, or when parsing or decompression reports an error (a parse error
/// is reported in preference to a decompression error).
fn parse_gz_to_artifact_map(
gz_path: &Path,
parse_progress: &ProgressBar,
) -> Result<HashMap<String, BestRecord>> {
let file = std::fs::File::open(gz_path)
.with_context(|| format!("failed to open {}", gz_path.display()))?;
let (chunk_sender, chunk_receiver) =
std::sync::mpsc::sync_channel::<Vec<u8>>(CHANNEL_BUFFER_CHUNKS);
let decompressor_progress = parse_progress.clone();
let decompressor = std::thread::spawn(move || {
decompress_to_channel(file, decompressor_progress, chunk_sender)
});
let mut channel_reader = ChannelReader::new(chunk_receiver);
let parse_result = parse_records_to_artifact_map(&mut channel_reader, parse_progress);
// Drop the receiver BEFORE joining: if parsing stopped early, the
// decompressor may be blocked in `send` on the full channel, and only a
// disconnected receiver makes that `send` fail so the thread can exit.
drop(channel_reader);
let decompress_result = decompressor
.join()
.map_err(|_| anyhow::anyhow!("decompressor thread panicked"))?;
let artifact_map = parse_result.context("failed to parse Nexus index")?;
decompress_result.context("failed to decompress Nexus index")?;
Ok(artifact_map)
}
/// Decompresses `file` and forwards the output through `sender` in chunks
/// of at most `DECOMPRESS_CHUNK_BYTES` bytes.
///
/// `parse_progress` is advanced by the number of compressed bytes read.
/// The function returns `Ok(())` at end of stream, and also when the
/// receiving side has hung up.
///
/// # Errors
/// Fails when reading from the decoder fails.
fn decompress_to_channel(
    file: std::fs::File,
    parse_progress: ProgressBar,
    sender: std::sync::mpsc::SyncSender<Vec<u8>>,
) -> Result<()> {
    let counted_input = ProgressReader { inner: file, pb: parse_progress };
    let mut reader = BufReader::with_capacity(1 << 20, GzDecoder::new(counted_input));
    let mut scratch = vec![0u8; DECOMPRESS_CHUNK_BYTES];
    loop {
        let filled = reader.read(&mut scratch).context("decompression error")?;
        // Stop at end of stream, or once the consumer dropped its receiver.
        if filled == 0 || sender.send(scratch[..filled].to_vec()).is_err() {
            return Ok(());
        }
    }
}
/// Adapts a channel of byte chunks into a blocking `Read` stream.
struct ChannelReader {
    /// Source of chunks.
    receiver: std::sync::mpsc::Receiver<Vec<u8>>,
    /// Chunk currently being handed out.
    current_chunk: Vec<u8>,
    /// Offset of the first unread byte in `current_chunk`.
    position: usize,
}

impl ChannelReader {
    /// Wraps `receiver`; nothing is received until the first `read`.
    fn new(receiver: std::sync::mpsc::Receiver<Vec<u8>>) -> Self {
        Self { receiver, current_chunk: Vec::new(), position: 0 }
    }
}

impl Read for ChannelReader {
    /// Copies bytes from the current chunk into `buf`, blocking for the next
    /// chunk when the current one is exhausted. Empty chunks are skipped, and
    /// a disconnected channel is reported as end of stream (`Ok(0)`).
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        loop {
            let unread = &self.current_chunk[self.position..];
            if !unread.is_empty() {
                let count = unread.len().min(buf.len());
                buf[..count].copy_from_slice(&unread[..count]);
                self.position += count;
                return Ok(count);
            }
            match self.receiver.recv() {
                Ok(next_chunk) => {
                    self.current_chunk = next_chunk;
                    self.position = 0;
                }
                Err(_) => return Ok(0),
            }
        }
    }
}
/// Reads every record from an uncompressed Nexus index stream and keeps,
/// per `group:artifact` coordinate, the record with the greatest version.
///
/// The progress message is refreshed after an insertion whenever the map
/// size is a power of two.
///
/// # Errors
/// Fails when the header or a record cannot be read.
fn parse_records_to_artifact_map<R: Read>(
    reader: &mut R,
    progress: &ProgressBar,
) -> Result<HashMap<String, BestRecord>> {
    skip_nexus_header(reader)?;
    let mut best_by_coord: HashMap<String, BestRecord> = HashMap::with_capacity(600_000);
    loop {
        let Some(raw_record) = read_next_record(reader)? else { break };
        // Records that are not plain jar artifacts are skipped.
        let Some((coord, candidate)) = resolve_artifact(raw_record) else { continue };
        let replaces_current = match best_by_coord.get(&coord) {
            Some(current) => version_gt(&candidate.version, &current.version),
            None => true,
        };
        if !replaces_current {
            continue;
        }
        best_by_coord.insert(coord, candidate);
        let seen = best_by_coord.len() as u64;
        if seen.is_power_of_two() {
            progress.set_message(format!("{} artifacts", seen));
        }
    }
    Ok(best_by_coord)
}
/// Consumes the stream header: one version byte followed by an 8-byte
/// timestamp. Both values are discarded.
///
/// # Errors
/// Fails when either value cannot be read.
fn skip_nexus_header<R: Read>(reader: &mut R) -> Result<()> {
    let _version_byte = read_u8(reader).context("failed to read index version byte")?;
    let _timestamp = read_i64(reader).context("failed to read index timestamp")?;
    Ok(())
}
fn read_next_record<R: Read>(reader: &mut R) -> Result<Option<NexusFields>> {
let field_count = match read_i32(reader) {
Ok(0) | Ok(NEXUS_EOF_MARKER) => return Ok(None),
Ok(fc) if fc < 0 => return Ok(None),
Ok(fc) => fc as usize,
Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => return Ok(None),
Err(e) => return Err(e).context("error reading field count"),
};
let mut fields = NexusFields::default();
for _ in 0..field_count {
let (field_name, field_value) = match read_field(reader) {
Ok(pair) => pair,
Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => return Ok(None),
Err(e) => return Err(e.into()),
};
match field_name.as_str() {
"g" => fields.group_id = field_value,
"a" => fields.artifact_id = field_value,
"v" => fields.version = field_value,
"p" => fields.packaging = field_value,
"l" => fields.classifier = field_value,
"n" => fields.artifact_name = field_value,
"d" => fields.description = field_value,
"u" => fields.unified_coord = field_value,
_ => {}
}
}
Ok(Some(fields))
}
/// Reads one field: a one-byte tag (discarded), a name with a 2-byte length
/// prefix, and a value with a 4-byte length prefix. Returns `(name, value)`.
fn read_field<R: Read>(reader: &mut R) -> std::io::Result<(String, String)> {
    // The tag byte must be consumed, but its value is not used.
    read_u8(reader)?;
    let name = read_utf(reader)?;
    let value = read_value(reader)?;
    Ok((name, value))
}
/// Fills empty coordinate fields from the `|`-separated `unified_coord`
/// (`group|artifact|version|classifier|packaging`).
///
/// Nothing happens when group and artifact are both already set, when
/// `unified_coord` is empty, or when it has fewer than two segments. Only
/// empty fields are filled, and a classifier of `NA` is not copied.
fn fill_from_unified_coord(fields: &mut NexusFields) {
    let coordinates_known = !fields.group_id.is_empty() && !fields.artifact_id.is_empty();
    if coordinates_known || fields.unified_coord.is_empty() {
        return;
    }
    let mut segments = fields.unified_coord.splitn(5, '|');
    let (Some(group), Some(artifact)) = (segments.next(), segments.next()) else {
        return;
    };
    let version = segments.next();
    let classifier = segments.next().filter(|c| *c != "NA");
    let packaging = segments.next();

    if fields.group_id.is_empty() {
        fields.group_id = group.to_string();
    }
    if fields.artifact_id.is_empty() {
        fields.artifact_id = artifact.to_string();
    }
    if fields.version.is_empty() {
        if let Some(v) = version {
            fields.version = v.to_string();
        }
    }
    if fields.classifier.is_empty() {
        if let Some(c) = classifier {
            fields.classifier = c.to_string();
        }
    }
    if fields.packaging.is_empty() {
        if let Some(p) = packaging {
            fields.packaging = p.to_string();
        }
    }
}
/// Reports whether `fields` describes a main jar artifact: group and
/// artifact ids are present, packaging is `jar` or empty, and the
/// classifier is empty or `NA`.
fn is_jar_artifact(fields: &NexusFields) -> bool {
    let has_coordinates = !fields.group_id.is_empty() && !fields.artifact_id.is_empty();
    let packaged_as_jar = matches!(fields.packaging.as_str(), "" | "jar");
    let is_main_artifact = matches!(fields.classifier.as_str(), "" | "NA");
    has_coordinates && packaged_as_jar && is_main_artifact
}
fn resolve_artifact(mut fields: NexusFields) -> Option<(String, BestRecord)> {
fill_from_unified_coord(&mut fields);
if !is_jar_artifact(&fields) {
return None;
}
let coord = format!("{}:{}", fields.group_id, fields.artifact_id);
let record = BestRecord {
version: fields.version,
name: fields.artifact_name,
description: fields.description,
};
Some((coord, record))
}
/// Writes every artifact into a fresh Tantivy index built in the temporary
/// directory, then installs that directory as the live index.
///
/// # Errors
/// Fails when the build directory cannot be prepared, the index or writer
/// cannot be created, a document cannot be added or committed, or the
/// finished index cannot be installed.
fn write_artifacts_to_tantivy(artifact_map: &HashMap<String, BestRecord>) -> Result<()> {
    let total = artifact_map.len() as u64;
    eprintln!(" Indexing {} artifacts…", total);
    let build_dir = index_tmp_dir();
    prepare_build_directory(&build_dir)?;
    let (schema, f_coord, f_coord_text, f_name, f_description, f_version) = make_schema();
    let index =
        Index::create_in_dir(&build_dir, schema).context("failed to create Tantivy index")?;
    let bar = make_index_progress_bar(total);
    let mut writer = index
        .writer(WRITER_HEAP_BYTES)
        .context("failed to create index writer")?;
    for (coord, record) in artifact_map.iter() {
        // The coordinate is indexed twice: raw (`coord`) and tokenized (`coord_text`).
        let document = doc!(
            f_coord => coord.as_str(),
            f_coord_text => coord.as_str(),
            f_name => record.name.as_str(),
            f_description => record.description.as_str(),
            f_version => record.version.as_str(),
        );
        writer.add_document(document).context("failed to add document")?;
        bar.inc(1);
    }
    writer.commit().context("failed to commit index")?;
    // Drop the writer before the directory is moved into place.
    drop(writer);
    bar.finish_with_message("Indexed");
    install_index_atomically(&build_dir)
}
/// Ensures `tmp_dir` exists and is empty by removing any previous contents
/// and recreating the directory.
///
/// # Errors
/// Fails when the old directory cannot be removed or the new one created.
fn prepare_build_directory(tmp_dir: &Path) -> Result<()> {
    let leftover_from_previous_build = tmp_dir.exists();
    if leftover_from_previous_build {
        std::fs::remove_dir_all(tmp_dir).context("failed to remove old temp index")?;
    }
    std::fs::create_dir_all(tmp_dir).context("failed to create temp index directory")?;
    Ok(())
}
fn install_index_atomically(tmp_dir: &Path) -> Result<()> {
let final_dir = index_dir();
if final_dir.exists() {
std::fs::remove_dir_all(&final_dir).context("failed to remove old index")?;
}
std::fs::rename(tmp_dir, &final_dir).context("failed to install new index")
}
fn save_index_metadata(timestamp_ms: i64, artifact_count: u64) -> Result<()> {
let meta = IndexMeta { index_timestamp_ms: timestamp_ms, artifact_count };
std::fs::write(meta_path(), serde_json::to_string_pretty(&meta)?)
.context("failed to write index metadata")
}
/// Creates the progress bar shown while documents are added to the index;
/// its length is `artifact_count`.
fn make_index_progress_bar(artifact_count: u64) -> ProgressBar {
    let style = ProgressStyle::default_bar()
        .template(
            "{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} artifacts",
        )
        .unwrap()
        .progress_chars("#>-");
    let bar = ProgressBar::new(artifact_count);
    bar.set_style(style);
    bar
}
/// Reads exactly one byte from `r`.
fn read_u8<R: Read>(r: &mut R) -> std::io::Result<u8> {
    let mut byte = 0u8;
    r.read_exact(std::slice::from_mut(&mut byte))?;
    Ok(byte)
}
/// Reads a big-endian `i32` (4 bytes) from `r`.
fn read_i32<R: Read>(r: &mut R) -> std::io::Result<i32> {
    let mut raw = [0u8; 4];
    r.read_exact(&mut raw).map(|()| i32::from_be_bytes(raw))
}
/// Reads a big-endian `i64` (8 bytes) from `r`.
fn read_i64<R: Read>(r: &mut R) -> std::io::Result<i64> {
    let mut raw = [0u8; 8];
    r.read_exact(&mut raw).map(|()| i64::from_be_bytes(raw))
}
/// Reads a string prefixed by a 2-byte big-endian length; invalid UTF-8
/// sequences are replaced rather than rejected.
fn read_utf<R: Read>(r: &mut R) -> std::io::Result<String> {
    let mut prefix = [0u8; 2];
    r.read_exact(&mut prefix)?;
    let byte_len = usize::from(u16::from_be_bytes(prefix));
    let mut payload = vec![0u8; byte_len];
    r.read_exact(&mut payload)?;
    let text = String::from_utf8_lossy(&payload);
    Ok(text.into_owned())
}
/// Reads a string prefixed by a 4-byte big-endian signed length; invalid
/// UTF-8 sequences are replaced rather than rejected.
///
/// A negative length yields an empty string. The length comes from the
/// input file, so the buffer grows with the bytes actually read instead of
/// being allocated up front: a corrupt prefix cannot force an allocation of
/// up to 2 GiB.
///
/// # Errors
/// Returns an `UnexpectedEof` error when the stream ends before the
/// declared number of bytes has been read, and propagates other I/O errors.
fn read_value<R: Read>(r: &mut R) -> std::io::Result<String> {
    // Upper bound on the initial allocation; larger values grow on demand.
    const MAX_PREALLOC_BYTES: usize = 64 * 1024;

    let mut lb = [0u8; 4];
    r.read_exact(&mut lb)?;
    let declared = i32::from_be_bytes(lb);
    if declared < 0 {
        return Ok(String::new());
    }
    let expected = declared as usize;
    let mut bytes = Vec::with_capacity(expected.min(MAX_PREALLOC_BYTES));
    r.by_ref().take(expected as u64).read_to_end(&mut bytes)?;
    if bytes.len() != expected {
        // Same error kind `read_exact` reports; callers match on it.
        return Err(std::io::Error::new(
            std::io::ErrorKind::UnexpectedEof,
            "failed to fill whole buffer",
        ));
    }
    Ok(String::from_utf8_lossy(&bytes).into_owned())
}
/// Creates the download progress indicator: a byte-count bar when the
/// total size is known, a spinner otherwise.
fn make_download_bar(content_length: Option<u64>) -> ProgressBar {
    let (bar, style) = match content_length {
        Some(total_bytes) => (
            ProgressBar::new(total_bytes),
            ProgressStyle::default_bar()
                .template(
                    "{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] \
                     {bytes}/{total_bytes} ({eta})",
                )
                .unwrap()
                .progress_chars("#>-"),
        ),
        None => (
            ProgressBar::new_spinner(),
            ProgressStyle::default_spinner()
                .template("{spinner:.green} [{elapsed_precise}] {bytes} downloaded")
                .unwrap(),
        ),
    };
    bar.set_style(style);
    bar
}
/// Creates the parse progress indicator: a byte-count bar sized to the
/// archive when `gz_size` is non-zero, a spinner otherwise.
fn make_parse_bar(gz_size: u64) -> ProgressBar {
    if gz_size == 0 {
        let spinner = ProgressBar::new_spinner();
        spinner.set_style(
            ProgressStyle::default_spinner()
                .template("{spinner:.green} [{elapsed_precise}] {bytes} parsed {msg}")
                .unwrap(),
        );
        return spinner;
    }
    let bar = ProgressBar::new(gz_size);
    bar.set_style(
        ProgressStyle::default_bar()
            .template(
                "{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] \
                 {bytes}/{total_bytes} parsed ({eta}) {msg}",
            )
            .unwrap()
            .progress_chars("#>-"),
    );
    bar
}
/// `Read` wrapper that advances a progress bar by the number of bytes
/// obtained from the wrapped reader.
struct ProgressReader<R: Read> {
    /// Reader all reads are delegated to.
    inner: R,
    /// Progress bar advanced after each successful read.
    pb: ProgressBar,
}

impl<R: Read> Read for ProgressReader<R> {
    /// Delegates to the inner reader; on success the bar is incremented by
    /// the byte count, on error the bar is left untouched.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let outcome = self.inner.read(buf);
        if let Ok(count) = outcome {
            self.pb.inc(count as u64);
        }
        outcome
    }
}
/// Returns `true` when version `a` is strictly newer than version `b`.
fn version_gt(a: &str, b: &str) -> bool {
    version_cmp(a, b) == std::cmp::Ordering::Greater
}

/// Orders two version strings component by component.
///
/// Components are separated by `.` or `-`. Numeric components compare by
/// value (of any length), a numeric component outranks a qualifier, and the
/// pre-release qualifiers `alpha < beta < milestone < rc/cr < snapshot`
/// rank below any other qualifier. When one version is a prefix of the
/// other, the longer one is newer unless its next component is a
/// pre-release qualifier (`1.0-rc1 < 1.0 < 1.0.1`).
fn version_cmp(a: &str, b: &str) -> std::cmp::Ordering {
    use std::cmp::Ordering;
    let pa = split_version(a);
    let pb = split_version(b);
    for (x, y) in pa.iter().zip(pb.iter()) {
        let ord = compare_version_components(x, y);
        if ord != Ordering::Equal {
            return ord;
        }
    }
    match pa.len().cmp(&pb.len()) {
        Ordering::Equal => Ordering::Equal,
        Ordering::Greater => trailing_component_order(&pa[pb.len()]),
        Ordering::Less => trailing_component_order(&pb[pa.len()]).reverse(),
    }
}

/// Splits a version string into its `.`/`-` separated components.
fn split_version(v: &str) -> Vec<String> {
    v.split(|c| c == '.' || c == '-').map(str::to_string).collect()
}

/// Order of a version relative to its own prefix, given the first extra
/// component: pre-release qualifiers sort before the bare release,
/// everything else after it.
fn trailing_component_order(component: &str) -> std::cmp::Ordering {
    use std::cmp::Ordering;
    if prerelease_rank(component).is_some() {
        Ordering::Less
    } else {
        Ordering::Greater
    }
}

/// Returns the digits of an all-digit component without leading zeros, or
/// `None` when the component is empty or contains a non-digit.
fn numeric_digits(component: &str) -> Option<&str> {
    let all_digits = !component.is_empty() && component.bytes().all(|b| b.is_ascii_digit());
    all_digits.then(|| component.trim_start_matches('0'))
}

/// Compares two single version components (see `version_cmp` for the rules).
fn compare_version_components(x: &str, y: &str) -> std::cmp::Ordering {
    use std::cmp::Ordering;
    match (numeric_digits(x), numeric_digits(y)) {
        // With leading zeros stripped, more digits means a larger number;
        // equal lengths compare lexically. No integer overflow is possible.
        (Some(n), Some(m)) => n.len().cmp(&m.len()).then_with(|| n.cmp(m)),
        (Some(_), None) => Ordering::Greater,
        (None, Some(_)) => Ordering::Less,
        (None, None) => match (prerelease_rank(x), prerelease_rank(y)) {
            (Some(l), Some(r)) => l.cmp(&r),
            (Some(_), None) => Ordering::Less,
            (None, Some(_)) => Ordering::Greater,
            (None, None) => x.cmp(y),
        },
    }
}

/// Classifies a pre-release qualifier as `(stage, number)`, e.g. `RC2` is
/// `(3, 2)`. Returns `None` for anything that is not a recognised
/// pre-release qualifier. The one-letter forms `a`/`b`/`m` count only when
/// followed by digits.
fn prerelease_rank(component: &str) -> Option<(u8, u64)> {
    let lower = component.to_ascii_lowercase();
    let stem = lower.trim_end_matches(|c: char| c.is_ascii_digit());
    let digits = &lower[stem.len()..];
    let stage = match stem {
        "alpha" => 0,
        "beta" => 1,
        "milestone" => 2,
        "rc" | "cr" => 3,
        "snapshot" => 4,
        "a" if !digits.is_empty() => 0,
        "b" if !digits.is_empty() => 1,
        "m" if !digits.is_empty() => 2,
        _ => return None,
    };
    Some((stage, digits.parse().unwrap_or(0)))
}
/// Test-only helper: appends `*` to every whitespace-separated token of `q`
/// and joins the results with single spaces. A query without tokens yields
/// an empty string.
#[cfg(test)]
fn build_fts_query(q: &str) -> String {
    let starred: Vec<String> = q.split_whitespace().map(|token| format!("{token}*")).collect();
    starred.join(" ")
}
#[cfg(test)]
mod tests {
use super::*;
use tantivy::Index;
/// Builds an in-memory index from `(coord, name, description, version)`
/// tuples and returns a handle equivalent to the one `open_index` produces.
fn make_test_handle(records: &[(&str, &str, &str, &str)]) -> IndexHandle {
let (schema, f_coord, f_coord_text, f_name, f_description, f_version) = make_schema();
let index = Index::create_in_ram(schema.clone());
let mut writer = index.writer(15_000_000).unwrap();
for &(coord, name, desc, version) in records {
writer
.add_document(doc!(
f_coord => coord,
f_coord_text => coord,
f_name => name,
f_description => desc,
f_version => version,
))
.unwrap();
}
// Commit before the reader is created so the documents are visible to it.
writer.commit().unwrap();
let reader = index
.reader_builder()
.reload_policy(tantivy::ReloadPolicy::Manual)
.try_into()
.unwrap();
IndexHandle { reader, schema, f_coord, f_coord_text, f_name, f_description, f_version }
}
/// A single token gets a trailing `*`.
#[test]
fn build_fts_query_single_token() {
assert_eq!(build_fts_query("guava"), "guava*");
}
/// Every token gets its own trailing `*`; tokens stay space-separated.
#[test]
fn build_fts_query_multi_token() {
assert_eq!(build_fts_query("jackson databind"), "jackson* databind*");
}
/// An empty query produces an empty string.
#[test]
fn build_fts_query_empty() {
assert_eq!(build_fts_query(""), "");
}
/// Encodes one field the way `read_field` decodes it: tag byte, 2-byte
/// big-endian name length + name, 4-byte big-endian value length + value.
fn field_bytes(tag: u8, name: &str, value: &str) -> Vec<u8> {
let mut v = Vec::new();
v.push(tag);
v.extend_from_slice(&(name.len() as u16).to_be_bytes());
v.extend_from_slice(name.as_bytes());
v.extend_from_slice(&(value.len() as i32).to_be_bytes());
v.extend_from_slice(value.as_bytes());
v
}
/// Builds a complete index stream holding exactly one record made of
/// `fields`: header (version byte 1, timestamp 0), the record, then a zero
/// field count as terminator.
fn minimal_stream(fields: &[(u8, &str, &str)]) -> Vec<u8> {
let mut v = Vec::new();
v.push(1u8);
v.extend_from_slice(&0i64.to_be_bytes());
v.extend_from_slice(&(fields.len() as i32).to_be_bytes());
for &(tag, name, value) in fields {
v.extend_from_slice(&field_bytes(tag, name, value));
}
v.extend_from_slice(&0i32.to_be_bytes());
v
}
/// The header is one version byte followed by a big-endian 8-byte timestamp.
#[test]
fn parse_nexus_header_version() {
    let header: &[u8] = &[1, 0, 0, 0, 0, 0, 0, 0, 42, 99];
    let mut cursor = std::io::Cursor::new(header);
    let version = read_u8(&mut cursor).unwrap();
    assert_eq!(version, 1);
    let timestamp = read_i64(&mut cursor).unwrap();
    assert_eq!(timestamp, 42i64);
}
/// A field decodes as tag byte, 2-byte-prefixed name, 4-byte-prefixed value.
#[test]
fn parse_nexus_field_uses_4byte_value_length() {
    let encoded = field_bytes(0x05, "u", "zw.co.paynow|java-sdk|1.1.2|NA");
    let mut cursor = std::io::Cursor::new(&encoded[..]);
    let tag = read_u8(&mut cursor).unwrap();
    assert_eq!(tag, 0x05);
    let name = read_utf(&mut cursor).unwrap();
    assert_eq!(name, "u");
    let value = read_value(&mut cursor).unwrap();
    assert_eq!(value, "zw.co.paynow|java-sdk|1.1.2|NA");
}
/// Decodes a hand-written byte sequence (header, one five-field record,
/// zero terminator) with the low-level readers and checks the `u` and `n`
/// values as well as the terminator.
#[test]
fn parse_nexus_real_first_record_bytes() {
// Layout: version byte, 8-byte timestamp, 4-byte field count, then per
// field: tag, 2-byte name length, name, 4-byte value length, value.
#[rustfmt::skip]
let raw: &[u8] = &[
0x01, 0x00,0x00,0x01,0x9E,0x47,0xC3,0x85,0xC4, 0x00,0x00,0x00,0x05,
0x05, 0x00,0x01,0x75, 0x00,0x00,0x00,0x1E,
b'z',b'w',b'.',b'c',b'o',b'.',b'p',b'a',b'y',b'n',b'o',b'w',b'|',
b'j',b'a',b'v',b'a',b'-',b's',b'd',b'k',b'|',b'1',b'.',b'1',b'.',b'2',b'|',b'N',b'A',
0x04, 0x00,0x01,0x6D, 0x00,0x00,0x00,0x0D,
b'1',b'7',b'6',b'5',b'3',b'7',b'8',b'9',b'2',b'7',b'4',b'7',b'0',
0x04, 0x00,0x01,0x69, 0x00,0x00,0x00,0x2D,
b'p',b'o',b'm',b'.',b's',b'h',b'a',b'5',b'1',b'2',b'|',
b'1',b'7',b'3',b'8',b'1',b'5',b'5',b'0',b'9',b'8',b'0',b'0',b'0',b'|',
b'1',b'2',b'8',b'|',b'1',b'|',b'1',b'|',b'0',b'|',
b'p',b'o',b'm',b'.',b's',b'h',b'a',b'5',b'1',b'2',
0x07, 0x00,0x01,0x6E, 0x00,0x00,0x00,0x08,
b'j',b'a',b'v',b'a',b'-',b's',b'd',b'k',
0x07, 0x00,0x01,0x64, 0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,
];
let mut cur = std::io::Cursor::new(raw);
assert_eq!(read_u8(&mut cur).unwrap(), 1);
let ts = read_i64(&mut cur).unwrap();
assert_eq!(ts, 0x000001_9E47C385C4u64 as i64);
let fc = read_i32(&mut cur).unwrap();
assert_eq!(fc, 5);
let mut u_val = String::new();
let mut n_val = String::new();
for _ in 0..fc {
let _tag = read_u8(&mut cur).unwrap();
let name = read_utf(&mut cur).unwrap();
let val = read_value(&mut cur).unwrap();
match name.as_str() {
"u" => u_val = val,
"n" => n_val = val,
_ => {}
}
}
assert_eq!(u_val, "zw.co.paynow|java-sdk|1.1.2|NA");
assert_eq!(n_val, "java-sdk");
// The final four zero bytes are the end-of-stream field count.
assert_eq!(read_i32(&mut cur).unwrap(), 0);
}
/// A record carrying only the unified `u` coordinate (plus a name) still
/// yields group, artifact and version.
#[test]
fn parse_stream_extracts_artifact_via_u_field() {
let stream = minimal_stream(&[
(0x05, "u", "com.example|mylib|2.3.0|NA"),
(0x07, "n", "My Library"),
]);
let mut map: HashMap<String, BestRecord> = HashMap::new();
parse_stream_into_map(&stream, &mut map);
assert!(map.contains_key("com.example:mylib"),
"map: {:?}", map.keys().collect::<Vec<_>>());
assert_eq!(map["com.example:mylib"].version, "2.3.0");
assert_eq!(map["com.example:mylib"].name, "My Library");
}
/// A record with explicit `g`/`a`/`v`/`p` fields is keyed by `group:artifact`.
#[test]
fn parse_stream_extracts_artifact_via_g_a_fields() {
let stream = minimal_stream(&[
(0x05, "g", "org.apache.commons"),
(0x05, "a", "commons-lang3"),
(0x05, "v", "3.14.0"),
(0x05, "p", "jar"),
]);
let mut map: HashMap<String, BestRecord> = HashMap::new();
parse_stream_into_map(&stream, &mut map);
assert!(
map.contains_key("org.apache.commons:commons-lang3"),
"map: {:?}", map.keys().collect::<Vec<_>>()
);
assert_eq!(map["org.apache.commons:commons-lang3"].version, "3.14.0");
}
/// Records whose packaging is not `jar` are dropped.
#[test]
fn parse_stream_skips_pom_packaging() {
let stream = minimal_stream(&[
(0x05, "g", "com.example"),
(0x05, "a", "mylib"),
(0x05, "v", "1.0"),
(0x05, "p", "pom"),
]);
let mut map: HashMap<String, BestRecord> = HashMap::new();
parse_stream_into_map(&stream, &mut map);
assert!(map.is_empty(), "pom should be filtered; map: {:?}", map.keys().collect::<Vec<_>>());
}
/// Records carrying a classifier (here `sources`) are dropped.
#[test]
fn parse_stream_skips_sources_classifier() {
let stream = minimal_stream(&[
(0x05, "g", "com.example"),
(0x05, "a", "mylib"),
(0x05, "v", "1.0"),
(0x05, "l", "sources"),
]);
let mut map: HashMap<String, BestRecord> = HashMap::new();
parse_stream_into_map(&stream, &mut map);
assert!(map.is_empty());
}
/// A zero field count right after the header ends the stream with no records.
#[test]
fn parse_stream_terminates_on_zero_field_count() {
    let mut bytes = vec![1u8];
    bytes.extend_from_slice(&0i64.to_be_bytes());
    bytes.extend_from_slice(&0i32.to_be_bytes());
    let mut parsed = HashMap::new();
    parse_stream_into_map(&bytes, &mut parsed);
    assert!(parsed.is_empty());
}
/// A field count of `i32::MIN` right after the header ends the stream with
/// no records.
#[test]
fn parse_stream_terminates_on_min_value_sentinel() {
    let mut bytes = vec![1u8];
    bytes.extend_from_slice(&0i64.to_be_bytes());
    bytes.extend_from_slice(&i32::MIN.to_be_bytes());
    let mut parsed = HashMap::new();
    parse_stream_into_map(&bytes, &mut parsed);
    assert!(parsed.is_empty());
}
/// Three records for the same coordinate (versions 1.0, 2.0, 1.5) collapse
/// into one entry holding the highest version, regardless of order.
#[test]
fn parse_stream_deduplicates_by_highest_version() {
let stream = {
let mut v = Vec::new();
v.push(1u8);
v.extend_from_slice(&0i64.to_be_bytes());
v.extend_from_slice(&3i32.to_be_bytes());
v.extend_from_slice(&field_bytes(0x05, "g", "com.example"));
v.extend_from_slice(&field_bytes(0x05, "a", "lib"));
v.extend_from_slice(&field_bytes(0x05, "v", "1.0"));
v.extend_from_slice(&3i32.to_be_bytes());
v.extend_from_slice(&field_bytes(0x05, "g", "com.example"));
v.extend_from_slice(&field_bytes(0x05, "a", "lib"));
v.extend_from_slice(&field_bytes(0x05, "v", "2.0"));
v.extend_from_slice(&3i32.to_be_bytes());
v.extend_from_slice(&field_bytes(0x05, "g", "com.example"));
v.extend_from_slice(&field_bytes(0x05, "a", "lib"));
v.extend_from_slice(&field_bytes(0x05, "v", "1.5"));
// Zero field count terminates the stream.
v.extend_from_slice(&0i32.to_be_bytes()); v
};
let mut map = HashMap::new();
parse_stream_into_map(&stream, &mut map);
assert_eq!(map.len(), 1);
assert_eq!(map["com.example:lib"].version, "2.0");
}
/// Parses `raw` as an uncompressed index stream and merges the resulting
/// records into `map`.
///
/// Panics when parsing fails. Falling back to an empty map would let every
/// "map is empty" assertion pass vacuously on a broken parser.
fn parse_stream_into_map(raw: &[u8], map: &mut HashMap<String, BestRecord>) {
    let pb = ProgressBar::hidden();
    let mut reader = BufReader::new(std::io::Cursor::new(raw));
    let parsed = parse_records_to_artifact_map(&mut reader, &pb)
        .expect("test stream should parse without error");
    map.extend(parsed);
}
/// `is_jar_artifact` rejects a record that carries a real classifier.
/// (Previously this test only re-stated the condition on a local string and
/// never called the filter.)
#[test]
fn filter_skips_sources_classifier() {
    let fields = NexusFields {
        group_id: "com.example".to_string(),
        artifact_id: "mylib".to_string(),
        classifier: "sources".to_string(),
        ..Default::default()
    };
    assert!(!is_jar_artifact(&fields));
}
/// `is_jar_artifact` rejects a record whose packaging is not `jar`.
/// (Previously this test only re-stated the condition on a local string and
/// never called the filter.)
#[test]
fn filter_skips_pom_packaging() {
    let fields = NexusFields {
        group_id: "com.example".to_string(),
        artifact_id: "mylib".to_string(),
        packaging: "pom".to_string(),
        ..Default::default()
    };
    assert!(!is_jar_artifact(&fields));
}
/// `is_jar_artifact` accepts explicit `jar` packaging, missing packaging
/// (treated as `jar`) and the `NA` classifier placeholder.
/// (Previously this test only re-stated the condition on a local string and
/// never called the filter.)
#[test]
fn filter_accepts_jar_packaging() {
    let explicit = NexusFields {
        group_id: "com.example".to_string(),
        artifact_id: "mylib".to_string(),
        packaging: "jar".to_string(),
        ..Default::default()
    };
    assert!(is_jar_artifact(&explicit));

    let implicit = NexusFields {
        group_id: "com.example".to_string(),
        artifact_id: "mylib".to_string(),
        classifier: "NA".to_string(),
        ..Default::default()
    };
    assert!(is_jar_artifact(&implicit));
}
/// `version_gt` is a strict "newer than": true for a higher version, false
/// for a lower or an equal one.
#[test]
fn dedup_keeps_latest_version() {
assert!(version_gt("2.0.0", "1.0.0"));
assert!(!version_gt("1.0.0", "2.0.0"));
assert!(!version_gt("1.0.0", "1.0.0"));
}
/// A query equal to the artifact name finds the record and returns its
/// stored coordinate.
#[test]
fn search_returns_matching_results() {
let h = make_test_handle(&[(
"com.google.guava:guava",
"Guava",
"Google core libraries",
"33.0.0-jre",
)]);
let r = search(&h, "guava", 10).unwrap();
assert!(!r.is_empty());
assert_eq!(r[0].coord, "com.google.guava:guava");
}
/// The last query token matches as a prefix, so `guav` finds `guava`.
#[test]
fn search_prefix_matches_partial_name() {
let h = make_test_handle(&[("com.google.guava:guava", "Guava", "", "33.0.0-jre")]);
let r = search(&h, "guav", 10).unwrap();
assert!(!r.is_empty());
}
/// A token that occurs only inside the group part of the coordinate
/// matches through the tokenized `coord_text` field.
#[test]
fn search_by_group_token() {
let h = make_test_handle(&[("com.google.guava:guava", "Guava", "", "33.0.0-jre")]);
let r = search(&h, "google", 10).unwrap();
assert!(!r.is_empty());
assert_eq!(r[0].coord, "com.google.guava:guava");
}
/// A two-token query returns the single record that contains both tokens.
#[test]
fn search_multi_token() {
let h = make_test_handle(&[(
"com.fasterxml.jackson.core:jackson-databind",
"Jackson Databind",
"",
"2.17.2",
)]);
let r = search(&h, "jackson databind", 10).unwrap();
assert_eq!(r.len(), 1);
}
/// An empty query returns no results instead of failing.
#[test]
fn search_empty_query_returns_empty() {
let h = make_test_handle(&[("com.google.guava:guava", "Guava", "", "33.0.0-jre")]);
assert!(search(&h, "", 10).unwrap().is_empty());
}
}