use std::collections::HashMap;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
use crate::formats::FormatError;
pub use crate::graph::load_mm_file;
fn normalize_map_name(name: &str) -> String {
let name = name.trim();
if name.len() >= 2 && name.starts_with('<') && name.ends_with('>') {
name[1..name.len() - 1].to_owned()
} else {
name.to_owned()
}
}
pub(crate) fn apply_base_iri(name: String, base: Option<&str>) -> String {
match base {
Some(b) if !b.is_empty() => format!("{b}{name}"),
_ => name,
}
}
type IndexMap = (HashMap<usize, String>, HashMap<String, usize>);
pub(crate) fn parse_index_map(path: &Path) -> Result<IndexMap, FormatError> {
let file_name = path
.file_name()
.map(|n| n.to_string_lossy().into_owned())
.unwrap_or_else(|| path.display().to_string());
let reader = BufReader::new(File::open(path)?);
let mut by_idx: HashMap<usize, String> = HashMap::new();
let mut by_name: HashMap<String, usize> = HashMap::new();
for (line_no, line) in reader.lines().enumerate() {
let line = line?;
let line = line.trim();
if line.is_empty() {
continue;
}
let (name, idx_str) =
line.rsplit_once(char::is_whitespace)
.ok_or_else(|| FormatError::InvalidFormat {
file: file_name.clone(),
line: line_no + 1,
reason: "expected '<name> <index>' but found no whitespace".into(),
})?;
let idx: usize = idx_str
.trim()
.parse()
.map_err(|_| FormatError::InvalidFormat {
file: file_name.clone(),
line: line_no + 1,
reason: format!("index '{}' is not a valid positive integer", idx_str.trim()),
})?;
if idx == 0 {
return Err(FormatError::InvalidFormat {
file: file_name.clone(),
line: line_no + 1,
reason: "index must be a positive integer (>= 1)".into(),
});
}
let name = normalize_map_name(name);
if by_idx.insert(idx, name.clone()).is_some() {
return Err(FormatError::InvalidFormat {
file: file_name.clone(),
line: line_no + 1,
reason: format!("duplicate index {idx}"),
});
}
by_name.insert(name, idx);
}
Ok((by_idx, by_name))
}
pub struct MatrixMarket {
pub(crate) dir: PathBuf,
pub(crate) base_iri: Option<String>,
}
impl MatrixMarket {
pub fn from_dir(dir: impl Into<PathBuf>) -> Self {
Self {
dir: dir.into(),
base_iri: None,
}
}
pub fn with_base_iri(mut self, base: impl Into<String>) -> Self {
self.base_iri = Some(base.into());
self
}
pub fn mm_path(&self, idx: usize) -> PathBuf {
self.dir.join(format!("{}.txt", idx))
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
use tempfile::TempDir;
fn write_file(dir: &Path, name: &str, content: &str) {
let mut f = File::create(dir.join(name)).unwrap();
f.write_all(content.as_bytes()).unwrap();
}
#[test]
fn test_parse_index_map_basic() {
let tmp = TempDir::new().unwrap();
write_file(
tmp.path(),
"vertices.txt",
"<Article1> 1\n<Paul_Erdoes> 2\n<1940> 3\n",
);
let (by_idx, by_name) = parse_index_map(&tmp.path().join("vertices.txt")).unwrap();
assert_eq!(by_idx[&1], "Article1");
assert_eq!(by_idx[&2], "Paul_Erdoes");
assert_eq!(by_idx[&3], "1940");
assert_eq!(by_name["Article1"], 1);
assert_eq!(by_name["Paul_Erdoes"], 2);
}
#[test]
fn test_parse_index_map_preserves_unbracketed_names() {
let tmp = TempDir::new().unwrap();
write_file(tmp.path(), "v.txt", "plain_name 1\n");
let (by_idx, by_name) = parse_index_map(&tmp.path().join("v.txt")).unwrap();
assert_eq!(by_idx[&1], "plain_name");
assert_eq!(by_name["plain_name"], 1);
}
#[test]
fn test_parse_index_map_rejects_zero_index() {
let tmp = TempDir::new().unwrap();
write_file(tmp.path(), "v.txt", "<a> 0\n");
let err = parse_index_map(&tmp.path().join("v.txt")).unwrap_err();
assert!(matches!(err, FormatError::InvalidFormat { .. }));
}
#[test]
fn test_parse_index_map_rejects_duplicate_index() {
let tmp = TempDir::new().unwrap();
write_file(tmp.path(), "v.txt", "<a> 1\n<b> 1\n");
let err = parse_index_map(&tmp.path().join("v.txt")).unwrap_err();
assert!(matches!(err, FormatError::InvalidFormat { .. }));
}
#[test]
fn test_parse_index_map_empty_lines_ignored() {
let tmp = TempDir::new().unwrap();
write_file(tmp.path(), "edges.txt", "\n<journal> 1\n\n<creator> 2\n");
let (by_idx, _) = parse_index_map(&tmp.path().join("edges.txt")).unwrap();
assert_eq!(by_idx.len(), 2);
assert_eq!(by_idx[&1], "journal");
assert_eq!(by_idx[&2], "creator");
}
#[test]
fn test_parse_index_map_bad_index_returns_error() {
let tmp = TempDir::new().unwrap();
write_file(tmp.path(), "bad.txt", "<foo> notanumber\n");
let err = parse_index_map(&tmp.path().join("bad.txt")).unwrap_err();
assert!(
matches!(err, FormatError::InvalidFormat { .. }),
"expected InvalidFormat, got {:?}",
err
);
}
#[test]
fn test_parse_index_map_missing_whitespace_returns_error() {
let tmp = TempDir::new().unwrap();
write_file(tmp.path(), "bad.txt", "nospacehere\n");
let err = parse_index_map(&tmp.path().join("bad.txt")).unwrap_err();
assert!(matches!(err, FormatError::InvalidFormat { .. }));
}
#[test]
fn test_load_nonexistent_mm_file_returns_io_error() {
use crate::formats::FormatError;
use crate::graph::GraphError;
let result = load_mm_file("/nonexistent/path/to/file.txt");
assert!(
matches!(result, Err(GraphError::Format(FormatError::Io(_)))),
"expected Format(Io) error for missing file, got: {:?}",
result
);
}
#[test]
fn test_from_dir_stores_path() {
let src = MatrixMarket::from_dir("/some/path");
assert_eq!(src.dir, PathBuf::from("/some/path"));
assert!(src.base_iri.is_none());
}
#[test]
fn test_with_base_iri() {
let m = MatrixMarket::from_dir("/x").with_base_iri("http://example.org/");
assert_eq!(m.dir, PathBuf::from("/x"));
assert_eq!(m.base_iri.as_deref(), Some("http://example.org/"));
}
#[test]
fn test_apply_base_iri() {
assert_eq!(apply_base_iri("foo".into(), None), "foo");
assert_eq!(
apply_base_iri("Article1".into(), Some("http://example.org/")),
"http://example.org/Article1"
);
assert_eq!(apply_base_iri("foo".into(), Some("")), "foo");
}
#[test]
fn test_mm_path() {
let src = MatrixMarket::from_dir("/graph");
assert_eq!(src.mm_path(3), PathBuf::from("/graph/3.txt"));
}
}