use std::collections::HashMap;
use std::io::Read;
use crate::graph::Graph;
use crate::types::{ulid_encode, DbError, Edge, Node, NodeId, Properties, Value};
/// Summary returned by `load_nodes_csv`.
#[derive(Debug, Default)]
pub struct NodesLoaded {
/// Number of nodes inserted into the graph (one per CSV data row).
pub inserted: usize,
/// Maps each non-empty `:ID` cell to the `NodeId` allocated for that row.
/// Rows without an `:ID` value contribute no entry.
pub id_map: HashMap<String, NodeId>,
}
/// Summary returned by `load_edges_csv`.
#[derive(Debug, Default)]
pub struct EdgesLoaded {
/// Number of edges inserted into the graph.
pub inserted: usize,
/// Number of rows skipped because their start or end ID was not found in
/// the ID map handed to the loader.
pub skipped: usize,
}
/// Infers a typed [`Value`] from the raw text of a CSV cell.
///
/// The rules are tried in order and the first match wins:
///
/// 1. empty string → `Value::Null`
/// 2. `true` / `false`, ASCII case-insensitive → `Value::Bool`
/// 3. parses as `i64` → `Value::Int`
/// 4. parses as `f64` → `Value::Float`
/// 5. anything else → `Value::String` holding an owned copy of `s`
///
/// The input is used as-is; no trimming happens here, so surrounding
/// whitespace makes a cell fall through to `Value::String`.
///
/// Note that `f64`'s `FromStr` also accepts spellings such as `inf`,
/// `infinity` and `nan`, so cells containing those words become floats.
pub fn infer_value(s: &str) -> Value {
    if s.is_empty() {
        return Value::Null;
    }
    // Compare in place instead of building a lowercased copy of every cell.
    if s.eq_ignore_ascii_case("true") {
        return Value::Bool(true);
    }
    if s.eq_ignore_ascii_case("false") {
        return Value::Bool(false);
    }
    // Integers are tried before floats so that "42" stays an `Int`.
    if let Ok(i) = s.parse::<i64>() {
        return Value::Int(i);
    }
    if let Ok(f) = s.parse::<f64>() {
        return Value::Float(f);
    }
    Value::String(s.to_string())
}
pub fn load_nodes_csv<R: Read>(
reader: R,
graph: &mut Graph,
default_label: Option<&str>,
) -> Result<NodesLoaded, DbError> {
let mut rdr = csv::ReaderBuilder::new()
.trim(csv::Trim::All)
.from_reader(reader);
let headers = rdr
.headers()
.map_err(|e| DbError::Parse(format!("CSV header error: {e}")))?
.clone();
let id_col = headers.iter().position(|h| h == ":ID");
let label_col = headers.iter().position(|h| h == ":LABEL");
let mut result = NodesLoaded::default();
for record in rdr.records() {
let rec = record.map_err(|e| DbError::Parse(format!("CSV row error: {e}")))?;
let label: String = label_col
.and_then(|i| rec.get(i))
.map(|s| s.trim().to_string())
.filter(|s| !s.is_empty())
.unwrap_or_else(|| default_label.unwrap_or("").to_string());
let csv_id: Option<String> = id_col
.and_then(|i| rec.get(i))
.map(|s| s.trim().to_string())
.filter(|s| !s.is_empty());
let mut props = Properties::new();
if let Some(ref cid) = csv_id {
props.insert("_csv_id".to_string(), Value::String(cid.clone()));
}
for (i, header) in headers.iter().enumerate() {
if header.starts_with(':') {
continue;
}
let cell = rec.get(i).unwrap_or("").trim();
let v = infer_value(cell);
if v != Value::Null {
props.insert(header.to_string(), v);
}
}
let node_id = graph.alloc_node_id();
let labels = if label.is_empty() { vec![] } else { vec![label] };
graph.apply_insert_node(Node::new(node_id, labels, props));
if let Some(cid) = csv_id {
if result.id_map.contains_key(&cid) {
return Err(DbError::Parse(format!(
"duplicate :ID value '{cid}' in node CSV"
)));
}
result.id_map.insert(cid, node_id);
}
result.inserted += 1;
}
Ok(result)
}
pub fn load_edges_csv<R: Read>(
reader: R,
graph: &mut Graph,
id_map: &HashMap<String, NodeId>,
default_label: Option<&str>,
) -> Result<EdgesLoaded, DbError> {
let mut rdr = csv::ReaderBuilder::new()
.trim(csv::Trim::All)
.from_reader(reader);
let headers = rdr
.headers()
.map_err(|e| DbError::Parse(format!("CSV header error: {e}")))?
.clone();
let start_col = headers.iter().position(|h| h == ":START_ID");
let end_col = headers.iter().position(|h| h == ":END_ID");
let type_col = headers.iter().position(|h| h == ":TYPE");
let mut result = EdgesLoaded::default();
for record in rdr.records() {
let rec = record.map_err(|e| DbError::Parse(format!("CSV row error: {e}")))?;
let src_str = start_col.and_then(|i| rec.get(i)).unwrap_or("").trim();
let dst_str = end_col.and_then(|i| rec.get(i)).unwrap_or("").trim();
let (from_node, to_node) = match (id_map.get(src_str), id_map.get(dst_str)) {
(Some(&f), Some(&t)) => (f, t),
_ => {
result.skipped += 1;
continue;
}
};
let label: String = type_col
.and_then(|i| rec.get(i))
.map(|s| s.trim().to_string())
.filter(|s| !s.is_empty())
.unwrap_or_else(|| default_label.unwrap_or("RELATED").to_string());
let mut props = Properties::new();
for (i, header) in headers.iter().enumerate() {
if header.starts_with(':') {
continue;
}
let cell = rec.get(i).unwrap_or("").trim();
let v = infer_value(cell);
if v != Value::Null {
props.insert(header.to_string(), v);
}
}
let edge_id = graph.alloc_edge_id();
graph.apply_insert_edge(Edge::new(edge_id, label, from_node, to_node, props, true));
result.inserted += 1;
}
Ok(result)
}
/// Converts an ID map into a plain string-to-string map.
///
/// Keys are cloned unchanged; each `NodeId`'s inner value is rendered with
/// `ulid_encode`.
pub fn id_map_to_strings(id_map: &HashMap<String, NodeId>) -> HashMap<String, String> {
    let mut encoded = HashMap::with_capacity(id_map.len());
    for (csv_id, node_id) in id_map {
        encoded.insert(csv_id.clone(), ulid_encode(node_id.0));
    }
    encoded
}
/// Rebuilds an ID map from its string-to-string form.
///
/// Each value is decoded with `ulid_decode` and wrapped in a `NodeId`.
/// Entries whose value fails to decode are silently left out of the result.
pub fn id_map_from_strings(raw: &HashMap<String, String>) -> HashMap<String, NodeId> {
    let mut decoded = HashMap::new();
    for (csv_id, encoded) in raw {
        // Best effort: an undecodable value drops just that entry.
        if let Ok(bits) = crate::types::ulid_decode(encoded) {
            decoded.insert(csv_id.clone(), NodeId(bits));
        }
    }
    decoded
}
#[cfg(test)]
mod tests {
    // Unit tests for value inference and the CSV node/edge loaders.

    use super::*;
    use crate::graph::Graph;

    /// Views a string as a byte slice so it can be passed as a `Read` source.
    fn bytes(s: &str) -> &[u8] {
        s.as_bytes()
    }

    #[test]
    fn infer_empty_is_null() {
        assert_eq!(infer_value(""), Value::Null);
    }

    #[test]
    fn infer_bool() {
        assert_eq!(infer_value("true"), Value::Bool(true));
        assert_eq!(infer_value("True"), Value::Bool(true));
        assert_eq!(infer_value("FALSE"), Value::Bool(false));
    }

    #[test]
    fn infer_int() {
        assert_eq!(infer_value("42"), Value::Int(42));
        assert_eq!(infer_value("-7"), Value::Int(-7));
    }

    #[test]
    fn infer_float() {
        // 2.5 is exactly representable, and unlike a pi-like literal it does
        // not trip Clippy's `approx_constant` lint.
        assert_eq!(infer_value("2.5"), Value::Float(2.5));
    }

    #[test]
    fn infer_string() {
        assert_eq!(infer_value("hello"), Value::String("hello".to_string()));
    }

    #[test]
    fn load_nodes_basic() {
        let mut g = Graph::new();
        let csv = ":ID,name,age,:LABEL\n1,Alice,30,Person\n2,Bob,25,Person\n";
        let r = load_nodes_csv(bytes(csv), &mut g, None).unwrap();
        assert_eq!(r.inserted, 2);
        assert_eq!(r.id_map.len(), 2);
        assert!(r.id_map.contains_key("1"));
        assert!(r.id_map.contains_key("2"));
        assert_eq!(g.node_count(), 2);

        let nid = r.id_map["1"];
        let node = g.get_node(nid).unwrap();
        assert_eq!(node.labels, vec!["Person"]);
        assert_eq!(node.properties["name"], Value::String("Alice".to_string()));
        assert_eq!(node.properties["age"], Value::Int(30));
        assert_eq!(node.properties["_csv_id"], Value::String("1".to_string()));
    }

    #[test]
    fn load_nodes_default_label() {
        let mut g = Graph::new();
        let csv = ":ID,name\n1,Alice\n";
        let r = load_nodes_csv(bytes(csv), &mut g, Some("Employee")).unwrap();
        let node = g.get_node(r.id_map["1"]).unwrap();
        assert_eq!(node.labels, vec!["Employee"]);
    }

    #[test]
    fn load_nodes_no_id_column() {
        let mut g = Graph::new();
        let csv = "name,age\nAlice,30\nBob,25\n";
        let r = load_nodes_csv(bytes(csv), &mut g, Some("Person")).unwrap();
        assert_eq!(r.inserted, 2);
        // Without an :ID column nothing can be recorded in the ID map.
        assert_eq!(r.id_map.len(), 0);
        assert_eq!(g.node_count(), 2);
    }

    #[test]
    fn load_nodes_duplicate_id_is_error() {
        let mut g = Graph::new();
        let csv = ":ID,name\n1,Alice\n1,Bob\n";
        let res = load_nodes_csv(bytes(csv), &mut g, Some("Person"));
        assert!(matches!(res, Err(DbError::Parse(_))));
    }

    #[test]
    fn load_nodes_bool_and_float_props() {
        let mut g = Graph::new();
        let csv = ":ID,active,score\n1,true,9.5\n";
        let r = load_nodes_csv(bytes(csv), &mut g, Some("P")).unwrap();
        let node = g.get_node(r.id_map["1"]).unwrap();
        assert_eq!(node.properties["active"], Value::Bool(true));
        assert_eq!(node.properties["score"], Value::Float(9.5));
    }

    #[test]
    fn load_edges_basic() {
        let mut g = Graph::new();
        let node_csv = ":ID,name\n1,Alice\n2,Bob\n";
        let nr = load_nodes_csv(bytes(node_csv), &mut g, Some("Person")).unwrap();
        let edge_csv = ":START_ID,:END_ID,:TYPE,weight\n1,2,KNOWS,0.9\n";
        let er = load_edges_csv(bytes(edge_csv), &mut g, &nr.id_map, None).unwrap();
        assert_eq!(er.inserted, 1);
        assert_eq!(er.skipped, 0);
        assert_eq!(g.edge_count(), 1);
    }

    #[test]
    fn load_edges_skips_unresolved() {
        let mut g = Graph::new();
        let node_csv = ":ID,name\n1,Alice\n";
        let nr = load_nodes_csv(bytes(node_csv), &mut g, Some("P")).unwrap();
        // End ID 99 was never loaded, so the row cannot be resolved.
        let edge_csv = ":START_ID,:END_ID,:TYPE\n1,99,KNOWS\n";
        let er = load_edges_csv(bytes(edge_csv), &mut g, &nr.id_map, None).unwrap();
        assert_eq!(er.inserted, 0);
        assert_eq!(er.skipped, 1);
    }

    #[test]
    fn load_edges_default_label() {
        let mut g = Graph::new();
        let node_csv = ":ID,name\n1,Alice\n2,Bob\n";
        let nr = load_nodes_csv(bytes(node_csv), &mut g, Some("P")).unwrap();
        // No :TYPE column, so the supplied default label must be used.
        let edge_csv = ":START_ID,:END_ID\n1,2\n";
        let er = load_edges_csv(bytes(edge_csv), &mut g, &nr.id_map, Some("LINKED")).unwrap();
        assert_eq!(er.inserted, 1);
        let eid = g.outgoing_edges(nr.id_map["1"])[0];
        let edge = g.get_edge(eid).unwrap();
        assert_eq!(edge.label, "LINKED");
    }

    #[test]
    fn load_edges_with_props() {
        let mut g = Graph::new();
        let node_csv = ":ID,name\n1,A\n2,B\n";
        let nr = load_nodes_csv(bytes(node_csv), &mut g, Some("P")).unwrap();
        let edge_csv = ":START_ID,:END_ID,:TYPE,weight,since\n1,2,KNOWS,0.9,2020\n";
        let er = load_edges_csv(bytes(edge_csv), &mut g, &nr.id_map, None).unwrap();
        assert_eq!(er.inserted, 1);
        let eid = g.outgoing_edges(nr.id_map["1"])[0];
        let edge = g.get_edge(eid).unwrap();
        assert_eq!(edge.properties["weight"], Value::Float(0.9));
        assert_eq!(edge.properties["since"], Value::Int(2020));
    }

    #[test]
    fn id_map_round_trip() {
        let mut g = Graph::new();
        let csv = ":ID,name\nA,Alice\n";
        let r = load_nodes_csv(bytes(csv), &mut g, Some("P")).unwrap();
        let strings = id_map_to_strings(&r.id_map);
        let back = id_map_from_strings(&strings);
        assert_eq!(back["A"], r.id_map["A"]);
    }
}