use crate::sankey::{Sankey, SankeyFlow};
use crate::Error;
pub fn parse(src: &str) -> Result<Sankey, Error> {
let mut header_seen = false;
let mut flows = Vec::new();
for raw in src.lines() {
let stripped = strip_sankey_inline_comment(raw);
let trimmed = stripped.trim();
if trimmed.is_empty() || trimmed.starts_with("%%") {
continue;
}
if !header_seen {
let keyword = trimmed.split_whitespace().next().unwrap_or(trimmed);
if keyword.eq_ignore_ascii_case("sankey-beta")
|| keyword.eq_ignore_ascii_case("sankey")
{
header_seen = true;
continue;
} else {
return Err(Error::ParseError(format!(
"expected `sankey-beta` header, got {trimmed:?}"
)));
}
}
if trimmed.starts_with("accTitle") || trimmed.starts_with("accDescr") {
continue;
}
let flow = parse_flow_line(trimmed)?;
flows.push(flow);
}
if !header_seen {
return Err(Error::ParseError(
"missing `sankey-beta` header line".to_string(),
));
}
Ok(Sankey { flows })
}
/// Parses a single `source,target,value` row into a [`SankeyFlow`].
///
/// The row is split with `split_csv_fields` and must contain exactly three
/// fields. All three fields are trimmed and have one pair of matching
/// surrounding quotes removed, so a quoted number such as `"42.0"` is accepted
/// just like a quoted node name.
///
/// # Errors
///
/// Returns [`Error::ParseError`] when:
/// - the row does not have exactly three fields,
/// - the source or target is empty after unquoting,
/// - the value is not parseable as `f64`, is not finite, or is not strictly
///   positive.
fn parse_flow_line(line: &str) -> Result<SankeyFlow, Error> {
    let fields = split_csv_fields(line);
    let (source, target, value_str) = match fields.as_slice() {
        &[source, target, value] => (unquote(source), unquote(target), unquote(value)),
        _ => {
            return Err(Error::ParseError(format!(
                "expected 3 CSV fields (source,target,value), got {} in {line:?}",
                fields.len()
            )));
        }
    };

    if source.is_empty() {
        return Err(Error::ParseError(format!(
            "source field is empty in {line:?}"
        )));
    }
    if target.is_empty() {
        return Err(Error::ParseError(format!(
            "target field is empty in {line:?}"
        )));
    }

    let value = value_str.parse::<f64>().map_err(|_| {
        Error::ParseError(format!(
            "invalid numeric value {value_str:?} in {line:?}"
        ))
    })?;
    // `f64::from_str` accepts spellings such as "inf" and "NaN"; neither is a
    // usable flow magnitude, so they are rejected explicitly.
    if !value.is_finite() {
        return Err(Error::ParseError(format!(
            "value must be finite, got {value} in {line:?}"
        )));
    }
    if value <= 0.0 {
        return Err(Error::ParseError(format!(
            "value must be positive (got {value}) in {line:?}"
        )));
    }

    Ok(SankeyFlow {
        source: source.to_string(),
        target: target.to_string(),
        value,
    })
}
/// Splits one row into comma-separated fields, returning borrowed slices of
/// `line`.
///
/// Commas inside a quoted span do not split. A span opens at a `'` or `"` and
/// closes at the next occurrence of the same character; the quote characters
/// themselves stay in the returned slices (stripping them is the caller's
/// job). Fields are not trimmed. The result always has at least one element:
/// an empty `line` yields `[""]`.
///
/// Unterminated quotes:
/// - An apostrophe with no later partner on the line is treated as ordinary
///   text, so names such as `Bob's Farm` no longer swallow the rest of the
///   row.
/// - An unterminated `"` still extends to the end of the line, which makes a
///   malformed quoted field show up as a wrong field count in the caller.
///
/// Note: two apostrophes on the same line still pair up as a quoted span.
fn split_csv_fields(line: &str) -> Vec<&str> {
    let bytes = line.as_bytes();
    let len = bytes.len();
    let mut fields: Vec<&str> = Vec::new();
    let mut start = 0usize;
    let mut i = 0usize;
    while i < len {
        match bytes[i] {
            b'\'' | b'"' => {
                let quote = bytes[i];
                match bytes[i + 1..].iter().position(|&b| b == quote) {
                    // Jump past the closing quote (offset is relative to i + 1).
                    Some(offset) => i += offset + 2,
                    // Lone apostrophe: literal character, keep scanning.
                    None if quote == b'\'' => i += 1,
                    // Unterminated double quote: the rest of the line is quoted.
                    None => i = len,
                }
            }
            b',' => {
                // `,` is ASCII, so `i` is always a valid char boundary here.
                fields.push(&line[start..i]);
                i += 1;
                start = i;
            }
            _ => i += 1,
        }
    }
    fields.push(&line[start..]);
    fields
}
/// Trims `field` and, when it is wrapped in a matching pair of `'` or `"`
/// characters, returns the trimmed text between them.
///
/// A field that is not wrapped (including a single lone quote character or
/// mismatched quote kinds) is returned trimmed but otherwise unchanged. Only
/// the outermost pair is removed; quotes inside the field are left alone.
fn unquote(field: &str) -> &str {
    let field = field.trim();
    ['\'', '"']
        .iter()
        // `strip_suffix` runs on what is left after `strip_prefix`, so a
        // one-character field can never count as both opener and closer.
        .find_map(|&quote| field.strip_prefix(quote)?.strip_suffix(quote))
        .map_or(field, str::trim)
}
/// Returns `line` with a trailing `%%` comment removed.
///
/// The result is the prefix of `line` that ends just before the first `%%`
/// found outside a quoted span (any whitespace before the `%%` is kept). If no
/// such `%%` exists the whole line is returned.
///
/// A quoted span opens at a `'` or `"` and closes at the next occurrence of
/// the same character; `%%` inside it is preserved. Unterminated quotes:
/// - An apostrophe with no later partner on the line is treated as ordinary
///   text, so a comment after a name such as `Bob's Farm` is still stripped.
/// - An unterminated `"` extends to the end of the line, so nothing after it
///   is treated as a comment.
fn strip_sankey_inline_comment(line: &str) -> &str {
    let bytes = line.as_bytes();
    let len = bytes.len();
    let mut i = 0usize;
    // A comment marker needs two bytes, so the final byte never starts one.
    while i + 1 < len {
        match bytes[i] {
            b'\'' | b'"' => {
                let quote = bytes[i];
                match bytes[i + 1..].iter().position(|&b| b == quote) {
                    // Jump past the closing quote (offset is relative to i + 1).
                    Some(offset) => i += offset + 2,
                    // Lone apostrophe: literal character, keep scanning.
                    None if quote == b'\'' => i += 1,
                    // Unterminated double quote: the rest of the line is quoted.
                    None => return line,
                }
            }
            // `%` is ASCII, so `i` is always a valid char boundary here.
            b'%' if bytes[i + 1] == b'%' => return &line[..i],
            _ => i += 1,
        }
    }
    line
}
#[cfg(test)]
mod tests {
    //! Unit tests for the Sankey parser and its private helpers.

    use super::*;

    /// Header line shared by tests that only care about the data rows.
    const HEADER: &str = "sankey-beta\n";

    #[test]
    fn parses_minimal_single_flow() {
        let src = format!("{HEADER}Bio-conversion,Liquid,0.597");
        let diag = parse(&src).unwrap();
        assert_eq!(diag.flows.len(), 1);
        assert_eq!(diag.flows[0].source, "Bio-conversion");
        assert_eq!(diag.flows[0].target, "Liquid");
        assert!((diag.flows[0].value - 0.597).abs() < 1e-9);
    }

    #[test]
    fn parses_multiple_flows() {
        let src = format!(
            "{HEADER}\
             Bio-conversion,Liquid,0.597\n\
             Bio-conversion,Solid,280.322\n\
             Coal,Solid,75.571"
        );
        let diag = parse(&src).unwrap();
        assert_eq!(diag.flows.len(), 3);
        assert!((diag.flows[1].value - 280.322).abs() < 1e-6);
    }

    #[test]
    fn strips_percent_percent_comment_lines() {
        let src = format!("{HEADER}%% this is a comment\nCoal,Solid,75.571");
        let diag = parse(&src).unwrap();
        assert_eq!(diag.flows.len(), 1);
        assert_eq!(diag.flows[0].source, "Coal");
    }

    #[test]
    fn strips_inline_comment_after_header() {
        // Both the header and the data row carry a trailing `%%` comment, so
        // this fails if inline comments are not removed before parsing.
        let src = "sankey-beta %% diagram header\nCoal,Solid,75.571 %% trailing note";
        let diag = parse(src).unwrap();
        assert_eq!(diag.flows.len(), 1);
        assert_eq!(diag.flows[0].source, "Coal");
        assert_eq!(diag.flows[0].target, "Solid");
        assert!((diag.flows[0].value - 75.571).abs() < 1e-9);
    }

    #[test]
    fn keeps_percent_percent_inside_quotes() {
        // `%%` inside a quoted field is data, not a comment marker.
        let src = format!("{HEADER}\"100%% pure\",B,1.0");
        let diag = parse(&src).unwrap();
        assert_eq!(diag.flows.len(), 1);
        assert_eq!(diag.flows[0].source, "100%% pure");
    }

    #[test]
    fn accepts_sankey_keyword_without_beta() {
        let src = "sankey\nA,B,1.0";
        let diag = parse(src).unwrap();
        assert_eq!(diag.flows.len(), 1);
    }

    #[test]
    fn unquotes_single_quoted_fields() {
        let src = format!("{HEADER}Agricultural 'waste',Bio-conversion,124.729");
        let diag = parse(&src).unwrap();
        assert_eq!(diag.flows.len(), 1);
        let source = &diag.flows[0].source;
        assert!(source.contains("waste"), "source should contain waste: {source}");
    }

    #[test]
    fn unquotes_double_quoted_fields() {
        let src = format!("{HEADER}\"Source Node\",\"Target Node\",42.0");
        let diag = parse(&src).unwrap();
        assert_eq!(diag.flows[0].source, "Source Node");
        assert_eq!(diag.flows[0].target, "Target Node");
    }

    #[test]
    fn rejects_zero_value() {
        let src = format!("{HEADER}A,B,0.0");
        assert!(parse(&src).is_err(), "zero value should be rejected");
    }

    #[test]
    fn rejects_negative_value() {
        let src = format!("{HEADER}A,B,-5.0");
        assert!(parse(&src).is_err(), "negative value should be rejected");
    }

    #[test]
    fn rejects_non_numeric_value() {
        let src = format!("{HEADER}A,B,not_a_number");
        assert!(parse(&src).is_err(), "non-numeric value should be rejected");
    }

    #[test]
    fn rejects_too_few_fields() {
        let src = format!("{HEADER}A,B");
        assert!(parse(&src).is_err(), "two fields should be rejected");
    }

    #[test]
    fn rejects_too_many_fields() {
        let src = format!("{HEADER}A,B,C,1.0");
        assert!(parse(&src).is_err(), "four fields should be rejected");
    }

    #[test]
    fn rejects_missing_header() {
        let src = "A,B,1.0";
        assert!(parse(src).is_err(), "missing header should be rejected");
    }

    #[test]
    fn skips_blank_lines() {
        let src = format!("{HEADER}\n\nA,B,1.0\n\nB,C,2.0\n");
        let diag = parse(&src).unwrap();
        assert_eq!(diag.flows.len(), 2);
    }

    #[test]
    fn skips_acc_title_and_acc_descr() {
        let src = format!("{HEADER}accTitle: My Sankey\naccDescr: Flow diagram\nA,B,1.0");
        let diag = parse(&src).unwrap();
        assert_eq!(diag.flows.len(), 1);
    }

    #[test]
    fn split_csv_fields_basic() {
        let fields = split_csv_fields("A,B,1.0");
        assert_eq!(fields, vec!["A", "B", "1.0"]);
    }

    #[test]
    fn split_csv_fields_quoted_comma_not_split() {
        let fields = split_csv_fields("\"A,B\",C,1.0");
        assert_eq!(fields, vec!["\"A,B\"", "C", "1.0"]);
    }

    #[test]
    fn unquote_strips_double_quotes() {
        assert_eq!(unquote("\"hello world\""), "hello world");
    }

    #[test]
    fn unquote_strips_single_quotes() {
        assert_eq!(unquote("'hello world'"), "hello world");
    }

    #[test]
    fn unquote_unquoted_passthrough() {
        assert_eq!(unquote("hello"), "hello");
    }
}