Skip to main content

argyph_parse/structural/
csv.rs

1use super::{byte_to_line_range, line_starts, NodeKind, StructuralNode};
2
3/// Parse a CSV source into structural nodes (header + rows).
4pub fn parse(file_id: u64, source: &str) -> Vec<StructuralNode> {
5    let ls = line_starts(source);
6    let mut nodes = Vec::new();
7    let mut reader = csv::ReaderBuilder::new()
8        .has_headers(true)
9        .flexible(true)
10        .from_reader(source.as_bytes());
11
12    if let Ok(headers) = reader.headers() {
13        let header_labels: Vec<String> = headers.iter().map(|s| s.to_string()).collect();
14        let header_end = source.find('\n').unwrap_or(source.len());
15        let (line_s, line_e) = byte_to_line_range(&ls, 0, header_end);
16
17        nodes.push(StructuralNode {
18            id: StructuralNode::make_id(file_id, NodeKind::CsvHeader, &["header".into()]),
19            file_id,
20            kind: NodeKind::CsvHeader,
21            label: header_labels.join(", "),
22            path: vec!["header".into()],
23            byte_range: (0, header_end),
24            line_range: (line_s, line_e),
25            parent: None,
26            depth: 0,
27        });
28    }
29
30    for record in reader.records().flatten() {
31        if let Some(pos) = record.position().cloned() {
32            let start = pos.byte() as usize;
33            let rest = &source[start..];
34            let line_end_offset = rest.find('\n').unwrap_or(rest.len());
35            let end = start + line_end_offset;
36
37            let idx = nodes.len();
38            let label = record.iter().take(3).collect::<Vec<_>>().join(", ");
39            let path = vec![format!("row_{idx}")];
40            let id = StructuralNode::make_id(file_id, NodeKind::CsvRow, &path);
41            let (line_s, line_e) = byte_to_line_range(&ls, start, end);
42
43            nodes.push(StructuralNode {
44                id,
45                file_id,
46                kind: NodeKind::CsvRow,
47                label,
48                path,
49                byte_range: (start, end),
50                line_range: (line_s, line_e),
51                parent: None,
52                depth: 0,
53            });
54        }
55    }
56
57    nodes
58}
59
#[cfg(test)]
mod tests {
    use super::*;
    use crate::structural::NodeKind;

    /// Fixture: one header line followed by two data rows, each ending in `\n`.
    const SAMPLE: &str = "id,name,email\n1,Alice,alice@x.com\n2,Bob,bob@x.com\n";

    /// The fixture must produce a header node and exactly two row nodes.
    #[test]
    fn extracts_header_and_rows() {
        let parsed = parse(1, SAMPLE);

        let has_header = parsed.iter().any(|node| node.kind == NodeKind::CsvHeader);
        assert!(has_header, "should have a header node");

        let row_count = parsed
            .iter()
            .filter(|node| node.kind == NodeKind::CsvRow)
            .count();
        assert_eq!(row_count, 2, "expected 2 data rows");
    }

    /// Slicing the fixture with the Alice row's `byte_range` must yield text
    /// that contains "Alice".
    #[test]
    fn row_byte_range_is_correct() {
        let parsed = parse(1, SAMPLE);
        let (start, end) = parsed
            .iter()
            .find(|node| node.kind == NodeKind::CsvRow && node.label.contains("Alice"))
            .map(|node| node.byte_range)
            .unwrap();

        let row_text = &SAMPLE[start..end];
        assert!(
            row_text.contains("Alice"),
            "row text should contain 'Alice', got: {row_text:?}"
        );
    }
}
92}