argyph_parse/structural/
csv.rs1use super::{byte_to_line_range, line_starts, NodeKind, StructuralNode};
2
3pub fn parse(file_id: u64, source: &str) -> Vec<StructuralNode> {
5 let ls = line_starts(source);
6 let mut nodes = Vec::new();
7 let mut reader = csv::ReaderBuilder::new()
8 .has_headers(true)
9 .flexible(true)
10 .from_reader(source.as_bytes());
11
12 if let Ok(headers) = reader.headers() {
13 let header_labels: Vec<String> = headers.iter().map(|s| s.to_string()).collect();
14 let header_end = source.find('\n').unwrap_or(source.len());
15 let (line_s, line_e) = byte_to_line_range(&ls, 0, header_end);
16
17 nodes.push(StructuralNode {
18 id: StructuralNode::make_id(file_id, NodeKind::CsvHeader, &["header".into()]),
19 file_id,
20 kind: NodeKind::CsvHeader,
21 label: header_labels.join(", "),
22 path: vec!["header".into()],
23 byte_range: (0, header_end),
24 line_range: (line_s, line_e),
25 parent: None,
26 depth: 0,
27 });
28 }
29
30 for record in reader.records().flatten() {
31 if let Some(pos) = record.position().cloned() {
32 let start = pos.byte() as usize;
33 let rest = &source[start..];
34 let line_end_offset = rest.find('\n').unwrap_or(rest.len());
35 let end = start + line_end_offset;
36
37 let idx = nodes.len();
38 let label = record.iter().take(3).collect::<Vec<_>>().join(", ");
39 let path = vec![format!("row_{idx}")];
40 let id = StructuralNode::make_id(file_id, NodeKind::CsvRow, &path);
41 let (line_s, line_e) = byte_to_line_range(&ls, start, end);
42
43 nodes.push(StructuralNode {
44 id,
45 file_id,
46 kind: NodeKind::CsvRow,
47 label,
48 path,
49 byte_range: (start, end),
50 line_range: (line_s, line_e),
51 parent: None,
52 depth: 0,
53 });
54 }
55 }
56
57 nodes
58}
59
#[cfg(test)]
mod tests {
    use super::*;
    use crate::structural::NodeKind;

    /// Three-column fixture: one header line followed by two data rows.
    const SAMPLE: &str = "id,name,email\n1,Alice,alice@x.com\n2,Bob,bob@x.com\n";

    /// The fixture must produce a header node and exactly two row nodes.
    #[test]
    fn extracts_header_and_rows() {
        let parsed = parse(1, SAMPLE);

        let has_header = parsed
            .iter()
            .any(|node| node.kind == NodeKind::CsvHeader);
        assert!(has_header, "should have a header node");

        let row_count = parsed
            .iter()
            .filter(|node| node.kind == NodeKind::CsvRow)
            .count();
        assert_eq!(row_count, 2, "expected 2 data rows");
    }

    /// Slicing the fixture with a row's byte range must give back that row's text.
    #[test]
    fn row_byte_range_is_correct() {
        let parsed = parse(1, SAMPLE);
        let alice = parsed
            .iter()
            .filter(|node| node.kind == NodeKind::CsvRow)
            .find(|node| node.label.contains("Alice"))
            .unwrap();

        let (from, to) = alice.byte_range;
        let row_text = &SAMPLE[from..to];
        assert!(
            row_text.contains("Alice"),
            "row text should contain 'Alice', got: {row_text:?}"
        );
    }
}