sem_core/parser/plugins/
csv_plugin.rs1use std::collections::HashMap;
2
3use crate::model::entity::{build_entity_id, SemanticEntity};
4use crate::parser::plugin::SemanticParserPlugin;
5use crate::utils::hash::content_hash;
6
/// Parser plugin that turns each data row of a CSV or TSV file into a
/// semantic entity.
///
/// The type carries no state; all behavior lives in its
/// `SemanticParserPlugin` implementation.
#[derive(Debug, Clone, Copy, Default)]
pub struct CsvParserPlugin;
8
9impl SemanticParserPlugin for CsvParserPlugin {
10 fn id(&self) -> &str {
11 "csv"
12 }
13
14 fn extensions(&self) -> &[&str] {
15 &[".csv", ".tsv"]
16 }
17
18 fn extract_entities(&self, content: &str, file_path: &str) -> Vec<SemanticEntity> {
19 let mut entities = Vec::new();
20 let lines: Vec<&str> = content.lines().filter(|l| !l.trim().is_empty()).collect();
21 if lines.is_empty() {
22 return entities;
23 }
24
25 let is_tsv = file_path.ends_with(".tsv");
26 let separator = if is_tsv { '\t' } else { ',' };
27
28 let headers = parse_csv_line(lines[0], separator);
29
30 for (i, &line) in lines.iter().enumerate().skip(1) {
31 let cells = parse_csv_line(line, separator);
32 let row_id = if cells.first().map_or(true, |c| c.is_empty()) {
33 format!("row_{i}")
34 } else {
35 cells[0].clone()
36 };
37 let name = format!("row[{row_id}]");
38
39 let mut metadata = HashMap::new();
40 for (j, header) in headers.iter().enumerate() {
41 metadata.insert(
42 header.clone(),
43 cells.get(j).cloned().unwrap_or_default(),
44 );
45 }
46
47 entities.push(SemanticEntity {
48 id: build_entity_id(file_path, "row", &name, None),
49 file_path: file_path.to_string(),
50 entity_type: "row".to_string(),
51 name,
52 parent_id: None,
53 content_hash: content_hash(line),
54 structural_hash: None,
55 content: line.to_string(),
56 start_line: i + 1,
57 end_line: i + 1,
58 metadata: Some(metadata),
59 });
60 }
61
62 entities
63 }
64}
65
/// Splits a single line of delimited text into trimmed cell strings.
///
/// Double quotes toggle a quoted section in which `separator` is treated as
/// ordinary text; inside such a section a doubled quote (`""`) yields one
/// literal `"`. The quote characters themselves are not kept. Each cell is
/// whitespace-trimmed after it has been assembled, and the result always
/// contains at least one cell (an empty line gives a single empty string).
fn parse_csv_line(line: &str, separator: char) -> Vec<String> {
    const QUOTE: char = '"';

    let mut fields: Vec<String> = Vec::new();
    let mut field = String::new();
    let mut quoted = false;
    let mut stream = line.chars().peekable();

    // `peek` is needed inside the loop body, hence `while let` rather than `for`.
    while let Some(c) = stream.next() {
        match (quoted, c) {
            (true, QUOTE) => {
                if stream.peek() == Some(&QUOTE) {
                    // Escaped quote: consume the second one, emit a single `"`.
                    stream.next();
                    field.push(QUOTE);
                } else {
                    quoted = false;
                }
            }
            (true, other) => field.push(other),
            // The quote test comes before the separator test on purpose.
            (false, QUOTE) => quoted = true,
            (false, sep) if sep == separator => {
                fields.push(field.trim().to_string());
                field.clear();
            }
            (false, other) => field.push(other),
        }
    }

    // Whatever is left after the last separator is the final cell.
    fields.push(field.trim().to_string());
    fields
}