Skip to main content

sem_core/parser/plugins/
csv_plugin.rs

1use std::collections::HashMap;
2
3use crate::model::entity::{build_entity_id, SemanticEntity};
4use crate::parser::plugin::SemanticParserPlugin;
5use crate::utils::hash::content_hash;
6
/// Parser plugin for delimited text files (`.csv` / `.tsv`).
///
/// The type is stateless; all behaviour lives in its `SemanticParserPlugin`
/// implementation, which emits one `row` entity per data row.
#[derive(Debug, Clone, Copy, Default)]
pub struct CsvParserPlugin;
8
9impl SemanticParserPlugin for CsvParserPlugin {
10    fn id(&self) -> &str {
11        "csv"
12    }
13
14    fn extensions(&self) -> &[&str] {
15        &[".csv", ".tsv"]
16    }
17
18    fn extract_entities(&self, content: &str, file_path: &str) -> Vec<SemanticEntity> {
19        let mut entities = Vec::new();
20        let lines: Vec<&str> = content.lines().filter(|l| !l.trim().is_empty()).collect();
21        if lines.is_empty() {
22            return entities;
23        }
24
25        let is_tsv = file_path.ends_with(".tsv");
26        let separator = if is_tsv { '\t' } else { ',' };
27
28        let headers = parse_csv_line(lines[0], separator);
29
30        for (i, &line) in lines.iter().enumerate().skip(1) {
31            let cells = parse_csv_line(line, separator);
32            let row_id = if cells.first().map_or(true, |c| c.is_empty()) {
33                format!("row_{i}")
34            } else {
35                cells[0].clone()
36            };
37            let name = format!("row[{row_id}]");
38
39            let mut metadata = HashMap::new();
40            for (j, header) in headers.iter().enumerate() {
41                metadata.insert(
42                    header.clone(),
43                    cells.get(j).cloned().unwrap_or_default(),
44                );
45            }
46
47            entities.push(SemanticEntity {
48                id: build_entity_id(file_path, "row", &name, None),
49                file_path: file_path.to_string(),
50                entity_type: "row".to_string(),
51                name,
52                parent_id: None,
53                content_hash: content_hash(line),
54                structural_hash: None,
55                content: line.to_string(),
56                start_line: i + 1,
57                end_line: i + 1,
58                metadata: Some(metadata),
59            });
60        }
61
62        entities
63    }
64}
65
/// Splits a single delimited line into its cell values.
///
/// * `separator` ends a cell only when it appears outside double quotes.
/// * A double quote toggles quoted mode and is not part of the cell text.
/// * Inside quoted mode, two consecutive double quotes produce one literal `"`.
/// * Every cell is whitespace-trimmed after unquoting, including text that was quoted.
/// * A quote that is never closed simply runs to the end of the line.
///
/// The result always holds at least one cell; an empty `line` yields one empty string.
fn parse_csv_line(line: &str, separator: char) -> Vec<String> {
    let mut fields: Vec<String> = Vec::new();
    let mut buffer = String::new();
    let mut quoted = false;
    let mut stream = line.chars().peekable();

    // `while let` rather than `for`: the escaped-quote arm must look ahead and consume.
    while let Some(symbol) = stream.next() {
        match (quoted, symbol) {
            // `""` inside a quoted section is an escaped quote character.
            (true, '"') if stream.peek() == Some(&'"') => {
                stream.next();
                buffer.push('"');
            }
            (true, '"') => quoted = false,
            (true, other) => buffer.push(other),
            (false, '"') => quoted = true,
            (false, other) if other == separator => {
                fields.push(buffer.trim().to_string());
                buffer.clear();
            }
            (false, other) => buffer.push(other),
        }
    }

    // Whatever follows the last separator (possibly nothing) is the final cell.
    fields.push(buffer.trim().to_string());
    fields
}