edgeparse_core/output/
csv.rs1use crate::models::content::ContentElement;
7use crate::models::document::PdfDocument;
8use crate::models::table::TableBorderCell;
9use crate::EdgePdfError;
10
11pub fn to_csv(doc: &PdfDocument) -> Result<String, EdgePdfError> {
19 let mut output = String::new();
20
21 if doc.kids.is_empty() {
22 return Ok(output);
23 }
24
25 let mut first = true;
26 for element in &doc.kids {
27 match element {
28 ContentElement::TableBorder(table) => {
29 if !first {
30 output.push('\n');
31 }
32 for row in &table.rows {
33 let cells: Vec<String> = row
34 .cells
35 .iter()
36 .map(|cell| csv_escape(&cell_text(cell)))
37 .collect();
38 output.push_str(&cells.join(","));
39 output.push('\n');
40 }
41 first = false;
42 }
43 ContentElement::Paragraph(p) => {
44 let text = p.base.value();
45 let trimmed = text.trim();
46 if !trimmed.is_empty() {
47 output.push_str(&csv_escape(trimmed));
48 output.push('\n');
49 }
50 }
51 ContentElement::Heading(h) => {
52 let text = h.base.base.value();
53 let trimmed = text.trim();
54 if !trimmed.is_empty() {
55 output.push_str(&csv_escape(trimmed));
56 output.push('\n');
57 }
58 }
59 _ => {}
60 }
61 }
62
63 Ok(output)
64}
65
66fn cell_text(cell: &TableBorderCell) -> String {
68 cell.content
69 .iter()
70 .map(|token| token.base.value.as_str())
71 .collect::<Vec<_>>()
72 .join(" ")
73 .trim()
74 .to_string()
75}
76
/// Escapes a single field for inclusion in a CSV line.
///
/// A value containing a comma, a double quote, a line feed, or a carriage
/// return is wrapped in double quotes, with every embedded double quote
/// doubled. Any other value is returned unchanged.
fn csv_escape(value: &str) -> String {
    let needs_quoting = value
        .chars()
        .any(|ch| matches!(ch, ',' | '"' | '\n' | '\r'));
    if !needs_quoting {
        return value.to_owned();
    }

    // Two extra bytes for the surrounding quotes; doubled quotes may grow it.
    let mut quoted = String::with_capacity(value.len() + 2);
    quoted.push('"');
    for ch in value.chars() {
        if ch == '"' {
            quoted.push('"');
        }
        quoted.push(ch);
    }
    quoted.push('"');
    quoted
}
85
#[cfg(test)]
mod tests {
    use super::*;

    /// A value with no special characters is returned as-is.
    #[test]
    fn test_csv_escape_plain() {
        let escaped = csv_escape("hello");
        assert_eq!(escaped, "hello");
    }

    /// A comma forces the value to be wrapped in double quotes.
    #[test]
    fn test_csv_escape_comma() {
        let escaped = csv_escape("a,b");
        assert_eq!(escaped, r#""a,b""#);
    }

    /// Embedded double quotes are doubled and the value is quoted.
    #[test]
    fn test_csv_escape_quotes() {
        let escaped = csv_escape(r#"say "hi""#);
        assert_eq!(escaped, r#""say ""hi""""#);
    }

    /// A line feed is kept verbatim inside a quoted value.
    #[test]
    fn test_csv_escape_newline() {
        let escaped = csv_escape("line1\nline2");
        assert_eq!(escaped, "\"line1\nline2\"");
    }

    /// A freshly constructed document renders to empty output.
    #[test]
    fn test_empty_doc() {
        let empty = PdfDocument::new("test.pdf".to_string());
        let rendered = to_csv(&empty).unwrap();
        assert!(rendered.is_empty());
    }
}