Skip to main content

plato_tile_import/
lib.rs

1//! plato-tile-import — Import/export knowledge tiles from external formats.
2//! Supports Markdown, JSON, CSV, and plaintext. Zero external dependencies.
3
/// A knowledge tile imported from an external format.
///
/// `Tile::default()` is the tile with every field empty.
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
pub struct Tile {
    /// Prompt text: a Markdown header, a `question` key/column, or the first
    /// line of a plaintext paragraph.
    pub question: String,
    /// Body text that answers `question`.
    pub answer: String,
    /// Keyword labels; empty when the source format supplies none.
    pub tags: Vec<String>,
    /// Topic label; empty when the source format supplies none.
    pub domain: String,
}

impl Tile {
    /// Build a tile from a question and an answer, leaving `tags` and
    /// `domain` empty.
    pub fn new(question: impl Into<String>, answer: impl Into<String>) -> Self {
        Self {
            question: question.into(),
            answer: answer.into(),
            ..Self::default()
        }
    }
}
23
24// ── Markdown ──────────────────────────────────────────────────────────────────
25
26/// Import tiles from Markdown.
27/// Each `## Header` section becomes a tile:
28///   - question = the header text (stripped of ##)
29///   - answer = the body text (trimmed)
30///   - tags = [BracketedWord] anchors found in body
31///   - domain = first word of question, lowercased
32/// Sections with empty body are skipped.
33pub fn import_markdown(md: &str) -> Vec<Tile> {
34    let mut tiles = Vec::new();
35    let mut current_q: Option<String> = None;
36    let mut current_body = String::new();
37
38    let flush = |current_q: &mut Option<String>, current_body: &mut String, tiles: &mut Vec<Tile>| {
39        if let Some(q) = current_q.take() {
40            let body = current_body.trim().to_string();
41            if !body.is_empty() {
42                let tags = extract_bracketed_tags(&body);
43                let domain = q.split_whitespace().next().unwrap_or("").to_lowercase();
44                tiles.push(Tile {
45                    question: q,
46                    answer: body,
47                    tags,
48                    domain,
49                });
50            }
51        }
52        current_body.clear();
53    };
54
55    for line in md.lines() {
56        if let Some(header) = line.strip_prefix("## ") {
57            flush(&mut current_q, &mut current_body, &mut tiles);
58            current_q = Some(header.trim().to_string());
59        } else if current_q.is_some() {
60            if !current_body.is_empty() {
61                current_body.push('\n');
62            }
63            current_body.push_str(line);
64        }
65    }
66    flush(&mut current_q, &mut current_body, &mut tiles);
67    tiles
68}
69
/// Extract `[BracketedWord]` tokens from a string.
///
/// A tag is the text between a `[` and the next `]`, provided it consists of
/// exactly one whitespace-separated word; the word is returned without the
/// brackets or any padding inside them. Empty brackets, multi-word brackets
/// and an unclosed `[` produce nothing.
///
/// A `[` seen while another is still open restarts the tag, so `[[Rust]]`
/// and `[see [Rust]` both yield `Rust` rather than a tag with a stray
/// bracket in it.
fn extract_bracketed_tags(s: &str) -> Vec<String> {
    let mut tags = Vec::new();
    // Byte offset just past the most recent unmatched `[`, if any.
    let mut open: Option<usize> = None;

    for (idx, ch) in s.char_indices() {
        match ch {
            // `[` is one byte long, so `idx + 1` is always a char boundary.
            '[' => open = Some(idx + 1),
            ']' => {
                if let Some(start) = open.take() {
                    let mut words = s[start..idx].split_whitespace();
                    // Accept exactly one word: a first word and no second.
                    if let (Some(word), None) = (words.next(), words.next()) {
                        tags.push(word.to_string());
                    }
                }
            }
            _ => {}
        }
    }
    tags
}
92
93// ── JSON ──────────────────────────────────────────────────────────────────────
94
95/// Import tiles from JSON array.
96/// Expected format: [{"question": "...", "answer": "...", "tags": [...], "domain": "..."}]
97/// Fields "tags" and "domain" are optional (default to empty).
98/// Parse manually (no serde dependency).
99pub fn import_json(json: &str) -> Vec<Tile> {
100    let mut tiles = Vec::new();
101    let trimmed = json.trim();
102    if !trimmed.starts_with('[') {
103        return tiles;
104    }
105
106    // Split objects by scanning depth
107    let mut depth = 0i32;
108    let mut current = String::new();
109    let mut in_string = false;
110    let mut escape_next = false;
111
112    for c in trimmed.chars() {
113        if escape_next {
114            escape_next = false;
115            if depth >= 1 {
116                current.push(c);
117            }
118            continue;
119        }
120        if in_string {
121            if c == '\\' {
122                escape_next = true;
123                if depth >= 1 {
124                    current.push(c);
125                }
126                continue;
127            }
128            if c == '"' {
129                in_string = false;
130            }
131            if depth >= 1 {
132                current.push(c);
133            }
134            continue;
135        }
136        match c {
137            '"' => {
138                in_string = true;
139                if depth >= 1 {
140                    current.push(c);
141                }
142            }
143            '{' => {
144                depth += 1;
145                if depth > 1 {
146                    current.push(c);
147                }
148            }
149            '}' => {
150                depth -= 1;
151                if depth == 0 {
152                    let obj = current.trim().to_string();
153                    current.clear();
154                    if let Some(tile) = parse_json_object(&obj) {
155                        tiles.push(tile);
156                    }
157                } else {
158                    current.push(c);
159                }
160            }
161            _ => {
162                if depth >= 1 {
163                    current.push(c);
164                }
165            }
166        }
167    }
168    tiles
169}
170
171/// Parse a single JSON object body (contents between `{` and `}`) into a Tile.
172fn parse_json_object(obj: &str) -> Option<Tile> {
173    let question = json_string_value(obj, "question")?;
174    let answer = json_string_value(obj, "answer")?;
175    let domain = json_string_value(obj, "domain").unwrap_or_default();
176    let tags = json_string_array(obj, "tags");
177    Some(Tile { question, answer, tags, domain })
178}
179
/// Return the text that follows `"key":` in a JSON object body, with the
/// whitespace after the colon removed.
///
/// Occurrences of `"key"` that are not followed by a colon are skipped, so a
/// string *value* that happens to equal the key name does not hide the real
/// key. This is still a textual search, not a full parser: a matching key
/// inside a nested object is found as well.
fn json_value_after_key<'a>(obj: &'a str, key: &str) -> Option<&'a str> {
    let needle = format!("\"{}\"", key);
    let mut from = 0;
    while let Some(rel) = obj[from..].find(&needle) {
        let hit = from + rel;
        if let Some(rest) = obj[hit + needle.len()..].trim_start().strip_prefix(':') {
            return Some(rest.trim_start());
        }
        // The match starts at a one-byte `"`, so `hit + 1` is a char boundary.
        from = hit + 1;
    }
    None
}

/// Consume exactly four hexadecimal digits from `chars` and return their
/// value. On failure nothing is consumed and `None` is returned.
fn json_take_hex4(chars: &mut std::str::Chars<'_>) -> Option<u32> {
    let rest = chars.as_str();
    // `get` yields `None` when fewer than four bytes remain or when byte 4 is
    // not a char boundary (i.e. the "digits" are not ASCII).
    let digits = rest.get(..4)?;
    // `from_str_radix` would accept a leading `+`, so check the digits first.
    if !digits.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }
    let code = u32::from_str_radix(digits, 16).ok()?;
    *chars = rest[4..].chars();
    Some(code)
}

/// Decode a JSON string literal whose opening quote has already been removed.
///
/// Returns the decoded text together with the number of bytes consumed from
/// `body`, closing quote included, or `None` if the literal is unterminated.
///
/// Handles `\n`, `\t`, `\r`, `\b`, `\f` and `\uXXXX` (including surrogate
/// pairs). A lone surrogate decodes to U+FFFD. Any other escaped character,
/// and a `\u` not followed by four hex digits, stands for itself.
fn json_decode_string(body: &str) -> Option<(String, usize)> {
    let mut out = String::new();
    let mut chars = body.chars();
    loop {
        match chars.next()? {
            '"' => return Some((out, body.len() - chars.as_str().len())),
            '\\' => match chars.next()? {
                'n' => out.push('\n'),
                't' => out.push('\t'),
                'r' => out.push('\r'),
                'b' => out.push('\u{0008}'),
                'f' => out.push('\u{000C}'),
                'u' => match json_take_hex4(&mut chars) {
                    Some(hi @ 0xD800..=0xDBFF) => {
                        // A high surrogate is only meaningful when directly
                        // followed by an escaped low surrogate.
                        let mut ahead = chars.clone();
                        let lo = match (ahead.next(), ahead.next()) {
                            (Some('\\'), Some('u')) => json_take_hex4(&mut ahead)
                                .filter(|lo| (0xDC00..=0xDFFF).contains(lo)),
                            _ => None,
                        };
                        match lo {
                            Some(lo) => {
                                chars = ahead;
                                let code = 0x10000 + ((hi - 0xD800) << 10) + (lo - 0xDC00);
                                out.push(char::from_u32(code).unwrap_or('\u{FFFD}'));
                            }
                            None => out.push('\u{FFFD}'),
                        }
                    }
                    Some(code) => out.push(char::from_u32(code).unwrap_or('\u{FFFD}')),
                    None => out.push('u'),
                },
                other => out.push(other),
            },
            c => out.push(c),
        }
    }
}

/// Extract the string value for a given key from a flat JSON object body.
///
/// Finds `"key": "value"` and returns the decoded `value`. Returns `None` if
/// the key is absent, its value is not a string, or the string is
/// unterminated.
fn json_string_value(obj: &str, key: &str) -> Option<String> {
    let value = json_value_after_key(obj, key)?;
    let body = value.strip_prefix('"')?;
    json_decode_string(body).map(|(text, _)| text)
}

/// Extract a JSON string array value for a given key.
///
/// Returns the decoded string elements up to the array's closing `]`;
/// non-string elements are ignored. Returns an empty `Vec` if the key is not
/// found, the value is not an array, or the array (or a string inside it) is
/// unterminated. A `]` inside a string element does not end the array.
fn json_string_array(obj: &str, key: &str) -> Vec<String> {
    let mut rest = match json_value_after_key(obj, key).and_then(|v| v.strip_prefix('[')) {
        Some(inner) => inner,
        None => return Vec::new(),
    };

    let mut items = Vec::new();
    loop {
        // Jump to the next character that matters: a string start or the end
        // of the array.
        let pos = match rest.find(|c: char| c == '"' || c == ']') {
            Some(p) => p,
            None => return Vec::new(),
        };
        if rest[pos..].starts_with(']') {
            return items;
        }
        match json_decode_string(&rest[pos + 1..]) {
            Some((text, used)) => {
                items.push(text);
                rest = &rest[pos + 1 + used..];
            }
            None => return Vec::new(),
        }
    }
}
259
260// ── CSV ───────────────────────────────────────────────────────────────────────
261
262/// Import tiles from CSV.
263/// Format: question,answer,domain (header row required with those names)
264/// Comma-separated, values may be quoted with double quotes.
265/// Tags field is optional and skipped.
266pub fn import_csv(csv: &str) -> Vec<Tile> {
267    let mut tiles = Vec::new();
268    let mut lines = csv.lines();
269
270    // Read and parse header
271    let header_line = loop {
272        match lines.next() {
273            None => return tiles,
274            Some(l) if !l.trim().is_empty() => break l,
275            _ => {}
276        }
277    };
278
279    let headers: Vec<String> = csv_parse_row(header_line)
280        .into_iter()
281        .map(|h| h.trim().to_lowercase())
282        .collect();
283
284    let question_idx = headers.iter().position(|h| h == "question");
285    let answer_idx = headers.iter().position(|h| h == "answer");
286    let domain_idx = headers.iter().position(|h| h == "domain");
287
288    let (qi, ai) = match (question_idx, answer_idx) {
289        (Some(q), Some(a)) => (q, a),
290        _ => return tiles,
291    };
292
293    for line in lines {
294        let line = line.trim();
295        if line.is_empty() {
296            continue;
297        }
298        let fields = csv_parse_row(line);
299        let question = match fields.get(qi) {
300            Some(v) => v.trim().to_string(),
301            None => continue,
302        };
303        let answer = match fields.get(ai) {
304            Some(v) => v.trim().to_string(),
305            None => continue,
306        };
307        if question.is_empty() {
308            continue;
309        }
310        let domain = domain_idx
311            .and_then(|di| fields.get(di))
312            .map(|d| d.trim().to_string())
313            .unwrap_or_default();
314        tiles.push(Tile { question, answer, tags: vec![], domain });
315    }
316    tiles
317}
318
/// Split one CSV line into its fields.
///
/// A double quote toggles quoted mode, in which commas are ordinary text;
/// inside quoted mode a doubled quote (`""`) stands for one literal quote.
/// The quote characters themselves are not part of the field. The result
/// always has at least one field (an empty line gives one empty field).
fn csv_parse_row(line: &str) -> Vec<String> {
    let mut out = Vec::new();
    let mut cell = String::new();
    let mut quoted = false;
    let mut it = line.chars().peekable();

    while let Some(ch) = it.next() {
        if ch == '"' {
            if !quoted {
                quoted = true;
            } else if it.peek() == Some(&'"') {
                // `""` inside quotes: emit one quote and skip its twin.
                it.next();
                cell.push('"');
            } else {
                quoted = false;
            }
        } else if ch == ',' && !quoted {
            // Field boundary: hand the finished cell over and start afresh.
            out.push(std::mem::take(&mut cell));
        } else {
            cell.push(ch);
        }
    }

    out.push(cell);
    out
}
352
353// ── Plaintext ─────────────────────────────────────────────────────────────────
354
355/// Import tiles from plain text.
356/// Split by double-newline (paragraph boundaries).
357/// Within each paragraph: first non-empty line = question, remaining lines joined = answer.
358/// Skip paragraphs with no answer.
359pub fn import_plaintext(text: &str) -> Vec<Tile> {
360    let mut tiles = Vec::new();
361    // Split on double newlines
362    let paragraphs: Vec<&str> = text.split("\n\n").collect();
363
364    for para in paragraphs {
365        let mut non_empty_lines: Vec<&str> = para.lines().filter(|l| !l.trim().is_empty()).collect();
366        if non_empty_lines.len() < 2 {
367            continue;
368        }
369        let question = non_empty_lines.remove(0).trim().to_string();
370        let answer = non_empty_lines.join(" ").trim().to_string();
371        if answer.is_empty() {
372            continue;
373        }
374        tiles.push(Tile::new(question, answer));
375    }
376    tiles
377}
378
379// ── Export ────────────────────────────────────────────────────────────────────
380
381/// Export tiles to Markdown format.
382/// Each tile: "## {question}\n{answer}\n\n"
383pub fn export_markdown(tiles: &[Tile]) -> String {
384    tiles
385        .iter()
386        .map(|t| format!("## {}\n{}\n\n", t.question, t.answer))
387        .collect()
388}
389
390// ── Tests ─────────────────────────────────────────────────────────────────────
391
#[cfg(test)]
mod tests {
    use super::*;

    // --- import_markdown ---

    #[test]
    fn test_markdown_parses_sections() {
        let md = "## What is Rust?\nA systems programming language.\n\n## What is cargo?\nThe Rust package manager.";
        let tiles = import_markdown(md);
        assert_eq!(tiles.len(), 2);
        assert_eq!(tiles[0].question, "What is Rust?");
        assert_eq!(tiles[0].answer, "A systems programming language.");
        assert_eq!(tiles[1].question, "What is cargo?");
        assert_eq!(tiles[1].answer, "The Rust package manager.");
    }

    #[test]
    fn test_markdown_skips_empty_sections() {
        let md = "## Empty Section\n\n## Real Section\nHas content here.";
        let tiles = import_markdown(md);
        assert_eq!(tiles.len(), 1);
        assert_eq!(tiles[0].question, "Real Section");
    }

    #[test]
    fn test_markdown_ignores_text_before_first_header() {
        let md = "Preamble that belongs to no section.\n## Q\nA.";
        let tiles = import_markdown(md);
        assert_eq!(tiles.len(), 1);
        assert_eq!(tiles[0].question, "Q");
        assert_eq!(tiles[0].answer, "A.");
    }

    #[test]
    fn test_markdown_extracts_bracketed_tags() {
        let md = "## What is [Rust]?\nA language. See [Systems] and [Memory] safety.";
        let tiles = import_markdown(md);
        assert_eq!(tiles.len(), 1);
        // Tags come from the body only: the header's `[Rust]` is not a tag.
        assert_eq!(tiles[0].tags, vec!["Systems", "Memory"]);
    }

    #[test]
    fn test_markdown_domain_from_first_word() {
        let md = "## Ownership in Rust\nCore memory concept.";
        let tiles = import_markdown(md);
        assert_eq!(tiles[0].domain, "ownership");
    }

    #[test]
    fn test_markdown_multiline_answer() {
        let md = "## Question\nLine one.\nLine two.\nLine three.";
        let tiles = import_markdown(md);
        assert_eq!(tiles.len(), 1);
        assert_eq!(tiles[0].answer, "Line one.\nLine two.\nLine three.");
    }

    // --- import_json ---

    #[test]
    fn test_json_parses_basic_array() {
        let json = r#"[{"question":"What is 2+2?","answer":"4","domain":"math"}]"#;
        let tiles = import_json(json);
        assert_eq!(tiles.len(), 1);
        assert_eq!(tiles[0].question, "What is 2+2?");
        assert_eq!(tiles[0].answer, "4");
        assert_eq!(tiles[0].domain, "math");
    }

    #[test]
    fn test_json_handles_missing_optional_fields() {
        let json = r#"[{"question":"Q?","answer":"A."}]"#;
        let tiles = import_json(json);
        assert_eq!(tiles.len(), 1);
        assert_eq!(tiles[0].domain, "");
        assert!(tiles[0].tags.is_empty());
    }

    #[test]
    fn test_json_skips_objects_without_question_or_answer() {
        let json = r#"[{"domain":"math"},{"question":"Q?","answer":"A."}]"#;
        let tiles = import_json(json);
        assert_eq!(tiles.len(), 1);
        assert_eq!(tiles[0].question, "Q?");
    }

    #[test]
    fn test_json_parses_tags_array() {
        let json = r#"[{"question":"Q","answer":"A","tags":["tag1","tag2"]}]"#;
        let tiles = import_json(json);
        assert_eq!(tiles[0].tags, vec!["tag1", "tag2"]);
    }

    #[test]
    fn test_json_decodes_escaped_quotes_and_newlines() {
        let json = r#"[{"question":"Say \"hi\"","answer":"line1\nline2"}]"#;
        let tiles = import_json(json);
        assert_eq!(tiles.len(), 1);
        assert_eq!(tiles[0].question, "Say \"hi\"");
        assert_eq!(tiles[0].answer, "line1\nline2");
    }

    #[test]
    fn test_json_rejects_non_array_input() {
        assert!(import_json(r#"{"question":"Q","answer":"A"}"#).is_empty());
        assert!(import_json("").is_empty());
    }

    // --- import_csv ---

    #[test]
    fn test_csv_parses_header_and_rows() {
        let csv = "question,answer,domain\nWhat is Rust?,A systems language,programming\nWhat is cargo?,Package manager,tooling";
        let tiles = import_csv(csv);
        assert_eq!(tiles.len(), 2);
        assert_eq!(tiles[0].question, "What is Rust?");
        assert_eq!(tiles[0].answer, "A systems language");
        assert_eq!(tiles[0].domain, "programming");
    }

    #[test]
    fn test_csv_quoted_fields() {
        let csv = "question,answer\n\"Question, with comma\",\"Answer, with comma\"";
        let tiles = import_csv(csv);
        assert_eq!(tiles.len(), 1);
        assert_eq!(tiles[0].question, "Question, with comma");
        assert_eq!(tiles[0].answer, "Answer, with comma");
    }

    #[test]
    fn test_csv_doubled_quote_is_a_literal_quote() {
        let csv = "question,answer\n\"He said \"\"hi\"\"\",ok";
        let tiles = import_csv(csv);
        assert_eq!(tiles.len(), 1);
        assert_eq!(tiles[0].question, "He said \"hi\"");
        assert_eq!(tiles[0].answer, "ok");
    }

    // --- import_plaintext ---

    #[test]
    fn test_plaintext_splits_on_double_newlines() {
        let text = "Question one\nAnswer one\n\nQuestion two\nAnswer two";
        let tiles = import_plaintext(text);
        assert_eq!(tiles.len(), 2);
    }

    #[test]
    fn test_plaintext_first_line_is_question() {
        // A single newline does not end a paragraph, so this is one tile.
        let text = "What is the sky?\nBlue.";
        let tiles = import_plaintext(text);
        assert_eq!(tiles.len(), 1);
        assert_eq!(tiles[0].question, "What is the sky?");
        assert_eq!(tiles[0].answer, "Blue.");
    }

    #[test]
    fn test_plaintext_skips_paragraphs_with_no_answer() {
        let text = "Lonely header\n\nReal Question\nReal Answer";
        let tiles = import_plaintext(text);
        assert_eq!(tiles.len(), 1);
        assert_eq!(tiles[0].question, "Real Question");
    }

    // --- export_markdown ---

    #[test]
    fn test_export_markdown_format() {
        let tiles = vec![Tile::new("What is pi?", "Approximately 3.14159.")];
        let md = export_markdown(&tiles);
        assert_eq!(md, "## What is pi?\nApproximately 3.14159.\n\n");
    }

    #[test]
    fn test_export_markdown_roundtrip_with_import() {
        let original = vec![
            Tile::new("What is Rust?", "A systems language."),
            Tile::new("What is cargo?", "The package manager."),
        ];
        let md = export_markdown(&original);
        let imported = import_markdown(&md);
        assert_eq!(imported.len(), 2);
        assert_eq!(imported[0].question, original[0].question);
        assert_eq!(imported[0].answer, original[0].answer);
        assert_eq!(imported[1].question, original[1].question);
        assert_eq!(imported[1].answer, original[1].answer);
    }

    #[test]
    fn test_import_export_markdown_consistency() {
        let md = "## Alpha\nFirst answer.\n\n## Beta\nSecond answer.\n\n";
        let tiles = import_markdown(md);
        let re_exported = export_markdown(&tiles);
        let re_imported = import_markdown(&re_exported);
        assert_eq!(tiles.len(), re_imported.len());
        for (a, b) in tiles.iter().zip(re_imported.iter()) {
            assert_eq!(a.question, b.question);
            assert_eq!(a.answer, b.answer);
        }
    }
}