nvs_core/chunker/
annotate.rs1use super::TokenCounter;
2
/// Structural category assigned to a single line of text by
/// `detect_line_type`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineType {
    /// A non-blank line that matched none of the other categories.
    Normal,
    /// A heading line with one or two leading `#` characters.
    MajorHeading,
    /// A heading line with three or more leading `#` characters.
    MinorHeading,
    /// A line whose first non-whitespace character is `-`, `*`, or `+`.
    ListItem,
    /// A line that is empty once surrounding whitespace is trimmed.
    Blank,
    /// A line recognised as a backtick code-fence delimiter.
    CodeBlock,
}
12
13#[derive(Clone, Debug)]
14pub struct AnnotatedLine {
15 pub text: String,
16 pub line_type: LineType,
17 pub tokens: usize,
18 pub page: i32,
19 pub heading_level: i32,
20}
21
22fn detect_line_type(line: &str) -> (LineType, i32) {
23 let s = line.trim();
24 if s.is_empty() {
25 return (LineType::Blank, 0);
26 }
27 if let Some(stripped) = s.strip_prefix('#') {
29 let mut level = 1;
30 let mut rest = stripped;
31 while let Some(r) = rest.strip_prefix('#') {
32 level += 1;
33 rest = r;
34 }
35 if level <= 2 {
36 return (LineType::MajorHeading, level as i32);
37 }
38 return (LineType::MinorHeading, level as i32);
39 }
40 if s.starts_with('-') || s.starts_with('*') || s.starts_with('+') {
42 return (LineType::ListItem, 0);
43 }
44 if s.chars().all(|c| c == '`') {
45 return (LineType::CodeBlock, 0);
46 }
47 (LineType::Normal, 0)
48}
49
50pub fn annotate_lines(pages: &[(String, i32)], tokenizer: &dyn TokenCounter) -> Vec<AnnotatedLine> {
51 let mut out = Vec::new();
52 for (text, page) in pages {
53 for line in text.split('\n') {
54 let (lt, lvl) = detect_line_type(line);
55 let tokens = tokenizer.count_tokens(line);
56 out.push(AnnotatedLine {
57 text: line.to_string(),
58 line_type: lt,
59 tokens,
60 page: *page,
61 heading_level: lvl,
62 });
63 }
64 }
65 out
66}