terraphim_markdown_parser/
heading.rs1use std::ops::Range;
2
3use markdown::mdast::Node;
4use serde::{Deserialize, Serialize};
5use ulid::Ulid;
6
7use crate::{MarkdownParserError, NormalizedMarkdown, children, collect_text_content};
8
9#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
11pub enum MatchStrategy {
12 Prefix,
14 Contains,
16}
17
18#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
19pub enum SectionType {
20 Main,
21 Sidebar(String),
22 Career,
23 Assessment,
24}
25
26impl std::fmt::Display for SectionType {
27 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
28 match self {
29 SectionType::Main => write!(f, "Main"),
30 SectionType::Sidebar(s) => write!(f, "Sidebar({s})"),
31 SectionType::Career => write!(f, "Career"),
32 SectionType::Assessment => write!(f, "Assessment"),
33 }
34 }
35}
36
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct SectionPattern {
39 pub pattern: String,
40 pub section_type: SectionType,
41 pub match_strategy: MatchStrategy,
42}
43
44#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct SectionConfig {
46 pub rules: Vec<SectionPattern>,
47}
48
49impl SectionConfig {
50 pub fn textbook_default() -> Self {
51 Self {
52 rules: vec![
53 SectionPattern {
54 pattern: "Power Selling".to_string(),
55 section_type: SectionType::Sidebar("PowerSelling".to_string()),
56 match_strategy: MatchStrategy::Prefix,
57 },
58 SectionPattern {
59 pattern: "Power Player".to_string(),
60 section_type: SectionType::Sidebar("PowerPlayer".to_string()),
61 match_strategy: MatchStrategy::Prefix,
62 },
63 SectionPattern {
64 pattern: "Power Point".to_string(),
65 section_type: SectionType::Sidebar("PowerPoint".to_string()),
66 match_strategy: MatchStrategy::Prefix,
67 },
68 SectionPattern {
71 pattern: "Selling U".to_string(),
72 section_type: SectionType::Career,
73 match_strategy: MatchStrategy::Contains,
74 },
75 SectionPattern {
76 pattern: "Key Takeaways".to_string(),
77 section_type: SectionType::Assessment,
78 match_strategy: MatchStrategy::Prefix,
79 },
80 SectionPattern {
81 pattern: "Test Your Power Knowledge".to_string(),
82 section_type: SectionType::Assessment,
83 match_strategy: MatchStrategy::Prefix,
84 },
85 ],
86 }
87 }
88
89 pub fn classify(&self, title: &str) -> SectionType {
90 let title_trimmed = title.trim();
91 for rule in &self.rules {
92 let matched = match rule.match_strategy {
93 MatchStrategy::Prefix => title_trimmed.starts_with(&rule.pattern),
94 MatchStrategy::Contains => title_trimmed.contains(&rule.pattern),
95 };
96 if matched {
97 return rule.section_type.clone();
98 }
99 }
100 SectionType::Main
101 }
102}
103
104impl Default for SectionConfig {
105 fn default() -> Self {
106 Self::textbook_default()
107 }
108}
109
110#[derive(Debug, Clone)]
111pub struct HeadingNode {
112 pub level: u8,
113 pub title: String,
114 pub section_type: SectionType,
115 pub blocks: Vec<Ulid>,
116 pub children: Vec<HeadingNode>,
117 pub byte_range: Range<usize>,
118}
119
120#[derive(Debug, Clone)]
121pub struct HeadingTree {
122 pub roots: Vec<HeadingNode>,
123}
124
125pub fn build_heading_tree(
126 normalized: &NormalizedMarkdown,
127) -> Result<HeadingTree, MarkdownParserError> {
128 let Some(ref ast) = normalized.ast else {
129 return Ok(HeadingTree { roots: vec![] });
130 };
131
132 let headings = extract_headings(ast);
133 let tree = build_tree_from_headings(&headings, normalized);
134 Ok(tree)
135}
136
137pub fn classify_sections(tree: &mut HeadingTree, config: &SectionConfig) {
138 for root in &mut tree.roots {
139 classify_node(root, config);
140 }
141}
142
143fn classify_node(node: &mut HeadingNode, config: &SectionConfig) {
144 node.section_type = config.classify(&node.title);
145 for child in &mut node.children {
146 classify_node(child, config);
147 }
148}
149
/// A heading as found in the Markdown AST, before it is placed in the tree.
///
/// Derives `Debug` and `Clone` like the other types in this module.
#[derive(Debug, Clone)]
pub struct RawHeading {
    /// Heading depth copied from the AST node.
    level: u8,
    /// Text content collected from the heading's children.
    title: String,
    /// Start offset of the heading node (0 when the node has no position).
    byte_start: usize,
    /// End offset of the heading node (0 when the node has no position).
    byte_end: usize,
}
156
157fn extract_headings(node: &Node) -> Vec<RawHeading> {
158 let mut result = Vec::new();
159 collect_headings(node, &mut result);
160 result
161}
162
163fn collect_headings(node: &Node, out: &mut Vec<RawHeading>) {
164 if let Node::Heading(h) = node {
165 let title = collect_text_content(&h.children);
166 let (start, end) = if let Some(pos) = node.position() {
167 (pos.start.offset, pos.end.offset)
168 } else {
169 (0, 0)
170 };
171 out.push(RawHeading {
172 level: h.depth,
173 title,
174 byte_start: start,
175 byte_end: end,
176 });
177 return;
178 }
179
180 if let Some(children) = children(node) {
181 for child in children {
182 collect_headings(child, out);
183 }
184 }
185}
186
187fn build_tree_from_headings(
188 headings: &[RawHeading],
189 normalized: &NormalizedMarkdown,
190) -> HeadingTree {
191 if headings.is_empty() {
192 return HeadingTree { roots: vec![] };
193 }
194
195 let mut roots: Vec<HeadingNode> = Vec::new();
196 let mut stack: Vec<HeadingNode> = Vec::new();
197
198 for (i, heading) in headings.iter().enumerate() {
199 let next_byte_start = headings
200 .get(i + 1)
201 .map(|h| h.byte_start)
202 .unwrap_or(normalized.markdown.len());
203
204 let blocks = blocks_in_range(&normalized.blocks, heading.byte_end, next_byte_start);
205
206 let node = HeadingNode {
207 level: heading.level,
208 title: heading.title.clone(),
209 section_type: SectionType::Main,
210 blocks,
211 children: Vec::new(),
212 byte_range: heading.byte_start..next_byte_start,
213 };
214
215 while let Some(top) = stack.last() {
216 if top.level < node.level {
217 break;
218 }
219 let popped = stack.pop().unwrap();
220 if let Some(parent) = stack.last_mut() {
221 parent.children.push(popped);
222 } else {
223 roots.push(popped);
224 }
225 }
226
227 stack.push(node);
228 }
229
230 while let Some(popped) = stack.pop() {
231 if let Some(parent) = stack.last_mut() {
232 parent.children.push(popped);
233 } else {
234 roots.push(popped);
235 }
236 }
237
238 HeadingTree { roots }
239}
240
241fn blocks_in_range(blocks: &[crate::Block], start: usize, end: usize) -> Vec<Ulid> {
242 blocks
243 .iter()
244 .filter(|b| b.span.start >= start && b.span.start < end)
245 .map(|b| b.id)
246 .collect()
247}
248
#[cfg(test)]
mod tests {
    use super::*;
    use crate::normalize_markdown;

    /// Normalizes `input` and builds its heading tree, panicking on failure.
    fn tree_for(input: &str) -> HeadingTree {
        let normalized = normalize_markdown(input).unwrap();
        build_heading_tree(&normalized).unwrap()
    }

    /// Configuration that holds exactly one rule.
    fn single_rule(
        pattern: &str,
        section_type: SectionType,
        match_strategy: MatchStrategy,
    ) -> SectionConfig {
        SectionConfig {
            rules: vec![SectionPattern {
                pattern: pattern.to_string(),
                section_type,
                match_strategy,
            }],
        }
    }

    #[test]
    fn section_config_classify_sidebar() {
        let actual =
            SectionConfig::textbook_default().classify("Power Selling: The Art of Persuasion");
        assert_eq!(actual, SectionType::Sidebar("PowerSelling".to_string()));
    }

    #[test]
    fn section_config_classify_career() {
        let actual = SectionConfig::textbook_default().classify("Selling U: Your Career");
        assert_eq!(actual, SectionType::Career);
    }

    #[test]
    fn section_config_classify_assessment() {
        let actual = SectionConfig::textbook_default().classify("Key Takeaways from Chapter 3");
        assert_eq!(actual, SectionType::Assessment);
    }

    #[test]
    fn section_config_classify_main_fallback() {
        let actual = SectionConfig::textbook_default().classify("Introduction to Sales");
        assert_eq!(actual, SectionType::Main);
    }

    #[test]
    fn build_heading_tree_simple() {
        let tree = tree_for(
            "# Chapter 1\n\nIntro paragraph\n\n## Section 1.1\n\nSome text\n\n# Chapter 2\n\nMore text\n",
        );

        assert_eq!(tree.roots.len(), 2);
        let (first, second) = (&tree.roots[0], &tree.roots[1]);
        assert_eq!(first.title, "Chapter 1");
        assert_eq!(first.level, 1);
        assert_eq!(first.children.len(), 1);
        assert_eq!(first.children[0].title, "Section 1.1");
        assert_eq!(second.title, "Chapter 2");
    }

    #[test]
    fn build_heading_tree_attaches_blocks() {
        let tree = tree_for("# Chapter\n\nParagraph one\n\nParagraph two\n");

        assert_eq!(tree.roots.len(), 1);
        assert_eq!(tree.roots[0].blocks.len(), 2);
    }

    #[test]
    fn build_heading_tree_all_levels() {
        let tree = tree_for("# H1\n\n## H2\n\n### H3\n\n#### H4\n\n##### H5\n\n###### H6\n\nText\n");

        assert_eq!(tree.roots.len(), 1);
        let h1 = &tree.roots[0];
        assert_eq!(h1.level, 1);
        assert_eq!(h1.children.len(), 1);
        let h2 = &h1.children[0];
        assert_eq!(h2.level, 2);
        assert_eq!(h2.children.len(), 1);
        assert_eq!(h2.children[0].level, 3);
    }

    #[test]
    fn classify_sections_applies_config() {
        let mut tree = tree_for(
            "# Main Title\n\nText\n\n## Power Selling: Tips\n\nTip text\n\n## Selling U: Careers\n\nCareer text\n",
        );
        classify_sections(&mut tree, &SectionConfig::textbook_default());

        let root = &tree.roots[0];
        assert_eq!(root.section_type, SectionType::Main);
        assert_eq!(
            root.children[0].section_type,
            SectionType::Sidebar("PowerSelling".to_string())
        );
        assert_eq!(root.children[1].section_type, SectionType::Career);
    }

    #[test]
    fn build_heading_tree_empty() {
        let tree = tree_for("No headings here\n\nJust text\n");
        assert!(tree.roots.is_empty());
    }

    #[test]
    fn custom_section_config() {
        let config = single_rule(
            "Experiment",
            SectionType::Sidebar("Lab".to_string()),
            MatchStrategy::Prefix,
        );

        assert_eq!(
            config.classify("Experiment 3: Results"),
            SectionType::Sidebar("Lab".to_string())
        );
        assert_eq!(config.classify("Introduction"), SectionType::Main);
    }

    #[test]
    fn match_strategy_contains() {
        let config = single_rule("Selling U", SectionType::Career, MatchStrategy::Contains);

        // Pattern in the middle of the title.
        assert_eq!(
            config.classify("Chapter 3: Selling U -- Your Career"),
            SectionType::Career
        );
        // Pattern at the very start still counts as "contains".
        assert_eq!(config.classify("Selling U: Careers"), SectionType::Career);
        // No occurrence falls back to Main.
        assert_eq!(config.classify("Introduction"), SectionType::Main);
    }
}