rumdl_lib/utils/
mkdocs_definition_lists.rs

1//! MkDocs/Python-Markdown Definition Lists extension support
2//!
3//! This module provides support for the Python-Markdown Definition Lists extension,
4//! which allows creating definition lists with terms and their definitions.
5//!
6//! ## Syntax
7//!
8//! ```markdown
9//! Term 1
10//! :   Definition for term 1
11//!
12//! Term 2
13//! :   Definition for term 2
14//!     Continuation of the definition
15//!
16//! Term with multiple definitions
17//! :   First definition
18//! :   Second definition
19//! ```
20//!
21//! ## Format Requirements
22//!
23//! - Term appears on its own line (no leading whitespace required for the term)
24//! - Definition starts with `:` followed by whitespace (typically 3 spaces after `:`)
25//! - Multiple definitions for a term use separate `:` lines
26//! - Continuation lines are indented (typically 4 spaces)
27//!
28//! ## MkDocs Material Specifics
29//!
//! MkDocs Material supports definition lists via the Python-Markdown Definition Lists extension,
//! which must be enabled by adding `def_list` to `markdown_extensions` in `mkdocs.yml`.
32//!
33//! ## References
34//!
35//! - [Python-Markdown Definition Lists](https://python-markdown.github.io/extensions/definition_lists/)
36//! - [MkDocs Material - Lists](https://squidfunk.github.io/mkdocs-material/reference/lists/#using-definition-lists)
37
/// Check if a line is a definition (starts with `:` followed by whitespace)
///
/// Thin wrapper that delegates to `crate::utils::is_definition_list_item`; it exists
/// in this module for compatibility.
///
/// As exercised by this module's tests: `": text"`, `":   text"` and `":\ttext"` are
/// definition lines (also when the `:` is preceded by leading whitespace), while
/// `":NoSpace"`, plain text and the empty string are not.
#[inline]
pub fn is_definition_line(line: &str) -> bool {
    crate::utils::is_definition_list_item(line)
}
45
46/// Check if a line could be a term (precedes a definition)
47///
48/// A term line is a non-empty line that:
49/// - Doesn't start with whitespace (or has consistent indentation for nested terms)
50/// - Is followed by a definition line (checked by caller)
51/// - Is not a definition line itself
52/// - Is not a blank line
53#[inline]
54pub fn could_be_term_line(line: &str) -> bool {
55    let trimmed = line.trim();
56    !trimmed.is_empty() && !is_definition_line(line) && !line.starts_with(' ')
57}
58
/// Check if a line is a definition continuation (indented after a definition)
///
/// A continuation line starts with at least 4 spaces and does not itself open a new
/// definition. A new definition is a `:` marker followed by whitespace, so indented
/// text that merely begins with a colon (for example `:smile:` or `:::`) still counts
/// as a continuation.
#[inline]
pub fn is_definition_continuation(line: &str) -> bool {
    if !line.starts_with("    ") {
        return false;
    }

    // Only `:` followed by whitespace is a definition marker; a leading colon that is
    // followed by anything else (or by nothing) is ordinary definition text.
    let after_indent = line.trim_start();
    let opens_definition = matches!(
        after_indent.strip_prefix(':').and_then(|rest| rest.chars().next()),
        Some(c) if c.is_whitespace()
    );

    !opens_definition
}
67
/// Parsed definition list entry: one term together with the definitions attached to it
#[derive(Debug, Clone, PartialEq)]
pub struct DefinitionEntry {
    /// The term being defined (`extract_definition_lists` stores it with surrounding
    /// whitespace trimmed)
    pub term: String,
    /// Line number of the term (1-indexed)
    pub term_line: usize,
    /// List of definitions (each definition may span multiple lines)
    pub definitions: Vec<Definition>,
}
78
/// A single definition within a definition list entry
#[derive(Debug, Clone, PartialEq)]
pub struct Definition {
    /// The definition text without the leading `:` marker; `extract_definition_lists`
    /// appends non-blank continuation lines to it, separated by a single space
    pub text: String,
    /// Line number where this definition starts (1-indexed)
    pub line: usize,
}
87
88/// Extract all definition list entries from content
89///
90/// # Returns
91/// A vector of DefinitionEntry structs for each term+definitions found
92pub fn extract_definition_lists(content: &str) -> Vec<DefinitionEntry> {
93    let lines: Vec<&str> = content.lines().collect();
94    let mut entries = Vec::new();
95
96    let mut i = 0;
97    while i < lines.len() {
98        // Look for a potential term (non-definition, non-blank line)
99        let line = lines[i];
100        let trimmed = line.trim();
101
102        // Skip blank lines and definition lines
103        if trimmed.is_empty() || is_definition_line(line) {
104            i += 1;
105            continue;
106        }
107
108        // Check if next line is a definition
109        if i + 1 < lines.len() && is_definition_line(lines[i + 1]) {
110            // Found a term with at least one definition
111            let term = trimmed.to_string();
112            let term_line = i + 1;
113            let mut definitions = Vec::new();
114
115            // Collect all definitions for this term
116            i += 1;
117            while i < lines.len() && is_definition_line(lines[i]) {
118                let def_start_line = i + 1;
119                let def_line = lines[i].trim_start();
120                // Remove the `:` prefix and any following whitespace
121                // Common formats: `:   text`, `: text`, `:\ttext`
122                let def_text = if let Some(stripped) = def_line.strip_prefix(':') {
123                    stripped.trim_start().to_string()
124                } else {
125                    def_line.to_string()
126                };
127
128                // Check for continuation lines
129                let mut full_def = def_text;
130                while i + 1 < lines.len() && is_definition_continuation(lines[i + 1]) {
131                    i += 1;
132                    let continuation = lines[i].trim();
133                    if !continuation.is_empty() {
134                        full_def.push(' ');
135                        full_def.push_str(continuation);
136                    }
137                }
138
139                definitions.push(Definition {
140                    text: full_def,
141                    line: def_start_line,
142                });
143
144                i += 1;
145            }
146
147            entries.push(DefinitionEntry {
148                term,
149                term_line,
150                definitions,
151            });
152        } else {
153            i += 1;
154        }
155    }
156
157    entries
158}
159
160/// Check if a position in a line is within a definition marker
161pub fn is_in_definition_marker(line: &str, position: usize) -> bool {
162    if !is_definition_line(line) {
163        return false;
164    }
165
166    // The definition marker is the `:` at the beginning (after any leading whitespace)
167    let trimmed = line.trim_start();
168    let leading_ws = line.len() - trimmed.len();
169
170    // Position is in the marker if it's at the `:` or the space after
171    position >= leading_ws && position < leading_ws + 2
172}
173
#[cfg(test)]
mod tests {
    use super::*;

    /// Definition lines need a `:` followed by whitespace.
    #[test]
    fn test_is_definition_line() {
        let accepted = [
            ":   Definition text",
            ": Definition text",
            ":\tDefinition text",
            ":    Long definition",
        ];
        for line in accepted {
            assert!(is_definition_line(line), "should be a definition line: {:?}", line);
        }

        // `:NoSpace` has no space after the colon
        let rejected = ["Term", "  Term", "", ":NoSpace"];
        for line in rejected {
            assert!(!is_definition_line(line), "should not be a definition line: {:?}", line);
        }
    }

    /// Terms are non-blank, unindented, non-definition lines.
    #[test]
    fn test_could_be_term_line() {
        let accepted = ["Term", "Multi Word Term", "Term with special chars: like this"];
        for line in accepted {
            assert!(could_be_term_line(line), "should be a possible term: {:?}", line);
        }

        // In order: empty, blank, definition, leading space
        let rejected = ["", "   ", ":   Definition", " Term"];
        for line in rejected {
            assert!(!could_be_term_line(line), "should not be a possible term: {:?}", line);
        }
    }

    /// Continuations need four spaces of indentation and must not be definitions.
    #[test]
    fn test_is_definition_continuation() {
        let accepted = ["    Continuation text", "    More continuation"];
        for line in accepted {
            assert!(is_definition_continuation(line), "should be a continuation: {:?}", line);
        }

        let rejected = [":   New definition", "No indent", "  Only 2 spaces"];
        for line in rejected {
            assert!(!is_definition_continuation(line), "should not be a continuation: {:?}", line);
        }
    }

    /// Single definitions, a continuation line, and multiple definitions per term.
    #[test]
    fn test_extract_definition_lists() {
        let content = r#"First Term
:   Definition of first term

Second Term
:   Definition of second term
    With continuation

Third Term
:   First definition
:   Second definition
"#;
        let entries = extract_definition_lists(content);

        // Flatten to (term, definition texts) so the whole shape is compared at once
        let summary: Vec<(&str, Vec<&str>)> = entries
            .iter()
            .map(|entry| {
                let texts = entry.definitions.iter().map(|def| def.text.as_str()).collect();
                (entry.term.as_str(), texts)
            })
            .collect();

        assert_eq!(
            summary,
            vec![
                ("First Term", vec!["Definition of first term"]),
                (
                    "Second Term",
                    vec!["Definition of second term With continuation"]
                ),
                ("Third Term", vec!["First definition", "Second definition"]),
            ]
        );
    }

    /// Definition lists embedded in a document with headings and paragraphs.
    #[test]
    fn test_extract_definition_lists_complex() {
        let content = r#"# Document

Regular paragraph.

Apple
:   A fruit
:   A technology company

Banana
:   A yellow fruit
    that grows in tropical climates

Not a definition list line.
"#;
        let entries = extract_definition_lists(content);
        assert_eq!(entries.len(), 2);

        let (apple, banana) = (&entries[0], &entries[1]);
        assert_eq!((apple.term.as_str(), apple.definitions.len()), ("Apple", 2));
        assert_eq!((banana.term.as_str(), banana.definitions.len()), ("Banana", 1));
        assert!(banana.definitions[0].text.contains("tropical climates"));
    }

    /// The marker covers the `:` and the byte right after it.
    #[test]
    fn test_is_in_definition_marker() {
        let plain = ":   Definition text";
        let indented = "  :   Definition";
        let not_def = "Regular line";

        let cases = [
            (plain, 0, true),     // At ':'
            (plain, 1, true),     // At first space
            (plain, 4, false),    // In text
            (indented, 0, false), // Before ':'
            (indented, 2, true),  // At ':'
            (indented, 3, true),  // At first space after ':'
            (not_def, 0, false),
        ];

        for (line, position, expected) in cases {
            assert_eq!(
                is_in_definition_marker(line, position),
                expected,
                "line {:?}, position {}",
                line,
                position
            );
        }
    }
}