ndg_commonmark/utils/
codeblock.rs

/// Line-level state for fenced code block detection in markdown.
///
/// Remembers whether the lines seen so far have left us inside a fenced code
/// block and, if so, which fence character (`` ` `` or `~`) opened it and how
/// long the opening run was, so that only a matching fence can close it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct FenceTracker {
  /// `true` while inside an open fenced block.
  in_code_block:    bool,
  /// Fence character of the open block; `None` outside a block.
  code_fence_char:  Option<char>,
  /// Length of the opening fence run; `0` outside a block.
  code_fence_count: usize,
}

impl FenceTracker {
  /// Create a tracker that starts outside of any code block.
  #[must_use]
  pub const fn new() -> Self {
    Self {
      in_code_block:    false,
      code_fence_char:  None,
      code_fence_count: 0,
    }
  }

  /// Check if currently inside a code block.
  #[must_use]
  pub const fn in_code_block(&self) -> bool {
    self.in_code_block
  }

  /// Process a line and return the fence state that holds after it.
  ///
  /// Leading whitespace of any width is ignored. A line is a fence candidate
  /// when it then starts with a run of at least three backticks or tildes.
  ///
  /// * Outside a block, a candidate opens one. A backtick candidate whose
  ///   remainder contains another backtick (e.g. ```` ``` code ``` ````) is
  ///   an inline code span rather than a fence and is ignored.
  /// * Inside a block, a candidate closes it only if it uses the same fence
  ///   character, is at least as long as the opening run, and is followed by
  ///   nothing but whitespace. A line such as ```` ```python ```` therefore
  ///   does not terminate an open block.
  ///
  /// Every other line leaves the state unchanged. Call this once per line,
  /// in order, feeding the returned tracker into the next call.
  #[must_use]
  pub fn process_line(&self, line: &str) -> Self {
    let trimmed = line.trim_start();

    // Only lines that begin with a fence character can change the state.
    let Some(fence_char) =
      trimmed.chars().next().filter(|&c| c == '`' || c == '~')
    else {
      return *self;
    };

    let fence_count =
      trimmed.chars().take_while(|&c| c == fence_char).count();
    if fence_count < 3 {
      return *self;
    }

    // Both fence characters are ASCII, so the run length in chars is also its
    // length in bytes and slicing here cannot split a code point.
    let rest = &trimmed[fence_count..];

    if self.in_code_block {
      let closes = self.code_fence_char == Some(fence_char)
        && fence_count >= self.code_fence_count
        && rest.trim().is_empty();
      if closes { Self::new() } else { *self }
    } else if fence_char == '`' && rest.contains('`') {
      // Backticks in the info string: this is inline code, not a fence.
      *self
    } else {
      Self {
        in_code_block:    true,
        code_fence_char:  Some(fence_char),
        code_fence_count: fence_count,
      }
    }
  }
}
73
/// Character-level state for fenced code blocks AND inline code spans.
///
/// Carries the same fence information as a line-level fence tracker (fence
/// character and opening run length) and additionally tracks inline code
/// spans delimited by one or two backticks. It is meant for callers that walk
/// the text character by character and must skip both kinds of code.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct InlineTracker {
  /// `true` while inside an open fenced block.
  in_code_block:  bool,
  /// `true` while inside an open inline code span.
  in_inline_code: bool,
  /// Fence character of the open block; `None` outside a block.
  fence_char:     Option<char>,
  /// Length of the opening fence run; `0` outside a block.
  fence_count:    usize,
  /// Length of the backtick run that opened the current inline span; `0`
  /// when no inline span is open.
  inline_ticks:   usize,
}

impl InlineTracker {
  /// Create a tracker that starts outside of any code.
  #[must_use]
  pub const fn new() -> Self {
    Self {
      in_code_block:  false,
      in_inline_code: false,
      fence_char:     None,
      fence_count:    0,
      inline_ticks:   0,
    }
  }

  /// Check if currently inside any kind of code (block or inline).
  #[must_use]
  pub const fn in_any_code(&self) -> bool {
    self.in_code_block || self.in_inline_code
  }

  /// Check if currently inside a code block.
  #[must_use]
  pub const fn in_code_block(&self) -> bool {
    self.in_code_block
  }

  /// Check if currently inside inline code.
  #[must_use]
  pub const fn in_inline_code(&self) -> bool {
    self.in_inline_code
  }

  /// Process a run of backticks and update state.
  ///
  /// The caller must already have taken the first backtick of the run from
  /// `chars`; this method consumes the remaining backticks of the run.
  ///
  /// * A run of three or more is treated as a code fence (open, close, or
  ///   ignored when inside a block opened by a different or longer fence).
  /// * A shorter run outside a fenced block opens an inline code span, or
  ///   closes the current one when its length equals that of the run that
  ///   opened it. A run of a different length inside a span is literal
  ///   content, so ``` ``a`b`` ``` is a single span.
  /// * A shorter run inside a fenced block changes nothing.
  ///
  /// Returns (`new_state`, `number_of_backticks_consumed`), where the count
  /// includes the backtick the caller consumed.
  #[must_use]
  pub fn process_backticks<I>(&self, chars: &mut I) -> (Self, usize)
  where
    I: Iterator<Item = char> + Clone,
  {
    let tick_count = Self::consume_run(chars, '`');

    let next = if tick_count >= 3 {
      self.apply_fence('`', tick_count)
    } else if self.in_code_block {
      // Short runs inside a fenced block are plain content.
      *self
    } else if !self.in_inline_code {
      Self {
        in_inline_code: true,
        inline_ticks: tick_count,
        ..*self
      }
    } else if tick_count == self.inline_ticks {
      Self {
        in_inline_code: false,
        inline_ticks: 0,
        ..*self
      }
    } else {
      // Backtick run of a different length inside a span: literal content.
      *self
    };

    (next, tick_count)
  }

  /// Process a run of tildes and update state.
  ///
  /// The caller must already have taken the first tilde of the run from
  /// `chars`; this method consumes the remaining tildes of the run. Runs of
  /// three or more are treated as a code fence; shorter runs change nothing.
  ///
  /// Returns (`new_state`, `number_of_tildes_consumed`), where the count
  /// includes the tilde the caller consumed.
  #[must_use]
  pub fn process_tildes<I>(&self, chars: &mut I) -> (Self, usize)
  where
    I: Iterator<Item = char> + Clone,
  {
    let tilde_count = Self::consume_run(chars, '~');

    let next = if tilde_count >= 3 {
      self.apply_fence('~', tilde_count)
    } else {
      *self
    };

    (next, tilde_count)
  }

  /// Process a newline and update state.
  ///
  /// Newlines end inline code if not properly closed.
  #[must_use]
  pub const fn process_newline(&self) -> Self {
    Self {
      in_inline_code: false,
      inline_ticks: 0,
      ..*self
    }
  }

  /// Consume the rest of a run of `marker` from `chars` and return the total
  /// run length, counting the one marker the caller has already consumed.
  fn consume_run<I>(chars: &mut I, marker: char) -> usize
  where
    I: Iterator<Item = char> + Clone,
  {
    // Look ahead on a clone so the first non-marker character stays in
    // `chars` for the caller.
    let extra = chars.clone().take_while(|&c| c == marker).count();
    for _ in 0..extra {
      chars.next();
    }
    extra + 1
  }

  /// Apply a fence run of `run_len` `marker` characters (`run_len >= 3`).
  ///
  /// Opens a block when none is open (dropping any inline span), closes the
  /// open block when the marker matches and the run is at least as long as
  /// the opening one, and otherwise leaves the state unchanged.
  fn apply_fence(&self, marker: char, run_len: usize) -> Self {
    if !self.in_code_block {
      Self {
        in_code_block:  true,
        in_inline_code: false,
        fence_char:     Some(marker),
        fence_count:    run_len,
        inline_ticks:   0,
      }
    } else if self.fence_char == Some(marker) && run_len >= self.fence_count {
      Self::new()
    } else {
      *self
    }
  }
}
247
#[cfg(test)]
mod tests {
  use super::*;

  /// Feed each line to a fresh `FenceTracker` in order and check the
  /// `in_code_block()` flag expected after that line.
  fn assert_fence_states(steps: &[(&str, bool)]) {
    let mut tracker = FenceTracker::new();
    for &(line, expected) in steps {
      tracker = tracker.process_line(line);
      assert_eq!(tracker.in_code_block(), expected, "after line {line:?}");
    }
  }

  /// Call `process_backticks` as if the first backtick had just been read
  /// and `rest` were the remaining input.
  fn backticks(tracker: InlineTracker, rest: &str) -> (InlineTracker, usize) {
    tracker.process_backticks(&mut rest.chars())
  }

  /// Call `process_tildes` as if the first tilde had just been read and
  /// `rest` were the remaining input.
  fn tildes(tracker: InlineTracker, rest: &str) -> (InlineTracker, usize) {
    tracker.process_tildes(&mut rest.chars())
  }

  #[test]
  fn test_fence_tracker_basic() {
    // A fresh tracker starts outside of any block.
    assert!(!FenceTracker::new().in_code_block());

    assert_fence_states(&[
      ("```rust", true),      // opening fence
      ("fn main() {}", true), // content inside the block
      ("```", false),         // closing fence
    ]);
  }

  #[test]
  fn test_fence_tracker_tilde() {
    assert_fence_states(&[("~~~", true), ("code", true), ("~~~", false)]);
  }

  #[test]
  fn test_fence_tracker_mismatched() {
    assert_fence_states(&[
      ("```", true),  // backtick fence opens
      ("~~~", true),  // tildes cannot close a backtick fence
      ("```", false), // backticks close it
    ]);
  }

  #[test]
  fn test_fence_tracker_count() {
    assert_fence_states(&[
      ("````", true),  // opened with four backticks
      ("```", true),   // three are too few to close it
      ("````", false), // four (or more) close it
    ]);
  }

  #[test]
  fn test_fence_tracker_indented() {
    // Leading whitespace is stripped before the fence check.
    assert_fence_states(&[("    ```", true), ("    ```", false)]);
  }

  #[test]
  fn test_inline_code_tracker_basic() {
    let start = InlineTracker::new();
    assert!(!start.in_any_code());

    // A lone backtick opens an inline span...
    let (opened, consumed) = backticks(start, "rest");
    assert_eq!(consumed, 1);
    assert!(opened.in_inline_code());
    assert!(opened.in_any_code());

    // ...and the next lone backtick closes it again.
    let (closed, consumed) = backticks(opened, "rest");
    assert_eq!(consumed, 1);
    assert!(!closed.in_inline_code());
    assert!(!closed.in_any_code());
  }

  #[test]
  fn test_inline_code_tracker_fence() {
    // Three backticks in a row open a fenced block.
    let (fenced, consumed) = backticks(InlineTracker::new(), "``rust");
    assert_eq!(consumed, 3);
    assert!(fenced.in_code_block());
    assert!(!fenced.in_inline_code());

    // A lone backtick inside the fence does not start inline code.
    let (still_fenced, _) = backticks(fenced, "rest");
    assert!(still_fenced.in_code_block());
    assert!(!still_fenced.in_inline_code());

    // Three backticks close the fence.
    let (closed, consumed) = backticks(still_fenced, "``");
    assert_eq!(consumed, 3);
    assert!(!closed.in_code_block());
    assert!(!closed.in_inline_code());
  }

  #[test]
  fn test_inline_code_tracker_tildes() {
    // Three tildes open a fenced block.
    let (fenced, consumed) = tildes(InlineTracker::new(), "~~");
    assert_eq!(consumed, 3);
    assert!(fenced.in_code_block());

    // Three tildes close it.
    let (closed, consumed) = tildes(fenced, "~~");
    assert_eq!(consumed, 3);
    assert!(!closed.in_code_block());
  }

  #[test]
  fn test_inline_code_tracker_newline() {
    // Open an inline span with a single backtick.
    let (opened, _) = backticks(InlineTracker::new(), "rest");
    assert!(opened.in_inline_code());

    // A newline terminates the unclosed span.
    assert!(!opened.process_newline().in_inline_code());
  }
}