strip_codeblocks/
lib.rs

1//! # strip-codeblocks
2//!
3//! A Rust library to strip markdown fenced code blocks from text while preserving
4//! the inner content and leaving inline code blocks untouched.
5//!
6//! ## Features
7//!
8//! - Removes fenced code blocks (triple backticks: \`\`\`)
9//! - Preserves the content inside code blocks
10//! - Keeps inline code blocks (single backticks: \`) intact
11//! - Handles code blocks with or without language identifiers
12//!
13//! ## Usage
14//!
15//! ```rust
16//! use strip_codeblocks::strip_codeblocks;
17//!
18//! let markdown = "Here is some text.\n\n```rust\nfn main() {\n    println!(\"Hello, world!\");\n}\n```\n\nMore text with `inline code` here.";
19//!
20//! let result = strip_codeblocks(markdown);
//! // Result: "Here is some text.\n\nfn main() {\n    println!(\"Hello, world!\");\n}\n\n\nMore text with `inline code` here."
22//! ```
23//!
24//! ## Examples
25//!
26//! ### Basic Usage
27//!
28//! ```rust
29//! use strip_codeblocks::strip_codeblocks;
30//!
31//! let input = "```python\nprint('hello')\n```";
32//! let output = strip_codeblocks(input);
33//! assert_eq!(output, "print('hello')\n");
34//! ```
35//!
36//! ### Preserving Inline Code
37//!
38//! ```rust
39//! use strip_codeblocks::strip_codeblocks;
40//!
41//! let input = "This has `inline code` and ```\ncode block\n```";
42//! let output = strip_codeblocks(input);
43//! assert_eq!(output, "This has `inline code` and code block\n");
44//! ```
45
46use regex::Regex;
47
48/// Strips fenced code blocks from markdown text while preserving the inner content.
49///
50/// This function removes markdown fenced code blocks (triple backticks) but keeps
51/// the content inside them. Inline code blocks (single backticks) are left untouched.
52///
53/// # Arguments
54///
55/// * `text` - The markdown text containing code blocks to strip
56///
57/// # Returns
58///
59/// A new string with fenced code blocks removed, but their content preserved.
60///
61/// # Examples
62///
63/// ```
64/// use strip_codeblocks::strip_codeblocks;
65///
66/// let markdown = "Some text before.\n\n```rust\nfn example() {\n    println!(\"Hello\");\n}\n```\n\nSome text after with `inline code`.";
67///
68/// let result = strip_codeblocks(markdown);
69/// // The fenced code block is removed, but its content remains
70/// //Inline code is preserved
71/// ```
72pub fn strip_codeblocks(text: &str) -> String {
73    // Match fenced code blocks: ```optional_lang\n...content...\n```
74    // This regex matches:
75    // - Three backticks (```)
76    // - Optional language identifier (any characters except newline and backtick)
77    // - Newline
78    // - Content (non-greedy, including newlines)
79    // - Three backticks (```)
80    // The (?s) flag makes . match newlines
81    let re = Regex::new(r"(?s)```[^\n`]*\n(.*?)```").unwrap();
82
83    re.replace_all(text, |caps: &regex::Captures| {
84        // Extract the content (first capture group)
85        caps.get(1)
86            .map_or(String::new(), |m| m.as_str().to_string())
87    })
88    .to_string()
89}
90
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that stripping `markdown` produces exactly `expected`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test rather than
    /// at this helper.
    #[track_caller]
    fn assert_stripped(markdown: &str, expected: &str) {
        assert_eq!(strip_codeblocks(markdown), expected);
    }

    #[test]
    fn test_basic_codeblock() {
        assert_stripped("```rust\nfn main() {}\n```", "fn main() {}\n");
    }

    #[test]
    fn test_codeblock_with_language() {
        assert_stripped("```python\nprint('hello')\n```", "print('hello')\n");
    }

    #[test]
    fn test_codeblock_without_language() {
        assert_stripped("```\njust code\n```", "just code\n");
    }

    #[test]
    fn test_preserves_inline_code() {
        // No fence present: the text must come back unchanged.
        let plain = "This has `inline code` in it.";
        assert_stripped(plain, "This has `inline code` in it.");
    }

    #[test]
    fn test_multiple_codeblocks() {
        // The newline between the two blocks sits outside both fences and is kept.
        assert_stripped(
            "```rust\nfn a() {}\n```\n```python\nprint('b')\n```",
            "fn a() {}\n\nprint('b')\n",
        );
    }

    #[test]
    fn test_codeblock_with_text_around() {
        assert_stripped(
            "Before\n```rust\ncode here\n```\nAfter",
            "Before\ncode here\n\nAfter",
        );
    }

    #[test]
    fn test_codeblock_with_inline_code() {
        // A fence may open mid-line; the inline code before it is untouched.
        assert_stripped(
            "Text with `inline` and ```\nblock code\n```",
            "Text with `inline` and block code\n",
        );
    }

    #[test]
    fn test_empty_codeblock() {
        // Only the newline that precedes the closing fence remains.
        assert_stripped("```\n\n```", "\n");
    }

    #[test]
    fn test_codeblock_with_multiline_content() {
        assert_stripped(
            "```python\ndef hello():\n    print('hi')\n    return True\n```",
            "def hello():\n    print('hi')\n    return True\n",
        );
    }

    #[test]
    fn test_no_codeblocks() {
        let plain = "Just regular text with `inline code`.";
        assert_stripped(plain, "Just regular text with `inline code`.");
    }

    #[test]
    fn test_codeblock_with_special_chars_in_language() {
        assert_stripped("```c++\nint x = 0;\n```", "int x = 0;\n");
    }

    #[test]
    fn test_codeblock_with_backticks_inside() {
        // Single backticks inside the block must not be mistaken for the closing fence.
        assert_stripped(
            "```\nThis has `backticks` inside\n```",
            "This has `backticks` inside\n",
        );
    }

    #[test]
    fn test_complex_markdown() {
        let document = r#"# Title

Some paragraph with `inline code`.

```rust
fn main() {
    println!("Hello");
}
```

More text with ``double backticks`` inline.

```python
x = 1
y = 2
```
"#;
        let stripped = strip_codeblocks(document);

        // Block content and inline code survive.
        for kept in ["fn main()", "`inline code`", "``double backticks``", "x = 1"] {
            assert!(stripped.contains(kept));
        }

        // The opening fences are gone.
        for fence in ["```rust", "```python"] {
            assert!(!stripped.contains(fence));
        }
    }
}