markdown_ai_cite_remove/
remover.rs1use crate::config::RemoverConfig;
2use crate::patterns::Patterns;
3
4pub struct CitationRemover {
6 config: RemoverConfig,
7 patterns: &'static Patterns,
8}
9
10impl CitationRemover {
11 pub fn new() -> Self {
13 Self {
14 config: RemoverConfig::default(),
15 patterns: Patterns::get(),
16 }
17 }
18
19 pub fn with_config(config: RemoverConfig) -> Self {
21 Self {
22 config,
23 patterns: Patterns::get(),
24 }
25 }
26
27 pub fn remove(&self, markdown: &str) -> String {
29 let mut result = markdown.to_string();
30
31 if self.config.remove_reference_links
34 || self.config.remove_reference_entries
35 || self.config.remove_reference_headers
36 {
37 result = self.remove_reference_sections(&result);
38 }
39
40 if self.config.remove_inline_citations {
42 result = self.remove_inline_citations(&result);
43 }
44
45 if self.config.normalize_whitespace {
47 result = self.normalize_whitespace(&result);
48 }
49
50 if self.config.remove_blank_lines {
52 result = self.remove_excessive_blank_lines(&result);
53 }
54
55 if self.config.trim_lines {
57 result = self.trim_all_lines(&result);
58 }
59
60 result
61 }
62
63 fn remove_inline_citations(&self, text: &str) -> String {
66 self.patterns
68 .inline_citations
69 .replace_all(text, "")
70 .to_string()
71 }
72
73 fn remove_reference_sections(&self, text: &str) -> String {
76 let lines: Vec<&str> = text.lines().collect();
77 let mut references_start = None;
78
79 for (i, line) in lines.iter().enumerate() {
81 if references_start.is_some() {
83 break;
84 }
85
86 if self.config.remove_reference_headers && self.patterns.reference_header.is_match(line)
88 {
89 references_start = Some(i);
90 break;
91 }
92
93 if self.patterns.reference_definitions.is_match(line)
95 || self.patterns.reference_entry.is_match(line)
96 {
97 references_start = Some(i);
98 break;
99 }
100 }
101
102 if let Some(start) = references_start {
104 lines[..start].join("\n")
105 } else {
106 text.to_string()
107 }
108 }
109
110 fn normalize_whitespace(&self, text: &str) -> String {
112 self.patterns
113 .multiple_whitespace
114 .replace_all(text, " ")
115 .to_string()
116 }
117
118 fn remove_excessive_blank_lines(&self, text: &str) -> String {
120 self.patterns
121 .excessive_newlines
122 .replace_all(text, "\n\n")
123 .to_string()
124 }
125
126 fn trim_all_lines(&self, text: &str) -> String {
128 text.lines()
129 .map(|line| line.trim_end())
130 .collect::<Vec<_>>()
131 .join("\n")
132 }
133}
134
135impl Default for CitationRemover {
136 fn default() -> Self {
137 Self::new()
138 }
139}
140
#[cfg(test)]
mod tests {
    use super::*;

    /// Numeric markers such as `[1]` are stripped from running text.
    #[test]
    fn test_remove_inline_numeric() {
        let remover = CitationRemover::new();
        let input = "Text[1] with[2] citations[3].";
        let result = remover.remove_inline_citations(input);
        assert_eq!(result, "Text with citations.");
    }

    /// Named markers such as `[source:1]` and `[ref:2]` are stripped too.
    #[test]
    fn test_remove_inline_named() {
        let remover = CitationRemover::new();
        let input = "Text[source:1] with[ref:2] citations.";
        let result = remover.remove_inline_citations(input);
        assert_eq!(result, "Text with citations.");
    }

    /// Runs of spaces collapse to a single space.
    #[test]
    fn test_normalize_whitespace() {
        let remover = CitationRemover::new();
        // The input must actually contain runs of spaces; otherwise the test
        // passes even if normalisation does nothing.
        let input = "Text  with   multiple    spaces.";
        let result = remover.normalize_whitespace(input);
        assert_eq!(result, "Text with multiple spaces.");
    }

    /// Several consecutive blank lines collapse to one blank line.
    #[test]
    fn test_remove_excessive_blank_lines() {
        let remover = CitationRemover::new();
        let input = "Line 1\n\n\n\n\nLine 2";
        let result = remover.remove_excessive_blank_lines(input);
        assert_eq!(result, "Line 1\n\nLine 2");
    }

    /// Trailing whitespace is removed from every line.
    #[test]
    fn test_trim_all_lines() {
        let remover = CitationRemover::new();
        let input = "Line 1 \nLine 2 \nLine 3 ";
        let result = remover.trim_all_lines(input);
        assert_eq!(result, "Line 1\nLine 2\nLine 3");
    }

    /// A reference header and everything after it are dropped.
    #[test]
    fn test_remove_reference_sections_with_header() {
        let remover = CitationRemover::new();
        let input = "Content here.\n\n## References\n[1]: https://example.com";
        let result = remover.remove_reference_sections(input);
        assert_eq!(result.trim(), "Content here.");
    }

    /// Link definitions are dropped even when no header precedes them.
    #[test]
    fn test_remove_reference_sections_without_header() {
        let remover = CitationRemover::new();
        let input = "Content here.\n\n[1]: https://example.com\n[2]: https://test.com";
        let result = remover.remove_reference_sections(input);
        assert_eq!(result.trim(), "Content here.");
    }

    /// With only inline removal enabled, the reference section is kept.
    #[test]
    fn test_custom_config() {
        let config = RemoverConfig {
            remove_inline_citations: true,
            remove_reference_links: false,
            remove_reference_headers: false,
            remove_reference_entries: false,
            normalize_whitespace: false,
            remove_blank_lines: false,
            trim_lines: false,
        };
        let remover = CitationRemover::with_config(config);
        let input = "Text[1].\n\n[1]: https://example.com";
        let result = remover.remove(input);
        assert!(!result.contains("[1]"));
        assert!(result.contains("https://example.com"));
    }

    /// The default pipeline strips citations, references, repeated spaces and
    /// repeated blank lines in one pass.
    #[test]
    fn test_full_pipeline() {
        let remover = CitationRemover::new();
        let input = "Text[1]  with   spaces.\n\n\n\n## References\n[1]: https://example.com";
        let result = remover.remove(input);
        assert!(!result.contains("[1]"));
        assert!(!result.contains("https://example.com"));
        // Check for a double space: single spaces between words are expected
        // to survive, so asserting on " " could never pass.
        assert!(!result.contains("  "));
        assert!(!result.contains("\n\n\n"));
    }
}