use crate::config::RemoverConfig;
use crate::patterns::Patterns;
/// Strips citation markers and reference sections from Markdown text.
///
/// Which clean-up steps run is decided by the stored [`RemoverConfig`].
pub struct CitationRemover {
    /// Flags selecting which clean-up steps are applied.
    config: RemoverConfig,
    /// `'static` reference to the pattern set used for matching.
    patterns: &'static Patterns,
}
impl CitationRemover {
    /// Creates a remover configured with `RemoverConfig::default()`.
    #[must_use]
    pub fn new() -> Self {
        Self::with_config(RemoverConfig::default())
    }

    /// Creates a remover that applies only the steps enabled in `config`.
    #[must_use]
    pub fn with_config(config: RemoverConfig) -> Self {
        Self {
            config,
            patterns: Patterns::get(),
        }
    }

    /// Runs every enabled clean-up step over `markdown` and returns the result.
    ///
    /// Steps run in a fixed order, each one operating on the output of the
    /// previous one:
    ///
    /// 1. reference-section removal (if any of `remove_reference_links`,
    ///    `remove_reference_entries` or `remove_reference_headers` is set),
    /// 2. inline-citation removal (`remove_inline_citations`),
    /// 3. whitespace normalization (`normalize_whitespace`),
    /// 4. blank-line collapsing (`remove_blank_lines`),
    /// 5. trailing-whitespace trimming (`trim_lines`).
    ///
    /// With every flag disabled the input is returned unchanged.
    #[must_use]
    pub fn remove(&self, markdown: &str) -> String {
        let config = &self.config;
        let mut result = markdown.to_owned();

        let strip_references = config.remove_reference_links
            || config.remove_reference_entries
            || config.remove_reference_headers;
        if strip_references {
            result = self.remove_reference_sections(&result);
        }
        // Citations are removed before whitespace normalization so that the
        // gaps they leave behind can be collapsed by the next step.
        if config.remove_inline_citations {
            result = self.remove_inline_citations(&result);
        }
        if config.normalize_whitespace {
            result = self.normalize_whitespace(&result);
        }
        // NOTE(review): lines holding only whitespace become empty in the
        // final trim step, i.e. after blank-line collapsing has already run.
        // Confirm that `excessive_newlines` tolerates such lines.
        if config.remove_blank_lines {
            result = self.remove_excessive_blank_lines(&result);
        }
        if config.trim_lines {
            result = self.trim_all_lines(&result);
        }
        result
    }

    /// Deletes every match of the `inline_citations` pattern from `text`.
    fn remove_inline_citations(&self, text: &str) -> String {
        // `into_owned` reuses the buffer produced by the replacement instead
        // of copying it a second time.
        self.patterns
            .inline_citations
            .replace_all(text, "")
            .into_owned()
    }

    /// Cuts `text` off at the first line that opens a reference section.
    ///
    /// A line opens a reference section when it matches a pattern whose
    /// corresponding flag is enabled:
    ///
    /// * `reference_header` when `remove_reference_headers` is set,
    /// * `reference_definitions` when `remove_reference_links` is set,
    /// * `reference_entry` when `remove_reference_entries` is set.
    ///
    /// That line and everything after it are dropped; the preceding lines are
    /// re-joined with `\n`. When no line qualifies, `text` is returned
    /// unchanged.
    fn remove_reference_sections(&self, text: &str) -> String {
        let config = &self.config;
        let patterns = self.patterns;

        // Each pattern is consulted only when its flag is on, so a disabled
        // category can never truncate the document.
        let opens_references = |line: &str| {
            (config.remove_reference_headers && patterns.reference_header.is_match(line))
                || (config.remove_reference_links
                    && patterns.reference_definitions.is_match(line))
                || (config.remove_reference_entries && patterns.reference_entry.is_match(line))
        };

        match text.lines().position(opens_references) {
            Some(start) => text.lines().take(start).collect::<Vec<_>>().join("\n"),
            None => text.to_owned(),
        }
    }

    /// Replaces every match of the `multiple_whitespace` pattern with a
    /// single space.
    fn normalize_whitespace(&self, text: &str) -> String {
        self.patterns
            .multiple_whitespace
            .replace_all(text, " ")
            .into_owned()
    }

    /// Replaces every match of the `excessive_newlines` pattern with two
    /// newlines, i.e. a single blank line.
    fn remove_excessive_blank_lines(&self, text: &str) -> String {
        self.patterns
            .excessive_newlines
            .replace_all(text, "\n\n")
            .into_owned()
    }

    /// Strips trailing whitespace from every line of `text`.
    ///
    /// Lines are split with [`str::lines`] and re-joined with `\n`, so
    /// `\r\n` line endings come back as `\n` and a final trailing newline is
    /// not preserved.
    fn trim_all_lines(&self, text: &str) -> String {
        text.lines()
            .map(str::trim_end)
            .collect::<Vec<_>>()
            .join("\n")
    }
}
impl Default for CitationRemover {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Bare numeric markers such as `[1]` are stripped.
    #[test]
    fn test_remove_inline_numeric() {
        let remover = CitationRemover::new();
        let input = "Text[1] with[2] citations[3].";
        let result = remover.remove_inline_citations(input);
        assert_eq!(result, "Text with citations.");
    }

    /// Named markers such as `[source:1]` are stripped.
    #[test]
    fn test_remove_inline_named() {
        let remover = CitationRemover::new();
        let input = "Text[source:1] with[ref:2] citations.";
        let result = remover.remove_inline_citations(input);
        assert_eq!(result, "Text with citations.");
    }

    /// Runs of spaces collapse to a single space.
    #[test]
    fn test_normalize_whitespace() {
        let remover = CitationRemover::new();
        // The input must actually contain runs of spaces; otherwise the
        // test passes without exercising the normalization at all.
        let input = "Text  with   multiple    spaces.";
        let result = remover.normalize_whitespace(input);
        assert_eq!(result, "Text with multiple spaces.");
    }

    /// Several consecutive blank lines collapse to one blank line.
    #[test]
    fn test_remove_excessive_blank_lines() {
        let remover = CitationRemover::new();
        let input = "Line 1\n\n\n\n\nLine 2";
        let result = remover.remove_excessive_blank_lines(input);
        assert_eq!(result, "Line 1\n\nLine 2");
    }

    /// Trailing spaces and tabs are removed from every line.
    #[test]
    fn test_trim_all_lines() {
        let remover = CitationRemover::new();
        let input = "Line 1  \nLine 2\t\nLine 3 ";
        let result = remover.trim_all_lines(input);
        assert_eq!(result, "Line 1\nLine 2\nLine 3");
    }

    /// A references header and everything after it are dropped.
    #[test]
    fn test_remove_reference_sections_with_header() {
        let remover = CitationRemover::new();
        let input = "Content here.\n\n## References\n[1]: https://example.com";
        let result = remover.remove_reference_sections(input);
        assert_eq!(result.trim(), "Content here.");
    }

    /// Reference definitions are dropped even without a preceding header.
    #[test]
    fn test_remove_reference_sections_without_header() {
        let remover = CitationRemover::new();
        let input = "Content here.\n\n[1]: https://example.com\n[2]: https://test.com";
        let result = remover.remove_reference_sections(input);
        assert_eq!(result.trim(), "Content here.");
    }

    /// With only inline-citation removal enabled, reference URLs survive.
    #[test]
    fn test_custom_config() {
        let config = RemoverConfig {
            remove_inline_citations: true,
            remove_reference_links: false,
            remove_reference_headers: false,
            remove_reference_entries: false,
            normalize_whitespace: false,
            remove_blank_lines: false,
            trim_lines: false,
        };
        let remover = CitationRemover::with_config(config);
        let input = "Text[1].\n\n[1]: https://example.com";
        let result = remover.remove(input);
        assert!(!result.contains("[1]"));
        assert!(result.contains("https://example.com"));
    }

    /// The default pipeline removes citations, references, space runs and
    /// surplus blank lines in one pass.
    #[test]
    fn test_full_pipeline() {
        let remover = CitationRemover::new();
        let input = "Text[1]  with   spaces.\n\n\n\n## References\n[1]: https://example.com";
        let result = remover.remove(input);
        assert!(!result.contains("[1]"));
        assert!(!result.contains("https://example.com"));
        // Single spaces between words are expected to remain; only runs of
        // two or more spaces must be gone.
        assert!(!result.contains("  "));
        assert!(!result.contains("\n\n\n"));
    }
}