use crate::ast::visitor::{CommentInfo, CommentVisitor};
use crate::config::{ConfigManager, ResolvedConfig};
use crate::languages::registry::LanguageRegistry;
use crate::rules::preservation::PreservationRule;
use anyhow::{Context, Result};
use std::borrow::Cow;
use std::path::Path;
use tree_sitter::Parser;
/// Command-line level switches that can be layered on top of the
/// configuration resolved for a file (see `Processor::process_file_with_config`).
#[derive(Debug, Clone)]
pub struct ProcessingOptions {
/// When `true`, forces removal of TODO comments; `false` leaves the resolved setting as is.
pub remove_todo: bool,
/// When `true`, forces removal of FIXME comments; `false` leaves the resolved setting as is.
pub remove_fixme: bool,
/// When `true`, forces removal of documentation comments; `false` leaves the resolved setting as is.
pub remove_doc: bool,
/// Extra patterns appended to the resolved configuration's preserve patterns.
pub custom_preserve_patterns: Vec<String>,
/// Replaces the resolved configuration's `use_default_ignores` value.
pub use_default_ignores: bool,
/// Not read by `Processor` in this file; presumably consumed by the caller that builds the `OutputWriter` (confirm).
pub dry_run: bool,
/// Not read by `Processor` in this file; presumably consumed by the caller that builds the `OutputWriter` (confirm).
pub show_diff: bool,
/// Replaces the resolved configuration's `respect_gitignore` value.
pub respect_gitignore: bool,
/// Replaces the resolved configuration's `traverse_git_repos` value.
pub traverse_git_repos: bool,
}
/// Strips comments from source files using tree-sitter grammars.
pub struct Processor {
// Parser instance whose language is (re)set on every processed content.
parser: Parser,
// Registry used to detect the language of a file from its path.
registry: LanguageRegistry,
}
impl Default for Processor {
fn default() -> Self {
Self::new()
}
}
impl Processor {
/// Creates a processor with a fresh parser and a newly constructed
/// language registry.
pub fn new() -> Self {
    let parser = Parser::new();
    let registry = LanguageRegistry::new();
    Self { parser, registry }
}
/// Creates a processor whose registry additionally contains every language
/// reported by `config_manager`.
pub fn new_with_config(config_manager: &ConfigManager) -> Self {
    let mut registry = LanguageRegistry::new();
    let configured = config_manager.get_all_languages();
    registry.register_configured_languages(&configured);
    let parser = Parser::new();
    Self { parser, registry }
}
/// Reads `path`, removes its removable comments and returns the outcome.
///
/// The language is detected from the path through the registry, the
/// configuration for the file is resolved via `config_manager`, and any
/// `cli_overrides` are layered on top before the content is processed.
/// The file on disk is not modified by this method.
///
/// # Errors
///
/// Returns an error when the file cannot be read, when no language is
/// registered for the path, or when processing the content fails.
pub fn process_file_with_config(
    &mut self,
    path: &Path,
    config_manager: &ConfigManager,
    cli_overrides: Option<&ProcessingOptions>,
) -> Result<ProcessedFile> {
    let content = std::fs::read_to_string(path)
        .with_context(|| format!("Failed to read file: {}", path.display()))?;

    let language_config = self
        .registry
        .detect_language_arc(path)
        .with_context(|| format!("Unsupported file type: {}", path.display()))?;

    // The configuration lookup receives the lowercased language name; only
    // allocate when the registered name contains ASCII uppercase letters.
    let language_name = if language_config
        .name
        .bytes()
        .any(|byte| byte.is_ascii_uppercase())
    {
        Cow::Owned(language_config.name.to_lowercase())
    } else {
        Cow::Borrowed(language_config.name.as_str())
    };

    let mut resolved_config =
        config_manager.get_config_for_file_with_language(path, &language_name);

    if let Some(overrides) = cli_overrides {
        // Removal switches can only turn removal on, never back off.
        if overrides.remove_todo {
            resolved_config.remove_todos = true;
        }
        if overrides.remove_fixme {
            resolved_config.remove_fixme = true;
        }
        if overrides.remove_doc {
            resolved_config.remove_docs = true;
        }
        if !overrides.custom_preserve_patterns.is_empty() {
            resolved_config
                .preserve_patterns
                .extend(overrides.custom_preserve_patterns.iter().cloned());
        }
        // These flags always replace the resolved values.
        resolved_config.use_default_ignores = overrides.use_default_ignores;
        resolved_config.respect_gitignore = overrides.respect_gitignore;
        resolved_config.traverse_git_repos = overrides.traverse_git_repos;
    }

    let (processed_content, comments_removed, important_removals) =
        self.process_content_with_config(&content, language_config.as_ref(), &resolved_config)?;

    // Report whether stripping comments actually changed the text instead of
    // unconditionally claiming the file is unmodified.
    let modified = content != processed_content;

    Ok(ProcessedFile {
        path: path.to_path_buf(),
        original_content: content,
        processed_content,
        modified,
        comments_removed,
        important_removals,
    })
}
/// Parses `content` with the grammar named by `language_config` and strips
/// every comment that the preservation rules do not protect.
///
/// Returns the stripped text, the number of removed comments and the subset
/// of removals flagged by `detect_important_removals`.
///
/// # Errors
///
/// Returns an error when the grammar cannot be loaded, cannot be set on the
/// parser, or when parsing yields no tree.
fn process_content_with_config(
    &mut self,
    content: &str,
    language_config: &crate::languages::config::LanguageConfig,
    resolved_config: &ResolvedConfig,
) -> Result<(String, usize, Vec<ImportantRemoval>)> {
    let grammar = tree_sitter_language_pack::get_language(&language_config.tslp_name)
        .with_context(|| {
            format!(
                "Failed to load grammar for '{}' (tslp name: '{}')",
                language_config.name, language_config.tslp_name
            )
        })?;
    self.parser
        .set_language(&grammar)
        .context("Failed to set parser language")?;
    let syntax_tree = self
        .parser
        .parse(content, None)
        .context("Failed to parse source code")?;

    // Walk the tree and let the visitor decide which comments may go.
    let rules = self.create_preservation_rules_from_config(resolved_config);
    let mut visitor = CommentVisitor::new_with_language(
        content,
        &rules,
        &language_config.comment_types,
        &language_config.doc_comment_types,
        &language_config.name,
    );
    visitor.visit_node(syntax_tree.root_node());

    let removable = visitor.get_comments_to_remove();
    let removed_count = removable.len();
    let flagged = detect_important_removals(&removable, content);
    let stripped = self.remove_comments_from_content(content, &removable);
    Ok((stripped, removed_count, flagged))
}
/// Builds the list of rules that decide which comments must be kept.
///
/// Shebang and `~keep` rules are always present. TODO/FIXME/documentation
/// rules are added unless the matching `remove_*` flag is set, followed by
/// the user's preserve patterns and, when `use_default_ignores` is on, the
/// comprehensive default rules minus those that would contradict a
/// `remove_*` flag.
fn create_preservation_rules_from_config(
    &self,
    config: &ResolvedConfig,
) -> Vec<PreservationRule> {
    let mut rules = vec![
        PreservationRule::shebang(),
        PreservationRule::pattern("~keep"),
    ];
    if !config.remove_todos {
        rules.extend([
            PreservationRule::pattern("TODO"),
            PreservationRule::pattern("todo"),
        ]);
    }
    if !config.remove_fixme {
        rules.extend([
            PreservationRule::pattern("FIXME"),
            PreservationRule::pattern("fixme"),
        ]);
    }
    if !config.remove_docs {
        rules.push(PreservationRule::documentation());
    }
    rules.extend(
        config
            .preserve_patterns
            .iter()
            .map(|pattern| PreservationRule::pattern_owned(pattern.clone())),
    );

    if !config.use_default_ignores {
        return rules;
    }

    // Drop default rules that would re-protect something the configuration
    // explicitly asked to remove.
    let mut defaults = PreservationRule::comprehensive_rules();
    defaults.retain(|rule| {
        if config.remove_todos && (rule.pattern_matches("TODO") || rule.pattern_matches("todo"))
        {
            return false;
        }
        if config.remove_fixme
            && (rule.pattern_matches("FIXME") || rule.pattern_matches("fixme"))
        {
            return false;
        }
        !(config.remove_docs && matches!(rule, PreservationRule::Documentation))
    });
    rules.extend(defaults);
    rules
}
/// Returns `content` with the byte ranges of `comments_to_remove` cut out.
///
/// A comment that is the only non-whitespace text on its line(s) takes the
/// whole line with it, including the trailing newline; otherwise only the
/// comment bytes are removed. Ranges nested inside a previously accepted
/// range are ignored.
fn remove_comments_from_content(
    &self,
    content: &str,
    comments_to_remove: &[&CommentInfo],
) -> String {
    if comments_to_remove.is_empty() {
        return content.to_string();
    }
    let bytes = content.as_bytes();

    // Sort by start ascending and end descending so that an enclosing range
    // always precedes the ranges nested inside it.
    let mut ranges: Vec<(usize, usize)> = comments_to_remove
        .iter()
        .map(|comment| (comment.start_byte, comment.end_byte))
        .collect();
    ranges.sort_unstable_by(|a, b| a.0.cmp(&b.0).then(b.1.cmp(&a.1)));

    let mut filtered: Vec<(usize, usize)> = Vec::with_capacity(ranges.len());
    for (start, end) in ranges {
        let nested = filtered
            .last()
            .is_some_and(|previous| start >= previous.0 && end <= previous.1);
        if !nested {
            filtered.push((start, end));
        }
    }

    let mut removal_ranges: Vec<(usize, usize)> = Vec::with_capacity(filtered.len());
    for &(start, raw_end) in &filtered {
        let end = raw_end.min(bytes.len());
        if start >= end || start >= bytes.len() {
            continue;
        }
        // Boundaries of the line(s) the comment occupies; `line_end` includes
        // the terminating newline when there is one.
        let line_start = memchr::memrchr(b'\n', &bytes[..start]).map_or(0, |pos| pos + 1);
        let line_end =
            memchr::memchr(b'\n', &bytes[end..]).map_or(bytes.len(), |pos| end + pos + 1);
        let only_whitespace_around = bytes[line_start..start]
            .iter()
            .chain(&bytes[end..line_end])
            .all(|b| b.is_ascii_whitespace());
        if only_whitespace_around {
            removal_ranges.push((line_start, line_end));
        } else {
            removal_ranges.push((start, end));
        }
    }

    let mut output = String::with_capacity(content.len());
    let mut cursor = 0;
    for &(start, end) in &removal_ranges {
        if start > cursor {
            output.push_str(&content[cursor..start]);
        }
        // Never move backwards: an overlapping range that ends before the
        // current position must not cause already-removed text to be copied.
        cursor = cursor.max(end);
    }
    if cursor < content.len() {
        output.push_str(&content[cursor..]);
    }
    output
}
}
/// Result of running the processor over a single file.
#[derive(Debug)]
pub struct ProcessedFile {
/// Path of the file that was read.
pub path: std::path::PathBuf,
/// File content exactly as read from disk.
pub original_content: String,
/// Content after the removable comments were stripped.
pub processed_content: String,
/// Flag meant to tell whether processing changed the content. Note that
/// `OutputWriter::write_file` does not read it and compares the two contents itself.
pub modified: bool,
/// Number of comments selected for removal.
pub comments_removed: usize,
/// Removed comments that look like directives (see `detect_important_removals`).
pub important_removals: Vec<ImportantRemoval>,
}
/// A removed comment that resembles a directive or magic comment.
#[derive(Debug, Clone)]
pub struct ImportantRemoval {
/// The comment's `start_row` plus one (presumably a 1-based line number; confirm against `CommentInfo`).
pub line: usize,
/// Short category label such as "shebang" or "go directive".
pub reason: Cow<'static, str>,
/// Comment text with newlines replaced by spaces, cut to about 120 bytes and suffixed with `…` when truncated.
pub preview: String,
}
/// Writes processed files back to disk (or reports what would change) and
/// prints progress information to stdout.
pub struct OutputWriter {
// When set, files are never written; changes are only reported.
dry_run: bool,
// When set, extra per-file details are printed.
verbose: bool,
// When set together with `dry_run`, a line-by-line diff is printed.
show_diff: bool,
}
impl OutputWriter {
pub fn new(dry_run: bool, verbose: bool, show_diff: bool) -> Self {
Self {
dry_run,
verbose,
show_diff,
}
}
pub fn write_file(&self, processed_file: &ProcessedFile) -> Result<()> {
let modified = processed_file.original_content != processed_file.processed_content;
if !modified {
if self.verbose {
println!("✓ No changes needed: {}", processed_file.path.display());
}
return Ok(());
}
if self.dry_run {
println!("[DRY RUN] Would modify: {}", processed_file.path.display());
if self.verbose {
println!(" Removed {} comment(s)", processed_file.comments_removed);
}
if self.show_diff {
self.show_diff(processed_file)?;
}
} else {
std::fs::write(&processed_file.path, &processed_file.processed_content).with_context(
|| format!("Failed to write file: {}", processed_file.path.display()),
)?;
if self.verbose {
println!(
"✓ Modified: {} (removed {} comment(s))",
processed_file.path.display(),
processed_file.comments_removed
);
} else {
println!("Modified: {}", processed_file.path.display());
}
}
Ok(())
}
/// Prints a naive positional diff of the original and processed content.
///
/// Lines are compared index by index; there is no realignment after a
/// removed line, so everything following a removal may show as changed.
fn show_diff(&self, processed_file: &ProcessedFile) -> Result<()> {
    println!("\n--- {}", processed_file.path.display());
    println!("+++ {} (processed)", processed_file.path.display());

    let before: Vec<&str> = processed_file.original_content.lines().collect();
    let after: Vec<&str> = processed_file.processed_content.lines().collect();

    for index in 0..before.len().max(after.len()) {
        match (before.get(index).copied(), after.get(index).copied()) {
            (Some(original_line), Some(processed_line)) if original_line != processed_line => {
                println!("-{original_line}");
                println!("+{processed_line}");
            }
            // A missing counterpart is compared as an empty line, so blank
            // lines that only exist on one side are not reported.
            (Some(original_line), None) if !original_line.is_empty() => {
                println!("-{original_line}");
            }
            (None, Some(processed_line)) if !processed_line.is_empty() => {
                println!("+{processed_line}");
            }
            _ => {}
        }
    }
    Ok(())
}
/// Prints the closing summary line, plus a hint when files were processed
/// but none of them changed.
pub fn print_summary(&self, total_files: usize, modified_files: usize) {
    match self.dry_run {
        true => println!(
            "\n[DRY RUN] Summary: {total_files} files processed, {modified_files} would be modified"
        ),
        false => println!("\nSummary: {total_files} files processed, {modified_files} modified"),
    }

    let nothing_modified = modified_files == 0;
    if nothing_modified && total_files > 0 {
        println!("All files were already comment-free or only contained preserved comments.");
    }
}
}
/// Flags removed comments that look like directives rather than prose.
///
/// Each comment's text (taken from `source`, leading whitespace ignored) is
/// checked against a fixed list of shebang, Go, linter/formatter, compiler
/// and magic-comment markers. The first matching category wins. Matching
/// comments are reported with the comment's start row plus one, the category
/// label and a single-line preview capped at 120 bytes.
fn detect_important_removals(
    comments_to_remove: &[&CommentInfo],
    source: &str,
) -> Vec<ImportantRemoval> {
    const PREVIEW_LIMIT: usize = 120;
    const GO_PREFIXES: &[&str] = &[
        "//go:",
        "/*go:",
        "//+build",
        "// +build",
        "//line ",
        "/*line ",
    ];
    const LINTER_MARKERS: &[&str] = &[
        "eslint-",
        "prettier-",
        "@ts-",
        "biome-",
        "deno-",
        "nolint",
    ];
    const COMPILER_MARKERS: &[&str] = &["NOLINT", "clang-format"];
    const MAGIC_PREFIXES: &[&str] = &[
        "# frozen_string_literal:",
        "# encoding:",
        "# coding:",
        "# typed:",
    ];

    fn starts_with_any(text: &str, prefixes: &[&str]) -> bool {
        prefixes.iter().any(|prefix| text.starts_with(*prefix))
    }

    fn contains_any(text: &str, markers: &[&str]) -> bool {
        markers.iter().any(|marker| text.contains(*marker))
    }

    let mut removals = Vec::new();
    for comment in comments_to_remove.iter().copied() {
        let trimmed = comment.content(source).trim_start();

        // The order of these checks is significant: the first hit decides
        // the reported category.
        let reason: &'static str = if trimmed.starts_with("#!") {
            "shebang"
        } else if starts_with_any(trimmed, GO_PREFIXES) {
            "go directive"
        } else if trimmed.contains("shellcheck") {
            "shellcheck directive"
        } else if contains_any(trimmed, LINTER_MARKERS) {
            "linter/formatter directive"
        } else if trimmed.starts_with("#pragma") || contains_any(trimmed, COMPILER_MARKERS) {
            "compiler/formatter directive"
        } else if starts_with_any(trimmed, MAGIC_PREFIXES) {
            "language magic comment"
        } else {
            continue;
        };

        let mut preview = trimmed.replace('\n', " ");
        if preview.len() > PREVIEW_LIMIT {
            // Back off to the nearest char boundary so truncation cannot
            // split a multi-byte character.
            let cut = (0..=PREVIEW_LIMIT)
                .rev()
                .find(|&index| preview.is_char_boundary(index))
                .unwrap_or(0);
            preview.truncate(cut);
            preview.push('…');
        }

        removals.push(ImportantRemoval {
            line: comment.start_row + 1,
            reason: Cow::Borrowed(reason),
            preview,
        });
    }
    removals
}
// Unit tests that run the processor over small sources in many languages and
// check which comments survive.
#[cfg(test)]
mod tests {
use super::*;
use crate::config::{Config, ConfigManager, ResolvedConfig};
use crate::languages::config::LanguageConfig;
use tempfile::tempdir;
/// Baseline configuration used by the helpers below: no forced removals, no
/// custom preserve patterns, default ignores enabled.
fn default_resolved_config() -> ResolvedConfig {
ResolvedConfig {
remove_todos: false,
remove_fixme: false,
remove_docs: false,
preserve_patterns: Vec::new(),
use_default_ignores: true,
respect_gitignore: true,
traverse_git_repos: false,
language_config: None,
}
}
/// Processes `source` as Rust with the baseline configuration and returns the stripped text.
fn process_rust(source: &str) -> String {
let mut processor = Processor::new();
let language_config = LanguageConfig::rust();
let resolved_config = default_resolved_config();
let (output, _, _) = processor
.process_content_with_config(source, &language_config, &resolved_config)
.expect("processing rust source");
output
}
/// Processes `source` as Go, overriding `use_default_ignores` and `remove_docs` on the baseline configuration.
fn process_go(source: &str, use_default_ignores: bool, remove_docs: bool) -> String {
let mut processor = Processor::new();
let language_config = LanguageConfig::go();
let mut resolved_config = default_resolved_config();
resolved_config.use_default_ignores = use_default_ignores;
resolved_config.remove_docs = remove_docs;
let (output, _, _) = processor
.process_content_with_config(source, &language_config, &resolved_config)
.expect("processing go source");
output
}
/// Processes `source` with the given language configuration and the baseline configuration.
fn process_language(source: &str, language_config: LanguageConfig) -> String {
let mut processor = Processor::new();
let resolved_config = default_resolved_config();
let (output, _, _) = processor
.process_content_with_config(source, &language_config, &resolved_config)
.expect("processing source");
output
}
/// Like `process_language`, but with `use_default_ignores` set explicitly.
fn process_language_with_default_ignores(
source: &str,
language_config: LanguageConfig,
use_default_ignores: bool,
) -> String {
let mut processor = Processor::new();
let mut resolved_config = default_resolved_config();
resolved_config.use_default_ignores = use_default_ignores;
let (output, _, _) = processor
.process_content_with_config(source, &language_config, &resolved_config)
.expect("processing source");
output
}
// A string literal that looks like a line comment must stay; the real
// trailing comment must be removed.
#[test]
fn preserves_strings_matching_comment_text() {
let source = r#"fn main() {
let pattern = "// comment";
println!("{}", pattern); // comment
}
"#;
let processed = process_rust(source);
assert!(processed.contains("\"// comment\""));
assert!(!processed.contains("; // comment"));
}
// The comment inside the macro body is removed while the comment-like string
// passed to the macro invocation is left intact.
#[test]
fn preserves_macro_invocations_with_comment_like_strings() {
let source = r#"macro_rules! announce {
($msg:expr) => {{
println!("{}", $msg); // keep
}};
}
fn main() {
announce!("// keep");
}
"#;
let processed = process_rust(source);
assert!(processed.contains("announce!(\"// keep\");"));
assert!(!processed.contains("// keep\n"));
}
// With `remove_docs` enabled the doc comment disappears but the attribute on
// the following line is kept.
#[test]
fn preserves_attributes_when_removing_doc_comments() {
let source = r#"#[derive(Subcommand, Debug)]
pub enum Commands {
/// Create smart config
#[command(about = "Create a template configuration file")]
Init,
}
"#;
let mut processor = Processor::new();
let language_config = LanguageConfig::rust();
let mut config = default_resolved_config();
config.remove_docs = true;
let (processed, _, _) = processor
.process_content_with_config(source, &language_config, &config)
.expect("process doc comments");
assert!(processed.contains("#[command(about = \"Create a template configuration file\")]"));
assert!(!processed.contains("Create smart config"));
}
// Processes the same file twice through `process_file_with_config`: with
// default ignores the NOTE and `#![feature` comments survive, with default
// ignores disabled both are removed while the code stays.
#[test]
fn respects_no_default_ignores_override() {
let dir = tempdir().expect("create temp dir");
let file_path = dir.path().join("sample.rs");
let source = r#"/// #![feature(never_type)]
// NOTE: this would normally be preserved
fn main() {}
"#;
std::fs::write(&file_path, source).expect("write test file");
let config_manager = ConfigManager::from_single_config(dir.path(), Config::default())
.expect("config manager");
let mut processor = Processor::new();
let overrides_with_defaults = ProcessingOptions {
remove_todo: true,
remove_fixme: true,
remove_doc: true,
custom_preserve_patterns: Vec::new(),
use_default_ignores: true,
dry_run: true,
show_diff: false,
respect_gitignore: true,
traverse_git_repos: false,
};
let with_defaults = processor
.process_file_with_config(&file_path, &config_manager, Some(&overrides_with_defaults))
.expect("process with defaults");
assert!(with_defaults.processed_content.contains("NOTE"));
assert!(with_defaults.processed_content.contains("#![feature"));
let overrides_without_defaults = ProcessingOptions {
use_default_ignores: false,
..overrides_with_defaults
};
let without_defaults = processor
.process_file_with_config(
&file_path,
&config_manager,
Some(&overrides_without_defaults),
)
.expect("process without defaults");
assert!(!without_defaults.processed_content.contains("NOTE"));
assert!(!without_defaults.processed_content.contains("#![feature"));
assert!(without_defaults.processed_content.contains("fn main()"));
}
// `//go:embed` must survive even with default ignores off and doc removal on;
// the ordinary block comment must be removed.
#[test]
fn preserves_go_embed_directives_even_without_default_ignores() {
let source = r#"package main
//go:embed hello.txt
var embedded string
func main() { /* regular comment should be removed */ }
"#;
let processed = process_go(source, false, true);
assert!(processed.contains("//go:embed hello.txt"));
assert!(!processed.contains("regular comment should be removed"));
}
// The cgo preamble comments preceding `import "C"` must be kept with default
// ignores both on and off; the ordinary block comment must be removed.
#[test]
fn preserves_go_cgo_preamble_comments() {
let source = r#"package htmltomarkdown
// #cgo LDFLAGS: -lhtml_to_markdown_ffi
// #include <stdlib.h>
// extern const char* html_to_markdown_version();
import "C"
func Version() string { return C.GoString(C.html_to_markdown_version()) /* regular comment should be removed */ }
"#;
for use_default_ignores in [true, false] {
let processed = process_go(source, use_default_ignores, true);
assert!(
processed.contains("// #cgo LDFLAGS: -lhtml_to_markdown_ffi"),
"expected to preserve cgo preamble with use_default_ignores={use_default_ignores}"
);
assert!(
processed.contains("// #include <stdlib.h>"),
"expected to preserve cgo preamble with use_default_ignores={use_default_ignores}"
);
assert!(
processed.contains("// extern const char* html_to_markdown_version();"),
"expected to preserve cgo preamble with use_default_ignores={use_default_ignores}"
);
assert!(processed.contains("import \"C\""));
assert!(!processed.contains("regular comment should be removed"));
}
}
// Ruby: the comment is removed, the `#` inside the string literal is untouched.
#[test]
fn removes_ruby_comments_without_touching_strings() {
let source = r#"# remove me
puts "Hello # not a comment"
"#;
let processed = process_language(source, LanguageConfig::ruby());
assert!(!processed.contains("# remove me"));
assert!(processed.contains("Hello # not a comment"));
}
// Ruby: the `frozen_string_literal` magic comment is kept while an ordinary
// comment is removed.
#[test]
fn preserves_ruby_frozen_string_literal_magic_comment() {
let source = r#"# frozen_string_literal: true
# remove me
puts "ok"
"#;
let processed = process_language(source, LanguageConfig::ruby());
assert!(processed.contains("# frozen_string_literal: true"));
assert!(!processed.contains("# remove me"));
}
// Shell: the shebang line stays first even with default ignores disabled.
#[test]
fn preserves_shebangs_even_without_default_ignores() {
let source = r#"#!/usr/bin/env bash
# remove me
echo "ok"
"#;
let processed =
process_language_with_default_ignores(source, LanguageConfig::shell(), false);
assert!(processed.starts_with("#!/usr/bin/env bash\n"));
assert!(!processed.contains("# remove me"));
assert!(processed.contains("echo \"ok\""));
}
// Ruby: a YARD-style `@param` comment is kept under the baseline configuration.
#[test]
fn preserves_ruby_yard_doc_comments_by_default() {
let source = r#"# @param x [Integer]
def foo(x)
x + 1
end
"#;
let processed = process_language(source, LanguageConfig::ruby());
assert!(processed.contains("# @param x [Integer]"));
}
// PHP: the line comment is removed, the comment-like string is untouched.
#[test]
fn removes_php_comments_without_touching_strings() {
let source = r#"<?php
// remove me
$s = "// not a comment";
echo $s;
"#;
let processed = process_language(source, LanguageConfig::php());
assert!(!processed.contains("// remove me"));
assert!(processed.contains("\"// not a comment\""));
}
// C: the comment trailing `#endif` is kept while an ordinary line comment is removed.
#[test]
fn preserves_c_header_guard_trailing_comments() {
let source = r#"#ifndef HTML_TO_MARKDOWN_H
#define HTML_TO_MARKDOWN_H
// remove me
int x;
#endif /* HTML_TO_MARKDOWN_H */
"#;
let processed = process_language(source, LanguageConfig::c());
assert!(processed.contains("#endif /* HTML_TO_MARKDOWN_H */"));
assert!(!processed.contains("remove me"));
assert!(processed.contains("int x;"));
}
// Elixir: the comment is removed, the comment-like string is untouched.
#[test]
fn removes_elixir_comments_without_touching_strings() {
let source = r##"# remove me
IO.puts("# not a comment")
"##;
let processed = process_language(source, LanguageConfig::elixir());
assert!(!processed.contains("# remove me"));
assert!(processed.contains("\"# not a comment\""));
}
// TOML: the comment is removed, the comment-like string value is untouched.
#[test]
fn removes_toml_comments_without_touching_strings() {
let source = r##"# remove me
key = "# not a comment"
"##;
let processed = process_language(source, LanguageConfig::toml());
assert!(!processed.contains("# remove me"));
assert!(processed.contains("\"# not a comment\""));
}
// C#: the comment is removed, the comment-like string is untouched.
#[test]
fn removes_csharp_comments_without_touching_strings() {
let source = r#"// remove me
class C { void M() { var s = "// not a comment"; } }
"#;
let processed = process_language(source, LanguageConfig::csharp());
assert!(!processed.contains("// remove me"));
assert!(processed.contains("\"// not a comment\""));
}
// Haskell: the comment is removed, the comment-like string is untouched.
#[test]
fn removes_haskell_comments_without_touching_strings() {
let source = r#"-- remove me
main = putStrLn "-- not a comment"
"#;
let processed = process_language(source, LanguageConfig::haskell());
assert!(!processed.contains("-- remove me"));
assert!(processed.contains("\"-- not a comment\""));
}
// HTML: the comment is removed, the element content is untouched.
#[test]
fn removes_html_comments_without_touching_content() {
let source = r#"<!-- remove me -->
<div>Hello</div>
"#;
let processed = process_language(source, LanguageConfig::html());
assert!(!processed.contains("remove me"));
assert!(processed.contains("<div>Hello</div>"));
}
// CSS: the comment is removed, the comment-like string value is untouched.
#[test]
fn removes_css_comments_without_touching_strings() {
let source = r#"/* remove me */
.a::before { content: "/* not a comment */"; }
"#;
let processed = process_language(source, LanguageConfig::css());
assert!(!processed.contains("remove me"));
assert!(processed.contains("\"/* not a comment */\""));
}
// XML: the comment is removed, the element text is untouched.
#[test]
fn removes_xml_comments_without_touching_text() {
let source = r#"<!-- remove me -->
<root>hello</root>
"#;
let processed = process_language(source, LanguageConfig::xml());
assert!(!processed.contains("remove me"));
assert!(processed.contains("<root>hello</root>"));
}
// SQL: the comment is removed, the comment-like string literal is untouched.
#[test]
fn removes_sql_comments_without_touching_strings() {
let source = r#"-- remove me
SELECT '-- not a comment' as val;
"#;
let processed = process_language(source, LanguageConfig::sql());
assert!(!processed.contains("-- remove me"));
assert!(processed.contains("'-- not a comment'"));
}
// Kotlin: the comment is removed, the comment-like string is untouched.
#[test]
fn removes_kotlin_comments_without_touching_strings() {
let source = r#"// remove me
fun main() { val s = "// not a comment" }
"#;
let processed = process_language(source, LanguageConfig::kotlin());
assert!(!processed.contains("// remove me"));
assert!(processed.contains("\"// not a comment\""));
}
// Swift: the comment is removed, the comment-like string is untouched.
#[test]
fn removes_swift_comments_without_touching_strings() {
let source = r#"// remove me
let s = "// not a comment"
"#;
let processed = process_language(source, LanguageConfig::swift());
assert!(!processed.contains("// remove me"));
assert!(processed.contains("\"// not a comment\""));
}
// Lua: the comment is removed, the comment-like string is untouched.
#[test]
fn removes_lua_comments_without_touching_strings() {
let source = r#"-- remove me
local s = "-- not a comment"
"#;
let processed = process_language(source, LanguageConfig::lua());
assert!(!processed.contains("-- remove me"));
assert!(processed.contains("\"-- not a comment\""));
}
// Nix: the comment is removed, the comment-like string is untouched.
#[test]
fn removes_nix_comments_without_touching_strings() {
let source = r##"# remove me
let s = "# not a comment"; in s
"##;
let processed = process_language(source, LanguageConfig::nix());
assert!(!processed.contains("# remove me"));
assert!(processed.contains("\"# not a comment\""));
}
// PowerShell: the comment is removed, the comment-like string is untouched.
#[test]
fn removes_powershell_comments_without_touching_strings() {
let source = r##"# remove me
$s = "# not a comment"
Write-Output $s
"##;
let processed = process_language(source, LanguageConfig::powershell());
assert!(!processed.contains("# remove me"));
assert!(processed.contains("\"# not a comment\""));
}
// Protobuf: the comment is removed, the comment-like default string is untouched.
#[test]
fn removes_proto_comments_without_touching_strings() {
let source = r#"// remove me
syntax = "proto3";
message A { string s = 1 [default = "// not a comment"]; }
"#;
let processed = process_language(source, LanguageConfig::proto());
assert!(!processed.contains("// remove me"));
assert!(processed.contains("\"// not a comment\""));
}
// INI: the `;` comment is removed, a `#` inside a value is left alone.
#[test]
fn removes_ini_comments_without_touching_values() {
let source = r#"; remove me
[section]
key = # not a comment
"#;
let processed = process_language(source, LanguageConfig::ini());
assert!(!processed.contains("; remove me"));
assert!(processed.contains("key = # not a comment"));
}
// Python: with `remove_docs` enabled the docstring is removed while the TODO
// and `mypy:` comments are still preserved.
#[test]
fn removes_python_docstrings_when_remove_docs_enabled() {
let source = r#""""This is a docstring"""
# TODO: regular todo
# mypy: ignore
def hello(): pass"#;
let mut processor = Processor::new();
let language_config = LanguageConfig::python();
let mut resolved_config = default_resolved_config();
resolved_config.remove_docs = true;
let (output, _, _) = processor
.process_content_with_config(source, &language_config, &resolved_config)
.expect("processing python source");
assert!(
!output.contains("This is a docstring"),
"Python docstring should be removed when remove_docs=true"
);
assert!(
output.contains("TODO: regular todo"),
"TODO should be preserved"
);
assert!(output.contains("mypy: ignore"), "mypy should be preserved");
}
// A comment containing a multi-byte character is removed without breaking
// the surrounding code.
#[test]
fn handles_utf8_multibyte_in_comments() {
let source = "// Comment with emoji 🎉\nfn main() {}\n";
let processed = process_rust(source);
assert!(!processed.contains("🎉"));
assert!(processed.contains("fn main()"));
}
// A file consisting solely of comments becomes empty (ignoring whitespace).
#[test]
fn handles_file_with_only_comments() {
let source = "// Only comments\n// Nothing else\n";
let processed = process_rust(source);
assert!(processed.trim().is_empty());
}
// Empty input yields empty output and a removal count of zero.
#[test]
fn handles_empty_file() {
let source = "";
let mut processor = Processor::new();
let language_config = LanguageConfig::rust();
let resolved_config = default_resolved_config();
let (output, removed, _) = processor
.process_content_with_config(source, &language_config, &resolved_config)
.expect("processing empty source");
assert_eq!(output, "");
assert_eq!(removed, 0);
}
// A trailing comment on the last line, with no final newline, is removed.
#[test]
fn handles_comment_at_end_of_file_no_trailing_newline() {
let source = "fn main() {} // trailing";
let processed = process_rust(source);
assert!(!processed.contains("// trailing"));
assert!(processed.contains("fn main()"));
}
}