use crate::models::Language;
/// Per-language classifier for a position within a single source line.
///
/// Both methods receive the text of one line and the offset of a pattern
/// match in it. The implementations in this file compare `pattern_pos`
/// with offsets obtained from `str::find` / `char_indices`, i.e. they treat
/// it as a byte offset into `line`.
pub trait LineFilter {
/// Returns `true` when the position `pattern_pos` of `line` lies inside a comment.
fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool;
/// Returns `true` when the position `pattern_pos` of `line` lies inside a string literal.
fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool;
}
/// Returns the line filter for `lang`, boxed as a [`LineFilter`] trait object.
///
/// `Language::Swift` and `Language::Unknown` have no filter and yield `None`.
pub fn get_filter(lang: Language) -> Option<Box<dyn LineFilter>> {
    let filter: Box<dyn LineFilter> = match lang {
        // Languages without a filter bail out before any boxing happens.
        Language::Swift | Language::Unknown => return None,
        Language::Rust => Box::new(RustLineFilter),
        Language::C => Box::new(CLineFilter),
        Language::Cpp => Box::new(CppLineFilter),
        Language::Go => Box::new(GoLineFilter),
        Language::Java => Box::new(JavaLineFilter),
        Language::JavaScript => Box::new(JavaScriptLineFilter),
        Language::TypeScript => Box::new(TypeScriptLineFilter),
        Language::Python => Box::new(PythonLineFilter),
        Language::Ruby => Box::new(RubyLineFilter),
        Language::PHP => Box::new(PHPLineFilter),
        Language::CSharp => Box::new(CSharpLineFilter),
        Language::Kotlin => Box::new(KotlinLineFilter),
        Language::Zig => Box::new(ZigLineFilter),
        Language::Vue => Box::new(VueLineFilter),
        Language::Svelte => Box::new(SvelteLineFilter),
    };
    Some(filter)
}
/// Line filter for Rust sources.
///
/// Recognises `//` line comments, nestable `/* ... */` block comments,
/// ordinary `"..."` strings with backslash escapes, and raw strings
/// (`r"..."`, `r#"..."#`, optionally preceded by `b`). Each line is analysed
/// on its own, so a construct opened on an earlier line is not seen.
struct RustLineFilter;

/// Lexical region of a Rust line that a byte offset falls into.
enum RustRegion {
    Code,
    Comment,
    Str,
}

impl RustLineFilter {
    /// Classifies byte offset `pattern_pos` of `line` with one left-to-right
    /// scan, so comment markers inside strings and quotes inside comments are
    /// not misread.
    ///
    /// A position on a comment opener counts as comment; a position on the
    /// first byte of a string literal counts as code, while its closing quote
    /// counts as string. Offsets at or past the end of the line are only ever
    /// reported as `Comment` (when a comment runs to the end) or `Code`.
    fn region_at(line: &str, pattern_pos: usize) -> RustRegion {
        let bytes = line.as_bytes();
        let mut i = 0;
        // Invariant: `i <= pattern_pos` and offset `i` is in plain code.
        while i < bytes.len() {
            let rest = &bytes[i..];
            if rest.starts_with(b"//") {
                return RustRegion::Comment;
            }
            if rest.starts_with(b"/*") {
                match Self::block_comment_end(bytes, i) {
                    Some(end) if end <= pattern_pos => {
                        i = end;
                        continue;
                    }
                    // Position is inside the comment, or the comment never closes.
                    _ => return RustRegion::Comment,
                }
            }
            if i >= pattern_pos {
                return RustRegion::Code;
            }
            let literal_end = match bytes[i] {
                b'"' => Some(Self::string_end(bytes, i + 1)),
                b'r' if Self::starts_literal_prefix(bytes, i) => Self::raw_string_end(line, i),
                _ => None,
            };
            match literal_end {
                Some(end) if pattern_pos < end => return RustRegion::Str,
                Some(end) => i = end,
                None => i += 1,
            }
        }
        RustRegion::Code
    }

    /// Returns `true` when the `r` at offset `at` can begin a raw-string
    /// prefix: it starts a token, or directly follows a token-starting `b`.
    fn starts_literal_prefix(bytes: &[u8], at: usize) -> bool {
        let is_ident = |b: u8| b.is_ascii_alphanumeric() || b == b'_';
        match &bytes[..at] {
            [] => true,
            [.., prev] if !is_ident(*prev) => true,
            [b'b'] => true,
            [.., before, b'b'] => !is_ident(*before),
            _ => false,
        }
    }

    /// Returns the offset just past the (possibly nested) block comment that
    /// opens at `open`, or `None` when it is not closed on this line.
    fn block_comment_end(bytes: &[u8], open: usize) -> Option<usize> {
        let mut depth = 1usize;
        let mut j = open + 2;
        while j < bytes.len() {
            let rest = &bytes[j..];
            if rest.starts_with(b"/*") {
                depth += 1;
                j += 2;
            } else if rest.starts_with(b"*/") {
                depth -= 1;
                j += 2;
                if depth == 0 {
                    return Some(j);
                }
            } else {
                j += 1;
            }
        }
        None
    }

    /// Returns the offset just past the closing quote of an ordinary string
    /// whose contents start at `body`, or the line length if unterminated.
    fn string_end(bytes: &[u8], body: usize) -> usize {
        let mut j = body;
        while j < bytes.len() {
            match bytes[j] {
                // Skip the escaped byte so `\"` does not close the string.
                b'\\' => j += 2,
                b'"' => return j + 1,
                _ => j += 1,
            }
        }
        bytes.len()
    }

    /// If a raw string literal starts at the `r` at offset `start`, returns
    /// the offset just past its closing delimiter (the line length when it is
    /// unterminated). Returns `None` for anything else, e.g. `r#ident`.
    fn raw_string_end(line: &str, start: usize) -> Option<usize> {
        let bytes = line.as_bytes();
        let hashes = bytes[start + 1..].iter().take_while(|&&b| b == b'#').count();
        let quote = start + 1 + hashes;
        if bytes.get(quote) != Some(&b'"') {
            return None;
        }
        // The closer is a quote followed by exactly as many `#` as the opener.
        let closing = format!("\"{}", "#".repeat(hashes));
        let end = line[quote + 1..]
            .find(&closing)
            .map_or(bytes.len(), |off| quote + 1 + off + closing.len());
        Some(end)
    }
}

impl LineFilter for RustLineFilter {
    /// Returns `true` when byte offset `pattern_pos` of `line` is inside a
    /// `//` or `/* */` comment that starts on this line.
    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
        matches!(Self::region_at(line, pattern_pos), RustRegion::Comment)
    }

    /// Returns `true` when byte offset `pattern_pos` of `line` is inside an
    /// ordinary or raw string literal that starts on this line.
    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
        matches!(Self::region_at(line, pattern_pos), RustRegion::Str)
    }
}
/// Line filter for C sources.
///
/// Recognises `//` line comments, `/* ... */` block comments and double-quoted
/// string literals with backslash escapes. Each line is analysed on its own,
/// so a construct opened on an earlier line is not seen.
struct CLineFilter;

/// Lexical region of a C line that a byte offset falls into.
enum CRegion {
    Code,
    Comment,
    Str,
}

impl CLineFilter {
    /// Classifies byte offset `pattern_pos` of `line` with one left-to-right
    /// scan, so comment markers inside strings and quotes inside comments are
    /// not misread, and every block comment on the line is considered.
    ///
    /// A position on a comment opener counts as comment; a position on an
    /// opening quote counts as code, while the closing quote counts as string.
    fn region_at(line: &str, pattern_pos: usize) -> CRegion {
        let bytes = line.as_bytes();
        let mut i = 0;
        // Invariant: `i <= pattern_pos` and offset `i` is in plain code.
        while i < bytes.len() {
            let rest = &bytes[i..];
            if rest.starts_with(b"//") {
                return CRegion::Comment;
            }
            if rest.starts_with(b"/*") {
                // `i + 2` is a char boundary because `/*` is ASCII.
                match line[i + 2..].find("*/").map(|off| i + 2 + off + 2) {
                    Some(end) if end <= pattern_pos => {
                        i = end;
                        continue;
                    }
                    // Position is inside the comment, or the comment never closes.
                    _ => return CRegion::Comment,
                }
            }
            if i >= pattern_pos {
                return CRegion::Code;
            }
            if bytes[i] == b'"' {
                let end = Self::string_end(bytes, i + 1);
                if pattern_pos < end {
                    return CRegion::Str;
                }
                i = end;
            } else {
                i += 1;
            }
        }
        CRegion::Code
    }

    /// Returns the offset just past the closing quote of the string whose
    /// contents start at `body`, or the line length if it is unterminated.
    fn string_end(bytes: &[u8], body: usize) -> usize {
        let mut j = body;
        while j < bytes.len() {
            match bytes[j] {
                // Skip the escaped byte so `\"` does not close the string.
                b'\\' => j += 2,
                b'"' => return j + 1,
                _ => j += 1,
            }
        }
        bytes.len()
    }
}

impl LineFilter for CLineFilter {
    /// Returns `true` when byte offset `pattern_pos` of `line` is inside a
    /// `//` or `/* */` comment that starts on this line.
    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
        matches!(Self::region_at(line, pattern_pos), CRegion::Comment)
    }

    /// Returns `true` when byte offset `pattern_pos` of `line` is inside a
    /// double-quoted string literal that starts on this line.
    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
        matches!(Self::region_at(line, pattern_pos), CRegion::Str)
    }
}
struct CppLineFilter;
impl LineFilter for CppLineFilter {
fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
CLineFilter.is_in_comment(line, pattern_pos)
}
fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
CLineFilter.is_in_string(line, pattern_pos)
}
}
/// Line filter for Go sources.
///
/// Recognises `//` line comments, `/* ... */` block comments, interpreted
/// `"..."` strings with backslash escapes and raw `` `...` `` strings (no
/// escapes). Each line is analysed on its own, so a construct opened on an
/// earlier line is not seen.
struct GoLineFilter;

/// Lexical region of a Go line that a byte offset falls into.
enum GoRegion {
    Code,
    Comment,
    Str,
}

impl GoLineFilter {
    /// Classifies byte offset `pattern_pos` of `line` with one left-to-right
    /// scan that tracks both string kinds and both comment kinds together.
    ///
    /// A position on a comment opener counts as comment; a position on an
    /// opening quote counts as code, while the closing quote counts as string.
    fn region_at(line: &str, pattern_pos: usize) -> GoRegion {
        let bytes = line.as_bytes();
        let mut i = 0;
        // Invariant: `i <= pattern_pos` and offset `i` is in plain code.
        while i < bytes.len() {
            let rest = &bytes[i..];
            if rest.starts_with(b"//") {
                return GoRegion::Comment;
            }
            if rest.starts_with(b"/*") {
                // `i + 2` is a char boundary because `/*` is ASCII.
                match line[i + 2..].find("*/").map(|off| i + 2 + off + 2) {
                    Some(end) if end <= pattern_pos => {
                        i = end;
                        continue;
                    }
                    // Position is inside the comment, or the comment never closes.
                    _ => return GoRegion::Comment,
                }
            }
            if i >= pattern_pos {
                return GoRegion::Code;
            }
            let literal_end = match bytes[i] {
                b'"' => Some(Self::literal_end(bytes, i + 1, b'"', true)),
                // Raw strings take backslashes literally.
                b'`' => Some(Self::literal_end(bytes, i + 1, b'`', false)),
                _ => None,
            };
            match literal_end {
                Some(end) if pattern_pos < end => return GoRegion::Str,
                Some(end) => i = end,
                None => i += 1,
            }
        }
        GoRegion::Code
    }

    /// Returns the offset just past the closing `quote` of a literal whose
    /// contents start at `body`, or the line length if it is unterminated.
    /// Backslash escapes are honoured only when `escapes` is set.
    fn literal_end(bytes: &[u8], body: usize, quote: u8, escapes: bool) -> usize {
        let mut j = body;
        while j < bytes.len() {
            if escapes && bytes[j] == b'\\' {
                j += 2;
            } else if bytes[j] == quote {
                return j + 1;
            } else {
                j += 1;
            }
        }
        bytes.len()
    }
}

impl LineFilter for GoLineFilter {
    /// Returns `true` when byte offset `pattern_pos` of `line` is inside a
    /// `//` or `/* */` comment that starts on this line.
    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
        matches!(Self::region_at(line, pattern_pos), GoRegion::Comment)
    }

    /// Returns `true` when byte offset `pattern_pos` of `line` is inside an
    /// interpreted or raw string literal that starts on this line.
    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
        matches!(Self::region_at(line, pattern_pos), GoRegion::Str)
    }
}
/// Line filter for Java sources.
///
/// Recognises `//` line comments, `/* ... */` block comments and double-quoted
/// string literals with backslash escapes. Each line is analysed on its own,
/// so a construct opened on an earlier line is not seen.
struct JavaLineFilter;

/// Lexical region of a Java line that a byte offset falls into.
enum JavaRegion {
    Code,
    Comment,
    Str,
}

impl JavaLineFilter {
    /// Classifies byte offset `pattern_pos` of `line` with one left-to-right
    /// scan, so comment markers inside strings and quotes inside comments are
    /// not misread, and every block comment on the line is considered.
    ///
    /// A position on a comment opener counts as comment; a position on an
    /// opening quote counts as code, while the closing quote counts as string.
    fn region_at(line: &str, pattern_pos: usize) -> JavaRegion {
        let bytes = line.as_bytes();
        let mut i = 0;
        // Invariant: `i <= pattern_pos` and offset `i` is in plain code.
        while i < bytes.len() {
            let rest = &bytes[i..];
            if rest.starts_with(b"//") {
                return JavaRegion::Comment;
            }
            if rest.starts_with(b"/*") {
                // `i + 2` is a char boundary because `/*` is ASCII.
                match line[i + 2..].find("*/").map(|off| i + 2 + off + 2) {
                    Some(end) if end <= pattern_pos => {
                        i = end;
                        continue;
                    }
                    // Position is inside the comment, or the comment never closes.
                    _ => return JavaRegion::Comment,
                }
            }
            if i >= pattern_pos {
                return JavaRegion::Code;
            }
            if bytes[i] == b'"' {
                let end = Self::string_end(bytes, i + 1);
                if pattern_pos < end {
                    return JavaRegion::Str;
                }
                i = end;
            } else {
                i += 1;
            }
        }
        JavaRegion::Code
    }

    /// Returns the offset just past the closing quote of the string whose
    /// contents start at `body`, or the line length if it is unterminated.
    fn string_end(bytes: &[u8], body: usize) -> usize {
        let mut j = body;
        while j < bytes.len() {
            match bytes[j] {
                // Skip the escaped byte so `\"` does not close the string.
                b'\\' => j += 2,
                b'"' => return j + 1,
                _ => j += 1,
            }
        }
        bytes.len()
    }
}

impl LineFilter for JavaLineFilter {
    /// Returns `true` when byte offset `pattern_pos` of `line` is inside a
    /// `//` or `/* */` comment that starts on this line.
    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
        matches!(Self::region_at(line, pattern_pos), JavaRegion::Comment)
    }

    /// Returns `true` when byte offset `pattern_pos` of `line` is inside a
    /// double-quoted string literal that starts on this line.
    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
        matches!(Self::region_at(line, pattern_pos), JavaRegion::Str)
    }
}
/// Line filter for JavaScript sources.
///
/// Recognises `//` line comments, `/* ... */` block comments and the three
/// string forms `"..."`, `'...'` and `` `...` ``, all with backslash escapes.
/// Each line is analysed on its own, so a construct opened on an earlier line
/// is not seen.
struct JavaScriptLineFilter;

/// Lexical region of a JavaScript line that a byte offset falls into.
enum JavaScriptRegion {
    Code,
    Comment,
    Str,
}

impl JavaScriptLineFilter {
    /// Classifies byte offset `pattern_pos` of `line` with one left-to-right
    /// scan, so comment markers inside strings (such as the `//` of a URL)
    /// and quotes inside comments are not misread.
    ///
    /// A position on a comment opener counts as comment; a position on an
    /// opening quote counts as code, while the closing quote counts as string.
    fn region_at(line: &str, pattern_pos: usize) -> JavaScriptRegion {
        let bytes = line.as_bytes();
        let mut i = 0;
        // Invariant: `i <= pattern_pos` and offset `i` is in plain code.
        while i < bytes.len() {
            let rest = &bytes[i..];
            if rest.starts_with(b"//") {
                return JavaScriptRegion::Comment;
            }
            if rest.starts_with(b"/*") {
                // `i + 2` is a char boundary because `/*` is ASCII.
                match line[i + 2..].find("*/").map(|off| i + 2 + off + 2) {
                    Some(end) if end <= pattern_pos => {
                        i = end;
                        continue;
                    }
                    // Position is inside the comment, or the comment never closes.
                    _ => return JavaScriptRegion::Comment,
                }
            }
            if i >= pattern_pos {
                return JavaScriptRegion::Code;
            }
            match bytes[i] {
                quote @ (b'"' | b'\'' | b'`') => {
                    let end = Self::quoted_end(bytes, i + 1, quote);
                    if pattern_pos < end {
                        return JavaScriptRegion::Str;
                    }
                    i = end;
                }
                _ => i += 1,
            }
        }
        JavaScriptRegion::Code
    }

    /// Returns the offset just past the closing `quote` of a string whose
    /// contents start at `body`, or the line length if it is unterminated.
    fn quoted_end(bytes: &[u8], body: usize, quote: u8) -> usize {
        let mut j = body;
        while j < bytes.len() {
            match bytes[j] {
                // Skip the escaped byte so an escaped quote does not close the string.
                b'\\' => j += 2,
                b if b == quote => return j + 1,
                _ => j += 1,
            }
        }
        bytes.len()
    }
}

impl LineFilter for JavaScriptLineFilter {
    /// Returns `true` when byte offset `pattern_pos` of `line` is inside a
    /// `//` or `/* */` comment that starts on this line.
    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
        matches!(Self::region_at(line, pattern_pos), JavaScriptRegion::Comment)
    }

    /// Returns `true` when byte offset `pattern_pos` of `line` is inside a
    /// double-quoted, single-quoted or template string that starts on this line.
    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
        matches!(Self::region_at(line, pattern_pos), JavaScriptRegion::Str)
    }
}
struct TypeScriptLineFilter;
impl LineFilter for TypeScriptLineFilter {
fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
JavaScriptLineFilter.is_in_comment(line, pattern_pos)
}
fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
JavaScriptLineFilter.is_in_string(line, pattern_pos)
}
}
/// Line filter for Python sources.
///
/// Recognises `#` comments and single-, double- and triple-quoted strings
/// (`'...'`, `"..."`, `'''...'''`, `"""..."""`) with backslash escapes. Each
/// line is analysed on its own, so a string opened on an earlier line is not
/// seen.
struct PythonLineFilter;

/// Lexical region of a Python line that a byte offset falls into.
enum PythonRegion {
    Code,
    Comment,
    Str,
}

impl PythonLineFilter {
    /// Classifies byte offset `pattern_pos` of `line` with one left-to-right
    /// scan, so a `#` inside a string does not start a comment and quotes
    /// inside a triple-quoted string do not open new strings.
    ///
    /// A position on the `#` counts as comment; a position on the first byte
    /// of a string opener counts as code, while the closing delimiter counts
    /// as string.
    fn region_at(line: &str, pattern_pos: usize) -> PythonRegion {
        let bytes = line.as_bytes();
        let mut i = 0;
        // Invariant: `i <= pattern_pos` and offset `i` is in plain code.
        while i < bytes.len() {
            let current = bytes[i];
            if current == b'#' {
                return PythonRegion::Comment;
            }
            if i >= pattern_pos {
                return PythonRegion::Code;
            }
            if current != b'"' && current != b'\'' {
                i += 1;
                continue;
            }
            // Prefer the triple-quoted form when three quotes open the literal.
            let triple = [current; 3];
            let delim: &[u8] = if bytes[i..].starts_with(&triple) {
                &triple
            } else {
                &triple[..1]
            };
            let end = Self::literal_end(bytes, i + delim.len(), delim);
            if pattern_pos < end {
                return PythonRegion::Str;
            }
            i = end;
        }
        PythonRegion::Code
    }

    /// Returns the offset just past the closing `delim` of a string whose
    /// contents start at `body`, or the line length if it is unterminated.
    fn literal_end(bytes: &[u8], body: usize, delim: &[u8]) -> usize {
        let mut j = body;
        while j < bytes.len() {
            if bytes[j] == b'\\' {
                // Skip the escaped byte so an escaped quote does not close the string.
                j += 2;
            } else if bytes[j..].starts_with(delim) {
                return j + delim.len();
            } else {
                j += 1;
            }
        }
        bytes.len()
    }
}

impl LineFilter for PythonLineFilter {
    /// Returns `true` when byte offset `pattern_pos` of `line` is at or after
    /// a `#` that is not part of a string literal.
    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
        matches!(Self::region_at(line, pattern_pos), PythonRegion::Comment)
    }

    /// Returns `true` when byte offset `pattern_pos` of `line` is inside a
    /// string literal that starts on this line.
    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
        matches!(Self::region_at(line, pattern_pos), PythonRegion::Str)
    }
}
/// Line filter for Ruby sources.
///
/// Recognises `#` comments and `'...'` / `"..."` strings with backslash
/// escapes. Each line is analysed on its own, so a string opened on an
/// earlier line is not seen.
struct RubyLineFilter;

/// Lexical region of a Ruby line that a byte offset falls into.
enum RubyRegion {
    Code,
    Comment,
    Str,
}

impl RubyLineFilter {
    /// Classifies byte offset `pattern_pos` of `line` with one left-to-right
    /// scan, so a `#` inside a string (including `#{...}` interpolation) does
    /// not start a comment.
    ///
    /// A position on the `#` counts as comment; a position on an opening
    /// quote counts as code, while the closing quote counts as string.
    fn region_at(line: &str, pattern_pos: usize) -> RubyRegion {
        let bytes = line.as_bytes();
        let mut i = 0;
        // Invariant: `i <= pattern_pos` and offset `i` is in plain code.
        while i < bytes.len() {
            if bytes[i] == b'#' {
                return RubyRegion::Comment;
            }
            if i >= pattern_pos {
                return RubyRegion::Code;
            }
            match bytes[i] {
                quote @ (b'"' | b'\'') => {
                    let end = Self::quoted_end(bytes, i + 1, quote);
                    if pattern_pos < end {
                        return RubyRegion::Str;
                    }
                    i = end;
                }
                _ => i += 1,
            }
        }
        RubyRegion::Code
    }

    /// Returns the offset just past the closing `quote` of a string whose
    /// contents start at `body`, or the line length if it is unterminated.
    fn quoted_end(bytes: &[u8], body: usize, quote: u8) -> usize {
        let mut j = body;
        while j < bytes.len() {
            match bytes[j] {
                // Skip the escaped byte so an escaped quote does not close the string.
                b'\\' => j += 2,
                b if b == quote => return j + 1,
                _ => j += 1,
            }
        }
        bytes.len()
    }
}

impl LineFilter for RubyLineFilter {
    /// Returns `true` when byte offset `pattern_pos` of `line` is at or after
    /// a `#` that is not part of a string literal.
    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
        matches!(Self::region_at(line, pattern_pos), RubyRegion::Comment)
    }

    /// Returns `true` when byte offset `pattern_pos` of `line` is inside a
    /// single- or double-quoted string that starts on this line.
    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
        matches!(Self::region_at(line, pattern_pos), RubyRegion::Str)
    }
}
/// Line filter for PHP sources.
///
/// Recognises `//` and `#` line comments, `/* ... */` block comments and
/// `'...'` / `"..."` strings with backslash escapes. Each line is analysed on
/// its own, so a construct opened on an earlier line is not seen.
struct PHPLineFilter;

/// Lexical region of a PHP line that a byte offset falls into.
enum PhpRegion {
    Code,
    Comment,
    Str,
}

impl PHPLineFilter {
    /// Classifies byte offset `pattern_pos` of `line` with one left-to-right
    /// scan, so comment markers inside strings and quotes inside comments are
    /// not misread, and every block comment on the line is considered.
    ///
    /// A position on a comment opener counts as comment; a position on an
    /// opening quote counts as code, while the closing quote counts as string.
    fn region_at(line: &str, pattern_pos: usize) -> PhpRegion {
        let bytes = line.as_bytes();
        let mut i = 0;
        // Invariant: `i <= pattern_pos` and offset `i` is in plain code.
        while i < bytes.len() {
            let rest = &bytes[i..];
            if bytes[i] == b'#' || rest.starts_with(b"//") {
                return PhpRegion::Comment;
            }
            if rest.starts_with(b"/*") {
                // `i + 2` is a char boundary because `/*` is ASCII.
                match line[i + 2..].find("*/").map(|off| i + 2 + off + 2) {
                    Some(end) if end <= pattern_pos => {
                        i = end;
                        continue;
                    }
                    // Position is inside the comment, or the comment never closes.
                    _ => return PhpRegion::Comment,
                }
            }
            if i >= pattern_pos {
                return PhpRegion::Code;
            }
            match bytes[i] {
                quote @ (b'"' | b'\'') => {
                    let end = Self::quoted_end(bytes, i + 1, quote);
                    if pattern_pos < end {
                        return PhpRegion::Str;
                    }
                    i = end;
                }
                _ => i += 1,
            }
        }
        PhpRegion::Code
    }

    /// Returns the offset just past the closing `quote` of a string whose
    /// contents start at `body`, or the line length if it is unterminated.
    fn quoted_end(bytes: &[u8], body: usize, quote: u8) -> usize {
        let mut j = body;
        while j < bytes.len() {
            match bytes[j] {
                // Skip the escaped byte so an escaped quote does not close the string.
                b'\\' => j += 2,
                b if b == quote => return j + 1,
                _ => j += 1,
            }
        }
        bytes.len()
    }
}

impl LineFilter for PHPLineFilter {
    /// Returns `true` when byte offset `pattern_pos` of `line` is inside a
    /// `//`, `#` or `/* */` comment that starts on this line.
    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
        matches!(Self::region_at(line, pattern_pos), PhpRegion::Comment)
    }

    /// Returns `true` when byte offset `pattern_pos` of `line` is inside a
    /// single- or double-quoted string that starts on this line.
    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
        matches!(Self::region_at(line, pattern_pos), PhpRegion::Str)
    }
}
/// Line filter for C# sources.
///
/// Recognises `//` line comments, `/* ... */` block comments, regular
/// `"..."` strings with backslash escapes and verbatim strings (`@"..."`,
/// also `@$"..."` / `$@"..."`), in which `""` is an embedded quote and
/// backslashes are literal. Each line is analysed on its own, so a construct
/// opened on an earlier line is not seen.
struct CSharpLineFilter;

/// Lexical region of a C# line that a byte offset falls into.
enum CSharpRegion {
    Code,
    Comment,
    Str,
}

impl CSharpLineFilter {
    /// Classifies byte offset `pattern_pos` of `line` with one left-to-right
    /// scan. All positions are byte offsets, so non-ASCII text earlier on the
    /// line does not shift the result.
    ///
    /// A position on a comment opener counts as comment; a position on the
    /// first byte of a string opener counts as code, while the closing quote
    /// counts as string.
    fn region_at(line: &str, pattern_pos: usize) -> CSharpRegion {
        let bytes = line.as_bytes();
        let mut i = 0;
        // Invariant: `i <= pattern_pos` and offset `i` is in plain code.
        while i < bytes.len() {
            let rest = &bytes[i..];
            if rest.starts_with(b"//") {
                return CSharpRegion::Comment;
            }
            if rest.starts_with(b"/*") {
                // `i + 2` is a char boundary because `/*` is ASCII.
                match line[i + 2..].find("*/").map(|off| i + 2 + off + 2) {
                    Some(end) if end <= pattern_pos => {
                        i = end;
                        continue;
                    }
                    // Position is inside the comment, or the comment never closes.
                    _ => return CSharpRegion::Comment,
                }
            }
            if i >= pattern_pos {
                return CSharpRegion::Code;
            }
            let literal_end = match bytes[i] {
                b'@' => {
                    // `@"` and `@$"` both open a verbatim string.
                    let quote = if bytes.get(i + 1) == Some(&b'$') { i + 2 } else { i + 1 };
                    if bytes.get(quote) == Some(&b'"') {
                        Some(Self::verbatim_end(bytes, quote + 1))
                    } else {
                        None
                    }
                }
                b'"' => Some(Self::string_end(bytes, i + 1)),
                _ => None,
            };
            match literal_end {
                Some(end) if pattern_pos < end => return CSharpRegion::Str,
                Some(end) => i = end,
                None => i += 1,
            }
        }
        CSharpRegion::Code
    }

    /// Returns the offset just past the closing quote of a verbatim string
    /// whose contents start at `body`, or the line length if unterminated.
    fn verbatim_end(bytes: &[u8], body: usize) -> usize {
        let mut j = body;
        while j < bytes.len() {
            if bytes[j] == b'"' {
                if bytes.get(j + 1) == Some(&b'"') {
                    // `""` is an embedded quote, not the end of the string.
                    j += 2;
                    continue;
                }
                return j + 1;
            }
            j += 1;
        }
        bytes.len()
    }

    /// Returns the offset just past the closing quote of a regular string
    /// whose contents start at `body`, or the line length if unterminated.
    fn string_end(bytes: &[u8], body: usize) -> usize {
        let mut j = body;
        while j < bytes.len() {
            match bytes[j] {
                // Skip the escaped byte so `\"` does not close the string.
                b'\\' => j += 2,
                b'"' => return j + 1,
                _ => j += 1,
            }
        }
        bytes.len()
    }
}

impl LineFilter for CSharpLineFilter {
    /// Returns `true` when byte offset `pattern_pos` of `line` is inside a
    /// `//` or `/* */` comment that starts on this line.
    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
        matches!(Self::region_at(line, pattern_pos), CSharpRegion::Comment)
    }

    /// Returns `true` when byte offset `pattern_pos` of `line` is inside a
    /// regular or verbatim string literal that starts on this line.
    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
        matches!(Self::region_at(line, pattern_pos), CSharpRegion::Str)
    }
}
/// Line filter for Kotlin sources.
///
/// Recognises `//` line comments, `/* ... */` block comments, regular
/// `"..."` strings with backslash escapes and raw `"""..."""` strings, in
/// which backslashes are literal. Each line is analysed on its own, so a
/// construct opened on an earlier line is not seen.
struct KotlinLineFilter;

/// Lexical region of a Kotlin line that a byte offset falls into.
enum KotlinRegion {
    Code,
    Comment,
    Str,
}

impl KotlinLineFilter {
    /// Classifies byte offset `pattern_pos` of `line` with one left-to-right
    /// scan, so comment markers inside strings and quotes inside comments are
    /// not misread, and raw strings are closed only by `"""`.
    ///
    /// A position on a comment opener counts as comment; a position on the
    /// first byte of a string opener counts as code, while the closing
    /// delimiter counts as string.
    fn region_at(line: &str, pattern_pos: usize) -> KotlinRegion {
        const TRIPLE: &[u8] = b"\"\"\"";
        let bytes = line.as_bytes();
        let mut i = 0;
        // Invariant: `i <= pattern_pos` and offset `i` is in plain code.
        while i < bytes.len() {
            let rest = &bytes[i..];
            if rest.starts_with(b"//") {
                return KotlinRegion::Comment;
            }
            if rest.starts_with(b"/*") {
                // `i + 2` is a char boundary because `/*` is ASCII.
                match line[i + 2..].find("*/").map(|off| i + 2 + off + 2) {
                    Some(end) if end <= pattern_pos => {
                        i = end;
                        continue;
                    }
                    // Position is inside the comment, or the comment never closes.
                    _ => return KotlinRegion::Comment,
                }
            }
            if i >= pattern_pos {
                return KotlinRegion::Code;
            }
            if bytes[i] != b'"' {
                i += 1;
                continue;
            }
            let end = if rest.starts_with(TRIPLE) {
                Self::literal_end(bytes, i + TRIPLE.len(), TRIPLE, false)
            } else {
                Self::literal_end(bytes, i + 1, &TRIPLE[..1], true)
            };
            if pattern_pos < end {
                return KotlinRegion::Str;
            }
            i = end;
        }
        KotlinRegion::Code
    }

    /// Returns the offset just past the closing `delim` of a string whose
    /// contents start at `body`, or the line length if it is unterminated.
    /// Backslash escapes are honoured only when `escapes` is set.
    fn literal_end(bytes: &[u8], body: usize, delim: &[u8], escapes: bool) -> usize {
        let mut j = body;
        while j < bytes.len() {
            if escapes && bytes[j] == b'\\' {
                j += 2;
            } else if bytes[j..].starts_with(delim) {
                return j + delim.len();
            } else {
                j += 1;
            }
        }
        bytes.len()
    }
}

impl LineFilter for KotlinLineFilter {
    /// Returns `true` when byte offset `pattern_pos` of `line` is inside a
    /// `//` or `/* */` comment that starts on this line.
    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
        matches!(Self::region_at(line, pattern_pos), KotlinRegion::Comment)
    }

    /// Returns `true` when byte offset `pattern_pos` of `line` is inside a
    /// regular or raw string literal that starts on this line.
    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
        matches!(Self::region_at(line, pattern_pos), KotlinRegion::Str)
    }
}
/// Line filter for Zig sources.
///
/// Recognises `//` line comments and double-quoted string literals with
/// backslash escapes. Each line is analysed on its own.
struct ZigLineFilter;

/// Lexical region of a Zig line that a byte offset falls into.
enum ZigRegion {
    Code,
    Comment,
    Str,
}

impl ZigLineFilter {
    /// Classifies byte offset `pattern_pos` of `line` with one left-to-right
    /// scan, so a `//` inside a string does not start a comment and quotes
    /// inside a comment do not open a string.
    ///
    /// A position on the `//` counts as comment; a position on an opening
    /// quote counts as code, while the closing quote counts as string.
    fn region_at(line: &str, pattern_pos: usize) -> ZigRegion {
        let bytes = line.as_bytes();
        let mut i = 0;
        // Invariant: `i <= pattern_pos` and offset `i` is in plain code.
        while i < bytes.len() {
            if bytes[i..].starts_with(b"//") {
                return ZigRegion::Comment;
            }
            if i >= pattern_pos {
                return ZigRegion::Code;
            }
            if bytes[i] == b'"' {
                let end = Self::string_end(bytes, i + 1);
                if pattern_pos < end {
                    return ZigRegion::Str;
                }
                i = end;
            } else {
                i += 1;
            }
        }
        ZigRegion::Code
    }

    /// Returns the offset just past the closing quote of the string whose
    /// contents start at `body`, or the line length if it is unterminated.
    fn string_end(bytes: &[u8], body: usize) -> usize {
        let mut j = body;
        while j < bytes.len() {
            match bytes[j] {
                // Skip the escaped byte so `\"` does not close the string.
                b'\\' => j += 2,
                b'"' => return j + 1,
                _ => j += 1,
            }
        }
        bytes.len()
    }
}

impl LineFilter for ZigLineFilter {
    /// Returns `true` when byte offset `pattern_pos` of `line` is at or after
    /// a `//` that is not part of a string literal.
    fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
        matches!(Self::region_at(line, pattern_pos), ZigRegion::Comment)
    }

    /// Returns `true` when byte offset `pattern_pos` of `line` is inside a
    /// double-quoted string literal that starts on this line.
    fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
        matches!(Self::region_at(line, pattern_pos), ZigRegion::Str)
    }
}
struct VueLineFilter;
impl LineFilter for VueLineFilter {
fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
JavaScriptLineFilter.is_in_comment(line, pattern_pos)
}
fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
JavaScriptLineFilter.is_in_string(line, pattern_pos)
}
}
struct SvelteLineFilter;
impl LineFilter for SvelteLineFilter {
fn is_in_comment(&self, line: &str, pattern_pos: usize) -> bool {
JavaScriptLineFilter.is_in_comment(line, pattern_pos)
}
fn is_in_string(&self, line: &str, pattern_pos: usize) -> bool {
JavaScriptLineFilter.is_in_string(line, pattern_pos)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `is_in_comment` against every `(offset, expected)` pair.
    fn expect_comment(filter: &dyn LineFilter, line: &str, cases: &[(usize, bool)]) {
        for &(pos, expected) in cases {
            assert_eq!(filter.is_in_comment(line, pos), expected);
        }
    }

    /// Checks `is_in_string` against every `(offset, expected)` pair.
    fn expect_string(filter: &dyn LineFilter, line: &str, cases: &[(usize, bool)]) {
        for &(pos, expected) in cases {
            assert_eq!(filter.is_in_string(line, pos), expected);
        }
    }

    #[test]
    fn test_rust_single_line_comment() {
        expect_comment(
            &RustLineFilter,
            "let x = 5; // extract_symbols here",
            &[(15, true), (4, false)],
        );
    }

    #[test]
    fn test_rust_multiline_comment() {
        expect_comment(
            &RustLineFilter,
            "let x = /* extract_symbols */ 5;",
            &[(11, true), (30, false)],
        );
    }

    #[test]
    fn test_rust_string_literal() {
        expect_string(
            &RustLineFilter,
            r#"let s = "extract_symbols";"#,
            &[(9, true), (27, false)],
        );
    }

    #[test]
    fn test_rust_raw_string() {
        expect_string(&RustLineFilter, r#"let s = r"extract_symbols";"#, &[(10, true)]);
    }

    #[test]
    fn test_rust_raw_string_with_hashes() {
        expect_string(
            &RustLineFilter,
            r###"let s = r#"extract_symbols"#;"###,
            &[(11, true)],
        );
    }

    #[test]
    fn test_rust_escaped_quote() {
        expect_string(
            &RustLineFilter,
            r#"let s = "before \" extract_symbols after";"#,
            &[(15, true)],
        );
    }

    #[test]
    fn test_js_single_line_comment() {
        expect_comment(
            &JavaScriptLineFilter,
            "let x = 5; // extract_symbols here",
            &[(15, true), (4, false)],
        );
    }

    #[test]
    fn test_js_string_double_quote() {
        expect_string(
            &JavaScriptLineFilter,
            r#"let s = "extract_symbols";"#,
            &[(9, true), (27, false)],
        );
    }

    #[test]
    fn test_js_string_single_quote() {
        expect_string(&JavaScriptLineFilter, "let s = 'extract_symbols';", &[(9, true)]);
    }

    #[test]
    fn test_js_template_literal() {
        expect_string(&JavaScriptLineFilter, "let s = `extract_symbols`;", &[(9, true)]);
    }

    #[test]
    fn test_python_comment() {
        expect_comment(
            &PythonLineFilter,
            "x = 5 # extract_symbols here",
            &[(9, true), (0, false)],
        );
    }

    #[test]
    fn test_python_string() {
        expect_string(&PythonLineFilter, r#"s = "extract_symbols""#, &[(5, true)]);
    }

    #[test]
    fn test_python_triple_quote() {
        expect_string(&PythonLineFilter, r#"s = """extract_symbols""""#, &[(7, true)]);
    }

    #[test]
    fn test_go_raw_string() {
        expect_string(&GoLineFilter, "s := `extract_symbols`", &[(6, true)]);
    }

    #[test]
    fn test_csharp_verbatim_string() {
        expect_string(
            &CSharpLineFilter,
            r#"string s = @"extract_symbols";"#,
            &[(13, true)],
        );
    }

    #[test]
    fn test_csharp_verbatim_escaped_quote() {
        expect_string(
            &CSharpLineFilter,
            r#"string s = @"before "" extract_symbols after";"#,
            &[(19, true)],
        );
    }
}