use crate::language::{CommentPair, LanguageInfo};
use crate::stats::FileStats;
/// The category assigned to a single line of source text.
///
/// `Hash` is derived alongside the comparison traits so the type can be used
/// as a key in `HashMap`/`HashSet` (for example to tally lines per category).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LineType {
    /// The line is empty or contains only whitespace.
    Blank,
    /// The line contains comment text and no code.
    Comment,
    /// The line contains code, possibly alongside a comment.
    Code,
}
/// Mutable state carried from one line to the next while a file is classified.
///
/// It records whether the previous lines left a multi-line comment open and,
/// if so, which delimiter pair opened it.
#[derive(Debug, Clone)]
pub struct ClassifierState {
/// `true` while classification is inside a multi-line comment that has not been closed yet.
in_multi_line_comment: bool,
/// The delimiter pair that opened the currently open multi-line comment, if any.
current_delimiter: Option<CommentPair>,
}
impl ClassifierState {
    /// Builds the state used at the top of a file: not inside any multi-line
    /// comment and with no delimiter recorded.
    pub fn new() -> Self {
        let in_multi_line_comment = false;
        let current_delimiter = None;
        Self {
            in_multi_line_comment,
            current_delimiter,
        }
    }

    /// Puts this state back to the value produced by [`ClassifierState::new`],
    /// discarding any record of an open multi-line comment.
    pub fn reset(&mut self) {
        *self = Self::new();
    }
}
impl Default for ClassifierState {
fn default() -> Self {
Self::new()
}
}
/// Classifies individual lines as blank, comment, or code for one language.
pub struct LineClassifier<'a> {
/// Language definition that supplies the single-line and multi-line comment delimiters.
language: &'a LanguageInfo,
}
impl<'a> LineClassifier<'a> {
    /// Creates a classifier that uses the comment syntax described by `language`.
    pub fn new(language: &'a LanguageInfo) -> Self {
        Self { language }
    }

    /// Returns `line` without leading and trailing whitespace.
    fn trim_line(line: &str) -> &str {
        line.trim()
    }

    /// Returns `true` when `line` is empty or consists only of whitespace.
    fn is_blank(line: &str) -> bool {
        Self::trim_line(line).is_empty()
    }

    /// Returns the byte offset of the earliest single-line comment token in
    /// `text`, or `None` when the language has no such token in `text`.
    fn find_single_comment(&self, text: &str) -> Option<usize> {
        self.language
            .single_line_comments
            .iter()
            .filter_map(|token| text.find(token))
            .min()
    }

    /// Finds the multi-line comment opener that appears earliest in `line`.
    ///
    /// Returns the byte offset of the opener together with its delimiter pair.
    /// When two openers start at the same offset, the pair declared first in
    /// the language definition wins.
    fn contains_multi_line_start(&self, line: &str) -> Option<(usize, &CommentPair)> {
        self.language
            .multi_line_comments
            .iter()
            // An empty opener would match everywhere and never consume text.
            .filter(|pair| !pair.start.is_empty())
            .filter_map(|pair| line.find(pair.start).map(move |pos| (pos, pair)))
            .min_by_key(|&(pos, _)| pos)
    }

    /// Returns the byte offset just past the first closing delimiter of
    /// `delimiter` in `line`, or `None` when the line does not close it.
    fn contains_multi_line_end(&self, line: &str, delimiter: &CommentPair) -> Option<usize> {
        line.find(delimiter.end)
            .map(|pos| pos + delimiter.end.len())
    }

    /// Classifies one line and updates `state` for the lines that follow.
    ///
    /// The line is scanned left to right:
    ///
    /// 1. A whitespace-only line is always [`LineType::Blank`].
    /// 2. If `state` records an open multi-line comment, the line is a
    ///    comment unless the closing delimiter occurs on it; scanning then
    ///    continues with the text after that delimiter.
    /// 3. A line that begins (outside any comment) with `#!` is code.
    /// 4. The remaining text is consumed comment by comment. Whichever
    ///    comment token occurs first decides how the text is read: a
    ///    single-line token ends the scan, a multi-line opener is skipped up
    ///    to its closer, or — if the closer is missing — recorded in `state`
    ///    as still open. A multi-line opener that starts at the same offset
    ///    as a single-line token takes precedence.
    ///
    /// Any non-whitespace text found outside of comments makes the line
    /// [`LineType::Code`]; otherwise it is [`LineType::Comment`]. String
    /// literals are not recognised, so comment tokens inside them are treated
    /// as real comment tokens.
    pub fn classify_line(&self, line: &str, state: &mut ClassifierState) -> LineType {
        if Self::is_blank(line) {
            return LineType::Blank;
        }

        // Text that has not been attributed to code or comment yet.
        let mut rest = line;
        // Set when the line started inside a comment that is closed on this line.
        let mut resumed_after_comment = false;

        if state.in_multi_line_comment {
            if let Some(ref delimiter) = state.current_delimiter {
                match self.contains_multi_line_end(rest, delimiter) {
                    Some(end_pos) => {
                        state.in_multi_line_comment = false;
                        state.current_delimiter = None;
                        rest = &rest[end_pos..];
                        resumed_after_comment = true;
                    }
                    None => return LineType::Comment,
                }
            }
        }

        // The shebang shortcut is applied only after the open-comment check so
        // that a `#!` line inside a block comment stays a comment and its
        // closing delimiter is not missed.
        if !resumed_after_comment && Self::trim_line(rest).starts_with("#!") {
            return LineType::Code;
        }

        let mut has_code = false;
        while !Self::is_blank(rest) {
            let line_comment_pos = self.find_single_comment(rest);

            // A multi-line opener that lies behind a single-line token is part
            // of that line comment and must not open a block comment.
            let block = self
                .contains_multi_line_start(rest)
                .filter(|&(block_pos, _)| match line_comment_pos {
                    Some(line_pos) => block_pos <= line_pos,
                    None => true,
                });

            let (start_pos, pair) = match block {
                Some(found) => found,
                None => {
                    // Everything up to the line comment (or the end of the
                    // line) is outside of comments.
                    let code_end = line_comment_pos.unwrap_or(rest.len());
                    if !Self::is_blank(&rest[..code_end]) {
                        has_code = true;
                    }
                    break;
                }
            };

            if !Self::is_blank(&rest[..start_pos]) {
                has_code = true;
            }

            let body = &rest[start_pos + pair.start.len()..];
            match body.find(pair.end) {
                // Comment closed on this line: keep scanning what follows it.
                Some(end_rel) => rest = &body[end_rel + pair.end.len()..],
                None => {
                    state.in_multi_line_comment = true;
                    state.current_delimiter = Some(pair.clone());
                    break;
                }
            }
        }

        if has_code {
            LineType::Code
        } else {
            LineType::Comment
        }
    }
}
/// Classifies every entry of `lines` in order and tallies the results.
///
/// A single [`ClassifierState`] is threaded through all lines so that
/// multi-line comments spanning several lines are tracked. The returned
/// [`FileStats`] has its `blank`, `comment` and `code` counters incremented
/// once per line of the respective type.
pub fn classify_file(lines: &[String], language: &LanguageInfo) -> FileStats {
    let classifier = LineClassifier::new(language);
    let mut state = ClassifierState::new();

    lines.iter().fold(FileStats::new(), |mut totals, text| {
        match classifier.classify_line(text, &mut state) {
            LineType::Blank => totals.blank += 1,
            LineType::Comment => totals.comment += 1,
            LineType::Code => totals.code += 1,
        }
        totals
    })
}
// Unit tests for the line classifier, exercised against the built-in Rust and
// Python language definitions.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::language::LANGUAGES;

    /// Fetches the built-in language definition named "Rust".
    fn get_rust_language() -> &'static LanguageInfo {
        LANGUAGES
            .iter()
            .find(|candidate| candidate.name == "Rust")
            .unwrap()
    }

    /// Fetches the built-in language definition named "Python".
    fn get_python_language() -> &'static LanguageInfo {
        LANGUAGES
            .iter()
            .find(|candidate| candidate.name == "Python")
            .unwrap()
    }

    #[test]
    fn test_line_type_equality() {
        // Each variant equals itself.
        assert_eq!(LineType::Blank, LineType::Blank);
        assert_eq!(LineType::Comment, LineType::Comment);
        assert_eq!(LineType::Code, LineType::Code);
        // Distinct variants differ.
        assert_ne!(LineType::Blank, LineType::Code);
    }

    #[test]
    fn test_classifier_state_new() {
        let fresh = ClassifierState::new();
        assert!(!fresh.in_multi_line_comment);
        assert!(fresh.current_delimiter.is_none());
    }

    #[test]
    fn test_classifier_state_reset() {
        // Start from a state that claims to be inside a block comment.
        let mut dirty = ClassifierState::new();
        dirty.in_multi_line_comment = true;
        dirty.current_delimiter = Some(CommentPair::new("/*", "*/"));

        dirty.reset();

        assert!(!dirty.in_multi_line_comment);
        assert!(dirty.current_delimiter.is_none());
    }

    #[test]
    fn test_classify_blank_lines() {
        let rust = get_rust_language();
        let classifier = LineClassifier::new(rust);
        let mut state = ClassifierState::new();

        for &input in &["", " ", "\t\t", " \t "] {
            assert_eq!(
                classifier.classify_line(input, &mut state),
                LineType::Blank,
                "input: {:?}",
                input
            );
        }
    }

    #[test]
    fn test_classify_single_line_comments() {
        let rust = get_rust_language();
        let classifier = LineClassifier::new(rust);
        let mut state = ClassifierState::new();

        let inputs = [
            "// this is a comment",
            " // comment with leading spaces",
            "/// doc comment",
            "//! inner doc comment",
        ];
        for &input in &inputs {
            assert_eq!(
                classifier.classify_line(input, &mut state),
                LineType::Comment,
                "input: {:?}",
                input
            );
        }
    }

    #[test]
    fn test_classify_code_lines() {
        let rust = get_rust_language();
        let classifier = LineClassifier::new(rust);
        let mut state = ClassifierState::new();

        let inputs = ["let x = 5;", "fn main() {", " println!(\"hello\");"];
        for &input in &inputs {
            assert_eq!(
                classifier.classify_line(input, &mut state),
                LineType::Code,
                "input: {:?}",
                input
            );
        }
    }

    #[test]
    fn test_classify_mixed_lines() {
        let rust = get_rust_language();
        let classifier = LineClassifier::new(rust);
        let mut state = ClassifierState::new();

        // Code followed by a trailing comment counts as code.
        let verdict = classifier.classify_line("let x = 5; // comment", &mut state);
        assert_eq!(verdict, LineType::Code);
    }

    #[test]
    fn test_classify_multi_line_comment_single_line() {
        let rust = get_rust_language();
        let classifier = LineClassifier::new(rust);
        let mut state = ClassifierState::new();

        for &input in &["/* comment */", " /* comment */ "] {
            assert_eq!(
                classifier.classify_line(input, &mut state),
                LineType::Comment,
                "input: {:?}",
                input
            );
        }
    }

    #[test]
    fn test_classify_multi_line_comment_with_code() {
        let rust = get_rust_language();
        let classifier = LineClassifier::new(rust);
        let mut state = ClassifierState::new();

        for &input in &["/* comment */ let x = 5;", "let x = 5; /* comment */"] {
            assert_eq!(
                classifier.classify_line(input, &mut state),
                LineType::Code,
                "input: {:?}",
                input
            );
        }
    }

    #[test]
    fn test_classify_multi_line_comment_spanning() {
        let rust = get_rust_language();
        let classifier = LineClassifier::new(rust);
        let mut state = ClassifierState::new();

        // (line, whether the block comment is still open afterwards)
        let steps = [
            ("/* start of comment", true),
            ("still in comment", true),
            ("end of comment */", false),
        ];
        for &(input, still_open) in &steps {
            assert_eq!(
                classifier.classify_line(input, &mut state),
                LineType::Comment,
                "input: {:?}",
                input
            );
            assert_eq!(
                state.in_multi_line_comment, still_open,
                "state after input: {:?}",
                input
            );
        }
    }

    #[test]
    fn test_classify_multi_line_comment_with_code_after_end() {
        let rust = get_rust_language();
        let classifier = LineClassifier::new(rust);
        let mut state = ClassifierState::new();

        let opening = classifier.classify_line("/* comment", &mut state);
        assert_eq!(opening, LineType::Comment);

        // Code after the closing delimiter turns the line into code.
        let closing = classifier.classify_line("more comment */ let x = 5;", &mut state);
        assert_eq!(closing, LineType::Code);
        assert!(!state.in_multi_line_comment);
    }

    #[test]
    fn test_classify_shebang() {
        let python = get_python_language();
        let classifier = LineClassifier::new(python);
        let mut state = ClassifierState::new();

        let verdict = classifier.classify_line("#!/usr/bin/env python3", &mut state);
        assert_eq!(verdict, LineType::Code);
    }

    #[test]
    fn test_classify_python_triple_quotes() {
        let python = get_python_language();
        let classifier = LineClassifier::new(python);
        let mut state = ClassifierState::new();

        // Docstring opened and closed on one line.
        let one_liner = classifier.classify_line("\"\"\"This is a docstring\"\"\"", &mut state);
        assert_eq!(one_liner, LineType::Comment);

        state.reset();

        // Docstring spread over three lines.
        for &input in &["\"\"\"", "Docstring content", "\"\"\""] {
            assert_eq!(
                classifier.classify_line(input, &mut state),
                LineType::Comment,
                "input: {:?}",
                input
            );
        }
    }

    #[test]
    fn test_classify_file() {
        let rust = get_rust_language();
        let lines: Vec<String> = [
            "// File header comment",
            "",
            "fn main() {",
            " let x = 5; // inline comment",
            " /* multi-line",
            " comment */",
            " println!(\"hello\");",
            "}",
        ]
        .iter()
        .map(|text| String::from(*text))
        .collect();

        let stats = classify_file(&lines, rust);

        assert_eq!(stats.blank, 1);
        // Header comment plus the two lines of the block comment.
        assert_eq!(stats.comment, 3);
        assert_eq!(stats.code, 4);
    }

    #[test]
    fn test_empty_multi_line_comment() {
        let rust = get_rust_language();
        let classifier = LineClassifier::new(rust);
        let mut state = ClassifierState::new();

        let verdict = classifier.classify_line("/**/", &mut state);
        assert_eq!(verdict, LineType::Comment);
    }

    #[test]
    fn test_code_before_multi_line_start() {
        let rust = get_rust_language();
        let classifier = LineClassifier::new(rust);
        let mut state = ClassifierState::new();

        // Code before the opener: the line is code, but the comment stays open.
        let opening = classifier.classify_line("let x = 5; /* comment", &mut state);
        assert_eq!(opening, LineType::Code);
        assert!(state.in_multi_line_comment);

        let closing = classifier.classify_line("continues */", &mut state);
        assert_eq!(closing, LineType::Comment);
    }
}