// rloc 0.2.0
//
// A fast, modern Rust implementation of cloc (Count Lines of Code).
// Documentation
use crate::languages::Language;
use std::fs::File;
use std::hash::{Hash, Hasher};
use std::io::{BufRead, BufReader, Read};
use std::path::Path;

/// Line counts gathered for a single source file.
#[derive(Debug, Clone, Default)]
pub struct FileStats {
    /// Path of the file, rendered as a string.
    pub path: String,
    /// Name of the language the file was counted as.
    pub language: String,
    /// Number of lines counted as code.
    pub code: u64,
    /// Number of lines counted as comments.
    pub comments: u64,
    /// Number of lines counted as blank.
    pub blanks: u64,
}

impl FileStats {
    /// Returns the sum of the code, comment and blank line counters.
    pub fn total(&self) -> u64 {
        [self.code, self.comments, self.blanks].iter().sum()
    }
}

/// Scanner state used by [`classify_line`]: it is passed in for the start of a line and the
/// state reached at the end of that line is returned, so it can be fed into the next line.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum State {
    /// Scanning ordinary source text, outside any block comment or string literal.
    Code,
    /// Inside a block comment. `depth` is the number of block comments currently open; it
    /// only grows beyond 1 for languages whose `nested_comments` flag is set.
    BlockComment { depth: u32 },
    /// Inside a string literal opened by `delimiter`. [`classify_line`] resets this state to
    /// `Code` at the end of a line, so it is never carried over to the following line.
    String { delimiter: char },
}

/// Counts the code, comment and blank lines of the file at `path`, using the comment syntax
/// described by `language`.
///
/// Counting rules:
/// - A file that `is_binary` flags as binary is reported with every counter at zero.
/// - A whitespace-only line is a blank, unless it sits inside an open block comment, in which
///   case it is counted as a comment line.
/// - A line holding both code and a comment (`LineType::Mixed`) is counted as code.
/// - If the language defines neither line comments nor a block comment start, every
///   non-blank line is counted as code.
/// - A line that is not valid UTF-8 is skipped and does not contribute to any counter.
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be opened or probed, or if reading
/// fails for any reason other than a line containing invalid UTF-8.
pub fn count_lines(path: &Path, language: &Language) -> std::io::Result<FileStats> {
    let mut stats = FileStats {
        path: path.display().to_string(),
        language: language.name.to_string(),
        ..Default::default()
    };

    if is_binary(&File::open(path)?)? {
        return Ok(stats);
    }

    // The binary probe reads through a cloned handle, which shares its cursor with the handle
    // it was cloned from, so the actual count starts over on a freshly opened file.
    let reader = BufReader::new(File::open(path)?);

    let has_comments = !language.line_comments.is_empty() || language.block_comment_start.is_some();
    let mut state = State::Code;

    for line in reader.lines() {
        let line = match line {
            Ok(text) => text,
            // The offending bytes have already been consumed, so the next iteration moves on
            // to the following line.
            Err(err) if err.kind() == std::io::ErrorKind::InvalidData => continue,
            // Any other failure comes from the reader itself and may repeat on every call;
            // retrying could spin forever, so report it to the caller instead.
            Err(err) => return Err(err),
        };

        let trimmed = line.trim();

        if trimmed.is_empty() {
            if matches!(state, State::BlockComment { .. }) {
                stats.comments += 1;
            } else {
                stats.blanks += 1;
            }
            continue;
        }

        // Without any comment syntax there is nothing to classify: `state` never leaves
        // `Code` and every non-blank line is code.
        if !has_comments {
            stats.code += 1;
            continue;
        }

        let (new_state, line_type) = classify_line(trimmed, state, language);
        state = new_state;

        match line_type {
            // Lines mixing code and comments are attributed to code.
            LineType::Code | LineType::Mixed => stats.code += 1,
            LineType::Comment => stats.comments += 1,
            LineType::Blank => stats.blanks += 1,
        }
    }

    Ok(stats)
}

/// Classification of a single line as produced by [`classify_line`].
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum LineType {
    /// The line contains code and no comment text.
    Code,
    /// The line contains comment text and no code.
    Comment,
    /// The line contains both code and comment text.
    Mixed,
    /// Neither code nor comment text was found on the line.
    Blank,
}

/// Advances `chars` past the remainder of `token`, whose first character the caller has
/// already consumed.
fn skip_token_tail(chars: &mut std::str::CharIndices<'_>, token: &str) {
    for _ in token.chars().skip(1) {
        chars.next();
    }
}

/// Classifies one line of source text and reports the scanner state reached at its end.
///
/// `line` is scanned character by character, starting in `initial_state`, using the comment
/// and string syntax described by `lang`:
/// - In `State::Code`, a block comment start opens a comment, a line comment marker ends the
///   scan immediately, and a `"` or `'` that matches one of the language's single-byte string
///   delimiters opens a string. Any other non-whitespace character counts as code.
/// - In `State::BlockComment`, the block comment end closes one nesting level; a block
///   comment start opens another level only when `lang.nested_comments` is set.
/// - In `State::String`, a backslash skips the following character and the opening delimiter
///   closes the string.
///
/// Returns the state to use for the next line together with the line's [`LineType`]. A string
/// still open at the end of the line is not carried over: the returned state is then
/// `State::Code`. A line that starts inside a block comment always counts as containing
/// comment text.
///
/// A `State::BlockComment` whose `depth` is 0 is treated like depth 1 when the comment closes,
/// instead of underflowing.
pub fn classify_line(line: &str, initial_state: State, lang: &Language) -> (State, LineType) {
    let mut state = initial_state;
    let mut has_code = false;
    let mut has_comment = matches!(state, State::BlockComment { .. });

    let mut chars = line.char_indices();

    while let Some((byte_idx, c)) = chars.next() {
        let remaining = &line[byte_idx..];

        match state {
            State::Code => {
                if c.is_whitespace() {
                    continue;
                }

                // Block comments are checked before line comments so that a block start is
                // never mistaken for a line comment sharing the same prefix.
                if let Some(block_start) = lang.block_comment_start {
                    if remaining.starts_with(block_start) {
                        has_comment = true;
                        state = State::BlockComment { depth: 1 };
                        skip_token_tail(&mut chars, block_start);
                        continue;
                    }
                }

                // A line comment swallows the rest of the line, so the result is known here.
                for &line_comment in lang.line_comments {
                    if remaining.starts_with(line_comment) {
                        let line_type = if has_code {
                            LineType::Mixed
                        } else {
                            LineType::Comment
                        };
                        return (State::Code, line_type);
                    }
                }

                // Everything else, including a string's opening quote, is code.
                has_code = true;

                if c == '"' || c == '\'' {
                    for &delim in lang.string_delimiters {
                        if remaining.starts_with(delim) && delim.len() == 1 {
                            state = State::String { delimiter: c };
                            break;
                        }
                    }
                }
            }

            State::BlockComment { depth } => {
                if let Some(block_end) = lang.block_comment_end {
                    if remaining.starts_with(block_end) {
                        // Saturate so that a caller-supplied depth of 0 closes the comment
                        // rather than underflowing.
                        state = match depth.saturating_sub(1) {
                            0 => State::Code,
                            outer => State::BlockComment { depth: outer },
                        };
                        skip_token_tail(&mut chars, block_end);
                        continue;
                    }
                }

                if lang.nested_comments {
                    if let Some(block_start) = lang.block_comment_start {
                        if remaining.starts_with(block_start) {
                            state = State::BlockComment {
                                depth: depth.saturating_add(1),
                            };
                            skip_token_tail(&mut chars, block_start);
                        }
                    }
                }
            }

            State::String { delimiter } => {
                if c == '\\' {
                    // Skip the escaped character so an escaped delimiter does not end the string.
                    chars.next();
                } else if c == delimiter {
                    state = State::Code;
                }
            }
        }
    }

    // Strings are not tracked across lines.
    if matches!(state, State::String { .. }) {
        state = State::Code;
    }

    let line_type = match (has_code, has_comment) {
        (true, true) => LineType::Mixed,
        (true, false) => LineType::Code,
        (false, true) => LineType::Comment,
        (false, false) => LineType::Blank,
    };

    (state, line_type)
}

/// Heuristically decides whether `file` holds binary data.
///
/// Performs a single read of at most 8192 bytes through a cloned handle and counts the NUL
/// bytes in what was read. The file is considered binary when that count exceeds one tenth of
/// the bytes read (integer division), with a floor of one, so a single NUL byte never marks
/// a file as binary. A read that returns no bytes yields `false`.
///
/// # Errors
///
/// Returns the I/O error if the handle cannot be cloned or the read fails.
fn is_binary(file: &File) -> std::io::Result<bool> {
    const SAMPLE_SIZE: usize = 8192;

    let mut sample = [0u8; SAMPLE_SIZE];
    let filled = file.try_clone()?.read(&mut sample)?;
    if filled == 0 {
        return Ok(false);
    }

    let mut nul_bytes = 0usize;
    for &byte in &sample[..filled] {
        if byte == 0 {
            nul_bytes += 1;
        }
    }

    let limit = std::cmp::max(filled / 10, 1);
    Ok(nul_bytes > limit)
}

/// Reads the whole file at `path` into memory and returns a 64-bit hash of its bytes,
/// computed with a default-constructed `ahash::AHasher`.
///
/// # Errors
///
/// Returns the I/O error if the file cannot be read.
pub fn compute_file_hash(path: &Path) -> std::io::Result<u64> {
    std::fs::read(path).map(|bytes| {
        let mut state = ahash::AHasher::default();
        bytes.hash(&mut state);
        state.finish()
    })
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::languages::LANGUAGES;

    /// Classifies `line` with the Rust language definition, starting from `State::Code`.
    fn classify_rust(line: &str) -> (State, LineType) {
        let rust = LANGUAGES.get("Rust").unwrap();
        classify_line(line, State::Code, rust)
    }

    /// Line comments, block comments and trailing comments are recognised, and an unclosed
    /// block comment is reported in the returned state.
    #[test]
    fn test_c_style_comments() {
        let expectations = [
            ("let x = 5;", (State::Code, LineType::Code)),
            ("// comment", (State::Code, LineType::Comment)),
            ("let x = 5; // comment", (State::Code, LineType::Mixed)),
            ("/* block */", (State::Code, LineType::Comment)),
            ("/* start", (State::BlockComment { depth: 1 }, LineType::Comment)),
        ];

        for (line, (expected_state, expected_type)) in expectations {
            let (state, line_type) = classify_rust(line);
            assert_eq!(state, expected_state, "Failed state for: {}", line);
            assert_eq!(line_type, expected_type, "Failed type for: {}", line);
        }
    }

    /// Closing the inner comment of a nested pair leaves the outer comment open.
    #[test]
    fn test_nested_comments() {
        let rust = LANGUAGES.get("Rust").unwrap();
        assert!(rust.nested_comments);

        let (end_state, _) = classify_rust("/* outer /* inner */");
        assert_eq!(end_state, State::BlockComment { depth: 1 });
    }
}