use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
#[cfg(debug_assertions)]
use std::sync::Mutex;
use std::sync::OnceLock;
use std::time::{Instant, UNIX_EPOCH};
use rayon::prelude::*;
use crate::cache_freshness::{self, FileFreshness};
use crate::inspect::cache::Tier1FileMemo;
use crate::inspect::{InspectJob, InspectResult, InspectScanSuccess};
const MAX_LINES_PER_FILE: usize = 100_000;
const MAX_ITEMS: usize = 100;
const MAX_TEXT_CHARS: usize = 200;
const MARKERS: [&str; 5] = ["TODO", "FIXME", "HACK", "XXX", "BUG"];
static TODOS_MEMO: OnceLock<Tier1FileMemo<FileScan>> = OnceLock::new();
#[cfg(debug_assertions)]
static FILE_READS: OnceLock<Mutex<BTreeMap<PathBuf, usize>>> = OnceLock::new();
#[derive(Debug, Clone)]
struct TodoItem {
file: String,
line: usize,
marker: &'static str,
author: Option<String>,
text: String,
}
#[derive(Debug, Clone)]
struct FileScan {
scanned_file: Option<PathBuf>,
items: Vec<TodoItem>,
}
pub fn run_todos_scan(job: &InspectJob) -> InspectResult {
let started = Instant::now();
let per_file: Vec<FileScan> = job
.scope_files
.par_iter()
.map(|path| {
todos_memo().get_or_insert_with(path, |path| scan_file(path, &job.project_root))
})
.collect();
let mut scanned_files = Vec::new();
let mut all_items = Vec::new();
for scan in per_file {
if let Some(path) = scan.scanned_file {
scanned_files.push(path);
}
all_items.extend(scan.items);
}
let mut by_kind = BTreeMap::new();
for marker in MARKERS {
by_kind.insert(marker.to_string(), 0usize);
}
for item in &all_items {
if let Some(count) = by_kind.get_mut(item.marker) {
*count += 1;
}
}
let total_count = all_items.len();
let drill_down_capped = total_count > MAX_ITEMS;
let items = all_items
.into_iter()
.take(MAX_ITEMS)
.map(|item| {
serde_json::json!({
"file": item.file,
"line": item.line,
"marker": item.marker,
"author": item.author,
"text": item.text,
})
})
.collect::<Vec<_>>();
let aggregate = serde_json::json!({
"count": total_count,
"by_kind": by_kind,
"items": items,
"drill_down_capped": drill_down_capped,
});
let success = InspectScanSuccess {
scanned_files,
contributions: Vec::new(),
aggregate,
};
InspectResult::success(job, success, started.elapsed())
}
fn todos_memo() -> &'static Tier1FileMemo<FileScan> {
TODOS_MEMO.get_or_init(Tier1FileMemo::default)
}
fn scan_file(path: &Path, project_root: &Path) -> (Option<FileFreshness>, FileScan) {
let (freshness, source) = read_text_file(path);
let Some(source) = source else {
return (
freshness,
FileScan {
scanned_file: None,
items: Vec::new(),
},
);
};
let file = display_file_path(project_root, path);
let mut items = Vec::new();
let mut in_block_comment = false;
for (line_index, line) in source.lines().take(MAX_LINES_PER_FILE).enumerate() {
if let Some(item) = scan_line(line, line_index + 1, &file, &mut in_block_comment) {
items.push(item);
}
}
(
freshness,
FileScan {
scanned_file: Some(path.to_path_buf()),
items,
},
)
}
fn read_text_file(path: &Path) -> (Option<FileFreshness>, Option<String>) {
let metadata = std::fs::metadata(path).ok();
#[cfg(debug_assertions)]
bump_file_read_count(path);
let bytes = std::fs::read(path).ok();
let freshness = metadata
.as_ref()
.map(|metadata| freshness_from_metadata(metadata, bytes.as_deref()));
let Some(bytes) = bytes else {
return (freshness, None);
};
if bytes.contains(&0) {
return (freshness, None);
}
(freshness, String::from_utf8(bytes).ok())
}
fn freshness_from_metadata(metadata: &std::fs::Metadata, bytes: Option<&[u8]>) -> FileFreshness {
let size = metadata.len();
let content_hash = if size <= cache_freshness::CONTENT_HASH_SIZE_CAP {
bytes
.map(cache_freshness::hash_bytes)
.unwrap_or_else(cache_freshness::zero_hash)
} else {
cache_freshness::zero_hash()
};
FileFreshness {
mtime: metadata.modified().unwrap_or(UNIX_EPOCH),
size,
content_hash,
}
}
fn display_file_path(project_root: &Path, path: &Path) -> String {
path.strip_prefix(project_root)
.unwrap_or(path)
.to_string_lossy()
.replace('\\', "/")
}
fn scan_line(
line: &str,
line_number: usize,
file: &str,
in_block_comment: &mut bool,
) -> Option<TodoItem> {
if *in_block_comment {
let item = parse_todo_body(strip_block_comment_prefix(line), line_number, file);
if line.contains("*/") {
*in_block_comment = false;
}
if item.is_some() {
return item;
}
}
let mut search_start = 0usize;
let mut found_item = None;
while search_start < line.len() {
let Some(prefix_match) = find_next_comment_prefix(line, search_start) else {
break;
};
let body = &line[prefix_match.body_start..];
if prefix_match.starts_block_comment && !body.contains("*/") {
*in_block_comment = true;
}
let body = if prefix_match.starts_block_comment {
strip_block_comment_prefix(body)
} else {
body.trim_start()
};
if let Some(item) = parse_todo_body(body, line_number, file) {
found_item = Some(item);
break;
}
search_start = prefix_match.body_start;
}
found_item
}
#[derive(Debug, Clone, Copy)]
struct CommentPrefixMatch {
body_start: usize,
starts_block_comment: bool,
}
fn find_next_comment_prefix(line: &str, search_start: usize) -> Option<CommentPrefixMatch> {
let prefixes = ["<!--", "//", "/*", "#", "--"];
prefixes
.iter()
.filter_map(|prefix| find_prefix_match(line, search_start, prefix))
.min_by_key(|prefix_match| prefix_match.body_start)
}
fn find_prefix_match(line: &str, search_start: usize, prefix: &str) -> Option<CommentPrefixMatch> {
let mut cursor = search_start;
while cursor < line.len() {
let offset = line[cursor..].find(prefix)?;
let prefix_start = cursor + offset;
if is_comment_prefix_boundary(line, prefix_start, prefix) {
return Some(CommentPrefixMatch {
body_start: prefix_start + prefix.len(),
starts_block_comment: prefix == "/*",
});
}
cursor = prefix_start + prefix.len();
}
None
}
fn is_comment_prefix_boundary(line: &str, prefix_start: usize, prefix: &str) -> bool {
if prefix == "<!--" {
return true;
}
prefix_start == 0
|| line[..prefix_start]
.chars()
.next_back()
.is_some_and(char::is_whitespace)
}
fn strip_block_comment_prefix(body: &str) -> &str {
let mut trimmed = body.trim_start();
while let Some(rest) = trimmed.strip_prefix('*') {
trimmed = rest.trim_start();
}
trimmed
}
fn parse_todo_body(body: &str, line_number: usize, file: &str) -> Option<TodoItem> {
let body = body.trim_start();
for marker in MARKERS {
let Some(rest) = body.strip_prefix(marker) else {
continue;
};
let Some((author, text_start)) = parse_marker_suffix(marker, rest) else {
continue;
};
return Some(TodoItem {
file: file.to_string(),
line: line_number,
marker,
author,
text: truncate_text(strip_comment_closer(text_start)),
});
}
None
}
fn parse_marker_suffix<'a>(
marker: &'static str,
rest: &'a str,
) -> Option<(Option<String>, &'a str)> {
if rest.is_empty() {
return Some((None, rest));
}
let trimmed = rest.trim_start();
if let Some(after_colon) = trimmed.strip_prefix(':') {
return Some((None, after_colon.trim_start()));
}
if let Some(after_author_start) = trimmed.strip_prefix('(') {
if !matches!(marker, "TODO" | "FIXME") {
return None;
}
let author_end = after_author_start.find(')')?;
let author = after_author_start[..author_end].trim();
if author.is_empty() {
return None;
}
let after_author = &after_author_start[author_end + 1..];
let after_author = after_author.trim_start();
let text_start = after_author
.strip_prefix(':')
.map(str::trim_start)
.unwrap_or(after_author);
return Some((Some(author.to_string()), text_start));
}
if rest.chars().next().is_some_and(char::is_whitespace) {
return Some((None, rest.trim_start()));
}
if rest.starts_with("*/") || rest.starts_with("-->") {
return Some((None, rest));
}
None
}
fn strip_comment_closer(text: &str) -> &str {
let mut trimmed = text.trim();
loop {
let without_closer = trimmed
.strip_suffix("*/")
.or_else(|| trimmed.strip_suffix("-->"));
let Some(next) = without_closer else {
break;
};
trimmed = next.trim_end();
}
trimmed
}
fn truncate_text(text: &str) -> String {
text.chars().take(MAX_TEXT_CHARS).collect()
}
#[cfg(debug_assertions)]
fn debug_file_reads() -> &'static Mutex<BTreeMap<PathBuf, usize>> {
FILE_READS.get_or_init(|| Mutex::new(BTreeMap::new()))
}
#[cfg(debug_assertions)]
fn bump_file_read_count(path: &Path) {
if let Ok(mut reads) = debug_file_reads().lock() {
*reads.entry(path.to_path_buf()).or_default() += 1;
}
}
#[cfg(debug_assertions)]
#[doc(hidden)]
pub fn reset_file_read_count_for_debug(project_root: &Path) {
let project_root =
std::fs::canonicalize(project_root).unwrap_or_else(|_| project_root.to_path_buf());
if let Ok(mut reads) = debug_file_reads().lock() {
reads.retain(|path, _| !path.starts_with(&project_root));
}
}
#[cfg(debug_assertions)]
#[doc(hidden)]
pub fn file_read_count_for_debug(project_root: &Path) -> usize {
let project_root =
std::fs::canonicalize(project_root).unwrap_or_else(|_| project_root.to_path_buf());
debug_file_reads()
.lock()
.map(|reads| {
reads
.iter()
.filter(|(path, _)| path.starts_with(&project_root))
.map(|(_, count)| *count)
.sum()
})
.unwrap_or_default()
}