use crate::detectors::base::{Detector, DetectorConfig};
use crate::graph::GraphQueryExt;
use crate::models::{deterministic_finding_id, Finding, Severity};
use anyhow::Result;
use std::path::PathBuf;
use tracing::info;
/// Detector that reports functions which appear to have no documentation
/// near their definition line.
pub struct MissingDocstringsDetector {
    // Stored by `new` but not read by the code in this file, hence the allow.
    #[allow(dead_code)]
    repository_path: PathBuf,
    /// Upper bound on the number of findings collected by one `detect` run.
    max_findings: usize,
    /// Functions whose `line_end - line_start` is below this are not reported.
    min_lines: u32,
}

impl MissingDocstringsDetector {
    /// Creates a detector for the repository at `repository_path`.
    ///
    /// Defaults: at most 100 findings, and functions spanning fewer than
    /// 5 lines are ignored.
    pub fn new(repository_path: impl Into<PathBuf>) -> Self {
        Self {
            repository_path: repository_path.into(),
            max_findings: 100,
            min_lines: 5,
        }
    }

    /// Heuristically decides whether a function looks like an entry point or
    /// API endpoint.
    ///
    /// Both inputs are compared case-insensitively. The function matches when
    /// its name starts with a verb-like prefix (`get_`, `handle_`, ...), ends
    /// with a handler-like suffix (`_handler`, `_view`, ...), is exactly one of
    /// a few conventional names (`main`, `run`, ...), or when its file path
    /// contains `route`, `view`, `controller` or `handler` anywhere.
    fn is_entry_point(func_name: &str, file_path: &str) -> bool {
        const NAME_PREFIXES: [&str; 6] = ["get_", "post_", "put_", "delete_", "handle_", "api_"];
        const NAME_SUFFIXES: [&str; 4] = ["_handler", "_endpoint", "_view", "_route"];
        const EXACT_NAMES: [&str; 6] = ["main", "run", "start", "execute", "init", "setup"];
        const PATH_MARKERS: [&str; 4] = ["route", "view", "controller", "handler"];

        let name = func_name.to_lowercase();
        if NAME_PREFIXES.iter().any(|prefix| name.starts_with(*prefix))
            || NAME_SUFFIXES.iter().any(|suffix| name.ends_with(*suffix))
            || EXACT_NAMES.contains(&name.as_str())
        {
            return true;
        }

        let path = file_path.to_lowercase();
        PATH_MARKERS.iter().any(|marker| path.contains(*marker))
    }

    /// Builds a Markdown snippet showing a documentation skeleton for
    /// `func_name`, in the comment style matching the file extension `ext`.
    ///
    /// * `py` - a fenced Python docstring, indented under the `def` line.
    /// * `js` / `ts` - a fenced JSDoc comment.
    /// * `rs` / `go` - a fenced, fixed doc-comment skeleton.
    /// * anything else - a one-sentence plain-text hint.
    ///
    /// For Python and JSDoc, one placeholder line is emitted per parameter, up
    /// to three. A missing or negative `param_count` is treated as zero.
    fn generate_template(func_name: &str, param_count: Option<i64>, ext: &str) -> String {
        /// Maximum number of placeholder parameter lines in a template.
        const MAX_TEMPLATE_PARAMS: i64 = 3;

        // Clamp before casting: a negative count cast with `as usize` would
        // wrap to a huge value and produce bogus parameter lines.
        let shown_params = param_count.unwrap_or(0).clamp(0, MAX_TEMPLATE_PARAMS) as usize;

        match ext {
            "py" => {
                // Each line is written out explicitly (no `\` continuations) so
                // the docstring keeps its indentation relative to the `def`.
                let mut template = String::from("```python\n");
                template.push_str(&format!("def {}(...):\n", func_name));
                template.push_str("    \"\"\"\n");
                template.push_str("    Brief description of what the function does.\n");
                if shown_params > 0 {
                    template.push_str("\n    Args:\n");
                    for idx in 1..=shown_params {
                        template.push_str(&format!("        param{}: Description.\n", idx));
                    }
                }
                template.push_str("\n    Returns:\n        Description of return value.\n");
                template.push_str("    \"\"\"\n```");
                template
            }
            "js" | "ts" => {
                let mut template = String::from("```javascript\n");
                template.push_str("/**\n");
                template.push_str(" * Brief description of what the function does.\n");
                template.push_str(" *\n");
                for idx in 1..=shown_params {
                    // `{{`/`}}` are escapes here because this is a format string.
                    template.push_str(&format!(
                        " * @param {{type}} param{} - Description.\n",
                        idx
                    ));
                }
                // Plain literal, not a format string: braces must NOT be doubled.
                template.push_str(" * @returns {type} Description of return value.\n */\n```");
                template
            }
            "rs" => concat!(
                "```rust\n",
                "/// Brief description of what the function does.\n",
                "///\n",
                "/// # Arguments\n",
                "///\n",
                "/// * `param` - Description.\n",
                "///\n",
                "/// # Returns\n",
                "///\n",
                "/// Description of return value.\n",
                "```"
            )
            .to_string(),
            "go" => format!(
                "```go\n\
                 // {} does something.\n\
                 //\n\
                 // Parameters:\n\
                 // - param: description\n\
                 //\n\
                 // Returns description.\n\
                 ```",
                func_name
            ),
            _ => "Add a docstring describing the function's purpose, parameters, and return value."
                .to_string(),
        }
    }
}
impl Detector for MissingDocstringsDetector {
    /// Returns the detector's name string.
    fn name(&self) -> &'static str {
        "missing-docstrings"
    }

    /// Returns a one-line human-readable description of the detector.
    fn description(&self) -> &'static str {
        "Detects functions without documentation"
    }

    /// Returns the file extensions this detector declares.
    fn file_extensions(&self) -> &'static [&'static str] {
        &["py"]
    }

    /// Scans every function in the context's graph and reports those with no
    /// documentation marker near their definition line.
    ///
    /// A function is skipped when it spans fewer than `min_lines` lines, has a
    /// single leading underscore (a double leading underscore is still
    /// checked), is named `test_*` or lives in a test path, or lives under a
    /// path containing `vendor` or `node_modules`. Functions whose source file
    /// cannot be read are skipped silently (best effort).
    ///
    /// A function counts as documented when any of the (up to) five lines
    /// starting at its `line_start` contains `"""`, `'''`, `///`, `/**`, or is
    /// a `//` comment line longer than 10 bytes.
    ///
    /// Severity is `Medium` for entry points (see `is_entry_point`) and for
    /// functions with at least five callers, `Low` otherwise. Collection stops
    /// once `max_findings` findings were gathered; the result is then sorted
    /// by descending severity.
    ///
    /// # Errors
    ///
    /// The visible body never constructs an error; it always returns `Ok`.
    fn detect(
        &self,
        ctx: &crate::detectors::analysis_context::AnalysisContext,
    ) -> Result<Vec<Finding>> {
        /// Number of lines, starting at the definition line, that are
        /// inspected for a documentation marker.
        const DOC_SCAN_WINDOW: usize = 5;

        /// Returns true when `line` contains something that looks like the
        /// start of a docstring or doc comment.
        fn line_has_doc_marker(line: &str) -> bool {
            line.contains("\"\"\"")
                || line.contains("'''")
                || line.contains("///")
                || line.contains("/**")
                || (line.trim().starts_with("//") && line.len() > 10)
        }

        let graph = ctx.graph;
        let i = graph.interner();
        let mut findings: Vec<Finding> = Vec::new();

        // Per-file cache of "does line N carry a doc marker" flags, so a file
        // holding many functions is read and scanned once instead of once per
        // function. `None` records a file that could not be read.
        let mut doc_markers_by_file: std::collections::HashMap<PathBuf, Option<Vec<bool>>> =
            std::collections::HashMap::new();

        for func in graph.get_functions_shared().iter() {
            if findings.len() >= self.max_findings {
                break;
            }

            let lines = func.line_end.saturating_sub(func.line_start);
            if lines < self.min_lines {
                continue;
            }

            let name = func.node_name(i);
            let path = func.path(i);

            // Single leading underscore is skipped; a double one is kept.
            if name.starts_with('_') && !name.starts_with("__") {
                continue;
            }
            if name.starts_with("test_") || crate::detectors::base::is_test_path(path) {
                continue;
            }
            if path.contains("vendor") || path.contains("node_modules") {
                continue;
            }

            let file_path = PathBuf::from(path);
            let cached_markers = doc_markers_by_file
                .entry(file_path.clone())
                .or_insert_with(|| {
                    std::fs::read_to_string(&file_path).ok().map(|content| {
                        content
                            .lines()
                            .map(line_has_doc_marker)
                            .collect::<Vec<bool>>()
                    })
                });
            let marker_lines = match cached_markers.as_ref() {
                Some(flags) => flags,
                // Unreadable file: nothing to inspect, so nothing to report.
                None => continue,
            };

            // `line_start` is used as 1-based; convert to a 0-based index.
            let start = (func.line_start as usize).saturating_sub(1);
            let has_doc = marker_lines
                .iter()
                .skip(start)
                .take(DOC_SCAN_WINDOW)
                .any(|&flagged| flagged);
            if has_doc {
                continue;
            }

            // Only undocumented functions need the graph lookups below.
            let caller_count = graph.get_callers(func.qn(i)).len();
            let is_entry = Self::is_entry_point(name, path);
            let ext = path.rsplit('.').next().unwrap_or("");
            let param_count = func.param_count_opt();

            let severity = if is_entry || caller_count >= 5 {
                Severity::Medium
            } else {
                Severity::Low
            };

            let mut notes = Vec::new();
            notes.push(format!("📏 {} lines", lines));
            if caller_count > 0 {
                notes.push(format!("📞 {} callers", caller_count));
            }
            if is_entry {
                notes.push("🚪 Entry point / API endpoint".to_string());
            }
            if let Some(pc) = param_count {
                notes.push(format!("📝 {} parameters", pc));
            }
            let context_notes = format!("\n\n**Analysis:**\n{}", notes.join("\n"));
            let template = Self::generate_template(name, param_count, ext);

            findings.push(Finding {
                id: String::new(),
                detector: "MissingDocstringsDetector".to_string(),
                severity,
                title: format!("Missing documentation: `{}`", name),
                description: format!(
                    "Function `{}` has no documentation.{}",
                    name, context_notes
                ),
                affected_files: vec![file_path.clone()],
                // The finding covers only the definition line.
                line_start: Some(func.line_start),
                line_end: Some(func.line_start),
                suggested_fix: Some(format!("Add a docstring:\n\n{}", template)),
                estimated_effort: Some("10 minutes".to_string()),
                category: Some("documentation".to_string()),
                cwe_id: None,
                why_it_matters: Some(if is_entry {
                    "Entry points and API endpoints are the first thing developers encounter. \
                     Good documentation helps them understand how to use your code."
                        .to_string()
                } else if caller_count >= 5 {
                    "This function is used by many other parts of the codebase. \
                     Documentation prevents misuse and makes maintenance easier."
                        .to_string()
                } else {
                    "Documentation helps future maintainers (including yourself) understand \
                     the function's purpose without reading the implementation."
                        .to_string()
                }),
                ..Default::default()
            });
        }

        // Highest severity first.
        findings.sort_by_key(|f| std::cmp::Reverse(f.severity));
        info!(
            "MissingDocstringsDetector found {} findings (graph-aware)",
            findings.len()
        );
        Ok(findings)
    }
}
impl crate::detectors::RegisteredDetector for MissingDocstringsDetector {
fn create(init: &crate::detectors::DetectorInit) -> std::sync::Arc<dyn Detector> {
std::sync::Arc::new(Self::new(init.repo_path))
}
fn max_tier() -> crate::models::Tier {
crate::models::Tier::Deep
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::builder::GraphBuilder;
    use crate::graph::CodeNode;

    /// Writes `source` to `module.py` inside a fresh temp directory.
    ///
    /// Returns the directory guard (which must stay alive while the file is
    /// needed) together with the file's path as a string.
    fn write_python_module(source: &str) -> (tempfile::TempDir, String) {
        let dir = tempfile::tempdir().expect("should create temp dir");
        let file = dir.path().join("module.py");
        std::fs::write(&file, source).expect("should write test file");
        let file_path_str = file.to_string_lossy().to_string();
        (dir, file_path_str)
    }

    /// Runs the detector rooted at `repo_root` against the graph in `store`.
    fn run_detection(repo_root: &std::path::Path, store: &GraphBuilder) -> Vec<Finding> {
        let detector = MissingDocstringsDetector::new(repo_root);
        let ctx = crate::detectors::analysis_context::AnalysisContext::test_with_mock_files(
            store,
            vec![],
        );
        detector.detect(&ctx).expect("detection should succeed")
    }

    /// An undocumented function that is long enough must be reported.
    #[test]
    fn test_detects_missing_docstring() {
        let (dir, file_path_str) = write_python_module(
            r#"def calculate_score(data, weights, threshold):
total = 0
for item in data:
total += item * weights
if total > threshold:
return total
return 0
"#,
        );

        let mut store = GraphBuilder::new();
        store.add_node(
            CodeNode::function("calculate_score", &file_path_str)
                .with_qualified_name("module::calculate_score")
                .with_lines(1, 8),
        );

        let findings = run_detection(dir.path(), &store);
        assert!(
            !findings.is_empty(),
            "Should detect missing docstring. Found: {:?}",
            findings.iter().map(|f| &f.title).collect::<Vec<_>>()
        );
        assert!(findings[0].title.contains("calculate_score"));
    }

    /// A function whose body opens with a docstring must not be reported.
    #[test]
    fn test_no_finding_when_docstring_present() {
        let (dir, file_path_str) = write_python_module(
            r#"def calculate_score(data, weights, threshold):
"""Calculate score from data using given weights and threshold."""
total = 0
for item in data:
total += item * weights
if total > threshold:
return total
return 0
"#,
        );

        let mut store = GraphBuilder::new();
        store.add_node(
            CodeNode::function("calculate_score", &file_path_str)
                .with_qualified_name("module::calculate_score")
                .with_lines(1, 9),
        );

        let findings = run_detection(dir.path(), &store);
        assert!(
            findings.is_empty(),
            "Should not flag function with docstring. Found: {:?}",
            findings.iter().map(|f| &f.title).collect::<Vec<_>>()
        );
    }
}