/// Looks for a contract annotation in the lines just above a definition.
///
/// Inspects at most the five lines preceding `start_line` (1-based) in
/// `full_content`, top to bottom. The first line whose trimmed text contains
/// both `contract(` and `equation` decides the result:
///
/// * the level is always reported as `"L2"`;
/// * the equation is the text between the first pair of double quotes that
///   follows the word `equation`, or `None` when no complete pair exists.
///
/// Returns `(None, None)` when no such line is found in the window.
fn extract_contract_metadata_from_context(
    full_content: &str,
    start_line: usize,
) -> (Option<String>, Option<String>) {
    // 1-based number of the first line to inspect; never below line 1.
    let first_line = start_line.saturating_sub(5).max(1);
    let window_len = start_line.saturating_sub(first_line);

    let window = full_content.lines().skip(first_line - 1).take(window_len);
    for raw_line in window {
        let candidate = raw_line.trim();
        if !(candidate.contains("contract(") && candidate.contains("equation")) {
            continue;
        }

        // Quoted value after `equation`: both quotes must be present.
        let equation = candidate
            .find("equation")
            .map(|at| &candidate[at..])
            .and_then(|tail| tail.split_once('"'))
            .and_then(|(_, after_open)| after_open.split_once('"'))
            .map(|(value, _)| value.to_string());

        return (Some("L2".to_string()), equation);
    }

    (None, None)
}
/// Reports whether the final component of `path` is one of the directory
/// names that are excluded (build output, dependency caches, VCS metadata,
/// vendored code and similar).
///
/// Only the last path component is compared, and the comparison is exact and
/// case-sensitive. Paths without a final component, or whose final component
/// is not valid UTF-8, are never considered ignored.
pub(super) fn is_ignored_dir(path: &Path) -> bool {
    const IGNORED_NAMES: &[&str] = &[
        "target",
        "node_modules",
        ".git",
        ".pmat",
        "__pycache__",
        "venv",
        ".venv",
        "dist",
        "build",
        ".next",
        ".cache",
        "vendor",
        "third_party",
        "third-party",
        "external",
        "deps",
        "book",
        "theme",
        "fixtures",
        ".cargo",
    ];

    match path.file_name().and_then(|component| component.to_str()) {
        Some(dir_name) => IGNORED_NAMES.contains(&dir_name),
        None => false,
    }
}
/// Maps a file's extension to a [`Language`].
///
/// The match is exact and case-sensitive. `.h` headers are reported as C and
/// `.cu`/`.cuh` files as C++. Returns `None` when the path has no extension,
/// the extension is not valid UTF-8, or the extension is not recognised.
pub(super) fn detect_language(path: &Path) -> Option<Language> {
    let extension = path.extension().and_then(|raw| raw.to_str())?;

    let language = match extension {
        "rs" => Language::Rust,
        "ts" | "tsx" | "js" | "jsx" => Language::TypeScript,
        "py" => Language::Python,
        "c" | "h" => Language::C,
        "cpp" | "cc" | "cxx" | "hpp" | "cu" | "cuh" => Language::Cpp,
        "go" => Language::Go,
        "lua" => Language::Lua,
        "ptx" => Language::Ptx,
        _ => return None,
    };

    Some(language)
}
/// Guesses whether header text is C or C++ by plain substring search.
///
/// Returns [`Language::Cpp`] as soon as `content` contains any of the
/// C++-only markers listed below, and [`Language::C`] otherwise. The search is
/// purely textual, so a marker inside a comment or string literal also counts.
pub(super) fn classify_header_language(content: &str) -> Language {
    const CPP_MARKERS: [&str; 12] = [
        "extern \"C\"",
        "class ",
        "namespace ",
        "template<",
        "template <",
        "virtual ",
        "constexpr ",
        "nullptr",
        "std::",
        "public:",
        "private:",
        "protected:",
    ];

    let looks_like_cpp = CPP_MARKERS.iter().any(|&marker| content.contains(marker));
    if looks_like_cpp {
        Language::Cpp
    } else {
        Language::C
    }
}
/// Builds the [`QualityMetrics`] record for one code chunk.
///
/// * `chunk` – the chunk whose `content` is measured.
/// * `_full_content` – text of the whole file the chunk came from; it is only
///   used to look for contract metadata just above `chunk.start_line`.
///
/// Complexity is the base count from `count_complexity`, plus
/// `cpp_complexity_penalty` when the chunk language string is `"cpp"`, `"c"`
/// or `"cuda"`. Enum, struct, trait and type-alias chunks contribute a line
/// count of zero to the TDG score, although the reported `loc` is still the
/// real line count. `cognitive_complexity` mirrors `complexity`, and
/// `commit_count` / `churn_score` are left at zero here.
#[allow(clippy::cast_possible_truncation)]
pub(super) fn extract_quality_metrics(chunk: &CodeChunk, _full_content: &str) -> QualityMetrics {
    use crate::services::semantic::ChunkType;

    let source: &str = &chunk.content;
    let loc = source.lines().count() as u32;

    let language = chunk.language.as_str();
    let is_c_family = ["cpp", "c", "cuda"].iter().any(|&name| language == name);
    let base_complexity = count_complexity(source);
    let complexity = if is_c_family {
        base_complexity + cpp_complexity_penalty(source)
    } else {
        base_complexity
    };

    let satd_count = count_satd_markers(source);
    let big_o = estimate_big_o(source);

    // Pure declarations are not penalised for their length.
    let is_declaration = matches!(
        chunk.chunk_type,
        ChunkType::Enum | ChunkType::Struct | ChunkType::Trait | ChunkType::TypeAlias
    );
    let effective_loc = if is_declaration { 0 } else { loc };

    let tdg_score = calculate_simple_tdg(complexity, satd_count, effective_loc);
    let tdg_grade = score_to_grade(tdg_score);

    let (contract_level, contract_equation) =
        extract_contract_metadata_from_context(_full_content, chunk.start_line);

    QualityMetrics {
        tdg_score,
        tdg_grade,
        complexity,
        cognitive_complexity: complexity,
        big_o,
        satd_count,
        loc,
        commit_count: 0,
        churn_score: 0.0,
        contract_level,
        contract_equation,
    }
}
/// Line-based estimate of branching complexity.
///
/// Starts at 1 and, for every trimmed line of `source`, adds:
///
/// * 1 if the line begins with a branching/looping keyword or contains
///   ` if `, `&&`, `||` or `? ` (at most once per line, however many match);
/// * 1 if the line begins with `case ` and contains a `:`;
/// * 1 if the line contains `=>` and does not begin with `//`.
///
/// The scan is purely textual; it does not parse the language.
pub(super) fn count_complexity(source: &str) -> u32 {
    const BRANCH_PREFIXES: &[&str] = &[
        "if ",
        "if(",
        "else if ",
        "} else if ",
        "match ",
        "switch ",
        "switch(",
        "while ",
        "while(",
        "for ",
        "for(",
        "loop ",
        "do {",
        "do{",
        "catch ",
        "catch(",
    ];
    const BRANCH_FRAGMENTS: &[&str] = &[" if ", "&&", "||", "? "];

    source.lines().map(str::trim).fold(1u32, |total, line| {
        let is_branch = BRANCH_PREFIXES.iter().any(|&prefix| line.starts_with(prefix))
            || BRANCH_FRAGMENTS.iter().any(|&fragment| line.contains(fragment));
        // A line starting with "case " can never start with "//", so no
        // separate comment check is needed for this rule.
        let is_case_label = line.starts_with("case ") && line.contains(':');
        let has_arrow = line.contains("=>") && !line.starts_with("//");

        total + u32::from(is_branch) + u32::from(is_case_label) + u32::from(has_arrow)
    })
}
/// Extra complexity points for C/C++/CUDA-specific constructs found in
/// `source` by plain text search.
///
/// Contributions:
///
/// * every line starting with `#if` (which includes `#ifdef` / `#ifndef`)
///   adds the current conditional nesting depth; `#endif` lowers the depth;
/// * +3 when more than five lines mention `GGML_`, `TORCH_`, `AT_`, `CUDA_`
///   or `CHECK_`;
/// * +3 for `enable_if`, `requires ` or `SFINAE`;
/// * +2 for each `template<` / `template <` occurrence beyond the first;
/// * +2 for `const_cast<` or `reinterpret_cast<`;
/// * +2 for `__shared__`, +3 for `__syncthreads()`;
/// * +2 for `__shfl_`, `__ballot_`, `__any_sync` or `__all_sync`;
/// * +2 when `__global__` appears together with `if (` or `if(`.
#[allow(clippy::cast_possible_truncation)]
pub(super) fn cpp_complexity_penalty(source: &str) -> u32 {
    const MACRO_FAMILIES: [&str; 5] = ["GGML_", "TORCH_", "AT_", "CUDA_", "CHECK_"];

    let mut conditional_depth = 0u32;
    let mut conditional_penalty = 0u32;
    let mut macro_lines = 0u32;

    for line in source.lines().map(str::trim) {
        // "#if" is a prefix of "#ifdef" and "#ifndef", so one check covers all.
        if line.starts_with("#if") {
            conditional_depth += 1;
            conditional_penalty += conditional_depth;
        } else if line.starts_with("#endif") {
            conditional_depth = conditional_depth.saturating_sub(1);
        }

        if MACRO_FAMILIES.iter().any(|&family| line.contains(family)) {
            macro_lines += 1;
        }
    }

    let mentions_any = |needles: &[&str]| needles.iter().any(|&needle| source.contains(needle));

    let mut penalty = conditional_penalty;

    if macro_lines > 5 {
        penalty += 3;
    }
    if mentions_any(&["enable_if", "requires ", "SFINAE"]) {
        penalty += 3;
    }

    let template_count = source.matches("template<").count() + source.matches("template <").count();
    if template_count > 1 {
        penalty += (template_count as u32 - 1) * 2;
    }

    if mentions_any(&["const_cast<", "reinterpret_cast<"]) {
        penalty += 2;
    }
    if source.contains("__shared__") {
        penalty += 2;
    }
    if source.contains("__syncthreads()") {
        penalty += 3;
    }
    if mentions_any(&["__shfl_", "__ballot_", "__any_sync", "__all_sync"]) {
        penalty += 2;
    }
    if source.contains("__global__") && mentions_any(&["if (", "if("]) {
        penalty += 2;
    }

    penalty
}
/// Counts debt markers found in the comments of `source`, line by line.
///
/// Per trimmed line:
///
/// 1. Lines that `update_raw_string_state` reports as belonging to a raw
///    string literal are skipped entirely.
/// 2. Lines inside a block comment, or starting one with `/*`, are scanned in
///    full by `count_markers_in_line`; the block ends on a line containing `*/`.
/// 3. `///` and `//!` doc-comment lines are ignored.
/// 4. Every other line is handed to `count_markers_in_comment`.
///
/// A block comment is only recognised when `/*` is at the start of the
/// trimmed line.
#[allow(clippy::cast_possible_truncation)]
pub(super) fn count_satd_markers(source: &str) -> u32 {
    let mut total = 0u32;
    let mut inside_block_comment = false;
    let mut inside_raw_string = false;

    for line in source.lines().map(str::trim) {
        if update_raw_string_state(line, &mut inside_raw_string) {
            continue;
        }

        total += if inside_block_comment || line.starts_with("/*") {
            // Stay in block-comment mode until a line carries the terminator.
            inside_block_comment = !line.contains("*/");
            count_markers_in_line(line)
        } else if line.starts_with("///") || line.starts_with("//!") {
            0
        } else {
            count_markers_in_comment(line)
        };
    }

    total
}
/// Counts occurrences of the markers `TODO`, `FIXME`, `HACK` and `OPTIMIZE`
/// in `line`, ignoring case.
///
/// Matching is by substring on the upper-cased text, so a marker embedded in
/// a longer word is counted as well.
fn count_markers_in_line(line: &str) -> u32 {
    const MARKERS: [&str; 4] = ["TODO", "FIXME", "HACK", "OPTIMIZE"];

    let shouted = line.to_uppercase();
    MARKERS
        .iter()
        .map(|&marker| shouted.matches(marker).count() as u32)
        .sum()
}
/// Counts debt markers in the trailing `//` comment of a code line.
///
/// The comment is taken to start at the first `//` that is preceded by an
/// even number of `"` characters, i.e. one that does not appear to sit inside
/// a string literal. Earlier `//` occurrences inside strings (for example the
/// one in `"http://host"`) are stepped over rather than ending the search, so
/// a real comment later on the same line is still inspected.
///
/// Returns 0 when the line has no `//` outside a string literal. Quote
/// counting is a heuristic: escaped quotes and `'"'` char literals are not
/// understood.
fn count_markers_in_comment(trimmed: &str) -> u32 {
    let comment_start = trimmed
        .match_indices("//")
        .map(|(at, _)| at)
        .find(|&at| {
            let quotes_before = trimmed[..at].chars().filter(|&c| c == '"').count();
            // An even count means every string opened before this point is closed.
            quotes_before % 2 == 0
        });

    match comment_start {
        Some(at) => count_markers_in_line(&trimmed[at..]),
        None => 0,
    }
}
/// Tracks whether the scanner is inside a multi-line `r#"…"#` raw string.
///
/// `trimmed` is the current line and `in_raw_string` the state carried over
/// from the previous line; the flag is updated in place. Returns `true` when
/// the caller should skip this line, which is the case for every line that is
/// inside a raw string or that contains a raw-string opener.
///
/// While inside a raw string, a line containing `"#` or ending with `"`
/// closes it. A line that opens a raw string and also contains `"#` after the
/// opener is treated as a complete one-line literal.
fn update_raw_string_state(trimmed: &str, in_raw_string: &mut bool) -> bool {
    const OPENER: &str = "r#\"";
    const TERMINATOR: &str = "\"#";

    if *in_raw_string {
        let closes_here = trimmed.contains(TERMINATOR) || trimmed.ends_with('"');
        *in_raw_string = !closes_here;
        return true;
    }

    match trimmed.split_once(OPENER) {
        Some((_, after_opener)) => {
            // Still open only when the terminator is absent from this line.
            *in_raw_string = !after_opener.contains(TERMINATOR);
            true
        }
        None => false,
    }
}
/// Rough complexity-class label derived from loop nesting depth.
///
/// Each trimmed line starting with `for `, `while ` or `loop ` raises the
/// current depth by one; each line that is exactly `}` lowers it by one
/// (never below zero). The deepest level reached selects the label:
/// `O(1)` for 0, `O(n)` for 1 and `O(n^k)` for depth `k` of 2 or more.
///
/// Any lone `}` lowers the depth, not only one that closes a loop, so the
/// result is a heuristic.
pub(super) fn estimate_big_o(source: &str) -> String {
    const LOOP_OPENERS: [&str; 3] = ["for ", "while ", "loop "];

    let mut depth = 0;
    let mut deepest = 0;

    for line in source.lines().map(str::trim) {
        let opens_loop = LOOP_OPENERS.iter().any(|&opener| line.starts_with(opener));
        if opens_loop {
            depth += 1;
            deepest = deepest.max(depth);
        }
        if line == "}" && depth > 0 {
            depth -= 1;
        }
    }

    match deepest {
        0 => String::from("O(1)"),
        1 => String::from("O(n)"),
        power => format!("O(n^{power})"),
    }
}
/// Combines complexity, marker count and size into a single score.
///
/// The score is the sum of three capped terms:
///
/// * `complexity / 25`, capped at 4.0;
/// * 0.5 per marker beyond the first two, capped at 2.0;
/// * `(loc - 200) / 200` for code longer than 200 lines, capped at 2.0.
///
/// When `complexity` is 0 or 1 the sum is limited to 1.99, and the final
/// value never exceeds 10.0.
#[allow(clippy::cast_possible_truncation)]
pub(super) fn calculate_simple_tdg(complexity: u32, satd_count: u32, loc: u32) -> f32 {
    let complexity_term = (complexity as f32 / 25.0).min(4.0);
    let satd_term = (satd_count.saturating_sub(2) as f32 * 0.5).min(2.0);
    let size_term = if loc > 200 {
        ((loc - 200) as f32 / 200.0).min(2.0)
    } else {
        0.0
    };

    let combined = complexity_term + satd_term + size_term;
    let limited = if complexity <= 1 {
        combined.min(1.99)
    } else {
        combined
    };

    limited.min(10.0)
}
/// Converts a score into a letter grade.
///
/// Scores below 2.0 map to `A`, below 4.0 to `B`, below 6.0 to `C`, below 8.0
/// to `D`, and everything else (including NaN) to `F`.
pub(super) fn score_to_grade(score: f32) -> String {
    // Exclusive upper bound of each grade, in ascending order.
    const GRADE_LIMITS: [(f32, &str); 4] = [(2.0, "A"), (4.0, "B"), (6.0, "C"), (8.0, "D")];

    GRADE_LIMITS
        .iter()
        .find(|&&(upper_bound, _)| score < upper_bound)
        .map_or("F", |&(_, grade)| grade)
        .to_string()
}
/// How a single line above a definition is treated while collecting its
/// documentation comment.
enum DocLineKind<'a> {
    /// A line-style doc comment; carries the comment text without its marker.
    DocComment(&'a str),
    /// The opening line of a block comment.
    BlockCommentStart,
    /// An interior line of a block comment; carries the line's text.
    BlockCommentBody(&'a str),
    /// A line that is stepped over without ending the documentation scan.
    SkipLine,
    /// Anything else.
    Other,
}
/// Classifies one line for the doc-comment scan.
///
/// All checks are anchored at the first character, so `line` should already
/// have surrounding whitespace trimmed.
///
/// * `///` / `//!` lines become [`DocLineKind::DocComment`] with the marker
///   removed and the remainder trimmed.
/// * Lines starting with `/*` (which includes `/**`) become
///   [`DocLineKind::BlockCommentStart`].
/// * Lines starting with `*` become [`DocLineKind::BlockCommentBody`] with
///   leading asterisks removed. A trailing `*/` terminator is dropped from the
///   text, and a line that holds nothing but the terminator is reported as
///   [`DocLineKind::SkipLine`] so that the delimiter never leaks into the
///   collected documentation.
/// * Empty lines and lines starting with `#[` or `@` become
///   [`DocLineKind::SkipLine`].
/// * Everything else is [`DocLineKind::Other`].
fn classify_doc_line(line: &str) -> DocLineKind<'_> {
    if line.starts_with("///") || line.starts_with("//!") {
        return DocLineKind::DocComment(
            line.trim_start_matches("///")
                .trim_start_matches("//!")
                .trim(),
        );
    }

    // "/*" is a prefix of "/**", so a single check covers both openers.
    if line.starts_with("/*") {
        return DocLineKind::BlockCommentStart;
    }

    if line.starts_with('*') {
        let closes_block = line.ends_with("*/");
        let without_terminator = line.strip_suffix("*/").unwrap_or(line);
        let text = without_terminator.trim_start_matches('*').trim();
        // A bare "*/" carries no documentation; step over it instead of
        // emitting the leftover "/" as comment text.
        if closes_block && text.is_empty() {
            return DocLineKind::SkipLine;
        }
        return DocLineKind::BlockCommentBody(text);
    }

    if line.is_empty() || line.starts_with("#[") || line.starts_with('@') {
        DocLineKind::SkipLine
    } else {
        DocLineKind::Other
    }
}
/// Collects the documentation comment that sits directly above the line
/// `start_line` (1-based) of `content`.
///
/// The lines above the definition are visited bottom-up. Each one is trimmed
/// and classified with `classify_doc_line`: doc-comment and block-comment body
/// text is collected, skippable lines are stepped over, and the scan stops at
/// a block-comment opener, at any other kind of line, or at the top of the
/// file.
///
/// Returns the collected pieces in source order joined by single spaces, or
/// `None` when `start_line` is 1 or less, when `content` has fewer than
/// `start_line - 1` newlines, or when nothing was collected.
pub(super) fn extract_doc_comment(content: &str, start_line: usize) -> Option<String> {
    if start_line <= 1 {
        return None;
    }

    // Byte offset of the newline that terminates the line just above the
    // definition, i.e. the (start_line - 1)-th newline of the file.
    let mut line_end = content
        .match_indices('\n')
        .nth(start_line - 2)
        .map(|(offset, _)| offset)?;

    let mut collected: Vec<&str> = Vec::new();
    loop {
        let line_start = content[..line_end].rfind('\n').map_or(0, |newline| newline + 1);
        let line = content.get(line_start..line_end).unwrap_or("").trim();

        match classify_doc_line(line) {
            DocLineKind::DocComment(text) | DocLineKind::BlockCommentBody(text) => {
                collected.push(text);
            }
            DocLineKind::BlockCommentStart | DocLineKind::Other => break,
            DocLineKind::SkipLine => {}
        }

        // Reached the first line of the file: nothing further above.
        if line_start == 0 {
            break;
        }
        // Step onto the newline that ends the previous line.
        line_end = line_start - 1;
    }

    if collected.is_empty() {
        return None;
    }

    // Lines were gathered bottom-up; restore source order before joining.
    collected.reverse();
    Some(collected.join(" "))
}