use crate::summary_item::SummaryItem;
use crate::{summary::SummaryLevel, traits::SummaryExtraction};
use std::collections::HashSet;
/// Stateless extractor that reduces source lines to a list of summary items.
///
/// All functionality is exposed through associated functions; the type
/// carries no data.
pub struct SummaryExtractor;
impl SummaryExtractor {
/// Builds a condensed summary of `lines` for the given `language` and `level`.
///
/// Lines are visited in order and numbered from 1. Each non-empty line is
/// handled by the first rule that applies:
///
/// 1. Important comments (see `is_important_comment`) are always kept.
/// 2. Other lines starting with `//` or `#` are treated as ordinary comments.
///    They are kept only at `SummaryLevel::Detailed`, and only the first line
///    for each pattern key (see `extract_pattern_key`) is kept.
/// 3. Remaining lines are kept when `is_summary_worthy` accepts them and
///    their pattern key has not been seen before.
///
/// Returns an empty vector when `level.is_enabled()` is false. Otherwise the
/// result holds at most 25 (`Minimal`), 50 (`Standard`) or 100 (`Detailed`)
/// items.
pub fn extract_summary(
    lines: &[String],
    language: Option<&str>,
    level: SummaryLevel,
) -> Vec<SummaryItem> {
    if !level.is_enabled() {
        return Vec::new();
    }

    // A cap of 0 means "no cap".
    let max_items = match level {
        SummaryLevel::Minimal => 25,
        SummaryLevel::Standard => 50,
        SummaryLevel::Detailed => 100,
        SummaryLevel::None => 0,
    };
    let keep_plain_comments = matches!(level, SummaryLevel::Detailed);

    let mut summary = Vec::new();
    let mut seen_patterns = HashSet::new();

    for (idx, line) in lines.iter().enumerate() {
        // Checking the cap before each line guarantees it is never exceeded,
        // including by comment lines, so no truncation is needed afterwards.
        if max_items > 0 && summary.len() >= max_items {
            break;
        }

        let trimmed = line.trim();
        if trimmed.is_empty() {
            continue;
        }
        let line_number = idx + 1;

        if Self::is_important_comment(trimmed) {
            summary.push(SummaryItem::new(line, line_number, None, "comment"));
            continue;
        }

        if trimmed.starts_with("//") || trimmed.starts_with('#') {
            // Ordinary comments are only surfaced in detailed summaries, and
            // are de-duplicated by pattern key like code lines.
            if keep_plain_comments && seen_patterns.insert(Self::extract_pattern_key(trimmed)) {
                summary.push(SummaryItem::new(line, line_number, None, "comment"));
            }
            continue;
        }

        if Self::is_summary_worthy(trimmed, language, level) {
            // `insert` returns true only for a key not seen before.
            if seen_patterns.insert(Self::extract_pattern_key(trimmed)) {
                let kind = Self::infer_kind(trimmed);
                summary.push(SummaryItem::new(line, line_number, None, kind));
            }
        }
    }

    summary
}
/// Classifies a line by its leading keyword.
///
/// The line is trimmed, then compared against each category's prefixes in
/// table order; the first category with a matching prefix wins. Returns
/// `"other"` when nothing matches.
fn infer_kind(line: &str) -> &'static str {
    // (kind label, prefixes that select it), checked top to bottom.
    const KIND_TABLE: &[(&str, &[&str])] = &[
        (
            "function",
            &[
                "fn ",
                "pub fn ",
                "async fn ",
                "def ",
                "async def ",
                "function ",
                "async function ",
                "func ",
            ],
        ),
        ("class", &["class ", "pub class "]),
        ("struct", &["struct ", "pub struct "]),
        ("enum", &["enum ", "pub enum "]),
        ("trait", &["trait ", "pub trait ", "interface ", "protocol "]),
        ("impl", &["impl "]),
        ("import", &["import ", "use ", "from ", "require "]),
        ("comment", &["//", "#", "/**"]),
    ];

    let text = line.trim();
    KIND_TABLE
        .iter()
        .find(|&&(_, prefixes)| prefixes.iter().any(|prefix| text.starts_with(prefix)))
        .map_or("other", |&(kind, _)| kind)
}
/// Decides whether a line belongs in a summary at the given `level`.
///
/// Blank lines and lines starting with `//` or `#` are never accepted.
/// Otherwise:
/// - `Minimal` accepts only core structures (`is_core_structure`);
/// - `Standard` additionally accepts the language-specific matches;
/// - `Detailed` additionally accepts detail structures (`is_detail_structure`);
/// - `None` accepts nothing.
///
/// `language` is matched exactly against names such as `"Python"` or
/// `"Rust"`; any other value (including `None`) uses the generic matcher.
fn is_summary_worthy(line: &str, language: Option<&str>, level: SummaryLevel) -> bool {
    let text = line.trim();
    let is_comment = text.starts_with("//") || text.starts_with('#');
    if text.is_empty() || is_comment {
        return false;
    }

    // Evaluated on demand: only the levels that need it pay for the dispatch.
    let matches_language = || match language {
        Some("Python") => Self::is_python_summary_worthy(text),
        Some("Rust") => Self::is_rust_summary_worthy(text),
        Some("JavaScript" | "TypeScript") => Self::is_js_ts_summary_worthy(text),
        Some("Java") => Self::is_java_summary_worthy(text),
        Some("C" | "C++") => Self::is_c_cpp_summary_worthy(text),
        Some("Go") => Self::is_go_summary_worthy(text),
        Some("Ruby") => Self::is_ruby_summary_worthy(text),
        Some("PHP") => Self::is_php_summary_worthy(text),
        Some("Swift") => Self::is_swift_summary_worthy(text),
        Some("Kotlin") => Self::is_kotlin_summary_worthy(text),
        Some("Scala") => Self::is_scala_summary_worthy(text),
        Some("Haskell") => Self::is_haskell_summary_worthy(text),
        Some("Clojure") => Self::is_clojure_summary_worthy(text),
        Some("Elixir") => Self::is_elixir_summary_worthy(text),
        Some("Erlang") => Self::is_erlang_summary_worthy(text),
        _ => Self::is_generic_summary_worthy(text),
    };

    match level {
        SummaryLevel::None => false,
        SummaryLevel::Minimal => Self::is_core_structure(text),
        SummaryLevel::Standard => matches_language() || Self::is_core_structure(text),
        SummaryLevel::Detailed => {
            matches_language()
                || Self::is_core_structure(text)
                || Self::is_detail_structure(text)
        }
    }
}
/// Returns true when `line` begins with a Python definition, import,
/// decorator (`@`) or the `if __name__ == ` entry-point guard.
///
/// The check is a plain prefix test on `line` as given (no trimming).
fn is_python_summary_worthy(line: &str) -> bool {
    const PREFIXES: &[&str] = &[
        "def ",
        "class ",
        "import ",
        "from ",
        "@",
        "async def ",
        "if __name__ == ",
    ];
    PREFIXES.iter().any(|prefix| line.starts_with(prefix))
}
/// Returns true when `line` begins with a Rust item declaration.
///
/// Function headers (`fn `, `pub fn `, `async fn `) only count when the line
/// also contains `{`; every other listed prefix counts on its own.
fn is_rust_summary_worthy(line: &str) -> bool {
    const FN_PREFIXES: &[&str] = &["fn ", "pub fn ", "async fn "];
    const ITEM_PREFIXES: &[&str] = &[
        "struct ",
        "pub struct ",
        "enum ",
        "pub enum ",
        "trait ",
        "pub trait ",
        "impl ",
        "use ",
        "const ",
        "pub const ",
        "static ",
        "pub static ",
        "macro_rules!",
    ];

    let opens_fn_body =
        FN_PREFIXES.iter().any(|prefix| line.starts_with(prefix)) && line.contains('{');
    opens_fn_body || ITEM_PREFIXES.iter().any(|prefix| line.starts_with(prefix))
}
/// Returns true when `line` begins with a JavaScript/TypeScript declaration.
///
/// `const `/`let `/`var ` bindings count only when the line also contains
/// `function` or `=>`.
fn is_js_ts_summary_worthy(line: &str) -> bool {
    const DECLARATION_PREFIXES: &[&str] = &[
        "function ",
        "class ",
        "interface ",
        "type ",
        "export ",
        "import ",
        "async function ",
    ];
    const BINDING_PREFIXES: &[&str] = &["const ", "let ", "var "];

    if DECLARATION_PREFIXES
        .iter()
        .any(|prefix| line.starts_with(prefix))
    {
        return true;
    }

    let mentions_function = line.contains("function") || line.contains("=>");
    mentions_function && BINDING_PREFIXES.iter().any(|prefix| line.starts_with(prefix))
}
/// Returns true when `line` begins with a Java type declaration, access
/// modifier, `import `/`package ` statement, or contains `void main(`
/// anywhere.
fn is_java_summary_worthy(line: &str) -> bool {
    const PREFIXES: &[&str] = &[
        "public class ",
        "private class ",
        "protected class ",
        "class ",
        "interface ",
        "enum ",
        "public ",
        "private ",
        "protected ",
        "import ",
        "package ",
    ];
    line.contains("void main(") || PREFIXES.iter().any(|prefix| line.starts_with(prefix))
}
/// Returns true when `line` looks like a C/C++ declaration.
///
/// A line qualifies when it starts with one of the listed prefixes, or when
/// it contains all of `(`, `)` and `{` (a rough test for a function header
/// that opens its body on the same line).
///
/// NOTE(review): `is_summary_worthy` in this file rejects lines beginning
/// with `#` before dispatching here, so the `#include `/`#define ` prefixes
/// look unreachable through that path — confirm whether that is intended.
fn is_c_cpp_summary_worthy(line: &str) -> bool {
    const PREFIXES: &[&str] = &[
        "#include ",
        "#define ",
        "typedef ",
        "struct ",
        "class ",
        "namespace ",
        "template ",
        "extern ",
        "static ",
    ];

    let opens_body = line.contains('(') && line.contains(')') && line.contains('{');
    opens_body || PREFIXES.iter().any(|prefix| line.starts_with(prefix))
}
/// Returns true when `line` begins with a Go top-level keyword
/// (`func`, `type`, `var`, `const`, `package`, `import`, `interface`, `struct`).
fn is_go_summary_worthy(line: &str) -> bool {
    const PREFIXES: &[&str] = &[
        "func ",
        "type ",
        "var ",
        "const ",
        "package ",
        "import ",
        "interface ",
        "struct ",
    ];
    PREFIXES.iter().any(|prefix| line.starts_with(prefix))
}
/// Returns true when `line` begins with a Ruby definition, `require`,
/// mixin (`include `/`extend `) or an `attr_*` accessor declaration.
fn is_ruby_summary_worthy(line: &str) -> bool {
    const PREFIXES: &[&str] = &[
        "def ",
        "class ",
        "module ",
        "require ",
        "include ",
        "extend ",
        "attr_",
    ];
    PREFIXES.iter().any(|prefix| line.starts_with(prefix))
}
/// Returns true when `line` begins with a PHP declaration: a function
/// (optionally with a visibility modifier), a type, a namespace/use
/// statement, or a `require `/`include `.
fn is_php_summary_worthy(line: &str) -> bool {
    const PREFIXES: &[&str] = &[
        "function ",
        "class ",
        "interface ",
        "trait ",
        "namespace ",
        "use ",
        "require ",
        "include ",
        "public function ",
        "private function ",
        "protected function ",
    ];
    PREFIXES.iter().any(|prefix| line.starts_with(prefix))
}
/// Returns true when `line` begins with a Swift declaration keyword
/// (functions, types, extensions, imports, `var`/`let` bindings, type aliases).
fn is_swift_summary_worthy(line: &str) -> bool {
    const PREFIXES: &[&str] = &[
        "func ",
        "class ",
        "struct ",
        "enum ",
        "protocol ",
        "extension ",
        "import ",
        "var ",
        "let ",
        "typealias ",
    ];
    PREFIXES.iter().any(|prefix| line.starts_with(prefix))
}
/// Returns true when `line` begins with a Kotlin declaration keyword
/// (functions, class variants, objects, imports, packages, `val`/`var`).
fn is_kotlin_summary_worthy(line: &str) -> bool {
    const PREFIXES: &[&str] = &[
        "fun ",
        "class ",
        "interface ",
        "object ",
        "enum class ",
        "data class ",
        "sealed class ",
        "import ",
        "package ",
        "val ",
        "var ",
    ];
    PREFIXES.iter().any(|prefix| line.starts_with(prefix))
}
/// Returns true when `line` begins with a Scala declaration keyword
/// (methods, classes, objects, traits, imports, packages, `val`/`var`).
fn is_scala_summary_worthy(line: &str) -> bool {
    const PREFIXES: &[&str] = &[
        "def ",
        "class ",
        "object ",
        "trait ",
        "case class ",
        "sealed trait ",
        "import ",
        "package ",
        "val ",
        "var ",
    ];
    PREFIXES.iter().any(|prefix| line.starts_with(prefix))
}
/// Returns true when `line` contains a type signature marker (` :: `) or
/// begins with a Haskell declaration keyword.
fn is_haskell_summary_worthy(line: &str) -> bool {
    const PREFIXES: &[&str] = &[
        "data ",
        "type ",
        "newtype ",
        "class ",
        "instance ",
        "import ",
        "module ",
    ];

    if line.contains(" :: ") {
        return true;
    }
    PREFIXES.iter().any(|prefix| line.starts_with(prefix))
}
/// Returns true when `line` begins with a Clojure defining form
/// (`(defn `, `(def `, `(defmacro `, ...), a namespace form, or a
/// `(:require `/`(:use ` clause.
fn is_clojure_summary_worthy(line: &str) -> bool {
    const PREFIXES: &[&str] = &[
        "(defn ",
        "(defn- ",
        "(defmacro ",
        "(def ",
        "(defprotocol ",
        "(defrecord ",
        "(deftype ",
        "(ns ",
        "(:require ",
        "(:use ",
    ];
    PREFIXES.iter().any(|prefix| line.starts_with(prefix))
}
/// Returns true when `line` begins with an Elixir definition macro
/// (`def`, `defp`, `defmodule`, ...) or an `import `/`alias `/`use ` directive.
fn is_elixir_summary_worthy(line: &str) -> bool {
    const PREFIXES: &[&str] = &[
        "def ",
        "defp ",
        "defmodule ",
        "defprotocol ",
        "defimpl ",
        "defstruct ",
        "defmacro ",
        "import ",
        "alias ",
        "use ",
    ];
    PREFIXES.iter().any(|prefix| line.starts_with(prefix))
}
/// Returns true when `line` begins with an Erlang module attribute
/// (`-module(`, `-export(`, ...) or contains both `(` and `->`, which is
/// used here as a rough test for a function clause head.
fn is_erlang_summary_worthy(line: &str) -> bool {
    const ATTRIBUTE_PREFIXES: &[&str] = &[
        "-module(",
        "-export(",
        "-import(",
        "-include(",
        "-record(",
        "-type(",
        "-spec(",
    ];

    let looks_like_clause = line.contains('(') && line.contains("->");
    looks_like_clause
        || ATTRIBUTE_PREFIXES
            .iter()
            .any(|prefix| line.starts_with(prefix))
}
/// Language-agnostic fallback used when no specific matcher applies.
///
/// - `def `/`class ` headers count only when the line contains `:`.
/// - `function `/`fn `/`pub fn ` headers count only when the line contains `{`.
/// - Every other listed prefix counts on its own.
fn is_generic_summary_worthy(line: &str) -> bool {
    const COLON_HEADERS: &[&str] = &["def ", "class "];
    const BRACE_HEADERS: &[&str] = &["function ", "fn ", "pub fn "];
    const PLAIN_PREFIXES: &[&str] = &[
        "struct ",
        "pub struct ",
        "enum ",
        "pub enum ",
        "import ",
        "use ",
        "export ",
        "module ",
        "package ",
        "namespace ",
        "typedef ",
        "interface ",
        "protocol ",
        "trait ",
    ];

    if COLON_HEADERS.iter().any(|prefix| line.starts_with(prefix)) && line.contains(':') {
        return true;
    }
    if BRACE_HEADERS.iter().any(|prefix| line.starts_with(prefix)) && line.contains('{') {
        return true;
    }
    PLAIN_PREFIXES.iter().any(|prefix| line.starts_with(prefix))
}
/// Returns true when `line` opens a core structural element (function, type,
/// module/namespace, or import/export), regardless of language.
///
/// Leading whitespace is ignored and the comparison is case-insensitive (the
/// line is lower-cased before the prefix test).
///
/// The prefix list includes the `pub ` forms of `struct`/`enum`/`trait` and
/// Go/Swift-style `func `, so that public type declarations are treated the
/// same as their private counterparts and the same as `pub fn `.
fn is_core_structure(line: &str) -> bool {
    const CORE_PREFIXES: &[&str] = &[
        "fn ",
        "pub fn ",
        "async fn ",
        "def ",
        "async def ",
        "function ",
        "func ",
        "class ",
        "struct ",
        "pub struct ",
        "enum ",
        "pub enum ",
        "trait ",
        "pub trait ",
        "impl ",
        "interface ",
        "module ",
        "package ",
        "namespace ",
        "import ",
        "export ",
        "use ",
        "type ",
    ];
    let lower = line.trim_start().to_lowercase();
    CORE_PREFIXES.iter().any(|prefix| lower.starts_with(prefix))
}
/// Returns true when `line` carries finer-grained detail: a variable or
/// constant binding, an attribute/annotation, a doc-comment opener, or any
/// text containing `todo`.
///
/// Leading whitespace is ignored and matching is case-insensitive (the line
/// is lower-cased first). Bindings qualify by prefix alone, whether or not
/// they contain an assignment.
///
/// NOTE(review): `is_summary_worthy` in this file rejects lines starting with
/// `//` or `#` before this is consulted, so the `#[`, `///` and `//!`
/// prefixes look unreachable through that path — confirm.
fn is_detail_structure(line: &str) -> bool {
    const DETAIL_PREFIXES: &[&str] = &[
        "let ",
        "mut ",
        "const ",
        "static ",
        "pub const ",
        "pub static ",
        "var ",
        "val ",
        "#[",
        "@",
        "///",
        "//!",
        "/**",
    ];

    let lowered = line.trim_start().to_lowercase();
    lowered.contains("todo")
        || DETAIL_PREFIXES
            .iter()
            .any(|prefix| lowered.starts_with(prefix))
}
/// Returns true when `line` should always be surfaced as a comment.
///
/// A line qualifies when, after lower-casing, it contains one of the marker
/// words (`todo:`, `fixme:`, `hack:`, `note:`, `warning:`, `important:`)
/// anywhere, or starts with `///`, `/**`, `#!` or `##`.
///
/// The line is not trimmed here; the prefix checks apply to the text exactly
/// as passed in.
fn is_important_comment(line: &str) -> bool {
    const MARKERS: &[&str] = &[
        "todo:",
        "fixme:",
        "hack:",
        "note:",
        "warning:",
        "important:",
    ];
    const LEADING_TOKENS: &[&str] = &["///", "/**", "#!", "##"];

    let lowered = line.to_lowercase();
    if MARKERS.iter().any(|marker| lowered.contains(marker)) {
        return true;
    }
    LEADING_TOKENS
        .iter()
        .any(|token| lowered.starts_with(token))
}
/// Builds the de-duplication key for a line: its first three
/// whitespace-separated words joined by single spaces.
///
/// Lines with fewer than three words yield a shorter key; a blank line
/// yields an empty string.
fn extract_pattern_key(line: &str) -> String {
    let mut key = String::new();
    for word in line.split_whitespace().take(3) {
        // Words from `split_whitespace` are never empty, so an empty key
        // means this is the first word and needs no separator.
        if !key.is_empty() {
            key.push(' ');
        }
        key.push_str(word);
    }
    key
}
/// Computes size statistics comparing a summary with its source.
///
/// `compression_ratio` is `summary / original` line counts and
/// `reduction_percentage` is `(1 - ratio) * 100`. Both are `0.0` when
/// `original_lines` is empty, which avoids dividing by zero.
pub fn get_summary_stats(
    original_lines: &[String],
    summary_lines: &[SummaryItem],
) -> SummaryStats {
    let original_line_count = original_lines.len();
    let summary_line_count = summary_lines.len();

    let (compression_ratio, reduction_percentage) = if original_line_count == 0 {
        (0.0, 0.0)
    } else {
        let ratio = summary_line_count as f64 / original_line_count as f64;
        (ratio, (1.0 - ratio) * 100.0)
    };

    SummaryStats {
        original_line_count,
        summary_line_count,
        compression_ratio,
        reduction_percentage,
    }
}
}
impl SummaryExtraction for SummaryExtractor {
fn extract_summary(
&self,
lines: &[String],
language: Option<&str>,
level: SummaryLevel,
) -> Vec<SummaryItem> {
Self::extract_summary(lines, language, level)
}
fn is_summary_worthy(&self, line: &str, language: Option<&str>, level: SummaryLevel) -> bool {
Self::is_summary_worthy(line, language, level)
}
}
/// Size statistics comparing a summary with the lines it was built from,
/// as produced by `SummaryExtractor::get_summary_stats`.
#[derive(Debug, Clone)]
pub struct SummaryStats {
/// Number of lines in the original input.
pub original_line_count: usize,
/// Number of items in the summary.
pub summary_line_count: usize,
/// Summary count divided by original count (`0.0` for empty input).
pub compression_ratio: f64,
/// `(1 - compression_ratio) * 100` (`0.0` for empty input).
pub reduction_percentage: f64,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts borrowed literals into the owned lines the extractor takes.
    fn owned_lines(raw: &[&str]) -> Vec<String> {
        raw.iter().map(|text| (*text).to_string()).collect()
    }

    /// Python definitions and imports are kept; bodies are dropped.
    #[test]
    fn test_python_summary() {
        let source = owned_lines(&[
            "def main():",
            " print('hello')",
            "class MyClass:",
            " pass",
            "import os",
        ]);
        let result =
            SummaryExtractor::extract_summary(&source, Some("Python"), SummaryLevel::Standard);

        assert_eq!(result.len(), 3);
        assert!(result.iter().any(|item| item.line == "def main():"));
        assert!(result.iter().any(|item| item.line == "class MyClass:"));
        assert!(result.iter().any(|item| item.line == "import os"));
    }

    /// Rust items and `use` statements are kept; bodies are dropped.
    #[test]
    fn test_rust_summary() {
        let source = owned_lines(&[
            "fn main() {",
            " println!(\"Hello\");",
            "}",
            "struct Point {",
            " x: i32,",
            "}",
            "use std::collections::HashMap;",
        ]);
        let result =
            SummaryExtractor::extract_summary(&source, Some("Rust"), SummaryLevel::Standard);

        assert_eq!(result.len(), 3);
        assert!(result.iter().any(|item| item.line == "fn main() {"));
        assert!(result.iter().any(|item| item.line == "struct Point {"));
        assert!(result
            .iter()
            .any(|item| item.line == "use std::collections::HashMap;"));
    }

    /// JavaScript declarations and exports are kept; bodies are dropped.
    #[test]
    fn test_javascript_summary() {
        let source = owned_lines(&[
            "function hello() {",
            " console.log('hello');",
            "}",
            "class MyClass {",
            " constructor() {}",
            "}",
            "export default MyClass;",
        ]);
        let result =
            SummaryExtractor::extract_summary(&source, Some("JavaScript"), SummaryLevel::Standard);

        assert_eq!(result.len(), 3);
        assert!(result.iter().any(|item| item.line == "function hello() {"));
        assert!(result.iter().any(|item| item.line == "class MyClass {"));
        assert!(result
            .iter()
            .any(|item| item.line == "export default MyClass;"));
    }

    /// No input lines produce no summary items.
    #[test]
    fn test_empty_input() {
        let source: Vec<String> = Vec::new();
        let result =
            SummaryExtractor::extract_summary(&source, Some("Python"), SummaryLevel::Standard);

        assert!(result.is_empty());
    }

    /// At the standard level plain comments are dropped, doc comments kept.
    #[test]
    fn test_comments_filtering() {
        let source = owned_lines(&[
            "// This is a comment",
            "fn main() {",
            " // Another comment",
            " println!(\"Hello\");",
            "}",
            "/// This is an important doc comment",
        ]);
        let result =
            SummaryExtractor::extract_summary(&source, Some("Rust"), SummaryLevel::Standard);

        assert_eq!(result.len(), 2);
        assert!(result.iter().any(|item| item.line == "fn main() {"));
        assert!(result
            .iter()
            .any(|item| item.line == "/// This is an important doc comment"));
    }

    /// Lines whose first three words differ are all kept.
    #[test]
    fn test_deduplication() {
        let source = owned_lines(&["fn test1() {", "fn test2() {", "fn test3() {"]);
        let result =
            SummaryExtractor::extract_summary(&source, Some("Rust"), SummaryLevel::Standard);

        assert_eq!(result.len(), 3);
    }

    /// 20 summary items out of 100 lines give a 0.2 ratio and 80% reduction.
    #[test]
    #[allow(clippy::float_cmp)]
    fn test_summary_stats() {
        use crate::summary_item::SummaryItem;

        let original = vec!["line1".to_string(); 100];
        let condensed: Vec<SummaryItem> = (0..20)
            .map(|i| SummaryItem::new("line1", i + 1, None, "other"))
            .collect();

        let stats = SummaryExtractor::get_summary_stats(&original, &condensed);

        assert_eq!(stats.original_line_count, 100);
        assert_eq!(stats.summary_line_count, 20);
        assert_eq!(stats.compression_ratio, 0.2);
        assert_eq!(stats.reduction_percentage, 80.0);
    }

    /// With no language given, the generic matcher is used.
    #[test]
    fn test_generic_language() {
        let source = owned_lines(&[
            "function test() {",
            " return true;",
            "}",
            "import something;",
        ]);
        let result = SummaryExtractor::extract_summary(&source, None, SummaryLevel::Standard);

        assert_eq!(result.len(), 2);
        assert!(result.iter().any(|item| item.line == "function test() {"));
        assert!(result.iter().any(|item| item.line == "import something;"));
    }

    /// Minimal summaries omit assignments that detailed summaries include.
    #[test]
    fn test_summary_levels_adjust_content() {
        let source = owned_lines(&["fn main() {}", "let config = load();", "use crate::utils;"]);

        let minimal =
            SummaryExtractor::extract_summary(&source, Some("Rust"), SummaryLevel::Minimal);
        assert!(minimal.iter().any(|item| item.line == "fn main() {}"));
        assert!(
            !minimal.iter().any(|item| item.line.contains("let config")),
            "Minimal summaries should skip fine-grained assignments"
        );

        let detailed =
            SummaryExtractor::extract_summary(&source, Some("Rust"), SummaryLevel::Detailed);
        assert!(detailed.iter().any(|item| item.line.contains("let config")));
        assert!(detailed
            .iter()
            .any(|item| item.line.contains("use crate::utils;")));
    }

    /// Reported line numbers are 1-based positions in the input.
    #[test]
    fn test_line_numbers_in_summary() {
        let source = owned_lines(&[
            "// comment",
            "fn main() {",
            " println!(\"Hello\");",
            "}",
            "struct Foo {",
        ]);
        let result =
            SummaryExtractor::extract_summary(&source, Some("Rust"), SummaryLevel::Standard);

        let fn_item = result
            .iter()
            .find(|item| item.line == "fn main() {")
            .unwrap();
        assert_eq!(fn_item.line_number, 2);

        let struct_item = result
            .iter()
            .find(|item| item.line == "struct Foo {")
            .unwrap();
        assert_eq!(struct_item.line_number, 5);
    }
}