use crate::seed::{ProjectRecord, ProjectStatement};
/// Default statement cap (30).
///
/// NOTE(review): not referenced in the visible part of this file — presumably meant
/// to be passed to `SummarizationConfig::with_max_statements` by callers; confirm.
pub const DEFAULT_MAX_STATEMENTS: usize = 30;
/// Category attached to a single statement.
///
/// `Misc` is both the `Default` value and the fallback returned by
/// [`StatementKind::parse`] and [`StatementKind::from_slug`] for input they do
/// not recognize.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum StatementKind {
    /// Counted as essential by [`StatementKind::is_essential`].
    Identity,
    /// Counted as essential by [`StatementKind::is_essential`].
    Purpose,
    /// Counted as essential by [`StatementKind::is_essential`].
    Language,
    /// Counted as essential by [`StatementKind::is_essential`].
    Stars,
    Feature,
    UseCase,
    /// Counted as boilerplate by [`StatementKind::is_boilerplate`].
    Install,
    /// Counted as boilerplate by [`StatementKind::is_boilerplate`].
    Example,
    /// Fallback category; neither essential nor boilerplate.
    #[default]
    Misc,
}

impl StatementKind {
    /// Parses a textual kind label.
    ///
    /// Surrounding whitespace and ASCII case are ignored. `use_case`,
    /// `usecase` and `use-case` are all accepted for [`Self::UseCase`].
    /// Anything else yields [`Self::Misc`].
    #[must_use]
    pub fn parse(value: &str) -> Self {
        // All labels are lowercase ASCII, so a case-insensitive comparison is
        // equivalent to lowercasing the input first.
        const LABELS: [(&str, StatementKind); 10] = [
            ("identity", StatementKind::Identity),
            ("purpose", StatementKind::Purpose),
            ("language", StatementKind::Language),
            ("stars", StatementKind::Stars),
            ("feature", StatementKind::Feature),
            ("use_case", StatementKind::UseCase),
            ("usecase", StatementKind::UseCase),
            ("use-case", StatementKind::UseCase),
            ("install", StatementKind::Install),
            ("example", StatementKind::Example),
        ];
        let label = value.trim();
        LABELS
            .iter()
            .find(|(name, _)| name.eq_ignore_ascii_case(label))
            .map_or(Self::Misc, |&(_, kind)| kind)
    }

    /// Maps a `summary_kind_*` slug to its kind.
    ///
    /// The match is exact (case-sensitive, no trimming). No slug maps to
    /// [`Self::Identity`]; every unrecognized slug yields [`Self::Misc`].
    #[must_use]
    pub fn from_slug(slug: &str) -> Self {
        match slug.strip_prefix("summary_kind_") {
            Some("install") => Self::Install,
            Some("example") => Self::Example,
            Some("language") => Self::Language,
            Some("stars") => Self::Stars,
            Some("purpose") => Self::Purpose,
            Some("use_case") => Self::UseCase,
            Some("feature") => Self::Feature,
            _ => Self::Misc,
        }
    }

    /// Returns `true` for `Identity`, `Purpose`, `Language` and `Stars`.
    #[must_use]
    pub const fn is_essential(self) -> bool {
        match self {
            Self::Identity | Self::Purpose | Self::Language | Self::Stars => true,
            Self::Feature | Self::UseCase | Self::Install | Self::Example | Self::Misc => false,
        }
    }

    /// Returns `true` for `Install` and `Example`.
    #[must_use]
    pub const fn is_boilerplate(self) -> bool {
        match self {
            Self::Install | Self::Example => true,
            Self::Identity
            | Self::Purpose
            | Self::Language
            | Self::Stars
            | Self::Feature
            | Self::UseCase
            | Self::Misc => false,
        }
    }
}
/// A single statement: its text, its category and a weight.
///
/// `summarize` in this file ranks statements by `weight`, highest first.
#[derive(Debug, Clone)]
pub struct Statement {
    /// The statement text.
    pub text: String,
    /// Category of the statement.
    pub kind: StatementKind,
    /// Ranking weight; larger values are kept first by `summarize`.
    pub weight: u8,
}

impl Statement {
    /// Builds a statement from its three parts.
    #[must_use]
    pub fn new(text: impl Into<String>, kind: StatementKind, weight: u8) -> Self {
        let text = text.into();
        Self { text, kind, weight }
    }

    /// Converts a seed statement: the text is cloned, the textual kind is run
    /// through [`StatementKind::parse`], and the weight is copied as-is.
    #[must_use]
    pub fn from_seed(seed: &ProjectStatement) -> Self {
        Self::new(
            seed.text.clone(),
            StatementKind::parse(&seed.kind),
            seed.weight,
        )
    }
}
/// How aggressively a set of statements is condensed (or expanded).
///
/// The default is [`SummarizationMode::Standard`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum SummarizationMode {
    /// Target size of 0 %.
    Topic,
    /// Target size of 20 %.
    Short,
    /// Target size of 50 %.
    #[default]
    Standard,
    /// Target size of 100 %.
    Full,
    /// Target size of 200 %.
    Expand,
}

impl SummarizationMode {
    /// Target output size as a percentage of the input size.
    #[must_use]
    pub const fn target_percent(self) -> u32 {
        match self {
            Self::Expand => 200,
            Self::Full => 100,
            Self::Standard => 50,
            Self::Short => 20,
            Self::Topic => 0,
        }
    }
}
/// Options controlling how statements are summarized.
#[derive(Debug, Clone)]
pub struct SummarizationConfig {
    /// Summarization mode; determines the mode-derived statement budget.
    pub mode: SummarizationMode,
    /// Optional hard cap on the number of statements; `None` means only the
    /// mode-derived budget applies.
    pub max_statements: Option<usize>,
    /// Language code (defaults to `"en"`).
    pub language: String,
    /// Whether `summarize` rewrites statement text with `apply_compound_words`
    /// (non-`Expand` modes only).
    pub use_compound_words: bool,
    /// Whether `summarize` adds `apply_semantic_primes` paraphrases in
    /// `Expand` mode.
    pub use_semantic_primes: bool,
    /// Whether `summarize` drops statements whose kind is boilerplate.
    pub drop_boilerplate: bool,
}

impl Default for SummarizationConfig {
    /// Standard mode, no cap, English, both rewriters off, boilerplate dropped.
    fn default() -> Self {
        Self {
            mode: SummarizationMode::Standard,
            max_statements: None,
            language: String::from("en"),
            use_compound_words: false,
            use_semantic_primes: false,
            drop_boilerplate: true,
        }
    }
}

impl SummarizationConfig {
    /// Returns the config with `mode` replaced.
    #[must_use]
    pub const fn with_mode(mut self, mode: SummarizationMode) -> Self {
        self.mode = mode;
        self
    }

    /// Returns the config with `language` replaced.
    #[must_use]
    pub fn with_language(self, language: impl Into<String>) -> Self {
        Self {
            language: language.into(),
            ..self
        }
    }

    /// Returns the config with the statement cap set to `Some(cap)`.
    #[must_use]
    pub const fn with_max_statements(mut self, cap: usize) -> Self {
        self.max_statements = Some(cap);
        self
    }

    /// Number of statements to keep out of `input_count`.
    ///
    /// Returns 0 for empty input. Otherwise the mode supplies a budget —
    /// 1 for `Topic`, everything for `Full`/`Expand`, and the rounded
    /// `target_percent` share (at least 1) for `Short`/`Standard` — which is
    /// then limited by `max_statements` when set. The result is never below 1
    /// for non-empty input, even when the cap is 0.
    #[must_use]
    pub fn effective_max_statements(&self, input_count: usize) -> usize {
        if input_count == 0 {
            return 0;
        }

        let mode_budget = match self.mode {
            SummarizationMode::Topic => 1,
            SummarizationMode::Full | SummarizationMode::Expand => input_count,
            SummarizationMode::Short | SummarizationMode::Standard => {
                let percent = self.mode.target_percent() as usize;
                // `+ 50` rounds to the nearest whole statement.
                let rounded = (input_count * percent + 50) / 100;
                rounded.max(1)
            }
        };

        let limited = match self.max_statements {
            Some(cap) => cap.min(mode_budget),
            None => mode_budget,
        };
        limited.max(1)
    }
}
/// Splits free text into sentence-level [`Statement`]s.
///
/// A sentence ends at a newline or after a run of sentence punctuation
/// (`.`, `!`, `?`, `。`, `…`). Consecutive punctuation marks stay attached to
/// the sentence they close, so `"Wait... what?!"` yields `"Wait..."` and
/// `"what?!"` instead of stray punctuation-only statements. Each sentence is
/// handed to `push_sentence`, which trims, classifies and weights it.
///
/// Note that every `.` is treated as a terminator, so abbreviations and
/// decimal numbers are still split.
#[must_use]
pub fn formalize(text: &str) -> Vec<Statement> {
    let is_punct = |c: char| matches!(c, '.' | '!' | '?' | '。' | '…');

    let mut out = Vec::new();
    let mut buffer = String::new();
    let mut chars = text.chars().peekable();
    while let Some(ch) = chars.next() {
        buffer.push(ch);
        // A newline always closes the sentence. Punctuation closes it only
        // when no further punctuation follows, so "..." or "?!" is kept whole.
        let at_boundary =
            ch == '\n' || (is_punct(ch) && !chars.peek().is_some_and(|&next| is_punct(next)));
        if at_boundary {
            push_sentence(&mut buffer, &mut out);
        }
    }
    // Flush trailing text that had no terminator.
    push_sentence(&mut buffer, &mut out);
    out
}
/// Turns the buffered text into a [`Statement`] and appends it to `out`.
///
/// Newlines are removed and surrounding whitespace is trimmed; if nothing is
/// left, no statement is produced. The buffer is always left empty.
fn push_sentence(buffer: &mut String, out: &mut Vec<Statement>) {
    buffer.retain(|c| c != '\n');
    let sentence = buffer.trim();
    if !sentence.is_empty() {
        let kind = classify_sentence(sentence);
        out.push(Statement::new(sentence, kind, weight_for_kind(kind)));
    }
    buffer.clear();
}
/// Classifies a sentence by looking for cue words from the seed lexicon.
///
/// The lowercased sentence is checked (by substring match) against the words
/// of every lexicon meaning carrying the summary-classification-cue role, in
/// the order the lexicon yields them. The first meaning with a matching cue
/// decides the kind via [`StatementKind::from_slug`], except that a
/// `Language` match is skipped when the sentence has more than 12 words.
/// Returns [`StatementKind::Misc`] when nothing matches.
#[must_use]
pub fn classify_sentence(sentence: &str) -> StatementKind {
    let lower = sentence.to_lowercase();
    let word_count = lower.split_whitespace().count();
    for meaning in
        crate::seed::lexicon().meanings_with_role(crate::seed::ROLE_SUMMARY_CLASSIFICATION_CUE)
    {
        let cue_found = meaning.words().any(|cue| lower.contains(cue));
        if cue_found {
            let kind = StatementKind::from_slug(&meaning.slug);
            let too_long_for_language = kind == StatementKind::Language && word_count > 12;
            if !too_long_for_language {
                return kind;
            }
        }
    }
    StatementKind::Misc
}
/// Weight given to a freshly classified sentence of the given kind
/// (used by `push_sentence`). Arms are listed from heaviest to lightest.
const fn weight_for_kind(kind: StatementKind) -> u8 {
    match kind {
        StatementKind::Purpose => 100,
        StatementKind::Identity => 90,
        StatementKind::Feature => 70,
        StatementKind::UseCase => 65,
        StatementKind::Language => 60,
        StatementKind::Stars => 55,
        StatementKind::Misc => 30,
        StatementKind::Example => 15,
        StatementKind::Install => 10,
    }
}
#[must_use]
pub fn summarize(statements: &[Statement], config: &SummarizationConfig) -> Vec<Statement> {
if statements.is_empty() {
return Vec::new();
}
let mut filtered: Vec<Statement> = statements
.iter()
.filter(|s| !(config.drop_boilerplate && s.kind.is_boilerplate()))
.cloned()
.collect();
filtered.sort_by_key(|stmt| core::cmp::Reverse(stmt.weight));
let cap = config.effective_max_statements(filtered.len());
filtered.truncate(cap);
if config.mode == SummarizationMode::Expand {
let mut expanded: Vec<Statement> = Vec::with_capacity(filtered.len() * 2);
for stmt in &filtered {
expanded.push(stmt.clone());
if config.use_semantic_primes {
let mut paraphrase = stmt.clone();
paraphrase.text = apply_semantic_primes(&stmt.text, &config.language);
paraphrase.weight = stmt.weight.saturating_sub(5);
if paraphrase.text != stmt.text {
expanded.push(paraphrase);
}
}
}
return expanded;
}
if config.use_compound_words {
for stmt in &mut filtered {
stmt.text = apply_compound_words(&stmt.text, &config.language);
}
}
filtered
}
/// Joins statements back into running text.
///
/// Each statement's text is trimmed; empty ones are skipped. A `.` is appended
/// to any sentence that does not already end with terminal punctuation, and
/// the sentences are joined with single spaces.
#[must_use]
pub fn deformalize(statements: &[Statement]) -> String {
    let mut sentences: Vec<String> = Vec::with_capacity(statements.len());
    for statement in statements {
        let body = statement.text.trim();
        if body.is_empty() {
            continue;
        }
        let mut sentence = String::from(body);
        if !ends_with_terminal_punct(body) {
            sentence.push('.');
        }
        sentences.push(sentence);
    }
    sentences.join(" ")
}
/// Reports whether the last character of `text` is one of
/// `.`, `!`, `?`, `。`, `…`, `」` or `"`. Empty text yields `false`.
fn ends_with_terminal_punct(text: &str) -> bool {
    matches!(
        text.chars().next_back(),
        Some('.' | '!' | '?' | '。' | '…' | '」' | '"')
    )
}
/// Produces a short topic line of at most five words.
///
/// A non-blank `explicit_topic` always wins. Otherwise the text of the
/// highest-weight statement is used; when several statements share the top
/// weight, the earliest one is chosen, matching the order `summarize`
/// produces with its stable descending sort. Returns an empty string when
/// there is neither an explicit topic nor any statement.
#[must_use]
pub fn to_topic(explicit_topic: &str, statements: &[Statement]) -> String {
    const TOPIC_MAX_WORDS: usize = 5;

    let candidate = explicit_topic.trim();
    if !candidate.is_empty() {
        return clamp_words(candidate, TOPIC_MAX_WORDS);
    }
    statements
        .iter()
        // `max_by_key` returns the LAST of several equal maxima; scanning in
        // reverse makes ties resolve to the first statement in input order.
        .rev()
        .max_by_key(|s| s.weight)
        .map(|s| clamp_words(&s.text, TOPIC_MAX_WORDS))
        .unwrap_or_default()
}
/// Keeps the first `max_words` whitespace-separated words of `text`, joined
/// by single spaces, and strips trailing punctuation.
///
/// Trailing whitespace is stripped together with the punctuation, so a
/// detached final mark (`"Bonjour le monde !"`, `"wait ..."`) does not leave a
/// dangling space behind.
fn clamp_words(text: &str, max_words: usize) -> String {
    text.split_whitespace()
        .take(max_words)
        .collect::<Vec<_>>()
        .join(" ")
        .trim_end_matches(|c: char| {
            c.is_whitespace() || matches!(c, '.' | ',' | '!' | '?' | ';' | ':' | '…' | '」' | '"')
        })
        .to_string()
}
/// Shortens wordy phrases to a compact equivalent.
///
/// Uses the Russian table for `"ru"` and the English table for every other
/// language code. Replacements are case-sensitive and applied in table order,
/// each one to the result of the previous.
#[must_use]
pub fn apply_compound_words(text: &str, language: &str) -> String {
    const RUSSIAN: &[(&str, &str)] = &[
        ("в которой ", "где "),
        ("для того чтобы ", "чтобы "),
        ("к примеру", "например"),
    ];
    const ENGLISH: &[(&str, &str)] = &[
        ("in order to ", "to "),
        ("for the purpose of ", "for "),
        ("a number of ", "several "),
        ("user interface", "UI"),
        ("command line interface", "CLI"),
        ("artificial intelligence", "AI"),
    ];

    let table = if language == "ru" { RUSSIAN } else { ENGLISH };
    table
        .iter()
        .fold(text.to_string(), |acc, &(long, short)| {
            acc.replace(long, short)
        })
}
/// Rewrites selected terms into simpler wording.
///
/// Uses the Russian table for `"ru"` and the English table for every other
/// language code. Replacements are case-sensitive and applied in table order;
/// the longer phrase "automation of automation" is listed before "automation"
/// so it is handled first.
#[must_use]
pub fn apply_semantic_primes(text: &str, language: &str) -> String {
    const RUSSIAN: &[(&str, &str)] = &[
        ("автоматизация", "когда машина делает"),
        ("оркестрирует", "управляет вместе"),
        ("делегирование", "передача работы"),
        ("детерминированный", "всегда одинаковый"),
    ];
    const ENGLISH: &[(&str, &str)] = &[
        ("orchestrates", "controls many"),
        (
            "automation of automation",
            "machine that makes other machines do",
        ),
        ("automation", "machine doing"),
        ("delegating", "giving work to"),
        ("deterministic", "always the same"),
        ("multilingual", "in many languages"),
        ("symbolic", "rule-based"),
    ];

    let table = match language {
        "ru" => RUSSIAN,
        _ => ENGLISH,
    };
    let mut rewritten = String::from(text);
    for &(term, plain) in table {
        rewritten = rewritten.replace(term, plain);
    }
    rewritten
}
/// Renders a textual description of `project` according to `config`.
///
/// The project's statements for `config.language` are converted with
/// [`Statement::from_seed`]. In `Topic` mode the result is `to_topic` applied
/// to the project's topic for that language (falling back to the statements);
/// in every other mode the statements are summarized and joined with
/// `deformalize`.
#[must_use]
pub fn describe_project(project: &ProjectRecord, config: &SummarizationConfig) -> String {
    let language = &config.language;
    let statements: Vec<Statement> = project
        .statements_for(language)
        .iter()
        .map(Statement::from_seed)
        .collect();

    match config.mode {
        SummarizationMode::Topic => to_topic(project.topic_for(language), &statements),
        SummarizationMode::Short
        | SummarizationMode::Standard
        | SummarizationMode::Full
        | SummarizationMode::Expand => deformalize(&summarize(&statements, config)),
    }
}
mod dialog;
mod markdown;
pub use dialog::{formalize_dialog, generate_chat_title, summarize_dialog, DialogTurn};
pub use markdown::{describe_readme, formalize_markdown, strip_markdown_noise};