Skip to main content

locus_sdk/domain/
compression.rs

1use serde::{Deserialize, Serialize};
2
3#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
4#[serde(rename_all = "snake_case")]
5pub enum StopwordProfile {
6    Basic,
7    Extended,
8    Domain,
9}
10
11impl Default for StopwordProfile {
12    fn default() -> Self {
13        Self::Domain
14    }
15}
16
17#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
18#[serde(rename_all = "snake_case")]
19pub enum PhraseMode {
20    None,
21    RakeLite,
22}
23
24impl Default for PhraseMode {
25    fn default() -> Self {
26        Self::RakeLite
27    }
28}
29
30#[derive(Debug, Clone, Serialize, Deserialize)]
31#[serde(rename_all = "camelCase")]
32pub struct ManualCompressionRequest {
33    pub text: String,
34    pub max_anchors: usize,
35    pub max_points: usize,
36    pub min_token_length: usize,
37    pub stopword_profile: StopwordProfile,
38    pub phrase_mode: PhraseMode,
39    #[serde(default)]
40    pub stopwords_add: Vec<String>,
41    #[serde(default)]
42    pub stopwords_remove: Vec<String>,
43    #[serde(default)]
44    pub fillers_add: Vec<String>,
45    #[serde(default)]
46    pub fillers_remove: Vec<String>,
47    #[serde(default)]
48    pub negations_add: Vec<String>,
49    #[serde(default)]
50    pub negations_remove: Vec<String>,
51}
52
53impl Default for ManualCompressionRequest {
54    fn default() -> Self {
55        Self {
56            text: String::new(),
57            max_anchors: 5,
58            max_points: 5,
59            min_token_length: 3,
60            stopword_profile: StopwordProfile::Domain,
61            phrase_mode: PhraseMode::RakeLite,
62            stopwords_add: Vec::new(),
63            stopwords_remove: Vec::new(),
64            fillers_add: Vec::new(),
65            fillers_remove: Vec::new(),
66            negations_add: Vec::new(),
67            negations_remove: Vec::new(),
68        }
69    }
70}
71
72#[derive(Debug, Clone, Serialize, Deserialize)]
73#[serde(rename_all = "camelCase")]
74pub struct AnchorTerm {
75    pub term: String,
76    pub score: f32,
77    pub evidence_count: usize,
78    pub first_position: usize,
79}
80
81#[derive(Debug, Clone, Default, Serialize, Deserialize)]
82#[serde(rename_all = "camelCase")]
83pub struct ManualCompressionDiagnostics {
84    pub tokens_total: usize,
85    pub tokens_kept: usize,
86    pub stopwords_removed: usize,
87    pub filler_removed: usize,
88    pub sentences_total: usize,
89}
90
91#[derive(Debug, Clone, Serialize, Deserialize)]
92#[serde(rename_all = "camelCase")]
93pub struct ManualCompressionResult {
94    pub anchor_topic: String,
95    pub anchor_terms: Vec<AnchorTerm>,
96    pub key_points: Vec<String>,
97    pub salient_phrases: Vec<String>,
98    pub compression_ratio: f32,
99    pub discarded_noise_ratio: f32,
100    pub diagnostics: ManualCompressionDiagnostics,
101}