Skip to main content

lcsa_core/
signals.rs

1use std::fmt;
2use std::time::SystemTime;
3
4use serde::{Deserialize, Serialize};
5
6use crate::filesystem::SemanticSignal;
7use crate::topology::SignalSource;
8
/// The kinds of signal that can be selected by type: clipboard, selection and
/// focus.
///
/// Serde encodes the variants in snake_case, which yields the same strings
/// that `SignalType::as_str` returns.
9#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
10#[serde(rename_all = "snake_case")]
11pub enum SignalType {
    /// Clipboard signal (`"clipboard"`).
12    Clipboard,
    /// Selection signal (`"selection"`).
13    Selection,
    /// Focus signal (`"focus"`).
14    Focus,
15}
16
17impl SignalType {
18    pub fn as_str(self) -> &'static str {
19        match self {
20            SignalType::Clipboard => "clipboard",
21            SignalType::Selection => "selection",
22            SignalType::Focus => "focus",
23        }
24    }
25}
26
27impl fmt::Display for SignalType {
28    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
29        f.write_str(self.as_str())
30    }
31}
32
33#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
34#[serde(rename_all = "lowercase")]
35pub enum ContentType {
36    Text,
37    Image,
38    Html,
39    Code,
40    Unknown,
41}
42
43impl ContentType {
44    pub fn as_str(&self) -> &'static str {
45        match self {
46            ContentType::Text => "text",
47            ContentType::Image => "image",
48            ContentType::Html => "html",
49            ContentType::Code => "code",
50            ContentType::Unknown => "unknown",
51        }
52    }
53}
54
55impl fmt::Display for ContentType {
56    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
57        f.write_str(self.as_str())
58    }
59}
60
61#[derive(Clone, Debug, Serialize, Deserialize)]
62pub struct ClipboardSignal {
63    pub content_type: ContentType,
64    pub size_bytes: usize,
65    pub source_app: String,
66    pub likely_sensitive: bool,
67    pub likely_command: bool,
68    pub timestamp: SystemTime,
69}
70
71impl ClipboardSignal {
72    pub fn text(content: &str, source_app: String) -> Self {
73        Self {
74            content_type: detect_content_type(content),
75            size_bytes: content.len(),
76            source_app,
77            likely_sensitive: is_likely_sensitive_text(content),
78            likely_command: is_likely_command_text(content),
79            timestamp: SystemTime::now(),
80        }
81    }
82
83    pub fn image(size_bytes: usize, source_app: String) -> Self {
84        Self {
85            content_type: ContentType::Image,
86            size_bytes,
87            source_app,
88            likely_sensitive: false,
89            likely_command: false,
90            timestamp: SystemTime::now(),
91        }
92    }
93}
94
/// Metadata describing a text selection; the selected text itself is not
/// stored here.
95#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
96pub struct SelectionSignal {
    /// Classification of the selected text, as computed by `detect_content_type`.
97    pub content_type: ContentType,
    /// Size of the selected text in bytes (`content.len()` in `SelectionSignal::text`).
98    pub size_bytes: usize,
    /// Application name supplied by the caller.
99    pub source_app: String,
    /// Result of `is_likely_sensitive_text` for the selected text.
100    pub likely_sensitive: bool,
    /// Caller-supplied flag; presumably whether the selection sits in an
    /// editable control (TODO confirm against callers).
101    pub is_editable: bool,
    /// `SystemTime::now()` at the moment the signal was constructed.
102    pub timestamp: SystemTime,
103}
104
105impl SelectionSignal {
106    pub fn text(content: &str, source_app: String, is_editable: bool) -> Self {
107        Self {
108            content_type: detect_content_type(content),
109            size_bytes: content.len(),
110            source_app,
111            likely_sensitive: is_likely_sensitive_text(content),
112            is_editable,
113            timestamp: SystemTime::now(),
114        }
115    }
116}
117
/// Kind of UI element carried by a `FocusSignal`.
///
/// Serde encodes the variants in snake_case, which yields the same strings
/// that `FocusTarget::as_str` returns.
118#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
119#[serde(rename_all = "snake_case")]
120pub enum FocusTarget {
    /// Named `"application"`.
121    Application,
    /// Named `"window"`.
122    Window,
    /// Named `"text_input"`.
123    TextInput,
    /// Named `"browser"`.
124    Browser,
    /// Named `"terminal"`.
125    Terminal,
    /// Named `"unknown"`.
126    Unknown,
127}
128
129impl FocusTarget {
130    pub fn as_str(&self) -> &'static str {
131        match self {
132            FocusTarget::Application => "application",
133            FocusTarget::Window => "window",
134            FocusTarget::TextInput => "text_input",
135            FocusTarget::Browser => "browser",
136            FocusTarget::Terminal => "terminal",
137            FocusTarget::Unknown => "unknown",
138        }
139    }
140}
141
142impl fmt::Display for FocusTarget {
143    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
144        f.write_str(self.as_str())
145    }
146}
147
/// Metadata describing a focus signal.
148#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
149pub struct FocusSignal {
    /// Application name supplied by the caller.
150    pub source_app: String,
    /// Kind of UI element supplied by the caller.
151    pub target: FocusTarget,
    /// Caller-supplied flag; presumably whether the focused element accepts
    /// text input (TODO confirm against callers).
152    pub is_editable: bool,
    /// `SystemTime::now()` at the moment the signal was constructed.
153    pub timestamp: SystemTime,
154}
155
156impl FocusSignal {
157    pub fn new(source_app: String, target: FocusTarget, is_editable: bool) -> Self {
158        Self {
159            source_app,
160            target,
161            is_editable,
162            timestamp: SystemTime::now(),
163        }
164    }
165}
166
/// Clipboard content together with capture metadata.
///
/// Unlike `ClipboardSignal`, this holds the actual payload; use
/// `redacted_preview` when a display-safe summary is needed.
167#[derive(Clone, Debug, Serialize, Deserialize)]
168pub struct ClipboardContent {
    /// The captured text or image description.
169    pub payload: ClipboardPayload,
    /// Presumably the application that owned the clipboard when it was
    /// captured (TODO confirm against the code that builds this struct).
170    pub source_app: String,
    /// Presumably the wall-clock time of capture (TODO confirm).
171    pub captured_at: SystemTime,
172}
173
174impl ClipboardContent {
175    pub fn redacted_preview(&self) -> String {
176        match &self.payload {
177            ClipboardPayload::Text(text) if is_likely_sensitive_text(text) => {
178                format!("{} chars redacted", text.chars().count())
179            }
180            ClipboardPayload::Text(text) => text.chars().take(80).collect(),
181            ClipboardPayload::Image {
182                width,
183                height,
184                size_bytes,
185            } => format!("image {}x{} ({} bytes)", width, height, size_bytes),
186        }
187    }
188}
189
190#[derive(Clone, Debug, Serialize, Deserialize)]
191#[serde(tag = "kind", rename_all = "snake_case")]
192pub enum ClipboardPayload {
193    Text(String),
194    Image {
195        width: usize,
196        height: usize,
197        size_bytes: usize,
198    },
199}
200
/// A signal from any supported source, wrapped in a single enum.
///
/// Serialized in serde's internally tagged form: the snake_case variant name
/// is stored under the `signal` key next to the wrapped value's own fields.
///
/// NOTE(review): internal tagging only works when the wrapped value
/// serializes as a struct/map. The three signal structs in this file do;
/// `SemanticSignal` is defined in `crate::filesystem` — confirm it does too.
201#[derive(Clone, Debug, Serialize, Deserialize)]
202#[serde(tag = "signal", rename_all = "snake_case")]
203pub enum StructuralSignal {
    /// Clipboard metadata.
204    Clipboard(ClipboardSignal),
    /// Selection metadata.
205    Selection(SelectionSignal),
    /// Focus metadata.
206    Focus(FocusSignal),
    /// Filesystem signal; it has no corresponding `SignalType`.
207    Filesystem(SemanticSignal),
208}
209
210impl StructuralSignal {
211    pub fn signal_type(&self) -> Option<SignalType> {
212        match self {
213            StructuralSignal::Clipboard(_) => Some(SignalType::Clipboard),
214            StructuralSignal::Selection(_) => Some(SignalType::Selection),
215            StructuralSignal::Focus(_) => Some(SignalType::Focus),
216            StructuralSignal::Filesystem(_) => None,
217        }
218    }
219
220    pub fn source(&self) -> SignalSource {
221        match self {
222            StructuralSignal::Clipboard(_) => SignalSource::Clipboard,
223            StructuralSignal::Selection(_) => SignalSource::Selection,
224            StructuralSignal::Focus(_) => SignalSource::Focus,
225            StructuralSignal::Filesystem(_) => SignalSource::Filesystem,
226        }
227    }
228
229    pub fn matches(&self, signal_type: SignalType) -> bool {
230        self.signal_type() == Some(signal_type)
231    }
232}
233
234pub fn detect_content_type(content: &str) -> ContentType {
235    let trimmed = content.trim();
236    let lowercase = trimmed.to_ascii_lowercase();
237
238    if lowercase.starts_with("<!doctype html")
239        || lowercase.starts_with("<html")
240        || (lowercase.contains("<body") && lowercase.contains("</"))
241    {
242        return ContentType::Html;
243    }
244
245    let code_markers = [
246        "fn ",
247        "def ",
248        "class ",
249        "import ",
250        "from ",
251        "const ",
252        "let ",
253        "var ",
254        "function ",
255        "#include",
256        "SELECT ",
257        "{\n",
258    ];
259
260    if code_markers.iter().any(|marker| trimmed.contains(marker))
261        || (trimmed.lines().count() > 2
262            && trimmed.contains('{')
263            && trimmed.contains('}')
264            && trimmed.contains(';'))
265    {
266        return ContentType::Code;
267    }
268
269    if trimmed.is_empty() {
270        ContentType::Unknown
271    } else {
272        ContentType::Text
273    }
274}
275
276pub fn is_likely_sensitive_text(content: &str) -> bool {
277    let trimmed = content.trim();
278
279    if trimmed.is_empty() || trimmed.contains('\n') {
280        return false;
281    }
282
283    let jwt_like = trimmed.matches('.').count() == 2 && trimmed.len() > 20;
284    let token_prefix = ["sk-", "ghp_", "xoxb-", "AKIA", "-----BEGIN", "eyJ"];
285
286    // Require higher entropy to avoid false positives like "myFile123" or "config_v2"
287    // Real secrets/tokens have high entropy (5.0+ bits), normal text has ~4.0-4.5 bits
288    let looks_like_secret = trimmed.len() >= 16
289        && !trimmed.contains(' ')
290        && trimmed.chars().any(|c| c.is_ascii_alphabetic())
291        && trimmed.chars().any(|c| c.is_ascii_digit())
292        && shannon_entropy(trimmed) > 4.0;
293
294    jwt_like
295        || token_prefix
296            .iter()
297            .any(|prefix| trimmed.starts_with(prefix))
298        || looks_like_secret
299}
300
/// Computes the Shannon entropy of `input` in bits per byte.
///
/// The distribution is taken over the UTF-8 bytes of `input`, not over its
/// characters. Empty input has an entropy of `0.0`.
fn shannon_entropy(input: &str) -> f64 {
    let bytes = input.as_bytes();
    if bytes.is_empty() {
        return 0.0;
    }

    // Occurrence count for every possible byte value.
    let mut histogram = [0u32; 256];
    bytes
        .iter()
        .for_each(|&byte| histogram[usize::from(byte)] += 1);

    let total = bytes.len() as f64;
    histogram
        .iter()
        .copied()
        .filter(|&occurrences| occurrences != 0)
        .map(|occurrences| {
            let probability = f64::from(occurrences) / total;
            -probability * probability.log2()
        })
        .sum()
}
321
/// Heuristically decides whether `content` looks like a single shell command.
///
/// The text is trimmed and an optional leading `$ ` prompt is removed. It is
/// then considered a command when it starts with a known tool name followed
/// by a space (`cargo `, `git `, ...), or when it is `ls` on its own or `ls`
/// followed by a space. Empty and multi-line text is never a command.
pub fn is_likely_command_text(content: &str) -> bool {
    // Commands that are only recognized when followed by an argument.
    const ARGUMENT_COMMANDS: &[&str] = &[
        "cargo ", "git ", "npm ", "pnpm ", "yarn ", "python ", "pip ", "uv ", "docker ",
        "kubectl ", "cd ", "mkdir ", "rm ", "cp ", "mv ",
    ];
    // Commands that are also meaningful without any argument.
    const BARE_COMMANDS: &[&str] = &["ls"];

    let trimmed = content.trim();
    if trimmed.is_empty() || trimmed.contains('\n') {
        return false;
    }

    let command_line = trimmed.strip_prefix("$ ").unwrap_or(trimmed);

    if ARGUMENT_COMMANDS
        .iter()
        .any(|&prefix| command_line.starts_with(prefix))
    {
        return true;
    }

    // Require the command name to end at a word boundary so that text that
    // merely begins with the same letters (e.g. "lstm notes") is not matched.
    BARE_COMMANDS.iter().any(|&name| {
        command_line
            .strip_prefix(name)
            .map_or(false, |rest| rest.is_empty() || rest.starts_with(' '))
    })
}
339
#[cfg(test)]
mod tests {
    //! Unit tests for the content heuristics and signal constructors.

    use super::*;

    #[test]
    fn detects_html() {
        assert_eq!(
            detect_content_type("<!DOCTYPE html><html></html>"),
            ContentType::Html
        );
    }

    #[test]
    fn detects_code() {
        assert_eq!(
            detect_content_type("fn main() {\n println!(\"hi\");\n}"),
            ContentType::Code
        );
    }

    #[test]
    fn detects_plain_text() {
        assert_eq!(detect_content_type("hello world"), ContentType::Text);
    }

    #[test]
    fn detects_unknown_for_blank_input() {
        assert_eq!(detect_content_type(""), ContentType::Unknown);
        assert_eq!(detect_content_type(" \n\t"), ContentType::Unknown);
    }

    #[test]
    fn marks_sensitive_tokens() {
        assert!(is_likely_sensitive_text("sk-live-1234567890abcdef"));
    }

    #[test]
    fn rejects_false_positive_filenames() {
        // Short identifiers that mix letters and digits must not be flagged.
        assert!(!is_likely_sensitive_text("myFile123"));
        assert!(!is_likely_sensitive_text("config_v2"));
        assert!(!is_likely_sensitive_text("user2024"));
        assert!(!is_likely_sensitive_text("version1.0"));
        assert!(!is_likely_sensitive_text("data_backup_01"));
    }

    #[test]
    fn rejects_blank_and_multi_line_text() {
        assert!(!is_likely_sensitive_text(""));
        assert!(!is_likely_sensitive_text("first line\nsecond line"));
    }

    #[test]
    fn detects_known_token_prefixes() {
        assert!(is_likely_sensitive_text(
            "ghp_aBcDeFgHiJkLmNoPqRsTuVwXyZ123456"
        ));
        assert!(is_likely_sensitive_text(
            "sk-proj-abcdefghijklmnop1234567890"
        ));
        // JWT-like tokens
        assert!(is_likely_sensitive_text(
            "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U"
        ));
    }

    #[test]
    fn detects_high_entropy_secrets() {
        // No known prefix and no dots: only the entropy heuristic can flag this.
        assert!(is_likely_sensitive_text("aB3xK9mP2qR7sT4uV8wY1zC5dE6fG0hI"));
    }

    #[test]
    fn shannon_entropy_values() {
        // Low entropy - repeated characters
        assert!(shannon_entropy("aaaaaaaaaa") < 1.0);
        // Medium entropy - normal text
        let medium = shannon_entropy("hello_world");
        assert!(medium > 2.0 && medium < 4.0);
        // High entropy - random-looking
        let high = shannon_entropy("aB3xK9mP2qR7sT4uV8wY1zC5dE6fG0hI");
        assert!(high > 4.5);
    }

    #[test]
    fn marks_commands() {
        assert!(is_likely_command_text("cargo test"));
        assert!(is_likely_command_text("$ git status"));
    }

    #[test]
    fn rejects_non_commands() {
        assert!(!is_likely_command_text(""));
        assert!(!is_likely_command_text("hello world"));
        assert!(!is_likely_command_text("cargo test\ncargo build"));
    }

    #[test]
    fn selection_signal_tracks_text_metadata() {
        let signal = SelectionSignal::text("let value = 1;", "editor".to_string(), true);
        assert_eq!(signal.content_type, ContentType::Code);
        assert!(signal.is_editable);
        assert_eq!(signal.size_bytes, 14);
    }

    #[test]
    fn structural_signal_maps_to_source() {
        assert_eq!(
            StructuralSignal::Focus(FocusSignal::new(
                "terminal".to_string(),
                FocusTarget::Terminal,
                true,
            ))
            .source(),
            SignalSource::Focus
        );
    }

    #[test]
    fn structural_signal_reports_signal_type() {
        let focus = StructuralSignal::Focus(FocusSignal::new(
            "terminal".to_string(),
            FocusTarget::Terminal,
            true,
        ));
        assert_eq!(focus.signal_type(), Some(SignalType::Focus));
        assert!(focus.matches(SignalType::Focus));
        assert!(!focus.matches(SignalType::Clipboard));
    }
}