Skip to main content

agentic_codebase/collective/
privacy.rs

//! Privacy-preserving extraction.
//!
//! Determines which data items are safe to share with the collective
//! intelligence network and which must remain private.
5
6use serde::{Deserialize, Serialize};
7
8/// Data that is safe to share with the collective.
9#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
10pub enum Shareable {
11    /// Structural pattern signature (no actual code).
12    PatternSignature(String),
13    /// Aggregated complexity statistics.
14    ComplexityStats {
15        /// Language.
16        language: String,
17        /// Average complexity.
18        avg_complexity: u32,
19        /// Total functions analysed.
20        function_count: u32,
21    },
22    /// Anonymous mistake category counts.
23    MistakeCounts {
24        /// Category name.
25        category: String,
26        /// Number of occurrences.
27        count: u32,
28    },
29    /// Language usage distribution.
30    LanguageDistribution {
31        /// Language name.
32        language: String,
33        /// Percentage of codebase (0-100).
34        percentage: u32,
35    },
36}
37
38/// Data that must NOT be shared with the collective.
39#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
40pub enum NonShareable {
41    /// Actual source code.
42    SourceCode,
43    /// File paths (may reveal project structure).
44    FilePaths,
45    /// Symbol names (may reveal proprietary API names).
46    SymbolNames,
47    /// Commit messages (may contain sensitive info).
48    CommitMessages,
49    /// Author names or emails.
50    AuthorIdentity,
51    /// Content hashes (could be used to fingerprint code).
52    ContentHashes,
53    /// API keys, tokens, or credentials found in code.
54    Credentials,
55}
56
57/// Check whether a data item is safe to share.
58///
59/// Returns `true` if the item is shareable, `false` if it must stay private.
60///
61/// # Rules
62///
63/// Shareable data:
64/// - Structural pattern signatures (abstracted, no real code)
65/// - Aggregated statistics (complexity averages, counts)
66/// - Anonymous mistake category counts
67/// - Language usage distributions
68///
69/// Non-shareable data:
70/// - Source code
71/// - File paths
72/// - Symbol names
73/// - Commit messages
74/// - Author identities
75/// - Content hashes
76/// - Credentials
77pub fn is_shareable(item: &ShareableCheck) -> bool {
78    match item {
79        ShareableCheck::PatternSignature(_) => true,
80        ShareableCheck::AggregateStats { .. } => true,
81        ShareableCheck::MistakeCount { .. } => true,
82        ShareableCheck::LanguageDistribution { .. } => true,
83        ShareableCheck::SourceCode(_) => false,
84        ShareableCheck::FilePath(_) => false,
85        ShareableCheck::SymbolName(_) => false,
86        ShareableCheck::CommitMessage(_) => false,
87        ShareableCheck::AuthorIdentity(_) => false,
88        ShareableCheck::ContentHash(_) => false,
89        ShareableCheck::RawText(text) => !looks_like_credentials(text),
90    }
91}
92
/// A candidate item whose shareability is to be decided.
///
/// The variant states what kind of data the payload holds.
#[derive(Debug, Clone)]
pub enum ShareableCheck {
    /// Signature of a structural pattern.
    PatternSignature(String),
    /// A single aggregated statistic.
    AggregateStats {
        /// Name of the statistic.
        name: String,
        /// Value of the statistic.
        value: f64,
    },
    /// Tally for one category of mistake.
    MistakeCount {
        /// Mistake category.
        category: String,
        /// Number of occurrences.
        count: u32,
    },
    /// Usage share of one language.
    LanguageDistribution {
        /// Name of the language.
        language: String,
        /// Usage percentage.
        percentage: u32,
    },
    /// Literal source code.
    SourceCode(String),
    /// Path to a file.
    FilePath(String),
    /// Name of a symbol.
    SymbolName(String),
    /// Message attached to a commit.
    CommitMessage(String),
    /// Identity of an author.
    AuthorIdentity(String),
    /// Hash of some content.
    ContentHash(String),
    /// Free-form text that needs to be screened for credentials.
    RawText(String),
}
134
/// Heuristic check for credential-like strings.
///
/// Returns `true` when `text` contains any known credential keyword.
/// Matching is case-insensitive and separator-insensitive: hyphens and
/// whitespace are treated as underscores, so `api-key`, `API KEY` and
/// `api_key` are all detected, as is a PEM header such as
/// `-----BEGIN RSA PRIVATE KEY-----`.
///
/// This is a keyword scan, not a secret detector: it errs on the side of
/// flagging (e.g. any text containing "token"), and it cannot recognise a
/// bare secret value that is not accompanied by one of the keywords.
fn looks_like_credentials(text: &str) -> bool {
    // Keywords are written in normalised form (lowercase, `_` as the only
    // separator). "apikey" stays because it has no separator to normalise.
    const INDICATORS: &[&str] = &[
        "api_key",
        "apikey",
        "secret",
        "password",
        "passwd",
        "token",
        "bearer",
        "authorization",
        "aws_access_key",
        "private_key",
        "credential",
    ];

    // Fold case, then collapse hyphens and any whitespace to `_` so that a
    // single spelling per keyword covers every separator variant.
    let normalized: String = text
        .to_lowercase()
        .chars()
        .map(|c| if c == '-' || c.is_whitespace() { '_' } else { c })
        .collect();

    INDICATORS
        .iter()
        .any(|indicator| normalized.contains(indicator))
}
155
156/// Filter a list of items, keeping only those that are shareable.
157pub fn filter_shareable(items: &[ShareableCheck]) -> Vec<&ShareableCheck> {
158    items.iter().filter(|item| is_shareable(item)).collect()
159}