agentic_codebase/collective/privacy.rs
1//! Privacy-preserving extraction.
2//!
3//! Determines which data items are safe to share with the collective
4//! intelligence network and which must remain private.
5
6use serde::{Deserialize, Serialize};
7
8/// Data that is safe to share with the collective.
9#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
10pub enum Shareable {
11 /// Structural pattern signature (no actual code).
12 PatternSignature(String),
13 /// Aggregated complexity statistics.
14 ComplexityStats {
15 /// Language.
16 language: String,
17 /// Average complexity.
18 avg_complexity: u32,
19 /// Total functions analysed.
20 function_count: u32,
21 },
22 /// Anonymous mistake category counts.
23 MistakeCounts {
24 /// Category name.
25 category: String,
26 /// Number of occurrences.
27 count: u32,
28 },
29 /// Language usage distribution.
30 LanguageDistribution {
31 /// Language name.
32 language: String,
33 /// Percentage of codebase (0-100).
34 percentage: u32,
35 },
36}
37
38/// Data that must NOT be shared with the collective.
39#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
40pub enum NonShareable {
41 /// Actual source code.
42 SourceCode,
43 /// File paths (may reveal project structure).
44 FilePaths,
45 /// Symbol names (may reveal proprietary API names).
46 SymbolNames,
47 /// Commit messages (may contain sensitive info).
48 CommitMessages,
49 /// Author names or emails.
50 AuthorIdentity,
51 /// Content hashes (could be used to fingerprint code).
52 ContentHashes,
53 /// API keys, tokens, or credentials found in code.
54 Credentials,
55}
56
57/// Check whether a data item is safe to share.
58///
59/// Returns `true` if the item is shareable, `false` if it must stay private.
60///
61/// # Rules
62///
63/// Shareable data:
64/// - Structural pattern signatures (abstracted, no real code)
65/// - Aggregated statistics (complexity averages, counts)
66/// - Anonymous mistake category counts
67/// - Language usage distributions
68///
69/// Non-shareable data:
70/// - Source code
71/// - File paths
72/// - Symbol names
73/// - Commit messages
74/// - Author identities
75/// - Content hashes
76/// - Credentials
77pub fn is_shareable(item: &ShareableCheck) -> bool {
78 match item {
79 ShareableCheck::PatternSignature(_) => true,
80 ShareableCheck::AggregateStats { .. } => true,
81 ShareableCheck::MistakeCount { .. } => true,
82 ShareableCheck::LanguageDistribution { .. } => true,
83 ShareableCheck::SourceCode(_) => false,
84 ShareableCheck::FilePath(_) => false,
85 ShareableCheck::SymbolName(_) => false,
86 ShareableCheck::CommitMessage(_) => false,
87 ShareableCheck::AuthorIdentity(_) => false,
88 ShareableCheck::ContentHash(_) => false,
89 ShareableCheck::RawText(text) => !looks_like_credentials(text),
90 }
91}
92
/// A candidate data item whose shareability is to be decided.
#[derive(Debug, Clone)]
pub enum ShareableCheck {
    /// Signature of a structural pattern.
    PatternSignature(String),
    /// A statistic in aggregated form.
    AggregateStats {
        /// Name of the statistic.
        name: String,
        /// Value of the statistic.
        value: f64,
    },
    /// Occurrence count of a mistake.
    MistakeCount {
        /// The mistake category.
        category: String,
        /// The number of occurrences.
        count: u32,
    },
    /// Distribution entry for one language.
    LanguageDistribution {
        /// Name of the language.
        language: String,
        /// The percentage.
        percentage: u32,
    },
    /// Source code in raw form.
    SourceCode(String),
    /// Path of a file.
    FilePath(String),
    /// Name of a symbol.
    SymbolName(String),
    /// Message of a commit.
    CommitMessage(String),
    /// Identity of an author.
    AuthorIdentity(String),
    /// Hash of some content.
    ContentHash(String),
    /// Generic text that has to be checked for credentials.
    RawText(String),
}
134
/// Heuristic check for credential-like strings.
///
/// Returns `true` when `text` contains any well-known credential marker as a
/// substring. Matching is case-insensitive and separator-insensitive: spaces
/// and hyphens are folded to underscores first, so `api_key`, `api-key` and
/// `api key` are all recognised, as is the `PRIVATE KEY` label of a PEM block.
///
/// This is a substring heuristic, so it can flag harmless text that merely
/// mentions one of the markers (for example the word "token").
fn looks_like_credentials(text: &str) -> bool {
    // Markers are written in lower case with `_` as the only separator; the
    // input is normalised to the same form before matching.
    const CREDENTIAL_INDICATORS: &[&str] = &[
        "api_key",
        "apikey",
        "secret",
        "password",
        "passwd",
        "token",
        "bearer",
        "authorization",
        "aws_access_key",
        "private_key",
    ];

    // Only non-letter separators are rewritten, so markers without an
    // underscore match exactly the same inputs as before normalisation.
    let normalized: String = text
        .to_lowercase()
        .chars()
        .map(|c| if matches!(c, '-' | ' ') { '_' } else { c })
        .collect();

    CREDENTIAL_INDICATORS
        .iter()
        .any(|indicator| normalized.contains(*indicator))
}
155
156/// Filter a list of items, keeping only those that are shareable.
157pub fn filter_shareable(items: &[ShareableCheck]) -> Vec<&ShareableCheck> {
158 items.iter().filter(|item| is_shareable(item)).collect()
159}