kaccy_ai/examples/
detection.rs1use crate::error::Result;
4
/// Namespace type for the conceptual plagiarism-detection walkthroughs.
///
/// The type holds no data. It derives the common traits so it can be
/// formatted with `{:?}`, copied, and default-constructed like any other
/// public type.
#[derive(Debug, Clone, Copy, Default)]
pub struct PlagiarismDetectionExample;
10
11impl PlagiarismDetectionExample {
12 #[allow(dead_code)]
14 pub async fn run_basic_code_detection() -> Result<()> {
15 println!("=== Code Plagiarism Detection Example ===");
16 println!();
17
18 let _code1 = r"
20fn calculate_sum(numbers: &[i32]) -> i32 {
21 numbers.iter().sum()
22}
23";
24
25 let _code2 = r"
26fn sum_array(nums: &[i32]) -> i32 {
27 nums.iter().sum()
28}
29";
30
31 println!("Comparing code samples...");
32 println!("(Conceptual example - actual API calls omitted)");
33 println!();
34 println!("Results:");
35 println!("Similarity score: 85.00%");
36 println!("Is plagiarism: true");
37 println!("Confidence: 90%");
38 println!("Token similarity: 87.50%");
39
40 Ok(())
41 }
42
43 #[allow(dead_code)]
45 pub async fn run_text_detection() -> Result<()> {
46 println!("=== Text Plagiarism Detection Example ===");
47 println!();
48
49 let _text1 = "The blockchain is a distributed ledger that records transactions.";
50 let _text2 = "A blockchain represents a distributed ledger for recording transactions.";
51
52 println!("Comparing text samples...");
53 println!("(Conceptual example - actual API calls omitted)");
54 println!();
55 println!("Results:");
56 println!("Similarity score: 78.00%");
57 println!("Is plagiarism: true");
58 println!("Confidence: 85%");
59
60 Ok(())
61 }
62
63 #[allow(dead_code)]
65 pub async fn semantic_analysis_example(api_key: &str) -> Result<()> {
66 println!("=== Semantic Plagiarism Analysis (LLM-Powered) ===");
67 println!();
68
69 let _llm_client = crate::llm::LlmClientBuilder::new()
70 .openai_api_key(api_key)
71 .build()
72 .expect("Failed to build LLM client");
73
74 let _config = crate::plagiarism::PlagiarismConfig {
75 similarity_threshold: 0.7,
76 use_semantic_analysis: true,
77 ngram_size: 3,
78 min_token_overlap: 5,
79 };
80
81 let _text1 = "Machine learning models require large datasets for training.";
82 let _text2 = "To train ML models effectively, you need substantial amounts of data.";
83
84 println!("Running semantic analysis...");
85 println!("(Conceptual example - actual API calls omitted)");
86 println!();
87 println!("Results:");
88 println!(" Overall similarity: 72.00%");
89 println!(" Ngram similarity: 65.00%");
90 println!(" Semantic similarity (LLM): 82.00%");
91 println!(" Verdict: PLAGIARISM DETECTED");
92
93 Ok(())
94 }
95
96 #[allow(dead_code)]
98 pub async fn batch_detection_example() -> Result<()> {
99 println!("=== Batch Plagiarism Detection Example ===");
100 println!();
101
102 let _documents = [
103 "The quick brown fox jumps over the lazy dog.".to_string(),
104 "A fast brown fox leaps over a sleeping dog.".to_string(),
105 "Blockchain technology enables decentralized transactions.".to_string(),
106 "The rapid brown fox hops over the idle canine.".to_string(),
107 ];
108
109 println!("Analyzing 4 documents...");
110 println!("Note: Batch comparison requires pairwise comparison of all documents");
111 println!("For 4 documents, this would require 6 comparisons");
112
113 println!();
115 println!("Example similarity matrix (conceptual):");
116 println!(" Doc0 Doc1 Doc2 Doc3");
117 println!("Doc0 100.0 85.0 20.0 82.0");
118 println!("Doc1 85.0 100.0 15.0 88.0");
119 println!("Doc2 20.0 15.0 100.0 18.0");
120 println!("Doc3 82.0 88.0 18.0 100.0");
121 println!();
122 println!("Potential plagiarism clusters (>80% similar):");
123 println!(" * Cluster 1: Documents 0, 1, 3 (fox/canine theme)");
124 println!(" * Cluster 2: Document 2 (unrelated - blockchain)");
125
126 Ok(())
127 }
128
129 #[allow(dead_code)]
131 pub async fn use_cases_guide() -> Result<()> {
132 println!("=== Plagiarism Detection Use Cases ===");
133 println!();
134
135 println!("1. Fraud Detection");
136 println!(" - Detect users copying code/content from others");
137 println!(" - Identify reputation gaming through duplicate content");
138 println!(" - Example: User submits same code for multiple commitments");
139 println!();
140 println!("2. Content Verification");
141 println!(" - Verify commitment evidence is original");
142 println!(" - Check if GitHub commits are copied");
143 println!(" - Example: Detect forked repositories claimed as original work");
144 println!();
145 println!("3. Academic Integrity");
146 println!(" - Verify educational commitments are original");
147 println!(" - Detect code sharing between students");
148 println!(" - Example: Multiple users submitting similar solutions");
149 println!();
150 println!("4. Code Review");
151 println!(" - Find duplicate code blocks in codebase");
152 println!(" - Suggest refactoring opportunities");
153 println!(" - Example: Identify copy-pasted functions");
154 println!();
155 println!("Configuration tips:");
156 println!(" * Token similarity: Good for exact/near-exact copies (threshold: 0.7)");
157 println!(" * N-gram similarity: Detects paraphrasing (threshold: 0.6)");
158 println!(" * Semantic similarity: Finds conceptual copies (threshold: 0.75)");
159
160 Ok(())
161 }
162}
163
/// Namespace type for the conceptual image-similarity walkthroughs.
///
/// The type holds no data. It derives the common traits so it can be
/// formatted with `{:?}`, copied, and default-constructed like any other
/// public type.
#[derive(Debug, Clone, Copy, Default)]
pub struct ImageSimilarityExample;
169
170impl ImageSimilarityExample {
171 #[allow(dead_code)]
173 pub async fn run_basic_detection() -> Result<()> {
174 println!("=== Image Similarity Detection Example ===");
175 println!();
176
177 println!("Note: Using dHash algorithm for image hashing");
178 println!("Computing perceptual hash for images...");
179 println!(" Algorithm: dHash (difference hash)");
180 println!();
181
182 let hash1 = crate::image_similarity::PerceptualHash {
184 hash: 0x1234_5678_9ABC_DEF0,
185 algorithm: crate::image_similarity::HashAlgorithm::DHash,
186 };
187 let hash2 = crate::image_similarity::PerceptualHash {
188 hash: 0x1234_5678_9ABC_DEF1,
189 algorithm: crate::image_similarity::HashAlgorithm::DHash,
190 };
191
192 let hamming_distance = (hash1.hash ^ hash2.hash).count_ones();
194 let similarity_percent = (f64::from(64 - hamming_distance) / 64.0) * 100.0;
195
196 println!("Hash 1: {:016X}", hash1.hash);
197 println!("Hash 2: {:016X}", hash2.hash);
198 println!("Hamming distance: {hamming_distance}");
199 println!("Similarity score: {similarity_percent:.2}%");
200 println!("Is similar: {}", similarity_percent > 90.0);
201
202 Ok(())
203 }
204
205 #[allow(dead_code)]
207 pub async fn algorithm_comparison() -> Result<()> {
208 println!("=== Hash Algorithm Comparison ===");
209 println!();
210
211 println!("1. dHash (Difference Hash)");
212 println!(" - Speed: Very fast");
213 println!(" - Accuracy: Good");
214 println!(" - Best for: Real-time detection, large datasets");
215 println!(" - Resistant to: Scaling, slight cropping");
216 println!();
217 println!("2. aHash (Average Hash)");
218 println!(" - Speed: Fastest");
219 println!(" - Accuracy: Moderate");
220 println!(" - Best for: Quick filtering, high performance");
221 println!(" - Resistant to: Scaling, brightness changes");
222 println!();
223 println!("3. pHash (Perceptual Hash)");
224 println!(" - Speed: Slower");
225 println!(" - Accuracy: Best");
226 println!(" - Best for: High-quality detection, critical use cases");
227 println!(" - Resistant to: Rotation, compression, watermarks");
228 println!();
229 println!("Recommendation:");
230 println!(" * Use dHash for most cases (good balance)");
231 println!(" * Use pHash for fraud detection (highest accuracy)");
232 println!(" * Use aHash for preliminary filtering (fastest)");
233
234 Ok(())
235 }
236
237 #[allow(dead_code)]
239 pub async fn threshold_tuning_guide() -> Result<()> {
240 println!("=== Similarity Threshold Tuning Guide ===");
241 println!();
242
243 println!("Hamming distance thresholds:");
244 println!();
245 println!(" Distance 0-5: Nearly identical (99%+ similar)");
246 println!(" -> Same image, minor compression/resize");
247 println!();
248 println!(" Distance 6-10: Very similar (95-99% similar)");
249 println!(" -> Same image, different quality/format");
250 println!();
251 println!(" Distance 11-15: Similar (90-95% similar)");
252 println!(" -> Same subject, different angle/crop");
253 println!();
254 println!(" Distance 16-20: Somewhat similar (85-90% similar)");
255 println!(" -> Related content, different composition");
256 println!();
257 println!(" Distance 21+: Not similar (<85% similar)");
258 println!(" -> Different images");
259 println!();
260 println!("Recommended thresholds:");
261 println!(" * Exact duplicates: distance <= 5");
262 println!(" * Near duplicates: distance <= 10");
263 println!(" * Similar images: distance <= 15");
264 println!(" * Fraud detection: distance <= 8 (strict)");
265
266 Ok(())
267 }
268
269 #[allow(dead_code)]
271 pub async fn deduplication_example() -> Result<()> {
272 println!("=== Image Deduplication Database Example ===");
273 println!();
274
275 println!("Note: Image database example (conceptual)");
276
277 println!("Added 3 images to database");
278 println!();
279
280 println!("Finding duplicates for test image...");
281 println!("Found 2 similar images:");
282 println!(" - image1.jpg (hamming distance: 2)");
283 println!(" - image2.jpg (hamming distance: 3)");
284
285 Ok(())
286 }
287
288 #[allow(dead_code)]
290 pub async fn fraud_prevention_guide() -> Result<()> {
291 println!("=== Image Similarity for Fraud Prevention ===");
292 println!();
293
294 println!("Use Cases:");
295 println!();
296 println!("1. Screenshot Fraud Detection");
297 println!(" - Detect users submitting same screenshot multiple times");
298 println!(" - Identify edited/photoshopped evidence");
299 println!(" - Example: Modified transaction screenshots");
300 println!();
301 println!("2. Duplicate Evidence Prevention");
302 println!(" - Prevent reuse of evidence across commitments");
303 println!(" - Track all submitted images");
304 println!(" - Example: Same GitHub stats screenshot for different claims");
305 println!();
306 println!("3. Identity Verification");
307 println!(" - Detect duplicate profile pictures");
308 println!(" - Identify stock photo usage");
309 println!(" - Example: Multiple accounts with similar avatars");
310 println!();
311 println!("4. Content Originality");
312 println!(" - Verify image evidence is original");
313 println!(" - Detect images copied from web");
314 println!(" - Example: Reverse image search integration");
315 println!();
316 println!("Integration with kaccy-ai:");
317 println!(" let detector = ImageSimilarityDetector::new(HashAlgorithm::PHash);");
318 println!(" let fraud_detector = AiFraudDetector::new(llm_client);");
319 println!(" // Use both together for comprehensive fraud detection");
320
321 Ok(())
322 }
323
324 #[allow(dead_code)]
326 pub async fn performance_optimization_guide() -> Result<()> {
327 println!("=== Performance Optimization Guide ===");
328 println!();
329
330 println!("For large datasets:");
331 println!();
332 println!("1. Use fast algorithms first");
333 println!(" - Filter with aHash (fastest)");
334 println!(" - Confirm with pHash (most accurate)");
335 println!();
336 println!("2. Implement database indexing");
337 println!(" - Use ImageDatabase with appropriate threshold");
338 println!(" - Index by hash prefix for faster lookups");
339 println!();
340 println!("3. Batch processing");
341 println!(" - Process images in parallel");
342 println!(" - Use rayon for CPU parallelism");
343 println!();
344 println!("4. Caching");
345 println!(" - Cache computed hashes");
346 println!(" - Store hashes in database");
347 println!();
348 println!("Example performance:");
349 println!(" * Hash computation: ~1ms per image");
350 println!(" * Hash comparison: ~100ns per pair");
351 println!(" * Database lookup: ~O(n) without indexing");
352 println!(" * With indexing: ~O(log n)");
353
354 Ok(())
355 }
356}