1use std::collections::HashMap;
14
15use base64::Engine;
16use serde::{Deserialize, Serialize};
17
18use crate::blind;
19use crate::crypto;
20use crate::lsh::LshHasher;
21use crate::reranker::{self, Candidate, RankedResult};
22use crate::Result;
23
/// Batch size for search trapdoors.
///
/// NOTE(review): not referenced in this part of the file; presumably the number of
/// trapdoors callers put into one `SEARCH_QUERY` request — confirm against call sites.
pub const TRAPDOOR_BATCH_SIZE: usize = 5;
30
/// Page size for subgraph queries.
///
/// NOTE(review): not referenced in this part of the file; presumably the value callers
/// bind to `$first`. `COUNT_QUERY` hard-codes the same number (1000) in its query text.
pub const PAGE_SIZE: usize = 1000;
33
/// GraphQL document for the blind-index search.
///
/// Variables: `$trapdoors` (list of hash strings), `$owner` (`Bytes`) and `$first`
/// (maximum number of rows). It selects `blindIndexes` rows whose `hash` is one of the
/// trapdoors, that belong to `$owner`, and whose linked fact has `isActive: true`,
/// ordered by index `id` descending. Each row carries its own `id` plus the linked
/// `fact` with the fields modelled by `SubgraphFact`.
const SEARCH_QUERY: &str = r#"
  query SearchByBlindIndex($trapdoors: [String!]!, $owner: Bytes!, $first: Int!) {
    blindIndexes(
      where: { hash_in: $trapdoors, owner: $owner, fact_: { isActive: true } }
      first: $first
      orderBy: id
      orderDirection: desc
    ) {
      id
      fact {
        id
        encryptedBlob
        encryptedEmbedding
        decayScore
        timestamp
        createdAt
        isActive
        contentFp
      }
    }
  }
"#;
61
/// GraphQL document for the broadened search, which does not use trapdoors.
///
/// Variables: `$owner` (`Bytes`) and `$first` (maximum number of rows). It selects the
/// owner's active `facts`, newest `timestamp` first, with the fields modelled by
/// `SubgraphFact`.
const BROADENED_SEARCH_QUERY: &str = r#"
  query BroadenedSearch($owner: Bytes!, $first: Int!) {
    facts(
      where: { owner: $owner, isActive: true }
      first: $first
      orderBy: timestamp
      orderDirection: desc
    ) {
      id
      encryptedBlob
      encryptedEmbedding
      decayScore
      timestamp
      createdAt
      isActive
      contentFp
    }
  }
"#;
83
/// GraphQL document for the paginated export of an owner's active facts.
///
/// Variables: `$owner` (`Bytes`), `$first` (page size) and `$skip` (offset). Rows are
/// ordered by `timestamp` descending.
///
/// The selection set matches `SEARCH_QUERY` / `BROADENED_SEARCH_QUERY`, including
/// `contentFp`, so that every query populates the same `SubgraphFact` fields. Without
/// it, exported facts would always deserialize with `content_fp == None`.
const EXPORT_QUERY: &str = r#"
  query ExportFacts($owner: Bytes!, $first: Int!, $skip: Int!) {
    facts(
      where: { owner: $owner, isActive: true }
      first: $first
      skip: $skip
      orderBy: timestamp
      orderDirection: desc
    ) {
      id
      encryptedBlob
      encryptedEmbedding
      decayScore
      timestamp
      createdAt
      isActive
      contentFp
    }
  }
"#;
104
/// GraphQL document that lists the ids of an owner's active facts.
///
/// Variable: `$owner` (`Bytes`). The query returns ids only and hard-codes
/// `first: 1000`, so a single response never contains more than 1000 rows.
///
/// NOTE(review): presumably the caller counts the returned rows; if so, the result is
/// capped at 1000 — confirm against the call site.
const COUNT_QUERY: &str = r#"
  query FactCount($owner: Bytes!) {
    facts(where: { owner: $owner, isActive: true }, first: 1000) {
      id
    }
  }
"#;
113
/// One fact row as returned by the subgraph queries in this file.
///
/// Field names are (de)serialized in camelCase (`encryptedBlob`, `contentFp`, ...).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SubgraphFact {
    /// Fact identifier; used to de-duplicate search hits and copied into the reranker
    /// candidate.
    pub id: String,
    /// Encrypted payload as a hex string, with or without a leading `0x`
    /// (it is passed through `hex_blob_to_base64` before decryption).
    pub encrypted_blob: String,
    /// Encrypted embedding, handed to `crypto::decrypt` as-is (no hex conversion).
    /// The decrypted text is base64 for little-endian `f32` values.
    pub encrypted_embedding: Option<String>,
    /// Not read in this file. NOTE(review): test fixtures use decimal strings such as
    /// `"0.8"` — confirm the format against the subgraph schema.
    pub decay_score: Option<String>,
    /// Copied verbatim into the reranker candidate; an absent value becomes `""`.
    pub timestamp: Option<String>,
    /// Not read in this file.
    pub created_at: Option<String>,
    /// The parsers drop facts where this is `Some(false)`; `None` is treated as active.
    pub is_active: Option<bool>,
    /// Not read in this file. NOTE(review): presumably a content fingerprint — confirm.
    pub content_fp: Option<String>,
}
131
/// One element of the `blindIndexes` array in a search response.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct BlindIndexEntry {
    // Deserialized because the query selects it, but never read afterwards.
    #[allow(dead_code)]
    id: String,
    /// Fact linked to this index row; rows without one are skipped by
    /// `parse_search_response`.
    fact: Option<SubgraphFact>,
}
140
/// Shape of a search response: the JSON object that directly holds `blindIndexes`.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct SearchData {
    /// `None` (absent or `null`) is treated as "no hits" by `parse_search_response`.
    blind_indexes: Option<Vec<BlindIndexEntry>>,
}
146
/// Shape of a response that directly holds a `facts` array.
///
/// Used by `parse_broadened_response`. NOTE(review): the name suggests export responses
/// are parsed with it too — confirm against callers.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct ExportData {
    /// `None` (absent or `null`) is treated as an empty list.
    facts: Option<Vec<SubgraphFact>>,
}
153
/// Returns the GraphQL text of the blind-index search query (`SearchByBlindIndex`).
///
/// The query expects the variables `$trapdoors`, `$owner` and `$first`.
pub fn search_query() -> &'static str {
    SEARCH_QUERY
}
162
/// Returns the GraphQL text of the broadened search query (`BroadenedSearch`).
///
/// The query expects the variables `$owner` and `$first`.
pub fn broadened_search_query() -> &'static str {
    BROADENED_SEARCH_QUERY
}
167
/// Returns the GraphQL text of the paginated export query (`ExportFacts`).
///
/// The query expects the variables `$owner`, `$first` and `$skip`.
pub fn export_query() -> &'static str {
    EXPORT_QUERY
}
172
/// Returns the GraphQL text of the fact-id listing query (`FactCount`).
///
/// The query expects the variable `$owner` and returns at most 1000 ids.
pub fn count_query() -> &'static str {
    COUNT_QUERY
}
177
178pub fn generate_search_trapdoors(
191 query: &str,
192 query_embedding: &[f32],
193 lsh_hasher: &LshHasher,
194) -> Result<Vec<String>> {
195 let mut trapdoors = blind::generate_blind_indices(query);
197
198 let embedding_f64: Vec<f64> = query_embedding.iter().map(|&f| f as f64).collect();
200 let lsh_buckets = lsh_hasher.hash(&embedding_f64)?;
201 trapdoors.extend(lsh_buckets);
202
203 Ok(trapdoors)
204}
205
206pub fn parse_search_response(response_json: &str) -> Result<Vec<SubgraphFact>> {
215 let data: SearchData = serde_json::from_str(response_json)
216 .map_err(|e| crate::Error::Crypto(format!("failed to parse search response: {}", e)))?;
217
218 let mut seen: HashMap<String, ()> = HashMap::new();
219 let mut results = Vec::new();
220
221 if let Some(entries) = data.blind_indexes {
222 for entry in entries {
223 if let Some(fact) = entry.fact {
224 if fact.is_active != Some(false) && !seen.contains_key(&fact.id) {
225 seen.insert(fact.id.clone(), ());
226 results.push(fact);
227 }
228 }
229 }
230 }
231
232 Ok(results)
233}
234
235pub fn parse_broadened_response(response_json: &str) -> Result<Vec<SubgraphFact>> {
240 let data: ExportData = serde_json::from_str(response_json)
241 .map_err(|e| crate::Error::Crypto(format!("failed to parse broadened response: {}", e)))?;
242
243 Ok(data
244 .facts
245 .unwrap_or_default()
246 .into_iter()
247 .filter(|f| f.is_active != Some(false))
248 .collect())
249}
250
251pub fn hex_blob_to_base64(hex_blob: &str) -> Option<String> {
259 let hex_str = hex_blob.strip_prefix("0x").unwrap_or(hex_blob);
260 let bytes = hex::decode(hex_str).ok()?;
261 Some(base64::engine::general_purpose::STANDARD.encode(&bytes))
262}
263
264fn extract_text_from_blob(decrypted: &str) -> String {
273 if let Ok(envelope) = serde_json::from_str::<serde_json::Value>(decrypted) {
274 if let Some(text) = envelope.get("t").and_then(|v| v.as_str()) {
275 return text.to_string();
276 }
277 }
278 decrypted.to_string()
279}
280
281pub fn decrypt_and_rerank(
299 facts: &[SubgraphFact],
300 query: &str,
301 query_embedding: &[f32],
302 encryption_key_hex: &str,
303 top_k: usize,
304) -> Result<Vec<RankedResult>> {
305 if facts.is_empty() {
306 return Ok(Vec::new());
307 }
308
309 let key_bytes = hex::decode(encryption_key_hex)
311 .map_err(|e| crate::Error::Crypto(format!("invalid encryption key hex: {}", e)))?;
312 if key_bytes.len() != 32 {
313 return Err(crate::Error::Crypto(format!(
314 "encryption key must be 32 bytes, got {}",
315 key_bytes.len()
316 )));
317 }
318 let mut encryption_key = [0u8; 32];
319 encryption_key.copy_from_slice(&key_bytes);
320
321 let mut candidates = Vec::new();
323 for fact in facts {
324 let blob_b64 = match hex_blob_to_base64(&fact.encrypted_blob) {
326 Some(b) => b,
327 None => continue, };
329 let raw = match crypto::decrypt(&blob_b64, &encryption_key) {
330 Ok(t) => t,
331 Err(_) => continue, };
333 let text = extract_text_from_blob(&raw);
334
335 let emb = decrypt_embedding(fact.encrypted_embedding.as_deref(), &encryption_key);
337
338 candidates.push(Candidate {
339 id: fact.id.clone(),
340 text,
341 embedding: emb,
342 timestamp: fact.timestamp.clone().unwrap_or_default(),
343 source: None,
344 });
345 }
346
347 reranker::rerank(query, query_embedding, &candidates, top_k)
349}
350
351pub fn decrypt_and_rerank_with_key(
355 facts: &[SubgraphFact],
356 query: &str,
357 query_embedding: &[f32],
358 encryption_key: &[u8; 32],
359 top_k: usize,
360) -> Result<Vec<RankedResult>> {
361 if facts.is_empty() {
362 return Ok(Vec::new());
363 }
364
365 let mut candidates = Vec::new();
366 for fact in facts {
367 let blob_b64 = match hex_blob_to_base64(&fact.encrypted_blob) {
368 Some(b) => b,
369 None => continue,
370 };
371 let raw = match crypto::decrypt(&blob_b64, encryption_key) {
372 Ok(t) => t,
373 Err(_) => continue,
374 };
375 let text = extract_text_from_blob(&raw);
376
377 let emb = decrypt_embedding(fact.encrypted_embedding.as_deref(), encryption_key);
378
379 candidates.push(Candidate {
380 id: fact.id.clone(),
381 text,
382 embedding: emb,
383 timestamp: fact.timestamp.clone().unwrap_or_default(),
384 source: None,
385 });
386 }
387
388 reranker::rerank(query, query_embedding, &candidates, top_k)
389}
390
391fn decrypt_embedding(encrypted: Option<&str>, encryption_key: &[u8; 32]) -> Vec<f32> {
399 encrypted
400 .and_then(|e| crypto::decrypt(e, encryption_key).ok())
401 .and_then(|b64| base64::engine::general_purpose::STANDARD.decode(&b64).ok())
402 .map(|bytes| {
403 bytes
404 .chunks_exact(4)
405 .map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]]))
406 .collect::<Vec<f32>>()
407 })
408 .unwrap_or_default()
409}
410
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixed mnemonic fed to the key-derivation helpers in these tests.
    const TEST_MNEMONIC: &str =
        "abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon abandon about";

    /// Decodes standard base64, panicking on malformed input.
    fn decode_b64(encoded: &str) -> Vec<u8> {
        base64::engine::general_purpose::STANDARD
            .decode(encoded)
            .unwrap()
    }

    /// Encrypts `text` inside a `{"t", "a", "s"}` envelope and returns the ciphertext
    /// as a `0x`-prefixed hex blob, the form `SubgraphFact::encrypted_blob` carries.
    fn encrypt_envelope_as_hex(text: &str, key: &[u8; 32]) -> String {
        let envelope = serde_json::json!({"t": text, "a": "test", "s": "test"});
        let ciphertext_b64 = crate::crypto::encrypt(&envelope.to_string(), key).unwrap();
        format!("0x{}", hex::encode(decode_b64(&ciphertext_b64)))
    }

    /// Builds an active fact with no embedding, creation time or fingerprint.
    fn active_fact(
        id: &str,
        encrypted_blob: String,
        decay_score: Option<&str>,
        timestamp: Option<&str>,
    ) -> SubgraphFact {
        SubgraphFact {
            id: id.to_string(),
            encrypted_blob,
            encrypted_embedding: None,
            decay_score: decay_score.map(str::to_string),
            timestamp: timestamp.map(str::to_string),
            created_at: None,
            is_active: Some(true),
            content_fp: None,
        }
    }

    #[test]
    fn test_hex_blob_to_base64_with_prefix() {
        // "48656c6c6f" is the hex encoding of "Hello".
        let b64 = hex_blob_to_base64("0x48656c6c6f").unwrap();
        assert_eq!(decode_b64(&b64), b"Hello");
    }

    #[test]
    fn test_hex_blob_to_base64_without_prefix() {
        let b64 = hex_blob_to_base64("48656c6c6f").unwrap();
        assert_eq!(decode_b64(&b64), b"Hello");
    }

    #[test]
    fn test_hex_blob_to_base64_invalid() {
        assert!(hex_blob_to_base64("0xZZZZ").is_none());
    }

    #[test]
    fn test_generate_search_trapdoors() {
        let keys = crate::crypto::derive_keys_from_mnemonic(TEST_MNEMONIC).unwrap();
        let lsh_seed = crate::crypto::derive_lsh_seed(TEST_MNEMONIC, &keys.salt).unwrap();
        let lsh_hasher = LshHasher::new(&lsh_seed, 640).unwrap();

        let embedding = vec![0.5f32; 640];
        let trapdoors =
            generate_search_trapdoors("dark mode preference", &embedding, &lsh_hasher).unwrap();

        assert!(
            trapdoors.len() > 20,
            "Should have word + stem + LSH trapdoors, got {}",
            trapdoors.len()
        );

        trapdoors.iter().for_each(|t| {
            assert!(
                hex::decode(t).is_ok(),
                "Trapdoor should be valid hex: {}",
                t
            );
        });
    }

    #[test]
    fn test_parse_search_response() {
        // idx1 and idx2 both point at fact1, so it must be reported once only.
        let json = r#"{
            "blindIndexes": [
                {
                    "id": "idx1",
                    "fact": {
                        "id": "fact1",
                        "encryptedBlob": "0xdeadbeef",
                        "encryptedEmbedding": null,
                        "decayScore": "0.8",
                        "timestamp": "2026-01-01T00:00:00.000Z",
                        "isActive": true,
                        "contentFp": "abc123"
                    }
                },
                {
                    "id": "idx2",
                    "fact": {
                        "id": "fact1",
                        "encryptedBlob": "0xdeadbeef",
                        "encryptedEmbedding": null,
                        "decayScore": "0.8",
                        "timestamp": "2026-01-01T00:00:00.000Z",
                        "isActive": true,
                        "contentFp": "abc123"
                    }
                },
                {
                    "id": "idx3",
                    "fact": {
                        "id": "fact2",
                        "encryptedBlob": "0xcafebabe",
                        "encryptedEmbedding": null,
                        "decayScore": "0.5",
                        "timestamp": "2026-01-02T00:00:00.000Z",
                        "isActive": true,
                        "contentFp": "def456"
                    }
                }
            ]
        }"#;

        let facts = parse_search_response(json).unwrap();
        assert_eq!(facts.len(), 2);
        assert_eq!(facts[0].id, "fact1");
        assert_eq!(facts[1].id, "fact2");
    }

    #[test]
    fn test_parse_search_response_filters_inactive() {
        let json = r#"{
            "blindIndexes": [
                {
                    "id": "idx1",
                    "fact": {
                        "id": "fact1",
                        "encryptedBlob": "0xdeadbeef",
                        "isActive": false,
                        "contentFp": null,
                        "decayScore": null,
                        "timestamp": null,
                        "encryptedEmbedding": null
                    }
                },
                {
                    "id": "idx2",
                    "fact": {
                        "id": "fact2",
                        "encryptedBlob": "0xcafebabe",
                        "isActive": true,
                        "contentFp": null,
                        "decayScore": null,
                        "timestamp": null,
                        "encryptedEmbedding": null
                    }
                }
            ]
        }"#;

        let facts = parse_search_response(json).unwrap();
        assert_eq!(facts.len(), 1);
        assert_eq!(facts[0].id, "fact2");
    }

    #[test]
    fn test_parse_broadened_response() {
        let json = r#"{
            "facts": [
                {
                    "id": "fact1",
                    "encryptedBlob": "0xdeadbeef",
                    "encryptedEmbedding": null,
                    "decayScore": "0.8",
                    "timestamp": "2026-01-01T00:00:00.000Z",
                    "isActive": true,
                    "contentFp": "abc123"
                },
                {
                    "id": "fact2",
                    "encryptedBlob": "0xcafebabe",
                    "encryptedEmbedding": null,
                    "decayScore": "0.5",
                    "timestamp": "2026-01-02T00:00:00.000Z",
                    "isActive": true,
                    "contentFp": "def456"
                }
            ]
        }"#;

        let facts = parse_broadened_response(json).unwrap();
        assert_eq!(facts.len(), 2);
    }

    #[test]
    fn test_parse_broadened_response_empty() {
        let facts = parse_broadened_response(r#"{ "facts": null }"#).unwrap();
        assert!(facts.is_empty());
    }

    #[test]
    fn test_decrypt_and_rerank_empty() {
        let results =
            decrypt_and_rerank(&[], "query", &[0.5f32; 4], &hex::encode([0u8; 32]), 3).unwrap();
        assert!(results.is_empty());
    }

    #[test]
    fn test_decrypt_and_rerank_with_real_encryption() {
        let keys = crate::crypto::derive_keys_from_mnemonic(TEST_MNEMONIC).unwrap();
        let encryption_key_hex = hex::encode(keys.encryption_key);

        // text1 shares words with the query; text2 does not.
        let text1 = "User prefers dark mode in all applications";
        let text2 = "The weather is sunny today";

        let facts = vec![
            active_fact(
                "fact1",
                encrypt_envelope_as_hex(text1, &keys.encryption_key),
                Some("0.8"),
                Some("2026-01-01T00:00:00.000Z"),
            ),
            active_fact(
                "fact2",
                encrypt_envelope_as_hex(text2, &keys.encryption_key),
                Some("0.5"),
                Some("2026-01-02T00:00:00.000Z"),
            ),
        ];

        let results =
            decrypt_and_rerank(&facts, "dark mode", &[0.5f32; 4], &encryption_key_hex, 2).unwrap();

        assert_eq!(results.len(), 2);
        assert_eq!(results[0].text, text1);
        assert!(results[0].score >= results[1].score);
    }

    #[test]
    fn test_decrypt_and_rerank_skips_undecryptable() {
        let encryption_key_hex = hex::encode([0u8; 32]);

        // Valid hex, but not a ciphertext produced under this key.
        let facts = vec![active_fact(
            "bad_fact",
            "0xdeadbeef".to_string(),
            None,
            None,
        )];

        let results =
            decrypt_and_rerank(&facts, "query", &[0.5f32; 4], &encryption_key_hex, 3).unwrap();

        assert!(results.is_empty());
    }

    #[test]
    fn test_query_strings_not_empty() {
        assert!(!search_query().is_empty());
        assert!(search_query().contains("blindIndexes"));
        assert!(!broadened_search_query().is_empty());
        assert!(broadened_search_query().contains("facts"));
        assert!(!export_query().is_empty());
        assert!(export_query().contains("skip"));
        assert!(!count_query().is_empty());
    }

    #[test]
    fn test_decrypt_and_rerank_with_key() {
        let keys = crate::crypto::derive_keys_from_mnemonic(TEST_MNEMONIC).unwrap();

        let text = "User prefers dark mode";
        let facts = vec![active_fact(
            "fact1",
            encrypt_envelope_as_hex(text, &keys.encryption_key),
            None,
            Some("2026-01-01T00:00:00.000Z"),
        )];

        let results =
            decrypt_and_rerank_with_key(&facts, "dark mode", &[0.5f32; 4], &keys.encryption_key, 1)
                .unwrap();

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].text, text);
    }

    #[test]
    fn test_extract_text_from_blob() {
        // Envelope with a "t" field: the field value is returned.
        let json = r#"{"t":"hello world","a":"agent","s":"source"}"#;
        assert_eq!(extract_text_from_blob(json), "hello world");

        // Not JSON: returned unchanged.
        assert_eq!(extract_text_from_blob("raw text"), "raw text");

        // JSON without "t": returned unchanged.
        assert_eq!(extract_text_from_blob(r#"{"x":"y"}"#), r#"{"x":"y"}"#);
    }
}