use llm_brain::{ChunkingStrategy, LLMBrain, Result};
use serde_json::json;

4#[tokio::main]
6async fn main() -> Result<()> {
7 println!("Initializing LLMBrain...");
9 let llm_brain = LLMBrain::launch().await?;
10
11 println!("\n--- Basic Embedding Demo ---");
13
14 let memories = vec![
16 (
17 "Cats are small mammals with fur, whiskers, and tails. They eat mice and birds, and are related to tigers and lions.",
18 json!({
19 "name": "Cat",
20 "type": "Animal",
21 "properties": {
22 "characteristics": ["fur", "whiskers", "tail"],
23 "diet": "carnivorous"
24 }
25 }),
26 ),
27 (
28 "Dogs are one of the earliest domesticated animals, loyal companions to humans, with various breeds and purposes.",
29 json!({
30 "name": "Dog",
31 "type": "Animal",
32 "properties": {
33 "characteristics": ["fur", "loyalty", "keen sense of smell"],
34 "uses": ["pet", "working dog", "guide dog"]
35 }
36 }),
37 ),
38 (
39 "Birds are warm-blooded vertebrates, covered with feathers, with forelimbs evolved into wings, and most can fly.",
40 json!({
41 "name": "Bird",
42 "type": "Animal",
43 "properties": {
44 "characteristics": ["feathers", "wings", "beak"],
45 "abilities": ["flying", "nest building", "singing"]
46 }
47 }),
48 ),
49 ];
50
51 for (content, metadata) in memories {
53 let memory_id = llm_brain.add_memory(content.to_owned(), metadata).await?;
54 println!("Added memory: ID={memory_id}");
55 }
56
57 tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
59
60 println!("\nQuerying memories about 'pets':");
62 let results = llm_brain.recall("pets and human relationships", 2).await?;
63 for (i, (memory, score)) in results.iter().enumerate() {
64 println!("\nResult {}: Similarity {:.4}", i + 1, score);
65 println!("Content: {}", memory.content);
66 println!(
67 "Metadata: {}",
68 serde_json::to_string_pretty(&memory.metadata)?
69 );
70 }
71
72 println!("\n--- Long Text Processing Demo ---");
74
75 let long_text = r#"
77 Artificial Intelligence (AI) is a branch of computer science aimed at creating systems capable of simulating human intelligence behaviors.
78 AI research includes multiple sub-fields such as machine learning, deep learning, natural language processing, computer vision, expert systems, etc.
79 Machine learning is one of the core technologies of AI, using statistical techniques to enable computer systems to "learn" (i.e., progressively improve performance) without explicit programming.
80 Deep learning is a subset of machine learning that uses multi-layered neural networks to process data. These networks extract features from data, with each layer building on the output of the previous one.
81 Large Language Models (LLMs) are a significant breakthrough in AI in recent years, generating human-like text, understanding context, and performing various language tasks by learning from vast amounts of text data.
82 Vector embeddings are techniques for representing text, images, or other data as points in a multi-dimensional vector space, playing an important role in information retrieval, recommendation systems, and semantic search.
83 "#;
84
85 println!("Long text content:\n{long_text}");
86
87 println!("\nUsing default strategy (no chunking):");
89 let embedding1 = llm_brain.process_long_text(long_text, None).await?;
90 println!("Generated embedding vector length: {}", embedding1.len());
91
92 println!("\nUsing chunk and average strategy:");
93 let chunk_strategy = ChunkingStrategy::ChunkAndAverage {
94 chunk_size: 100,
95 chunk_overlap: 20,
96 };
97 let embedding2 = llm_brain
98 .process_long_text(long_text, Some(chunk_strategy))
99 .await?;
100 println!("Generated embedding vector length: {}", embedding2.len());
101
102 let similarity = cosine_similarity(&embedding1, &embedding2);
104 println!("\nCosine similarity between embeddings from two strategies: {similarity:.4}");
105
106 println!("\n--- Adding Long Text Memory Demo ---");
108
109 let metadata = json!({
110 "name": "Artificial Intelligence Overview",
111 "type": "Knowledge",
112 "tags": ["AI", "Machine Learning", "Deep Learning"]
113 });
114
115 let memory_id = llm_brain.add_memory(long_text.to_owned(), metadata).await?;
116 println!("Added long text memory: ID={memory_id}");
117
118 tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
120
121 println!("\nQuerying memories about 'large language models':");
123 let results = llm_brain
124 .recall("large language models and vector embeddings", 1)
125 .await?;
126 for (i, (memory, score)) in results.iter().enumerate() {
127 println!("\nResult {}: Similarity {:.4}", i + 1, score);
128 println!("Content summary: {}", truncate_text(&memory.content, 100));
129 println!(
130 "Metadata: {}",
131 serde_json::to_string_pretty(&memory.metadata)?
132 );
133 }
134
135 println!("\nDemo completed.");
136 Ok(())
137}
138
/// Computes the cosine similarity of two vectors.
///
/// Returns `0.0` when the slices differ in length, are empty, or either
/// has zero magnitude (cosine similarity is undefined in those cases).
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }

    // Iterator forms avoid per-element bounds checks of indexed access.
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let norm_a: f32 = a.iter().map(|x| x * x).sum();
    let norm_b: f32 = b.iter().map(|x| x * x).sum();

    // Guard against division by zero for all-zero vectors.
    if norm_a <= 0.0 || norm_b <= 0.0 {
        return 0.0;
    }

    dot / (norm_a.sqrt() * norm_b.sqrt())
}
161
/// Truncates `text` to at most `max_length` bytes, appending "..." when
/// anything was cut off.
///
/// The cut point is moved back to the nearest `char` boundary so that a
/// multi-byte UTF-8 character is never split: the original
/// `&text[0..max_length]` panics when `max_length` lands mid-character.
fn truncate_text(text: &str, max_length: usize) -> String {
    if text.len() <= max_length {
        return text.to_owned();
    }

    // Walk back to a valid char boundary; byte 0 is always a boundary,
    // so this loop terminates.
    let mut end = max_length;
    while !text.is_char_boundary(end) {
        end -= 1;
    }

    format!("{}...", &text[..end])
}