// embedding_demo/embedding_demo.rs

1use llm_brain::{ChunkingStrategy, LLMBrain, Result};
2use serde_json::json;
3
4/// Embedding functionality demonstration
5#[tokio::main]
6async fn main() -> Result<()> {
7    // Initialize LLMBrain instance
8    println!("Initializing LLMBrain...");
9    let llm_brain = LLMBrain::launch().await?;
10
11    // Demo 1: Basic embedding generation and query
12    println!("\n--- Basic Embedding Demo ---");
13
14    // Add several memories
15    let memories = vec![
16        (
17            "Cats are small mammals with fur, whiskers, and tails. They eat mice and birds, and are related to tigers and lions.",
18            json!({
19                "name": "Cat",
20                "type": "Animal",
21                "properties": {
22                    "characteristics": ["fur", "whiskers", "tail"],
23                    "diet": "carnivorous"
24                }
25            }),
26        ),
27        (
28            "Dogs are one of the earliest domesticated animals, loyal companions to humans, with various breeds and purposes.",
29            json!({
30                "name": "Dog",
31                "type": "Animal",
32                "properties": {
33                    "characteristics": ["fur", "loyalty", "keen sense of smell"],
34                    "uses": ["pet", "working dog", "guide dog"]
35                }
36            }),
37        ),
38        (
39            "Birds are warm-blooded vertebrates, covered with feathers, with forelimbs evolved into wings, and most can fly.",
40            json!({
41                "name": "Bird",
42                "type": "Animal",
43                "properties": {
44                    "characteristics": ["feathers", "wings", "beak"],
45                    "abilities": ["flying", "nest building", "singing"]
46                }
47            }),
48        ),
49    ];
50
51    // Add memories to database
52    for (content, metadata) in memories {
53        let memory_id = llm_brain.add_memory(content.to_owned(), metadata).await?;
54        println!("Added memory: ID={memory_id}");
55    }
56
57    // Wait for embedding processing to complete
58    tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
59
60    // Query related memories using semantic search
61    println!("\nQuerying memories about 'pets':");
62    let results = llm_brain.recall("pets and human relationships", 2).await?;
63    for (i, (memory, score)) in results.iter().enumerate() {
64        println!("\nResult {}: Similarity {:.4}", i + 1, score);
65        println!("Content: {}", memory.content);
66        println!(
67            "Metadata: {}",
68            serde_json::to_string_pretty(&memory.metadata)?
69        );
70    }
71
72    // Demo 2: Long text processing
73    println!("\n--- Long Text Processing Demo ---");
74
75    // Prepare a longer text
76    let long_text = r#"
77        Artificial Intelligence (AI) is a branch of computer science aimed at creating systems capable of simulating human intelligence behaviors.
78        AI research includes multiple sub-fields such as machine learning, deep learning, natural language processing, computer vision, expert systems, etc.
79        Machine learning is one of the core technologies of AI, using statistical techniques to enable computer systems to "learn" (i.e., progressively improve performance) without explicit programming.
80        Deep learning is a subset of machine learning that uses multi-layered neural networks to process data. These networks extract features from data, with each layer building on the output of the previous one.
81        Large Language Models (LLMs) are a significant breakthrough in AI in recent years, generating human-like text, understanding context, and performing various language tasks by learning from vast amounts of text data.
82        Vector embeddings are techniques for representing text, images, or other data as points in a multi-dimensional vector space, playing an important role in information retrieval, recommendation systems, and semantic search.
83    "#;
84
85    println!("Long text content:\n{long_text}");
86
87    // Process long text using different strategies
88    println!("\nUsing default strategy (no chunking):");
89    let embedding1 = llm_brain.process_long_text(long_text, None).await?;
90    println!("Generated embedding vector length: {}", embedding1.len());
91
92    println!("\nUsing chunk and average strategy:");
93    let chunk_strategy = ChunkingStrategy::ChunkAndAverage {
94        chunk_size: 100,
95        chunk_overlap: 20,
96    };
97    let embedding2 = llm_brain
98        .process_long_text(long_text, Some(chunk_strategy))
99        .await?;
100    println!("Generated embedding vector length: {}", embedding2.len());
101
102    // Calculate similarity between embeddings generated by different strategies
103    let similarity = cosine_similarity(&embedding1, &embedding2);
104    println!("\nCosine similarity between embeddings from two strategies: {similarity:.4}");
105
106    // Demo 3: Adding long text to memory
107    println!("\n--- Adding Long Text Memory Demo ---");
108
109    let metadata = json!({
110        "name": "Artificial Intelligence Overview",
111        "type": "Knowledge",
112        "tags": ["AI", "Machine Learning", "Deep Learning"]
113    });
114
115    let memory_id = llm_brain.add_memory(long_text.to_owned(), metadata).await?;
116    println!("Added long text memory: ID={memory_id}");
117
118    // Wait for embedding processing to complete
119    tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
120
121    // Query related content
122    println!("\nQuerying memories about 'large language models':");
123    let results = llm_brain
124        .recall("large language models and vector embeddings", 1)
125        .await?;
126    for (i, (memory, score)) in results.iter().enumerate() {
127        println!("\nResult {}: Similarity {:.4}", i + 1, score);
128        println!("Content summary: {}", truncate_text(&memory.content, 100));
129        println!(
130            "Metadata: {}",
131            serde_json::to_string_pretty(&memory.metadata)?
132        );
133    }
134
135    println!("\nDemo completed.");
136    Ok(())
137}
138
/// Calculate the cosine similarity between two vectors.
///
/// Returns a value in `[-1.0, 1.0]` for well-formed inputs. Returns `0.0`
/// when the vectors differ in length, are empty, or either has zero norm
/// (cosine similarity is undefined in those cases).
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }

    // Iterator forms avoid per-index bounds checks and read idiomatically.
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let norm_a_sq: f32 = a.iter().map(|x| x * x).sum();
    let norm_b_sq: f32 = b.iter().map(|x| x * x).sum();

    // A zero-norm vector has no direction; similarity is undefined.
    if norm_a_sq <= 0.0 || norm_b_sq <= 0.0 {
        return 0.0;
    }

    dot / (norm_a_sq.sqrt() * norm_b_sq.sqrt())
}
161
/// Truncate text to at most `max_length` bytes, appending an ellipsis when
/// truncation occurs.
///
/// Unlike a raw byte slice (`&text[..max_length]`), this never panics on
/// multi-byte UTF-8 input: if `max_length` falls inside a character, the cut
/// is moved back to the nearest character boundary.
fn truncate_text(text: &str, max_length: usize) -> String {
    if text.len() <= max_length {
        return text.to_owned();
    }

    // Walk back to a valid char boundary so slicing cannot panic on UTF-8.
    let mut end = max_length;
    while !text.is_char_boundary(end) {
        end -= 1;
    }

    format!("{}...", &text[..end])
}