Skip to main content

cloakpipe_proxy/
tree_handlers.rs

1//! HTTP handlers for CloakTree — vectorless document retrieval.
2
3use crate::state::AppState;
4use axum::{
5    extract::{Path, State},
6    http::StatusCode,
7    response::IntoResponse,
8    Json,
9};
10use cloakpipe_tree::{
11    TreeIndexer, TreeSearcher,
12    storage::TreeStorage,
13};
14use serde::{Deserialize, Serialize};
15use serde_json::Value;
16use std::sync::Arc;
17
18// --- Request/Response types ---
19
20#[derive(Deserialize)]
21pub struct IndexTextRequest {
22    /// Document name (e.g., "contract.pdf")
23    pub name: String,
24    /// Raw text content of the document
25    pub text: String,
26}
27
28#[derive(Serialize)]
29pub struct IndexResponse {
30    pub id: String,
31    pub source: String,
32    pub description: Option<String>,
33    pub total_pages: usize,
34    pub node_count: usize,
35    pub max_depth: usize,
36    pub navigation: Vec<NavigationItem>,
37}
38
39#[derive(Serialize)]
40pub struct NavigationItem {
41    pub id: String,
42    pub title: String,
43    pub summary: Option<String>,
44    pub depth: usize,
45    pub pages: (usize, usize),
46    pub has_children: bool,
47}
48
49#[derive(Deserialize)]
50pub struct SearchRequest {
51    pub query: String,
52}
53
54#[derive(Serialize)]
55pub struct SearchResponse {
56    pub node_ids: Vec<String>,
57    pub reasoning: String,
58    pub confidence: Option<f64>,
59    pub extracted: Vec<ExtractedItem>,
60}
61
62#[derive(Serialize)]
63pub struct ExtractedItem {
64    pub node_id: String,
65    pub title: String,
66    pub text: String,
67    pub pages: (usize, usize),
68}
69
70#[derive(Deserialize)]
71pub struct QueryRequest {
72    /// Raw text content (for new documents) or tree ID (for existing)
73    pub text: Option<String>,
74    /// Document name
75    pub name: Option<String>,
76    /// Existing tree ID to search
77    pub tree_id: Option<String>,
78    /// The question to answer
79    pub query: String,
80}
81
82#[derive(Serialize)]
83pub struct QueryResponse {
84    pub answer: String,
85    pub sources: Vec<ExtractedItem>,
86    pub tree_id: String,
87    pub reasoning: String,
88}
89
90#[derive(Serialize)]
91pub struct TreeListItem {
92    pub id: String,
93    pub source: String,
94    pub description: Option<String>,
95    pub total_pages: usize,
96    pub node_count: usize,
97}
98
99// --- Handlers ---
100
101/// POST /tree/index — Build a tree index from text content.
102pub async fn tree_index_text(
103    State(state): State<Arc<AppState>>,
104    Json(req): Json<IndexTextRequest>,
105) -> Result<Json<IndexResponse>, (StatusCode, String)> {
106    let tree_config = state.config.tree.clone();
107    let indexer = TreeIndexer::new(
108        tree_config.clone(),
109        state.api_key.clone(),
110        state.config.proxy.upstream.clone(),
111    );
112
113    let tree = indexer
114        .build_index_from_text(&req.name, &req.text)
115        .await
116        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Indexing failed: {}", e)))?;
117
118    // Save to storage
119    let storage_path = &tree_config.storage_path;
120    TreeStorage::save(&tree, storage_path)
121        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Storage failed: {}", e)))?;
122
123    let nav = tree.navigation_map();
124    Ok(Json(IndexResponse {
125        id: tree.id.clone(),
126        source: tree.source.clone(),
127        description: tree.description.clone(),
128        total_pages: tree.total_pages,
129        node_count: tree.node_count(),
130        max_depth: tree.max_depth(),
131        navigation: nav.into_iter().map(|e| NavigationItem {
132            id: e.id,
133            title: e.title,
134            summary: e.summary,
135            depth: e.depth,
136            pages: e.pages,
137            has_children: e.has_children,
138        }).collect(),
139    }))
140}
141
142/// POST /tree/index/file — Build a tree index from a file path on disk.
143pub async fn tree_index_file(
144    State(state): State<Arc<AppState>>,
145    Json(body): Json<Value>,
146) -> Result<Json<IndexResponse>, (StatusCode, String)> {
147    let file_path = body["file_path"]
148        .as_str()
149        .ok_or((StatusCode::BAD_REQUEST, "file_path required".to_string()))?;
150
151    let tree_config = state.config.tree.clone();
152    let indexer = TreeIndexer::new(
153        tree_config.clone(),
154        state.api_key.clone(),
155        state.config.proxy.upstream.clone(),
156    );
157
158    let tree = indexer
159        .build_index(file_path)
160        .await
161        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Indexing failed: {}", e)))?;
162
163    TreeStorage::save(&tree, &tree_config.storage_path)
164        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Storage failed: {}", e)))?;
165
166    let nav = tree.navigation_map();
167    Ok(Json(IndexResponse {
168        id: tree.id.clone(),
169        source: tree.source.clone(),
170        description: tree.description.clone(),
171        total_pages: tree.total_pages,
172        node_count: tree.node_count(),
173        max_depth: tree.max_depth(),
174        navigation: nav.into_iter().map(|e| NavigationItem {
175            id: e.id,
176            title: e.title,
177            summary: e.summary,
178            depth: e.depth,
179            pages: e.pages,
180            has_children: e.has_children,
181        }).collect(),
182    }))
183}
184
185/// GET /tree/list — List all tree indices.
186pub async fn tree_list(
187    State(state): State<Arc<AppState>>,
188) -> Result<Json<Vec<TreeListItem>>, (StatusCode, String)> {
189    let storage_path = &state.config.tree.storage_path;
190
191    let trees_raw = TreeStorage::list(storage_path)
192        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("List failed: {}", e)))?;
193
194    let mut items = Vec::new();
195    for (id, _source) in trees_raw {
196        let path = format!("{}/{}.json", storage_path, id);
197        if let Ok(tree) = TreeStorage::load(&path) {
198            let node_count = tree.node_count();
199            items.push(TreeListItem {
200                id: tree.id,
201                source: tree.source,
202                description: tree.description,
203                total_pages: tree.total_pages,
204                node_count,
205            });
206        }
207    }
208
209    Ok(Json(items))
210}
211
212/// GET /tree/:id — Get tree details and navigation map.
213pub async fn tree_get(
214    State(state): State<Arc<AppState>>,
215    Path(tree_id): Path<String>,
216) -> Result<Json<IndexResponse>, (StatusCode, String)> {
217    let path = format!("{}/{}.json", state.config.tree.storage_path, tree_id);
218    let tree = TreeStorage::load(&path)
219        .map_err(|e| (StatusCode::NOT_FOUND, format!("Tree not found: {}", e)))?;
220
221    let nav = tree.navigation_map();
222    Ok(Json(IndexResponse {
223        id: tree.id.clone(),
224        source: tree.source.clone(),
225        description: tree.description.clone(),
226        total_pages: tree.total_pages,
227        node_count: tree.node_count(),
228        max_depth: tree.max_depth(),
229        navigation: nav.into_iter().map(|e| NavigationItem {
230            id: e.id,
231            title: e.title,
232            summary: e.summary,
233            depth: e.depth,
234            pages: e.pages,
235            has_children: e.has_children,
236        }).collect(),
237    }))
238}
239
240/// POST /tree/:id/search — Search a tree index.
241pub async fn tree_search(
242    State(state): State<Arc<AppState>>,
243    Path(tree_id): Path<String>,
244    Json(req): Json<SearchRequest>,
245) -> Result<Json<SearchResponse>, (StatusCode, String)> {
246    let storage_path = &state.config.tree.storage_path;
247    let tree_path = format!("{}/{}.json", storage_path, tree_id);
248
249    let tree = TreeStorage::load(&tree_path)
250        .map_err(|e| (StatusCode::NOT_FOUND, format!("Tree not found: {}", e)))?;
251
252    let searcher = TreeSearcher::new(
253        state.api_key.clone(),
254        state.config.proxy.upstream.clone(),
255        state.config.tree.search_model.clone(),
256    );
257
258    let result = searcher
259        .search(&tree, &req.query)
260        .await
261        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Search failed: {}", e)))?;
262
263    // Extract content from matched nodes
264    // Re-parse document for extraction (or load cached pages)
265    let extracted = result.node_ids.iter().filter_map(|id| {
266        tree.find_node(id).map(|node| ExtractedItem {
267            node_id: node.id.clone(),
268            title: node.title.clone(),
269            text: node.summary.as_ref().map(|s| s.text.clone()).unwrap_or_default(),
270            pages: node.pages,
271        })
272    }).collect();
273
274    Ok(Json(SearchResponse {
275        node_ids: result.node_ids,
276        reasoning: result.reasoning,
277        confidence: result.confidence,
278        extracted,
279    }))
280}
281
282/// POST /tree/query — Full RAG pipeline: index (if needed) + search + extract + answer.
283pub async fn tree_query(
284    State(state): State<Arc<AppState>>,
285    Json(req): Json<QueryRequest>,
286) -> Result<Json<QueryResponse>, (StatusCode, String)> {
287    let tree_config = state.config.tree.clone();
288    let storage_path = &tree_config.storage_path;
289
290    // Load or build tree
291    let tree = if let Some(tree_id) = &req.tree_id {
292        let path = format!("{}/{}.json", storage_path, tree_id);
293        TreeStorage::load(&path)
294            .map_err(|e| (StatusCode::NOT_FOUND, format!("Tree not found: {}", e)))?
295    } else if let Some(text) = &req.text {
296        let name = req.name.as_deref().unwrap_or("uploaded-document");
297        let indexer = TreeIndexer::new(
298            tree_config.clone(),
299            state.api_key.clone(),
300            state.config.proxy.upstream.clone(),
301        );
302        let tree = indexer
303            .build_index_from_text(name, text)
304            .await
305            .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Indexing failed: {}", e)))?;
306        TreeStorage::save(&tree, storage_path)
307            .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Storage failed: {}", e)))?;
308        tree
309    } else {
310        return Err((StatusCode::BAD_REQUEST, "Either tree_id or text required".to_string()));
311    };
312
313    // Search
314    let searcher = TreeSearcher::new(
315        state.api_key.clone(),
316        state.config.proxy.upstream.clone(),
317        tree_config.search_model.clone(),
318    );
319
320    let search_result = searcher
321        .search(&tree, &req.query)
322        .await
323        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Search failed: {}", e)))?;
324
325    // Build extracted items from node summaries/titles
326    let sources: Vec<ExtractedItem> = search_result.node_ids.iter().filter_map(|id| {
327        tree.find_node(id).map(|node| ExtractedItem {
328            node_id: node.id.clone(),
329            title: node.title.clone(),
330            text: node.summary.as_ref().map(|s| s.text.clone()).unwrap_or_default(),
331            pages: node.pages,
332        })
333    }).collect();
334
335    // Build context from extracted content
336    let context: String = sources.iter().enumerate().map(|(i, s)| {
337        format!("[Source {} | {} | Pages {}-{}]\n{}", i + 1, s.title, s.pages.0, s.pages.1, s.text)
338    }).collect::<Vec<_>>().join("\n\n");
339
340    // Generate answer
341    let answer_prompt = format!(
342        "Use the following context to answer the question. Cite source numbers when possible.\n\n\
343         ---\n{}\n---\n\nQuestion: {}",
344        context, req.query
345    );
346
347    let answer_body = serde_json::json!({
348        "model": tree_config.search_model,
349        "messages": [
350            {"role": "system", "content": "You answer questions based on provided document context. Be precise and cite sources."},
351            {"role": "user", "content": answer_prompt}
352        ],
353        "max_tokens": 2048,
354        "temperature": 0.3
355    });
356
357    let url = format!(
358        "{}/v1/chat/completions",
359        state.config.proxy.upstream.trim_end_matches('/')
360    );
361
362    let response = state
363        .http_client
364        .post(&url)
365        .header("Authorization", format!("Bearer {}", state.api_key))
366        .json(&answer_body)
367        .send()
368        .await
369        .map_err(|e| (StatusCode::BAD_GATEWAY, format!("LLM request failed: {}", e)))?
370        .json::<Value>()
371        .await
372        .map_err(|e| (StatusCode::BAD_GATEWAY, format!("Invalid LLM response: {}", e)))?;
373
374    let answer = response["choices"][0]["message"]["content"]
375        .as_str()
376        .unwrap_or("Unable to generate answer")
377        .to_string();
378
379    Ok(Json(QueryResponse {
380        answer,
381        sources,
382        tree_id: tree.id,
383        reasoning: search_result.reasoning,
384    }))
385}
386
387/// DELETE /tree/:id — Delete a tree index.
388pub async fn tree_delete(
389    State(state): State<Arc<AppState>>,
390    Path(tree_id): Path<String>,
391) -> Result<impl IntoResponse, (StatusCode, String)> {
392    let path = format!("{}/{}.json", state.config.tree.storage_path, tree_id);
393    std::fs::remove_file(&path)
394        .map_err(|e| (StatusCode::NOT_FOUND, format!("Tree not found: {}", e)))?;
395
396    Ok(Json(serde_json::json!({"deleted": tree_id})))
397}