1use file_to_json::{Converter, FallbackStrategy, OpenRouterConfig};
4use serde_json::{json, Value};
5use std::io::Write;
6use std::path::PathBuf;
7use std::time::Duration;
8use tempfile::NamedTempFile;
9
10use crate::ingestion::config::AIProvider;
11use crate::ingestion::IngestionError;
12use crate::log_feature;
13use crate::logging::features::LogFeature;
14
15async fn convert_file_to_json_core(file_path: &PathBuf) -> Result<Value, IngestionError> {
17 log_feature!(
18 LogFeature::Ingestion,
19 info,
20 "Converting file to JSON: {:?}",
21 file_path
22 );
23
24 let ingestion_config = crate::ingestion::IngestionConfig::from_env()?;
26
27 if ingestion_config.provider != AIProvider::OpenRouter {
29 return Err(IngestionError::configuration_error(
30 "File conversion requires OpenRouter provider. Ollama is not supported for this feature."
31 ));
32 }
33
34 let file_to_json_config = OpenRouterConfig {
36 api_key: ingestion_config.openrouter.api_key.clone(),
37 model: ingestion_config.openrouter.model.clone(),
38 timeout: Duration::from_secs(ingestion_config.timeout_seconds),
39 fallback_strategy: FallbackStrategy::Chunked,
40 vision_model: Some(ingestion_config.openrouter.model.clone()),
41 max_image_bytes: 5 * 1024 * 1024, };
43
44 let file_path_str = file_path.to_string_lossy().to_string();
45
46 tokio::task::spawn_blocking(move || {
48 let converter = Converter::new(file_to_json_config)
49 .map_err(|_| IngestionError::FileConversionFailed)?;
50 converter.convert_path(&file_path_str)
51 .map_err(|e| {
52 log_feature!(
53 LogFeature::Ingestion,
54 error,
55 "Failed to convert file to JSON: {}",
56 e
57 );
58 IngestionError::FileConversionFailed
59 })
60 })
61 .await
62 .map_err(|e| {
63 log_feature!(
64 LogFeature::Ingestion,
65 error,
66 "Failed to spawn blocking task: {}",
67 e
68 );
69 IngestionError::FileConversionFailed
70 })?
71}
72
73pub async fn convert_file_to_json(file_path: &PathBuf) -> Result<Value, IngestionError> {
75 convert_file_to_json_core(file_path).await
76}
77
78pub async fn convert_file_to_json_http(
80 file_path: &PathBuf,
81) -> Result<Value, actix_web::HttpResponse> {
82 use actix_web::HttpResponse;
83
84 match convert_file_to_json_core(file_path).await {
85 Ok(value) => Ok(value),
86 Err(e) => {
87 log_feature!(LogFeature::Ingestion, error, "File conversion failed: {}", e);
88 Err(HttpResponse::InternalServerError().json(json!({
89 "success": false,
90 "error": format!("Failed to convert file to JSON: {}", e)
91 })))
92 }
93 }
94}
95
96pub fn flatten_root_layers(json: Value) -> Value {
103 if json.is_array() {
105 log_feature!(
106 LogFeature::Ingestion,
107 info,
108 "Flattening array elements with single-field wrappers"
109 );
110 return flatten_array_elements(json);
111 }
112
113 if let Value::Object(ref map) = json {
115 if map.len() == 1 {
117 let (key, value) = map.iter().next().unwrap();
118
119 if value.is_array() {
121 log_feature!(
122 LogFeature::Ingestion,
123 info,
124 "Flattening root->array pattern: removing '{}' wrapper",
125 key
126 );
127 return flatten_array_elements(value.clone());
128 }
129
130 if let Value::Object(ref inner_map) = value {
132 if inner_map.len() == 1 {
133 let (inner_key, inner_value) = inner_map.iter().next().unwrap();
134 if inner_value.is_array() {
135 log_feature!(
136 LogFeature::Ingestion,
137 info,
138 "Flattening root->root->array pattern: removing '{}'->'{}' wrappers",
139 key,
140 inner_key
141 );
142 return flatten_array_elements(inner_value.clone());
143 }
144 }
145 }
146 }
147 }
148
149 json
151}
152
153fn flatten_array_elements(value: Value) -> Value {
155 if let Value::Array(arr) = value {
156 let flattened_elements: Vec<Value> = arr
157 .into_iter()
158 .map(|element| {
159 if let Value::Object(ref map) = element {
161 if map.len() == 1 {
162 let (key, inner_value) = map.iter().next().unwrap();
163
164 if inner_value.is_object() {
167 log_feature!(
168 LogFeature::Ingestion,
169 debug,
170 "Flattening array element: removing '{}' wrapper from object",
171 key
172 );
173 return inner_value.clone();
174 }
175 }
176 }
177 element
178 })
179 .collect();
180
181 Value::Array(flattened_elements)
182 } else {
183 value
184 }
185}
186
187pub fn add_file_location(json: Value, file_path: &std::path::Path) -> Value {
189 match json {
190 Value::Object(mut map) => {
191 map.insert(
193 "file_location".to_string(),
194 Value::String(file_path.to_string_lossy().to_string()),
195 );
196 Value::Object(map)
197 }
198 Value::Array(arr) => {
199 let modified_array: Vec<Value> = arr
201 .into_iter()
202 .map(|mut item| {
203 if let Value::Object(ref mut obj) = item {
204 obj.insert(
205 "file_location".to_string(),
206 Value::String(file_path.to_string_lossy().to_string()),
207 );
208 }
209 item
210 })
211 .collect();
212 Value::Array(modified_array)
213 }
214 other => {
215 json!({
217 "file_location": file_path.to_string_lossy().to_string(),
218 "value": other
219 })
220 }
221 }
222}
223
224pub fn save_json_to_temp_file(json: &Value) -> std::io::Result<String> {
227 let temp_dir = std::env::temp_dir().join("folddb_debug");
229 std::fs::create_dir_all(&temp_dir)?;
230
231 let temp_file = NamedTempFile::new_in(&temp_dir)?;
233
234 let json_string = serde_json::to_string_pretty(json)?;
236
237 let mut file = temp_file.as_file();
239 file.write_all(json_string.as_bytes())?;
240 file.sync_all()?;
241
242 let (_file, path) = temp_file.keep()?;
244
245 Ok(path.to_string_lossy().to_string())
246}
247
#[cfg(test)]
mod tests {
    use super::*;

    /// Two plain records reused as the payload of several wrapper shapes.
    fn sample_people() -> Value {
        json!([
            {"id": 1, "name": "Alice"},
            {"id": 2, "name": "Bob"}
        ])
    }

    /// Flattens `input` and returns the resulting elements, failing the test when the result is
    /// not an array.
    fn flatten_into_items(input: Value) -> Vec<Value> {
        match flatten_root_layers(input) {
            Value::Array(items) => items,
            other => panic!("expected a JSON array, got: {}", other),
        }
    }

    /// Asserts that `flatten_root_layers` hands `input` back untouched.
    fn assert_left_unchanged(input: Value) {
        let expected = input.clone();
        assert_eq!(flatten_root_layers(input), expected);
    }

    #[test]
    fn test_flatten_root_to_array() {
        let items = flatten_into_items(json!({ "data": sample_people() }));

        assert_eq!(items.len(), 2);
        assert_eq!(items[0]["id"], 1);
    }

    #[test]
    fn test_flatten_root_root_to_array() {
        let items = flatten_into_items(json!({
            "response": { "items": sample_people() }
        }));

        assert_eq!(items.len(), 2);
        assert_eq!(items[0]["name"], "Alice");
    }

    #[test]
    fn test_no_flatten_multiple_fields() {
        // Two root keys: the root is not a single-field wrapper.
        assert_left_unchanged(json!({
            "data": [{"id": 1}],
            "metadata": {"count": 1}
        }));
    }

    #[test]
    fn test_no_flatten_nested_object() {
        // The single root key wraps a multi-field object, not an array.
        assert_left_unchanged(json!({
            "user": {
                "id": 1,
                "name": "Alice"
            }
        }));
    }

    #[test]
    fn test_no_flatten_direct_array() {
        // Elements have two fields each, so nothing is unwrapped.
        assert_left_unchanged(sample_people());
    }

    #[test]
    fn test_no_flatten_deep_nesting() {
        // Three wrapper levels exceed the two that are recognised.
        assert_left_unchanged(json!({
            "level1": {
                "level2": {
                    "level3": [{"id": 1}]
                }
            }
        }));
    }

    #[test]
    fn test_flatten_with_array_keeps_array_structure() {
        let flattened = flatten_root_layers(json!({ "data": sample_people() }));

        assert!(flattened.is_array(), "Result should be an array");
        assert!(!flattened.is_object(), "Result should not be wrapped in an object");

        let items = flattened.as_array().unwrap();
        assert_eq!(items.len(), 2);
    }

    #[test]
    fn test_add_file_location_to_object() {
        let source_path = PathBuf::from("/test/file.csv");

        let tagged = add_file_location(json!({"id": 1, "name": "Alice"}), &source_path);

        assert!(tagged.is_object());
        let fields = tagged.as_object().unwrap();
        assert_eq!(fields["file_location"], "/test/file.csv");
        assert_eq!(fields["id"], 1);
    }

    #[test]
    fn test_add_file_location_to_array() {
        let source_path = PathBuf::from("/test/file.csv");

        let tagged = add_file_location(sample_people(), &source_path);

        assert!(tagged.is_array());
        let items = tagged.as_array().unwrap();
        assert_eq!(items.len(), 2);
        for index in 0..2 {
            assert_eq!(items[index]["file_location"], "/test/file.csv");
        }
    }

    #[test]
    fn test_flatten_array_elements_with_single_field_wrappers() {
        let items = flatten_into_items(json!({
            "data": [
                {"item": {"id": 1, "name": "Alice"}},
                {"item": {"id": 2, "name": "Bob"}}
            ]
        }));

        assert_eq!(items.len(), 2);

        let expected = [(1, "Alice"), (2, "Bob")];
        for (item, (id, name)) in items.iter().zip(expected.iter()) {
            assert_eq!(item["id"], *id);
            assert_eq!(item["name"], *name);
            assert!(item.get("item").is_none());
        }
    }

    #[test]
    fn test_flatten_array_elements_preserves_multi_field_objects() {
        let items = flatten_into_items(json!({
            "data": [
                {
                    "id": 1,
                    "wrapper": {"name": "Alice"}
                },
                {
                    "id": 2,
                    "wrapper": {"name": "Bob"}
                }
            ]
        }));

        assert_eq!(items.len(), 2);
        assert_eq!(items[0]["id"], 1);
        assert!(items[0].get("wrapper").is_some());
    }

    #[test]
    fn test_flatten_array_elements_preserves_primitives() {
        let items = flatten_into_items(json!({
            "data": [
                {"value": "Alice"},
                {"value": 42},
                {"value": true}
            ]
        }));

        assert_eq!(items.len(), 3);

        // Single-field wrappers around primitives must stay wrapped.
        assert_eq!(items[0]["value"], "Alice");
        assert_eq!(items[1]["value"], 42);
        assert_eq!(items[2]["value"], true);
    }

    #[test]
    fn test_flatten_complex_nested_structure() {
        let items = flatten_into_items(json!({
            "response": {
                "items": [
                    {"record": {"id": 1, "name": "Alice", "email": "alice@example.com"}},
                    {"record": {"id": 2, "name": "Bob", "email": "bob@example.com"}}
                ]
            }
        }));

        assert_eq!(items.len(), 2);

        let expected = [(1, "Alice"), (2, "Bob")];
        for (item, (id, name)) in items.iter().zip(expected.iter()) {
            assert_eq!(item["id"], *id);
            assert_eq!(item["name"], *name);
            assert!(item.get("record").is_none());
        }
    }

    #[test]
    fn test_flatten_direct_array_with_single_field_wrappers() {
        let items = flatten_into_items(json!([
            {"tweet": {"id": 1, "text": "Hello", "user": "alice"}},
            {"tweet": {"id": 2, "text": "World", "user": "bob"}}
        ]));

        assert_eq!(items.len(), 2);

        let expected = [(1, "Hello", "alice"), (2, "World", "bob")];
        for (item, (id, text, user)) in items.iter().zip(expected.iter()) {
            assert_eq!(item["id"], *id);
            assert_eq!(item["text"], *text);
            assert_eq!(item["user"], *user);
            assert!(item.get("tweet").is_none());
        }
    }
}
519