Skip to main content

sh_layer3/document_loaders/
json.rs

1//! # JSON Document Loader
2//!
3//! JSON 文件加载器。
4
5use crate::document_loaders::{DocumentLoader, LoadOptions};
6use crate::retriever_engine::Document;
7use crate::types::Layer3Result;
8use async_trait::async_trait;
9use std::path::PathBuf;
10
11/// JSON Loader 实现
12#[allow(dead_code)]
13pub struct JsonLoader {
14    #[allow(dead_code)]
15    options: LoadOptions,
16    /// JSON Pointer 或 jq 查询(可选)
17    query: Option<String>,
18}
19
20impl JsonLoader {
21    pub fn new() -> Self {
22        Self {
23            options: LoadOptions::default(),
24            query: None,
25        }
26    }
27
28    pub fn with_query(query: impl Into<String>) -> Self {
29        Self {
30            options: LoadOptions::default(),
31            query: Some(query.into()),
32        }
33    }
34}
35
36impl Default for JsonLoader {
37    fn default() -> Self {
38        Self::new()
39    }
40}
41
42#[async_trait]
43impl DocumentLoader for JsonLoader {
44    async fn load(&self, path: PathBuf) -> Layer3Result<Document> {
45        let content = tokio::fs::read_to_string(&path).await?;
46
47        // 验证 JSON 格式
48        let _: serde_json::Value = serde_json::from_str(&content)?;
49
50        Ok(Document::new(content).with_source(path.to_string_lossy().to_string()))
51    }
52
53    async fn load_and_split(&self, path: PathBuf) -> Layer3Result<Vec<Document>> {
54        let content = tokio::fs::read_to_string(&path).await?;
55        let json: serde_json::Value = serde_json::from_str(&content)?;
56
57        // 如果是数组,每个元素作为一个文档
58        if let serde_json::Value::Array(arr) = json {
59            return Ok(arr
60                .into_iter()
61                .enumerate()
62                .filter_map(|(i, v)| {
63                    if let Ok(s) = serde_json::to_string(&v) {
64                        Some(Document::new(s).with_source(format!(
65                            "{}[{}]",
66                            path.to_string_lossy(),
67                            i
68                        )))
69                    } else {
70                        None
71                    }
72                })
73                .collect());
74        }
75
76        // 否则作为单个文档
77        Ok(vec![self.load(path).await?])
78    }
79
80    fn supports(&self, path: &std::path::Path) -> bool {
81        path.extension()
82            .and_then(|e| e.to_str())
83            .map(|e| e == "json")
84            .unwrap_or(false)
85    }
86
87    fn extensions(&self) -> &[&str] {
88        &["json"]
89    }
90}
91
#[cfg(test)]
mod tests {
    use super::*;

    /// The loader must advertise the `json` extension.
    #[test]
    fn test_json_loader_extensions() {
        let json_loader = JsonLoader::new();
        let advertises_json = json_loader.extensions().iter().any(|ext| *ext == "json");
        assert!(advertises_json);
    }
}