learner/retriever/
json.rs1use serde_json::Value;
21
22use super::*;
23
24#[derive(Debug, Clone, Deserialize)]
42pub struct JsonConfig {
43 pub field_maps: HashMap<String, FieldMap>,
45}
46
47#[async_trait]
48impl ResponseProcessor for JsonConfig {
49 async fn process_response(&self, data: &[u8]) -> Result<Paper> {
72 let json: Value = serde_json::from_slice(data)
73 .map_err(|e| LearnerError::ApiError(format!("Failed to parse JSON: {}", e)))?;
74
75 trace!("Processing JSON response: {}", serde_json::to_string_pretty(&json).unwrap());
76
77 let title = self.extract_field(&json, "title")?;
78 let abstract_text = self.extract_field(&json, "abstract")?;
79 let publication_date =
80 chrono::DateTime::parse_from_rfc3339(&self.extract_field(&json, "publication_date")?)
81 .map(|dt| dt.with_timezone(&Utc))
82 .map_err(|e| LearnerError::ApiError(format!("Invalid date format: {}", e)))?;
83
84 let authors = if let Some(map) = self.field_maps.get("authors") {
85 self.extract_authors(&json, map)?
86 } else {
87 return Err(LearnerError::ApiError("Missing authors mapping".to_string()));
88 };
89
90 let pdf_url = self.field_maps.get("pdf_url").and_then(|map| {
91 self.get_by_path(&json, &map.path).map(|url| {
92 if let Some(transform) = &map.transform {
93 apply_transform(&url, transform).ok().unwrap_or_else(|| url.clone())
94 } else {
95 url.clone()
96 }
97 })
98 });
99
100 let doi = self
101 .field_maps
102 .get("doi")
103 .and_then(|map| self.get_by_path(&json, &map.path))
104 .map(String::from);
105
106 Ok(Paper {
107 title,
108 authors,
109 abstract_text,
110 publication_date,
111 source: String::new(),
112 source_identifier: String::new(),
113 pdf_url,
114 doi,
115 })
116 }
117}
118
119impl JsonConfig {
120 fn extract_field(&self, json: &Value, field: &str) -> Result<String> {
129 let map = self
130 .field_maps
131 .get(field)
132 .ok_or_else(|| LearnerError::ApiError(format!("Missing field mapping for {}", field)))?;
133
134 let value = self
135 .get_by_path(json, &map.path)
136 .ok_or_else(|| LearnerError::ApiError(format!("No content found for {}", field)))?;
137
138 if let Some(transform) = &map.transform {
139 apply_transform(&value, transform)
140 } else {
141 Ok(value)
142 }
143 }
144
145 fn get_by_path(&self, json: &Value, path: &str) -> Option<String> {
153 let mut current = json;
154
155 for part in path.split('/') {
156 current = if let Ok(index) = part.parse::<usize>() {
157 current.as_array()?.get(index)?
159 } else {
160 current.get(part)?
162 };
163 }
164
165 match current {
166 Value::String(s) => Some(s.clone()),
167 Value::Array(arr) if !arr.is_empty() => arr[0].as_str().map(String::from),
168 Value::Number(n) => Some(n.to_string()),
169 _ => current.as_str().map(String::from),
170 }
171 }
172
173 fn extract_authors(&self, json: &Value, map: &FieldMap) -> Result<Vec<Author>> {
183 let authors = if let Some(Value::Array(arr)) = get_path_value(json, &map.path) {
184 arr
185 .iter()
186 .filter_map(|author| {
187 let name = match (author.get("given"), author.get("family")) {
188 (Some(given), Some(family)) => {
189 format!("{} {}", given.as_str().unwrap_or(""), family.as_str().unwrap_or(""))
190 },
191 (Some(given), None) => given.as_str()?.to_string(),
192 (None, Some(family)) => family.as_str()?.to_string(),
193 (None, None) => return None,
194 };
195
196 let affiliation = author
197 .get("affiliation")
198 .and_then(|a| a.as_array())
199 .and_then(|arr| arr.first())
200 .and_then(|aff| aff.get("name"))
201 .and_then(|n| n.as_str())
202 .map(String::from);
203
204 Some(Author { name, affiliation, email: None })
205 })
206 .collect()
207 } else {
208 Vec::new()
209 };
210
211 if authors.is_empty() {
212 Err(LearnerError::ApiError("No authors found".to_string()))
213 } else {
214 Ok(authors)
215 }
216 }
217}
218
219fn get_path_value<'a>(json: &'a Value, path: &str) -> Option<&'a Value> {
224 let mut current = json;
225 for part in path.split('/') {
226 current = current.get(part)?;
227 }
228 Some(current)
229}