1use std::collections::{BTreeMap, HashSet};
7use std::env;
8use std::fs::File;
9use std::io::BufReader;
10use std::path::{Path, PathBuf};
11use std::time::Duration;
12
13use anyhow::{anyhow, bail, Context, Result};
14use clap::ValueEnum;
15use jsonpath_lib::select as jsonpath_select;
16use memvid_core::{DocMetadata, Memvid, MemvidError, PutOptions};
17use reqwest::blocking::Client;
18use reqwest::header::{HeaderMap, HeaderName, HeaderValue, ACCEPT, CONTENT_TYPE, USER_AGENT};
19use serde::Deserialize;
20use serde_json::{json, Value};
21use time::format_description::well_known::Rfc3339;
22use time::macros::format_description;
23use time::{OffsetDateTime, PrimitiveDateTime};
24
25use crate::config::CliConfig;
26use crate::utils::ensure_cli_mutation_allowed;
27
28#[derive(Debug)]
30pub(crate) struct ApiFetchCommand {
31 pub file: PathBuf,
32 pub config_path: PathBuf,
33 pub dry_run: bool,
34 pub mode_override: Option<ApiFetchMode>,
35 pub uri_override: Option<String>,
36 pub output_json: bool,
37 pub lock_timeout_ms: u64,
38 pub force_lock: bool,
39}
40
41#[derive(Clone, Copy, Debug, Deserialize, ValueEnum, PartialEq, Eq)]
42#[serde(rename_all = "lowercase")]
43pub enum ApiFetchMode {
44 Insert,
45 Upsert,
46}
47
48#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)]
49#[serde(rename_all = "lowercase")]
50enum HttpMethod {
51 #[serde(alias = "GET")]
52 Get,
53 #[serde(alias = "POST")]
54 Post,
55}
56
57#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)]
58#[serde(rename_all = "lowercase")]
59enum ResponseContentType {
60 #[serde(alias = "JSON")]
61 Json,
62 #[serde(alias = "RAW")]
63 Raw,
64}
65
66#[derive(Debug, Clone, Deserialize)]
67#[serde(untagged)]
68enum RequestBody {
69 Json(Value),
70 Text(String),
71}
72
73#[derive(Debug, Clone, Deserialize)]
74struct ExtractConfig {
75 pub items: String,
76 pub id: String,
77 #[serde(default)]
78 pub content: Option<String>,
79 #[serde(default)]
80 pub tags: Option<String>,
81 #[serde(default)]
82 pub metadata: Option<String>,
83 #[serde(default)]
84 pub timestamp: Option<String>,
85 #[serde(default)]
86 pub title: Option<String>,
87}
88
89#[derive(Debug, Deserialize)]
90struct ApiFetchConfig {
91 pub url: String,
92 #[serde(default)]
93 pub method: Option<HttpMethod>,
94 #[serde(default)]
95 pub body: Option<RequestBody>,
96 #[serde(default)]
97 pub headers: BTreeMap<String, String>,
98 pub extract: ExtractConfig,
99 #[serde(default)]
100 pub uri: Option<String>,
101 #[serde(default)]
102 pub mode: Option<ApiFetchMode>,
103 #[serde(default)]
104 pub tags: Option<Value>,
105 #[serde(default)]
106 pub metadata: Option<Value>,
107 #[serde(default)]
108 pub timestamp: Option<TimestampValue>,
109 #[serde(default)]
110 pub max_items: Option<usize>,
111 #[serde(default)]
112 pub content_type: Option<ResponseContentType>,
113 #[serde(default)]
114 pub title: Option<String>,
115}
116
117#[derive(Debug, Clone, Deserialize)]
118#[serde(untagged)]
119enum TimestampValue {
120 Number(i64),
121 String(String),
122}
123
124#[derive(Debug)]
125struct PreparedConfig {
126 url: String,
127 method: HttpMethod,
128 body: Option<RequestBody>,
129 headers: BTreeMap<String, String>,
130 extract: ExtractConfig,
131 base_uri: Option<String>,
132 mode: ApiFetchMode,
133 static_tags: Vec<TagDirective>,
134 static_doc_metadata: Option<DocMetadata>,
135 static_extra_metadata: BTreeMap<String, Value>,
136 static_timestamp: Option<i64>,
137 max_items: Option<usize>,
138 content_type: ResponseContentType,
139 default_title: Option<String>,
140}
141
/// One tag to attach to a frame.
#[derive(Clone, Debug)]
enum TagDirective {
    /// A bare label.
    Simple(String),
    /// A key/value tag.
    Pair { key: String, value: String },
}
147
148#[derive(Debug)]
149struct FramePlan {
150 uri: String,
151 title: Option<String>,
152 payload: String,
153 tags: Vec<TagDirective>,
154 doc_metadata: Option<DocMetadata>,
155 extra_metadata: BTreeMap<String, Value>,
156 timestamp: Option<i64>,
157}
158
/// Outcome recorded for a planned frame.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum FrameActionKind {
    /// No frame existed for the URI; a new one was (or would be) written.
    Insert,
    /// A frame existed and was (or would be) updated.
    Update,
    /// A frame existed and was left untouched.
    Skip,
}
165
166#[derive(Debug)]
167struct FrameResult {
168 uri: String,
169 action: FrameActionKind,
170 sequence: Option<u64>,
171 frame_id: Option<u64>,
172 reason: Option<String>,
173}
174
175pub(crate) fn run_api_fetch(_config: &CliConfig, command: ApiFetchCommand) -> Result<()> {
176 let config_data = load_config(&command.config_path)?;
177 let prepared = config_data.into_prepared(&command)?;
178
179 let client = crate::http::blocking_client(Duration::from_secs(60))
180 .context("failed to build HTTP client")?;
181
182 let headers = build_headers(&prepared.headers)?;
183 let response_body = execute_request(&client, &prepared, headers)?;
184 let root_value = parse_response(&response_body, prepared.content_type)?;
185
186 let mut warnings = Vec::new();
187 let mut plans = build_frame_plans(&prepared, &root_value)?;
188 if plans.is_empty() {
189 bail!("no items matched extract.items path");
190 }
191
192 let mut seen_uris = HashSet::new();
193 plans.retain(|plan| {
194 if !seen_uris.insert(plan.uri.clone()) {
195 warnings.push(format!("duplicate URI planned: {}", plan.uri));
196 false
197 } else {
198 true
199 }
200 });
201
202 let mut mem = Memvid::open(&command.file)?;
203 {
204 let settings = mem.lock_settings_mut();
205 settings.timeout_ms = command.lock_timeout_ms;
206 settings.force_stale = command.force_lock;
207 }
208 ensure_cli_mutation_allowed(&mem)?;
209
210 let mut results = Vec::new();
211 let mut inserted = 0usize;
212 let mut updated = 0usize;
213 let mut skipped = 0usize;
214
215 for plan in plans {
216 let existing = match mem.frame_by_uri(&plan.uri) {
217 Ok(frame) => Some(frame),
218 Err(MemvidError::FrameNotFoundByUri { .. }) => None,
219 Err(err) => return Err(err.into()),
220 };
221
222 match (existing, prepared.mode) {
223 (Some(frame), ApiFetchMode::Insert) => {
224 skipped += 1;
225 results.push(FrameResult {
226 uri: plan.uri,
227 action: FrameActionKind::Skip,
228 sequence: None,
229 frame_id: Some(frame.id),
230 reason: Some("frame already exists (mode=insert)".to_string()),
231 });
232 }
233 (Some(frame), ApiFetchMode::Upsert) => {
234 if command.dry_run {
235 updated += 1;
236 results.push(FrameResult {
237 uri: plan.uri,
238 action: FrameActionKind::Update,
239 sequence: None,
240 frame_id: Some(frame.id),
241 reason: None,
242 });
243 } else {
244 let seq = apply_update(&mut mem, frame.id, &plan)?;
245 results.push(FrameResult {
246 uri: plan.uri,
247 action: FrameActionKind::Update,
248 sequence: Some(seq),
249 frame_id: Some(frame.id),
250 reason: None,
251 });
252 updated += 1;
253 }
254 }
255 (None, _) => {
256 if command.dry_run {
257 inserted += 1;
258 results.push(FrameResult {
259 uri: plan.uri,
260 action: FrameActionKind::Insert,
261 sequence: None,
262 frame_id: None,
263 reason: None,
264 });
265 } else {
266 let seq = apply_insert(&mut mem, &plan)?;
267 results.push(FrameResult {
268 uri: plan.uri,
269 action: FrameActionKind::Insert,
270 sequence: Some(seq),
271 frame_id: None,
272 reason: None,
273 });
274 inserted += 1;
275 }
276 }
277 }
278 }
279
280 if !command.dry_run {
281 mem.commit()?;
282 }
283
284 if command.output_json {
285 print_json_summary(
286 &results,
287 inserted,
288 updated,
289 skipped,
290 command.dry_run,
291 &warnings,
292 )?;
293 } else {
294 print_human_summary(
295 &results,
296 inserted,
297 updated,
298 skipped,
299 command.dry_run,
300 &warnings,
301 );
302 }
303
304 Ok(())
305}
306
307fn load_config(path: &Path) -> Result<ApiFetchConfig> {
308 let file = File::open(path)
309 .with_context(|| format!("failed to open config file at {}", path.display()))?;
310 let reader = BufReader::new(file);
311 serde_json::from_reader(reader)
312 .with_context(|| format!("failed to parse config JSON at {}", path.display()))
313}
314
315impl ApiFetchConfig {
316 fn into_prepared(self, cmd: &ApiFetchCommand) -> Result<PreparedConfig> {
317 let mode = cmd
318 .mode_override
319 .or(self.mode)
320 .unwrap_or(ApiFetchMode::Insert);
321
322 let base_uri = cmd
323 .uri_override
324 .clone()
325 .or(self.uri)
326 .map(|value| value.trim_end_matches('/').to_string())
327 .filter(|value| !value.is_empty());
328
329 let static_tags = parse_tags_value(self.tags)?;
330 let (static_doc_metadata, static_extra_metadata) = split_metadata(self.metadata)?;
331 let static_timestamp = match self.timestamp {
332 Some(value) => Some(parse_timestamp_value(&value)?),
333 None => None,
334 };
335
336 let content_type = self.content_type.unwrap_or(ResponseContentType::Json);
337
338 Ok(PreparedConfig {
339 url: self.url,
340 method: self.method.unwrap_or(HttpMethod::Get),
341 body: self.body,
342 headers: self.headers,
343 extract: self.extract,
344 base_uri,
345 mode,
346 static_tags,
347 static_doc_metadata,
348 static_extra_metadata,
349 static_timestamp,
350 max_items: self.max_items,
351 content_type,
352 default_title: self.title,
353 })
354 }
355}
356
357fn parse_tags_value(value: Option<Value>) -> Result<Vec<TagDirective>> {
358 match value {
359 None | Some(Value::Null) => Ok(Vec::new()),
360 Some(Value::String(s)) => Ok(vec![TagDirective::Simple(s.trim().to_string())]),
361 Some(Value::Array(items)) => {
362 let mut tags = Vec::new();
363 for item in items {
364 match item {
365 Value::String(s) => tags.push(TagDirective::Simple(s.trim().to_string())),
366 Value::Object(map) => {
367 if let Some(Value::String(val)) = map.get("value") {
368 if let Some(Value::String(key)) = map.get("key") {
369 tags.push(TagDirective::Pair {
370 key: key.trim().to_string(),
371 value: val.trim().to_string(),
372 });
373 continue;
374 }
375 }
376 bail!(
377 "invalid tag entry in array; expected string or {{\"key\",\"value\"}}"
378 );
379 }
380 _ => bail!("tags array must contain strings or key/value objects"),
381 }
382 }
383 Ok(tags)
384 }
385 Some(Value::Object(object)) => {
386 let mut tags = Vec::new();
387 for (key, value) in object {
388 let key = key.trim().to_string();
389 let value_str = value_to_string(&value)?;
390 tags.push(TagDirective::Pair {
391 key,
392 value: value_str,
393 });
394 }
395 Ok(tags)
396 }
397 Some(_) => bail!("unsupported tags format; expected string, array, or object"),
398 }
399}
400
401fn split_metadata(value: Option<Value>) -> Result<(Option<DocMetadata>, BTreeMap<String, Value>)> {
402 match value {
403 None | Some(Value::Null) => Ok((None, BTreeMap::new())),
404 Some(Value::Object(map)) => {
405 let doc_attempt: Result<DocMetadata, _> =
406 serde_json::from_value(Value::Object(map.clone()));
407 if let Ok(meta) = doc_attempt {
408 Ok((Some(meta), BTreeMap::new()))
409 } else {
410 let mut extras = BTreeMap::new();
411 for (key, value) in map {
412 extras.insert(key, value);
413 }
414 Ok((None, extras))
415 }
416 }
417 Some(other) => {
418 let meta: DocMetadata = serde_json::from_value(other.clone())
419 .context("metadata must be an object compatible with DocMetadata")?;
420 Ok((Some(meta), BTreeMap::new()))
421 }
422 }
423}
424
425fn parse_timestamp_value(value: &TimestampValue) -> Result<i64> {
426 match value {
427 TimestampValue::Number(num) => Ok(*num),
428 TimestampValue::String(text) => parse_timestamp_str(text),
429 }
430}
431
432fn parse_timestamp_str(raw: &str) -> Result<i64> {
433 if let Ok(num) = raw.trim().parse::<i64>() {
434 return Ok(num);
435 }
436 if let Ok(dt) = OffsetDateTime::parse(raw.trim(), &Rfc3339) {
437 return Ok(dt.unix_timestamp());
438 }
439 if let Ok(parsed) = PrimitiveDateTime::parse(
440 raw.trim(),
441 &format_description!("[year]-[month]-[day] [hour]:[minute]:[second]"),
442 ) {
443 return Ok(parsed.assume_utc().unix_timestamp());
444 }
445 if let Ok(parsed) = PrimitiveDateTime::parse(
446 raw.trim(),
447 &format_description!("[year]-[month]-[day]T[hour]:[minute]:[second]"),
448 ) {
449 return Ok(parsed.assume_utc().unix_timestamp());
450 }
451 bail!("unable to parse timestamp: {raw}")
452}
453
454fn build_headers(raw: &BTreeMap<String, String>) -> Result<HeaderMap> {
455 let mut headers = HeaderMap::new();
456 for (key, value) in raw {
457 let resolved = resolve_value(value)?;
458 let name = HeaderName::try_from(key.as_str())
459 .with_context(|| format!("invalid header name: {key}"))?;
460 let header_value = HeaderValue::from_str(&resolved)
461 .with_context(|| format!("invalid header value for {key}"))?;
462 headers.insert(name, header_value);
463 }
464 Ok(headers)
465}
466
467fn resolve_value(raw: &str) -> Result<String> {
468 if let Some(env_key) = raw.strip_prefix("env:") {
469 let key = env_key.trim();
470 let value = env::var(key).with_context(|| {
471 format!("environment variable {key} referenced in config is not set")
472 })?;
473 Ok(value)
474 } else {
475 Ok(raw.to_string())
476 }
477}
478
479fn execute_request(
480 client: &Client,
481 prepared: &PreparedConfig,
482 mut headers: HeaderMap,
483) -> Result<String> {
484 if !headers.contains_key(USER_AGENT) {
485 headers.insert(
486 USER_AGENT,
487 HeaderValue::from_static(concat!("memvid-cli/", env!("CARGO_PKG_VERSION"))),
488 );
489 }
490 if prepared.content_type == ResponseContentType::Json && !headers.contains_key(ACCEPT) {
491 headers.insert(ACCEPT, HeaderValue::from_static("application/json"));
492 }
493 if matches!(prepared.method, HttpMethod::Post)
494 && prepared
495 .body
496 .as_ref()
497 .map(|body| matches!(body, RequestBody::Json(_)))
498 .unwrap_or(false)
499 && !headers.contains_key(CONTENT_TYPE)
500 {
501 headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
502 }
503
504 let mut request = match prepared.method {
505 HttpMethod::Get => client.get(&prepared.url),
506 HttpMethod::Post => client.post(&prepared.url),
507 };
508 if let Some(body) = &prepared.body {
509 match body {
510 RequestBody::Json(value) => {
511 request = request.json(value);
512 }
513 RequestBody::Text(text) => {
514 request = request.body(text.clone());
515 }
516 }
517 }
518 request = request.headers(headers);
519
520 let response = request
521 .send()
522 .with_context(|| format!("request to {} failed", prepared.url))?;
523 let response = response
524 .error_for_status()
525 .with_context(|| format!("remote endpoint {} returned an error status", prepared.url))?;
526 response.text().context("failed to read response body")
527}
528
529fn parse_response(body: &str, kind: ResponseContentType) -> Result<Value> {
530 match serde_json::from_str::<Value>(body) {
531 Ok(value) => Ok(value),
532 Err(err) => {
533 if matches!(kind, ResponseContentType::Raw) {
534 Ok(Value::String(body.to_string()))
535 } else {
536 Err(anyhow!("failed to decode response as JSON: {err}"))
537 }
538 }
539 }
540}
541
542fn build_frame_plans(prepared: &PreparedConfig, root: &Value) -> Result<Vec<FramePlan>> {
543 let mut matches = jsonpath_select(root, &prepared.extract.items)
544 .with_context(|| format!("jsonpath '{}' failed", prepared.extract.items))?;
545 if matches.is_empty() {
546 return Ok(Vec::new());
547 }
548 if let Some(limit) = prepared.max_items {
549 if matches.len() > limit {
550 matches.truncate(limit);
551 }
552 }
553
554 let mut plans = Vec::with_capacity(matches.len());
555 for item in matches {
556 let plan = build_frame_plan(prepared, item)?;
557 plans.push(plan);
558 }
559 Ok(plans)
560}
561
562fn build_frame_plan(prepared: &PreparedConfig, item: &Value) -> Result<FramePlan> {
563 let id_matches = jsonpath_select(item, &prepared.extract.id)
564 .with_context(|| format!("jsonpath '{}' failed", prepared.extract.id))?;
565 let id_value = id_matches.first().ok_or_else(|| {
566 anyhow!(
567 "extract.id path '{}' returned no results",
568 prepared.extract.id
569 )
570 })?;
571 let id = value_to_string(id_value)?.trim().to_string();
572 if id.is_empty() {
573 bail!("extracted id is empty after trimming");
574 }
575 let uri = build_uri(prepared.base_uri.as_deref(), &id);
576
577 let derived_title = if let Some(path) = &prepared.extract.title {
578 let title_matches =
579 jsonpath_select(item, path).with_context(|| format!("jsonpath '{}' failed", path))?;
580 title_matches
581 .first()
582 .map(|value| value_to_string(value))
583 .transpose()?
584 } else {
585 None
586 };
587
588 let title = derived_title.or_else(|| prepared.default_title.clone());
589
590 let payload = extract_payload(prepared, item)?;
591 let mut tags = prepared.static_tags.clone();
592 if let Some(path) = &prepared.extract.tags {
593 let tag_values =
594 jsonpath_select(item, path).with_context(|| format!("jsonpath '{}' failed", path))?;
595 for value in tag_values {
596 let tag = value_to_string(value)?;
597 if !tag.trim().is_empty() {
598 tags.push(TagDirective::Simple(tag.trim().to_string()));
599 }
600 }
601 }
602
603 let mut doc_metadata = prepared.static_doc_metadata.clone();
604 let mut extra_metadata = prepared.static_extra_metadata.clone();
605
606 if let Some(path) = &prepared.extract.metadata {
607 let meta_matches =
608 jsonpath_select(item, path).with_context(|| format!("jsonpath '{}' failed", path))?;
609 if let Some(value) = meta_matches.first() {
610 match value {
611 Value::Null => {}
612 Value::Object(map) => {
613 let mut handled = false;
614 if doc_metadata.is_none() {
615 if let Ok(parsed) =
616 serde_json::from_value::<DocMetadata>(Value::Object(map.clone()))
617 {
618 doc_metadata = Some(parsed);
619 handled = true;
620 }
621 }
622 if !handled {
623 for (key, value) in map.iter() {
624 extra_metadata.insert(key.clone(), (*value).clone());
625 }
626 }
627 }
628 other => {
629 extra_metadata.insert(path.clone(), (*other).clone());
630 }
631 }
632 }
633 }
634
635 if doc_metadata
636 .as_ref()
637 .and_then(|meta| meta.mime.as_ref())
638 .is_none()
639 {
640 let default_mime = match prepared.content_type {
641 ResponseContentType::Json => Some("application/json"),
642 ResponseContentType::Raw => Some("text/plain"),
643 };
644 if let Some(mime) = default_mime {
645 match doc_metadata.as_mut() {
646 Some(existing) => existing.mime = Some(mime.to_string()),
647 None => {
648 let mut meta = DocMetadata::default();
649 meta.mime = Some(mime.to_string());
650 doc_metadata = Some(meta);
651 }
652 }
653 }
654 }
655
656 let mut timestamp = prepared.static_timestamp;
657 if let Some(path) = &prepared.extract.timestamp {
658 let ts_matches =
659 jsonpath_select(item, path).with_context(|| format!("jsonpath '{}' failed", path))?;
660 if let Some(value) = ts_matches.first() {
661 timestamp = Some(parse_timestamp_from_value(value)?);
662 }
663 }
664
665 Ok(FramePlan {
666 uri,
667 title,
668 payload,
669 tags,
670 doc_metadata,
671 extra_metadata,
672 timestamp,
673 })
674}
675
676fn extract_payload(prepared: &PreparedConfig, item: &Value) -> Result<String> {
677 if let Some(path) = &prepared.extract.content {
678 let payload_matches =
679 jsonpath_select(item, path).with_context(|| format!("jsonpath '{}' failed", path))?;
680 let value = payload_matches
681 .first()
682 .ok_or_else(|| anyhow!("extract.content path '{}' returned no results", path))?;
683 match prepared.content_type {
684 ResponseContentType::Json => value_to_pretty_text(value),
685 ResponseContentType::Raw => value_to_plain_string(value),
686 }
687 } else {
688 match prepared.content_type {
689 ResponseContentType::Json => value_to_pretty_text(item),
690 ResponseContentType::Raw => value_to_plain_string(item),
691 }
692 }
693}
694
695fn value_to_plain_string(value: &Value) -> Result<String> {
696 match value {
697 Value::String(text) => Ok(text.clone()),
698 Value::Number(num) => Ok(num.to_string()),
699 Value::Bool(flag) => Ok(flag.to_string()),
700 other => bail!("expected textual content but found {other}"),
701 }
702}
703
704fn value_to_pretty_text(value: &Value) -> Result<String> {
705 match value {
706 Value::String(text) => Ok(text.clone()),
707 _ => serde_json::to_string_pretty(value).map_err(|err| anyhow!(err)),
708 }
709}
710
711fn value_to_string(value: &Value) -> Result<String> {
712 match value {
713 Value::String(text) => Ok(text.clone()),
714 Value::Number(num) => Ok(num.to_string()),
715 Value::Bool(flag) => Ok(flag.to_string()),
716 Value::Null => bail!("value is null"),
717 other => serde_json::to_string(other).map_err(|err| anyhow!(err)),
718 }
719}
720
721fn parse_timestamp_from_value(value: &Value) -> Result<i64> {
722 match value {
723 Value::Number(num) => num
724 .as_i64()
725 .ok_or_else(|| anyhow!("timestamp number must fit i64")),
726 Value::String(text) => parse_timestamp_str(text),
727 other => bail!("unsupported timestamp value: {other}"),
728 }
729}
730
/// Joins an optional base URI and an item id with exactly one `/`.
///
/// Without a base the id is returned untouched. With a base, trailing
/// slashes of the base and leading slashes of the id are removed first; if
/// either side ends up empty the other side is returned on its own.
fn build_uri(base: Option<&str>, id: &str) -> String {
    let Some(base) = base else {
        return id.to_string();
    };

    let head = base.trim_end_matches('/');
    let tail = id.trim_start_matches('/');
    match (head.is_empty(), tail.is_empty()) {
        (true, _) => tail.to_string(),
        (false, true) => head.to_string(),
        (false, false) => format!("{head}/{tail}"),
    }
}
747
748fn apply_insert(mem: &mut Memvid, plan: &FramePlan) -> Result<u64> {
749 let options = build_put_options(plan);
750 let payload = plan.payload.as_bytes();
751 mem.put_bytes_with_options(payload, options)
752 .context("failed to insert frame")
753}
754
755fn apply_update(mem: &mut Memvid, frame_id: u64, plan: &FramePlan) -> Result<u64> {
756 let options = build_put_options(plan);
757 let payload = plan.payload.as_bytes().to_vec();
758 mem.update_frame(frame_id, Some(payload), options, None)
759 .context("failed to update frame")
760}
761
762fn build_put_options(plan: &FramePlan) -> PutOptions {
763 let mut builder = PutOptions::builder()
764 .enable_embedding(false)
765 .auto_tag(false)
766 .extract_dates(false)
767 .uri(plan.uri.clone())
768 .search_text(plan.payload.clone());
769
770 if let Some(ts) = plan.timestamp {
771 builder = builder.timestamp(ts);
772 }
773 if let Some(title) = &plan.title {
774 if !title.trim().is_empty() {
775 builder = builder.title(title.clone());
776 }
777 }
778 if let Some(meta) = plan.doc_metadata.clone() {
779 builder = builder.metadata(meta);
780 }
781 for (key, value) in plan.extra_metadata.iter() {
782 builder = builder.metadata_entry(key.clone(), value.clone());
783 }
784 for tag in &plan.tags {
785 match tag {
786 TagDirective::Simple(tag) => {
787 if !tag.trim().is_empty() {
788 builder = builder.push_tag(tag.clone());
789 }
790 }
791 TagDirective::Pair { key, value } => {
792 builder = builder.tag(key.clone(), value.clone());
793 if !key.trim().is_empty() {
794 builder = builder.push_tag(key.clone());
795 }
796 if !value.trim().is_empty() && value != key {
797 builder = builder.push_tag(value.clone());
798 }
799 }
800 }
801 }
802
803 builder.build()
804}
805
806fn print_json_summary(
807 results: &[FrameResult],
808 inserted: usize,
809 updated: usize,
810 skipped: usize,
811 dry_run: bool,
812 warnings: &[String],
813) -> Result<()> {
814 let items: Vec<Value> = results
815 .iter()
816 .map(|res| {
817 json!({
818 "uri": res.uri,
819 "action": match res.action {
820 FrameActionKind::Insert => "insert",
821 FrameActionKind::Update => "update",
822 FrameActionKind::Skip => "skip",
823 },
824 "sequence": res.sequence,
825 "frame_id": res.frame_id,
826 "reason": res.reason,
827 })
828 })
829 .collect();
830 let summary = json!({
831 "dry_run": dry_run,
832 "counts": {
833 "total": results.len(),
834 "inserted": inserted,
835 "updated": updated,
836 "skipped": skipped,
837 },
838 "items": items,
839 "warnings": warnings,
840 });
841 println!("{}", serde_json::to_string_pretty(&summary)?);
842 Ok(())
843}
844
845fn print_human_summary(
846 results: &[FrameResult],
847 inserted: usize,
848 updated: usize,
849 skipped: usize,
850 dry_run: bool,
851 warnings: &[String],
852) {
853 for res in results {
854 match res.action {
855 FrameActionKind::Insert => {
856 if let Some(seq) = res.sequence {
857 println!("+ {} (seq {})", res.uri, seq);
858 } else {
859 println!("+ {} (planned)", res.uri);
860 }
861 }
862 FrameActionKind::Update => {
863 if let Some(seq) = res.sequence {
864 println!("~ {} (seq {})", res.uri, seq);
865 } else {
866 println!("~ {} (planned)", res.uri);
867 }
868 }
869 FrameActionKind::Skip => {
870 if let Some(reason) = &res.reason {
871 println!("- {} ({})", res.uri, reason);
872 } else {
873 println!("- {} (skipped)", res.uri);
874 }
875 }
876 }
877 }
878 println!(
879 "Summary: inserted {}, updated {}, skipped {}{}",
880 inserted,
881 updated,
882 skipped,
883 if dry_run { " (dry run)" } else { "" }
884 );
885 if !warnings.is_empty() {
886 println!("Warnings:");
887 for warning in warnings {
888 println!(" - {warning}");
889 }
890 }
891}
892
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use std::path::PathBuf;
    use std::sync::{Mutex, OnceLock};

    /// Serializes tests that mutate the process environment.
    fn env_lock() -> std::sync::MutexGuard<'static, ()> {
        static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
        LOCK.get_or_init(|| Mutex::new(())).lock().unwrap()
    }

    /// Baseline prepared config; tests override individual fields with
    /// struct-update syntax.
    fn prepared_fixture(extract: ExtractConfig, content_type: ResponseContentType) -> PreparedConfig {
        PreparedConfig {
            url: "https://example.com".to_string(),
            method: HttpMethod::Get,
            body: None,
            headers: BTreeMap::new(),
            extract,
            base_uri: None,
            mode: ApiFetchMode::Insert,
            static_tags: Vec::new(),
            static_doc_metadata: None,
            static_extra_metadata: BTreeMap::new(),
            static_timestamp: None,
            max_items: None,
            content_type,
            default_title: None,
        }
    }

    /// True when `tags` contains the simple tag `label`.
    fn has_simple_tag(tags: &[TagDirective], label: &str) -> bool {
        tags.iter()
            .any(|tag| matches!(tag, TagDirective::Simple(value) if value == label))
    }

    #[test]
    fn resolve_value_reads_env() {
        const VAR: &str = "API_FETCH_TEST_TOKEN";
        let _guard = env_lock();
        // SAFETY: the `env_lock` guard is held, so tests that take this lock
        // do not touch the environment concurrently.
        unsafe {
            std::env::set_var(VAR, "secret");
        }
        let reference = format!("env:{VAR}");
        let resolved = resolve_value(&reference).expect("resolve env");
        assert_eq!(resolved, "secret");
        // SAFETY: same guard as above is still held.
        unsafe {
            std::env::remove_var(VAR);
        }
    }

    #[test]
    fn prepared_config_applies_overrides() {
        let config: ApiFetchConfig = serde_json::from_value(json!({
            "url": "https://example.com/api",
            "method": "get",
            "headers": {},
            "extract": {
                "items": "$.items[*]",
                "id": "$.id"
            },
            "mode": "insert",
            "uri": "mem://base",
            "tags": ["static"],
            "metadata": {"mime": "text/plain"},
            "timestamp": 42,
            "content_type": "json"
        }))
        .expect("config");

        let command = ApiFetchCommand {
            file: PathBuf::from("memory.mv2"),
            config_path: PathBuf::from("cfg.json"),
            dry_run: false,
            mode_override: Some(ApiFetchMode::Upsert),
            uri_override: Some("mem://override".to_string()),
            output_json: false,
            lock_timeout_ms: 5000,
            force_lock: false,
        };

        let prepared = config.into_prepared(&command).expect("prepared");
        assert_eq!(prepared.mode, ApiFetchMode::Upsert);
        assert_eq!(prepared.base_uri.as_deref(), Some("mem://override"));
        assert_eq!(prepared.static_tags.len(), 1);
        assert!(prepared.static_doc_metadata.is_some());
        assert_eq!(prepared.static_timestamp, Some(42));
        assert_eq!(prepared.content_type, ResponseContentType::Json);
    }

    #[test]
    fn build_frame_plan_extracts_payload_and_tags() {
        let extract = ExtractConfig {
            items: "$.items[*]".to_string(),
            id: "$.id".to_string(),
            content: Some("$.text".to_string()),
            tags: Some("$.tags[*]".to_string()),
            metadata: Some("$.meta".to_string()),
            timestamp: Some("$.ts".to_string()),
            title: Some("$.title".to_string()),
        };
        let prepared = PreparedConfig {
            base_uri: Some("mem://base".to_string()),
            static_tags: vec![TagDirective::Simple("static".to_string())],
            static_timestamp: Some(100),
            default_title: Some("fallback".to_string()),
            ..prepared_fixture(extract, ResponseContentType::Raw)
        };

        let root = json!({
            "items": [
                {
                    "id": "doc-1",
                    "text": "hello world",
                    "tags": ["dynamic"],
                    "meta": {"mime": "text/plain"},
                    "ts": 123,
                    "title": "Greeting"
                }
            ]
        });

        let plans = build_frame_plans(&prepared, &root).expect("plans");
        assert_eq!(plans.len(), 1);
        let plan = &plans[0];
        assert_eq!(plan.uri, "mem://base/doc-1");
        assert_eq!(plan.payload, "hello world");
        assert_eq!(plan.title.as_deref(), Some("Greeting"));
        assert_eq!(plan.timestamp, Some(123));
        assert!(has_simple_tag(&plan.tags, "dynamic"));
        assert!(has_simple_tag(&plan.tags, "static"));
        let meta = plan.doc_metadata.as_ref().expect("doc metadata");
        assert_eq!(meta.mime.as_deref(), Some("text/plain"));
    }

    #[test]
    fn build_frame_plan_defaults_json_mime() {
        let extract = ExtractConfig {
            items: "$".to_string(),
            id: "$.id".to_string(),
            content: None,
            tags: None,
            metadata: None,
            timestamp: None,
            title: Some("$.title".to_string()),
        };
        let prepared = PreparedConfig {
            base_uri: Some("memvid://json".to_string()),
            ..prepared_fixture(extract, ResponseContentType::Json)
        };

        let root = json!({
            "id": 1,
            "title": "Example"
        });

        let plans = build_frame_plans(&prepared, &root).expect("plans");
        assert_eq!(plans.len(), 1);
        let plan = &plans[0];
        let meta = plan.doc_metadata.as_ref().expect("doc metadata");
        assert_eq!(meta.mime.as_deref(), Some("application/json"));
    }
}