1use std::collections::{BTreeMap, HashSet};
7use std::env;
8use std::fs::File;
9use std::io::BufReader;
10use std::path::{Path, PathBuf};
11use std::time::Duration;
12
13use anyhow::{anyhow, bail, Context, Result};
14use clap::ValueEnum;
15use jsonpath_lib::select as jsonpath_select;
16use memvid_core::{DocMetadata, Memvid, MemvidError, PutOptions};
17use reqwest::blocking::Client;
18use reqwest::header::{HeaderMap, HeaderName, HeaderValue, ACCEPT, CONTENT_TYPE, USER_AGENT};
19use serde::Deserialize;
20use serde_json::{json, Value};
21use time::format_description::well_known::Rfc3339;
22use time::macros::format_description;
23use time::{OffsetDateTime, PrimitiveDateTime};
24
25use crate::config::CliConfig;
26use crate::utils::ensure_cli_mutation_allowed;
27
/// Arguments for a single `api-fetch` invocation, consumed by `run_api_fetch`.
#[derive(Debug)]
pub(crate) struct ApiFetchCommand {
    /// Path of the memory file handed to `Memvid::open`.
    pub file: PathBuf,
    /// Path of the JSON fetch configuration read by `load_config`.
    pub config_path: PathBuf,
    /// When true, actions are planned and reported but nothing is written or committed.
    pub dry_run: bool,
    /// Takes precedence over the config file's `mode` when set.
    pub mode_override: Option<ApiFetchMode>,
    /// Takes precedence over the config file's `uri` when set.
    pub uri_override: Option<String>,
    /// Print the summary as JSON instead of the human-readable listing.
    pub output_json: bool,
    /// Copied into the memory's lock settings as `timeout_ms`.
    pub lock_timeout_ms: u64,
    /// Copied into the memory's lock settings as `force_stale`.
    pub force_lock: bool,
}
40
/// Decides what happens to an item whose URI already exists in the memory.
///
/// Deserialized from lowercase names (`"insert"`, `"upsert"`).
#[derive(Clone, Copy, Debug, Deserialize, ValueEnum, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum ApiFetchMode {
    /// Leave an existing frame untouched and report the item as skipped.
    Insert,
    /// Update an existing frame; items without an existing frame are inserted.
    Upsert,
}
47
/// HTTP verb used for the fetch request.
///
/// Accepted in the config as lowercase (`"get"`, `"post"`) or via the
/// uppercase aliases (`"GET"`, `"POST"`).
#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
enum HttpMethod {
    #[serde(alias = "GET")]
    Get,
    #[serde(alias = "POST")]
    Post,
}
56
/// How the response body and extracted payloads are interpreted.
///
/// Accepted in the config as lowercase (`"json"`, `"raw"`) or via the
/// uppercase aliases (`"JSON"`, `"RAW"`).
#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
enum ResponseContentType {
    /// The body must parse as JSON; payloads are pretty-printed JSON text.
    #[serde(alias = "JSON")]
    Json,
    /// A body that is not valid JSON is kept as a single string; payloads
    /// must be scalar values rendered as plain text.
    #[serde(alias = "RAW")]
    Raw,
}
65
66#[derive(Debug, Clone, Deserialize)]
67#[serde(untagged)]
68enum RequestBody {
69 Json(Value),
70 Text(String),
71}
72
/// JSONPath expressions describing how frames are pulled out of the response.
///
/// `items` is evaluated against the response root; every other path is
/// evaluated against each matched item.
#[derive(Debug, Clone, Deserialize)]
struct ExtractConfig {
    /// Selects the items; each match becomes one frame plan.
    pub items: String,
    /// Selects the item identifier; the first match is stringified, trimmed
    /// and combined with the base URI.
    pub id: String,
    /// Selects the payload; the whole item is used when this is absent.
    #[serde(default)]
    pub content: Option<String>,
    /// Selects tag values; every non-blank match is added as a simple tag.
    #[serde(default)]
    pub tags: Option<String>,
    /// Selects per-item metadata; only the first match is considered.
    #[serde(default)]
    pub metadata: Option<String>,
    /// Selects a per-item timestamp that replaces the static one.
    #[serde(default)]
    pub timestamp: Option<String>,
    /// Selects the frame title; the config-level `title` is the fallback
    /// when this path is not configured or matches nothing.
    #[serde(default)]
    pub title: Option<String>,
}
88
/// Raw fetch configuration as deserialized from the JSON config file.
///
/// Converted into a `PreparedConfig` by `into_prepared`, which applies CLI
/// overrides and defaults.
#[derive(Debug, Deserialize)]
struct ApiFetchConfig {
    /// Endpoint to request.
    pub url: String,
    /// HTTP verb; defaults to GET when omitted.
    #[serde(default)]
    pub method: Option<HttpMethod>,
    /// Optional request body.
    #[serde(default)]
    pub body: Option<RequestBody>,
    /// Request headers; a value starting with `env:` is read from the named
    /// environment variable.
    #[serde(default)]
    pub headers: BTreeMap<String, String>,
    /// JSONPath expressions used to extract frames from the response.
    pub extract: ExtractConfig,
    /// Base URI prefixed to each extracted id.
    #[serde(default)]
    pub uri: Option<String>,
    /// Insert/upsert behaviour; defaults to insert when omitted.
    #[serde(default)]
    pub mode: Option<ApiFetchMode>,
    /// Static tags applied to every frame: a string, an array, or an object.
    #[serde(default)]
    pub tags: Option<Value>,
    /// Static metadata applied to every frame.
    #[serde(default)]
    pub metadata: Option<Value>,
    /// Static timestamp applied when an item supplies none.
    #[serde(default)]
    pub timestamp: Option<TimestampValue>,
    /// Upper bound on the number of matched items that are processed.
    #[serde(default)]
    pub max_items: Option<usize>,
    /// Response interpretation; defaults to JSON when omitted.
    #[serde(default)]
    pub content_type: Option<ResponseContentType>,
    /// Title used when no per-item title is extracted.
    #[serde(default)]
    pub title: Option<String>,
}
116
/// Static timestamp from the config, given either as an integer or as text.
///
/// Resolved to an `i64` by `parse_timestamp_value`.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
enum TimestampValue {
    /// Integer timestamp, used unchanged.
    Number(i64),
    /// Textual timestamp, parsed by `parse_timestamp_str`.
    String(String),
}
123
/// Fetch configuration after CLI overrides and defaults have been applied.
///
/// Produced by `ApiFetchConfig::into_prepared`.
#[derive(Debug)]
struct PreparedConfig {
    /// Endpoint to request.
    url: String,
    /// HTTP verb to use.
    method: HttpMethod,
    /// Optional request body.
    body: Option<RequestBody>,
    /// Unresolved header values (`env:` references are resolved later by
    /// `build_headers`).
    headers: BTreeMap<String, String>,
    /// JSONPath expressions used to extract frames.
    extract: ExtractConfig,
    /// Base URI with trailing slashes removed; `None` when empty.
    base_uri: Option<String>,
    /// Effective insert/upsert mode.
    mode: ApiFetchMode,
    /// Tags attached to every frame.
    static_tags: Vec<TagDirective>,
    /// Static metadata that deserialized as `DocMetadata`.
    static_doc_metadata: Option<DocMetadata>,
    /// Static metadata kept as loose key/value entries.
    static_extra_metadata: BTreeMap<String, Value>,
    /// Timestamp used when an item does not supply one.
    static_timestamp: Option<i64>,
    /// Upper bound on the number of matched items that are processed.
    max_items: Option<usize>,
    /// How the response and payloads are interpreted.
    content_type: ResponseContentType,
    /// Title used when no per-item title is extracted.
    default_title: Option<String>,
}
141
/// A tag to attach to a frame.
#[derive(Debug, Clone)]
enum TagDirective {
    /// A bare tag label.
    Simple(String),
    /// A key/value tag; `build_put_options` also pushes the key and the
    /// value as plain tags when they are non-blank.
    Pair { key: String, value: String },
}
147
/// Everything needed to insert or update one frame, derived from one
/// response item by `build_frame_plan`.
#[derive(Debug)]
struct FramePlan {
    /// Frame URI (base URI joined with the extracted id).
    uri: String,
    /// Extracted title, or the configured default.
    title: Option<String>,
    /// Frame content; also used as the search text.
    payload: String,
    /// Static tags followed by per-item tags.
    tags: Vec<TagDirective>,
    /// Structured document metadata, including the default MIME type.
    doc_metadata: Option<DocMetadata>,
    /// Loose metadata entries.
    extra_metadata: BTreeMap<String, Value>,
    /// Per-item timestamp, or the static one.
    timestamp: Option<i64>,
}
158
/// Outcome recorded for one planned frame.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FrameActionKind {
    /// No frame with this URI existed; a new one was (or would be) written.
    Insert,
    /// A frame with this URI existed and was (or would be) updated.
    Update,
    /// A frame with this URI existed and was left untouched.
    Skip,
}
165
/// Per-frame entry of the final summary.
#[derive(Debug)]
struct FrameResult {
    /// URI of the planned frame.
    uri: String,
    /// What was done (or planned, in a dry run).
    action: FrameActionKind,
    /// Value returned by `apply_insert` / `apply_update`; `None` for dry
    /// runs and skips.
    sequence: Option<u64>,
    /// Id of the pre-existing frame; `None` for inserts.
    frame_id: Option<u64>,
    /// Explanation attached to skipped frames.
    reason: Option<String>,
}
174
175pub(crate) fn run_api_fetch(_config: &CliConfig, command: ApiFetchCommand) -> Result<()> {
176 let config_data = load_config(&command.config_path)?;
177 let prepared = config_data.into_prepared(&command)?;
178
179 let client = Client::builder()
180 .timeout(Duration::from_secs(60))
181 .build()
182 .context("failed to build HTTP client")?;
183
184 let headers = build_headers(&prepared.headers)?;
185 let response_body = execute_request(&client, &prepared, headers)?;
186 let root_value = parse_response(&response_body, prepared.content_type)?;
187
188 let mut warnings = Vec::new();
189 let mut plans = build_frame_plans(&prepared, &root_value)?;
190 if plans.is_empty() {
191 bail!("no items matched extract.items path");
192 }
193
194 let mut seen_uris = HashSet::new();
195 plans.retain(|plan| {
196 if !seen_uris.insert(plan.uri.clone()) {
197 warnings.push(format!("duplicate URI planned: {}", plan.uri));
198 false
199 } else {
200 true
201 }
202 });
203
204 let mut mem = Memvid::open(&command.file)?;
205 {
206 let settings = mem.lock_settings_mut();
207 settings.timeout_ms = command.lock_timeout_ms;
208 settings.force_stale = command.force_lock;
209 }
210 ensure_cli_mutation_allowed(&mem)?;
211
212 let mut results = Vec::new();
213 let mut inserted = 0usize;
214 let mut updated = 0usize;
215 let mut skipped = 0usize;
216
217 for plan in plans {
218 let existing = match mem.frame_by_uri(&plan.uri) {
219 Ok(frame) => Some(frame),
220 Err(MemvidError::FrameNotFoundByUri { .. }) => None,
221 Err(err) => return Err(err.into()),
222 };
223
224 match (existing, prepared.mode) {
225 (Some(frame), ApiFetchMode::Insert) => {
226 skipped += 1;
227 results.push(FrameResult {
228 uri: plan.uri,
229 action: FrameActionKind::Skip,
230 sequence: None,
231 frame_id: Some(frame.id),
232 reason: Some("frame already exists (mode=insert)".to_string()),
233 });
234 }
235 (Some(frame), ApiFetchMode::Upsert) => {
236 if command.dry_run {
237 updated += 1;
238 results.push(FrameResult {
239 uri: plan.uri,
240 action: FrameActionKind::Update,
241 sequence: None,
242 frame_id: Some(frame.id),
243 reason: None,
244 });
245 } else {
246 let seq = apply_update(&mut mem, frame.id, &plan)?;
247 results.push(FrameResult {
248 uri: plan.uri,
249 action: FrameActionKind::Update,
250 sequence: Some(seq),
251 frame_id: Some(frame.id),
252 reason: None,
253 });
254 updated += 1;
255 }
256 }
257 (None, _) => {
258 if command.dry_run {
259 inserted += 1;
260 results.push(FrameResult {
261 uri: plan.uri,
262 action: FrameActionKind::Insert,
263 sequence: None,
264 frame_id: None,
265 reason: None,
266 });
267 } else {
268 let seq = apply_insert(&mut mem, &plan)?;
269 results.push(FrameResult {
270 uri: plan.uri,
271 action: FrameActionKind::Insert,
272 sequence: Some(seq),
273 frame_id: None,
274 reason: None,
275 });
276 inserted += 1;
277 }
278 }
279 }
280 }
281
282 if !command.dry_run {
283 mem.commit()?;
284 }
285
286 if command.output_json {
287 print_json_summary(
288 &results,
289 inserted,
290 updated,
291 skipped,
292 command.dry_run,
293 &warnings,
294 )?;
295 } else {
296 print_human_summary(
297 &results,
298 inserted,
299 updated,
300 skipped,
301 command.dry_run,
302 &warnings,
303 );
304 }
305
306 Ok(())
307}
308
309fn load_config(path: &Path) -> Result<ApiFetchConfig> {
310 let file = File::open(path)
311 .with_context(|| format!("failed to open config file at {}", path.display()))?;
312 let reader = BufReader::new(file);
313 serde_json::from_reader(reader)
314 .with_context(|| format!("failed to parse config JSON at {}", path.display()))
315}
316
317impl ApiFetchConfig {
318 fn into_prepared(self, cmd: &ApiFetchCommand) -> Result<PreparedConfig> {
319 let mode = cmd
320 .mode_override
321 .or(self.mode)
322 .unwrap_or(ApiFetchMode::Insert);
323
324 let base_uri = cmd
325 .uri_override
326 .clone()
327 .or(self.uri)
328 .map(|value| value.trim_end_matches('/').to_string())
329 .filter(|value| !value.is_empty());
330
331 let static_tags = parse_tags_value(self.tags)?;
332 let (static_doc_metadata, static_extra_metadata) = split_metadata(self.metadata)?;
333 let static_timestamp = match self.timestamp {
334 Some(value) => Some(parse_timestamp_value(&value)?),
335 None => None,
336 };
337
338 let content_type = self.content_type.unwrap_or(ResponseContentType::Json);
339
340 Ok(PreparedConfig {
341 url: self.url,
342 method: self.method.unwrap_or(HttpMethod::Get),
343 body: self.body,
344 headers: self.headers,
345 extract: self.extract,
346 base_uri,
347 mode,
348 static_tags,
349 static_doc_metadata,
350 static_extra_metadata,
351 static_timestamp,
352 max_items: self.max_items,
353 content_type,
354 default_title: self.title,
355 })
356 }
357}
358
359fn parse_tags_value(value: Option<Value>) -> Result<Vec<TagDirective>> {
360 match value {
361 None | Some(Value::Null) => Ok(Vec::new()),
362 Some(Value::String(s)) => Ok(vec![TagDirective::Simple(s.trim().to_string())]),
363 Some(Value::Array(items)) => {
364 let mut tags = Vec::new();
365 for item in items {
366 match item {
367 Value::String(s) => tags.push(TagDirective::Simple(s.trim().to_string())),
368 Value::Object(map) => {
369 if let Some(Value::String(val)) = map.get("value") {
370 if let Some(Value::String(key)) = map.get("key") {
371 tags.push(TagDirective::Pair {
372 key: key.trim().to_string(),
373 value: val.trim().to_string(),
374 });
375 continue;
376 }
377 }
378 bail!(
379 "invalid tag entry in array; expected string or {{\"key\",\"value\"}}"
380 );
381 }
382 _ => bail!("tags array must contain strings or key/value objects"),
383 }
384 }
385 Ok(tags)
386 }
387 Some(Value::Object(object)) => {
388 let mut tags = Vec::new();
389 for (key, value) in object {
390 let key = key.trim().to_string();
391 let value_str = value_to_string(&value)?;
392 tags.push(TagDirective::Pair {
393 key,
394 value: value_str,
395 });
396 }
397 Ok(tags)
398 }
399 Some(_) => bail!("unsupported tags format; expected string, array, or object"),
400 }
401}
402
403fn split_metadata(value: Option<Value>) -> Result<(Option<DocMetadata>, BTreeMap<String, Value>)> {
404 match value {
405 None | Some(Value::Null) => Ok((None, BTreeMap::new())),
406 Some(Value::Object(map)) => {
407 let doc_attempt: Result<DocMetadata, _> =
408 serde_json::from_value(Value::Object(map.clone()));
409 if let Ok(meta) = doc_attempt {
410 Ok((Some(meta), BTreeMap::new()))
411 } else {
412 let mut extras = BTreeMap::new();
413 for (key, value) in map {
414 extras.insert(key, value);
415 }
416 Ok((None, extras))
417 }
418 }
419 Some(other) => {
420 let meta: DocMetadata = serde_json::from_value(other.clone())
421 .context("metadata must be an object compatible with DocMetadata")?;
422 Ok((Some(meta), BTreeMap::new()))
423 }
424 }
425}
426
427fn parse_timestamp_value(value: &TimestampValue) -> Result<i64> {
428 match value {
429 TimestampValue::Number(num) => Ok(*num),
430 TimestampValue::String(text) => parse_timestamp_str(text),
431 }
432}
433
434fn parse_timestamp_str(raw: &str) -> Result<i64> {
435 if let Ok(num) = raw.trim().parse::<i64>() {
436 return Ok(num);
437 }
438 if let Ok(dt) = OffsetDateTime::parse(raw.trim(), &Rfc3339) {
439 return Ok(dt.unix_timestamp());
440 }
441 if let Ok(parsed) = PrimitiveDateTime::parse(
442 raw.trim(),
443 &format_description!("[year]-[month]-[day] [hour]:[minute]:[second]"),
444 ) {
445 return Ok(parsed.assume_utc().unix_timestamp());
446 }
447 if let Ok(parsed) = PrimitiveDateTime::parse(
448 raw.trim(),
449 &format_description!("[year]-[month]-[day]T[hour]:[minute]:[second]"),
450 ) {
451 return Ok(parsed.assume_utc().unix_timestamp());
452 }
453 bail!("unable to parse timestamp: {raw}")
454}
455
456fn build_headers(raw: &BTreeMap<String, String>) -> Result<HeaderMap> {
457 let mut headers = HeaderMap::new();
458 for (key, value) in raw {
459 let resolved = resolve_value(value)?;
460 let name = HeaderName::try_from(key.as_str())
461 .with_context(|| format!("invalid header name: {key}"))?;
462 let header_value = HeaderValue::from_str(&resolved)
463 .with_context(|| format!("invalid header value for {key}"))?;
464 headers.insert(name, header_value);
465 }
466 Ok(headers)
467}
468
469fn resolve_value(raw: &str) -> Result<String> {
470 if let Some(env_key) = raw.strip_prefix("env:") {
471 let key = env_key.trim();
472 let value = env::var(key).with_context(|| {
473 format!("environment variable {key} referenced in config is not set")
474 })?;
475 Ok(value)
476 } else {
477 Ok(raw.to_string())
478 }
479}
480
481fn execute_request(
482 client: &Client,
483 prepared: &PreparedConfig,
484 mut headers: HeaderMap,
485) -> Result<String> {
486 if !headers.contains_key(USER_AGENT) {
487 headers.insert(
488 USER_AGENT,
489 HeaderValue::from_static(concat!("memvid-cli/", env!("CARGO_PKG_VERSION"))),
490 );
491 }
492 if prepared.content_type == ResponseContentType::Json && !headers.contains_key(ACCEPT) {
493 headers.insert(ACCEPT, HeaderValue::from_static("application/json"));
494 }
495 if matches!(prepared.method, HttpMethod::Post)
496 && prepared
497 .body
498 .as_ref()
499 .map(|body| matches!(body, RequestBody::Json(_)))
500 .unwrap_or(false)
501 && !headers.contains_key(CONTENT_TYPE)
502 {
503 headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
504 }
505
506 let mut request = match prepared.method {
507 HttpMethod::Get => client.get(&prepared.url),
508 HttpMethod::Post => client.post(&prepared.url),
509 };
510 if let Some(body) = &prepared.body {
511 match body {
512 RequestBody::Json(value) => {
513 request = request.json(value);
514 }
515 RequestBody::Text(text) => {
516 request = request.body(text.clone());
517 }
518 }
519 }
520 request = request.headers(headers);
521
522 let response = request
523 .send()
524 .with_context(|| format!("request to {} failed", prepared.url))?;
525 let response = response
526 .error_for_status()
527 .with_context(|| format!("remote endpoint {} returned an error status", prepared.url))?;
528 response.text().context("failed to read response body")
529}
530
531fn parse_response(body: &str, kind: ResponseContentType) -> Result<Value> {
532 match serde_json::from_str::<Value>(body) {
533 Ok(value) => Ok(value),
534 Err(err) => {
535 if matches!(kind, ResponseContentType::Raw) {
536 Ok(Value::String(body.to_string()))
537 } else {
538 Err(anyhow!("failed to decode response as JSON: {err}"))
539 }
540 }
541 }
542}
543
544fn build_frame_plans(prepared: &PreparedConfig, root: &Value) -> Result<Vec<FramePlan>> {
545 let mut matches = jsonpath_select(root, &prepared.extract.items)
546 .with_context(|| format!("jsonpath '{}' failed", prepared.extract.items))?;
547 if matches.is_empty() {
548 return Ok(Vec::new());
549 }
550 if let Some(limit) = prepared.max_items {
551 if matches.len() > limit {
552 matches.truncate(limit);
553 }
554 }
555
556 let mut plans = Vec::with_capacity(matches.len());
557 for item in matches {
558 let plan = build_frame_plan(prepared, item)?;
559 plans.push(plan);
560 }
561 Ok(plans)
562}
563
564fn build_frame_plan(prepared: &PreparedConfig, item: &Value) -> Result<FramePlan> {
565 let id_matches = jsonpath_select(item, &prepared.extract.id)
566 .with_context(|| format!("jsonpath '{}' failed", prepared.extract.id))?;
567 let id_value = id_matches.first().ok_or_else(|| {
568 anyhow!(
569 "extract.id path '{}' returned no results",
570 prepared.extract.id
571 )
572 })?;
573 let id = value_to_string(id_value)?.trim().to_string();
574 if id.is_empty() {
575 bail!("extracted id is empty after trimming");
576 }
577 let uri = build_uri(prepared.base_uri.as_deref(), &id);
578
579 let derived_title = if let Some(path) = &prepared.extract.title {
580 let title_matches =
581 jsonpath_select(item, path).with_context(|| format!("jsonpath '{}' failed", path))?;
582 title_matches
583 .first()
584 .map(|value| value_to_string(value))
585 .transpose()?
586 } else {
587 None
588 };
589
590 let title = derived_title.or_else(|| prepared.default_title.clone());
591
592 let payload = extract_payload(prepared, item)?;
593 let mut tags = prepared.static_tags.clone();
594 if let Some(path) = &prepared.extract.tags {
595 let tag_values =
596 jsonpath_select(item, path).with_context(|| format!("jsonpath '{}' failed", path))?;
597 for value in tag_values {
598 let tag = value_to_string(value)?;
599 if !tag.trim().is_empty() {
600 tags.push(TagDirective::Simple(tag.trim().to_string()));
601 }
602 }
603 }
604
605 let mut doc_metadata = prepared.static_doc_metadata.clone();
606 let mut extra_metadata = prepared.static_extra_metadata.clone();
607
608 if let Some(path) = &prepared.extract.metadata {
609 let meta_matches =
610 jsonpath_select(item, path).with_context(|| format!("jsonpath '{}' failed", path))?;
611 if let Some(value) = meta_matches.first() {
612 match value {
613 Value::Null => {}
614 Value::Object(map) => {
615 let mut handled = false;
616 if doc_metadata.is_none() {
617 if let Ok(parsed) =
618 serde_json::from_value::<DocMetadata>(Value::Object(map.clone()))
619 {
620 doc_metadata = Some(parsed);
621 handled = true;
622 }
623 }
624 if !handled {
625 for (key, value) in map.iter() {
626 extra_metadata.insert(key.clone(), (*value).clone());
627 }
628 }
629 }
630 other => {
631 extra_metadata.insert(path.clone(), (*other).clone());
632 }
633 }
634 }
635 }
636
637 if doc_metadata
638 .as_ref()
639 .and_then(|meta| meta.mime.as_ref())
640 .is_none()
641 {
642 let default_mime = match prepared.content_type {
643 ResponseContentType::Json => Some("application/json"),
644 ResponseContentType::Raw => Some("text/plain"),
645 };
646 if let Some(mime) = default_mime {
647 match doc_metadata.as_mut() {
648 Some(existing) => existing.mime = Some(mime.to_string()),
649 None => {
650 let mut meta = DocMetadata::default();
651 meta.mime = Some(mime.to_string());
652 doc_metadata = Some(meta);
653 }
654 }
655 }
656 }
657
658 let mut timestamp = prepared.static_timestamp;
659 if let Some(path) = &prepared.extract.timestamp {
660 let ts_matches =
661 jsonpath_select(item, path).with_context(|| format!("jsonpath '{}' failed", path))?;
662 if let Some(value) = ts_matches.first() {
663 timestamp = Some(parse_timestamp_from_value(value)?);
664 }
665 }
666
667 Ok(FramePlan {
668 uri,
669 title,
670 payload,
671 tags,
672 doc_metadata,
673 extra_metadata,
674 timestamp,
675 })
676}
677
678fn extract_payload(prepared: &PreparedConfig, item: &Value) -> Result<String> {
679 if let Some(path) = &prepared.extract.content {
680 let payload_matches =
681 jsonpath_select(item, path).with_context(|| format!("jsonpath '{}' failed", path))?;
682 let value = payload_matches
683 .first()
684 .ok_or_else(|| anyhow!("extract.content path '{}' returned no results", path))?;
685 match prepared.content_type {
686 ResponseContentType::Json => value_to_pretty_text(value),
687 ResponseContentType::Raw => value_to_plain_string(value),
688 }
689 } else {
690 match prepared.content_type {
691 ResponseContentType::Json => value_to_pretty_text(item),
692 ResponseContentType::Raw => value_to_plain_string(item),
693 }
694 }
695}
696
697fn value_to_plain_string(value: &Value) -> Result<String> {
698 match value {
699 Value::String(text) => Ok(text.clone()),
700 Value::Number(num) => Ok(num.to_string()),
701 Value::Bool(flag) => Ok(flag.to_string()),
702 other => bail!("expected textual content but found {other}"),
703 }
704}
705
706fn value_to_pretty_text(value: &Value) -> Result<String> {
707 match value {
708 Value::String(text) => Ok(text.clone()),
709 _ => serde_json::to_string_pretty(value).map_err(|err| anyhow!(err)),
710 }
711}
712
713fn value_to_string(value: &Value) -> Result<String> {
714 match value {
715 Value::String(text) => Ok(text.clone()),
716 Value::Number(num) => Ok(num.to_string()),
717 Value::Bool(flag) => Ok(flag.to_string()),
718 Value::Null => bail!("value is null"),
719 other => serde_json::to_string(other).map_err(|err| anyhow!(err)),
720 }
721}
722
723fn parse_timestamp_from_value(value: &Value) -> Result<i64> {
724 match value {
725 Value::Number(num) => num
726 .as_i64()
727 .ok_or_else(|| anyhow!("timestamp number must fit i64")),
728 Value::String(text) => parse_timestamp_str(text),
729 other => bail!("unsupported timestamp value: {other}"),
730 }
731}
732
/// Joins an optional base URI and an id with exactly one `/` between them.
///
/// Without a base the id is returned unchanged. With a base, trailing
/// slashes of the base and leading slashes of the id are stripped first; if
/// either side ends up empty, the other side is returned on its own.
fn build_uri(base: Option<&str>, id: &str) -> String {
    let Some(base) = base else {
        return id.to_string();
    };

    let prefix = base.trim_end_matches('/');
    let suffix = id.trim_start_matches('/');

    match (prefix.is_empty(), suffix.is_empty()) {
        (true, _) => suffix.to_string(),
        (false, true) => prefix.to_string(),
        (false, false) => format!("{prefix}/{suffix}"),
    }
}
749
750fn apply_insert(mem: &mut Memvid, plan: &FramePlan) -> Result<u64> {
751 let options = build_put_options(plan);
752 let payload = plan.payload.as_bytes();
753 mem.put_bytes_with_options(payload, options)
754 .context("failed to insert frame")
755}
756
757fn apply_update(mem: &mut Memvid, frame_id: u64, plan: &FramePlan) -> Result<u64> {
758 let options = build_put_options(plan);
759 let payload = plan.payload.as_bytes().to_vec();
760 mem.update_frame(frame_id, Some(payload), options, None)
761 .context("failed to update frame")
762}
763
764fn build_put_options(plan: &FramePlan) -> PutOptions {
765 let mut builder = PutOptions::builder()
766 .enable_embedding(false)
767 .auto_tag(false)
768 .extract_dates(false)
769 .uri(plan.uri.clone())
770 .search_text(plan.payload.clone());
771
772 if let Some(ts) = plan.timestamp {
773 builder = builder.timestamp(ts);
774 }
775 if let Some(title) = &plan.title {
776 if !title.trim().is_empty() {
777 builder = builder.title(title.clone());
778 }
779 }
780 if let Some(meta) = plan.doc_metadata.clone() {
781 builder = builder.metadata(meta);
782 }
783 for (key, value) in plan.extra_metadata.iter() {
784 builder = builder.metadata_entry(key.clone(), value.clone());
785 }
786 for tag in &plan.tags {
787 match tag {
788 TagDirective::Simple(tag) => {
789 if !tag.trim().is_empty() {
790 builder = builder.push_tag(tag.clone());
791 }
792 }
793 TagDirective::Pair { key, value } => {
794 builder = builder.tag(key.clone(), value.clone());
795 if !key.trim().is_empty() {
796 builder = builder.push_tag(key.clone());
797 }
798 if !value.trim().is_empty() && value != key {
799 builder = builder.push_tag(value.clone());
800 }
801 }
802 }
803 }
804
805 builder.build()
806}
807
808fn print_json_summary(
809 results: &[FrameResult],
810 inserted: usize,
811 updated: usize,
812 skipped: usize,
813 dry_run: bool,
814 warnings: &[String],
815) -> Result<()> {
816 let items: Vec<Value> = results
817 .iter()
818 .map(|res| {
819 json!({
820 "uri": res.uri,
821 "action": match res.action {
822 FrameActionKind::Insert => "insert",
823 FrameActionKind::Update => "update",
824 FrameActionKind::Skip => "skip",
825 },
826 "sequence": res.sequence,
827 "frame_id": res.frame_id,
828 "reason": res.reason,
829 })
830 })
831 .collect();
832 let summary = json!({
833 "dry_run": dry_run,
834 "counts": {
835 "total": results.len(),
836 "inserted": inserted,
837 "updated": updated,
838 "skipped": skipped,
839 },
840 "items": items,
841 "warnings": warnings,
842 });
843 println!("{}", serde_json::to_string_pretty(&summary)?);
844 Ok(())
845}
846
847fn print_human_summary(
848 results: &[FrameResult],
849 inserted: usize,
850 updated: usize,
851 skipped: usize,
852 dry_run: bool,
853 warnings: &[String],
854) {
855 for res in results {
856 match res.action {
857 FrameActionKind::Insert => {
858 if let Some(seq) = res.sequence {
859 println!("+ {} (seq {})", res.uri, seq);
860 } else {
861 println!("+ {} (planned)", res.uri);
862 }
863 }
864 FrameActionKind::Update => {
865 if let Some(seq) = res.sequence {
866 println!("~ {} (seq {})", res.uri, seq);
867 } else {
868 println!("~ {} (planned)", res.uri);
869 }
870 }
871 FrameActionKind::Skip => {
872 if let Some(reason) = &res.reason {
873 println!("- {} ({})", res.uri, reason);
874 } else {
875 println!("- {} (skipped)", res.uri);
876 }
877 }
878 }
879 }
880 println!(
881 "Summary: inserted {}, updated {}, skipped {}{}",
882 inserted,
883 updated,
884 skipped,
885 if dry_run { " (dry run)" } else { "" }
886 );
887 if !warnings.is_empty() {
888 println!("Warnings:");
889 for warning in warnings {
890 println!(" - {warning}");
891 }
892 }
893}
894
// Unit tests for config preparation, env-based value resolution and frame
// plan construction. None of them perform network or memory-file I/O.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use std::path::PathBuf;
    use std::sync::{Mutex, OnceLock};

    /// Returns a guard on a process-wide mutex so that tests which mutate
    /// the environment do not run concurrently with each other.
    fn env_lock() -> std::sync::MutexGuard<'static, ()> {
        static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
        LOCK.get_or_init(|| Mutex::new(())).lock().unwrap()
    }

    /// An `env:NAME` value resolves to the named environment variable.
    #[test]
    fn resolve_value_reads_env() {
        const VAR: &str = "API_FETCH_TEST_TOKEN";
        let _guard = env_lock();
        // SAFETY: mutation is serialized through `env_lock` among tests that
        // take it. NOTE(review): tests that read the environment without the
        // lock are not covered by this guard.
        unsafe {
            std::env::set_var(VAR, "secret");
        }
        let resolved = resolve_value(&format!("env:{VAR}")).expect("resolve env");
        assert_eq!(resolved, "secret");
        // SAFETY: same reasoning as above; `_guard` is still held here.
        // NOTE(review): a failed assertion above skips this cleanup.
        unsafe {
            std::env::remove_var(VAR);
        }
    }

    /// CLI overrides for mode and URI win over the config file values, and
    /// static tags, metadata and timestamp are carried over.
    #[test]
    fn prepared_config_applies_overrides() {
        let raw = json!({
            "url": "https://example.com/api",
            "method": "get",
            "headers": {},
            "extract": {
                "items": "$.items[*]",
                "id": "$.id"
            },
            "mode": "insert",
            "uri": "mem://base",
            "tags": ["static"],
            "metadata": {"mime": "text/plain"},
            "timestamp": 42,
            "content_type": "json"
        });

        let config: ApiFetchConfig = serde_json::from_value(raw).expect("config");
        let command = ApiFetchCommand {
            file: PathBuf::from("memory.mv2"),
            config_path: PathBuf::from("cfg.json"),
            dry_run: false,
            mode_override: Some(ApiFetchMode::Upsert),
            uri_override: Some("mem://override".to_string()),
            output_json: false,
            lock_timeout_ms: 5000,
            force_lock: false,
        };

        let prepared = config.into_prepared(&command).expect("prepared");
        assert_eq!(prepared.mode, ApiFetchMode::Upsert);
        assert_eq!(prepared.base_uri.as_deref(), Some("mem://override"));
        assert_eq!(prepared.static_tags.len(), 1);
        assert!(prepared.static_doc_metadata.is_some());
        assert_eq!(prepared.static_timestamp, Some(42));
        assert_eq!(prepared.content_type, ResponseContentType::Json);
    }

    /// Every configured extract path feeds the plan: URI, payload, title,
    /// timestamp (overriding the static one), tags (static plus dynamic) and
    /// structured metadata.
    #[test]
    fn build_frame_plan_extracts_payload_and_tags() {
        let prepared = PreparedConfig {
            url: "https://example.com".to_string(),
            method: HttpMethod::Get,
            body: None,
            headers: BTreeMap::new(),
            extract: ExtractConfig {
                items: "$.items[*]".to_string(),
                id: "$.id".to_string(),
                content: Some("$.text".to_string()),
                tags: Some("$.tags[*]".to_string()),
                metadata: Some("$.meta".to_string()),
                timestamp: Some("$.ts".to_string()),
                title: Some("$.title".to_string()),
            },
            base_uri: Some("mem://base".to_string()),
            mode: ApiFetchMode::Insert,
            static_tags: vec![TagDirective::Simple("static".to_string())],
            static_doc_metadata: None,
            static_extra_metadata: BTreeMap::new(),
            static_timestamp: Some(100),
            max_items: None,
            content_type: ResponseContentType::Raw,
            default_title: Some("fallback".to_string()),
        };

        let root = json!({
            "items": [
                {
                    "id": "doc-1",
                    "text": "hello world",
                    "tags": ["dynamic"],
                    "meta": {"mime": "text/plain"},
                    "ts": 123,
                    "title": "Greeting"
                }
            ]
        });

        let plans = build_frame_plans(&prepared, &root).expect("plans");
        assert_eq!(plans.len(), 1);
        let plan = &plans[0];
        assert_eq!(plan.uri, "mem://base/doc-1");
        assert_eq!(plan.payload, "hello world");
        assert_eq!(plan.title.as_deref(), Some("Greeting"));
        assert_eq!(plan.timestamp, Some(123));
        assert!(plan
            .tags
            .iter()
            .any(|tag| matches!(tag, TagDirective::Simple(value) if value == "dynamic")));
        assert!(plan
            .tags
            .iter()
            .any(|tag| matches!(tag, TagDirective::Simple(value) if value == "static")));
        let meta = plan.doc_metadata.as_ref().expect("doc metadata");
        assert_eq!(meta.mime.as_deref(), Some("text/plain"));
    }

    /// With JSON content and no metadata at all, the plan still carries
    /// structured metadata whose MIME type defaults to `application/json`.
    #[test]
    fn build_frame_plan_defaults_json_mime() {
        let prepared = PreparedConfig {
            url: "https://example.com".to_string(),
            method: HttpMethod::Get,
            body: None,
            headers: BTreeMap::new(),
            extract: ExtractConfig {
                items: "$".to_string(),
                id: "$.id".to_string(),
                content: None,
                tags: None,
                metadata: None,
                timestamp: None,
                title: Some("$.title".to_string()),
            },
            base_uri: Some("memvid://json".to_string()),
            mode: ApiFetchMode::Insert,
            static_tags: Vec::new(),
            static_doc_metadata: None,
            static_extra_metadata: BTreeMap::new(),
            static_timestamp: None,
            max_items: None,
            content_type: ResponseContentType::Json,
            default_title: None,
        };

        let root = json!({
            "id": 1,
            "title": "Example"
        });

        let plans = build_frame_plans(&prepared, &root).expect("plans");
        assert_eq!(plans.len(), 1);
        let plan = &plans[0];
        let meta = plan.doc_metadata.as_ref().expect("doc metadata");
        assert_eq!(meta.mime.as_deref(), Some("application/json"));
    }
}