1use std::{
4 env, fs,
5 io::{self, Read},
6 path::{Path, PathBuf},
7 process::Command as ProcessCommand,
8 thread,
9 time::{Duration, Instant, SystemTime, UNIX_EPOCH},
10};
11
12use clap::{error::ErrorKind, Parser, Subcommand};
13use serde_json::Value;
14use zotron_rpc::{StdProviderCommandRunner, UreqProviderHttpTransport, ZoteroRpc};
15use zotron_types::{
16 bm25_score_chunks, build_embedding_provider_request, build_ocr_provider_request,
17 builtin_ocr_provider_specs, cosine_similarity, execute_embedding_provider_request,
18 is_zotron_evidence_artifact, machine_artifact_exists_for_item,
19 machine_artifact_exists_in_sidecar, machine_artifact_store_root,
20 ocr_provider_spec as raw_ocr_provider_spec, parse_embedding_provider_response,
21 parse_ocr_provider_response, read_machine_artifact_sidecar, rrf_merge,
22 write_machine_artifact_sidecar, ArtifactStorePlatform, EmbeddingChunkInput,
23 EmbeddingRequestInput, EmbeddingVector, MachineArtifactKind, OcrRequestInput,
24 ProviderCommandRunner, ProviderHttpInvocation, ProviderHttpTransport, StructureChunk,
25 DEFAULT_RPC_URL,
26};
27
/// Minimal abstraction over a JSON-RPC client.
///
/// `run_with_client` accepts any implementor, so a caller can supply a client other than
/// `ZoteroRpc`.
pub trait RpcCaller {
    /// Invokes `method` with optional JSON `params`.
    ///
    /// Returns the JSON result on success, or an error message on failure.
    fn call(&mut self, method: &str, params: Option<Value>) -> Result<Value, String>;
}
31
/// Serializable, CLI-facing description of an OCR provider.
///
/// Built by `cli_ocr_provider_spec` from a `zotron_types::OcrProviderSpec` and emitted under
/// the `providers` key by the `ocr providers` subcommand.
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
pub struct CliOcrProviderSpec {
    /// Provider identifier; populated from the underlying spec's `provider_key`.
    pub id: &'static str,
    /// Provider key; populated from the same `provider_key` value as `id`.
    pub provider: &'static str,
    /// String form of the underlying spec's request style.
    pub request_style: &'static str,
    /// Copied from the underlying spec's `auth` field.
    pub auth: &'static str,
    /// Copied from the underlying spec's `auth_header` field.
    pub auth_header: &'static str,
    /// Copied from the underlying spec's `supports_pdf_direct` flag.
    pub supports_pdf_direct: bool,
    /// Copied from the underlying spec's `key_field`.
    pub key_field: &'static str,
}
42
/// Serializable, CLI-facing description of an embedding provider.
///
/// Built by `embedding_provider_spec` from the corresponding `zotron_types` spec.
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
pub struct CliEmbeddingProviderSpec {
    /// Copied from the underlying spec's `id`.
    pub id: &'static str,
    /// Populated from the underlying spec's `provider_key`.
    pub provider: &'static str,
    /// Request style string; reported as `"dashscope"` when the provider key is `"alibaba"`,
    /// otherwise the underlying spec's request style.
    pub request_style: &'static str,
    /// Default endpoint URL, or an empty string when the underlying spec has none.
    pub default_url: String,
    /// Copied from the underlying spec's `default_model`.
    pub default_model: &'static str,
    /// Copied from the underlying spec's `auth` field.
    pub auth: &'static str,
    /// Copied from the underlying spec's `key_field`.
    pub key_field: &'static str,
}
53
54pub fn ocr_provider_specs() -> Vec<CliOcrProviderSpec> {
55 builtin_ocr_provider_specs()
56 .into_iter()
57 .map(cli_ocr_provider_spec)
58 .collect()
59}
60
61pub fn ocr_provider_spec(provider: &str) -> Result<CliOcrProviderSpec, String> {
62 zotron_types::ocr_provider_spec(provider).map(cli_ocr_provider_spec)
63}
64
65pub fn embedding_provider_spec(provider: &str) -> Result<CliEmbeddingProviderSpec, String> {
66 let spec = zotron_types::embedding_provider_spec(provider)?;
67 Ok(CliEmbeddingProviderSpec {
68 id: spec.id,
69 provider: spec.provider_key,
70 request_style: if spec.provider_key == "alibaba" {
71 "dashscope"
72 } else {
73 spec.request_style.as_str()
74 },
75 default_url: spec.default_url.unwrap_or("").to_string(),
76 default_model: spec.default_model,
77 auth: spec.auth,
78 key_field: spec.key_field,
79 })
80}
81
82pub fn chunks_from_blocks(blocks: &[Value], max_chars: usize) -> Result<Vec<Value>, String> {
83 let typed = blocks
84 .iter()
85 .map(json_block_to_pdf_block)
86 .collect::<Result<Vec<_>, _>>()?;
87 let chunks = zotron_types::chunks_from_blocks(&typed, max_chars);
88 chunks
89 .into_iter()
90 .map(|chunk| chunk_to_cli_value(&chunk, &typed))
91 .collect()
92}
93
94fn cli_ocr_provider_spec(spec: zotron_types::OcrProviderSpec) -> CliOcrProviderSpec {
95 CliOcrProviderSpec {
96 id: spec.provider_key,
97 provider: spec.provider_key,
98 request_style: spec.request_style.as_str(),
99 auth: spec.auth,
100 auth_header: spec.auth_header,
101 supports_pdf_direct: spec.supports_pdf_direct,
102 key_field: spec.key_field,
103 }
104}
105
106fn json_block_to_pdf_block(value: &Value) -> Result<zotron_types::PdfEvidenceBlock, String> {
107 let block_key = value
108 .get("block_key")
109 .and_then(Value::as_str)
110 .ok_or_else(|| "block missing block_key".to_string())?
111 .to_string();
112 let item_key = value
113 .get("item_key")
114 .and_then(Value::as_str)
115 .ok_or_else(|| "block missing item_key".to_string())?
116 .to_string();
117 let attachment_key = value
118 .get("attachment_key")
119 .and_then(Value::as_str)
120 .ok_or_else(|| "block missing attachment_key".to_string())?
121 .to_string();
122 let page_idx = value
123 .get("page_idx")
124 .or_else(|| value.get("page"))
125 .and_then(Value::as_u64)
126 .unwrap_or(1);
127 let block_type = value
128 .get("type")
129 .or_else(|| value.get("block_type"))
130 .and_then(Value::as_str)
131 .unwrap_or("paragraph")
132 .to_string();
133 let section_path = value
134 .get("section_path")
135 .and_then(Value::as_array)
136 .map(|items| {
137 items
138 .iter()
139 .filter_map(Value::as_str)
140 .map(ToString::to_string)
141 .collect::<Vec<_>>()
142 })
143 .unwrap_or_default();
144 let text = value
145 .get("text")
146 .and_then(Value::as_str)
147 .unwrap_or("")
148 .to_string();
149 let bbox = value.get("bbox").and_then(value_bbox4);
150
151 Ok(zotron_types::PdfEvidenceBlock {
152 block_key,
153 item_key,
154 attachment_key,
155 page_idx,
156 block_type,
157 bbox,
158 section_path,
159 text,
160 })
161}
162
163fn chunk_to_cli_value(
164 chunk: &zotron_types::StructureChunk,
165 blocks: &[zotron_types::PdfEvidenceBlock],
166) -> Result<Value, String> {
167 let refs = chunk
168 .block_keys
169 .iter()
170 .filter_map(|key| blocks.iter().find(|block| &block.block_key == key))
171 .map(|block| {
172 serde_json::json!({
173 "block_key": block.block_key,
174 "page_idx": block.page_idx,
175 "bbox": block.bbox.map(|bbox| bbox.iter().map(|n| {
176 if n.fract() == 0.0 {
177 Value::from(*n as i64)
178 } else {
179 Value::from(*n)
180 }
181 }).collect::<Vec<_>>()),
182 })
183 })
184 .collect::<Vec<_>>();
185 Ok(serde_json::json!({
186 "chunk_key": chunk.chunk_key,
187 "item_key": chunk.item_key,
188 "attachment_key": chunk.attachment_key,
189 "block_keys": chunk.block_keys,
190 "section_path": chunk.section_path,
191 "text": chunk.text,
192 "page_start": chunk.page_start,
193 "page_end": chunk.page_end,
194 "evidence_refs": refs,
195 }))
196}
197
198fn value_bbox4(value: &Value) -> Option<[f64; 4]> {
199 let arr = value.as_array()?;
200 if arr.len() != 4 {
201 return None;
202 }
203 Some([
204 arr[0].as_f64()?,
205 arr[1].as_f64()?,
206 arr[2].as_f64()?,
207 arr[3].as_f64()?,
208 ])
209}
210
211impl RpcCaller for ZoteroRpc {
212 fn call(&mut self, method: &str, params: Option<Value>) -> Result<Value, String> {
213 self.call(method, params).map_err(|err| err.to_string())
214 }
215}
216
// Top-level clap parser for the `zotron` binary; all work is dispatched through `command`.
// NOTE: plain `//` comments are used on clap-derived items because clap would turn `///` doc
// comments into help text.
#[derive(Debug, Parser)]
#[command(name = "zotron", about = "Rust client + CLI for the Zotron XPI")]
struct Cli {
    // The selected top-level subcommand.
    #[command(subcommand)]
    command: Command,
}
223
// Subcommands available under `zotron ocr`.
// NOTE: plain `//` comments are used on clap-derived items because clap would turn `///` doc
// comments into help text.
#[derive(Debug, Subcommand)]
enum OcrCommand {
    // Emits the built-in OCR provider specs as JSON; carries no `--url` of its own.
    Providers,
    // One-shot provider invocation. `run_ocr_run_command` requires exactly one of `--input`
    // or `--file`.
    #[command(name = "run")]
    Run {
        // Provider name, resolved through `build_ocr_provider_request`.
        #[arg(long)]
        provider: String,
        // Handed to `read_json_input`; mutually exclusive with `--file`.
        #[arg(long)]
        input: Option<String>,
        // Handed to `ocr_input_from_file`; mutually exclusive with `--input`.
        #[arg(long)]
        file: Option<String>,
        // `--item-key`: forwarded to `ocr_input_from_file` when `--file` is used.
        #[arg(long = "item-key")]
        item_key: Option<String>,
        // `--attachment-key`: forwarded to `ocr_input_from_file` when `--file` is used.
        #[arg(long = "attachment-key")]
        attachment_key: Option<String>,
        // `--mime-type`: forwarded to `ocr_input_from_file` when `--file` is used.
        #[arg(long = "mime-type")]
        mime_type: Option<String>,
        // Takes precedence over the request's own URL for HTTP-style providers.
        #[arg(long)]
        endpoint: Option<String>,
        // `--api-key-env`: passed to `provider_http_transport_with_auth`; presumably the name
        // of the environment variable holding the API key (TODO confirm).
        #[arg(long = "api-key-env")]
        api_key_env: Option<String>,
    },
    // Handled by `run_ocr_status_command` for the given collection.
    Status {
        #[arg(long)]
        collection: String,
        // RPC endpoint; defaults to `DEFAULT_RPC_URL`.
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // Handled by `run_ocr_process_command`.
    #[command(name = "process")]
    Process {
        // Provider name; defaults to "mineru".
        #[arg(long, default_value = "mineru")]
        provider: String,
        // Parent item key.
        #[arg(long)]
        parent: String,
        // Attachment key; when omitted, the first PDF attachment of `parent` is resolved.
        #[arg(long)]
        attachment: Option<String>,
        #[arg(long = "source-url")]
        source_url: Option<String>,
        #[arg(long = "result-dir")]
        result_dir: Option<String>,
        #[arg(long = "result-zip")]
        result_zip: Option<String>,
        #[arg(long = "provider-endpoint")]
        provider_endpoint: Option<String>,
        // `--api-key-env`; defaults to "ZOTRON_MINERU_API_KEY".
        #[arg(long = "api-key-env", default_value = "ZOTRON_MINERU_API_KEY")]
        api_key_env: String,
        // `--poll-interval-seconds`; defaults to 5.
        #[arg(long = "poll-interval-seconds", default_value_t = 5)]
        poll_interval_seconds: u64,
        // `--timeout-seconds`; defaults to 900.
        #[arg(long = "timeout-seconds", default_value_t = 900)]
        timeout_seconds: u64,
        // `--chunk-chars`; defaults to 1200.
        #[arg(long = "chunk-chars", default_value_t = 1200)]
        chunk_chars: usize,
        // RPC endpoint; defaults to `DEFAULT_RPC_URL`.
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
298
// Top-level subcommands of the `zotron` CLI.
// Every variant that talks to the RPC endpoint carries (directly or via its nested
// subcommand) a `--url` option defaulting to `DEFAULT_RPC_URL`; `command_url` extracts it.
// NOTE: plain `//` comments are used on clap-derived items because clap would turn `///` doc
// comments into help text.
#[derive(Debug, Subcommand)]
enum Command {
    // `zotron ping`.
    Ping {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `zotron rpc <method> [params_json]`.
    Rpc {
        // Positional RPC method name.
        method: String,
        // Positional JSON parameters; defaults to "{}".
        #[arg(default_value = "{}")]
        params_json: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        // `--paginate` flag.
        #[arg(long)]
        paginate: bool,
        // `--page-size`; defaults to 100.
        #[arg(long, default_value_t = 100)]
        page_size: usize,
    },
    // `zotron push <json_file>`.
    Push {
        // Positional path of the JSON file.
        json_file: String,
        #[arg(long)]
        pdf: Option<String>,
        #[arg(long)]
        collection: Option<String>,
        // `--on-duplicate`; defaults to "skip".
        #[arg(long = "on-duplicate", default_value = "skip")]
        on_duplicate: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        // `--dry-run` flag.
        #[arg(long = "dry-run")]
        dry_run: bool,
    },
    // `zotron system ...`.
    System {
        #[command(subcommand)]
        command: SystemCommand,
    },
    // `zotron search ...`; arguments live in `SearchArgs`.
    Search(SearchArgs),
    // `zotron items ...`.
    Items {
        #[command(subcommand)]
        command: ItemsCommand,
    },
    // `zotron collections ...`.
    Collections {
        #[command(subcommand)]
        command: CollectionsCommand,
    },
    // `zotron notes ...`.
    Notes {
        #[command(subcommand)]
        command: NotesCommand,
    },
    // `zotron attachments ...`.
    Attachments {
        #[command(subcommand)]
        command: AttachmentsCommand,
    },
    // `zotron settings ...`.
    Settings {
        #[command(subcommand)]
        command: SettingsCommand,
    },
    // `zotron tags ...`.
    Tags {
        #[command(subcommand)]
        command: TagsCommand,
    },
    // `zotron export ...`; arguments live in `ExportArgs`.
    Export(ExportArgs),
    // `zotron annotations ...`.
    Annotations {
        #[command(subcommand)]
        command: AnnotationsCommand,
    },
    // `zotron ocr ...`.
    Ocr {
        #[command(subcommand)]
        command: OcrCommand,
    },
    // `zotron rag ...`.
    Rag {
        #[command(subcommand)]
        command: RagCommand,
    },
    // `zotron find-pdfs --collection <...>`.
    #[command(name = "find-pdfs")]
    FindPdfs {
        #[arg(long)]
        collection: String,
        // `--limit`; defaults to 0 (NOTE(review): 0 presumably means "no limit" — confirm
        // against the handler).
        #[arg(long, default_value_t = 0)]
        limit: usize,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
402
/// Bundle of search options with the same fields as `RagCommand::Search`, minus `url`.
///
/// NOTE(review): presumably filled from `RagCommand::Search` by the RAG search handler, which
/// is outside this view — confirm.
struct RagSearchOptions {
    query: String,
    collection: Option<String>,
    keys: Vec<String>,
    zotero: bool,
    top_spans_per_item: u64,
    include_fulltext_spans: bool,
    top_k: u64,
    output: String,
}
413
// Subcommands available under `zotron rag`.
// NOTE: plain `//` comments are used on clap-derived items because clap would turn `///` doc
// comments into help text.
#[derive(Debug, Subcommand)]
enum RagCommand {
    // `rag providers`; carries no `--url` of its own.
    #[command(name = "providers")]
    Providers,
    // `rag embed`; carries no `--url` of its own.
    #[command(name = "embed")]
    Embed {
        #[arg(long)]
        provider: String,
        #[arg(long)]
        input: String,
        #[arg(long)]
        endpoint: Option<String>,
        #[arg(long)]
        model: Option<String>,
        // `--input-type`.
        #[arg(long = "input-type")]
        input_type: Option<String>,
        // `--api-key-env`.
        #[arg(long = "api-key-env")]
        api_key_env: Option<String>,
    },
    // `rag status --collection <...>`.
    Status {
        #[arg(long)]
        collection: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `rag search <query>`.
    #[command(name = "search")]
    Search {
        // Positional query string.
        query: String,
        #[arg(long)]
        collection: Option<String>,
        // Repeatable `--key`; `--keys` is accepted as an alias.
        #[arg(long = "key", alias = "keys")]
        keys: Vec<String>,
        // `--zotero` flag.
        #[arg(long)]
        zotero: bool,
        // `--top-spans-per-item`; defaults to 3.
        #[arg(long = "top-spans-per-item", default_value_t = 3)]
        top_spans_per_item: u64,
        // `--include-fulltext-spans` flag.
        #[arg(long = "include-fulltext-spans")]
        include_fulltext_spans: bool,
        // `--limit` (alias `--top-k`); defaults to 50.
        #[arg(long = "limit", alias = "top-k", default_value_t = 50)]
        top_k: u64,
        // `--output`; restricted to "json" or "jsonl", defaults to "json".
        #[arg(long, default_value = "json", value_parser = ["json", "jsonl"])]
        output: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
470
// Subcommands available under `zotron system`. Every variant takes `--url`, defaulting to
// `DEFAULT_RPC_URL`.
// NOTE: plain `//` comments are used on clap-derived items because clap would turn `///` doc
// comments into help text.
#[derive(Debug, Subcommand)]
enum SystemCommand {
    // `system version`.
    Version {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `system libraries`.
    Libraries {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `system library-stats [--library <id>]`.
    #[command(name = "library-stats")]
    LibraryStats {
        #[arg(long)]
        library: Option<i64>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `system schema [--type <item type>]`.
    Schema {
        #[arg(long = "type")]
        item_type: Option<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `system current-collection`.
    #[command(name = "current-collection")]
    CurrentCollection {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `system list-methods`.
    #[command(name = "list-methods")]
    ListMethods {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `system describe [method]`; the method name is an optional positional.
    Describe {
        method: Option<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
517
// Arguments of `zotron search`: an optional positional query plus filter options, or one of
// the saved-search management subcommands.
// NOTE: plain `//` comments are used on clap-derived items because clap would turn `///` doc
// comments into help text.
#[derive(Debug, clap::Args)]
struct SearchArgs {
    // Optional positional query string.
    query: Option<String>,
    // `--fulltext` flag.
    #[arg(long)]
    fulltext: bool,
    #[arg(long)]
    author: Option<String>,
    #[arg(long)]
    after: Option<String>,
    #[arg(long)]
    before: Option<String>,
    #[arg(long)]
    journal: Option<String>,
    #[arg(long)]
    tag: Option<String>,
    #[arg(long)]
    doi: Option<String>,
    #[arg(long)]
    isbn: Option<String>,
    #[arg(long)]
    issn: Option<String>,
    #[arg(long)]
    collection: Option<String>,
    // `--limit`; defaults to 50.
    #[arg(long, default_value_t = 50)]
    limit: u64,
    // `--offset`; defaults to 0.
    #[arg(long, default_value_t = 0)]
    offset: u64,
    // RPC endpoint used when no management subcommand is given (see `command_url`).
    #[arg(long, default_value = DEFAULT_RPC_URL)]
    url: String,
    // Optional management subcommand; when present, its own `--url` is the one used.
    #[command(subcommand)]
    management: Option<SearchManagementCommand>,
}
561
// Saved-search management subcommands nested under `zotron search`.
// NOTE: plain `//` comments are used on clap-derived items because clap would turn `///` doc
// comments into help text.
#[derive(Debug, Subcommand)]
enum SearchManagementCommand {
    // `search saved-searches`.
    #[command(name = "saved-searches")]
    SavedSearches {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `search create-saved <name> --condition <...>`.
    #[command(name = "create-saved")]
    CreateSaved {
        // Positional name of the saved search.
        name: String,
        // Repeatable `--condition`; at least one is required.
        #[arg(long = "condition", required = true)]
        condition: Vec<String>,
        // `--dry-run` flag.
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `search delete-saved <search_key>`.
    #[command(name = "delete-saved")]
    DeleteSaved {
        // Positional key of the saved search.
        search_key: String,
        // `--dry-run` flag.
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
591
// Subcommands available under `zotron items`. Every variant takes `--url`, defaulting to
// `DEFAULT_RPC_URL`. The handlers are outside this view, so the comments below describe only
// the argument surface.
// NOTE: plain `//` comments are used on clap-derived items because clap would turn `///` doc
// comments into help text.
#[derive(Debug, Subcommand)]
enum ItemsCommand {
    // `items add`: all sources (`--doi`, `--isbn`, `--from-url`, `--file`) are optional at the
    // parser level.
    Add {
        #[arg(long)]
        doi: Option<String>,
        #[arg(long)]
        isbn: Option<String>,
        #[arg(long = "from-url")]
        from_url: Option<String>,
        #[arg(long)]
        file: Option<String>,
        #[arg(long)]
        collection: Option<String>,
        // `--dry-run` flag.
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `items create --type <item type> [--field ...]`.
    Create {
        #[arg(long = "type")]
        item_type: String,
        // Repeatable `--field`.
        #[arg(long = "field")]
        fields: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `items update <key> [--field ...]`.
    Update {
        // Positional item key.
        key: String,
        // Repeatable `--field`.
        #[arg(long = "field")]
        fields: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `items delete <key>`.
    Delete {
        key: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `items trash [items...]`: zero or more positional values.
    Trash {
        items: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `items restore <item>`.
    Restore {
        item: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `items merge-duplicates [keys...]`: zero or more positional keys.
    #[command(name = "merge-duplicates")]
    MergeDuplicates {
        keys: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `items add-related <key> --target <...>`.
    #[command(name = "add-related")]
    AddRelated {
        key: String,
        #[arg(long)]
        target: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `items remove-related <key> --target <...>`.
    #[command(name = "remove-related")]
    RemoveRelated {
        key: String,
        #[arg(long)]
        target: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `items get <item>`.
    Get {
        item: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `items list`.
    List {
        // `--limit`; defaults to 50.
        #[arg(long, default_value_t = 50)]
        limit: u64,
        // `--offset`; defaults to 0.
        #[arg(long, default_value_t = 0)]
        offset: u64,
        #[arg(long)]
        sort: Option<String>,
        // `--direction`; defaults to "asc".
        #[arg(long, default_value = "asc")]
        direction: String,
        // `--trash` flag.
        #[arg(long)]
        trash: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `items find-duplicates`.
    #[command(name = "find-duplicates")]
    FindDuplicates {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `items recent`.
    Recent {
        // `--limit`; defaults to 20.
        #[arg(long, default_value_t = 20)]
        limit: u64,
        // `--offset`; defaults to 0.
        #[arg(long, default_value_t = 0)]
        offset: u64,
        // `--type`; defaults to "added".
        #[arg(long = "type", default_value = "added")]
        recent_type: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `items fulltext <key>`.
    Fulltext {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `items related <key>`.
    Related {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `items citation-key <key>`.
    #[command(name = "citation-key")]
    CitationKey {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
748
// Subcommands available under `zotron settings`. Every variant takes `--url`, defaulting to
// `DEFAULT_RPC_URL`.
// NOTE: plain `//` comments are used on clap-derived items because clap would turn `///` doc
// comments into help text.
#[derive(Debug, Subcommand)]
enum SettingsCommand {
    // `settings get <key>`.
    Get {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `settings list`, also reachable through the visible alias `get-all`.
    #[command(visible_alias = "get-all")]
    List {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `settings set [pairs...] [--file <...>]`.
    Set {
        // Zero or more positional values (NOTE(review): presumably `key=value` pairs — confirm
        // against the handler).
        pairs: Vec<String>,
        #[arg(long)]
        file: Option<String>,
        // `--dry-run` flag.
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
776
// Subcommands available under `zotron tags`. Every variant takes `--url`, defaulting to
// `DEFAULT_RPC_URL`.
// NOTE: plain `//` comments are used on clap-derived items because clap would turn `///` doc
// comments into help text.
#[derive(Debug, Subcommand)]
enum TagsCommand {
    // `tags list`.
    List {
        // `--limit`; defaults to 200.
        #[arg(long, default_value_t = 200)]
        limit: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `tags rename <old> <new>`: two positional tag names.
    Rename {
        old: String,
        new: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `tags delete <tag>`.
    Delete {
        tag: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `tags add [keys...] --tag <...>`.
    Add {
        // Zero or more positional item keys.
        keys: Vec<String>,
        // Repeatable `--tag`; at least one is required.
        #[arg(long = "tag", required = true)]
        tags: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `tags remove [keys...] --tag <...>`.
    Remove {
        // Zero or more positional item keys.
        keys: Vec<String>,
        // Repeatable `--tag`; at least one is required.
        #[arg(long = "tag", required = true)]
        tags: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
824
// Arguments of `zotron export`.
// NOTE: plain `//` comments are used on clap-derived items because clap would turn `///` doc
// comments into help text.
#[derive(Debug, clap::Args)]
struct ExportArgs {
    // Zero or more positional item keys.
    keys: Vec<String>,
    // `--format`; defaults to "bibtex".
    #[arg(long, default_value = "bibtex")]
    format: String,
    #[arg(long)]
    collection: Option<String>,
    // `--style`; defaults to the GB/T 7714-2015 numeric style URL.
    #[arg(long, default_value = "http://www.zotero.org/styles/gb-t-7714-2015-numeric")]
    style: String,
    // `--html` flag.
    #[arg(long)]
    html: bool,
    // RPC endpoint; defaults to `DEFAULT_RPC_URL`.
    #[arg(long, default_value = DEFAULT_RPC_URL)]
    url: String,
}
844
// Subcommands available under `zotron annotations`. Every variant takes `--url`, defaulting
// to `DEFAULT_RPC_URL`.
// NOTE: plain `//` comments are used on clap-derived items because clap would turn `///` doc
// comments into help text.
#[derive(Debug, Subcommand)]
enum AnnotationsCommand {
    // `annotations list --parent <...>`.
    List {
        #[arg(long)]
        parent: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `annotations create --parent <...> --type <...>`.
    Create {
        #[arg(long)]
        parent: String,
        #[arg(long = "type")]
        annotation_type: String,
        #[arg(long)]
        position: Option<String>,
        // `--sort-index`.
        #[arg(long = "sort-index")]
        sort_index: Option<String>,
        #[arg(long)]
        text: Option<String>,
        #[arg(long)]
        comment: Option<String>,
        // `--color`; defaults to "#ffd400".
        #[arg(long, default_value = "#ffd400")]
        color: String,
        // `--dry-run` flag.
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `annotations delete <annotation_key>`.
    Delete {
        annotation_key: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
886
// Subcommands available under `zotron attachments`. Every variant takes `--url`, defaulting
// to `DEFAULT_RPC_URL`.
// NOTE: plain `//` comments are used on clap-derived items because clap would turn `///` doc
// comments into help text.
#[derive(Debug, Subcommand)]
enum AttachmentsCommand {
    // `attachments list --parent <...>`.
    List {
        #[arg(long)]
        parent: String,
        // `--limit`; defaults to 50.
        #[arg(long, default_value_t = 50)]
        limit: u64,
        // `--offset`; defaults to 0.
        #[arg(long, default_value_t = 0)]
        offset: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `attachments get <key>`.
    Get {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `attachments fulltext <key>`.
    Fulltext {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `attachments path <key>`.
    Path {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `attachments add --parent <...>`; `--path` and `--from-url` are both optional at the
    // parser level.
    Add {
        #[arg(long)]
        parent: String,
        #[arg(long)]
        path: Option<String>,
        #[arg(long = "from-url")]
        from_url: Option<String>,
        #[arg(long)]
        title: Option<String>,
        // `--dry-run` flag.
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `attachments delete <key>`.
    Delete {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        // `--dry-run` flag.
        #[arg(long)]
        dry_run: bool,
    },
    // `attachments find-pdf --parent <...>`.
    #[command(name = "find-pdf")]
    FindPdf {
        #[arg(long)]
        parent: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
952
// Subcommands available under `zotron notes`. Every variant takes `--url`, defaulting to
// `DEFAULT_RPC_URL`.
// NOTE: plain `//` comments are used on clap-derived items because clap would turn `///` doc
// comments into help text.
#[derive(Debug, Subcommand)]
enum NotesCommand {
    // `notes list --parent <...>`.
    List {
        #[arg(long)]
        parent: String,
        // `--limit`; defaults to 50.
        #[arg(long, default_value_t = 50)]
        limit: u64,
        // `--offset`; defaults to 0.
        #[arg(long, default_value_t = 0)]
        offset: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `notes get <note_key>`.
    Get {
        note_key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `notes create --parent <...> --content <...> [--tag ...]`.
    Create {
        #[arg(long)]
        parent: String,
        #[arg(long)]
        content: String,
        // Repeatable `--tag`.
        #[arg(long = "tag")]
        tags: Vec<String>,
        // `--dry-run` flag.
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `notes update <note_key> --content <...>`.
    Update {
        note_key: String,
        #[arg(long)]
        content: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `notes delete <note_key>`.
    Delete {
        note_key: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `notes search <query>`.
    Search {
        query: String,
        // `--limit`; defaults to 50.
        #[arg(long, default_value_t = 50)]
        limit: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
1012
// Subcommands available under `zotron collections`. Every variant takes `--url`, defaulting
// to `DEFAULT_RPC_URL`.
// NOTE: plain `//` comments are used on clap-derived items because clap would turn `///` doc
// comments into help text.
#[derive(Debug, Subcommand)]
enum CollectionsCommand {
    // `collections list`.
    List {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `collections tree`.
    Tree {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `collections get <name_or_id>`.
    Get {
        name_or_id: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `collections get-items <name_or_id>`, also reachable through the visible alias `items`.
    #[command(name = "get-items", visible_alias = "items")]
    GetItems {
        name_or_id: String,
        // `--limit`; no default at the parser level.
        #[arg(long)]
        limit: Option<u64>,
        // `--offset`; defaults to 0.
        #[arg(long, default_value_t = 0)]
        offset: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `collections stats <name_or_id>`.
    Stats {
        name_or_id: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `collections rename <old_name> <new_name>`.
    Rename {
        old_name: String,
        new_name: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        // `--dry-run` flag.
        #[arg(long)]
        dry_run: bool,
    },
    // `collections create <name> [--parent <...>]`.
    Create {
        name: String,
        #[arg(long)]
        parent: Option<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
    // `collections delete <name_or_id>`.
    Delete {
        name_or_id: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
    // `collections add-items <collection> [item_keys...]`.
    #[command(name = "add-items")]
    AddItems {
        collection: String,
        // Zero or more positional item keys.
        item_keys: Vec<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
    // `collections remove-items <collection> [item_keys...]`.
    #[command(name = "remove-items")]
    RemoveItems {
        collection: String,
        // Zero or more positional item keys.
        item_keys: Vec<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
}
1096
/// Output formatting selector passed to `format_json`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum JsonStyle {
    /// Style used by `run_ocr_command` for its output.
    /// NOTE(review): presumably mirrors Python's default `json.dumps` separators — confirm
    /// against `format_json`.
    PythonCompact,
    /// NOTE(review): presumably indented, multi-line output — confirm against `format_json`.
    Pretty,
}
1104
/// Result of a successful `parse_cli` call.
enum ParseOutcome<T> {
    /// Arguments parsed into a runnable command value.
    Command(T),
    /// clap requested help or version output; the payload is the rendered text to show.
    Display(String),
}
1109
1110fn parse_cli<T>(
1111 args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1112) -> Result<ParseOutcome<T>, String>
1113where
1114 T: Parser,
1115{
1116 match T::try_parse_from(args) {
1117 Ok(cli) => Ok(ParseOutcome::Command(cli)),
1118 Err(err)
1119 if matches!(
1120 err.kind(),
1121 ErrorKind::DisplayHelp | ErrorKind::DisplayVersion
1122 ) =>
1123 {
1124 Ok(ParseOutcome::Display(err.to_string()))
1125 }
1126 Err(err) => Err(err.to_string()),
1127 }
1128}
1129
1130pub fn format_error_json(message: &str) -> String {
1131 let message = message.trim_end();
1132 let (code, message) = split_error_code(message).unwrap_or(("RUNTIME_ERROR", message));
1133 serde_json::json!({"error": {"code": code, "message": message}}).to_string()
1134}
1135
/// Splits `CODE: rest` into `(CODE, rest)`.
///
/// The part before the first `:` counts as a code only when it is non-empty and consists
/// solely of ASCII uppercase letters, ASCII digits and underscores. Leading whitespace of the
/// remainder is stripped. Returns `None` when there is no `:` or the prefix is not a code.
fn split_error_code(message: &str) -> Option<(&str, &str)> {
    let (prefix, remainder) = message.split_once(':')?;

    let is_code_char = |ch: char| matches!(ch, 'A'..='Z' | '0'..='9' | '_');
    if prefix.is_empty() || !prefix.chars().all(is_code_char) {
        return None;
    }

    Some((prefix, remainder.trim_start()))
}
1148
1149pub fn run(
1150 args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1151) -> Result<String, String> {
1152 let cli = match parse_cli::<Cli>(args)? {
1153 ParseOutcome::Command(cli) => cli,
1154 ParseOutcome::Display(output) => return Ok(output),
1155 };
1156 let url = command_url(&cli.command);
1157 let mut client = ZoteroRpc::new(url);
1158 run_command(cli.command, &mut client)
1159}
1160
1161pub fn run_with_client(
1162 args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1163 client: &mut impl RpcCaller,
1164) -> Result<String, String> {
1165 let cli = match parse_cli::<Cli>(args)? {
1166 ParseOutcome::Command(cli) => cli,
1167 ParseOutcome::Display(output) => return Ok(output),
1168 };
1169 run_command(cli.command, client)
1170}
1171
1172fn rag_command_url(command: &RagCommand) -> String {
1173 match command {
1174 RagCommand::Providers => DEFAULT_RPC_URL.to_string(),
1175 RagCommand::Embed { .. } => DEFAULT_RPC_URL.to_string(),
1176 RagCommand::Status { url, .. } => url.clone(),
1177 RagCommand::Search { url, .. } => url.clone(),
1178 }
1179}
1180
1181fn command_url(command: &Command) -> String {
1182 match command {
1183 Command::Ping { url }
1184 | Command::Rpc { url, .. }
1185 | Command::Push { url, .. }
1186 | Command::FindPdfs { url, .. } => url.clone(),
1187 Command::Ocr { command } => match command {
1188 OcrCommand::Providers => DEFAULT_RPC_URL.to_string(),
1189 OcrCommand::Run { .. } => DEFAULT_RPC_URL.to_string(),
1190 OcrCommand::Status { url, .. } => url.clone(),
1191 OcrCommand::Process { url, .. } => url.clone(),
1192 },
1193 Command::Rag { command } => rag_command_url(command),
1194 Command::System { command } => match command {
1195 SystemCommand::Version { url }
1196 | SystemCommand::Libraries { url }
1197 | SystemCommand::LibraryStats { url, .. }
1198 | SystemCommand::Schema { url, .. }
1199 | SystemCommand::CurrentCollection { url }
1200 | SystemCommand::ListMethods { url }
1201 | SystemCommand::Describe { url, .. } => url.clone(),
1202 },
1203 Command::Search(ref args) => match &args.management {
1204 Some(SearchManagementCommand::SavedSearches { url })
1205 | Some(SearchManagementCommand::CreateSaved { url, .. })
1206 | Some(SearchManagementCommand::DeleteSaved { url, .. }) => url.clone(),
1207 None => args.url.clone(),
1208 },
1209 Command::Items { command } => match command {
1210 ItemsCommand::Add { url, .. }
1211 | ItemsCommand::Create { url, .. }
1212 | ItemsCommand::Update { url, .. }
1213 | ItemsCommand::Delete { url, .. }
1214 | ItemsCommand::Trash { url, .. }
1215 | ItemsCommand::Restore { url, .. }
1216 | ItemsCommand::MergeDuplicates { url, .. }
1217 | ItemsCommand::AddRelated { url, .. }
1218 | ItemsCommand::RemoveRelated { url, .. }
1219 | ItemsCommand::Get { url, .. }
1220 | ItemsCommand::List { url, .. }
1221 | ItemsCommand::FindDuplicates { url }
1222 | ItemsCommand::Recent { url, .. }
1223 | ItemsCommand::Fulltext { url, .. }
1224 | ItemsCommand::Related { url, .. }
1225 | ItemsCommand::CitationKey { url, .. } => url.clone(),
1226 },
1227 Command::Collections { command } => match command {
1228 CollectionsCommand::List { url }
1229 | CollectionsCommand::Tree { url }
1230 | CollectionsCommand::Get { url, .. }
1231 | CollectionsCommand::GetItems { url, .. }
1232 | CollectionsCommand::Stats { url, .. }
1233 | CollectionsCommand::Rename { url, .. }
1234 | CollectionsCommand::Create { url, .. }
1235 | CollectionsCommand::Delete { url, .. }
1236 | CollectionsCommand::AddItems { url, .. }
1237 | CollectionsCommand::RemoveItems { url, .. } => url.clone(),
1238 },
1239 Command::Notes { command } => match command {
1240 NotesCommand::List { url, .. }
1241 | NotesCommand::Get { url, .. }
1242 | NotesCommand::Create { url, .. }
1243 | NotesCommand::Update { url, .. }
1244 | NotesCommand::Delete { url, .. }
1245 | NotesCommand::Search { url, .. } => url.clone(),
1246 },
1247 Command::Attachments { command } => match command {
1248 AttachmentsCommand::List { url, .. }
1249 | AttachmentsCommand::Get { url, .. }
1250 | AttachmentsCommand::Fulltext { url, .. }
1251 | AttachmentsCommand::Path { url, .. }
1252 | AttachmentsCommand::Add { url, .. }
1253 | AttachmentsCommand::Delete { url, .. }
1254 | AttachmentsCommand::FindPdf { url, .. } => url.clone(),
1255 },
1256 Command::Settings { command } => match command {
1257 SettingsCommand::Get { url, .. }
1258 | SettingsCommand::List { url }
1259 | SettingsCommand::Set { url, .. } => url.clone(),
1260 },
1261 Command::Tags { command } => match command {
1262 TagsCommand::List { url, .. }
1263 | TagsCommand::Rename { url, .. }
1264 | TagsCommand::Delete { url, .. }
1265 | TagsCommand::Add { url, .. }
1266 | TagsCommand::Remove { url, .. } => url.clone(),
1267 },
1268 Command::Export(ref args) => args.url.clone(),
1269 Command::Annotations { command } => match command {
1270 AnnotationsCommand::List { url, .. }
1271 | AnnotationsCommand::Create { url, .. }
1272 | AnnotationsCommand::Delete { url, .. } => url.clone(),
1273 },
1274 }
1275}
1276
1277fn run_ocr_command(command: OcrCommand, client: &mut impl RpcCaller) -> Result<String, String> {
1278 let value = match command {
1279 OcrCommand::Providers => serde_json::json!({
1280 "providers": ocr_provider_specs(),
1281 }),
1282 OcrCommand::Run {
1283 provider,
1284 input,
1285 file,
1286 item_key,
1287 attachment_key,
1288 mime_type,
1289 endpoint,
1290 api_key_env,
1291 } => run_ocr_run_command(OcrRunOptions {
1292 provider,
1293 input,
1294 file,
1295 item_key,
1296 attachment_key,
1297 mime_type,
1298 endpoint,
1299 api_key_env,
1300 })?,
1301 OcrCommand::Status { collection, .. } => run_ocr_status_command(client, collection)?,
1302 OcrCommand::Process {
1303 provider,
1304 parent,
1305 attachment,
1306 source_url,
1307 result_dir,
1308 result_zip,
1309 provider_endpoint,
1310 api_key_env,
1311 poll_interval_seconds,
1312 timeout_seconds,
1313 chunk_chars,
1314 ..
1315 } => run_ocr_process_command(
1316 client,
1317 OcrProcessOptions {
1318 provider,
1319 parent,
1320 attachment,
1321 source_url,
1322 result_dir,
1323 result_zip,
1324 provider_endpoint,
1325 api_key_env,
1326 poll_interval_seconds,
1327 timeout_seconds,
1328 chunk_chars,
1329 },
1330 )?,
1331 };
1332 format_json(&value, JsonStyle::PythonCompact)
1333}
1334
/// Arguments of `ocr process`, bundled for `run_ocr_process_command`.
///
/// Holds every field of `OcrCommand::Process` except `url`.
struct OcrProcessOptions {
    provider: String,
    parent: String,
    /// When `None`, the first PDF attachment of `parent` is resolved instead.
    attachment: Option<String>,
    source_url: Option<String>,
    result_dir: Option<String>,
    result_zip: Option<String>,
    provider_endpoint: Option<String>,
    api_key_env: String,
    poll_interval_seconds: u64,
    timeout_seconds: u64,
    chunk_chars: usize,
}
1348
/// Arguments of `ocr run`, bundled for `run_ocr_run_command`.
///
/// Holds the same fields as `OcrCommand::Run`.
struct OcrRunOptions {
    provider: String,
    /// Handed to `read_json_input`; mutually exclusive with `file`.
    input: Option<String>,
    /// Handed to `ocr_input_from_file`; mutually exclusive with `input`.
    file: Option<String>,
    /// Forwarded to `ocr_input_from_file` when `file` is used.
    item_key: Option<String>,
    /// Forwarded to `ocr_input_from_file` when `file` is used.
    attachment_key: Option<String>,
    /// Forwarded to `ocr_input_from_file` when `file` is used.
    mime_type: Option<String>,
    /// Takes precedence over the request's own URL for HTTP-style providers.
    endpoint: Option<String>,
    /// Passed to `provider_http_transport_with_auth` for HTTP-style providers.
    api_key_env: Option<String>,
}
1359
1360fn run_ocr_run_command(options: OcrRunOptions) -> Result<Value, String> {
1361 let input: OcrRequestInput = match (options.input, options.file) {
1362 (Some(input), None) => read_json_input(&input)?,
1363 (None, Some(file)) => ocr_input_from_file(
1364 file,
1365 options.item_key,
1366 options.attachment_key,
1367 options.mime_type,
1368 )?,
1369 (Some(_), Some(_)) => {
1370 return Err("INVALID_ARGS: use either --input or --file, not both".to_string())
1371 }
1372 (None, None) => return Err("INVALID_ARGS: provide --input JSON or --file".to_string()),
1373 };
1374 let request = build_ocr_provider_request(&options.provider, &input)?;
1375 let payload = if request.command.is_empty() {
1376 let method = request
1377 .method
1378 .ok_or_else(|| format!("OCR provider {} missing HTTP method", request.provider))?;
1379 let auth_scheme = raw_ocr_provider_spec(&options.provider)?.auth;
1380 let mut transport =
1381 provider_http_transport_with_auth(options.api_key_env.as_deref(), auth_scheme)?;
1382 transport.post_json(&ProviderHttpInvocation {
1383 provider: request.provider.to_string(),
1384 style: request.style.to_string(),
1385 method: method.to_string(),
1386 url: options
1387 .endpoint
1388 .or_else(|| request.url.map(ToString::to_string)),
1389 auth_header_name: request.auth_header.map(ToString::to_string),
1390 auth_header_value: None,
1391 body: request.body,
1392 })?
1393 } else {
1394 let mut command_runner = StdProviderCommandRunner;
1395 command_runner.run_json(&request.command)?
1396 };
1397 let blocks = match parse_ocr_provider_response(
1398 request.provider,
1399 &payload,
1400 &input.item_key,
1401 &input.attachment_key,
1402 ) {
1403 Ok(blocks) => blocks,
1404 Err(err) => {
1405 if let Some(task) = ocr_async_task_result(request.provider, &payload) {
1406 return Ok(task);
1407 }
1408 return Err(err);
1409 }
1410 };
1411
1412 Ok(serde_json::json!({
1413 "provider": request.provider,
1414 "blocks": blocks,
1415 }))
1416}
1417
1418fn run_ocr_process_command(
1419 client: &mut impl RpcCaller,
1420 mut options: OcrProcessOptions,
1421) -> Result<Value, String> {
1422 let spec = raw_ocr_provider_spec(&options.provider)?;
1423
1424 let attachment = match options.attachment.take() {
1425 Some(key) => key,
1426 None => resolve_first_pdf_attachment_key(client, &options.parent)?,
1427 };
1428 options.attachment = Some(attachment.clone());
1429
1430 let attachment_path = resolve_attachment_path(client, &attachment)?;
1431 let storage_dir = attachment_path
1432 .parent()
1433 .ok_or_else(|| {
1434 format!(
1435 "ATTACHMENT_PATH_INVALID: attachment path has no parent directory: {}",
1436 attachment_path.display()
1437 )
1438 })?
1439 .to_path_buf();
1440
1441 match spec.provider_key {
1442 "mineru" | "mineru-cli" => {
1443 if options.result_dir.is_some() && options.result_zip.is_some() {
1444 return Err("INVALID_ARGS: use either --result-dir or --result-zip, not both".to_string());
1445 }
1446 if options.source_url.is_some()
1447 && (options.result_dir.is_some() || options.result_zip.is_some())
1448 {
1449 return Err(
1450 "INVALID_ARGS: --source-url cannot be combined with --result-dir/--result-zip"
1451 .to_string(),
1452 );
1453 }
1454 let file_name = attachment_path
1455 .file_name()
1456 .and_then(|name| name.to_str())
1457 .unwrap_or("document.pdf")
1458 .to_string();
1459 let source = load_mineru_result_source(&options, &attachment_path, &file_name)?;
1460 let artifacts = persist_mineru_result_sidecars(
1461 &storage_dir, &options.parent, &attachment,
1462 &options.provider, &source, options.chunk_chars,
1463 )?;
1464 Ok(serde_json::json!({
1465 "provider": spec.provider_key,
1466 "status": "indexed",
1467 "item_key": options.parent,
1468 "attachment_key": attachment,
1469 "attachment_path": attachment_path,
1470 "storage_dir": storage_dir,
1471 "task_id": source.task_id,
1472 "state": source.state,
1473 "blocks": artifacts.block_count,
1474 "chunks": artifacts.chunk_count,
1475 "artifacts": artifacts.artifacts,
1476 }))
1477 }
1478 _ => {
1479 run_ocr_process_sync(
1480 client, &options, spec.provider_key,
1481 &attachment, &attachment_path, &storage_dir,
1482 )
1483 }
1484 }
1485}
1486
1487fn run_ocr_process_sync(
1488 client: &mut impl RpcCaller,
1489 options: &OcrProcessOptions,
1490 provider: &str,
1491 attachment_key: &str,
1492 attachment_path: &Path,
1493 storage_dir: &Path,
1494) -> Result<Value, String> {
1495 let api_url = if let Some(endpoint) = &options.provider_endpoint {
1496 endpoint.clone()
1497 } else {
1498 let settings = client.call("settings.getAll", None)?;
1499 settings.get("ocr.apiUrl")
1500 .and_then(Value::as_str)
1501 .unwrap_or("")
1502 .to_string()
1503 };
1504 if api_url.is_empty() {
1505 return Err(format!("MISSING_CONFIG: ocr.apiUrl not configured for provider {provider}"));
1506 }
1507
1508 let api_key = {
1509 let from_env = if !options.api_key_env.is_empty() {
1510 env::var(&options.api_key_env).ok().filter(|v| !v.is_empty())
1511 } else {
1512 None
1513 };
1514 from_env.unwrap_or_else(|| {
1515 client.call("settings.getRaw", Some(serde_json::json!({"key": "ocr.apiKey"})))
1516 .ok()
1517 .and_then(|raw| raw.get("ocr.apiKey").and_then(Value::as_str).map(String::from))
1518 .unwrap_or_default()
1519 })
1520 };
1521
1522 let pdf_bytes = fs::read(attachment_path)
1523 .map_err(|e| format!("READ_PDF_FAILED: {}: {e}", attachment_path.display()))?;
1524 let base64_pdf = format!("data:application/pdf;base64,{}", base64_encode(&pdf_bytes));
1525
1526 let model = {
1527 let settings = client.call("settings.getAll", None)?;
1528 settings.get("ocr.model")
1529 .and_then(Value::as_str)
1530 .unwrap_or("glm-ocr")
1531 .to_string()
1532 };
1533
1534 let body = serde_json::json!({
1535 "model": model,
1536 "file": base64_pdf,
1537 });
1538
1539 let mut req = ureq::post(&api_url).set("Content-Type", "application/json");
1540 if !api_key.is_empty() {
1541 req = req.set("Authorization", &format!("Bearer {api_key}"));
1542 }
1543 let response = req.send_json(&body)
1544 .map_err(|e| format!("OCR_REQUEST_FAILED: {provider}: {e}"))?;
1545 let payload: Value = response.into_json()
1546 .map_err(|e| format!("OCR_RESPONSE_PARSE_FAILED: {e}"))?;
1547
1548 let blocks = parse_ocr_provider_response(provider, &payload, &options.parent, attachment_key)?;
1549 let chunks = zotron_types::chunks_from_blocks(&blocks, options.chunk_chars);
1550
1551 let mut artifacts = Vec::new();
1552 artifacts.push(write_sidecar_json(
1553 storage_dir, &options.parent, attachment_key,
1554 MachineArtifactKind::OcrRaw, &payload,
1555 )?);
1556 artifacts.push(write_sidecar_jsonl(
1557 storage_dir, &options.parent, attachment_key,
1558 MachineArtifactKind::Blocks, &blocks,
1559 )?);
1560 artifacts.push(write_sidecar_jsonl(
1561 storage_dir, &options.parent, attachment_key,
1562 MachineArtifactKind::Chunks, &chunks,
1563 )?);
1564
1565 let embedding_count = embed_sidecar_chunks(client, storage_dir, &options.parent, attachment_key, &chunks);
1566
1567 Ok(serde_json::json!({
1568 "provider": provider,
1569 "status": "indexed",
1570 "item_key": options.parent,
1571 "attachment_key": attachment_key,
1572 "embeddings": embedding_count,
1573 "attachment_path": attachment_path,
1574 "storage_dir": storage_dir,
1575 "blocks": blocks.len(),
1576 "chunks": chunks.len(),
1577 "artifacts": artifacts,
1578 }))
1579}
1580
1581fn embed_sidecar_chunks(
1582 client: &mut impl RpcCaller,
1583 storage_dir: &Path,
1584 item_key: &str,
1585 attachment_key: &str,
1586 chunks: &[zotron_types::StructureChunk],
1587) -> usize {
1588 let Ok((provider, model, api_url, api_key)) = fetch_embedding_settings(client) else {
1589 return 0;
1590 };
1591 if provider.is_empty() || (api_key.is_empty() && provider != "ollama") {
1592 return 0;
1593 }
1594 let emb_chunks: Vec<EmbeddingChunkInput> = chunks
1595 .iter()
1596 .map(|c| EmbeddingChunkInput {
1597 chunk_key: c.chunk_key.clone(),
1598 text: c.text.clone(),
1599 })
1600 .collect();
1601 if emb_chunks.is_empty() {
1602 return 0;
1603 }
1604 let batch_size = 20;
1606 let mut all_vectors: Vec<EmbeddingVector> = Vec::new();
1607 for batch in emb_chunks.chunks(batch_size) {
1608 let input = EmbeddingRequestInput {
1609 item_key: item_key.to_string(),
1610 chunks: batch.to_vec(),
1611 model: if model.is_empty() { None } else { Some(model.clone()) },
1612 url: if api_url.is_empty() { None } else { Some(api_url.clone()) },
1613 input_type: Some("document".to_string()),
1614 };
1615 let Ok(request) = build_embedding_provider_request(&provider, &input) else {
1616 break;
1617 };
1618 let Some(url) = request.url.as_deref() else { break };
1619 let mut http = ureq::post(url).set("Content-Type", "application/json");
1620 if let Some(auth) = request.auth_header {
1621 if !api_key.is_empty() {
1622 http = http.set(auth, &format!("Bearer {api_key}"));
1623 }
1624 }
1625 let Ok(resp) = http.send_json(&request.body) else { break };
1626 let Ok(payload): Result<Value, _> = resp.into_json() else { break };
1627 let Ok(vectors) = parse_embedding_provider_response(&provider, &payload, item_key, batch)
1628 else {
1629 break;
1630 };
1631 all_vectors.extend(vectors);
1632 }
1633 let count = all_vectors.len();
1634 if count > 0 {
1635 let filename = embedding_vector_filename(&provider, &model);
1636 let vectors_dir = storage_dir.join(".zotron").join("embeddings");
1637 let _ = fs::create_dir_all(&vectors_dir);
1638 let vectors_path = vectors_dir.join(&filename);
1639 let mut out = String::new();
1640 for v in &all_vectors {
1641 if let Ok(line) = serde_json::to_string(v) {
1642 out.push_str(&line);
1643 out.push('\n');
1644 }
1645 }
1646 let _ = fs::write(&vectors_path, &out);
1647 }
1648 count
1649}
1650
/// A MinerU result loaded from a result directory, as built by
/// `mineru_result_source_from_dir`.
struct MineruResultSource {
    /// Task or batch id of the remote job; `None` for results loaded from a local directory or zip.
    task_id: Option<String>,
    /// Job state; `mineru_result_source_from_dir` always sets this to `"done"`.
    state: String,
    /// Directory containing the extracted result files.
    result_dir: PathBuf,
    /// Bytes of the result zip when the result came from a zip archive.
    raw_zip_bytes: Option<Vec<u8>>,
    /// Last status response of the remote task or batch, when one was polled.
    task_status: Option<Value>,
    /// Payload later handed to `parse_ocr_provider_response`.
    payload: Value,
    /// File the payload was read from.
    content_list_file: Option<PathBuf>,
    /// Contents of `full.md`, when the result directory contains one.
    markdown: Option<String>,
}
1661
/// Summary of what `persist_mineru_result_sidecars` wrote.
struct PersistedOcrArtifacts {
    /// Number of blocks parsed from the OCR payload.
    block_count: usize,
    /// Number of chunks derived from those blocks.
    chunk_count: usize,
    /// One JSON record per written sidecar artifact.
    artifacts: Vec<Value>,
}
1667
1668fn resolve_attachment_path(
1669 client: &mut impl RpcCaller,
1670 attachment_key: &str,
1671) -> Result<PathBuf, String> {
1672 let payload = client.call(
1673 "attachments.getPath",
1674 Some(serde_json::json!({"key": attachment_key})),
1675 )?;
1676 let raw_path = payload
1677 .get("path")
1678 .and_then(Value::as_str)
1679 .filter(|path| !path.trim().is_empty())
1680 .ok_or_else(|| {
1681 format!("ATTACHMENT_PATH_NOT_FOUND: attachment {attachment_key} has no local PDF path")
1682 })?;
1683 Ok(PathBuf::from(local_path_from_zotero_path(raw_path)))
1684}
1685
1686fn resolve_first_pdf_attachment_key(
1688 client: &mut impl RpcCaller,
1689 parent_key: &str,
1690) -> Result<String, String> {
1691 let response = client.call(
1692 "attachments.list",
1693 Some(serde_json::json!({"parentKey": parent_key})),
1694 )?;
1695 let attachments = response
1697 .get("items")
1698 .and_then(Value::as_array)
1699 .or_else(|| response.as_array())
1700 .ok_or_else(|| {
1701 format!("NO_PDF_ATTACHMENT: no attachments found for item {parent_key}")
1702 })?;
1703 for attachment in attachments {
1704 if is_pdf_attachment(attachment) {
1705 if let Some(key) = attachment.get("key").and_then(Value::as_str) {
1706 return Ok(key.to_string());
1707 }
1708 }
1709 }
1710 Err(format!(
1711 "NO_PDF_ATTACHMENT: no PDF attachment found for item {parent_key}"
1712 ))
1713}
1714
1715fn load_mineru_result_source(
1716 options: &OcrProcessOptions,
1717 attachment_path: &Path,
1718 file_name: &str,
1719) -> Result<MineruResultSource, String> {
1720 if let Some(result_dir) = options.result_dir.as_deref() {
1721 return mineru_result_source_from_dir(PathBuf::from(result_dir), None, None, None);
1722 }
1723 if let Some(result_zip) = options.result_zip.as_deref() {
1724 let zip_path = PathBuf::from(result_zip);
1725 let zip_bytes = fs::read(&zip_path)
1726 .map_err(|err| format!("read MinerU result zip {}: {err}", zip_path.display()))?;
1727 let result_dir = extract_zip_bytes_to_temp("zotron-mineru-result", &zip_bytes)?;
1728 return mineru_result_source_from_dir(result_dir, Some(zip_bytes), None, None);
1729 }
1730
1731 let Some(source_url) = options
1732 .source_url
1733 .as_deref()
1734 .filter(|value| !value.trim().is_empty())
1735 else {
1736 return submit_mineru_local_file(options, attachment_path, file_name);
1737 };
1738 let input = OcrRequestInput {
1739 item_key: options.parent.clone(),
1740 attachment_key: options.attachment.clone().expect("attachment resolved"),
1741 file_name: file_name.to_string(),
1742 mime_type: "application/pdf".to_string(),
1743 content_base64: format!("url:{source_url}"),
1744 source_url: Some(source_url.to_string()),
1745 local_path: None,
1746 output_dir: None,
1747 };
1748 let task = submit_mineru_task(
1749 &options.provider,
1750 &input,
1751 options.provider_endpoint.clone(),
1752 &options.api_key_env,
1753 )?;
1754 let task_id = task
1755 .get("data")
1756 .and_then(|data| data.get("task_id"))
1757 .and_then(Value::as_str)
1758 .ok_or_else(|| "MinerU submit response missing data.task_id".to_string())?
1759 .to_string();
1760 let auth_header = provider_auth_header_value(&options.api_key_env, "bearer")?;
1761 let status = poll_mineru_task(
1762 options.provider_endpoint.as_deref(),
1763 &task_id,
1764 &auth_header,
1765 options.poll_interval_seconds,
1766 options.timeout_seconds,
1767 )?;
1768 let zip_url = status
1769 .pointer("/data/full_zip_url")
1770 .or_else(|| status.pointer("/data/result/full_zip_url"))
1771 .and_then(Value::as_str)
1772 .ok_or_else(|| "MinerU completed task missing data.full_zip_url".to_string())?;
1773 let zip_bytes = download_bytes(zip_url)?;
1774 let result_dir = extract_zip_bytes_to_temp("zotron-mineru-result", &zip_bytes)?;
1775 mineru_result_source_from_dir(result_dir, Some(zip_bytes), Some(status), Some(task_id))
1776}
1777
1778fn submit_mineru_local_file(
1779 options: &OcrProcessOptions,
1780 attachment_path: &Path,
1781 file_name: &str,
1782) -> Result<MineruResultSource, String> {
1783 let auth_header = provider_auth_header_value(&options.api_key_env, "bearer")?;
1784 let upload_request = create_mineru_file_upload(
1785 options.provider_endpoint.as_deref(),
1786 file_name,
1787 options.attachment.as_deref().expect("attachment resolved"),
1788 &auth_header,
1789 )?;
1790 let upload_url = upload_request
1791 .pointer("/data/file_urls/0")
1792 .or_else(|| upload_request.pointer("/data/fileUrls/0"))
1793 .and_then(Value::as_str)
1794 .ok_or_else(|| "MinerU upload URL response missing data.file_urls[0]".to_string())?;
1795 let batch_id = upload_request
1796 .pointer("/data/batch_id")
1797 .or_else(|| upload_request.pointer("/data/batchId"))
1798 .and_then(Value::as_str)
1799 .ok_or_else(|| "MinerU upload URL response missing data.batch_id".to_string())?
1800 .to_string();
1801 let bytes = fs::read(attachment_path)
1802 .map_err(|err| format!("read attachment PDF {}: {err}", attachment_path.display()))?;
1803 put_bytes(upload_url, &bytes)?;
1804 let status = poll_mineru_batch(
1805 options.provider_endpoint.as_deref(),
1806 &batch_id,
1807 &auth_header,
1808 options.poll_interval_seconds,
1809 options.timeout_seconds,
1810 )?;
1811 let zip_url = mineru_batch_zip_url(&status)
1812 .ok_or_else(|| "MinerU completed batch missing full_zip_url".to_string())?;
1813 let zip_bytes = download_bytes(&zip_url)?;
1814 let result_dir = extract_zip_bytes_to_temp("zotron-mineru-result", &zip_bytes)?;
1815 mineru_result_source_from_dir(result_dir, Some(zip_bytes), Some(status), Some(batch_id))
1816}
1817
1818fn create_mineru_file_upload(
1819 endpoint: Option<&str>,
1820 file_name: &str,
1821 data_id: &str,
1822 auth_header: &str,
1823) -> Result<Value, String> {
1824 let url = mineru_file_urls_url(endpoint);
1825 let body = serde_json::json!({
1826 "files": [{"name": file_name, "data_id": data_id}],
1827 "model_version": "vlm",
1828 "is_ocr": false,
1829 "enable_formula": true,
1830 "enable_table": true,
1831 "language": "ch",
1832 "page_ranges": "1-200",
1833 });
1834 ureq::post(&url)
1835 .set("Authorization", auth_header)
1836 .send_json(body)
1837 .map_err(|err| format!("POST {url} failed: {err}"))?
1838 .into_json::<Value>()
1839 .map_err(|err| format!("POST {url} returned invalid JSON: {err}"))
1840}
1841
1842fn put_bytes(url: &str, bytes: &[u8]) -> Result<(), String> {
1843 ureq::put(url)
1844 .send_bytes(bytes)
1845 .map_err(|err| format!("PUT {url} failed: {err}"))?;
1846 Ok(())
1847}
1848
1849fn submit_mineru_task(
1850 provider: &str,
1851 input: &OcrRequestInput,
1852 endpoint: Option<String>,
1853 api_key_env: &str,
1854) -> Result<Value, String> {
1855 let request = build_ocr_provider_request(provider, input)?;
1856 let method = request
1857 .method
1858 .ok_or_else(|| "MinerU provider missing HTTP method".to_string())?;
1859 let mut transport = provider_http_transport_with_auth(Some(api_key_env), "bearer")?;
1860 transport.post_json(&ProviderHttpInvocation {
1861 provider: request.provider.to_string(),
1862 style: request.style.to_string(),
1863 method: method.to_string(),
1864 url: endpoint.or_else(|| request.url.map(ToString::to_string)),
1865 auth_header_name: request.auth_header.map(ToString::to_string),
1866 auth_header_value: None,
1867 body: request.body,
1868 })
1869}
1870
1871fn poll_mineru_task(
1872 endpoint: Option<&str>,
1873 task_id: &str,
1874 auth_header: &str,
1875 poll_interval_seconds: u64,
1876 timeout_seconds: u64,
1877) -> Result<Value, String> {
1878 let url = mineru_task_status_url(endpoint, task_id);
1879 let started = Instant::now();
1880 loop {
1881 let status = get_json_with_auth(&url, auth_header)?;
1882 let state = status
1883 .pointer("/data/state")
1884 .or_else(|| status.pointer("/data/status"))
1885 .and_then(Value::as_str)
1886 .unwrap_or("unknown");
1887 match state {
1888 "done" | "finished" | "success" => return Ok(status),
1889 "failed" | "error" => return Err(format!("MinerU task {task_id} failed: {status}")),
1890 _ => {
1891 if started.elapsed() >= Duration::from_secs(timeout_seconds) {
1892 return Err(format!(
1893 "MinerU task {task_id} timed out after {timeout_seconds}s with state {state}"
1894 ));
1895 }
1896 thread::sleep(Duration::from_secs(poll_interval_seconds.max(1)));
1897 }
1898 }
1899 }
1900}
1901
/// Builds the status URL for a MinerU extraction task.
///
/// `endpoint` may be the API root (`.../api/v4`), the extract prefix
/// (`.../api/v4/extract`) or the full task endpoint
/// (`.../api/v4/extract/task`), with or without trailing slashes. All three
/// resolve to `<root>/extract/task/<task_id>`, matching the suffix handling
/// of `mineru_api_base`. Without an endpoint the public MinerU API is used.
fn mineru_task_status_url(endpoint: Option<&str>, task_id: &str) -> String {
    let base = endpoint
        .unwrap_or("https://mineru.net/api/v4/extract/task")
        .trim_end_matches('/');
    // Reduce to the API root first so an endpoint ending in `/extract` does
    // not produce `/extract/extract/task/...`.
    let api_root = base
        .strip_suffix("/extract/task")
        .or_else(|| base.strip_suffix("/extract"))
        .unwrap_or(base);
    format!("{api_root}/extract/task/{task_id}")
}
1912
1913fn mineru_file_urls_url(endpoint: Option<&str>) -> String {
1914 let base = mineru_api_base(endpoint);
1915 format!("{base}/file-urls/batch")
1916}
1917
1918fn mineru_batch_status_url(endpoint: Option<&str>, batch_id: &str) -> String {
1919 let base = mineru_api_base(endpoint);
1920 format!("{base}/extract-results/batch/{batch_id}")
1921}
1922
/// Derives the MinerU API root from an endpoint.
///
/// Trailing slashes are removed, then a trailing `/extract/task` or, failing
/// that, a trailing `/extract` is stripped. Without an endpoint the public
/// MinerU task endpoint is used as the starting point.
fn mineru_api_base(endpoint: Option<&str>) -> String {
    let trimmed = endpoint
        .unwrap_or("https://mineru.net/api/v4/extract/task")
        .trim_end_matches('/');
    // Longest suffix first, so `/extract/task` is never half-stripped.
    ["/extract/task", "/extract"]
        .iter()
        .find_map(|suffix| trimmed.strip_suffix(*suffix))
        .unwrap_or(trimmed)
        .to_string()
}
1935
1936fn poll_mineru_batch(
1937 endpoint: Option<&str>,
1938 batch_id: &str,
1939 auth_header: &str,
1940 poll_interval_seconds: u64,
1941 timeout_seconds: u64,
1942) -> Result<Value, String> {
1943 let url = mineru_batch_status_url(endpoint, batch_id);
1944 let started = Instant::now();
1945 loop {
1946 let status = get_json_with_auth(&url, auth_header)?;
1947 let state = mineru_batch_state(&status).unwrap_or("unknown");
1948 match state {
1949 "done" | "finished" | "success" => return Ok(status),
1950 "failed" | "error" => return Err(format!("MinerU batch {batch_id} failed: {status}")),
1951 _ => {
1952 if started.elapsed() >= Duration::from_secs(timeout_seconds) {
1953 return Err(format!(
1954 "MinerU batch {batch_id} timed out after {timeout_seconds}s with state {state}"
1955 ));
1956 }
1957 thread::sleep(Duration::from_secs(poll_interval_seconds.max(1)));
1958 }
1959 }
1960 }
1961}
1962
1963fn mineru_batch_state(status: &Value) -> Option<&str> {
1964 status
1965 .pointer("/data/extract_result/0/state")
1966 .or_else(|| status.pointer("/data/extractResult/0/state"))
1967 .or_else(|| status.pointer("/data/state"))
1968 .and_then(Value::as_str)
1969}
1970
1971fn mineru_batch_zip_url(status: &Value) -> Option<String> {
1972 status
1973 .pointer("/data/extract_result/0/full_zip_url")
1974 .or_else(|| status.pointer("/data/extractResult/0/full_zip_url"))
1975 .or_else(|| status.pointer("/data/full_zip_url"))
1976 .and_then(Value::as_str)
1977 .map(ToString::to_string)
1978}
1979
/// Builds the `Authorization` header value from the credential stored in the
/// environment variable `api_key_env`.
///
/// The credential is trimmed. For the `"bearer"` and `"token"` schemes the
/// result is `Bearer <credential>` / `token <credential>`. A credential that
/// already carries the scheme prefix is not prefixed again; the prefix is
/// matched case-insensitively (HTTP auth schemes are case-insensitive) and
/// rewritten in canonical spelling, so `bearer abc` becomes `Bearer abc`
/// rather than `Bearer bearer abc`. Any other scheme returns the credential
/// unchanged.
///
/// # Errors
/// Fails when the variable is unset (or not valid Unicode) or is blank.
fn provider_auth_header_value(api_key_env: &str, auth_scheme: &str) -> Result<String, String> {
    /// Returns the part of `token` after a leading `scheme` (any ASCII case),
    /// including its leading space, or `None` when `token` has no such prefix.
    fn credential_after_scheme<'a>(token: &'a str, scheme: &str) -> Option<&'a str> {
        let head = token.get(..scheme.len())?;
        let rest = token.get(scheme.len()..)?;
        (head.eq_ignore_ascii_case(scheme) && rest.starts_with(' ')).then_some(rest)
    }

    let token = env::var(api_key_env)
        .map_err(|_| format!("missing provider credential env var {api_key_env}"))?;
    let token = token.trim();
    if token.is_empty() {
        return Err(format!(
            "provider credential env var {api_key_env} is empty"
        ));
    }

    let canonical_scheme = match auth_scheme {
        "bearer" => "Bearer",
        "token" => "token",
        _ => return Ok(token.to_string()),
    };
    Ok(match credential_after_scheme(token, canonical_scheme) {
        // `rest` still starts with the separating space.
        Some(rest) => format!("{canonical_scheme}{rest}"),
        None => format!("{canonical_scheme} {token}"),
    })
}
1997
1998fn get_json_with_auth(url: &str, auth_header: &str) -> Result<Value, String> {
1999 ureq::get(url)
2000 .set("Authorization", auth_header)
2001 .call()
2002 .map_err(|err| format!("GET {url} failed: {err}"))?
2003 .into_json::<Value>()
2004 .map_err(|err| format!("GET {url} returned invalid JSON: {err}"))
2005}
2006
2007fn download_bytes(url: &str) -> Result<Vec<u8>, String> {
2008 let response = ureq::get(url)
2009 .call()
2010 .map_err(|err| format!("download {url} failed: {err}"))?;
2011 let mut bytes = Vec::new();
2012 response
2013 .into_reader()
2014 .read_to_end(&mut bytes)
2015 .map_err(|err| format!("read download {url}: {err}"))?;
2016 Ok(bytes)
2017}
2018
2019fn extract_zip_bytes_to_temp(prefix: &str, zip_bytes: &[u8]) -> Result<PathBuf, String> {
2020 let dir = unique_temp_path(prefix);
2021 fs::create_dir_all(&dir).map_err(|err| format!("create temp dir {}: {err}", dir.display()))?;
2022 let zip_path = dir.with_extension("zip");
2023 fs::write(&zip_path, zip_bytes)
2024 .map_err(|err| format!("write temp zip {}: {err}", zip_path.display()))?;
2025 let output = ProcessCommand::new("unzip")
2026 .arg("-q")
2027 .arg("-o")
2028 .arg(&zip_path)
2029 .arg("-d")
2030 .arg(&dir)
2031 .output()
2032 .map_err(|err| format!("run unzip: {err}"))?;
2033 if !output.status.success() {
2034 return Err(format!(
2035 "unzip {} failed: {}",
2036 zip_path.display(),
2037 String::from_utf8_lossy(&output.stderr).trim()
2038 ));
2039 }
2040 Ok(dir)
2041}
2042
/// Returns a path inside the system temp directory named
/// `<prefix>-<process id>-<nanoseconds since the Unix epoch>`.
///
/// Nothing is created on disk. If the system clock is before the Unix epoch
/// the timestamp part is `0`.
fn unique_temp_path(prefix: &str) -> PathBuf {
    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    let pid = std::process::id();
    let mut path = env::temp_dir();
    path.push(format!("{prefix}-{pid}-{nanos}"));
    path
}
2050
2051fn mineru_result_source_from_dir(
2052 result_dir: PathBuf,
2053 raw_zip_bytes: Option<Vec<u8>>,
2054 task_status: Option<Value>,
2055 task_id: Option<String>,
2056) -> Result<MineruResultSource, String> {
2057 let (payload, content_list_file) = mineru_payload_from_result_dir(&result_dir)?;
2058 let markdown = find_first_file_by_name(&result_dir, "full.md")
2059 .map(|path| {
2060 fs::read_to_string(&path)
2061 .map_err(|err| format!("read native markdown {}: {err}", path.display()))
2062 })
2063 .transpose()?;
2064 Ok(MineruResultSource {
2065 task_id,
2066 state: "done".to_string(),
2067 result_dir,
2068 raw_zip_bytes,
2069 task_status,
2070 payload,
2071 content_list_file,
2072 markdown,
2073 })
2074}
2075
2076fn mineru_payload_from_result_dir(result_dir: &Path) -> Result<(Value, Option<PathBuf>), String> {
2077 let v2 = find_first_file_with_suffix(result_dir, "_content_list_v2.json");
2078 if let Some(path) = v2 {
2079 let value = read_json_file(&path)?;
2080 return Ok((serde_json::json!({"content_list_v2": value}), Some(path)));
2081 }
2082 let content_list = find_first_file_with_suffix(result_dir, "_content_list.json");
2083 if let Some(path) = content_list {
2084 let value = read_json_file(&path)?;
2085 return Ok((serde_json::json!({"content_list": value}), Some(path)));
2086 }
2087 let layout = find_first_file_by_name(result_dir, "layout.json");
2088 if let Some(path) = layout {
2089 return Ok((read_json_file(&path)?, Some(path)));
2090 }
2091 let markdown = find_first_file_by_name(result_dir, "full.md");
2092 if let Some(path) = markdown {
2093 let text = fs::read_to_string(&path)
2094 .map_err(|err| format!("read native markdown {}: {err}", path.display()))?;
2095 return Ok((serde_json::json!({"result": text}), Some(path)));
2096 }
2097 Err(format!(
2098 "MinerU result directory {} missing content_list_v2/content_list/layout/full.md",
2099 result_dir.display()
2100 ))
2101}
2102
2103fn read_json_file(path: &Path) -> Result<Value, String> {
2104 let raw = fs::read_to_string(path).map_err(|err| format!("read {}: {err}", path.display()))?;
2105 serde_json::from_str(&raw).map_err(|err| format!("parse JSON {}: {err}", path.display()))
2106}
2107
2108fn persist_mineru_result_sidecars(
2109 storage_dir: &Path,
2110 item_key: &str,
2111 attachment_key: &str,
2112 provider: &str,
2113 source: &MineruResultSource,
2114 chunk_chars: usize,
2115) -> Result<PersistedOcrArtifacts, String> {
2116 let blocks = parse_ocr_provider_response(provider, &source.payload, item_key, attachment_key)?;
2117 let chunks = zotron_types::chunks_from_blocks(&blocks, chunk_chars);
2118 let assets = copy_mineru_assets(&source.result_dir, storage_dir)?;
2119 let raw_bundle = serde_json::json!({
2120 "provider": provider,
2121 "item_key": item_key,
2122 "attachment_key": attachment_key,
2123 "task_id": source.task_id,
2124 "state": source.state,
2125 "task_status": source.task_status,
2126 "content_list_file": source.content_list_file,
2127 "payload": source.payload,
2128 });
2129
2130 let mut artifacts = Vec::new();
2131 artifacts.push(write_sidecar_json(
2132 storage_dir,
2133 item_key,
2134 attachment_key,
2135 MachineArtifactKind::OcrRaw,
2136 &raw_bundle,
2137 )?);
2138 artifacts.push(write_sidecar_jsonl(
2139 storage_dir,
2140 item_key,
2141 attachment_key,
2142 MachineArtifactKind::Blocks,
2143 &blocks,
2144 )?);
2145 artifacts.push(write_sidecar_jsonl(
2146 storage_dir,
2147 item_key,
2148 attachment_key,
2149 MachineArtifactKind::Chunks,
2150 &chunks,
2151 )?);
2152 if let Some(markdown) = source.markdown.as_deref() {
2153 artifacts.push(write_sidecar_bytes(
2154 storage_dir,
2155 item_key,
2156 attachment_key,
2157 MachineArtifactKind::OcrNativeMarkdown,
2158 markdown.as_bytes(),
2159 )?);
2160 }
2161 artifacts.push(write_sidecar_json(
2162 storage_dir,
2163 item_key,
2164 attachment_key,
2165 MachineArtifactKind::OcrNativeAssets,
2166 &assets,
2167 )?);
2168 if let Some(bytes) = source.raw_zip_bytes.as_deref() {
2169 artifacts.push(write_extra_sidecar_bytes(
2170 storage_dir,
2171 ".zotron/ocr/latest.raw.zip",
2172 bytes,
2173 )?);
2174 }
2175
2176 Ok(PersistedOcrArtifacts {
2177 block_count: blocks.len(),
2178 chunk_count: chunks.len(),
2179 artifacts,
2180 })
2181}
2182
2183fn write_sidecar_json(
2184 storage_dir: &Path,
2185 item_key: &str,
2186 attachment_key: &str,
2187 kind: MachineArtifactKind,
2188 value: &Value,
2189) -> Result<Value, String> {
2190 let bytes = serde_json::to_vec_pretty(value).map_err(|err| err.to_string())?;
2191 write_sidecar_bytes(storage_dir, item_key, attachment_key, kind, &bytes)
2192}
2193
2194fn write_sidecar_jsonl<T: serde::Serialize>(
2195 storage_dir: &Path,
2196 item_key: &str,
2197 attachment_key: &str,
2198 kind: MachineArtifactKind,
2199 values: &[T],
2200) -> Result<Value, String> {
2201 let mut out = String::new();
2202 for value in values {
2203 out.push_str(&serde_json::to_string(value).map_err(|err| err.to_string())?);
2204 out.push('\n');
2205 }
2206 write_sidecar_bytes(storage_dir, item_key, attachment_key, kind, out.as_bytes())
2207}
2208
2209fn write_sidecar_bytes(
2210 storage_dir: &Path,
2211 item_key: &str,
2212 attachment_key: &str,
2213 kind: MachineArtifactKind,
2214 bytes: &[u8],
2215) -> Result<Value, String> {
2216 let record = write_machine_artifact_sidecar(storage_dir, item_key, attachment_key, kind, bytes)
2217 .map_err(|err| format!("write sidecar {:?}: {err}", kind))?;
2218 Ok(serde_json::json!({
2219 "kind": kind,
2220 "relative_path": record.relative_path,
2221 "absolute_path": record.absolute_path,
2222 }))
2223}
2224
2225fn write_extra_sidecar_bytes(
2226 storage_dir: &Path,
2227 relative_path: &str,
2228 bytes: &[u8],
2229) -> Result<Value, String> {
2230 let absolute_path = storage_dir.join(relative_path);
2231 if let Some(parent) = absolute_path.parent() {
2232 fs::create_dir_all(parent).map_err(|err| format!("create {}: {err}", parent.display()))?;
2233 }
2234 fs::write(&absolute_path, bytes)
2235 .map_err(|err| format!("write sidecar {}: {err}", absolute_path.display()))?;
2236 Ok(serde_json::json!({
2237 "kind": "ocr_raw_zip",
2238 "relative_path": relative_path,
2239 "absolute_path": absolute_path,
2240 }))
2241}
2242
2243fn copy_mineru_assets(result_dir: &Path, storage_dir: &Path) -> Result<Value, String> {
2244 let mut images = Vec::new();
2245 for file in collect_files(result_dir)? {
2246 if !is_image_file(&file) {
2247 continue;
2248 }
2249 let relative = file.strip_prefix(result_dir).unwrap_or(&file).to_path_buf();
2250 let destination = storage_dir.join(".zotron").join("ocr").join(&relative);
2251 if let Some(parent) = destination.parent() {
2252 fs::create_dir_all(parent)
2253 .map_err(|err| format!("create {}: {err}", parent.display()))?;
2254 }
2255 fs::copy(&file, &destination).map_err(|err| {
2256 format!(
2257 "copy MinerU asset {} to {}: {err}",
2258 file.display(),
2259 destination.display()
2260 )
2261 })?;
2262 images.push(serde_json::json!({
2263 "source_relative": relative,
2264 "sidecar_relative": PathBuf::from(".zotron").join("ocr").join(&relative),
2265 "absolute_path": destination,
2266 }));
2267 }
2268 Ok(serde_json::json!({
2269 "provider": "mineru",
2270 "images": images,
2271 }))
2272}
2273
/// Returns `true` when the extension of `path` is, ignoring ASCII case, one
/// of `png`, `jpg`, `jpeg`, `webp` or `gif`.
fn is_image_file(path: &Path) -> bool {
    const IMAGE_EXTENSIONS: [&str; 5] = ["png", "jpg", "jpeg", "webp", "gif"];
    match path.extension().and_then(|ext| ext.to_str()) {
        Some(ext) => IMAGE_EXTENSIONS
            .iter()
            .any(|known| ext.eq_ignore_ascii_case(known)),
        None => false,
    }
}
2284
2285fn find_first_file_with_suffix(root: &Path, suffix: &str) -> Option<PathBuf> {
2286 collect_files(root).ok()?.into_iter().find(|path| {
2287 path.file_name()
2288 .and_then(|name| name.to_str())
2289 .is_some_and(|name| name.ends_with(suffix))
2290 })
2291}
2292
2293fn find_first_file_by_name(root: &Path, name: &str) -> Option<PathBuf> {
2294 collect_files(root).ok()?.into_iter().find(|path| {
2295 path.file_name()
2296 .and_then(|file_name| file_name.to_str())
2297 .is_some_and(|file_name| file_name == name)
2298 })
2299}
2300
2301fn collect_files(root: &Path) -> Result<Vec<PathBuf>, String> {
2302 let mut files = Vec::new();
2303 collect_files_into(root, &mut files)?;
2304 files.sort();
2305 Ok(files)
2306}
2307
/// Appends every regular file found under `root` to `files`, descending into
/// subdirectories as they are encountered.
///
/// Entries that are neither directories nor regular files are ignored. The
/// first I/O error aborts the walk.
fn collect_files_into(root: &Path, files: &mut Vec<PathBuf>) -> Result<(), String> {
    let entries = match fs::read_dir(root) {
        Ok(entries) => entries,
        Err(err) => return Err(format!("read dir {}: {err}", root.display())),
    };
    for entry in entries {
        let entry = match entry {
            Ok(entry) => entry,
            Err(err) => return Err(format!("read dir entry {}: {err}", root.display())),
        };
        let path = entry.path();
        let kind = match entry.file_type() {
            Ok(kind) => kind,
            Err(err) => return Err(format!("stat {}: {err}", path.display())),
        };
        match (kind.is_dir(), kind.is_file()) {
            (true, _) => collect_files_into(&path, files)?,
            (false, true) => files.push(path),
            (false, false) => {}
        }
    }
    Ok(())
}
2323
2324fn ocr_async_task_result(provider: &str, payload: &Value) -> Option<Value> {
2325 let data = payload.get("data")?;
2326 let task_id = data.get("task_id").and_then(Value::as_str)?;
2327 Some(serde_json::json!({
2328 "provider": provider,
2329 "status": "submitted",
2330 "task_id": task_id,
2331 "state": data.get("state").and_then(Value::as_str).unwrap_or("submitted"),
2332 "result_url": data.get("full_zip_url").or_else(|| data.get("markdown_url")).cloned(),
2333 "raw": payload,
2334 }))
2335}
2336
2337fn ocr_input_from_file(
2338 file: String,
2339 item_key: Option<String>,
2340 attachment_key: Option<String>,
2341 mime_type: Option<String>,
2342) -> Result<OcrRequestInput, String> {
2343 let item_key = item_key
2344 .ok_or_else(|| "INVALID_ARGS: --item-key is required when using --file".to_string())?;
2345 let attachment_key = attachment_key.ok_or_else(|| {
2346 "INVALID_ARGS: --attachment-key is required when using --file".to_string()
2347 })?;
2348 let path = PathBuf::from(&file);
2349 let bytes = fs::read(&path).map_err(|err| format!("read {file}: {err}"))?;
2350 let file_name = path
2351 .file_name()
2352 .and_then(|name| name.to_str())
2353 .unwrap_or("document.pdf")
2354 .to_string();
2355 let mime_type = mime_type.unwrap_or_else(|| guess_mime_type(&path).to_string());
2356 Ok(OcrRequestInput {
2357 item_key,
2358 attachment_key,
2359 file_name,
2360 mime_type,
2361 content_base64: base64_encode(&bytes),
2362 source_url: None,
2363 local_path: Some(file),
2364 output_dir: None,
2365 })
2366}
2367
/// Guesses a MIME type from the extension of `path` (ASCII case-insensitive).
///
/// Recognises `png`, `jpg`/`jpeg`, `webp` and `gif`; anything else, including
/// paths without an extension, is treated as `application/pdf`.
fn guess_mime_type(path: &Path) -> &'static str {
    match path
        .extension()
        .and_then(|ext| ext.to_str())
        .unwrap_or_default()
        .to_ascii_lowercase()
        .as_str()
    {
        "png" => "image/png",
        "jpg" | "jpeg" => "image/jpeg",
        "webp" => "image/webp",
        // `is_image_file` accepts gif as an image, so do not mislabel it as a PDF.
        "gif" => "image/gif",
        _ => "application/pdf",
    }
}
2382
/// Encodes `bytes` as base64 using the `A–Z a–z 0–9 + /` alphabet with `=` padding.
fn base64_encode(bytes: &[u8]) -> String {
    const ALPHABET: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    let sextet = |index: u8| char::from(ALPHABET[usize::from(index)]);

    let mut encoded = String::with_capacity(bytes.len().div_ceil(3) * 4);
    for group in bytes.chunks(3) {
        // Missing trailing bytes of a short final group contribute zero bits.
        let head = group[0];
        let mid = group.get(1).copied().unwrap_or(0);
        let tail = group.get(2).copied().unwrap_or(0);

        encoded.push(sextet(head >> 2));
        encoded.push(sextet(((head & 0x03) << 4) | (mid >> 4)));
        encoded.push(if group.len() > 1 {
            sextet(((mid & 0x0f) << 2) | (tail >> 6))
        } else {
            '='
        });
        encoded.push(if group.len() > 2 {
            sextet(tail & 0x3f)
        } else {
            '='
        });
    }
    encoded
}
2405
2406fn run_ocr_status_command(
2407 client: &mut impl RpcCaller,
2408 collection: String,
2409) -> Result<Value, String> {
2410 let collection_key = find_collection_in_tree(client, &collection)?
2411 .and_then(|node| node.get("key").cloned())
2412 .ok_or_else(|| format!("COLLECTION_NOT_FOUND: Collection not found: {collection:?}"))?;
2413 let raw = paginate_rpc(
2414 client,
2415 "collections.getItems",
2416 serde_json::json!({"key": collection_key}),
2417 500,
2418 )?;
2419 let items = raw
2420 .get("items")
2421 .and_then(Value::as_array)
2422 .or_else(|| raw.as_array())
2423 .ok_or_else(|| "collections.getItems returned non-array/non-items result".to_string())?
2424 .clone();
2425
2426 let mut has_ocr = 0usize;
2427 for item in &items {
2428 let item_key = item.get("key").cloned().unwrap_or(Value::Null);
2429 if has_ocr_artifact(client, &item_key)? || has_ocr_note(client, &item_key)? {
2430 has_ocr += 1;
2431 }
2432 }
2433
2434 Ok(serde_json::json!({
2435 "collection": collection,
2436 "total": items.len(),
2437 "has_ocr": has_ocr,
2438 "missing_ocr": items.len() - has_ocr,
2439 }))
2440}
2441
2442fn has_ocr_artifact(client: &mut impl RpcCaller, item_key: &Value) -> Result<bool, String> {
2443 if let Some(item_key) = item_key.as_str() {
2444 if machine_artifact_exists_for_item(
2447 machine_artifact_store_root(),
2448 item_key,
2449 MachineArtifactKind::Chunks,
2450 ) {
2451 return Ok(true);
2452 }
2453 }
2454
2455 let attachments = client.call(
2456 "attachments.list",
2457 Some(serde_json::json!({"parentKey": item_key.clone()})),
2458 )?;
2459 Ok(attachments.as_array().is_some_and(|attachments| {
2460 attachments.iter().any(|attachment| {
2461 let has_sidecar_chunks = attachment
2462 .get("path")
2463 .and_then(Value::as_str)
2464 .map(local_path_from_zotero_path)
2465 .as_deref()
2466 .map(Path::new)
2467 .and_then(Path::parent)
2468 .is_some_and(|dir| {
2469 machine_artifact_exists_in_sidecar(dir, MachineArtifactKind::Chunks)
2470 });
2471 if has_sidecar_chunks {
2472 return true;
2473 }
2474
2475 attachment
2477 .get("title")
2478 .and_then(Value::as_str)
2479 .is_some_and(|title| title.ends_with("zotron-chunks.jsonl"))
2480 })
2481 }))
2482}
2483
2484fn local_path_from_zotero_path(path: &str) -> String {
2485 if is_wsl() && path.as_bytes().get(1) == Some(&b':') {
2486 return ProcessCommand::new("wslpath")
2487 .arg("-u")
2488 .arg(path)
2489 .output()
2490 .ok()
2491 .filter(|output| output.status.success())
2492 .and_then(|output| String::from_utf8(output.stdout).ok())
2493 .map(|converted| converted.trim().to_string())
2494 .filter(|converted| !converted.is_empty())
2495 .unwrap_or_else(|| path.to_string());
2496 }
2497 path.to_string()
2498}
2499
2500fn has_ocr_note(client: &mut impl RpcCaller, item_key: &Value) -> Result<bool, String> {
2501 let notes = client.call(
2502 "notes.list",
2503 Some(serde_json::json!({"parentKey": item_key.clone()})),
2504 )?;
2505 Ok(notes.as_array().is_some_and(|notes| {
2506 notes.iter().any(|note| {
2507 note.get("tags")
2508 .and_then(Value::as_array)
2509 .is_some_and(|tags| tags.iter().any(tag_is_ocr))
2510 })
2511 }))
2512}
2513
2514fn tag_is_ocr(tag: &Value) -> bool {
2515 tag.as_str() == Some("ocr")
2516 || tag
2517 .get("tag")
2518 .and_then(Value::as_str)
2519 .is_some_and(|tag| tag == "ocr")
2520}
2521
2522fn find_collection_in_tree(
2523 client: &mut impl RpcCaller,
2524 collection: &str,
2525) -> Result<Option<Value>, String> {
2526 let tree = client.call("collections.tree", None)?;
2527 let nodes = tree
2528 .as_array()
2529 .ok_or_else(|| "collections.tree returned non-array result".to_string())?;
2530 Ok(search_collection_tree(nodes, collection).cloned())
2531}
2532
2533fn search_collection_tree<'a>(nodes: &'a [Value], collection: &str) -> Option<&'a Value> {
2534 for node in nodes {
2535 if node.get("key").and_then(Value::as_str) == Some(collection)
2536 || node.get("name").and_then(Value::as_str) == Some(collection)
2537 {
2538 return Some(node);
2539 }
2540 if let Some(children) = node.get("children").and_then(Value::as_array) {
2541 if let Some(found) = search_collection_tree(children, collection) {
2542 return Some(found);
2543 }
2544 }
2545 }
2546 None
2547}
2548
2549fn run_command(command: Command, client: &mut impl RpcCaller) -> Result<String, String> {
2550 if let Command::Export(args) = command {
2551 return run_export(args, client);
2552 }
2553
2554 let (value, style) = match command {
2555 Command::Ping { .. } => (
2556 call_json(client, "system.ping", None)?,
2557 JsonStyle::PythonCompact,
2558 ),
2559 Command::Rpc {
2560 method,
2561 params_json,
2562 paginate,
2563 page_size,
2564 ..
2565 } => {
2566 let params = serde_json::from_str::<Value>(¶ms_json)
2567 .map_err(|err| format!("INVALID_JSON: params must be a JSON object: {err}"))?;
2568 if !params.is_object() {
2569 return Err("INVALID_JSON: params must be a JSON object".to_string());
2570 }
2571 if paginate {
2572 (
2573 paginate_rpc(client, &method, params, page_size)?,
2574 JsonStyle::Pretty,
2575 )
2576 } else {
2577 (call_json(client, &method, Some(params))?, JsonStyle::Pretty)
2578 }
2579 }
2580 Command::Push {
2581 json_file,
2582 pdf,
2583 collection,
2584 on_duplicate,
2585 dry_run,
2586 ..
2587 } => return run_push_command(json_file, pdf, collection, on_duplicate, dry_run, client),
2588 Command::System { command } => run_system_command(command, client)?,
2589 Command::Search(args) => {
2590 if let Some(mgmt) = args.management {
2591 run_search_management_command(mgmt, client)?
2592 } else {
2593 run_search(args, client)?
2594 }
2595 }
2596 Command::Items { command } => run_items_command(command, client)?,
2597 Command::Collections { command } => run_collections_command(command, client)?,
2598 Command::Notes { command } => run_notes_command(command, client)?,
2599 Command::Attachments { command } => run_attachments_command(command, client)?,
2600 Command::Settings { command } => run_settings_command(command, client)?,
2601 Command::Tags { command } => run_tags_command(command, client)?,
2602 Command::Annotations { command } => run_annotations_command(command, client)?,
2603 Command::Ocr { command } => {
2604 return run_ocr_command(command, client);
2605 }
2606 Command::Rag { command } => {
2607 return run_rag_command(command, client);
2608 }
2609 Command::Export(_) => unreachable!("export commands return raw output above"),
2610 Command::FindPdfs {
2611 collection, limit, ..
2612 } => run_find_pdfs_command(client, collection, limit)?,
2613 };
2614
2615 format_json(&value, style)
2616}
2617
2618fn run_rag_command(command: RagCommand, client: &mut impl RpcCaller) -> Result<String, String> {
2619 match command {
2620 RagCommand::Providers => format_json(
2621 &serde_json::json!({
2622 "providers": [
2623 embedding_provider_spec("volcengine")?,
2624 embedding_provider_spec("alibaba")?,
2625 embedding_provider_spec("custom")?,
2626 ],
2627 }),
2628 JsonStyle::Pretty,
2629 ),
2630 RagCommand::Embed {
2631 provider,
2632 input,
2633 endpoint,
2634 model,
2635 input_type,
2636 api_key_env,
2637 } => {
2638 let value = run_embedding_provider_json_command(
2639 provider,
2640 input,
2641 endpoint,
2642 model,
2643 input_type,
2644 api_key_env,
2645 )?;
2646 format_json(&value, JsonStyle::PythonCompact)
2647 }
2648 RagCommand::Status { collection, .. } => {
2649 let value = rag_status_value(client, &collection)?;
2650 format_json(&value, JsonStyle::PythonCompact)
2651 }
2652 RagCommand::Search {
2653 query,
2654 collection,
2655 keys,
2656 zotero,
2657 top_spans_per_item,
2658 include_fulltext_spans,
2659 top_k,
2660 output,
2661 ..
2662 } => run_rag_search_command(
2663 client,
2664 RagSearchOptions {
2665 query,
2666 collection,
2667 keys,
2668 zotero,
2669 top_spans_per_item,
2670 include_fulltext_spans,
2671 top_k,
2672 output,
2673 },
2674 ),
2675 }
2676}
2677
2678fn run_embedding_provider_json_command(
2679 provider: String,
2680 input: String,
2681 endpoint: Option<String>,
2682 model: Option<String>,
2683 input_type: Option<String>,
2684 api_key_env: Option<String>,
2685) -> Result<Value, String> {
2686 let mut input: EmbeddingRequestInput = read_json_input(&input)?;
2687 if endpoint.is_some() {
2688 input.url = endpoint;
2689 }
2690 if model.is_some() {
2691 input.model = model;
2692 }
2693 if input_type.is_some() {
2694 input.input_type = input_type;
2695 }
2696 let mut transport = provider_http_transport(api_key_env.as_deref())?;
2697 let vectors = execute_embedding_provider_request(&provider, &input, &mut transport)?;
2698
2699 Ok(serde_json::json!({
2700 "provider": provider,
2701 "vectors": vectors,
2702 }))
2703}
2704
2705fn provider_http_transport(api_key_env: Option<&str>) -> Result<UreqProviderHttpTransport, String> {
2706 provider_http_transport_with_auth(api_key_env, "bearer")
2707}
2708
2709fn provider_http_transport_with_auth(
2710 api_key_env: Option<&str>,
2711 auth_scheme: &str,
2712) -> Result<UreqProviderHttpTransport, String> {
2713 let Some(env_name) = api_key_env else {
2714 return Ok(UreqProviderHttpTransport::new());
2715 };
2716 let token = env::var(env_name)
2717 .map_err(|_| format!("missing provider credential env var {env_name}"))?;
2718 if token.trim().is_empty() {
2719 return Err(format!("provider credential env var {env_name} is empty"));
2720 }
2721 let token = token.trim();
2722 match auth_scheme {
2723 "token" if token.starts_with("token ") => {
2724 Ok(UreqProviderHttpTransport::with_api_key(token.to_string()))
2725 }
2726 "token" => Ok(UreqProviderHttpTransport::with_api_key(format!(
2727 "token {token}"
2728 ))),
2729 "bearer" if token.starts_with("Bearer ") => {
2730 Ok(UreqProviderHttpTransport::with_api_key(token.to_string()))
2731 }
2732 "bearer" => Ok(UreqProviderHttpTransport::with_bearer_token(token)),
2733 "none" => Ok(UreqProviderHttpTransport::new()),
2734 other => Err(format!("unsupported provider auth scheme {other}")),
2735 }
2736}
2737
2738fn read_json_input<T: serde::de::DeserializeOwned>(path: &str) -> Result<T, String> {
2739 let payload = if path == "-" {
2740 let mut input = String::new();
2741 io::stdin()
2742 .read_to_string(&mut input)
2743 .map_err(|err| format!("read stdin: {err}"))?;
2744 input
2745 } else {
2746 fs::read_to_string(path).map_err(|err| format!("read {path}: {err}"))?
2747 };
2748 serde_json::from_str::<T>(&payload)
2749 .map_err(|err| format!("INVALID_JSON: Could not parse JSON: {err}"))
2750}
2751
2752fn fetch_embedding_settings(
2753 client: &mut impl RpcCaller,
2754) -> Result<(String, String, String, String), String> {
2755 let settings = client.call("settings.getAll", None)?;
2756 let provider = settings
2757 .get("embedding.provider")
2758 .and_then(Value::as_str)
2759 .unwrap_or("ollama")
2760 .to_string();
2761 let model = settings
2762 .get("embedding.model")
2763 .and_then(Value::as_str)
2764 .unwrap_or("")
2765 .to_string();
2766 let api_url = settings
2767 .get("embedding.apiUrl")
2768 .and_then(Value::as_str)
2769 .unwrap_or("")
2770 .to_string();
2771 let raw = client.call("settings.getRaw", Some(serde_json::json!({"key": "embedding.apiKey"})))?;
2772 let api_key = raw
2773 .get("embedding.apiKey")
2774 .and_then(Value::as_str)
2775 .unwrap_or("")
2776 .to_string();
2777 Ok((provider, model, api_url, api_key))
2778}
2779
2780fn fetch_retrieval_mode(client: &mut impl RpcCaller) -> String {
2781 client
2782 .call(
2783 "settings.get",
2784 Some(serde_json::json!({"key": "rag.retrievalMode"})),
2785 )
2786 .ok()
2787 .and_then(|v| {
2788 v.get("rag.retrievalMode")
2789 .and_then(Value::as_str)
2790 .map(String::from)
2791 })
2792 .unwrap_or_else(|| "hybrid".to_string())
2793}
2794
2795fn resolve_sidecar_paths(
2796 client: &mut impl RpcCaller,
2797 collection: Option<&str>,
2798 keys: &[String],
2799) -> Result<Vec<(String, String, PathBuf)>, String> {
2800 let items = if !keys.is_empty() {
2801 let mut items = Vec::new();
2802 for key in keys {
2803 let item = client.call("items.get", Some(serde_json::json!({"key": key})))?;
2804 items.push(item);
2805 }
2806 items
2807 } else if let Some(col) = collection {
2808 let col_key = resolve_collection(client, col)?;
2809 let response = client.call(
2810 "collections.getItems",
2811 Some(serde_json::json!({"key": col_key})),
2812 )?;
2813 collection_items(&response)
2814 } else {
2815 return Err("INVALID_ARGS: --collection or --key required".into());
2816 };
2817
2818 let mut results = Vec::new();
2819 for item in &items {
2820 let item_key = item.get("key").and_then(Value::as_str).unwrap_or_default();
2821 let attachments = client.call(
2822 "attachments.list",
2823 Some(serde_json::json!({"parentKey": item_key})),
2824 )?;
2825 let att_list = attachments
2826 .get("items")
2827 .and_then(Value::as_array)
2828 .or_else(|| attachments.as_array())
2829 .cloned()
2830 .unwrap_or_default();
2831 for att in &att_list {
2832 let content_type = att
2833 .get("contentType")
2834 .and_then(Value::as_str)
2835 .unwrap_or("");
2836 if content_type != "application/pdf" {
2837 continue;
2838 }
2839 let att_key = att.get("key").and_then(Value::as_str).unwrap_or_default();
2840 let path = att.get("path").and_then(Value::as_str).unwrap_or_default();
2841 if path.is_empty() {
2842 continue;
2843 }
2844 let local_path = local_path_from_zotero_path(path);
2845 let pdf_path = PathBuf::from(&local_path);
2846 if let Some(parent) = pdf_path.parent() {
2847 let sidecar_root = parent.join(".zotron");
2848 if sidecar_root.exists() {
2849 results.push((item_key.to_string(), att_key.to_string(), sidecar_root));
2850 }
2851 }
2852 }
2853 }
2854 Ok(results)
2855}
2856
2857fn load_sidecar_chunks(sidecar_root: &Path) -> Vec<StructureChunk> {
2858 let chunks_path = sidecar_root.join("chunks").join("chunks.v1.jsonl");
2859 let Ok(content) = fs::read_to_string(&chunks_path) else {
2860 return Vec::new();
2861 };
2862 content
2863 .lines()
2864 .filter(|line| !line.trim().is_empty())
2865 .filter_map(|line| serde_json::from_str::<StructureChunk>(line).ok())
2866 .collect()
2867}
2868
/// Derives the JSONL file name used for the vectors of a provider/model pair.
///
/// Both parts are trimmed, lower-cased and have `/` replaced by `-`; the result is
/// `<provider>--<model>.jsonl`, or `vectors.jsonl` when both parts are empty.
fn embedding_vector_filename(provider: &str, model: &str) -> String {
    let slug = |raw: &str| raw.trim().to_lowercase().replace('/', "-");
    let (provider_slug, model_slug) = (slug(provider), slug(model));
    match (provider_slug.is_empty(), model_slug.is_empty()) {
        (true, true) => String::from("vectors.jsonl"),
        _ => [provider_slug.as_str(), "--", model_slug.as_str(), ".jsonl"].concat(),
    }
}
2877
2878fn load_sidecar_vectors(sidecar_root: &Path, provider: &str, model: &str) -> Vec<EmbeddingVector> {
2879 let embeddings_dir = sidecar_root.join("embeddings");
2880 let target = embedding_vector_filename(provider, model);
2881 let target_path = embeddings_dir.join(&target);
2882 if let Ok(content) = fs::read_to_string(&target_path) {
2883 let vecs: Vec<EmbeddingVector> = content
2884 .lines()
2885 .filter(|line| !line.trim().is_empty())
2886 .filter_map(|line| serde_json::from_str(line).ok())
2887 .collect();
2888 if !vecs.is_empty() {
2889 return vecs;
2890 }
2891 }
2892 for legacy in &["vectors.v1.jsonl", "vectors.jsonl"] {
2894 let path = embeddings_dir.join(legacy);
2895 if let Ok(content) = fs::read_to_string(&path) {
2896 let vecs: Vec<EmbeddingVector> = content
2897 .lines()
2898 .filter(|line| !line.trim().is_empty())
2899 .filter_map(|line| serde_json::from_str::<EmbeddingVector>(line).ok())
2900 .filter(|v| v.source_provider == provider || provider.is_empty())
2901 .collect();
2902 if !vecs.is_empty() {
2903 return vecs;
2904 }
2905 }
2906 }
2907 Vec::new()
2908}
2909
2910fn embed_query_text(
2911 query: &str,
2912 provider: &str,
2913 model: &str,
2914 api_url: &str,
2915 api_key: &str,
2916) -> Result<Vec<f64>, String> {
2917 let input = EmbeddingRequestInput {
2918 item_key: "query".to_string(),
2919 chunks: vec![EmbeddingChunkInput {
2920 chunk_key: "q0".to_string(),
2921 text: query.to_string(),
2922 }],
2923 model: if model.is_empty() {
2924 None
2925 } else {
2926 Some(model.to_string())
2927 },
2928 url: if api_url.is_empty() {
2929 None
2930 } else {
2931 Some(api_url.to_string())
2932 },
2933 input_type: Some("query".to_string()),
2934 };
2935 let request = build_embedding_provider_request(provider, &input)?;
2936 let url = request
2937 .url
2938 .as_deref()
2939 .ok_or("no embedding URL configured")?;
2940 let mut http = ureq::post(url).set("Content-Type", "application/json");
2941 if let Some(auth) = request.auth_header {
2942 if !api_key.is_empty() {
2943 http = http.set(auth, &format!("Bearer {api_key}"));
2944 }
2945 }
2946 let resp = http
2947 .send_json(&request.body)
2948 .map_err(|e| format!("embedding request failed: {e}"))?;
2949 let payload: Value = resp
2950 .into_json()
2951 .map_err(|e| format!("embedding response parse: {e}"))?;
2952 let vectors =
2953 parse_embedding_provider_response(provider, &payload, "query", &input.chunks)?;
2954 vectors
2955 .into_iter()
2956 .next()
2957 .map(|v| v.vector)
2958 .ok_or_else(|| "no embedding vector returned".to_string())
2959}
2960
2961fn run_rag_search_xpi_fallback(
2962 client: &mut impl RpcCaller,
2963 options: &RagSearchOptions,
2964) -> Result<String, String> {
2965 let mut params = serde_json::json!({
2966 "query": options.query,
2967 "limit": options.top_k,
2968 "top_spans_per_item": options.top_spans_per_item,
2969 "include_fulltext_spans": options.include_fulltext_spans,
2970 });
2971 if let Some(map) = params.as_object_mut() {
2972 if let Some(col) = &options.collection {
2973 map.insert("collection".into(), Value::String(col.clone()));
2974 }
2975 if !options.keys.is_empty() {
2976 map.insert(
2977 "keys".into(),
2978 Value::Array(options.keys.iter().map(|k| Value::String(k.clone())).collect()),
2979 );
2980 }
2981 }
2982 let payload = client.call("rag.searchHits", Some(params))?;
2983 let hits = payload
2984 .get("hits")
2985 .and_then(Value::as_array)
2986 .cloned()
2987 .unwrap_or_default();
2988 if options.output == "jsonl" {
2989 let mut out = String::new();
2990 for hit in &hits {
2991 out.push_str(&serde_json::to_string(hit).map_err(|e| e.to_string())?);
2992 out.push('\n');
2993 }
2994 Ok(out)
2995 } else {
2996 let total = hits.len() as u64;
2997 format_json(
2998 &normalize_list_envelope(
2999 serde_json::json!({"items": hits, "total": total}),
3000 "items",
3001 Some(options.top_k),
3002 0,
3003 ),
3004 JsonStyle::Pretty,
3005 )
3006 }
3007}
3008
/// Runs a RAG search, preferring the local sidecar index and falling back to the
/// `rag.searchHits` RPC path (`run_rag_search_xpi_fallback`).
///
/// The fallback is used when `options.zotero` is set, when sidecar resolution fails
/// or finds nothing, or when the sidecars contain no chunks. Otherwise chunks and
/// stored vectors are loaded from the sidecars, ranked lexically and/or by vector
/// similarity depending on the retrieval mode, capped per item, enriched with item
/// metadata and rendered as JSONL or a pretty-printed list envelope.
///
/// # Errors
/// `INVALID_ARGS: …` when neither a collection nor keys are given, plus errors from
/// fetching the embedding settings, serialising hits, or the fallback path.
fn run_rag_search_command(
    client: &mut impl RpcCaller,
    options: RagSearchOptions,
) -> Result<String, String> {
    // Explicit request to search through Zotero: validate the scope, then delegate.
    if options.zotero {
        if options.collection.is_none() && options.keys.is_empty() {
            return Err(
                "INVALID_ARGS: --collection or --key is required".to_string(),
            );
        }
        return run_rag_search_xpi_fallback(client, &options);
    }

    // The local path needs a scope as well.
    if options.collection.is_none() && options.keys.is_empty() {
        return Err("INVALID_ARGS: --collection or --key required".to_string());
    }

    let sidecars = resolve_sidecar_paths(
        client,
        options.collection.as_deref(),
        &options.keys,
    );

    // Any resolution error, or an empty result, falls back instead of failing.
    let sidecars = match sidecars {
        Ok(ref s) if !s.is_empty() => s,
        _ => return run_rag_search_xpi_fallback(client, &options),
    };

    // Load every chunk and every stored vector matching the configured provider/model.
    let (emb_provider, emb_model, emb_url, emb_key) = fetch_embedding_settings(client)?;
    let mut all_chunks: Vec<StructureChunk> = Vec::new();
    let mut all_vectors: Vec<EmbeddingVector> = Vec::new();
    for (_item_key, _att_key, sidecar_root) in sidecars {
        all_chunks.extend(load_sidecar_chunks(sidecar_root));
        all_vectors.extend(load_sidecar_vectors(sidecar_root, &emb_provider, &emb_model));
    }

    if all_chunks.is_empty() {
        return run_rag_search_xpi_fallback(client, &options);
    }

    let mode = fetch_retrieval_mode(client);

    // Lexical ranking runs for every mode except "dense".
    // NOTE(review): 1.2 / 0.75 are presumably the BM25 k1 and b parameters; confirm
    // against `bm25_score_chunks`.
    let bm25_ranked = if mode != "dense" {
        bm25_score_chunks(&all_chunks, &options.query, 1.2, 0.75)
    } else {
        Vec::new()
    };

    // Dense ranking runs for every mode except "lexical", and only when stored
    // vectors exist. Chunks without a stored vector and non-positive similarities
    // are dropped.
    let dense_ranked = if mode != "lexical" && !all_vectors.is_empty() {
        match embed_query_text(&options.query, &emb_provider, &emb_model, &emb_url, &emb_key) {
            Ok(query_vec) => {
                let vec_map: std::collections::HashMap<&str, &[f64]> = all_vectors
                    .iter()
                    .map(|v| (v.chunk_key.as_str(), v.vector.as_slice()))
                    .collect();
                let mut scores: Vec<(usize, f64)> = all_chunks
                    .iter()
                    .enumerate()
                    .filter_map(|(i, chunk)| {
                        vec_map.get(chunk.chunk_key.as_str()).map(|stored| {
                            (i, cosine_similarity(&query_vec, stored))
                        })
                    })
                    .filter(|(_, s)| *s > 0.0)
                    .collect();
                // Highest similarity first; incomparable scores are treated as equal.
                scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
                scores
            }
            // A failed query embedding is swallowed: the dense ranking is simply
            // empty (in "dense" mode this means no results at all).
            Err(_) => Vec::new(),
        }
    } else {
        Vec::new()
    };

    // Merge both rankings when both exist, otherwise take whichever one is available.
    // NOTE(review): 60.0 is presumably the reciprocal-rank-fusion constant; confirm
    // against `rrf_merge`.
    let limit = options.top_k as usize;
    let ranked = if !bm25_ranked.is_empty() && !dense_ranked.is_empty() {
        rrf_merge(&bm25_ranked, &dense_ranked, 60.0, limit)
    } else if !bm25_ranked.is_empty() {
        bm25_ranked.into_iter().take(limit).collect()
    } else {
        dense_ranked.into_iter().take(limit).collect()
    };

    // Keep at most `top_spans_per_item` hits per item. This runs after the `limit`
    // truncation above, so fewer than `top_k` hits may remain.
    let mut per_item_count: std::collections::HashMap<&str, u64> =
        std::collections::HashMap::new();
    let mut selected: Vec<(usize, f64)> = Vec::new();
    for (idx, score) in &ranked {
        let item_key = all_chunks[*idx].item_key.as_str();
        let count = per_item_count.entry(item_key).or_insert(0);
        if *count < options.top_spans_per_item {
            *count += 1;
            selected.push((*idx, *score));
        }
    }

    // Attach item metadata to each hit, fetching every item at most once.
    let mut meta_cache: std::collections::HashMap<String, Value> =
        std::collections::HashMap::new();
    let mut hits: Vec<Value> = Vec::new();
    for (idx, score) in &selected {
        let chunk = &all_chunks[*idx];
        let meta = if let Some(cached) = meta_cache.get(&chunk.item_key) {
            cached.clone()
        } else {
            // A failed lookup is cached as `null`, leaving title/authors/year empty.
            let fetched = client
                .call(
                    "items.get",
                    Some(serde_json::json!({"key": chunk.item_key})),
                )
                .unwrap_or(Value::Null);
            meta_cache.insert(chunk.item_key.clone(), fetched.clone());
            fetched
        };
        let title = meta
            .get("title")
            .and_then(Value::as_str)
            .unwrap_or("")
            .to_string();
        // Each creator is rendered as last name immediately followed by first name
        // (no separator); creators with neither are skipped.
        let authors = meta
            .get("creators")
            .and_then(Value::as_array)
            .map(|creators| {
                creators
                    .iter()
                    .filter_map(|c| {
                        let last = c.get("lastName").and_then(Value::as_str).unwrap_or("");
                        let first = c.get("firstName").and_then(Value::as_str).unwrap_or("");
                        if last.is_empty() && first.is_empty() {
                            None
                        } else {
                            Some(format!("{last}{first}"))
                        }
                    })
                    .collect::<Vec<_>>()
                    .join(", ")
            })
            .unwrap_or_default();
        // "year" carries the item's `date` string verbatim.
        let year = meta.get("date").and_then(Value::as_str).unwrap_or("");
        let mut hit = serde_json::json!({
            "item_key": chunk.item_key,
            "chunk_key": chunk.chunk_key,
            "title": title,
            "authors": authors,
            "year": year,
            "text": chunk.text,
            "page_range": chunk.page_range,
            "section_path": chunk.section_path,
            "score": score,
        });
        if options.include_fulltext_spans {
            // `hit` was built as a JSON object literal above, so this cannot fail.
            hit.as_object_mut().unwrap().insert(
                "attachment_key".to_string(),
                Value::String(chunk.attachment_key.clone()),
            );
        }
        hits.push(hit);
    }

    // Render: one JSON object per line for "jsonl", otherwise a list envelope.
    if options.output == "jsonl" {
        let mut out = String::new();
        for hit in &hits {
            out.push_str(&serde_json::to_string(hit).map_err(|e| e.to_string())?);
            out.push('\n');
        }
        Ok(out)
    } else {
        let total = hits.len() as u64;
        format_json(
            &normalize_list_envelope(
                serde_json::json!({"items": hits, "total": total}),
                "items",
                Some(options.top_k),
                0,
            ),
            JsonStyle::Pretty,
        )
    }
}
3198
3199fn rag_status_value(client: &mut impl RpcCaller, collection: &str) -> Result<Value, String> {
3200 let raw_store_path = rag_store_path(collection);
3201 if raw_store_path.exists() {
3202 return rag_status_from_store(collection, &raw_store_path);
3203 }
3204
3205 let mut store_candidates = Vec::new();
3206 let collection_match = find_collection_in_tree(client, collection)?;
3207 if let Some(collection_node) = collection_match.as_ref() {
3208 if let Some(name) = collection_node.get("name").and_then(Value::as_str) {
3209 store_candidates.push(rag_store_path(name));
3210 }
3211 if let Some(key) = collection_node.get("key").and_then(Value::as_str) {
3212 store_candidates.push(rag_store_path(key));
3213 }
3214 }
3215 for store_path in unique_paths(store_candidates) {
3216 if store_path.exists() {
3217 return rag_status_from_store(collection, &store_path);
3218 }
3219 }
3220
3221 rag_status_from_zotero_sidecars(client, collection, collection_match)
3222}
3223
/// Removes duplicate paths while keeping the first occurrence of each, in order.
fn unique_paths(paths: Vec<PathBuf>) -> Vec<PathBuf> {
    paths.into_iter().fold(Vec::new(), |mut kept, candidate| {
        if !kept.contains(&candidate) {
            kept.push(candidate);
        }
        kept
    })
}
3233
3234fn rag_status_from_store(collection: &str, store_path: &Path) -> Result<Value, String> {
3235 let raw = fs::read_to_string(store_path)
3236 .map_err(|err| format!("read RAG store {}: {err}", store_path.display()))?;
3237 let store: Value = serde_json::from_str(&raw)
3238 .map_err(|err| format!("parse RAG store {}: {err}", store_path.display()))?;
3239 let chunks = store
3240 .get("chunks")
3241 .and_then(Value::as_array)
3242 .cloned()
3243 .unwrap_or_default();
3244 let mut item_keys = Vec::<Value>::new();
3245 for chunk in &chunks {
3246 let Some(item_key) = chunk.get("item_key") else {
3247 continue;
3248 };
3249 if !item_keys.iter().any(|seen| seen == item_key) {
3250 item_keys.push(item_key.clone());
3251 }
3252 }
3253 Ok(serde_json::json!({
3254 "status": "indexed",
3255 "collection": store.get("collection").and_then(Value::as_str).unwrap_or(collection),
3256 "collection_key": store.get("collection_key").cloned().unwrap_or(Value::Null),
3257 "model": store.get("model").cloned().unwrap_or(Value::String("unknown".to_string())),
3258 "total_chunks": chunks.len(),
3259 "total_items": item_keys.len(),
3260 "store_path": store_path.to_string_lossy(),
3261 }))
3262}
3263
3264fn rag_status_from_zotero_sidecars(
3265 client: &mut impl RpcCaller,
3266 collection: &str,
3267 collection_match: Option<Value>,
3268) -> Result<Value, String> {
3269 let collection_key = collection_match
3270 .as_ref()
3271 .and_then(|node| node.get("key").cloned())
3272 .ok_or_else(|| format!("COLLECTION_NOT_FOUND: Collection not found: {collection:?}"))?;
3273 let raw = paginate_rpc(
3274 client,
3275 "collections.getItems",
3276 serde_json::json!({"key": collection_key}),
3277 500,
3278 )?;
3279 let items = raw
3280 .get("items")
3281 .and_then(Value::as_array)
3282 .or_else(|| raw.as_array())
3283 .ok_or_else(|| "collections.getItems returned non-array/non-items result".to_string())?
3284 .clone();
3285
3286 let mut indexed_items = 0usize;
3287 let mut total_chunks = 0usize;
3288 for item in &items {
3289 let item_key = item.get("key").cloned().unwrap_or(Value::Null);
3290 let chunk_count = sidecar_chunk_count_for_item(client, &item_key)?;
3291 if chunk_count > 0 {
3292 indexed_items += 1;
3293 total_chunks += chunk_count;
3294 }
3295 }
3296
3297 if indexed_items == 0 {
3298 return Ok(serde_json::json!({
3299 "status": "not indexed",
3300 "collection": collection,
3301 "total_items": items.len(),
3302 "indexed_items": 0,
3303 }));
3304 }
3305
3306 Ok(serde_json::json!({
3307 "status": "indexed",
3308 "collection": collection,
3309 "total_chunks": total_chunks,
3310 "total_items": indexed_items,
3311 "collection_items": items.len(),
3312 "source": "zotero-sidecar",
3313 }))
3314}
3315
3316fn sidecar_chunk_count_for_item(
3317 client: &mut impl RpcCaller,
3318 item_key: &Value,
3319) -> Result<usize, String> {
3320 let attachments = client.call(
3321 "attachments.list",
3322 Some(serde_json::json!({"parentKey": item_key.clone()})),
3323 )?;
3324 let Some(attachments) = attachments.as_array() else {
3325 return Ok(0);
3326 };
3327
3328 let mut count = 0usize;
3329 for attachment in attachments {
3330 let Some(path) = attachment.get("path").and_then(Value::as_str) else {
3331 continue;
3332 };
3333 let local = local_path_from_zotero_path(path);
3334 let Some(dir) = Path::new(&local).parent() else {
3335 continue;
3336 };
3337 let Ok(bytes) = read_machine_artifact_sidecar(dir, MachineArtifactKind::Chunks) else {
3338 continue;
3339 };
3340 let text = String::from_utf8_lossy(&bytes);
3341 count += text.lines().filter(|line| !line.trim().is_empty()).count();
3342 }
3343 Ok(count)
3344}
3345
3346fn rag_store_path(collection: &str) -> PathBuf {
3347 rag_store_root().join(format!("{collection}.json"))
3348}
3349
3350fn rag_store_root() -> PathBuf {
3351 let xdg_data_home = env::var_os("XDG_DATA_HOME")
3352 .filter(|path| !path.is_empty())
3353 .map(PathBuf::from);
3354 let appdata = env::var_os("APPDATA")
3355 .filter(|path| !path.is_empty())
3356 .map(PathBuf::from);
3357 let userprofile = env::var_os("USERPROFILE")
3358 .filter(|path| !path.is_empty())
3359 .map(PathBuf::from);
3360 let home = env::var_os("HOME")
3361 .filter(|path| !path.is_empty())
3362 .map(PathBuf::from);
3363
3364 rag_store_root_for_platform(
3365 ArtifactStorePlatform::current(),
3366 xdg_data_home.as_deref(),
3367 appdata.as_deref(),
3368 userprofile.as_deref(),
3369 home.as_deref(),
3370 )
3371}
3372
3373fn rag_store_root_for_platform(
3374 platform: ArtifactStorePlatform,
3375 xdg_data_home: Option<&Path>,
3376 appdata: Option<&Path>,
3377 userprofile: Option<&Path>,
3378 home: Option<&Path>,
3379) -> PathBuf {
3380 match platform {
3381 ArtifactStorePlatform::Windows => {
3382 if let Some(path) = appdata {
3383 return path.join("Zotron").join("rag");
3384 }
3385 if let Some(path) = userprofile {
3386 return path
3387 .join("AppData")
3388 .join("Roaming")
3389 .join("Zotron")
3390 .join("rag");
3391 }
3392 if let Some(path) = home {
3393 return path
3394 .join("AppData")
3395 .join("Roaming")
3396 .join("Zotron")
3397 .join("rag");
3398 }
3399 PathBuf::from(".zotron").join("rag")
3400 }
3401 ArtifactStorePlatform::Macos => {
3402 if let Some(path) = home {
3403 return path
3404 .join("Library")
3405 .join("Application Support")
3406 .join("Zotron")
3407 .join("rag");
3408 }
3409 if let Some(path) = xdg_data_home {
3410 return path.join("zotron").join("rag");
3411 }
3412 PathBuf::from(".zotron").join("rag")
3413 }
3414 ArtifactStorePlatform::Linux | ArtifactStorePlatform::Other => xdg_data_home
3415 .map(|path| path.join("zotron").join("rag"))
3416 .or_else(|| {
3417 home.map(|path| path.join(".local").join("share").join("zotron").join("rag"))
3418 })
3419 .unwrap_or_else(|| PathBuf::from(".zotron").join("rag")),
3420 }
3421}
3422
3423fn run_push_command(
3424 json_file: String,
3425 pdf: Option<String>,
3426 collection: Option<String>,
3427 on_duplicate: String,
3428 dry_run: bool,
3429 client: &mut impl RpcCaller,
3430) -> Result<String, String> {
3431 if !matches!(on_duplicate.as_str(), "skip" | "update" | "create") {
3432 return Err(format!(
3433 "INVALID_ARGS: --on-duplicate must be skip|update|create, got {on_duplicate:?}"
3434 ));
3435 }
3436
3437 let payload = if json_file == "-" {
3438 let mut input = String::new();
3439 io::stdin()
3440 .read_to_string(&mut input)
3441 .map_err(|err| format!("read stdin: {err}"))?;
3442 input
3443 } else {
3444 fs::read_to_string(&json_file).map_err(|err| format!("read {json_file}: {err}"))?
3445 };
3446 let item_json = serde_json::from_str::<Value>(&payload)
3447 .map_err(|err| format!("INVALID_JSON: Could not parse JSON: {err}"))?;
3448
3449 if dry_run {
3450 let collection_key = collection
3451 .as_deref()
3452 .map(|name| resolve_collection(client, name))
3453 .transpose()?;
3454 return format_json(
3455 &serde_json::json!({
3456 "ok": true,
3457 "dryRun": true,
3458 "wouldPush": {
3459 "title": item_json.get("title").cloned().unwrap_or(Value::Null),
3460 "itemType": item_json.get("itemType").cloned().unwrap_or(Value::Null),
3461 "collectionKey": collection_key,
3462 "pdfPath": pdf,
3463 "onDuplicate": on_duplicate,
3464 }
3465 }),
3466 JsonStyle::PythonCompact,
3467 );
3468 }
3469
3470 let result = push_item(
3471 client,
3472 &item_json,
3473 pdf.as_deref(),
3474 collection.as_deref(),
3475 &on_duplicate,
3476 )?;
3477 format_json(&result, JsonStyle::PythonCompact)
3478}
3479
3480fn push_item(
3481 client: &mut impl RpcCaller,
3482 item_json: &Value,
3483 pdf_path: Option<&str>,
3484 collection: Option<&str>,
3485 on_duplicate: &str,
3486) -> Result<Value, String> {
3487 let pdf_size = if let Some(path) = pdf_path {
3488 validate_pdf_magic(path)?
3489 } else {
3490 0
3491 };
3492
3493 let collection_key = match collection {
3494 Some(name) => resolve_collection(client, name)?,
3495 None => resolve_current_collection(client)?,
3496 };
3497
3498 let dup_id = find_duplicate(client, item_json)?;
3499 if let Some(dup_id) = dup_id.as_deref().filter(|_| on_duplicate == "skip") {
3500 if !is_library_root(&collection_key) {
3501 client.call(
3502 "collections.addItems",
3503 Some(serde_json::json!({"key": collection_key, "keys": [dup_id]})),
3504 )?;
3505 }
3506 let mut pdf_attached = false;
3507 if let Some(path) = pdf_path {
3508 if !item_has_pdf_attachment(client, dup_id)? {
3509 attach_pdf(client, dup_id, path)?;
3510 pdf_attached = true;
3511 }
3512 }
3513 return Ok(push_result(
3514 "skipped_duplicate",
3515 Some(dup_id.to_string()),
3516 pdf_attached,
3517 if pdf_attached { pdf_size } else { 0 },
3518 Value::Null,
3519 ));
3520 }
3521
3522 let xpi_payload = to_xpi_payload(item_json, Some(&collection_key));
3523 let (item_key, status) =
3524 if let Some(dup_id) = dup_id.as_deref().filter(|_| on_duplicate == "update") {
3525 let mut params = serde_json::Map::new();
3526 params.insert("key".to_string(), Value::String(dup_id.to_string()));
3527 params.insert(
3528 "fields".to_string(),
3529 xpi_payload
3530 .get("fields")
3531 .cloned()
3532 .unwrap_or_else(|| serde_json::json!({})),
3533 );
3534 if let Some(creators) = xpi_payload.get("creators") {
3535 params.insert("creators".to_string(), creators.clone());
3536 }
3537 if let Some(tags) = xpi_payload.get("tags") {
3538 params.insert("tags".to_string(), tags.clone());
3539 }
3540 client.call("items.update", Some(Value::Object(params)))?;
3541 (dup_id.to_string(), "updated")
3542 } else {
3543 let created = client.call("items.create", Some(xpi_payload))?;
3544 let key = created
3545 .get("key")
3546 .and_then(Value::as_str)
3547 .ok_or_else(|| format!("items.create returned unexpected shape: {created:?}"))?;
3548 (key.to_string(), "created")
3549 };
3550
3551 let mut pdf_attached = false;
3552 if let Some(path) = pdf_path {
3553 if status != "updated" || !item_has_pdf_attachment(client, &item_key)? {
3554 attach_pdf(client, &item_key, path)?;
3555 pdf_attached = true;
3556 }
3557 }
3558
3559 if status == "updated" && !is_library_root(&collection_key) {
3560 client.call(
3561 "collections.addItems",
3562 Some(serde_json::json!({"key": collection_key, "keys": [item_key]})),
3563 )?;
3564 }
3565
3566 Ok(push_result(
3567 status,
3568 Some(item_key),
3569 pdf_attached,
3570 if pdf_attached { pdf_size } else { 0 },
3571 Value::Null,
3572 ))
3573}
3574
/// Checks that the file at `path` starts with the `%PDF-` magic bytes and
/// returns its size in bytes.
///
/// Only the first five bytes are read; the size comes from the file metadata,
/// so large PDFs are not loaded into memory just to be validated.
///
/// # Errors
/// Returns the same `INVALID_PDF` message whether the file cannot be opened or
/// read, is shorter than the magic, or starts with different bytes.
fn validate_pdf_magic(path: &str) -> Result<u64, String> {
    const PDF_MAGIC: &[u8] = b"%PDF-";
    let invalid = || format!("INVALID_PDF: {path} does not start with %PDF- magic bytes");

    let file = fs::File::open(path).map_err(|_| invalid())?;

    // `take` + `read_to_end` copes with short reads and with files that are
    // shorter than the magic itself.
    let mut header = Vec::with_capacity(PDF_MAGIC.len());
    (&file)
        .take(PDF_MAGIC.len() as u64)
        .read_to_end(&mut header)
        .map_err(|_| invalid())?;
    if header != PDF_MAGIC {
        return Err(invalid());
    }

    file.metadata()
        .map(|meta| meta.len())
        .map_err(|_| invalid())
}
3585
3586fn resolve_current_collection(client: &mut impl RpcCaller) -> Result<Value, String> {
3587 let selected = client.call("system.currentCollection", None)?;
3588 Ok(selected
3589 .get("key")
3590 .cloned()
3591 .unwrap_or_else(|| Value::Number(0.into())))
3592}
3593
3594fn find_duplicate(
3595 client: &mut impl RpcCaller,
3596 item_json: &Value,
3597) -> Result<Option<String>, String> {
3598 if let Some(doi) = item_json
3599 .get("DOI")
3600 .and_then(Value::as_str)
3601 .filter(|doi| !doi.is_empty())
3602 {
3603 let hits = client.call("search.byIdentifier", Some(serde_json::json!({"doi": doi})))?;
3604 if let Some(key) = first_hit_key(&hits) {
3605 return Ok(Some(key));
3606 }
3607 }
3608
3609 if let Some(title) = item_json
3610 .get("title")
3611 .and_then(Value::as_str)
3612 .filter(|title| title.len() >= 10)
3613 {
3614 let hits = client.call(
3615 "search.quick",
3616 Some(serde_json::json!({"query": title, "limit": 20})),
3617 )?;
3618 if let Some(items) = response_items(&hits) {
3619 for item in items {
3620 if item.get("title").and_then(Value::as_str) == Some(title) {
3621 if let Some(key) = item.get("key").and_then(Value::as_str) {
3622 return Ok(Some(key.to_string()));
3623 }
3624 }
3625 }
3626 }
3627 }
3628
3629 Ok(None)
3630}
3631
3632fn first_hit_key(response: &Value) -> Option<String> {
3633 response_items(response)?
3634 .first()?
3635 .get("key")?
3636 .as_str()
3637 .map(ToString::to_string)
3638}
3639
3640fn response_items(response: &Value) -> Option<&Vec<Value>> {
3641 response
3642 .get("items")
3643 .and_then(Value::as_array)
3644 .or_else(|| response.as_array())
3645}
3646
3647fn to_xpi_payload(item_json: &Value, collection_key: Option<&Value>) -> Value {
3648 const NON_FIELD_KEYS: &[&str] = &[
3649 "itemType",
3650 "creators",
3651 "tags",
3652 "collections",
3653 "attachments",
3654 "relations",
3655 "notes",
3656 "id",
3657 "key",
3658 "version",
3659 ];
3660
3661 let mut fields = serde_json::Map::new();
3662 if let Some(item) = item_json.as_object() {
3663 for (key, value) in item {
3664 if !NON_FIELD_KEYS.contains(&key.as_str()) && !value.is_null() && value != "" {
3665 fields.insert(key.clone(), value.clone());
3666 }
3667 }
3668 }
3669
3670 let mut payload = serde_json::Map::new();
3671 payload.insert(
3672 "itemType".to_string(),
3673 item_json
3674 .get("itemType")
3675 .cloned()
3676 .unwrap_or_else(|| Value::String("journalArticle".to_string())),
3677 );
3678 payload.insert("fields".to_string(), Value::Object(fields));
3679
3680 if let Some(creators) = item_json.get("creators").and_then(Value::as_array) {
3681 if !creators.is_empty() {
3682 payload.insert(
3683 "creators".to_string(),
3684 Value::Array(
3685 creators
3686 .iter()
3687 .map(|creator| {
3688 serde_json::json!({
3689 "firstName": creator.get("firstName").and_then(Value::as_str).unwrap_or(""),
3690 "lastName": creator.get("lastName").and_then(Value::as_str).unwrap_or(""),
3691 "creatorType": creator.get("creatorType").and_then(Value::as_str).unwrap_or("author"),
3692 })
3693 })
3694 .collect(),
3695 ),
3696 );
3697 }
3698 }
3699
3700 if let Some(tags) = item_json.get("tags").and_then(Value::as_array) {
3701 if !tags.is_empty() {
3702 payload.insert(
3703 "tags".to_string(),
3704 Value::Array(
3705 tags.iter()
3706 .map(|tag| tag.get("tag").cloned().unwrap_or_else(|| tag.clone()))
3707 .collect(),
3708 ),
3709 );
3710 }
3711 }
3712
3713 if let Some(collection_key) = collection_key.filter(|key| !is_library_root(key)) {
3714 payload.insert(
3715 "collections".to_string(),
3716 Value::Array(vec![collection_key.clone()]),
3717 );
3718 }
3719
3720 Value::Object(payload)
3721}
3722
3723fn item_has_pdf_attachment(client: &mut impl RpcCaller, item_key: &str) -> Result<bool, String> {
3724 let attachments = client.call(
3725 "attachments.list",
3726 Some(serde_json::json!({"parentKey": item_key})),
3727 )?;
3728 Ok(has_pdf_attachment(&attachments))
3729}
3730
3731fn attach_pdf(client: &mut impl RpcCaller, item_key: &str, path: &str) -> Result<(), String> {
3732 client.call(
3733 "attachments.add",
3734 Some(serde_json::json!({
3735 "parentKey": item_key,
3736 "path": zotero_path(path),
3737 "title": "Full Text PDF",
3738 })),
3739 )?;
3740 Ok(())
3741}
3742
3743fn zotero_path(path: &str) -> String {
3744 let path = Path::new(path)
3745 .canonicalize()
3746 .unwrap_or_else(|_| Path::new(path).to_path_buf())
3747 .to_string_lossy()
3748 .into_owned();
3749 if is_wsl() {
3750 return ProcessCommand::new("wslpath")
3751 .arg("-w")
3752 .arg(&path)
3753 .output()
3754 .ok()
3755 .filter(|output| output.status.success())
3756 .and_then(|output| String::from_utf8(output.stdout).ok())
3757 .map(|converted| converted.trim().to_string())
3758 .filter(|converted| !converted.is_empty())
3759 .unwrap_or(path);
3760 }
3761 path
3762}
3763
/// Reports whether the process appears to run under WSL.
///
/// True when `WSL_DISTRO_NAME` is set, or when `/proc/sys/kernel/osrelease`
/// can be read and contains `microsoft` (compared case-insensitively).
fn is_wsl() -> bool {
    env::var_os("WSL_DISTRO_NAME").is_some()
        || match fs::read_to_string("/proc/sys/kernel/osrelease") {
            Ok(release) => release.to_ascii_lowercase().contains("microsoft"),
            Err(_) => false,
        }
}
3772
3773fn is_library_root(value: &Value) -> bool {
3774 value.as_i64() == Some(0) || value.as_u64() == Some(0)
3775}
3776
3777fn push_result(
3778 status: &str,
3779 zotero_item_key: Option<String>,
3780 pdf_attached: bool,
3781 pdf_size_bytes: u64,
3782 error: Value,
3783) -> Value {
3784 serde_json::json!({
3785 "status": status,
3786 "zotero_item_key": zotero_item_key,
3787 "pdf_attached": pdf_attached,
3788 "pdf_size_bytes": pdf_size_bytes,
3789 "error": error,
3790 })
3791}
3792
3793fn run_search(
3794 args: SearchArgs,
3795 client: &mut impl RpcCaller,
3796) -> Result<(Value, JsonStyle), String> {
3797 let SearchArgs {
3798 query, fulltext, author, after, before, journal, tag,
3799 doi, isbn, issn, collection, limit, offset, ..
3800 } = args;
3801
3802 let has_identifier = doi.is_some() || isbn.is_some() || issn.is_some();
3803 if has_identifier {
3804 let mut params = serde_json::Map::new();
3805 if let Some(doi) = doi { params.insert("doi".into(), Value::String(doi)); }
3806 if let Some(isbn) = isbn { params.insert("isbn".into(), Value::String(isbn)); }
3807 if let Some(issn) = issn { params.insert("issn".into(), Value::String(issn)); }
3808 let value = client.call("search.byIdentifier", Some(Value::Object(params)))?;
3809 return Ok((normalize_list_envelope(value, "items", None, 0), JsonStyle::Pretty));
3810 }
3811
3812 if fulltext {
3813 let query = query.ok_or("INVALID_ARGS: --fulltext requires a search query")?;
3814 let mut params = serde_json::json!({"query": query, "limit": limit});
3815 if let (Some(col), Some(map)) = (collection, params.as_object_mut()) {
3816 map.insert("collection".into(), resolve_collection(client, &col)?);
3817 }
3818 let value = client.call("search.fulltext", Some(params))?;
3819 return Ok((normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty));
3820 }
3821
3822 let has_filters = author.is_some() || after.is_some() || before.is_some()
3823 || journal.is_some() || tag.is_some();
3824 if has_filters {
3825 let mut conditions: Vec<Value> = Vec::new();
3826 if let Some(query) = &query {
3827 conditions.push(serde_json::json!({
3828 "field": "quicksearch-titleCreatorYear",
3829 "operator": "contains",
3830 "value": query,
3831 }));
3832 }
3833 if let Some(author) = author {
3834 conditions.push(serde_json::json!({
3835 "field": "creator", "operator": "contains", "value": author,
3836 }));
3837 }
3838 if let Some(after) = after {
3839 conditions.push(serde_json::json!({
3840 "field": "date", "operator": "isAfter", "value": after,
3841 }));
3842 }
3843 if let Some(before) = before {
3844 conditions.push(serde_json::json!({
3845 "field": "date", "operator": "isBefore", "value": before,
3846 }));
3847 }
3848 if let Some(journal) = journal {
3849 conditions.push(serde_json::json!({
3850 "field": "publicationTitle", "operator": "contains", "value": journal,
3851 }));
3852 }
3853 if let Some(tag) = tag {
3854 conditions.push(serde_json::json!({
3855 "field": "tag", "operator": "is", "value": tag,
3856 }));
3857 }
3858 let value = client.call(
3859 "search.advanced",
3860 Some(serde_json::json!({
3861 "conditions": conditions,
3862 "operator": "and",
3863 "limit": limit,
3864 "offset": offset,
3865 })),
3866 )?;
3867 return Ok((normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty));
3868 }
3869
3870 let query = query.ok_or(
3871 "INVALID_ARGS: provide a search query, or use --doi/--isbn/--issn for identifier lookup"
3872 )?;
3873 let value = if let Some(col) = collection {
3874 let key = resolve_collection(client, &col)?;
3875 let response = client.call(
3876 "collections.getItems",
3877 Some(serde_json::json!({"key": key})),
3878 )?;
3879 collection_quick_search_response(&response, &query, limit)
3880 } else {
3881 filter_search_artifacts(client.call(
3882 "search.quick",
3883 Some(serde_json::json!({"query": query, "limit": limit})),
3884 )?)
3885 };
3886 Ok((normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty))
3887}
3888
3889fn run_search_management_command(
3890 command: SearchManagementCommand,
3891 client: &mut impl RpcCaller,
3892) -> Result<(Value, JsonStyle), String> {
3893 match command {
3894 SearchManagementCommand::SavedSearches { .. } => Ok((
3895 normalize_list_envelope(client.call("search.savedSearches", None)?, "items", None, 0),
3896 JsonStyle::Pretty,
3897 )),
3898 SearchManagementCommand::CreateSaved {
3899 name, condition, dry_run, ..
3900 } => {
3901 let conditions = condition
3902 .iter()
3903 .map(|raw| parse_search_condition(raw))
3904 .collect::<Result<Vec<_>, _>>()?;
3905 let params = serde_json::json!({"name": name, "conditions": conditions});
3906 if dry_run {
3907 Ok((dry_run_value("search.createSavedSearch", params), JsonStyle::PythonCompact))
3908 } else {
3909 Ok((client.call("search.createSavedSearch", Some(params))?, JsonStyle::PythonCompact))
3910 }
3911 }
3912 SearchManagementCommand::DeleteSaved {
3913 search_key, dry_run, ..
3914 } => {
3915 let params = serde_json::json!({"key": search_key});
3916 if dry_run {
3917 Ok((dry_run_value("search.deleteSavedSearch", params), JsonStyle::PythonCompact))
3918 } else {
3919 Ok((client.call("search.deleteSavedSearch", Some(params))?, JsonStyle::PythonCompact))
3920 }
3921 }
3922 }
3923}
3924
3925fn filter_search_artifacts(mut value: Value) -> Value {
3926 let Some(items) = value.get_mut("items").and_then(Value::as_array_mut) else {
3927 return value;
3928 };
3929 items.retain(|item| match item.get("title").and_then(Value::as_str) {
3930 Some(title) => !is_zotron_evidence_artifact(title),
3931 None => true,
3932 });
3933 let total_items = items.len() as u64;
3934 if let Some(total) = value.get_mut("total") {
3935 *total = Value::from(total_items);
3936 }
3937 value
3938}
3939
3940fn collection_quick_search_response(response: &Value, query: &str, limit: u64) -> Value {
3941 let mut matched = collection_items(response)
3942 .into_iter()
3943 .filter(|item| !item_is_evidence_artifact(item))
3944 .filter(|item| quick_item_matches(item, query))
3945 .collect::<Vec<_>>();
3946 let total = matched.len() as u64;
3947 let limit = usize::try_from(limit).unwrap_or(usize::MAX);
3948 if matched.len() > limit {
3949 matched.truncate(limit);
3950 }
3951 serde_json::json!({"items": matched, "total": total})
3952}
3953
3954fn item_is_evidence_artifact(item: &Value) -> bool {
3955 item.get("title")
3956 .and_then(Value::as_str)
3957 .is_some_and(is_zotron_evidence_artifact)
3958}
3959
3960fn quick_item_matches(item: &Value, query: &str) -> bool {
3961 let terms = query
3962 .split_whitespace()
3963 .map(|term| term.to_lowercase())
3964 .filter(|term| !term.is_empty())
3965 .collect::<Vec<_>>();
3966 if terms.is_empty() {
3967 return true;
3968 }
3969 let mut haystack = String::new();
3970 append_search_text(item, &mut haystack);
3971 let haystack = haystack.to_lowercase();
3972 terms.iter().all(|term| haystack.contains(term))
3973}
3974
3975fn append_search_text(value: &Value, out: &mut String) {
3976 match value {
3977 Value::String(text) => {
3978 out.push(' ');
3979 out.push_str(text);
3980 }
3981 Value::Number(number) => {
3982 out.push(' ');
3983 out.push_str(&number.to_string());
3984 }
3985 Value::Bool(value) => {
3986 out.push(' ');
3987 out.push_str(if *value { "true" } else { "false" });
3988 }
3989 Value::Array(items) => {
3990 for item in items {
3991 append_search_text(item, out);
3992 }
3993 }
3994 Value::Object(map) => {
3995 for item in map.values() {
3996 append_search_text(item, out);
3997 }
3998 }
3999 Value::Null => {}
4000 }
4001}
4002
4003fn parse_search_condition(raw: &str) -> Result<Value, String> {
4004 let mut parts = raw.split_whitespace();
4005 let field = parts.next();
4006 let operator = parts.next();
4007 let value = parts.collect::<Vec<_>>().join(" ");
4008 match (field, operator, value.is_empty()) {
4009 (Some(field), Some(operator), false) => Ok(serde_json::json!({
4010 "field": field,
4011 "operator": operator,
4012 "value": value,
4013 })),
4014 _ => Err(format!(
4015 "INVALID_ARGS: --condition must be 'field operator value', got: {raw:?}"
4016 )),
4017 }
4018}
4019
4020fn normalize_list_envelope(value: Value, list_key: &str, limit: Option<u64>, offset: u64) -> Value {
4021 if let Value::Array(arr) = value {
4022 let total = arr.len() as u64;
4023 let mut obj = serde_json::Map::new();
4024 obj.insert(list_key.to_string(), Value::Array(arr));
4025 obj.insert("total".to_string(), Value::from(total));
4026 if let Some(limit) = limit {
4027 obj.insert("limit".to_string(), Value::from(limit));
4028 }
4029 obj.insert("offset".to_string(), Value::from(offset));
4030 obj.insert("hasMore".to_string(), Value::Bool(false));
4031 return Value::Object(obj);
4032 }
4033
4034 let mut obj = match value {
4035 Value::Object(obj) if obj.contains_key(list_key) => obj,
4036 other => return other,
4037 };
4038
4039 let items_len = obj
4040 .get(list_key)
4041 .and_then(Value::as_array)
4042 .map_or(0, |a| a.len()) as u64;
4043 let total = obj
4044 .get("total")
4045 .and_then(Value::as_u64)
4046 .unwrap_or(items_len);
4047
4048 obj.insert("total".to_string(), Value::from(total));
4049 if let Some(limit) = limit {
4050 obj.insert("limit".to_string(), Value::from(limit));
4051 }
4052 obj.insert("offset".to_string(), Value::from(offset));
4053 obj.insert(
4054 "hasMore".to_string(),
4055 Value::Bool(offset + items_len < total),
4056 );
4057
4058 Value::Object(obj)
4059}
4060
/// Number of accumulated rows at which `paginate_rpc` stops fetching further
/// pages and truncates its result.
const RPC_PAGINATION_SAFETY_CAP: usize = 10_000;
/// Object members that `extract_page` checks, in this order, for the array
/// holding one page of results.
const RPC_PAGE_LIST_KEYS: [&str; 4] = ["items", "tags", "results", "data"];
4063
4064fn paginate_rpc(
4065 client: &mut impl RpcCaller,
4066 method: &str,
4067 params: Value,
4068 page_size: usize,
4069) -> Result<Value, String> {
4070 let base = params
4071 .as_object()
4072 .ok_or_else(|| "params must be a JSON object".to_string())?;
4073 let mut out = Vec::new();
4074 let mut prev_page: Option<Vec<Value>> = None;
4075 let mut offset = 0usize;
4076
4077 loop {
4078 let mut page_params = base.clone();
4079 page_params.insert("offset".to_string(), Value::Number(offset.into()));
4080 page_params.insert("limit".to_string(), Value::Number(page_size.into()));
4081 let response = client.call(method, Some(Value::Object(page_params)))?;
4082
4083 let page = match extract_page(&response) {
4084 Some(page) => page,
4085 None if out.is_empty() => return Ok(response),
4086 None if response.is_object() => {
4087 return Err(format!(
4088 "paginate: {method:?} returned a non-paginated dict after {} accumulated rows; aborting",
4089 out.len()
4090 ));
4091 }
4092 None => {
4093 return Err(format!(
4094 "paginate: {method:?} returned non-list/non-dict shape after {} accumulated rows; aborting",
4095 out.len()
4096 ));
4097 }
4098 };
4099
4100 if prev_page.as_ref() == Some(&page) {
4101 return Err(format!(
4102 "paginate: {method:?} returned identical pages — method likely ignores offset; aborting after {} rows",
4103 out.len()
4104 ));
4105 }
4106
4107 let page_len = page.len();
4108 out.extend(page.clone());
4109 if page_len < page_size {
4110 return Ok(Value::Array(out));
4111 }
4112 if out.len() >= RPC_PAGINATION_SAFETY_CAP {
4113 out.truncate(RPC_PAGINATION_SAFETY_CAP);
4114 return Ok(Value::Array(out));
4115 }
4116 prev_page = Some(page);
4117 offset += page_size;
4118 }
4119}
4120
4121fn extract_page(response: &Value) -> Option<Vec<Value>> {
4122 if let Some(page) = response.as_array() {
4123 return Some(page.clone());
4124 }
4125 let object = response.as_object()?;
4126 for key in RPC_PAGE_LIST_KEYS {
4127 if let Some(page) = object.get(key).and_then(Value::as_array) {
4128 return Some(page.clone());
4129 }
4130 }
4131 None
4132}
4133
4134fn run_find_pdfs_command(
4135 client: &mut impl RpcCaller,
4136 collection: String,
4137 limit: usize,
4138) -> Result<(Value, JsonStyle), String> {
4139 let collection_key = resolve_collection(client, &collection)?;
4140 let response = client.call(
4141 "collections.getItems",
4142 Some(serde_json::json!({"key": collection_key})),
4143 )?;
4144 let items = collection_items(&response);
4145
4146 let mut missing = Vec::new();
4147 for item in &items {
4148 let Some(item_key) = item.get("key").and_then(Value::as_str) else {
4149 continue;
4150 };
4151 let attachments = client.call(
4152 "attachments.list",
4153 Some(serde_json::json!({"parentKey": item_key})),
4154 )?;
4155 if !has_pdf_attachment(&attachments) {
4156 missing.push(item.clone());
4157 }
4158 if limit > 0 && missing.len() >= limit {
4159 break;
4160 }
4161 }
4162
4163 let mut results = Vec::new();
4164 for item in &missing {
4165 let item_key = item
4166 .get("key")
4167 .and_then(Value::as_str)
4168 .ok_or_else(|| "missing item lacks key".to_string())?;
4169 let response = client.call(
4170 "attachments.findPDF",
4171 Some(serde_json::json!({"parentKey": item_key})),
4172 )?;
4173 let attachment = response.get("attachment").filter(|value| !value.is_null());
4174 results.push(serde_json::json!({
4175 "item_key": item_key,
4176 "title": item.get("title").cloned().unwrap_or(Value::Null),
4177 "found": attachment.is_some(),
4178 "attachment_key": attachment
4179 .and_then(|attachment| attachment.get("key"))
4180 .cloned()
4181 .unwrap_or(Value::Null),
4182 }));
4183 }
4184
4185 Ok((
4186 serde_json::json!({
4187 "scanned": items.len(),
4188 "attempted": missing.len(),
4189 "results": results,
4190 }),
4191 JsonStyle::Pretty,
4192 ))
4193}
4194
4195fn collection_items(response: &Value) -> Vec<Value> {
4196 if let Some(items) = response.get("items").and_then(Value::as_array) {
4197 return items.clone();
4198 }
4199 response.as_array().cloned().unwrap_or_default()
4200}
4201
4202fn has_pdf_attachment(attachments: &Value) -> bool {
4203 attachments
4204 .as_array()
4205 .is_some_and(|attachments| attachments.iter().any(is_pdf_attachment))
4206}
4207
4208fn is_pdf_attachment(attachment: &Value) -> bool {
4209 let content_type = attachment
4210 .get("contentType")
4211 .and_then(Value::as_str)
4212 .unwrap_or_default()
4213 .to_lowercase();
4214 let path = attachment
4215 .get("path")
4216 .and_then(Value::as_str)
4217 .unwrap_or_default()
4218 .to_lowercase();
4219 matches!(
4220 content_type.as_str(),
4221 "application/pdf" | "application/x-pdf"
4222 ) || path.ends_with(".pdf")
4223}
4224
4225fn call_json(
4226 client: &mut impl RpcCaller,
4227 method: &str,
4228 params: Option<Value>,
4229) -> Result<Value, String> {
4230 client.call(method, params)
4231}
4232
4233fn run_system_command(
4234 command: SystemCommand,
4235 client: &mut impl RpcCaller,
4236) -> Result<(Value, JsonStyle), String> {
4237 let value = match command {
4238 SystemCommand::Version { .. } => client.call("system.version", None)?,
4239 SystemCommand::Libraries { .. } => client.call("system.libraries", None)?,
4240 SystemCommand::LibraryStats { library, .. } => {
4241 let params = library.map(|id| serde_json::json!({"id": id}));
4242 client.call("system.libraryStats", params)?
4243 }
4244 SystemCommand::Schema { item_type, .. } => {
4245 if let Some(item_type) = item_type {
4246 let fields = client.call("system.itemFields", Some(serde_json::json!({"itemType": item_type})))?;
4247 let creators = client.call("system.creatorTypes", Some(serde_json::json!({"itemType": item_type})))?;
4248 let field_names: Vec<Value> = fields.as_array().unwrap_or(&vec![])
4249 .iter()
4250 .filter_map(|f| f.get("field").cloned())
4251 .collect();
4252 let creator_names: Vec<Value> = creators.as_array().unwrap_or(&vec![])
4253 .iter()
4254 .filter_map(|c| c.get("creatorType").cloned())
4255 .collect();
4256 serde_json::json!({
4257 "itemType": item_type,
4258 "fields": field_names,
4259 "creatorTypes": creator_names,
4260 })
4261 } else {
4262 let types = client.call("system.itemTypes", None)?;
4263 let type_names: Vec<Value> = types.as_array().unwrap_or(&vec![])
4264 .iter()
4265 .filter_map(|t| t.get("itemType").cloned())
4266 .collect();
4267 Value::Array(type_names)
4268 }
4269 }
4270 SystemCommand::CurrentCollection { .. } => client.call("system.currentCollection", None)?,
4271 SystemCommand::ListMethods { .. } => client.call("system.listMethods", None)?,
4272 SystemCommand::Describe { method, .. } => {
4273 let params = method.map(|method| serde_json::json!({"method": method}));
4274 client.call("system.describe", params)?
4275 }
4276 };
4277 Ok((value, JsonStyle::Pretty))
4278}
4279
4280fn run_items_command(
4281 command: ItemsCommand,
4282 client: &mut impl RpcCaller,
4283) -> Result<(Value, JsonStyle), String> {
4284 let (value, style) = match command {
4285 ItemsCommand::Add {
4286 doi,
4287 isbn,
4288 from_url,
4289 file,
4290 collection,
4291 dry_run,
4292 ..
4293 } => {
4294 if let Some(doi) = doi {
4295 run_add_identifier_command(client, "items.addByDOI", "doi", doi, collection, dry_run)?
4296 } else if let Some(isbn) = isbn {
4297 run_add_identifier_command(client, "items.addByISBN", "isbn", isbn, collection, dry_run)?
4298 } else if let Some(from_url) = from_url {
4299 run_add_identifier_command(client, "items.addByURL", "url", from_url, collection, dry_run)?
4300 } else if let Some(file) = file {
4301 let mut params = serde_json::json!({"path": zotero_path(&file)});
4302 maybe_insert_collection(client, &mut params, collection)?;
4303 run_mutation_command(client, "items.addFromFile", params, dry_run)?
4304 } else {
4305 return Err("INVALID_ARGS: provide one of --doi, --isbn, --from-url, or --file".into());
4306 }
4307 }
4308 ItemsCommand::Create {
4309 item_type,
4310 fields,
4311 dry_run,
4312 ..
4313 } => {
4314 let parsed_fields = parse_field_options(&fields)?;
4315 let mut params = serde_json::json!({"itemType": item_type});
4316 if !parsed_fields.is_empty() {
4317 if let Some(map) = params.as_object_mut() {
4318 map.insert("fields".to_string(), Value::Object(parsed_fields));
4319 }
4320 }
4321 run_mutation_command(client, "items.create", params, dry_run)?
4322 }
4323 ItemsCommand::Update {
4324 key,
4325 fields,
4326 dry_run,
4327 ..
4328 } => {
4329 let parsed_fields = parse_field_options(&fields)?;
4330 let mut params = serde_json::json!({"key": key});
4331 if !parsed_fields.is_empty() {
4332 if let Some(map) = params.as_object_mut() {
4333 map.insert("fields".to_string(), Value::Object(parsed_fields));
4334 }
4335 }
4336 run_mutation_command(client, "items.update", params, dry_run)?
4337 }
4338 ItemsCommand::Delete { key, dry_run, .. } => run_mutation_command(
4339 client,
4340 "items.delete",
4341 serde_json::json!({"key": key}),
4342 dry_run,
4343 )?,
4344 ItemsCommand::Trash {
4345 items, dry_run, ..
4346 } => {
4347 if items.len() == 1 {
4348 run_mutation_command(
4349 client,
4350 "items.trash",
4351 serde_json::json!({"key": items[0]}),
4352 dry_run,
4353 )?
4354 } else {
4355 run_mutation_command(
4356 client,
4357 "items.batchTrash",
4358 serde_json::json!({"keys": items}),
4359 dry_run,
4360 )?
4361 }
4362 }
4363 ItemsCommand::Restore { item, dry_run, .. } => run_mutation_command(
4364 client,
4365 "items.restore",
4366 serde_json::json!({"key": item}),
4367 dry_run,
4368 )?,
4369 ItemsCommand::MergeDuplicates { keys, dry_run, .. } => {
4370 if keys.len() < 2 {
4371 return Err("INVALID_ARGS: need at least 2 keys to merge".to_string());
4372 }
4373 run_mutation_command(
4374 client,
4375 "items.mergeDuplicates",
4376 serde_json::json!({"keys": keys}),
4377 dry_run,
4378 )?
4379 }
4380 ItemsCommand::AddRelated {
4381 key,
4382 target,
4383 dry_run,
4384 ..
4385 } => run_mutation_command(
4386 client,
4387 "items.addRelated",
4388 serde_json::json!({"key": key, "targetKey": target}),
4389 dry_run,
4390 )?,
4391 ItemsCommand::RemoveRelated {
4392 key,
4393 target,
4394 dry_run,
4395 ..
4396 } => run_mutation_command(
4397 client,
4398 "items.removeRelated",
4399 serde_json::json!({"key": key, "targetKey": target}),
4400 dry_run,
4401 )?,
4402 ItemsCommand::Get { item, .. } => (
4403 client.call("items.get", Some(serde_json::json!({"key": item})))?,
4404 JsonStyle::Pretty,
4405 ),
4406 ItemsCommand::List {
4407 limit,
4408 offset,
4409 sort,
4410 direction,
4411 trash,
4412 ..
4413 } => {
4414 if trash {
4415 let value = client.call(
4416 "items.getTrash",
4417 Some(serde_json::json!({"limit": limit, "offset": offset})),
4418 )?;
4419 (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
4420 } else {
4421 let mut params = serde_json::json!({
4422 "limit": limit,
4423 "offset": offset,
4424 "direction": direction,
4425 });
4426 if let (Some(sort), Some(map)) = (sort, params.as_object_mut()) {
4427 map.insert("sort".to_string(), Value::String(sort));
4428 }
4429 let value = client.call("items.list", Some(params))?;
4430 (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
4431 }
4432 }
4433 ItemsCommand::FindDuplicates { .. } => (
4434 client.call("items.findDuplicates", None)?,
4435 JsonStyle::Pretty,
4436 ),
4437 ItemsCommand::Recent {
4438 limit,
4439 offset,
4440 recent_type,
4441 ..
4442 } => {
4443 if recent_type != "added" && recent_type != "modified" {
4444 return Err(format!(
4445 "--type must be added or modified, got {recent_type:?}"
4446 ));
4447 }
4448 let value = client.call(
4449 "items.getRecent",
4450 Some(
4451 serde_json::json!({"limit": limit, "offset": offset, "type": recent_type}),
4452 ),
4453 )?;
4454 (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
4455 }
4456 ItemsCommand::Fulltext { key, .. } => (
4457 client.call("items.getFullText", Some(serde_json::json!({"key": key})))?,
4458 JsonStyle::Pretty,
4459 ),
4460 ItemsCommand::Related { key, .. } => (
4461 normalize_list_envelope(
4462 client.call("items.getRelated", Some(serde_json::json!({"key": key})))?,
4463 "items",
4464 None,
4465 0,
4466 ),
4467 JsonStyle::Pretty,
4468 ),
4469 ItemsCommand::CitationKey { key, .. } => (
4470 client.call("items.citationKey", Some(serde_json::json!({"key": key})))?,
4471 JsonStyle::Pretty,
4472 ),
4473 };
4474 Ok((value, style))
4475}
4476
4477fn run_add_identifier_command(
4478 client: &mut impl RpcCaller,
4479 method: &str,
4480 param_name: &str,
4481 param_value: String,
4482 collection: Option<String>,
4483 dry_run: bool,
4484) -> Result<(Value, JsonStyle), String> {
4485 let mut params = Value::Object(serde_json::Map::from_iter([(
4486 param_name.to_string(),
4487 Value::String(param_value),
4488 )]));
4489 maybe_insert_collection(client, &mut params, collection)?;
4490 run_mutation_command(client, method, params, dry_run)
4491}
4492
4493fn run_mutation_command(
4494 client: &mut impl RpcCaller,
4495 method: &str,
4496 params: Value,
4497 dry_run: bool,
4498) -> Result<(Value, JsonStyle), String> {
4499 let value = if dry_run {
4500 serde_json::json!({
4501 "ok": true,
4502 "dryRun": true,
4503 "wouldCall": method,
4504 "wouldCallParams": params,
4505 })
4506 } else {
4507 client.call(method, Some(params))?
4508 };
4509 Ok((value, JsonStyle::PythonCompact))
4510}
4511
4512fn parse_field_options(fields: &[String]) -> Result<serde_json::Map<String, Value>, String> {
4513 let mut parsed = serde_json::Map::new();
4514 for field in fields {
4515 let (key, value) = field
4516 .split_once('=')
4517 .ok_or_else(|| format!("INVALID_ARGS: --field must be key=value, got: {field:?}"))?;
4518 parsed.insert(key.to_string(), Value::String(value.to_string()));
4519 }
4520 Ok(parsed)
4521}
4522
4523fn maybe_insert_collection(
4524 client: &mut impl RpcCaller,
4525 params: &mut Value,
4526 collection: Option<String>,
4527) -> Result<(), String> {
4528 let Some(collection) = collection else {
4529 return Ok(());
4530 };
4531 let collection = resolve_collection(client, &collection)?;
4532 let include = match &collection {
4533 Value::Null => false,
4534 Value::Number(number) => number.as_i64() != Some(0),
4535 _ => true,
4536 };
4537 if include {
4538 params
4539 .as_object_mut()
4540 .expect("mutation params are always objects")
4541 .insert("collection".to_string(), collection);
4542 }
4543 Ok(())
4544}
4545
4546fn run_settings_command(
4547 command: SettingsCommand,
4548 client: &mut impl RpcCaller,
4549) -> Result<(Value, JsonStyle), String> {
4550 let (value, style) = match command {
4551 SettingsCommand::Get { key, .. } => (
4552 client.call("settings.get", Some(serde_json::json!({"key": key})))?,
4553 JsonStyle::Pretty,
4554 ),
4555 SettingsCommand::List { .. } => (client.call("settings.getAll", None)?, JsonStyle::Pretty),
4556 SettingsCommand::Set {
4557 pairs,
4558 file,
4559 dry_run,
4560 ..
4561 } => {
4562 if let Some(file) = file {
4563 let raw = fs::read_to_string(&file)
4565 .map_err(|err| format!("INVALID_JSON: Could not read JSON: {err}"))?;
4566 let settings: Value = serde_json::from_str(&raw)
4567 .map_err(|err| format!("INVALID_JSON: Could not parse JSON: {err}"))?;
4568 if dry_run {
4569 (
4570 dry_run_value("settings.setAll", settings),
4571 JsonStyle::PythonCompact,
4572 )
4573 } else {
4574 (
4575 client.call("settings.setAll", Some(settings))?,
4576 JsonStyle::PythonCompact,
4577 )
4578 }
4579 } else if pairs.len() == 2 {
4580 let key = &pairs[0];
4582 let value = &pairs[1];
4583 let parsed_value = serde_json::from_str::<Value>(value)
4584 .unwrap_or(Value::String(value.clone()));
4585 let params = serde_json::json!({"key": key, "value": parsed_value});
4586 if dry_run {
4587 (
4588 dry_run_value("settings.set", params),
4589 JsonStyle::PythonCompact,
4590 )
4591 } else {
4592 (
4593 client.call("settings.set", Some(params))?,
4594 JsonStyle::PythonCompact,
4595 )
4596 }
4597 } else if pairs.len() > 2 && pairs.len() % 2 == 0 {
4598 let mut map = serde_json::Map::new();
4600 for chunk in pairs.chunks(2) {
4601 let parsed = serde_json::from_str::<Value>(&chunk[1])
4602 .unwrap_or(Value::String(chunk[1].clone()));
4603 map.insert(chunk[0].clone(), parsed);
4604 }
4605 let settings = Value::Object(map);
4606 if dry_run {
4607 (
4608 dry_run_value("settings.setAll", settings),
4609 JsonStyle::PythonCompact,
4610 )
4611 } else {
4612 (
4613 client.call("settings.setAll", Some(settings))?,
4614 JsonStyle::PythonCompact,
4615 )
4616 }
4617 } else {
4618 return Err(
4619 "INVALID_ARGS: provide key value pairs (even number of args) or --file".into(),
4620 );
4621 }
4622 }
4623 };
4624 Ok((value, style))
4625}
4626
4627fn run_tags_command(
4628 command: TagsCommand,
4629 client: &mut impl RpcCaller,
4630) -> Result<(Value, JsonStyle), String> {
4631 let (value, style) = match command {
4632 TagsCommand::List { limit, .. } => {
4633 let value = client.call("tags.list", Some(serde_json::json!({"limit": limit})))?;
4634 (normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty)
4635 }
4636 TagsCommand::Rename {
4637 old, new, dry_run, ..
4638 } => run_tag_mutation(
4639 client,
4640 "tags.rename",
4641 serde_json::json!({"oldName": old, "newName": new}),
4642 dry_run,
4643 )?,
4644 TagsCommand::Delete { tag, dry_run, .. } => run_tag_mutation(
4645 client,
4646 "tags.delete",
4647 serde_json::json!({"tag": tag}),
4648 dry_run,
4649 )?,
4650 TagsCommand::Add {
4651 keys, tags, dry_run, ..
4652 } => {
4653 if keys.len() == 1 {
4654 run_tag_mutation(
4655 client,
4656 "tags.add",
4657 serde_json::json!({"key": keys[0], "tags": tags}),
4658 dry_run,
4659 )?
4660 } else {
4661 run_tag_mutation(
4662 client,
4663 "tags.batchUpdate",
4664 serde_json::json!({"keys": keys, "add": tags}),
4665 dry_run,
4666 )?
4667 }
4668 }
4669 TagsCommand::Remove {
4670 keys, tags, dry_run, ..
4671 } => {
4672 if keys.len() == 1 {
4673 run_tag_mutation(
4674 client,
4675 "tags.remove",
4676 serde_json::json!({"key": keys[0], "tags": tags}),
4677 dry_run,
4678 )?
4679 } else {
4680 run_tag_mutation(
4681 client,
4682 "tags.batchUpdate",
4683 serde_json::json!({"keys": keys, "remove": tags}),
4684 dry_run,
4685 )?
4686 }
4687 }
4688 };
4689 Ok((value, style))
4690}
4691
4692fn run_tag_mutation(
4693 client: &mut impl RpcCaller,
4694 method: &str,
4695 params: Value,
4696 dry_run: bool,
4697) -> Result<(Value, JsonStyle), String> {
4698 if dry_run {
4699 Ok((dry_run_value(method, params), JsonStyle::PythonCompact))
4700 } else {
4701 Ok((client.call(method, Some(params))?, JsonStyle::PythonCompact))
4702 }
4703}
4704
4705fn dry_run_value(method: &str, params: Value) -> Value {
4706 serde_json::json!({
4707 "ok": true,
4708 "dryRun": true,
4709 "wouldCall": method,
4710 "wouldCallParams": params,
4711 })
4712}
4713
4714fn run_annotations_command(
4715 command: AnnotationsCommand,
4716 client: &mut impl RpcCaller,
4717) -> Result<(Value, JsonStyle), String> {
4718 let (value, style) = match command {
4719 AnnotationsCommand::List { parent, .. } => {
4720 let value = client.call(
4721 "annotations.list",
4722 Some(serde_json::json!({"parentKey": parent})),
4723 )?;
4724 (normalize_list_envelope(value, "items", None, 0), JsonStyle::Pretty)
4725 }
4726 AnnotationsCommand::Create {
4727 parent,
4728 annotation_type,
4729 position,
4730 sort_index,
4731 text,
4732 comment,
4733 color,
4734 dry_run,
4735 ..
4736 } => {
4737 if !matches!(
4738 annotation_type.as_str(),
4739 "highlight" | "note" | "underline" | "image" | "ink"
4740 ) {
4741 return Err(format!(
4742 "INVALID_ARGS: --type must be highlight|note|underline|image|ink, got {annotation_type:?}"
4743 ));
4744 }
4745 let position = position
4746 .ok_or_else(|| "INVALID_ARGS: --position JSON is required".to_string())
4747 .and_then(|raw| {
4748 serde_json::from_str::<Value>(&raw)
4749 .map_err(|err| format!("INVALID_JSON: Could not parse --position: {err}"))
4750 })?;
4751 validate_annotation_position(annotation_type.as_str(), &position)?;
4752 let mut params = serde_json::Map::new();
4753 params.insert("parentKey".to_string(), Value::String(parent));
4754 params.insert("type".to_string(), Value::String(annotation_type));
4755 params.insert("color".to_string(), Value::String(color));
4756 params.insert("position".to_string(), position);
4757 if let Some(sort_index) = sort_index {
4758 params.insert(
4759 "sortIndex".to_string(),
4760 parse_annotation_sort_index(sort_index)?,
4761 );
4762 }
4763 if let Some(text) = text {
4764 params.insert("text".to_string(), Value::String(text));
4765 }
4766 if let Some(comment) = comment {
4767 params.insert("comment".to_string(), Value::String(comment));
4768 }
4769 run_mutating_command(client, "annotations.create", Value::Object(params), dry_run)?
4770 }
4771 AnnotationsCommand::Delete {
4772 annotation_key,
4773 dry_run,
4774 ..
4775 } => run_mutating_command(
4776 client,
4777 "annotations.delete",
4778 serde_json::json!({"key": annotation_key}),
4779 dry_run,
4780 )?,
4781 };
4782 Ok((value, style))
4783}
4784
4785fn validate_annotation_position(annotation_type: &str, position: &Value) -> Result<(), String> {
4786 position
4787 .get("pageIndex")
4788 .and_then(Value::as_i64)
4789 .filter(|value| *value >= 0)
4790 .ok_or_else(|| {
4791 "INVALID_ARGS: --position must include a non-negative integer pageIndex".to_string()
4792 })?;
4793
4794 if annotation_type == "ink" {
4795 let has_paths = position
4796 .get("paths")
4797 .and_then(Value::as_array)
4798 .is_some_and(|paths| !paths.is_empty());
4799 if !has_paths {
4800 return Err("INVALID_ARGS: ink --position must include non-empty paths".to_string());
4801 }
4802 return Ok(());
4803 }
4804
4805 let valid_rects = position
4806 .get("rects")
4807 .and_then(Value::as_array)
4808 .is_some_and(|rects| !rects.is_empty() && rects.iter().all(is_annotation_rect));
4809 if !valid_rects {
4810 return Err(
4811 "INVALID_ARGS: --position must include non-empty rects of [x1, y1, x2, y2]".to_string(),
4812 );
4813 }
4814 Ok(())
4815}
4816
4817fn is_annotation_rect(value: &Value) -> bool {
4818 value.as_array().is_some_and(|coords| {
4819 coords.len() == 4
4820 && coords
4821 .iter()
4822 .all(|coord| coord.as_f64().is_some_and(f64::is_finite))
4823 })
4824}
4825
4826fn parse_annotation_sort_index(raw: String) -> Result<Value, String> {
4827 let parsed = serde_json::from_str::<Value>(&raw).unwrap_or_else(|_| Value::String(raw));
4828 let valid = match &parsed {
4829 Value::Number(number) => number.as_f64().is_some_and(f64::is_finite),
4830 Value::String(value) => {
4831 is_zotero_pdf_sort_index(value.trim())
4832 || (!value.trim().is_empty()
4833 && value.trim().parse::<f64>().is_ok_and(f64::is_finite))
4834 }
4835 _ => false,
4836 };
4837 if valid {
4838 Ok(parsed)
4839 } else {
4840 Err(format!(
4841 "INVALID_ARGS: --sort-index must be a finite number or numeric string, got {parsed}"
4842 ))
4843 }
4844}
4845
/// Returns `true` when `value` has the shape `DDDDD|DDDDDD|DDDDD`.
///
/// That is: exactly three `|`-separated fields of 5, 6 and 5 ASCII digits.
/// No surrounding whitespace or extra fields are tolerated.
fn is_zotero_pdf_sort_index(value: &str) -> bool {
    const FIELD_WIDTHS: [usize; 3] = [5, 6, 5];

    let mut fields = value.split('|');
    let fields_ok = FIELD_WIDTHS.iter().all(|&width| {
        fields.next().is_some_and(|field| {
            field.len() == width && field.bytes().all(|byte| byte.is_ascii_digit())
        })
    });
    // A fourth field (even an empty one after a trailing `|`) is rejected.
    fields_ok && fields.next().is_none()
}
4859
4860fn run_attachments_command(
4861 command: AttachmentsCommand,
4862 client: &mut impl RpcCaller,
4863) -> Result<(Value, JsonStyle), String> {
4864 let value = match command {
4865 AttachmentsCommand::List {
4866 parent,
4867 limit,
4868 offset,
4869 ..
4870 } => normalize_list_envelope(
4871 client.call(
4872 "attachments.list",
4873 Some(serde_json::json!({"parentKey": parent})),
4874 )?,
4875 "items",
4876 Some(limit),
4877 offset,
4878 ),
4879 AttachmentsCommand::Get { key, .. } => {
4880 client.call("attachments.get", Some(serde_json::json!({"key": key})))?
4881 }
4882 AttachmentsCommand::Fulltext { key, .. } => client.call(
4883 "attachments.getFulltext",
4884 Some(serde_json::json!({"key": key})),
4885 )?,
4886 AttachmentsCommand::Path { key, .. } => localize_attachment_path_response(
4887 client.call("attachments.getPath", Some(serde_json::json!({"key": key})))?,
4888 ),
4889 AttachmentsCommand::Add {
4890 parent,
4891 path,
4892 from_url,
4893 title,
4894 dry_run,
4895 ..
4896 } => {
4897 match (path, from_url) {
4898 (Some(p), None) => {
4899 let mut params = serde_json::json!({"parentKey": parent, "path": zotero_path(&p)});
4900 insert_optional_string(&mut params, "title", title);
4901 if dry_run {
4902 return Ok((
4903 dry_run_value("attachments.add", params),
4904 JsonStyle::PythonCompact,
4905 ));
4906 }
4907 return Ok((
4908 client.call("attachments.add", Some(params))?,
4909 JsonStyle::PythonCompact,
4910 ));
4911 }
4912 (None, Some(u)) => {
4913 let mut params = serde_json::json!({"parentKey": parent, "url": u});
4914 insert_optional_string(&mut params, "title", title);
4915 if dry_run {
4916 return Ok((
4917 dry_run_value("attachments.addByURL", params),
4918 JsonStyle::PythonCompact,
4919 ));
4920 }
4921 return Ok((
4922 client.call("attachments.addByURL", Some(params))?,
4923 JsonStyle::PythonCompact,
4924 ));
4925 }
4926 (Some(_), Some(_)) => {
4927 return Err("INVALID_ARGS: --path and --from-url are mutually exclusive".to_string());
4928 }
4929 (None, None) => {
4930 return Err("INVALID_ARGS: either --path or --from-url is required".to_string());
4931 }
4932 }
4933 }
4934 AttachmentsCommand::Delete { key, dry_run, .. } => {
4935 let params = serde_json::json!({"key": key});
4936 if dry_run {
4937 return Ok((
4938 dry_run_value("attachments.delete", params),
4939 JsonStyle::PythonCompact,
4940 ));
4941 }
4942 return Ok((
4943 client.call("attachments.delete", Some(params))?,
4944 JsonStyle::PythonCompact,
4945 ));
4946 }
4947 AttachmentsCommand::FindPdf { parent, .. } => client.call(
4948 "attachments.findPDF",
4949 Some(serde_json::json!({"parentKey": parent})),
4950 )?,
4951 };
4952 Ok((value, JsonStyle::Pretty))
4953}
4954
4955fn localize_attachment_path_response(mut value: Value) -> Value {
4956 if let Some(path) = value.get("path").and_then(Value::as_str) {
4957 let local = local_path_from_zotero_path(path);
4958 if let Some(map) = value.as_object_mut() {
4959 map.insert("path".to_string(), Value::String(local));
4960 }
4961 }
4962 value
4963}
4964
4965fn run_notes_command(
4966 command: NotesCommand,
4967 client: &mut impl RpcCaller,
4968) -> Result<(Value, JsonStyle), String> {
4969 let (value, style) = match command {
4970 NotesCommand::List {
4971 parent,
4972 limit,
4973 offset,
4974 ..
4975 } => {
4976 let value = client.call(
4977 "notes.list",
4978 Some(serde_json::json!({"parentKey": parent})),
4979 )?;
4980 (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
4981 }
4982 NotesCommand::Get { note_key, .. } => {
4983 let value = client.call("notes.get", Some(serde_json::json!({"key": note_key})))?;
4984 (value, JsonStyle::Pretty)
4985 }
4986 NotesCommand::Create {
4987 parent,
4988 content,
4989 tags,
4990 dry_run,
4991 ..
4992 } => {
4993 let mut params = serde_json::Map::new();
4994 params.insert("parentKey".to_string(), Value::String(parent));
4995 params.insert("content".to_string(), Value::String(content));
4996 if !tags.is_empty() {
4997 params.insert(
4998 "tags".to_string(),
4999 Value::Array(tags.into_iter().map(Value::String).collect()),
5000 );
5001 }
5002 run_mutating_command(client, "notes.create", Value::Object(params), dry_run)?
5003 }
5004 NotesCommand::Update {
5005 note_key,
5006 content,
5007 dry_run,
5008 ..
5009 } => run_mutating_command(
5010 client,
5011 "notes.update",
5012 serde_json::json!({"key": note_key, "content": content}),
5013 dry_run,
5014 )?,
5015 NotesCommand::Delete {
5016 note_key, dry_run, ..
5017 } => {
5018 run_mutating_command(
5020 client,
5021 "items.delete",
5022 serde_json::json!({"key": note_key}),
5023 dry_run,
5024 )?
5025 }
5026 NotesCommand::Search { query, limit, .. } => {
5027 let value = client.call(
5028 "notes.search",
5029 Some(serde_json::json!({"query": query, "limit": limit})),
5030 )?;
5031 (normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty)
5032 }
5033 };
5034 Ok((value, style))
5035}
5036
5037fn run_mutating_command(
5038 client: &mut impl RpcCaller,
5039 method: &str,
5040 params: Value,
5041 dry_run: bool,
5042) -> Result<(Value, JsonStyle), String> {
5043 if dry_run {
5044 Ok((
5045 serde_json::json!({
5046 "ok": true,
5047 "dryRun": true,
5048 "wouldCall": method,
5049 "wouldCallParams": params,
5050 }),
5051 JsonStyle::PythonCompact,
5052 ))
5053 } else {
5054 client
5055 .call(method, Some(params))
5056 .map(|value| (value, JsonStyle::PythonCompact))
5057 }
5058}
5059
5060fn run_collections_command(
5061 command: CollectionsCommand,
5062 client: &mut impl RpcCaller,
5063) -> Result<(Value, JsonStyle), String> {
5064 let value = match command {
5065 CollectionsCommand::List { .. } => normalize_list_envelope(
5066 client.call("collections.list", None)?,
5067 "items",
5068 None,
5069 0,
5070 ),
5071 CollectionsCommand::Tree { .. } => client.call("collections.tree", None)?,
5072 CollectionsCommand::Get { name_or_id, .. } => {
5073 let key = resolve_collection(client, &name_or_id)?;
5074 client.call("collections.get", Some(serde_json::json!({"key": key})))?
5075 }
5076 CollectionsCommand::GetItems {
5077 name_or_id,
5078 limit,
5079 offset,
5080 ..
5081 } => {
5082 let key = resolve_collection(client, &name_or_id)?;
5083 let mut params = serde_json::json!({"key": key});
5084 if let Some(map) = params.as_object_mut() {
5085 if let Some(limit) = limit {
5086 map.insert("limit".to_string(), Value::Number(limit.into()));
5087 }
5088 if offset > 0 {
5089 map.insert("offset".to_string(), Value::Number(offset.into()));
5090 }
5091 }
5092 normalize_list_envelope(
5093 client.call("collections.getItems", Some(params))?,
5094 "items",
5095 limit,
5096 offset,
5097 )
5098 }
5099 CollectionsCommand::Stats { name_or_id, .. } => {
5100 let key = resolve_collection(client, &name_or_id)?;
5101 client.call("collections.stats", Some(serde_json::json!({"key": key})))?
5102 }
5103 CollectionsCommand::Rename {
5104 old_name,
5105 new_name,
5106 dry_run,
5107 ..
5108 } => {
5109 let key = resolve_mutable_collection(client, &old_name, "rename")?;
5110 let params = serde_json::json!({"key": key, "name": new_name});
5111 if dry_run {
5112 return Ok((
5113 dry_run_value("collections.rename", params),
5114 JsonStyle::PythonCompact,
5115 ));
5116 }
5117 return Ok((
5118 client.call("collections.rename", Some(params))?,
5119 JsonStyle::PythonCompact,
5120 ));
5121 }
5122 CollectionsCommand::Create {
5123 name,
5124 parent,
5125 dry_run,
5126 ..
5127 } => {
5128 let mut params = serde_json::json!({"name": name});
5129 if let Some(parent) = parent {
5130 let parent_key = resolve_mutable_collection(client, &parent, "use as parent")?;
5131 if let Some(map) = params.as_object_mut() {
5132 map.insert("parentKey".to_string(), parent_key);
5133 }
5134 }
5135 if dry_run {
5136 return Ok((
5137 dry_run_value("collections.create", params),
5138 JsonStyle::PythonCompact,
5139 ));
5140 }
5141 return Ok((
5142 client.call("collections.create", Some(params))?,
5143 JsonStyle::PythonCompact,
5144 ));
5145 }
5146 CollectionsCommand::Delete {
5147 name_or_id,
5148 dry_run,
5149 ..
5150 } => {
5151 let key = resolve_mutable_collection(client, &name_or_id, "delete")?;
5152 let params = serde_json::json!({"key": key});
5153 if dry_run {
5154 return Ok((
5155 dry_run_value("collections.delete", params),
5156 JsonStyle::PythonCompact,
5157 ));
5158 }
5159 return Ok((
5160 client.call("collections.delete", Some(params))?,
5161 JsonStyle::PythonCompact,
5162 ));
5163 }
5164 CollectionsCommand::AddItems {
5165 collection,
5166 item_keys,
5167 dry_run,
5168 ..
5169 } => {
5170 let key = resolve_mutable_collection(client, &collection, "add to")?;
5171 let params = serde_json::json!({"key": key, "keys": item_keys});
5172 if dry_run {
5173 return Ok((
5174 dry_run_value("collections.addItems", params),
5175 JsonStyle::PythonCompact,
5176 ));
5177 }
5178 return Ok((
5179 client.call("collections.addItems", Some(params))?,
5180 JsonStyle::PythonCompact,
5181 ));
5182 }
5183 CollectionsCommand::RemoveItems {
5184 collection,
5185 item_keys,
5186 dry_run,
5187 ..
5188 } => {
5189 let key = resolve_mutable_collection(client, &collection, "operate on")?;
5190 let params = serde_json::json!({"key": key, "keys": item_keys});
5191 if dry_run {
5192 return Ok((
5193 dry_run_value("collections.removeItems", params),
5194 JsonStyle::PythonCompact,
5195 ));
5196 }
5197 return Ok((
5198 client.call("collections.removeItems", Some(params))?,
5199 JsonStyle::PythonCompact,
5200 ));
5201 }
5202 };
5203 Ok((value, JsonStyle::Pretty))
5204}
5205
5206fn resolve_export_keys(
5207 client: &mut impl RpcCaller,
5208 mut keys: Vec<String>,
5209 collection: Option<String>,
5210) -> Result<Vec<String>, String> {
5211 if let Some(name) = collection {
5212 let col_key = resolve_collection(client, &name)?;
5213 let response = client.call(
5214 "collections.getItems",
5215 Some(serde_json::json!({"key": col_key})),
5216 )?;
5217 let items = collection_items(&response);
5218 for item in items {
5219 if let Some(key) = item.get("key").and_then(Value::as_str) {
5220 if !keys.contains(&key.to_string()) {
5221 keys.push(key.to_string());
5222 }
5223 }
5224 }
5225 }
5226 if keys.is_empty() {
5227 return Err("No item keys provided. Pass positional keys and/or --collection.".to_string());
5228 }
5229 Ok(keys)
5230}
5231
5232fn run_export(args: ExportArgs, client: &mut impl RpcCaller) -> Result<String, String> {
5233 let keys = resolve_export_keys(client, args.keys, args.collection)?;
5234 match args.format.as_str() {
5235 "bibtex" => run_export_content_command(client, "export.bibtex", keys),
5236 "ris" => run_export_content_command(client, "export.ris", keys),
5237 "csl-json" => {
5238 let response =
5239 client.call("export.cslJson", Some(serde_json::json!({"keys": keys})))?;
5240 if let Some(content) = response.get("content") {
5241 format_json(content, JsonStyle::Pretty)
5242 } else {
5243 format_json(&response, JsonStyle::PythonCompact)
5244 }
5245 }
5246 "bibliography" => {
5247 let response = client.call(
5248 "export.bibliography",
5249 Some(serde_json::json!({"keys": keys, "style": args.style})),
5250 )?;
5251 if let Some(object) = response.as_object() {
5252 let field = if args.html { "html" } else { "text" };
5253 if object.contains_key("html") || object.contains_key("text") {
5254 return raw_value_output(
5255 object.get(field).unwrap_or(&Value::String(String::new())),
5256 );
5257 }
5258 }
5259 format_json(&response, JsonStyle::PythonCompact)
5260 }
5261 other => Err(format!(
5262 "INVALID_ARGS: unknown format {other:?}, expected bibtex/ris/csl-json/bibliography"
5263 )),
5264 }
5265}
5266
5267fn run_export_content_command(
5268 client: &mut impl RpcCaller,
5269 method: &str,
5270 keys: Vec<String>,
5271) -> Result<String, String> {
5272 let response = client.call(method, Some(serde_json::json!({"keys": keys})))?;
5273 if let Some(content) = response.get("content") {
5274 raw_value_output(content)
5275 } else {
5276 format_json(&response, JsonStyle::PythonCompact)
5277 }
5278}
5279
5280fn raw_value_output(value: &Value) -> Result<String, String> {
5281 let mut out = match value {
5282 Value::Null => String::new(),
5283 Value::String(content) => content.clone(),
5284 other => to_python_repr(other),
5285 };
5286 out.push('\n');
5287 Ok(out)
5288}
5289
5290fn to_python_repr(value: &Value) -> String {
5291 match value {
5292 Value::Null => "None".to_string(),
5293 Value::Bool(value) => {
5294 if *value {
5295 "True".to_string()
5296 } else {
5297 "False".to_string()
5298 }
5299 }
5300 Value::Number(value) => value.to_string(),
5301 Value::String(value) => format!("'{}'", value.replace('\\', "\\\\").replace('\'', "\\'")),
5302 Value::Array(values) => {
5303 let inner = values
5304 .iter()
5305 .map(to_python_repr)
5306 .collect::<Vec<_>>()
5307 .join(", ");
5308 format!("[{inner}]")
5309 }
5310 Value::Object(entries) => {
5311 let inner = entries
5312 .iter()
5313 .map(|(key, value)| {
5314 format!("'{}': {}", key.replace('\'', "\\'"), to_python_repr(value))
5315 })
5316 .collect::<Vec<_>>()
5317 .join(", ");
5318 format!("{{{inner}}}")
5319 }
5320 }
5321}
5322
5323fn resolve_collection(client: &mut impl RpcCaller, name_or_id: &str) -> Result<Value, String> {
5324 let trimmed = name_or_id.trim();
5325 if let Ok(id) = trimmed.parse::<i64>() {
5326 return Ok(Value::Number(id.into()));
5327 }
5328
5329 let collections = client.call("collections.list", None)?;
5330 let items = collections
5331 .get("items")
5332 .and_then(Value::as_array)
5333 .or_else(|| collections.as_array())
5334 .ok_or_else(|| "collections.list returned non-array result".to_string())?;
5335
5336 if let Some(collection) = items
5337 .iter()
5338 .find(|collection| collection.get("key").and_then(Value::as_str) == Some(trimmed))
5339 {
5340 return collection_key(collection);
5341 }
5342
5343 let exact = items
5344 .iter()
5345 .filter(|collection| collection.get("name").and_then(Value::as_str) == Some(trimmed))
5346 .collect::<Vec<_>>();
5347 if exact.len() == 1 {
5348 return collection_key(exact[0]);
5349 }
5350
5351 let needle = normalize_collection_name(trimmed);
5352 let fuzzy = items
5353 .iter()
5354 .filter(|collection| {
5355 collection
5356 .get("name")
5357 .and_then(Value::as_str)
5358 .map(normalize_collection_name)
5359 .is_some_and(|name| name.contains(&needle))
5360 })
5361 .collect::<Vec<_>>();
5362
5363 match fuzzy.len() {
5364 1 => collection_key(fuzzy[0]),
5365 0 => Err(format!(
5366 "COLLECTION_NOT_FOUND: No collection named {trimmed:?}"
5367 )),
5368 _ => Err(format!(
5369 "COLLECTION_AMBIGUOUS: Multiple collections match {trimmed:?}"
5370 )),
5371 }
5372}
5373
5374fn collection_key(collection: &Value) -> Result<Value, String> {
5375 collection
5376 .get("key")
5377 .cloned()
5378 .ok_or_else(|| "collection result is missing key".to_string())
5379}
5380
5381fn resolve_mutable_collection(
5382 client: &mut impl RpcCaller,
5383 name_or_id: &str,
5384 operation: &str,
5385) -> Result<Value, String> {
5386 let key = resolve_collection(client, name_or_id)?;
5387 if key.as_i64() == Some(0) {
5388 return Err(format!(
5389 "COLLECTION_NOT_FOUND: {name_or_id:?} resolved to library root (cannot {operation})"
5390 ));
5391 }
5392 Ok(key)
5393}
5394
5395fn insert_optional_string(value: &mut Value, key: &str, maybe: Option<String>) {
5396 if let (Some(map), Some(content)) = (value.as_object_mut(), maybe) {
5397 map.insert(key.to_string(), Value::String(content));
5398 }
5399}
5400
/// Canonicalises a collection name for case- and spacing-insensitive matching.
///
/// Leading and trailing whitespace is dropped, every inner run of whitespace
/// becomes a single space, and the result is lower-cased.
fn normalize_collection_name(name: &str) -> String {
    let mut collapsed = String::with_capacity(name.len());
    for word in name.split_whitespace() {
        // Words are never empty, so an empty buffer means "first word".
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed.to_lowercase()
}
5407
5408fn format_json(value: &Value, style: JsonStyle) -> Result<String, String> {
5409 let mut out = match style {
5410 JsonStyle::PythonCompact => to_python_compact_json(value),
5411 JsonStyle::Pretty => serde_json::to_string_pretty(value).map_err(|err| err.to_string())?,
5412 };
5413 out.push('\n');
5414 Ok(out)
5415}
5416
5417fn to_python_compact_json(value: &Value) -> String {
5418 match value {
5419 Value::Null => "null".to_string(),
5420 Value::Bool(value) => value.to_string(),
5421 Value::Number(value) => value.to_string(),
5422 Value::String(value) => {
5423 serde_json::to_string(value).expect("string serialization cannot fail")
5424 }
5425 Value::Array(values) => {
5426 let inner = values
5427 .iter()
5428 .map(to_python_compact_json)
5429 .collect::<Vec<_>>()
5430 .join(", ");
5431 format!("[{inner}]")
5432 }
5433 Value::Object(entries) => {
5434 let inner = entries
5435 .iter()
5436 .map(|(key, value)| {
5437 let key = serde_json::to_string(key).expect("string serialization cannot fail");
5438 format!("{key}: {}", to_python_compact_json(value))
5439 })
5440 .collect::<Vec<_>>()
5441 .join(", ");
5442 format!("{{{inner}}}")
5443 }
5444 }
5445}