1use std::{
4 env, fs,
5 io::{self, Read},
6 path::{Path, PathBuf},
7 process::Command as ProcessCommand,
8 thread,
9 time::{Duration, Instant, SystemTime, UNIX_EPOCH},
10};
11
12use clap::{error::ErrorKind, Parser, Subcommand};
13use serde_json::Value;
14use zotron_rpc::{StdProviderCommandRunner, UreqProviderHttpTransport, ZoteroRpc};
15use zotron_types::{
16 build_ocr_provider_request, builtin_ocr_provider_specs, execute_embedding_provider_request,
17 is_zotron_evidence_artifact, machine_artifact_exists_for_item,
18 machine_artifact_exists_in_sidecar, machine_artifact_store_root,
19 ocr_provider_spec as raw_ocr_provider_spec, parse_ocr_provider_response,
20 read_machine_artifact_sidecar, write_machine_artifact_sidecar, ArtifactStorePlatform,
21 EmbeddingRequestInput, MachineArtifactKind, OcrRequestInput, ProviderCommandRunner,
22 ProviderHttpInvocation, ProviderHttpTransport, DEFAULT_RPC_URL,
23};
24
/// Minimal interface for issuing a named call with optional JSON parameters.
///
/// Command handlers in this file accept `&mut impl RpcCaller`, so any type
/// implementing this trait can be supplied as the client.
pub trait RpcCaller {
    /// Invokes `method` with the optional JSON `params`.
    ///
    /// # Errors
    ///
    /// Returns the failure as a plain message string.
    fn call(&mut self, method: &str, params: Option<Value>) -> Result<Value, String>;
}
28
/// Serializable description of an OCR provider as reported by the CLI.
///
/// Instances are produced by `cli_ocr_provider_spec` from a
/// `zotron_types::OcrProviderSpec`.
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
pub struct CliOcrProviderSpec {
    /// Provider identifier; filled from the underlying spec's `provider_key`.
    pub id: &'static str,
    /// Provider key; also filled from the underlying spec's `provider_key`.
    pub provider: &'static str,
    /// String form of the underlying spec's request style.
    pub request_style: &'static str,
    /// Authentication scheme, copied from the underlying spec.
    pub auth: &'static str,
    /// Authentication header name, copied from the underlying spec.
    pub auth_header: &'static str,
    /// Copied from the underlying spec's `supports_pdf_direct` flag.
    pub supports_pdf_direct: bool,
    /// Copied from the underlying spec's `key_field`.
    pub key_field: &'static str,
}
39
/// Serializable description of an embedding provider as reported by the CLI.
///
/// Instances are produced by `embedding_provider_spec`.
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
pub struct CliEmbeddingProviderSpec {
    /// Provider identifier, copied from the underlying spec's `id`.
    pub id: &'static str,
    /// Provider key, copied from the underlying spec's `provider_key`.
    pub provider: &'static str,
    /// Request style; reported as `"dashscope"` when the provider key is
    /// `"alibaba"`, otherwise the underlying spec's request style string.
    pub request_style: &'static str,
    /// Default endpoint URL; empty when the underlying spec has none.
    pub default_url: String,
    /// Default model name, copied from the underlying spec.
    pub default_model: &'static str,
    /// Authentication scheme, copied from the underlying spec.
    pub auth: &'static str,
    /// Copied from the underlying spec's `key_field`.
    pub key_field: &'static str,
}
50
51pub fn ocr_provider_specs() -> Vec<CliOcrProviderSpec> {
52 builtin_ocr_provider_specs()
53 .into_iter()
54 .map(cli_ocr_provider_spec)
55 .collect()
56}
57
58pub fn ocr_provider_spec(provider: &str) -> Result<CliOcrProviderSpec, String> {
59 zotron_types::ocr_provider_spec(provider).map(cli_ocr_provider_spec)
60}
61
62pub fn embedding_provider_spec(provider: &str) -> Result<CliEmbeddingProviderSpec, String> {
63 let spec = zotron_types::embedding_provider_spec(provider)?;
64 Ok(CliEmbeddingProviderSpec {
65 id: spec.id,
66 provider: spec.provider_key,
67 request_style: if spec.provider_key == "alibaba" {
68 "dashscope"
69 } else {
70 spec.request_style.as_str()
71 },
72 default_url: spec.default_url.unwrap_or("").to_string(),
73 default_model: spec.default_model,
74 auth: spec.auth,
75 key_field: spec.key_field,
76 })
77}
78
79pub fn chunks_from_blocks(blocks: &[Value], max_chars: usize) -> Result<Vec<Value>, String> {
80 let typed = blocks
81 .iter()
82 .map(json_block_to_pdf_block)
83 .collect::<Result<Vec<_>, _>>()?;
84 let chunks = zotron_types::chunks_from_blocks(&typed, max_chars);
85 chunks
86 .into_iter()
87 .map(|chunk| chunk_to_cli_value(&chunk, &typed))
88 .collect()
89}
90
91fn cli_ocr_provider_spec(spec: zotron_types::OcrProviderSpec) -> CliOcrProviderSpec {
92 CliOcrProviderSpec {
93 id: spec.provider_key,
94 provider: spec.provider_key,
95 request_style: spec.request_style.as_str(),
96 auth: spec.auth,
97 auth_header: spec.auth_header,
98 supports_pdf_direct: spec.supports_pdf_direct,
99 key_field: spec.key_field,
100 }
101}
102
103fn json_block_to_pdf_block(value: &Value) -> Result<zotron_types::PdfEvidenceBlock, String> {
104 let block_key = value
105 .get("block_key")
106 .and_then(Value::as_str)
107 .ok_or_else(|| "block missing block_key".to_string())?
108 .to_string();
109 let item_key = value
110 .get("item_key")
111 .and_then(Value::as_str)
112 .ok_or_else(|| "block missing item_key".to_string())?
113 .to_string();
114 let attachment_key = value
115 .get("attachment_key")
116 .and_then(Value::as_str)
117 .ok_or_else(|| "block missing attachment_key".to_string())?
118 .to_string();
119 let page_idx = value
120 .get("page_idx")
121 .or_else(|| value.get("page"))
122 .and_then(Value::as_u64)
123 .unwrap_or(1);
124 let block_type = value
125 .get("type")
126 .or_else(|| value.get("block_type"))
127 .and_then(Value::as_str)
128 .unwrap_or("paragraph")
129 .to_string();
130 let section_path = value
131 .get("section_path")
132 .and_then(Value::as_array)
133 .map(|items| {
134 items
135 .iter()
136 .filter_map(Value::as_str)
137 .map(ToString::to_string)
138 .collect::<Vec<_>>()
139 })
140 .unwrap_or_default();
141 let text = value
142 .get("text")
143 .and_then(Value::as_str)
144 .unwrap_or("")
145 .to_string();
146 let bbox = value.get("bbox").and_then(value_bbox4);
147
148 Ok(zotron_types::PdfEvidenceBlock {
149 block_key,
150 item_key,
151 attachment_key,
152 page_idx,
153 block_type,
154 bbox,
155 section_path,
156 text,
157 })
158}
159
160fn chunk_to_cli_value(
161 chunk: &zotron_types::StructureChunk,
162 blocks: &[zotron_types::PdfEvidenceBlock],
163) -> Result<Value, String> {
164 let refs = chunk
165 .block_keys
166 .iter()
167 .filter_map(|key| blocks.iter().find(|block| &block.block_key == key))
168 .map(|block| {
169 serde_json::json!({
170 "block_key": block.block_key,
171 "page_idx": block.page_idx,
172 "bbox": block.bbox.map(|bbox| bbox.iter().map(|n| {
173 if n.fract() == 0.0 {
174 Value::from(*n as i64)
175 } else {
176 Value::from(*n)
177 }
178 }).collect::<Vec<_>>()),
179 })
180 })
181 .collect::<Vec<_>>();
182 Ok(serde_json::json!({
183 "chunk_key": chunk.chunk_key,
184 "item_key": chunk.item_key,
185 "attachment_key": chunk.attachment_key,
186 "block_keys": chunk.block_keys,
187 "section_path": chunk.section_path,
188 "text": chunk.text,
189 "page_start": chunk.page_start,
190 "page_end": chunk.page_end,
191 "evidence_refs": refs,
192 }))
193}
194
195fn value_bbox4(value: &Value) -> Option<[f64; 4]> {
196 let arr = value.as_array()?;
197 if arr.len() != 4 {
198 return None;
199 }
200 Some([
201 arr[0].as_f64()?,
202 arr[1].as_f64()?,
203 arr[2].as_f64()?,
204 arr[3].as_f64()?,
205 ])
206}
207
208impl RpcCaller for ZoteroRpc {
209 fn call(&mut self, method: &str, params: Option<Value>) -> Result<Value, String> {
210 self.call(method, params).map_err(|err| err.to_string())
211 }
212}
213
// Top-level argument parser for the `zotron` binary.
// Plain `//` comments are used here on purpose: clap's derive macros turn
// `///` doc comments into help text, which would change the CLI output.
#[derive(Debug, Parser)]
#[command(name = "zotron", about = "Rust client + CLI for the Zotron XPI")]
struct Cli {
    // The selected top-level subcommand.
    #[command(subcommand)]
    command: Command,
}
220
// Subcommands available under `ocr`.
// Plain `//` comments are used here on purpose: clap's derive macros turn
// `///` doc comments into help text, which would change the CLI output.
#[derive(Debug, Subcommand)]
enum OcrCommand {
    // Takes no arguments; handled by listing `ocr_provider_specs()`.
    Providers,
    // `run`: a single provider invocation. `--input` and `--file` are
    // mutually exclusive and one of them is required (enforced by the
    // handler, not by clap).
    #[command(name = "run")]
    Run {
        #[arg(long)]
        provider: String,
        // JSON input source, passed to `read_json_input` by the handler.
        #[arg(long)]
        input: Option<String>,
        // File input, passed to `ocr_input_from_file` together with the
        // item key, attachment key and MIME type below.
        #[arg(long)]
        file: Option<String>,
        #[arg(long = "item-key")]
        item_key: Option<String>,
        #[arg(long = "attachment-key")]
        attachment_key: Option<String>,
        #[arg(long = "mime-type")]
        mime_type: Option<String>,
        // Overrides the URL supplied by the provider request when present.
        #[arg(long)]
        endpoint: Option<String>,
        #[arg(long = "api-key-env")]
        api_key_env: Option<String>,
    },
    // `status`: requires `--collection`; talks to the RPC endpoint at `--url`.
    Status {
        #[arg(long)]
        collection: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `process`: handled by `run_ocr_parse_pdf_command`.
    #[command(name = "process")]
    Process {
        #[arg(long, default_value = "mineru")]
        provider: String,
        #[arg(long)]
        parent: String,
        #[arg(long)]
        attachment: Option<String>,
        #[arg(long = "source-url")]
        source_url: Option<String>,
        // `--result-dir` and `--result-zip` may not be combined (checked by
        // the handler).
        #[arg(long = "result-dir")]
        result_dir: Option<String>,
        #[arg(long = "result-zip")]
        result_zip: Option<String>,
        #[arg(long = "provider-endpoint")]
        provider_endpoint: Option<String>,
        #[arg(long = "api-key-env", default_value = "ZOTRON_MINERU_API_KEY")]
        api_key_env: String,
        #[arg(long = "poll-interval-seconds", default_value_t = 5)]
        poll_interval_seconds: u64,
        #[arg(long = "timeout-seconds", default_value_t = 900)]
        timeout_seconds: u64,
        #[arg(long = "chunk-chars", default_value_t = 1200)]
        chunk_chars: usize,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
295
// Top-level subcommands of the CLI.
// Plain `//` comments are used here on purpose: clap's derive macros turn
// `///` doc comments into help text, which would change the CLI output.
// Variants either carry their own `--url` option or delegate to a nested
// subcommand enum; `command_url` extracts the effective URL for each.
#[derive(Debug, Subcommand)]
enum Command {
    // `ping`: only takes the RPC endpoint.
    Ping {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `rpc <method> [params_json]`: raw call; params default to `{}`.
    Rpc {
        method: String,
        #[arg(default_value = "{}")]
        params_json: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        paginate: bool,
        #[arg(long, default_value_t = 100)]
        page_size: usize,
    },
    // `push <json_file>`: optional PDF and collection; `--on-duplicate`
    // defaults to `skip`.
    Push {
        json_file: String,
        #[arg(long)]
        pdf: Option<String>,
        #[arg(long)]
        collection: Option<String>,
        #[arg(long = "on-duplicate", default_value = "skip")]
        on_duplicate: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long = "dry-run")]
        dry_run: bool,
    },
    // `system <subcommand>`.
    System {
        #[command(subcommand)]
        command: SystemCommand,
    },
    // `search`: arguments live in `SearchArgs`.
    Search(SearchArgs),
    // `items <subcommand>`.
    Items {
        #[command(subcommand)]
        command: ItemsCommand,
    },
    // `collections <subcommand>`.
    Collections {
        #[command(subcommand)]
        command: CollectionsCommand,
    },
    // `notes <subcommand>`.
    Notes {
        #[command(subcommand)]
        command: NotesCommand,
    },
    // `attachments <subcommand>`.
    Attachments {
        #[command(subcommand)]
        command: AttachmentsCommand,
    },
    // `settings <subcommand>`.
    Settings {
        #[command(subcommand)]
        command: SettingsCommand,
    },
    // `tags <subcommand>`.
    Tags {
        #[command(subcommand)]
        command: TagsCommand,
    },
    // `export`: arguments live in `ExportArgs`.
    Export(ExportArgs),
    // `annotations <subcommand>`.
    Annotations {
        #[command(subcommand)]
        command: AnnotationsCommand,
    },
    // `ocr <subcommand>`.
    Ocr {
        #[command(subcommand)]
        command: OcrCommand,
    },
    // `rag <subcommand>`.
    Rag {
        #[command(subcommand)]
        command: RagCommand,
    },
    // `find-pdfs`: requires `--collection`; `--limit` defaults to 0.
    #[command(name = "find-pdfs")]
    FindPdfs {
        #[arg(long)]
        collection: String,
        #[arg(long, default_value_t = 0)]
        limit: usize,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
399
/// Option bundle whose fields mirror the arguments of `RagCommand::Search`
/// (everything except `url`).
///
/// NOTE(review): not constructed in the visible part of this file; it is
/// presumably filled from `RagCommand::Search` by the rag command handler.
struct RagHitsOptions {
    /// The search query text.
    query: String,
    /// Optional collection to restrict to.
    collection: Option<String>,
    /// Explicit keys (the `--key` values).
    keys: Vec<String>,
    /// Mirrors the `--zotero` flag.
    zotero: bool,
    /// Mirrors `--top-spans-per-item`.
    top_spans_per_item: u64,
    /// Mirrors `--include-fulltext-spans`.
    include_fulltext_spans: bool,
    /// Mirrors `--limit` (alias `--top-k`).
    top_k: u64,
    /// Output format name; the CLI restricts it to `json` or `jsonl`.
    output: String,
}
410
// Subcommands available under `rag`.
// Plain `//` comments are used here on purpose: clap's derive macros turn
// `///` doc comments into help text, which would change the CLI output.
#[derive(Debug, Subcommand)]
enum RagCommand {
    // `providers`: takes no arguments.
    #[command(name = "providers")]
    Providers,
    // `embed`: requires `--provider` and `--input`; the remaining options
    // are optional overrides.
    #[command(name = "embed")]
    Embed {
        #[arg(long)]
        provider: String,
        #[arg(long)]
        input: String,
        #[arg(long)]
        endpoint: Option<String>,
        #[arg(long)]
        model: Option<String>,
        #[arg(long = "input-type")]
        input_type: Option<String>,
        #[arg(long = "api-key-env")]
        api_key_env: Option<String>,
    },
    // `status`: requires `--collection`.
    Status {
        #[arg(long)]
        collection: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `search <query>`.
    #[command(name = "search")]
    Search {
        query: String,
        #[arg(long)]
        collection: Option<String>,
        // Repeatable `--key` (also accepted as `--keys`).
        #[arg(long = "key", alias = "keys")]
        keys: Vec<String>,
        #[arg(long)]
        zotero: bool,
        #[arg(long = "top-spans-per-item", default_value_t = 3)]
        top_spans_per_item: u64,
        #[arg(long = "include-fulltext-spans")]
        include_fulltext_spans: bool,
        // `--limit` (also accepted as `--top-k`), default 50.
        #[arg(long = "limit", alias = "top-k", default_value_t = 50)]
        top_k: u64,
        // Restricted by clap to `json` or `jsonl`.
        #[arg(long, default_value = "json", value_parser = ["json", "jsonl"])]
        output: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
467
// Subcommands available under `system`. Every variant accepts `--url`.
// Plain `//` comments are used here on purpose: clap's derive macros turn
// `///` doc comments into help text, which would change the CLI output.
#[derive(Debug, Subcommand)]
enum SystemCommand {
    // `version`.
    Version {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `libraries`.
    Libraries {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `library-stats`: optional numeric `--library`.
    #[command(name = "library-stats")]
    LibraryStats {
        #[arg(long)]
        library: Option<i64>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `schema`: optional `--type`.
    Schema {
        #[arg(long = "type")]
        item_type: Option<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `current-collection`.
    #[command(name = "current-collection")]
    CurrentCollection {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `list-methods`.
    #[command(name = "list-methods")]
    ListMethods {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `describe [method]`: the method name is an optional positional.
    Describe {
        method: Option<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
514
// Arguments of the top-level `search` command.
// Plain `//` comments are used here on purpose: clap's derive macros turn
// `///` doc comments into help text, which would change the CLI output.
// Either a plain search (optional positional query plus filters) or one of
// the nested `SearchManagementCommand` subcommands.
#[derive(Debug, clap::Args)]
struct SearchArgs {
    // Optional positional query text.
    query: Option<String>,
    #[arg(long)]
    fulltext: bool,
    #[arg(long)]
    author: Option<String>,
    #[arg(long)]
    after: Option<String>,
    #[arg(long)]
    before: Option<String>,
    #[arg(long)]
    journal: Option<String>,
    #[arg(long)]
    tag: Option<String>,
    #[arg(long)]
    doi: Option<String>,
    #[arg(long)]
    isbn: Option<String>,
    #[arg(long)]
    issn: Option<String>,
    #[arg(long)]
    collection: Option<String>,
    #[arg(long, default_value_t = 50)]
    limit: u64,
    #[arg(long, default_value_t = 0)]
    offset: u64,
    // Used by `command_url` only when no management subcommand is given;
    // otherwise the subcommand's own `--url` is used.
    #[arg(long, default_value = DEFAULT_RPC_URL)]
    url: String,
    #[command(subcommand)]
    management: Option<SearchManagementCommand>,
}
558
// Optional subcommands nested under `search`.
// Plain `//` comments are used here on purpose: clap's derive macros turn
// `///` doc comments into help text, which would change the CLI output.
#[derive(Debug, Subcommand)]
enum SearchManagementCommand {
    // `saved-searches`.
    #[command(name = "saved-searches")]
    SavedSearches {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `create-saved <name>`: at least one `--condition` is required.
    #[command(name = "create-saved")]
    CreateSaved {
        name: String,
        #[arg(long = "condition", required = true)]
        condition: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `delete-saved <search_key>`.
    #[command(name = "delete-saved")]
    DeleteSaved {
        search_key: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
588
// Subcommands available under `items`. Every variant accepts `--url`.
// Plain `//` comments are used here on purpose: clap's derive macros turn
// `///` doc comments into help text, which would change the CLI output.
#[derive(Debug, Subcommand)]
enum ItemsCommand {
    // `add`: the source is given through one of the optional flags below.
    Add {
        #[arg(long)]
        doi: Option<String>,
        #[arg(long)]
        isbn: Option<String>,
        #[arg(long = "from-url")]
        from_url: Option<String>,
        #[arg(long)]
        file: Option<String>,
        #[arg(long)]
        collection: Option<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `create`: requires `--type`; `--field` may be repeated.
    Create {
        #[arg(long = "type")]
        item_type: String,
        #[arg(long = "field")]
        fields: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `update <key>`: `--field` may be repeated.
    Update {
        key: String,
        #[arg(long = "field")]
        fields: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `delete <key>`.
    Delete {
        key: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `trash <items>...`: positional list.
    Trash {
        items: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `restore <item>`.
    Restore {
        item: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `merge-duplicates <keys>...`: positional list.
    #[command(name = "merge-duplicates")]
    MergeDuplicates {
        keys: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `add-related <key> --target <key>`.
    #[command(name = "add-related")]
    AddRelated {
        key: String,
        #[arg(long)]
        target: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `remove-related <key> --target <key>`.
    #[command(name = "remove-related")]
    RemoveRelated {
        key: String,
        #[arg(long)]
        target: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `get <item>`.
    Get {
        item: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `list`: paging via `--limit`/`--offset`; `--direction` defaults to `asc`.
    List {
        #[arg(long, default_value_t = 50)]
        limit: u64,
        #[arg(long, default_value_t = 0)]
        offset: u64,
        #[arg(long)]
        sort: Option<String>,
        #[arg(long, default_value = "asc")]
        direction: String,
        #[arg(long)]
        trash: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `find-duplicates`.
    #[command(name = "find-duplicates")]
    FindDuplicates {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `recent`: `--type` defaults to `added`.
    Recent {
        #[arg(long, default_value_t = 20)]
        limit: u64,
        #[arg(long, default_value_t = 0)]
        offset: u64,
        #[arg(long = "type", default_value = "added")]
        recent_type: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `fulltext <key>`.
    Fulltext {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `related <key>`.
    Related {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `citation-key <key>`.
    #[command(name = "citation-key")]
    CitationKey {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
745
// Subcommands available under `settings`.
// Plain `//` comments are used here on purpose: clap's derive macros turn
// `///` doc comments into help text, which would change the CLI output.
#[derive(Debug, Subcommand)]
enum SettingsCommand {
    // `get <key>`.
    Get {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `list`, also visible as `get-all`.
    #[command(visible_alias = "get-all")]
    List {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `set [pairs]...`: positional pairs and/or an optional `--file`.
    Set {
        pairs: Vec<String>,
        #[arg(long)]
        file: Option<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
773
// Subcommands available under `tags`.
// Plain `//` comments are used here on purpose: clap's derive macros turn
// `///` doc comments into help text, which would change the CLI output.
#[derive(Debug, Subcommand)]
enum TagsCommand {
    // `list`: `--limit` defaults to 200.
    List {
        #[arg(long, default_value_t = 200)]
        limit: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `rename <old> <new>`.
    Rename {
        old: String,
        new: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `delete <tag>`.
    Delete {
        tag: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `add <keys>... --tag <tag>`: at least one `--tag` is required.
    Add {
        keys: Vec<String>,
        #[arg(long = "tag", required = true)]
        tags: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `remove <keys>... --tag <tag>`: at least one `--tag` is required.
    Remove {
        keys: Vec<String>,
        #[arg(long = "tag", required = true)]
        tags: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
821
// Arguments of the top-level `export` command.
// Plain `//` comments are used here on purpose: clap's derive macros turn
// `///` doc comments into help text, which would change the CLI output.
#[derive(Debug, clap::Args)]
struct ExportArgs {
    // Positional list of keys.
    keys: Vec<String>,
    // Defaults to `bibtex`.
    #[arg(long, default_value = "bibtex")]
    format: String,
    #[arg(long)]
    collection: Option<String>,
    // Defaults to the GB/T 7714-2015 numeric style URL.
    #[arg(long, default_value = "http://www.zotero.org/styles/gb-t-7714-2015-numeric")]
    style: String,
    #[arg(long)]
    html: bool,
    #[arg(long, default_value = DEFAULT_RPC_URL)]
    url: String,
}
841
// Subcommands available under `annotations`.
// Plain `//` comments are used here on purpose: clap's derive macros turn
// `///` doc comments into help text, which would change the CLI output.
#[derive(Debug, Subcommand)]
enum AnnotationsCommand {
    // `list --parent <key>`.
    List {
        #[arg(long)]
        parent: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `create`: requires `--parent` and `--type`; `--color` defaults to
    // `#ffd400`.
    Create {
        #[arg(long)]
        parent: String,
        #[arg(long = "type")]
        annotation_type: String,
        #[arg(long)]
        position: Option<String>,
        #[arg(long = "sort-index")]
        sort_index: Option<String>,
        #[arg(long)]
        text: Option<String>,
        #[arg(long)]
        comment: Option<String>,
        #[arg(long, default_value = "#ffd400")]
        color: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `delete <annotation_key>`.
    Delete {
        annotation_key: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
883
// Subcommands available under `attachments`.
// Plain `//` comments are used here on purpose: clap's derive macros turn
// `///` doc comments into help text, which would change the CLI output.
#[derive(Debug, Subcommand)]
enum AttachmentsCommand {
    // `list --parent <key>` with `--limit`/`--offset` paging.
    List {
        #[arg(long)]
        parent: String,
        #[arg(long, default_value_t = 50)]
        limit: u64,
        #[arg(long, default_value_t = 0)]
        offset: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `get <key>`.
    Get {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `fulltext <key>`.
    Fulltext {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `path <key>`.
    Path {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `add --parent <key>`: source given via `--path` or `--from-url`.
    Add {
        #[arg(long)]
        parent: String,
        #[arg(long)]
        path: Option<String>,
        #[arg(long = "from-url")]
        from_url: Option<String>,
        #[arg(long)]
        title: Option<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `delete <key>`.
    Delete {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
    // `find-pdf --parent <key>`.
    #[command(name = "find-pdf")]
    FindPdf {
        #[arg(long)]
        parent: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
949
// Subcommands available under `notes`.
// Plain `//` comments are used here on purpose: clap's derive macros turn
// `///` doc comments into help text, which would change the CLI output.
#[derive(Debug, Subcommand)]
enum NotesCommand {
    // `list --parent <key>` with `--limit`/`--offset` paging.
    List {
        #[arg(long)]
        parent: String,
        #[arg(long, default_value_t = 50)]
        limit: u64,
        #[arg(long, default_value_t = 0)]
        offset: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `get <note_key>`.
    Get {
        note_key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `create`: requires `--parent` and `--content`; `--tag` may be repeated.
    Create {
        #[arg(long)]
        parent: String,
        #[arg(long)]
        content: String,
        #[arg(long = "tag")]
        tags: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `update <note_key> --content <text>`.
    Update {
        note_key: String,
        #[arg(long)]
        content: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `delete <note_key>`.
    Delete {
        note_key: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `search <query>`: `--limit` defaults to 50.
    Search {
        query: String,
        #[arg(long, default_value_t = 50)]
        limit: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
1009
// Subcommands available under `collections`.
// Plain `//` comments are used here on purpose: clap's derive macros turn
// `///` doc comments into help text, which would change the CLI output.
#[derive(Debug, Subcommand)]
enum CollectionsCommand {
    // `list`.
    List {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `tree`.
    Tree {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `get <name_or_id>`.
    Get {
        name_or_id: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `get-items <name_or_id>`, also visible as `items`; `--limit` has no
    // default.
    #[command(name = "get-items", visible_alias = "items")]
    GetItems {
        name_or_id: String,
        #[arg(long)]
        limit: Option<u64>,
        #[arg(long, default_value_t = 0)]
        offset: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `stats <name_or_id>`.
    Stats {
        name_or_id: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    // `rename <old_name> <new_name>`.
    Rename {
        old_name: String,
        new_name: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
    // `create <name>`: optional `--parent`.
    Create {
        name: String,
        #[arg(long)]
        parent: Option<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
    // `delete <name_or_id>`.
    Delete {
        name_or_id: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
    // `add-items <collection> <item_keys>...`.
    #[command(name = "add-items")]
    AddItems {
        collection: String,
        item_keys: Vec<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
    // `remove-items <collection> <item_keys>...`.
    #[command(name = "remove-items")]
    RemoveItems {
        collection: String,
        item_keys: Vec<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
}
1093
/// Selects the JSON output layout passed to `format_json`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum JsonStyle {
    /// Used by the OCR command handler.
    /// NOTE(review): presumably a compact layout matching the output of a
    /// Python implementation; confirm against `format_json`.
    PythonCompact,
    /// NOTE(review): presumably indented, human-readable output; confirm
    /// against `format_json`.
    Pretty,
}
1101
/// Result of a successful `parse_cli` call.
enum ParseOutcome<T> {
    /// Arguments parsed into a runnable command value.
    Command(T),
    /// clap produced help or version text that should be returned as-is.
    Display(String),
}
1106
1107fn parse_cli<T>(
1108 args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1109) -> Result<ParseOutcome<T>, String>
1110where
1111 T: Parser,
1112{
1113 match T::try_parse_from(args) {
1114 Ok(cli) => Ok(ParseOutcome::Command(cli)),
1115 Err(err)
1116 if matches!(
1117 err.kind(),
1118 ErrorKind::DisplayHelp | ErrorKind::DisplayVersion
1119 ) =>
1120 {
1121 Ok(ParseOutcome::Display(err.to_string()))
1122 }
1123 Err(err) => Err(err.to_string()),
1124 }
1125}
1126
1127pub fn format_error_json(message: &str) -> String {
1128 let message = message.trim_end();
1129 let (code, message) = split_error_code(message).unwrap_or(("RUNTIME_ERROR", message));
1130 serde_json::json!({"error": {"code": code, "message": message}}).to_string()
1131}
1132
/// Splits a message of the form `CODE: text` at its first colon.
///
/// Returns `(code, text)` with leading whitespace removed from `text`, but
/// only when `code` is non-empty and consists solely of ASCII uppercase
/// letters, ASCII digits and underscores. Returns `None` when there is no
/// colon or the prefix does not have that shape.
fn split_error_code(message: &str) -> Option<(&str, &str)> {
    let (prefix, remainder) = message.split_once(':')?;
    if prefix.is_empty() {
        return None;
    }
    let looks_like_code = prefix
        .chars()
        .all(|ch| matches!(ch, 'A'..='Z' | '0'..='9' | '_'));
    if !looks_like_code {
        return None;
    }
    Some((prefix, remainder.trim_start()))
}
1145
1146pub fn run(
1147 args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1148) -> Result<String, String> {
1149 let cli = match parse_cli::<Cli>(args)? {
1150 ParseOutcome::Command(cli) => cli,
1151 ParseOutcome::Display(output) => return Ok(output),
1152 };
1153 let url = command_url(&cli.command);
1154 let mut client = ZoteroRpc::new(url);
1155 run_command(cli.command, &mut client)
1156}
1157
1158pub fn run_with_client(
1159 args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1160 client: &mut impl RpcCaller,
1161) -> Result<String, String> {
1162 let cli = match parse_cli::<Cli>(args)? {
1163 ParseOutcome::Command(cli) => cli,
1164 ParseOutcome::Display(output) => return Ok(output),
1165 };
1166 run_command(cli.command, client)
1167}
1168
1169fn rag_command_url(command: &RagCommand) -> String {
1170 match command {
1171 RagCommand::Providers => DEFAULT_RPC_URL.to_string(),
1172 RagCommand::Embed { .. } => DEFAULT_RPC_URL.to_string(),
1173 RagCommand::Status { url, .. } => url.clone(),
1174 RagCommand::Search { url, .. } => url.clone(),
1175 }
1176}
1177
1178fn command_url(command: &Command) -> String {
1179 match command {
1180 Command::Ping { url }
1181 | Command::Rpc { url, .. }
1182 | Command::Push { url, .. }
1183 | Command::FindPdfs { url, .. } => url.clone(),
1184 Command::Ocr { command } => match command {
1185 OcrCommand::Providers => DEFAULT_RPC_URL.to_string(),
1186 OcrCommand::Run { .. } => DEFAULT_RPC_URL.to_string(),
1187 OcrCommand::Status { url, .. } => url.clone(),
1188 OcrCommand::Process { url, .. } => url.clone(),
1189 },
1190 Command::Rag { command } => rag_command_url(command),
1191 Command::System { command } => match command {
1192 SystemCommand::Version { url }
1193 | SystemCommand::Libraries { url }
1194 | SystemCommand::LibraryStats { url, .. }
1195 | SystemCommand::Schema { url, .. }
1196 | SystemCommand::CurrentCollection { url }
1197 | SystemCommand::ListMethods { url }
1198 | SystemCommand::Describe { url, .. } => url.clone(),
1199 },
1200 Command::Search(ref args) => match &args.management {
1201 Some(SearchManagementCommand::SavedSearches { url })
1202 | Some(SearchManagementCommand::CreateSaved { url, .. })
1203 | Some(SearchManagementCommand::DeleteSaved { url, .. }) => url.clone(),
1204 None => args.url.clone(),
1205 },
1206 Command::Items { command } => match command {
1207 ItemsCommand::Add { url, .. }
1208 | ItemsCommand::Create { url, .. }
1209 | ItemsCommand::Update { url, .. }
1210 | ItemsCommand::Delete { url, .. }
1211 | ItemsCommand::Trash { url, .. }
1212 | ItemsCommand::Restore { url, .. }
1213 | ItemsCommand::MergeDuplicates { url, .. }
1214 | ItemsCommand::AddRelated { url, .. }
1215 | ItemsCommand::RemoveRelated { url, .. }
1216 | ItemsCommand::Get { url, .. }
1217 | ItemsCommand::List { url, .. }
1218 | ItemsCommand::FindDuplicates { url }
1219 | ItemsCommand::Recent { url, .. }
1220 | ItemsCommand::Fulltext { url, .. }
1221 | ItemsCommand::Related { url, .. }
1222 | ItemsCommand::CitationKey { url, .. } => url.clone(),
1223 },
1224 Command::Collections { command } => match command {
1225 CollectionsCommand::List { url }
1226 | CollectionsCommand::Tree { url }
1227 | CollectionsCommand::Get { url, .. }
1228 | CollectionsCommand::GetItems { url, .. }
1229 | CollectionsCommand::Stats { url, .. }
1230 | CollectionsCommand::Rename { url, .. }
1231 | CollectionsCommand::Create { url, .. }
1232 | CollectionsCommand::Delete { url, .. }
1233 | CollectionsCommand::AddItems { url, .. }
1234 | CollectionsCommand::RemoveItems { url, .. } => url.clone(),
1235 },
1236 Command::Notes { command } => match command {
1237 NotesCommand::List { url, .. }
1238 | NotesCommand::Get { url, .. }
1239 | NotesCommand::Create { url, .. }
1240 | NotesCommand::Update { url, .. }
1241 | NotesCommand::Delete { url, .. }
1242 | NotesCommand::Search { url, .. } => url.clone(),
1243 },
1244 Command::Attachments { command } => match command {
1245 AttachmentsCommand::List { url, .. }
1246 | AttachmentsCommand::Get { url, .. }
1247 | AttachmentsCommand::Fulltext { url, .. }
1248 | AttachmentsCommand::Path { url, .. }
1249 | AttachmentsCommand::Add { url, .. }
1250 | AttachmentsCommand::Delete { url, .. }
1251 | AttachmentsCommand::FindPdf { url, .. } => url.clone(),
1252 },
1253 Command::Settings { command } => match command {
1254 SettingsCommand::Get { url, .. }
1255 | SettingsCommand::List { url }
1256 | SettingsCommand::Set { url, .. } => url.clone(),
1257 },
1258 Command::Tags { command } => match command {
1259 TagsCommand::List { url, .. }
1260 | TagsCommand::Rename { url, .. }
1261 | TagsCommand::Delete { url, .. }
1262 | TagsCommand::Add { url, .. }
1263 | TagsCommand::Remove { url, .. } => url.clone(),
1264 },
1265 Command::Export(ref args) => args.url.clone(),
1266 Command::Annotations { command } => match command {
1267 AnnotationsCommand::List { url, .. }
1268 | AnnotationsCommand::Create { url, .. }
1269 | AnnotationsCommand::Delete { url, .. } => url.clone(),
1270 },
1271 }
1272}
1273
1274fn run_ocr_command(command: OcrCommand, client: &mut impl RpcCaller) -> Result<String, String> {
1275 let value = match command {
1276 OcrCommand::Providers => serde_json::json!({
1277 "providers": ocr_provider_specs(),
1278 }),
1279 OcrCommand::Run {
1280 provider,
1281 input,
1282 file,
1283 item_key,
1284 attachment_key,
1285 mime_type,
1286 endpoint,
1287 api_key_env,
1288 } => run_ocr_provider_json_command(OcrProviderJsonOptions {
1289 provider,
1290 input,
1291 file,
1292 item_key,
1293 attachment_key,
1294 mime_type,
1295 endpoint,
1296 api_key_env,
1297 })?,
1298 OcrCommand::Status { collection, .. } => run_ocr_status_command(client, collection)?,
1299 OcrCommand::Process {
1300 provider,
1301 parent,
1302 attachment,
1303 source_url,
1304 result_dir,
1305 result_zip,
1306 provider_endpoint,
1307 api_key_env,
1308 poll_interval_seconds,
1309 timeout_seconds,
1310 chunk_chars,
1311 ..
1312 } => run_ocr_parse_pdf_command(
1313 client,
1314 OcrParsePdfOptions {
1315 provider,
1316 parent,
1317 attachment,
1318 source_url,
1319 result_dir,
1320 result_zip,
1321 provider_endpoint,
1322 api_key_env,
1323 poll_interval_seconds,
1324 timeout_seconds,
1325 chunk_chars,
1326 },
1327 )?,
1328 };
1329 format_json(&value, JsonStyle::PythonCompact)
1330}
1331
/// Arguments of `ocr process`, bundled for `run_ocr_parse_pdf_command`.
///
/// Fields mirror `OcrCommand::Process` except for `url`.
struct OcrParsePdfOptions {
    /// OCR provider name (`--provider`, CLI default `mineru`).
    provider: String,
    /// Value of `--parent`.
    parent: String,
    /// Value of `--attachment`, if given.
    attachment: Option<String>,
    /// Value of `--source-url`, if given.
    source_url: Option<String>,
    /// Value of `--result-dir`; may not be combined with `result_zip`.
    result_dir: Option<String>,
    /// Value of `--result-zip`; may not be combined with `result_dir`.
    result_zip: Option<String>,
    /// Value of `--provider-endpoint`, if given.
    provider_endpoint: Option<String>,
    /// Value of `--api-key-env` (CLI default `ZOTRON_MINERU_API_KEY`).
    api_key_env: String,
    /// Value of `--poll-interval-seconds` (CLI default 5).
    poll_interval_seconds: u64,
    /// Value of `--timeout-seconds` (CLI default 900).
    timeout_seconds: u64,
    /// Value of `--chunk-chars` (CLI default 1200).
    chunk_chars: usize,
}
1345
/// Arguments of `ocr run`, bundled for `run_ocr_provider_json_command`.
struct OcrProviderJsonOptions {
    /// OCR provider name (`--provider`).
    provider: String,
    /// JSON input source (`--input`); mutually exclusive with `file`.
    input: Option<String>,
    /// Input file (`--file`); mutually exclusive with `input`.
    file: Option<String>,
    /// Value of `--item-key`; only used together with `file`.
    item_key: Option<String>,
    /// Value of `--attachment-key`; only used together with `file`.
    attachment_key: Option<String>,
    /// Value of `--mime-type`; only used together with `file`.
    mime_type: Option<String>,
    /// When set, replaces the URL from the provider request.
    endpoint: Option<String>,
    /// Value of `--api-key-env`, passed on when building the HTTP transport.
    api_key_env: Option<String>,
}
1356
1357fn run_ocr_provider_json_command(options: OcrProviderJsonOptions) -> Result<Value, String> {
1358 let input: OcrRequestInput = match (options.input, options.file) {
1359 (Some(input), None) => read_json_input(&input)?,
1360 (None, Some(file)) => ocr_input_from_file(
1361 file,
1362 options.item_key,
1363 options.attachment_key,
1364 options.mime_type,
1365 )?,
1366 (Some(_), Some(_)) => {
1367 return Err("INVALID_ARGS: use either --input or --file, not both".to_string())
1368 }
1369 (None, None) => return Err("INVALID_ARGS: provide --input JSON or --file".to_string()),
1370 };
1371 let request = build_ocr_provider_request(&options.provider, &input)?;
1372 let payload = if request.command.is_empty() {
1373 let method = request
1374 .method
1375 .ok_or_else(|| format!("OCR provider {} missing HTTP method", request.provider))?;
1376 let auth_scheme = raw_ocr_provider_spec(&options.provider)?.auth;
1377 let mut transport =
1378 provider_http_transport_with_auth(options.api_key_env.as_deref(), auth_scheme)?;
1379 transport.post_json(&ProviderHttpInvocation {
1380 provider: request.provider.to_string(),
1381 style: request.style.to_string(),
1382 method: method.to_string(),
1383 url: options
1384 .endpoint
1385 .or_else(|| request.url.map(ToString::to_string)),
1386 auth_header_name: request.auth_header.map(ToString::to_string),
1387 auth_header_value: None,
1388 body: request.body,
1389 })?
1390 } else {
1391 let mut command_runner = StdProviderCommandRunner;
1392 command_runner.run_json(&request.command)?
1393 };
1394 let blocks = match parse_ocr_provider_response(
1395 request.provider,
1396 &payload,
1397 &input.item_key,
1398 &input.attachment_key,
1399 ) {
1400 Ok(blocks) => blocks,
1401 Err(err) => {
1402 if let Some(task) = ocr_async_task_result(request.provider, &payload) {
1403 return Ok(task);
1404 }
1405 return Err(err);
1406 }
1407 };
1408
1409 Ok(serde_json::json!({
1410 "provider": request.provider,
1411 "blocks": blocks,
1412 }))
1413}
1414
1415fn run_ocr_parse_pdf_command(
1416 client: &mut impl RpcCaller,
1417 mut options: OcrParsePdfOptions,
1418) -> Result<Value, String> {
1419 let spec = raw_ocr_provider_spec(&options.provider)?;
1420 if spec.provider_key != "mineru" {
1421 return Err(
1422 "INVALID_ARGS: ocr parse-pdf currently supports only --provider mineru".to_string(),
1423 );
1424 }
1425 if options.result_dir.is_some() && options.result_zip.is_some() {
1426 return Err("INVALID_ARGS: use either --result-dir or --result-zip, not both".to_string());
1427 }
1428 if options.source_url.is_some()
1429 && (options.result_dir.is_some() || options.result_zip.is_some())
1430 {
1431 return Err(
1432 "INVALID_ARGS: --source-url cannot be combined with --result-dir/--result-zip"
1433 .to_string(),
1434 );
1435 }
1436
1437 let attachment = match options.attachment.take() {
1438 Some(key) => key,
1439 None => resolve_first_pdf_attachment_key(client, &options.parent)?,
1440 };
1441 options.attachment = Some(attachment.clone());
1442
1443 let attachment_path = resolve_attachment_path(client, &attachment)?;
1444 let storage_dir = attachment_path
1445 .parent()
1446 .ok_or_else(|| {
1447 format!(
1448 "ATTACHMENT_PATH_INVALID: attachment path has no parent directory: {}",
1449 attachment_path.display()
1450 )
1451 })?
1452 .to_path_buf();
1453 let file_name = attachment_path
1454 .file_name()
1455 .and_then(|name| name.to_str())
1456 .unwrap_or("document.pdf")
1457 .to_string();
1458
1459 let source = load_mineru_result_source(&options, &attachment_path, &file_name)?;
1460 let artifacts = persist_mineru_result_sidecars(
1461 &storage_dir,
1462 &options.parent,
1463 &attachment,
1464 &options.provider,
1465 &source,
1466 options.chunk_chars,
1467 )?;
1468
1469 Ok(serde_json::json!({
1470 "provider": "mineru",
1471 "status": "indexed",
1472 "item_key": options.parent,
1473 "attachment_key": attachment,
1474 "attachment_path": attachment_path,
1475 "storage_dir": storage_dir,
1476 "task_id": source.task_id,
1477 "state": source.state,
1478 "blocks": artifacts.block_count,
1479 "chunks": artifacts.chunk_count,
1480 "artifacts": artifacts.artifacts,
1481 }))
1482}
1483
1484struct MineruResultSource {
1485 task_id: Option<String>,
1486 state: String,
1487 result_dir: PathBuf,
1488 raw_zip_bytes: Option<Vec<u8>>,
1489 task_status: Option<Value>,
1490 payload: Value,
1491 content_list_file: Option<PathBuf>,
1492 markdown: Option<String>,
1493}
1494
1495struct PersistedOcrArtifacts {
1496 block_count: usize,
1497 chunk_count: usize,
1498 artifacts: Vec<Value>,
1499}
1500
1501fn resolve_attachment_path(
1502 client: &mut impl RpcCaller,
1503 attachment_key: &str,
1504) -> Result<PathBuf, String> {
1505 let payload = client.call(
1506 "attachments.getPath",
1507 Some(serde_json::json!({"key": attachment_key})),
1508 )?;
1509 let raw_path = payload
1510 .get("path")
1511 .and_then(Value::as_str)
1512 .filter(|path| !path.trim().is_empty())
1513 .ok_or_else(|| {
1514 format!("ATTACHMENT_PATH_NOT_FOUND: attachment {attachment_key} has no local PDF path")
1515 })?;
1516 Ok(PathBuf::from(local_path_from_zotero_path(raw_path)))
1517}
1518
1519fn resolve_first_pdf_attachment_key(
1521 client: &mut impl RpcCaller,
1522 parent_key: &str,
1523) -> Result<String, String> {
1524 let response = client.call(
1525 "attachments.list",
1526 Some(serde_json::json!({"parentKey": parent_key})),
1527 )?;
1528 let attachments = response
1530 .get("items")
1531 .and_then(Value::as_array)
1532 .or_else(|| response.as_array())
1533 .ok_or_else(|| {
1534 format!("NO_PDF_ATTACHMENT: no attachments found for item {parent_key}")
1535 })?;
1536 for attachment in attachments {
1537 if is_pdf_attachment(attachment) {
1538 if let Some(key) = attachment.get("key").and_then(Value::as_str) {
1539 return Ok(key.to_string());
1540 }
1541 }
1542 }
1543 Err(format!(
1544 "NO_PDF_ATTACHMENT: no PDF attachment found for item {parent_key}"
1545 ))
1546}
1547
1548fn load_mineru_result_source(
1549 options: &OcrParsePdfOptions,
1550 attachment_path: &Path,
1551 file_name: &str,
1552) -> Result<MineruResultSource, String> {
1553 if let Some(result_dir) = options.result_dir.as_deref() {
1554 return mineru_result_source_from_dir(PathBuf::from(result_dir), None, None, None);
1555 }
1556 if let Some(result_zip) = options.result_zip.as_deref() {
1557 let zip_path = PathBuf::from(result_zip);
1558 let zip_bytes = fs::read(&zip_path)
1559 .map_err(|err| format!("read MinerU result zip {}: {err}", zip_path.display()))?;
1560 let result_dir = extract_zip_bytes_to_temp("zotron-mineru-result", &zip_bytes)?;
1561 return mineru_result_source_from_dir(result_dir, Some(zip_bytes), None, None);
1562 }
1563
1564 let Some(source_url) = options
1565 .source_url
1566 .as_deref()
1567 .filter(|value| !value.trim().is_empty())
1568 else {
1569 return submit_mineru_local_file(options, attachment_path, file_name);
1570 };
1571 let input = OcrRequestInput {
1572 item_key: options.parent.clone(),
1573 attachment_key: options.attachment.clone().expect("attachment resolved"),
1574 file_name: file_name.to_string(),
1575 mime_type: "application/pdf".to_string(),
1576 content_base64: format!("url:{source_url}"),
1577 source_url: Some(source_url.to_string()),
1578 local_path: None,
1579 output_dir: None,
1580 };
1581 let task = submit_mineru_task(
1582 &options.provider,
1583 &input,
1584 options.provider_endpoint.clone(),
1585 &options.api_key_env,
1586 )?;
1587 let task_id = task
1588 .get("data")
1589 .and_then(|data| data.get("task_id"))
1590 .and_then(Value::as_str)
1591 .ok_or_else(|| "MinerU submit response missing data.task_id".to_string())?
1592 .to_string();
1593 let auth_header = provider_auth_header_value(&options.api_key_env, "bearer")?;
1594 let status = poll_mineru_task(
1595 options.provider_endpoint.as_deref(),
1596 &task_id,
1597 &auth_header,
1598 options.poll_interval_seconds,
1599 options.timeout_seconds,
1600 )?;
1601 let zip_url = status
1602 .pointer("/data/full_zip_url")
1603 .or_else(|| status.pointer("/data/result/full_zip_url"))
1604 .and_then(Value::as_str)
1605 .ok_or_else(|| "MinerU completed task missing data.full_zip_url".to_string())?;
1606 let zip_bytes = download_bytes(zip_url)?;
1607 let result_dir = extract_zip_bytes_to_temp("zotron-mineru-result", &zip_bytes)?;
1608 mineru_result_source_from_dir(result_dir, Some(zip_bytes), Some(status), Some(task_id))
1609}
1610
1611fn submit_mineru_local_file(
1612 options: &OcrParsePdfOptions,
1613 attachment_path: &Path,
1614 file_name: &str,
1615) -> Result<MineruResultSource, String> {
1616 let auth_header = provider_auth_header_value(&options.api_key_env, "bearer")?;
1617 let upload_request = create_mineru_file_upload(
1618 options.provider_endpoint.as_deref(),
1619 file_name,
1620 options.attachment.as_deref().expect("attachment resolved"),
1621 &auth_header,
1622 )?;
1623 let upload_url = upload_request
1624 .pointer("/data/file_urls/0")
1625 .or_else(|| upload_request.pointer("/data/fileUrls/0"))
1626 .and_then(Value::as_str)
1627 .ok_or_else(|| "MinerU upload URL response missing data.file_urls[0]".to_string())?;
1628 let batch_id = upload_request
1629 .pointer("/data/batch_id")
1630 .or_else(|| upload_request.pointer("/data/batchId"))
1631 .and_then(Value::as_str)
1632 .ok_or_else(|| "MinerU upload URL response missing data.batch_id".to_string())?
1633 .to_string();
1634 let bytes = fs::read(attachment_path)
1635 .map_err(|err| format!("read attachment PDF {}: {err}", attachment_path.display()))?;
1636 put_bytes(upload_url, &bytes)?;
1637 let status = poll_mineru_batch(
1638 options.provider_endpoint.as_deref(),
1639 &batch_id,
1640 &auth_header,
1641 options.poll_interval_seconds,
1642 options.timeout_seconds,
1643 )?;
1644 let zip_url = mineru_batch_zip_url(&status)
1645 .ok_or_else(|| "MinerU completed batch missing full_zip_url".to_string())?;
1646 let zip_bytes = download_bytes(&zip_url)?;
1647 let result_dir = extract_zip_bytes_to_temp("zotron-mineru-result", &zip_bytes)?;
1648 mineru_result_source_from_dir(result_dir, Some(zip_bytes), Some(status), Some(batch_id))
1649}
1650
1651fn create_mineru_file_upload(
1652 endpoint: Option<&str>,
1653 file_name: &str,
1654 data_id: &str,
1655 auth_header: &str,
1656) -> Result<Value, String> {
1657 let url = mineru_file_urls_url(endpoint);
1658 let body = serde_json::json!({
1659 "files": [{"name": file_name, "data_id": data_id}],
1660 "model_version": "vlm",
1661 "is_ocr": false,
1662 "enable_formula": true,
1663 "enable_table": true,
1664 "language": "ch",
1665 "page_ranges": "1-200",
1666 });
1667 ureq::post(&url)
1668 .set("Authorization", auth_header)
1669 .send_json(body)
1670 .map_err(|err| format!("POST {url} failed: {err}"))?
1671 .into_json::<Value>()
1672 .map_err(|err| format!("POST {url} returned invalid JSON: {err}"))
1673}
1674
1675fn put_bytes(url: &str, bytes: &[u8]) -> Result<(), String> {
1676 ureq::put(url)
1677 .send_bytes(bytes)
1678 .map_err(|err| format!("PUT {url} failed: {err}"))?;
1679 Ok(())
1680}
1681
1682fn submit_mineru_task(
1683 provider: &str,
1684 input: &OcrRequestInput,
1685 endpoint: Option<String>,
1686 api_key_env: &str,
1687) -> Result<Value, String> {
1688 let request = build_ocr_provider_request(provider, input)?;
1689 let method = request
1690 .method
1691 .ok_or_else(|| "MinerU provider missing HTTP method".to_string())?;
1692 let mut transport = provider_http_transport_with_auth(Some(api_key_env), "bearer")?;
1693 transport.post_json(&ProviderHttpInvocation {
1694 provider: request.provider.to_string(),
1695 style: request.style.to_string(),
1696 method: method.to_string(),
1697 url: endpoint.or_else(|| request.url.map(ToString::to_string)),
1698 auth_header_name: request.auth_header.map(ToString::to_string),
1699 auth_header_value: None,
1700 body: request.body,
1701 })
1702}
1703
1704fn poll_mineru_task(
1705 endpoint: Option<&str>,
1706 task_id: &str,
1707 auth_header: &str,
1708 poll_interval_seconds: u64,
1709 timeout_seconds: u64,
1710) -> Result<Value, String> {
1711 let url = mineru_task_status_url(endpoint, task_id);
1712 let started = Instant::now();
1713 loop {
1714 let status = get_json_with_auth(&url, auth_header)?;
1715 let state = status
1716 .pointer("/data/state")
1717 .or_else(|| status.pointer("/data/status"))
1718 .and_then(Value::as_str)
1719 .unwrap_or("unknown");
1720 match state {
1721 "done" | "finished" | "success" => return Ok(status),
1722 "failed" | "error" => return Err(format!("MinerU task {task_id} failed: {status}")),
1723 _ => {
1724 if started.elapsed() >= Duration::from_secs(timeout_seconds) {
1725 return Err(format!(
1726 "MinerU task {task_id} timed out after {timeout_seconds}s with state {state}"
1727 ));
1728 }
1729 thread::sleep(Duration::from_secs(poll_interval_seconds.max(1)));
1730 }
1731 }
1732 }
1733}
1734
/// Builds the status URL of a task.
///
/// Trailing slashes of the endpoint are ignored. An endpoint that already ends
/// in `/extract/task` gets only the task id appended; any other endpoint gets
/// `/extract/task/<task_id>` appended. Without an endpoint the public MinerU
/// v4 URL is used.
fn mineru_task_status_url(endpoint: Option<&str>, task_id: &str) -> String {
    const TASK_SUFFIX: &str = "/extract/task";
    let root = endpoint
        .unwrap_or("https://mineru.net/api/v4/extract/task")
        .trim_end_matches('/');
    let missing_suffix = if root.ends_with(TASK_SUFFIX) {
        ""
    } else {
        TASK_SUFFIX
    };
    format!("{root}{missing_suffix}/{task_id}")
}
1745
1746fn mineru_file_urls_url(endpoint: Option<&str>) -> String {
1747 let base = mineru_api_base(endpoint);
1748 format!("{base}/file-urls/batch")
1749}
1750
1751fn mineru_batch_status_url(endpoint: Option<&str>, batch_id: &str) -> String {
1752 let base = mineru_api_base(endpoint);
1753 format!("{base}/extract-results/batch/{batch_id}")
1754}
1755
/// Derives the API base URL from a task endpoint.
///
/// Trailing slashes are dropped, then a trailing `/extract/task` or, failing
/// that, a trailing `/extract` is removed. Without an endpoint the public
/// MinerU v4 task URL is used as the starting point.
fn mineru_api_base(endpoint: Option<&str>) -> String {
    let trimmed = endpoint
        .unwrap_or("https://mineru.net/api/v4/extract/task")
        .trim_end_matches('/');
    trimmed
        .strip_suffix("/extract/task")
        .or_else(|| trimmed.strip_suffix("/extract"))
        .unwrap_or(trimmed)
        .to_string()
}
1768
1769fn poll_mineru_batch(
1770 endpoint: Option<&str>,
1771 batch_id: &str,
1772 auth_header: &str,
1773 poll_interval_seconds: u64,
1774 timeout_seconds: u64,
1775) -> Result<Value, String> {
1776 let url = mineru_batch_status_url(endpoint, batch_id);
1777 let started = Instant::now();
1778 loop {
1779 let status = get_json_with_auth(&url, auth_header)?;
1780 let state = mineru_batch_state(&status).unwrap_or("unknown");
1781 match state {
1782 "done" | "finished" | "success" => return Ok(status),
1783 "failed" | "error" => return Err(format!("MinerU batch {batch_id} failed: {status}")),
1784 _ => {
1785 if started.elapsed() >= Duration::from_secs(timeout_seconds) {
1786 return Err(format!(
1787 "MinerU batch {batch_id} timed out after {timeout_seconds}s with state {state}"
1788 ));
1789 }
1790 thread::sleep(Duration::from_secs(poll_interval_seconds.max(1)));
1791 }
1792 }
1793 }
1794}
1795
1796fn mineru_batch_state(status: &Value) -> Option<&str> {
1797 status
1798 .pointer("/data/extract_result/0/state")
1799 .or_else(|| status.pointer("/data/extractResult/0/state"))
1800 .or_else(|| status.pointer("/data/state"))
1801 .and_then(Value::as_str)
1802}
1803
1804fn mineru_batch_zip_url(status: &Value) -> Option<String> {
1805 status
1806 .pointer("/data/extract_result/0/full_zip_url")
1807 .or_else(|| status.pointer("/data/extractResult/0/full_zip_url"))
1808 .or_else(|| status.pointer("/data/full_zip_url"))
1809 .and_then(Value::as_str)
1810 .map(ToString::to_string)
1811}
1812
/// Builds the `Authorization` header value from a credential stored in an
/// environment variable.
///
/// * `api_key_env`: name of the environment variable holding the credential.
/// * `auth_scheme`: `"bearer"` or `"token"` to get a scheme-prefixed value;
///   any other scheme returns the trimmed credential unchanged.
///
/// A credential that already starts with the scheme prefix is returned as-is.
/// The prefix check is ASCII case-insensitive because HTTP authentication
/// scheme names are case-insensitive; a value such as `bearer abc` must not be
/// turned into `Bearer bearer abc`.
///
/// # Errors
///
/// Returns an error when the variable is unset (or not valid Unicode) or when
/// it contains only whitespace.
fn provider_auth_header_value(api_key_env: &str, auth_scheme: &str) -> Result<String, String> {
    let raw = env::var(api_key_env)
        .map_err(|_| format!("missing provider credential env var {api_key_env}"))?;
    let token = raw.trim();
    if token.is_empty() {
        return Err(format!(
            "provider credential env var {api_key_env} is empty"
        ));
    }

    let prefix = match auth_scheme {
        "bearer" => "Bearer ",
        "token" => "token ",
        _ => return Ok(token.to_string()),
    };
    // `get` returns `None` for a short token or a non-boundary split, so this never panics.
    let already_prefixed = token
        .get(..prefix.len())
        .is_some_and(|head| head.eq_ignore_ascii_case(prefix));
    Ok(if already_prefixed {
        token.to_string()
    } else {
        format!("{prefix}{token}")
    })
}
1830
1831fn get_json_with_auth(url: &str, auth_header: &str) -> Result<Value, String> {
1832 ureq::get(url)
1833 .set("Authorization", auth_header)
1834 .call()
1835 .map_err(|err| format!("GET {url} failed: {err}"))?
1836 .into_json::<Value>()
1837 .map_err(|err| format!("GET {url} returned invalid JSON: {err}"))
1838}
1839
1840fn download_bytes(url: &str) -> Result<Vec<u8>, String> {
1841 let response = ureq::get(url)
1842 .call()
1843 .map_err(|err| format!("download {url} failed: {err}"))?;
1844 let mut bytes = Vec::new();
1845 response
1846 .into_reader()
1847 .read_to_end(&mut bytes)
1848 .map_err(|err| format!("read download {url}: {err}"))?;
1849 Ok(bytes)
1850}
1851
1852fn extract_zip_bytes_to_temp(prefix: &str, zip_bytes: &[u8]) -> Result<PathBuf, String> {
1853 let dir = unique_temp_path(prefix);
1854 fs::create_dir_all(&dir).map_err(|err| format!("create temp dir {}: {err}", dir.display()))?;
1855 let zip_path = dir.with_extension("zip");
1856 fs::write(&zip_path, zip_bytes)
1857 .map_err(|err| format!("write temp zip {}: {err}", zip_path.display()))?;
1858 let output = ProcessCommand::new("unzip")
1859 .arg("-q")
1860 .arg("-o")
1861 .arg(&zip_path)
1862 .arg("-d")
1863 .arg(&dir)
1864 .output()
1865 .map_err(|err| format!("run unzip: {err}"))?;
1866 if !output.status.success() {
1867 return Err(format!(
1868 "unzip {} failed: {}",
1869 zip_path.display(),
1870 String::from_utf8_lossy(&output.stderr).trim()
1871 ));
1872 }
1873 Ok(dir)
1874}
1875
/// Builds a path `<temp dir>/<prefix>-<pid>-<nanoseconds since epoch>`.
///
/// Nothing is created on disk. If the system clock is before the Unix epoch the
/// timestamp part is `0`.
fn unique_temp_path(prefix: &str) -> PathBuf {
    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_nanos(),
        Err(_) => 0,
    };
    let pid = std::process::id();
    env::temp_dir().join(format!("{prefix}-{pid}-{nanos}"))
}
1883
1884fn mineru_result_source_from_dir(
1885 result_dir: PathBuf,
1886 raw_zip_bytes: Option<Vec<u8>>,
1887 task_status: Option<Value>,
1888 task_id: Option<String>,
1889) -> Result<MineruResultSource, String> {
1890 let (payload, content_list_file) = mineru_payload_from_result_dir(&result_dir)?;
1891 let markdown = find_first_file_by_name(&result_dir, "full.md")
1892 .map(|path| {
1893 fs::read_to_string(&path)
1894 .map_err(|err| format!("read native markdown {}: {err}", path.display()))
1895 })
1896 .transpose()?;
1897 Ok(MineruResultSource {
1898 task_id,
1899 state: "done".to_string(),
1900 result_dir,
1901 raw_zip_bytes,
1902 task_status,
1903 payload,
1904 content_list_file,
1905 markdown,
1906 })
1907}
1908
1909fn mineru_payload_from_result_dir(result_dir: &Path) -> Result<(Value, Option<PathBuf>), String> {
1910 let v2 = find_first_file_with_suffix(result_dir, "_content_list_v2.json");
1911 if let Some(path) = v2 {
1912 let value = read_json_file(&path)?;
1913 return Ok((serde_json::json!({"content_list_v2": value}), Some(path)));
1914 }
1915 let content_list = find_first_file_with_suffix(result_dir, "_content_list.json");
1916 if let Some(path) = content_list {
1917 let value = read_json_file(&path)?;
1918 return Ok((serde_json::json!({"content_list": value}), Some(path)));
1919 }
1920 let layout = find_first_file_by_name(result_dir, "layout.json");
1921 if let Some(path) = layout {
1922 return Ok((read_json_file(&path)?, Some(path)));
1923 }
1924 let markdown = find_first_file_by_name(result_dir, "full.md");
1925 if let Some(path) = markdown {
1926 let text = fs::read_to_string(&path)
1927 .map_err(|err| format!("read native markdown {}: {err}", path.display()))?;
1928 return Ok((serde_json::json!({"result": text}), Some(path)));
1929 }
1930 Err(format!(
1931 "MinerU result directory {} missing content_list_v2/content_list/layout/full.md",
1932 result_dir.display()
1933 ))
1934}
1935
1936fn read_json_file(path: &Path) -> Result<Value, String> {
1937 let raw = fs::read_to_string(path).map_err(|err| format!("read {}: {err}", path.display()))?;
1938 serde_json::from_str(&raw).map_err(|err| format!("parse JSON {}: {err}", path.display()))
1939}
1940
1941fn persist_mineru_result_sidecars(
1942 storage_dir: &Path,
1943 item_key: &str,
1944 attachment_key: &str,
1945 provider: &str,
1946 source: &MineruResultSource,
1947 chunk_chars: usize,
1948) -> Result<PersistedOcrArtifacts, String> {
1949 let blocks = parse_ocr_provider_response(provider, &source.payload, item_key, attachment_key)?;
1950 let chunks = zotron_types::chunks_from_blocks(&blocks, chunk_chars);
1951 let assets = copy_mineru_assets(&source.result_dir, storage_dir)?;
1952 let raw_bundle = serde_json::json!({
1953 "provider": provider,
1954 "item_key": item_key,
1955 "attachment_key": attachment_key,
1956 "task_id": source.task_id,
1957 "state": source.state,
1958 "task_status": source.task_status,
1959 "content_list_file": source.content_list_file,
1960 "payload": source.payload,
1961 });
1962
1963 let mut artifacts = Vec::new();
1964 artifacts.push(write_sidecar_json(
1965 storage_dir,
1966 item_key,
1967 attachment_key,
1968 MachineArtifactKind::OcrRaw,
1969 &raw_bundle,
1970 )?);
1971 artifacts.push(write_sidecar_jsonl(
1972 storage_dir,
1973 item_key,
1974 attachment_key,
1975 MachineArtifactKind::Blocks,
1976 &blocks,
1977 )?);
1978 artifacts.push(write_sidecar_jsonl(
1979 storage_dir,
1980 item_key,
1981 attachment_key,
1982 MachineArtifactKind::Chunks,
1983 &chunks,
1984 )?);
1985 if let Some(markdown) = source.markdown.as_deref() {
1986 artifacts.push(write_sidecar_bytes(
1987 storage_dir,
1988 item_key,
1989 attachment_key,
1990 MachineArtifactKind::OcrNativeMarkdown,
1991 markdown.as_bytes(),
1992 )?);
1993 }
1994 artifacts.push(write_sidecar_json(
1995 storage_dir,
1996 item_key,
1997 attachment_key,
1998 MachineArtifactKind::OcrNativeAssets,
1999 &assets,
2000 )?);
2001 if let Some(bytes) = source.raw_zip_bytes.as_deref() {
2002 artifacts.push(write_extra_sidecar_bytes(
2003 storage_dir,
2004 ".zotron/ocr/latest.raw.zip",
2005 bytes,
2006 )?);
2007 }
2008
2009 Ok(PersistedOcrArtifacts {
2010 block_count: blocks.len(),
2011 chunk_count: chunks.len(),
2012 artifacts,
2013 })
2014}
2015
2016fn write_sidecar_json(
2017 storage_dir: &Path,
2018 item_key: &str,
2019 attachment_key: &str,
2020 kind: MachineArtifactKind,
2021 value: &Value,
2022) -> Result<Value, String> {
2023 let bytes = serde_json::to_vec_pretty(value).map_err(|err| err.to_string())?;
2024 write_sidecar_bytes(storage_dir, item_key, attachment_key, kind, &bytes)
2025}
2026
2027fn write_sidecar_jsonl<T: serde::Serialize>(
2028 storage_dir: &Path,
2029 item_key: &str,
2030 attachment_key: &str,
2031 kind: MachineArtifactKind,
2032 values: &[T],
2033) -> Result<Value, String> {
2034 let mut out = String::new();
2035 for value in values {
2036 out.push_str(&serde_json::to_string(value).map_err(|err| err.to_string())?);
2037 out.push('\n');
2038 }
2039 write_sidecar_bytes(storage_dir, item_key, attachment_key, kind, out.as_bytes())
2040}
2041
2042fn write_sidecar_bytes(
2043 storage_dir: &Path,
2044 item_key: &str,
2045 attachment_key: &str,
2046 kind: MachineArtifactKind,
2047 bytes: &[u8],
2048) -> Result<Value, String> {
2049 let record = write_machine_artifact_sidecar(storage_dir, item_key, attachment_key, kind, bytes)
2050 .map_err(|err| format!("write sidecar {:?}: {err}", kind))?;
2051 Ok(serde_json::json!({
2052 "kind": kind,
2053 "relative_path": record.relative_path,
2054 "absolute_path": record.absolute_path,
2055 }))
2056}
2057
2058fn write_extra_sidecar_bytes(
2059 storage_dir: &Path,
2060 relative_path: &str,
2061 bytes: &[u8],
2062) -> Result<Value, String> {
2063 let absolute_path = storage_dir.join(relative_path);
2064 if let Some(parent) = absolute_path.parent() {
2065 fs::create_dir_all(parent).map_err(|err| format!("create {}: {err}", parent.display()))?;
2066 }
2067 fs::write(&absolute_path, bytes)
2068 .map_err(|err| format!("write sidecar {}: {err}", absolute_path.display()))?;
2069 Ok(serde_json::json!({
2070 "kind": "ocr_raw_zip",
2071 "relative_path": relative_path,
2072 "absolute_path": absolute_path,
2073 }))
2074}
2075
2076fn copy_mineru_assets(result_dir: &Path, storage_dir: &Path) -> Result<Value, String> {
2077 let mut images = Vec::new();
2078 for file in collect_files(result_dir)? {
2079 if !is_image_file(&file) {
2080 continue;
2081 }
2082 let relative = file.strip_prefix(result_dir).unwrap_or(&file).to_path_buf();
2083 let destination = storage_dir.join(".zotron").join("ocr").join(&relative);
2084 if let Some(parent) = destination.parent() {
2085 fs::create_dir_all(parent)
2086 .map_err(|err| format!("create {}: {err}", parent.display()))?;
2087 }
2088 fs::copy(&file, &destination).map_err(|err| {
2089 format!(
2090 "copy MinerU asset {} to {}: {err}",
2091 file.display(),
2092 destination.display()
2093 )
2094 })?;
2095 images.push(serde_json::json!({
2096 "source_relative": relative,
2097 "sidecar_relative": PathBuf::from(".zotron").join("ocr").join(&relative),
2098 "absolute_path": destination,
2099 }));
2100 }
2101 Ok(serde_json::json!({
2102 "provider": "mineru",
2103 "images": images,
2104 }))
2105}
2106
/// Returns `true` when the path's extension is png, jpg, jpeg, webp or gif
/// (ASCII case-insensitive). Paths without a UTF-8 extension are not images.
fn is_image_file(path: &Path) -> bool {
    const IMAGE_EXTENSIONS: [&str; 5] = ["png", "jpg", "jpeg", "webp", "gif"];
    path.extension()
        .and_then(|ext| ext.to_str())
        .is_some_and(|ext| {
            IMAGE_EXTENSIONS
                .iter()
                .any(|known| ext.eq_ignore_ascii_case(known))
        })
}
2117
2118fn find_first_file_with_suffix(root: &Path, suffix: &str) -> Option<PathBuf> {
2119 collect_files(root).ok()?.into_iter().find(|path| {
2120 path.file_name()
2121 .and_then(|name| name.to_str())
2122 .is_some_and(|name| name.ends_with(suffix))
2123 })
2124}
2125
2126fn find_first_file_by_name(root: &Path, name: &str) -> Option<PathBuf> {
2127 collect_files(root).ok()?.into_iter().find(|path| {
2128 path.file_name()
2129 .and_then(|file_name| file_name.to_str())
2130 .is_some_and(|file_name| file_name == name)
2131 })
2132}
2133
2134fn collect_files(root: &Path) -> Result<Vec<PathBuf>, String> {
2135 let mut files = Vec::new();
2136 collect_files_into(root, &mut files)?;
2137 files.sort();
2138 Ok(files)
2139}
2140
/// Appends every regular file below `root` to `files`, descending into
/// sub-directories. Entries that are neither a file nor a directory are
/// skipped. The first I/O error aborts the walk.
fn collect_files_into(root: &Path, files: &mut Vec<PathBuf>) -> Result<(), String> {
    let listing =
        fs::read_dir(root).map_err(|err| format!("read dir {}: {err}", root.display()))?;
    for item in listing {
        let item = item.map_err(|err| format!("read dir entry {}: {err}", root.display()))?;
        let path = item.path();
        let kind = item
            .file_type()
            .map_err(|err| format!("stat {}: {err}", path.display()))?;
        if kind.is_file() {
            files.push(path);
        } else if kind.is_dir() {
            collect_files_into(&path, files)?;
        }
    }
    Ok(())
}
2156
2157fn ocr_async_task_result(provider: &str, payload: &Value) -> Option<Value> {
2158 let data = payload.get("data")?;
2159 let task_id = data.get("task_id").and_then(Value::as_str)?;
2160 Some(serde_json::json!({
2161 "provider": provider,
2162 "status": "submitted",
2163 "task_id": task_id,
2164 "state": data.get("state").and_then(Value::as_str).unwrap_or("submitted"),
2165 "result_url": data.get("full_zip_url").or_else(|| data.get("markdown_url")).cloned(),
2166 "raw": payload,
2167 }))
2168}
2169
2170fn ocr_input_from_file(
2171 file: String,
2172 item_key: Option<String>,
2173 attachment_key: Option<String>,
2174 mime_type: Option<String>,
2175) -> Result<OcrRequestInput, String> {
2176 let item_key = item_key
2177 .ok_or_else(|| "INVALID_ARGS: --item-key is required when using --file".to_string())?;
2178 let attachment_key = attachment_key.ok_or_else(|| {
2179 "INVALID_ARGS: --attachment-key is required when using --file".to_string()
2180 })?;
2181 let path = PathBuf::from(&file);
2182 let bytes = fs::read(&path).map_err(|err| format!("read {file}: {err}"))?;
2183 let file_name = path
2184 .file_name()
2185 .and_then(|name| name.to_str())
2186 .unwrap_or("document.pdf")
2187 .to_string();
2188 let mime_type = mime_type.unwrap_or_else(|| guess_mime_type(&path).to_string());
2189 Ok(OcrRequestInput {
2190 item_key,
2191 attachment_key,
2192 file_name,
2193 mime_type,
2194 content_base64: base64_encode(&bytes),
2195 source_url: None,
2196 local_path: Some(file),
2197 output_dir: None,
2198 })
2199}
2200
/// Guesses a MIME type from the file extension (ASCII case-insensitive).
///
/// Recognises png, jpg/jpeg, webp and gif. GIF is included so that every
/// extension accepted by `is_image_file` maps to an image type instead of
/// being mislabelled as a PDF. Anything else, including a missing or
/// non-UTF-8 extension, is treated as `application/pdf`.
fn guess_mime_type(path: &Path) -> &'static str {
    let extension = path
        .extension()
        .and_then(|ext| ext.to_str())
        .unwrap_or_default()
        .to_ascii_lowercase();
    match extension.as_str() {
        "png" => "image/png",
        "jpg" | "jpeg" => "image/jpeg",
        "webp" => "image/webp",
        "gif" => "image/gif",
        _ => "application/pdf",
    }
}
2215
/// Encodes bytes as standard base64 (alphabet `A-Z a-z 0-9 + /`) with `=` padding.
fn base64_encode(bytes: &[u8]) -> String {
    const ALPHABET: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    let mut encoded = String::with_capacity(bytes.len().div_ceil(3) * 4);
    for group in bytes.chunks(3) {
        // Pack up to three bytes into the top 24 bits of the word; missing bytes stay zero.
        let mut word = 0u32;
        for (position, byte) in group.iter().enumerate() {
            word |= u32::from(*byte) << (16 - 8 * position);
        }
        // An n-byte group yields n + 1 significant sextets; the rest is padding.
        for slot in 0..4usize {
            if slot <= group.len() {
                let index = ((word >> (18 - 6 * slot)) & 0x3f) as usize;
                encoded.push(ALPHABET[index] as char);
            } else {
                encoded.push('=');
            }
        }
    }
    encoded
}
2238
2239fn run_ocr_status_command(
2240 client: &mut impl RpcCaller,
2241 collection: String,
2242) -> Result<Value, String> {
2243 let collection_key = find_collection_in_tree(client, &collection)?
2244 .and_then(|node| node.get("key").cloned())
2245 .ok_or_else(|| format!("COLLECTION_NOT_FOUND: Collection not found: {collection:?}"))?;
2246 let raw = paginate_rpc(
2247 client,
2248 "collections.getItems",
2249 serde_json::json!({"key": collection_key}),
2250 500,
2251 )?;
2252 let items = raw
2253 .get("items")
2254 .and_then(Value::as_array)
2255 .or_else(|| raw.as_array())
2256 .ok_or_else(|| "collections.getItems returned non-array/non-items result".to_string())?
2257 .clone();
2258
2259 let mut has_ocr = 0usize;
2260 for item in &items {
2261 let item_key = item.get("key").cloned().unwrap_or(Value::Null);
2262 if has_ocr_artifact(client, &item_key)? || has_ocr_note(client, &item_key)? {
2263 has_ocr += 1;
2264 }
2265 }
2266
2267 Ok(serde_json::json!({
2268 "collection": collection,
2269 "total": items.len(),
2270 "has_ocr": has_ocr,
2271 "missing_ocr": items.len() - has_ocr,
2272 }))
2273}
2274
2275fn has_ocr_artifact(client: &mut impl RpcCaller, item_key: &Value) -> Result<bool, String> {
2276 if let Some(item_key) = item_key.as_str() {
2277 if machine_artifact_exists_for_item(
2280 machine_artifact_store_root(),
2281 item_key,
2282 MachineArtifactKind::Chunks,
2283 ) {
2284 return Ok(true);
2285 }
2286 }
2287
2288 let attachments = client.call(
2289 "attachments.list",
2290 Some(serde_json::json!({"parentKey": item_key.clone()})),
2291 )?;
2292 Ok(attachments.as_array().is_some_and(|attachments| {
2293 attachments.iter().any(|attachment| {
2294 let has_sidecar_chunks = attachment
2295 .get("path")
2296 .and_then(Value::as_str)
2297 .map(local_path_from_zotero_path)
2298 .as_deref()
2299 .map(Path::new)
2300 .and_then(Path::parent)
2301 .is_some_and(|dir| {
2302 machine_artifact_exists_in_sidecar(dir, MachineArtifactKind::Chunks)
2303 });
2304 if has_sidecar_chunks {
2305 return true;
2306 }
2307
2308 attachment
2310 .get("title")
2311 .and_then(Value::as_str)
2312 .is_some_and(|title| title.ends_with("zotron-chunks.jsonl"))
2313 })
2314 }))
2315}
2316
2317fn local_path_from_zotero_path(path: &str) -> String {
2318 if is_wsl() && path.as_bytes().get(1) == Some(&b':') {
2319 return ProcessCommand::new("wslpath")
2320 .arg("-u")
2321 .arg(path)
2322 .output()
2323 .ok()
2324 .filter(|output| output.status.success())
2325 .and_then(|output| String::from_utf8(output.stdout).ok())
2326 .map(|converted| converted.trim().to_string())
2327 .filter(|converted| !converted.is_empty())
2328 .unwrap_or_else(|| path.to_string());
2329 }
2330 path.to_string()
2331}
2332
2333fn has_ocr_note(client: &mut impl RpcCaller, item_key: &Value) -> Result<bool, String> {
2334 let notes = client.call(
2335 "notes.list",
2336 Some(serde_json::json!({"parentKey": item_key.clone()})),
2337 )?;
2338 Ok(notes.as_array().is_some_and(|notes| {
2339 notes.iter().any(|note| {
2340 note.get("tags")
2341 .and_then(Value::as_array)
2342 .is_some_and(|tags| tags.iter().any(tag_is_ocr))
2343 })
2344 }))
2345}
2346
2347fn tag_is_ocr(tag: &Value) -> bool {
2348 tag.as_str() == Some("ocr")
2349 || tag
2350 .get("tag")
2351 .and_then(Value::as_str)
2352 .is_some_and(|tag| tag == "ocr")
2353}
2354
2355fn find_collection_in_tree(
2356 client: &mut impl RpcCaller,
2357 collection: &str,
2358) -> Result<Option<Value>, String> {
2359 let tree = client.call("collections.tree", None)?;
2360 let nodes = tree
2361 .as_array()
2362 .ok_or_else(|| "collections.tree returned non-array result".to_string())?;
2363 Ok(search_collection_tree(nodes, collection).cloned())
2364}
2365
2366fn search_collection_tree<'a>(nodes: &'a [Value], collection: &str) -> Option<&'a Value> {
2367 for node in nodes {
2368 if node.get("key").and_then(Value::as_str) == Some(collection)
2369 || node.get("name").and_then(Value::as_str) == Some(collection)
2370 {
2371 return Some(node);
2372 }
2373 if let Some(children) = node.get("children").and_then(Value::as_array) {
2374 if let Some(found) = search_collection_tree(children, collection) {
2375 return Some(found);
2376 }
2377 }
2378 }
2379 None
2380}
2381
2382fn run_command(command: Command, client: &mut impl RpcCaller) -> Result<String, String> {
2383 if let Command::Export(args) = command {
2384 return run_export(args, client);
2385 }
2386
2387 let (value, style) = match command {
2388 Command::Ping { .. } => (
2389 call_json(client, "system.ping", None)?,
2390 JsonStyle::PythonCompact,
2391 ),
2392 Command::Rpc {
2393 method,
2394 params_json,
2395 paginate,
2396 page_size,
2397 ..
2398 } => {
2399 let params = serde_json::from_str::<Value>(¶ms_json)
2400 .map_err(|err| format!("INVALID_JSON: params must be a JSON object: {err}"))?;
2401 if !params.is_object() {
2402 return Err("INVALID_JSON: params must be a JSON object".to_string());
2403 }
2404 if paginate {
2405 (
2406 paginate_rpc(client, &method, params, page_size)?,
2407 JsonStyle::Pretty,
2408 )
2409 } else {
2410 (call_json(client, &method, Some(params))?, JsonStyle::Pretty)
2411 }
2412 }
2413 Command::Push {
2414 json_file,
2415 pdf,
2416 collection,
2417 on_duplicate,
2418 dry_run,
2419 ..
2420 } => return run_push_command(json_file, pdf, collection, on_duplicate, dry_run, client),
2421 Command::System { command } => run_system_command(command, client)?,
2422 Command::Search(args) => {
2423 if let Some(mgmt) = args.management {
2424 run_search_management_command(mgmt, client)?
2425 } else {
2426 run_search(args, client)?
2427 }
2428 }
2429 Command::Items { command } => run_items_command(command, client)?,
2430 Command::Collections { command } => run_collections_command(command, client)?,
2431 Command::Notes { command } => run_notes_command(command, client)?,
2432 Command::Attachments { command } => run_attachments_command(command, client)?,
2433 Command::Settings { command } => run_settings_command(command, client)?,
2434 Command::Tags { command } => run_tags_command(command, client)?,
2435 Command::Annotations { command } => run_annotations_command(command, client)?,
2436 Command::Ocr { command } => {
2437 return run_ocr_command(command, client);
2438 }
2439 Command::Rag { command } => {
2440 return run_rag_command(command, client);
2441 }
2442 Command::Export(_) => unreachable!("export commands return raw output above"),
2443 Command::FindPdfs {
2444 collection, limit, ..
2445 } => run_find_pdfs_command(client, collection, limit)?,
2446 };
2447
2448 format_json(&value, style)
2449}
2450
2451fn run_rag_command(command: RagCommand, client: &mut impl RpcCaller) -> Result<String, String> {
2452 match command {
2453 RagCommand::Providers => format_json(
2454 &serde_json::json!({
2455 "providers": [
2456 embedding_provider_spec("volcengine")?,
2457 embedding_provider_spec("alibaba")?,
2458 embedding_provider_spec("custom")?,
2459 ],
2460 }),
2461 JsonStyle::Pretty,
2462 ),
2463 RagCommand::Embed {
2464 provider,
2465 input,
2466 endpoint,
2467 model,
2468 input_type,
2469 api_key_env,
2470 } => {
2471 let value = run_embedding_provider_json_command(
2472 provider,
2473 input,
2474 endpoint,
2475 model,
2476 input_type,
2477 api_key_env,
2478 )?;
2479 format_json(&value, JsonStyle::PythonCompact)
2480 }
2481 RagCommand::Status { collection, .. } => {
2482 let value = rag_status_value(client, &collection)?;
2483 format_json(&value, JsonStyle::PythonCompact)
2484 }
2485 RagCommand::Search {
2486 query,
2487 collection,
2488 keys,
2489 zotero,
2490 top_spans_per_item,
2491 include_fulltext_spans,
2492 top_k,
2493 output,
2494 ..
2495 } => run_rag_hits_command(
2496 client,
2497 RagHitsOptions {
2498 query,
2499 collection,
2500 keys,
2501 zotero,
2502 top_spans_per_item,
2503 include_fulltext_spans,
2504 top_k,
2505 output,
2506 },
2507 ),
2508 }
2509}
2510
2511fn run_embedding_provider_json_command(
2512 provider: String,
2513 input: String,
2514 endpoint: Option<String>,
2515 model: Option<String>,
2516 input_type: Option<String>,
2517 api_key_env: Option<String>,
2518) -> Result<Value, String> {
2519 let mut input: EmbeddingRequestInput = read_json_input(&input)?;
2520 if endpoint.is_some() {
2521 input.url = endpoint;
2522 }
2523 if model.is_some() {
2524 input.model = model;
2525 }
2526 if input_type.is_some() {
2527 input.input_type = input_type;
2528 }
2529 let mut transport = provider_http_transport(api_key_env.as_deref())?;
2530 let vectors = execute_embedding_provider_request(&provider, &input, &mut transport)?;
2531
2532 Ok(serde_json::json!({
2533 "provider": provider,
2534 "vectors": vectors,
2535 }))
2536}
2537
2538fn provider_http_transport(api_key_env: Option<&str>) -> Result<UreqProviderHttpTransport, String> {
2539 provider_http_transport_with_auth(api_key_env, "bearer")
2540}
2541
2542fn provider_http_transport_with_auth(
2543 api_key_env: Option<&str>,
2544 auth_scheme: &str,
2545) -> Result<UreqProviderHttpTransport, String> {
2546 let Some(env_name) = api_key_env else {
2547 return Ok(UreqProviderHttpTransport::new());
2548 };
2549 let token = env::var(env_name)
2550 .map_err(|_| format!("missing provider credential env var {env_name}"))?;
2551 if token.trim().is_empty() {
2552 return Err(format!("provider credential env var {env_name} is empty"));
2553 }
2554 let token = token.trim();
2555 match auth_scheme {
2556 "token" if token.starts_with("token ") => {
2557 Ok(UreqProviderHttpTransport::with_api_key(token.to_string()))
2558 }
2559 "token" => Ok(UreqProviderHttpTransport::with_api_key(format!(
2560 "token {token}"
2561 ))),
2562 "bearer" if token.starts_with("Bearer ") => {
2563 Ok(UreqProviderHttpTransport::with_api_key(token.to_string()))
2564 }
2565 "bearer" => Ok(UreqProviderHttpTransport::with_bearer_token(token)),
2566 "none" => Ok(UreqProviderHttpTransport::new()),
2567 other => Err(format!("unsupported provider auth scheme {other}")),
2568 }
2569}
2570
2571fn read_json_input<T: serde::de::DeserializeOwned>(path: &str) -> Result<T, String> {
2572 let payload = if path == "-" {
2573 let mut input = String::new();
2574 io::stdin()
2575 .read_to_string(&mut input)
2576 .map_err(|err| format!("read stdin: {err}"))?;
2577 input
2578 } else {
2579 fs::read_to_string(path).map_err(|err| format!("read {path}: {err}"))?
2580 };
2581 serde_json::from_str::<T>(&payload)
2582 .map_err(|err| format!("INVALID_JSON: Could not parse JSON: {err}"))
2583}
2584
2585fn run_rag_hits_command(
2586 client: &mut impl RpcCaller,
2587 options: RagHitsOptions,
2588) -> Result<String, String> {
2589 if !options.zotero {
2590 return Err(
2591 "zotron rag hits currently supports only the fixture-covered --zotero backend in Rust"
2592 .to_string(),
2593 );
2594 }
2595 if options.collection.is_none() && options.keys.is_empty() {
2596 return Err(
2597 "INVALID_ARGS: --collection or --key is required when --zotero is used".to_string(),
2598 );
2599 }
2600 let mut params = serde_json::json!({
2601 "query": options.query,
2602 "limit": options.top_k,
2603 "top_spans_per_item": options.top_spans_per_item,
2604 "include_fulltext_spans": options.include_fulltext_spans,
2605 });
2606 if let Some(map) = params.as_object_mut() {
2607 if let Some(collection) = options.collection {
2608 map.insert("collection".to_string(), Value::String(collection));
2609 }
2610 if !options.keys.is_empty() {
2611 map.insert(
2612 "keys".to_string(),
2613 Value::Array(options.keys.into_iter().map(Value::String).collect()),
2614 );
2615 }
2616 }
2617 let payload = client.call("rag.searchHits", Some(params))?;
2618 let hits = payload
2619 .get("hits")
2620 .and_then(Value::as_array)
2621 .cloned()
2622 .unwrap_or_default();
2623 if options.output == "jsonl" {
2624 let mut out = String::new();
2625 for hit in &hits {
2626 out.push_str(&serde_json::to_string(hit).map_err(|err| err.to_string())?);
2627 out.push('\n');
2628 }
2629 Ok(out)
2630 } else {
2631 let total = hits.len() as u64;
2632 let value = normalize_list_envelope(
2633 serde_json::json!({"items": hits, "total": total}),
2634 "items",
2635 Some(options.top_k),
2636 0,
2637 );
2638 format_json(&value, JsonStyle::Pretty)
2639 }
2640}
2641
2642fn rag_status_value(client: &mut impl RpcCaller, collection: &str) -> Result<Value, String> {
2643 let raw_store_path = rag_store_path(collection);
2644 if raw_store_path.exists() {
2645 return rag_status_from_store(collection, &raw_store_path);
2646 }
2647
2648 let mut store_candidates = Vec::new();
2649 let collection_match = find_collection_in_tree(client, collection)?;
2650 if let Some(collection_node) = collection_match.as_ref() {
2651 if let Some(name) = collection_node.get("name").and_then(Value::as_str) {
2652 store_candidates.push(rag_store_path(name));
2653 }
2654 if let Some(key) = collection_node.get("key").and_then(Value::as_str) {
2655 store_candidates.push(rag_store_path(key));
2656 }
2657 }
2658 for store_path in unique_paths(store_candidates) {
2659 if store_path.exists() {
2660 return rag_status_from_store(collection, &store_path);
2661 }
2662 }
2663
2664 rag_status_from_zotero_sidecars(client, collection, collection_match)
2665}
2666
/// Removes duplicate paths while keeping the first occurrence of each, in the
/// original order.
fn unique_paths(paths: Vec<PathBuf>) -> Vec<PathBuf> {
    paths.into_iter().fold(Vec::new(), |mut kept, candidate| {
        if !kept.contains(&candidate) {
            kept.push(candidate);
        }
        kept
    })
}
2676
2677fn rag_status_from_store(collection: &str, store_path: &Path) -> Result<Value, String> {
2678 let raw = fs::read_to_string(store_path)
2679 .map_err(|err| format!("read RAG store {}: {err}", store_path.display()))?;
2680 let store: Value = serde_json::from_str(&raw)
2681 .map_err(|err| format!("parse RAG store {}: {err}", store_path.display()))?;
2682 let chunks = store
2683 .get("chunks")
2684 .and_then(Value::as_array)
2685 .cloned()
2686 .unwrap_or_default();
2687 let mut item_keys = Vec::<Value>::new();
2688 for chunk in &chunks {
2689 let Some(item_key) = chunk.get("item_key") else {
2690 continue;
2691 };
2692 if !item_keys.iter().any(|seen| seen == item_key) {
2693 item_keys.push(item_key.clone());
2694 }
2695 }
2696 Ok(serde_json::json!({
2697 "status": "indexed",
2698 "collection": store.get("collection").and_then(Value::as_str).unwrap_or(collection),
2699 "collection_key": store.get("collection_key").cloned().unwrap_or(Value::Null),
2700 "model": store.get("model").cloned().unwrap_or(Value::String("unknown".to_string())),
2701 "total_chunks": chunks.len(),
2702 "total_items": item_keys.len(),
2703 "store_path": store_path.to_string_lossy(),
2704 }))
2705}
2706
2707fn rag_status_from_zotero_sidecars(
2708 client: &mut impl RpcCaller,
2709 collection: &str,
2710 collection_match: Option<Value>,
2711) -> Result<Value, String> {
2712 let collection_key = collection_match
2713 .as_ref()
2714 .and_then(|node| node.get("key").cloned())
2715 .ok_or_else(|| format!("COLLECTION_NOT_FOUND: Collection not found: {collection:?}"))?;
2716 let raw = paginate_rpc(
2717 client,
2718 "collections.getItems",
2719 serde_json::json!({"key": collection_key}),
2720 500,
2721 )?;
2722 let items = raw
2723 .get("items")
2724 .and_then(Value::as_array)
2725 .or_else(|| raw.as_array())
2726 .ok_or_else(|| "collections.getItems returned non-array/non-items result".to_string())?
2727 .clone();
2728
2729 let mut indexed_items = 0usize;
2730 let mut total_chunks = 0usize;
2731 for item in &items {
2732 let item_key = item.get("key").cloned().unwrap_or(Value::Null);
2733 let chunk_count = sidecar_chunk_count_for_item(client, &item_key)?;
2734 if chunk_count > 0 {
2735 indexed_items += 1;
2736 total_chunks += chunk_count;
2737 }
2738 }
2739
2740 if indexed_items == 0 {
2741 return Ok(serde_json::json!({
2742 "status": "not indexed",
2743 "collection": collection,
2744 "total_items": items.len(),
2745 "indexed_items": 0,
2746 }));
2747 }
2748
2749 Ok(serde_json::json!({
2750 "status": "indexed",
2751 "collection": collection,
2752 "total_chunks": total_chunks,
2753 "total_items": indexed_items,
2754 "collection_items": items.len(),
2755 "source": "zotero-sidecar",
2756 }))
2757}
2758
2759fn sidecar_chunk_count_for_item(
2760 client: &mut impl RpcCaller,
2761 item_key: &Value,
2762) -> Result<usize, String> {
2763 let attachments = client.call(
2764 "attachments.list",
2765 Some(serde_json::json!({"parentKey": item_key.clone()})),
2766 )?;
2767 let Some(attachments) = attachments.as_array() else {
2768 return Ok(0);
2769 };
2770
2771 let mut count = 0usize;
2772 for attachment in attachments {
2773 let Some(path) = attachment.get("path").and_then(Value::as_str) else {
2774 continue;
2775 };
2776 let local = local_path_from_zotero_path(path);
2777 let Some(dir) = Path::new(&local).parent() else {
2778 continue;
2779 };
2780 let Ok(bytes) = read_machine_artifact_sidecar(dir, MachineArtifactKind::Chunks) else {
2781 continue;
2782 };
2783 let text = String::from_utf8_lossy(&bytes);
2784 count += text.lines().filter(|line| !line.trim().is_empty()).count();
2785 }
2786 Ok(count)
2787}
2788
2789fn rag_store_path(collection: &str) -> PathBuf {
2790 rag_store_root().join(format!("{collection}.json"))
2791}
2792
2793fn rag_store_root() -> PathBuf {
2794 let xdg_data_home = env::var_os("XDG_DATA_HOME")
2795 .filter(|path| !path.is_empty())
2796 .map(PathBuf::from);
2797 let appdata = env::var_os("APPDATA")
2798 .filter(|path| !path.is_empty())
2799 .map(PathBuf::from);
2800 let userprofile = env::var_os("USERPROFILE")
2801 .filter(|path| !path.is_empty())
2802 .map(PathBuf::from);
2803 let home = env::var_os("HOME")
2804 .filter(|path| !path.is_empty())
2805 .map(PathBuf::from);
2806
2807 rag_store_root_for_platform(
2808 ArtifactStorePlatform::current(),
2809 xdg_data_home.as_deref(),
2810 appdata.as_deref(),
2811 userprofile.as_deref(),
2812 home.as_deref(),
2813 )
2814}
2815
2816fn rag_store_root_for_platform(
2817 platform: ArtifactStorePlatform,
2818 xdg_data_home: Option<&Path>,
2819 appdata: Option<&Path>,
2820 userprofile: Option<&Path>,
2821 home: Option<&Path>,
2822) -> PathBuf {
2823 match platform {
2824 ArtifactStorePlatform::Windows => {
2825 if let Some(path) = appdata {
2826 return path.join("Zotron").join("rag");
2827 }
2828 if let Some(path) = userprofile {
2829 return path
2830 .join("AppData")
2831 .join("Roaming")
2832 .join("Zotron")
2833 .join("rag");
2834 }
2835 if let Some(path) = home {
2836 return path
2837 .join("AppData")
2838 .join("Roaming")
2839 .join("Zotron")
2840 .join("rag");
2841 }
2842 PathBuf::from(".zotron").join("rag")
2843 }
2844 ArtifactStorePlatform::Macos => {
2845 if let Some(path) = home {
2846 return path
2847 .join("Library")
2848 .join("Application Support")
2849 .join("Zotron")
2850 .join("rag");
2851 }
2852 if let Some(path) = xdg_data_home {
2853 return path.join("zotron").join("rag");
2854 }
2855 PathBuf::from(".zotron").join("rag")
2856 }
2857 ArtifactStorePlatform::Linux | ArtifactStorePlatform::Other => xdg_data_home
2858 .map(|path| path.join("zotron").join("rag"))
2859 .or_else(|| {
2860 home.map(|path| path.join(".local").join("share").join("zotron").join("rag"))
2861 })
2862 .unwrap_or_else(|| PathBuf::from(".zotron").join("rag")),
2863 }
2864}
2865
2866fn run_push_command(
2867 json_file: String,
2868 pdf: Option<String>,
2869 collection: Option<String>,
2870 on_duplicate: String,
2871 dry_run: bool,
2872 client: &mut impl RpcCaller,
2873) -> Result<String, String> {
2874 if !matches!(on_duplicate.as_str(), "skip" | "update" | "create") {
2875 return Err(format!(
2876 "INVALID_ARGS: --on-duplicate must be skip|update|create, got {on_duplicate:?}"
2877 ));
2878 }
2879
2880 let payload = if json_file == "-" {
2881 let mut input = String::new();
2882 io::stdin()
2883 .read_to_string(&mut input)
2884 .map_err(|err| format!("read stdin: {err}"))?;
2885 input
2886 } else {
2887 fs::read_to_string(&json_file).map_err(|err| format!("read {json_file}: {err}"))?
2888 };
2889 let item_json = serde_json::from_str::<Value>(&payload)
2890 .map_err(|err| format!("INVALID_JSON: Could not parse JSON: {err}"))?;
2891
2892 if dry_run {
2893 let collection_key = collection
2894 .as_deref()
2895 .map(|name| resolve_collection(client, name))
2896 .transpose()?;
2897 return format_json(
2898 &serde_json::json!({
2899 "ok": true,
2900 "dryRun": true,
2901 "wouldPush": {
2902 "title": item_json.get("title").cloned().unwrap_or(Value::Null),
2903 "itemType": item_json.get("itemType").cloned().unwrap_or(Value::Null),
2904 "collectionKey": collection_key,
2905 "pdfPath": pdf,
2906 "onDuplicate": on_duplicate,
2907 }
2908 }),
2909 JsonStyle::PythonCompact,
2910 );
2911 }
2912
2913 let result = push_item(
2914 client,
2915 &item_json,
2916 pdf.as_deref(),
2917 collection.as_deref(),
2918 &on_duplicate,
2919 )?;
2920 format_json(&result, JsonStyle::PythonCompact)
2921}
2922
2923fn push_item(
2924 client: &mut impl RpcCaller,
2925 item_json: &Value,
2926 pdf_path: Option<&str>,
2927 collection: Option<&str>,
2928 on_duplicate: &str,
2929) -> Result<Value, String> {
2930 let pdf_size = if let Some(path) = pdf_path {
2931 validate_pdf_magic(path)?
2932 } else {
2933 0
2934 };
2935
2936 let collection_key = match collection {
2937 Some(name) => resolve_collection(client, name)?,
2938 None => resolve_current_collection(client)?,
2939 };
2940
2941 let dup_id = find_duplicate(client, item_json)?;
2942 if let Some(dup_id) = dup_id.as_deref().filter(|_| on_duplicate == "skip") {
2943 if !is_library_root(&collection_key) {
2944 client.call(
2945 "collections.addItems",
2946 Some(serde_json::json!({"key": collection_key, "keys": [dup_id]})),
2947 )?;
2948 }
2949 let mut pdf_attached = false;
2950 if let Some(path) = pdf_path {
2951 if !item_has_pdf_attachment(client, dup_id)? {
2952 attach_pdf(client, dup_id, path)?;
2953 pdf_attached = true;
2954 }
2955 }
2956 return Ok(push_result(
2957 "skipped_duplicate",
2958 Some(dup_id.to_string()),
2959 pdf_attached,
2960 if pdf_attached { pdf_size } else { 0 },
2961 Value::Null,
2962 ));
2963 }
2964
2965 let xpi_payload = to_xpi_payload(item_json, Some(&collection_key));
2966 let (item_key, status) =
2967 if let Some(dup_id) = dup_id.as_deref().filter(|_| on_duplicate == "update") {
2968 let mut params = serde_json::Map::new();
2969 params.insert("key".to_string(), Value::String(dup_id.to_string()));
2970 params.insert(
2971 "fields".to_string(),
2972 xpi_payload
2973 .get("fields")
2974 .cloned()
2975 .unwrap_or_else(|| serde_json::json!({})),
2976 );
2977 if let Some(creators) = xpi_payload.get("creators") {
2978 params.insert("creators".to_string(), creators.clone());
2979 }
2980 if let Some(tags) = xpi_payload.get("tags") {
2981 params.insert("tags".to_string(), tags.clone());
2982 }
2983 client.call("items.update", Some(Value::Object(params)))?;
2984 (dup_id.to_string(), "updated")
2985 } else {
2986 let created = client.call("items.create", Some(xpi_payload))?;
2987 let key = created
2988 .get("key")
2989 .and_then(Value::as_str)
2990 .ok_or_else(|| format!("items.create returned unexpected shape: {created:?}"))?;
2991 (key.to_string(), "created")
2992 };
2993
2994 let mut pdf_attached = false;
2995 if let Some(path) = pdf_path {
2996 if status != "updated" || !item_has_pdf_attachment(client, &item_key)? {
2997 attach_pdf(client, &item_key, path)?;
2998 pdf_attached = true;
2999 }
3000 }
3001
3002 if status == "updated" && !is_library_root(&collection_key) {
3003 client.call(
3004 "collections.addItems",
3005 Some(serde_json::json!({"key": collection_key, "keys": [item_key]})),
3006 )?;
3007 }
3008
3009 Ok(push_result(
3010 status,
3011 Some(item_key),
3012 pdf_attached,
3013 if pdf_attached { pdf_size } else { 0 },
3014 Value::Null,
3015 ))
3016}
3017
/// Checks that the file at `path` begins with the `%PDF-` magic bytes and
/// returns its size in bytes.
///
/// Only the five-byte header is read; the size comes from the file's
/// metadata, so large PDFs are not loaded into memory.
///
/// # Errors
/// Returns `INVALID_PDF: <path> does not start with %PDF- magic bytes` when
/// the file cannot be opened or read, is shorter than the header, or has a
/// different header.
fn validate_pdf_magic(path: &str) -> Result<u64, String> {
    const PDF_MAGIC: [u8; 5] = *b"%PDF-";
    let invalid = || format!("INVALID_PDF: {path} does not start with %PDF- magic bytes");

    let mut file = fs::File::open(path).map_err(|_| invalid())?;
    let mut header = [0u8; 5];
    // `read_exact` fails on files shorter than the header, which is reported
    // the same way as a wrong header.
    file.read_exact(&mut header).map_err(|_| invalid())?;
    if header != PDF_MAGIC {
        return Err(invalid());
    }

    let size = file.metadata().map_err(|_| invalid())?.len();
    Ok(size)
}
3028
3029fn resolve_current_collection(client: &mut impl RpcCaller) -> Result<Value, String> {
3030 let selected = client.call("system.currentCollection", None)?;
3031 Ok(selected
3032 .get("key")
3033 .cloned()
3034 .unwrap_or_else(|| Value::Number(0.into())))
3035}
3036
3037fn find_duplicate(
3038 client: &mut impl RpcCaller,
3039 item_json: &Value,
3040) -> Result<Option<String>, String> {
3041 if let Some(doi) = item_json
3042 .get("DOI")
3043 .and_then(Value::as_str)
3044 .filter(|doi| !doi.is_empty())
3045 {
3046 let hits = client.call("search.byIdentifier", Some(serde_json::json!({"doi": doi})))?;
3047 if let Some(key) = first_hit_key(&hits) {
3048 return Ok(Some(key));
3049 }
3050 }
3051
3052 if let Some(title) = item_json
3053 .get("title")
3054 .and_then(Value::as_str)
3055 .filter(|title| title.len() >= 10)
3056 {
3057 let hits = client.call(
3058 "search.quick",
3059 Some(serde_json::json!({"query": title, "limit": 20})),
3060 )?;
3061 if let Some(items) = response_items(&hits) {
3062 for item in items {
3063 if item.get("title").and_then(Value::as_str) == Some(title) {
3064 if let Some(key) = item.get("key").and_then(Value::as_str) {
3065 return Ok(Some(key.to_string()));
3066 }
3067 }
3068 }
3069 }
3070 }
3071
3072 Ok(None)
3073}
3074
3075fn first_hit_key(response: &Value) -> Option<String> {
3076 response_items(response)?
3077 .first()?
3078 .get("key")?
3079 .as_str()
3080 .map(ToString::to_string)
3081}
3082
3083fn response_items(response: &Value) -> Option<&Vec<Value>> {
3084 response
3085 .get("items")
3086 .and_then(Value::as_array)
3087 .or_else(|| response.as_array())
3088}
3089
3090fn to_xpi_payload(item_json: &Value, collection_key: Option<&Value>) -> Value {
3091 const NON_FIELD_KEYS: &[&str] = &[
3092 "itemType",
3093 "creators",
3094 "tags",
3095 "collections",
3096 "attachments",
3097 "relations",
3098 "notes",
3099 "id",
3100 "key",
3101 "version",
3102 ];
3103
3104 let mut fields = serde_json::Map::new();
3105 if let Some(item) = item_json.as_object() {
3106 for (key, value) in item {
3107 if !NON_FIELD_KEYS.contains(&key.as_str()) && !value.is_null() && value != "" {
3108 fields.insert(key.clone(), value.clone());
3109 }
3110 }
3111 }
3112
3113 let mut payload = serde_json::Map::new();
3114 payload.insert(
3115 "itemType".to_string(),
3116 item_json
3117 .get("itemType")
3118 .cloned()
3119 .unwrap_or_else(|| Value::String("journalArticle".to_string())),
3120 );
3121 payload.insert("fields".to_string(), Value::Object(fields));
3122
3123 if let Some(creators) = item_json.get("creators").and_then(Value::as_array) {
3124 if !creators.is_empty() {
3125 payload.insert(
3126 "creators".to_string(),
3127 Value::Array(
3128 creators
3129 .iter()
3130 .map(|creator| {
3131 serde_json::json!({
3132 "firstName": creator.get("firstName").and_then(Value::as_str).unwrap_or(""),
3133 "lastName": creator.get("lastName").and_then(Value::as_str).unwrap_or(""),
3134 "creatorType": creator.get("creatorType").and_then(Value::as_str).unwrap_or("author"),
3135 })
3136 })
3137 .collect(),
3138 ),
3139 );
3140 }
3141 }
3142
3143 if let Some(tags) = item_json.get("tags").and_then(Value::as_array) {
3144 if !tags.is_empty() {
3145 payload.insert(
3146 "tags".to_string(),
3147 Value::Array(
3148 tags.iter()
3149 .map(|tag| tag.get("tag").cloned().unwrap_or_else(|| tag.clone()))
3150 .collect(),
3151 ),
3152 );
3153 }
3154 }
3155
3156 if let Some(collection_key) = collection_key.filter(|key| !is_library_root(key)) {
3157 payload.insert(
3158 "collections".to_string(),
3159 Value::Array(vec![collection_key.clone()]),
3160 );
3161 }
3162
3163 Value::Object(payload)
3164}
3165
3166fn item_has_pdf_attachment(client: &mut impl RpcCaller, item_key: &str) -> Result<bool, String> {
3167 let attachments = client.call(
3168 "attachments.list",
3169 Some(serde_json::json!({"parentKey": item_key})),
3170 )?;
3171 Ok(has_pdf_attachment(&attachments))
3172}
3173
3174fn attach_pdf(client: &mut impl RpcCaller, item_key: &str, path: &str) -> Result<(), String> {
3175 client.call(
3176 "attachments.add",
3177 Some(serde_json::json!({
3178 "parentKey": item_key,
3179 "path": zotero_path(path),
3180 "title": "Full Text PDF",
3181 })),
3182 )?;
3183 Ok(())
3184}
3185
3186fn zotero_path(path: &str) -> String {
3187 let path = Path::new(path)
3188 .canonicalize()
3189 .unwrap_or_else(|_| Path::new(path).to_path_buf())
3190 .to_string_lossy()
3191 .into_owned();
3192 if is_wsl() {
3193 return ProcessCommand::new("wslpath")
3194 .arg("-w")
3195 .arg(&path)
3196 .output()
3197 .ok()
3198 .filter(|output| output.status.success())
3199 .and_then(|output| String::from_utf8(output.stdout).ok())
3200 .map(|converted| converted.trim().to_string())
3201 .filter(|converted| !converted.is_empty())
3202 .unwrap_or(path);
3203 }
3204 path
3205}
3206
/// Detects WSL: true when `WSL_DISTRO_NAME` is set, or when
/// `/proc/sys/kernel/osrelease` is readable and contains "microsoft"
/// (case-insensitively).
fn is_wsl() -> bool {
    let distro_named = env::var_os("WSL_DISTRO_NAME").is_some();
    distro_named
        || fs::read_to_string("/proc/sys/kernel/osrelease")
            .is_ok_and(|release| release.to_ascii_lowercase().contains("microsoft"))
}
3215
3216fn is_library_root(value: &Value) -> bool {
3217 value.as_i64() == Some(0) || value.as_u64() == Some(0)
3218}
3219
3220fn push_result(
3221 status: &str,
3222 zotero_item_key: Option<String>,
3223 pdf_attached: bool,
3224 pdf_size_bytes: u64,
3225 error: Value,
3226) -> Value {
3227 serde_json::json!({
3228 "status": status,
3229 "zotero_item_key": zotero_item_key,
3230 "pdf_attached": pdf_attached,
3231 "pdf_size_bytes": pdf_size_bytes,
3232 "error": error,
3233 })
3234}
3235
3236fn run_search(
3237 args: SearchArgs,
3238 client: &mut impl RpcCaller,
3239) -> Result<(Value, JsonStyle), String> {
3240 let SearchArgs {
3241 query, fulltext, author, after, before, journal, tag,
3242 doi, isbn, issn, collection, limit, offset, ..
3243 } = args;
3244
3245 let has_identifier = doi.is_some() || isbn.is_some() || issn.is_some();
3246 if has_identifier {
3247 let mut params = serde_json::Map::new();
3248 if let Some(doi) = doi { params.insert("doi".into(), Value::String(doi)); }
3249 if let Some(isbn) = isbn { params.insert("isbn".into(), Value::String(isbn)); }
3250 if let Some(issn) = issn { params.insert("issn".into(), Value::String(issn)); }
3251 let value = client.call("search.byIdentifier", Some(Value::Object(params)))?;
3252 return Ok((normalize_list_envelope(value, "items", None, 0), JsonStyle::Pretty));
3253 }
3254
3255 if fulltext {
3256 let query = query.ok_or("INVALID_ARGS: --fulltext requires a search query")?;
3257 let mut params = serde_json::json!({"query": query, "limit": limit});
3258 if let (Some(col), Some(map)) = (collection, params.as_object_mut()) {
3259 map.insert("collection".into(), resolve_collection(client, &col)?);
3260 }
3261 let value = client.call("search.fulltext", Some(params))?;
3262 return Ok((normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty));
3263 }
3264
3265 let has_filters = author.is_some() || after.is_some() || before.is_some()
3266 || journal.is_some() || tag.is_some();
3267 if has_filters {
3268 let mut conditions: Vec<Value> = Vec::new();
3269 if let Some(query) = &query {
3270 conditions.push(serde_json::json!({
3271 "field": "quicksearch-titleCreatorYear",
3272 "operator": "contains",
3273 "value": query,
3274 }));
3275 }
3276 if let Some(author) = author {
3277 conditions.push(serde_json::json!({
3278 "field": "creator", "operator": "contains", "value": author,
3279 }));
3280 }
3281 if let Some(after) = after {
3282 conditions.push(serde_json::json!({
3283 "field": "date", "operator": "isAfter", "value": after,
3284 }));
3285 }
3286 if let Some(before) = before {
3287 conditions.push(serde_json::json!({
3288 "field": "date", "operator": "isBefore", "value": before,
3289 }));
3290 }
3291 if let Some(journal) = journal {
3292 conditions.push(serde_json::json!({
3293 "field": "publicationTitle", "operator": "contains", "value": journal,
3294 }));
3295 }
3296 if let Some(tag) = tag {
3297 conditions.push(serde_json::json!({
3298 "field": "tag", "operator": "is", "value": tag,
3299 }));
3300 }
3301 let value = client.call(
3302 "search.advanced",
3303 Some(serde_json::json!({
3304 "conditions": conditions,
3305 "operator": "and",
3306 "limit": limit,
3307 "offset": offset,
3308 })),
3309 )?;
3310 return Ok((normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty));
3311 }
3312
3313 let query = query.ok_or(
3314 "INVALID_ARGS: provide a search query, or use --doi/--isbn/--issn for identifier lookup"
3315 )?;
3316 let value = if let Some(col) = collection {
3317 let key = resolve_collection(client, &col)?;
3318 let response = client.call(
3319 "collections.getItems",
3320 Some(serde_json::json!({"key": key})),
3321 )?;
3322 collection_quick_search_response(&response, &query, limit)
3323 } else {
3324 filter_search_artifacts(client.call(
3325 "search.quick",
3326 Some(serde_json::json!({"query": query, "limit": limit})),
3327 )?)
3328 };
3329 Ok((normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty))
3330}
3331
3332fn run_search_management_command(
3333 command: SearchManagementCommand,
3334 client: &mut impl RpcCaller,
3335) -> Result<(Value, JsonStyle), String> {
3336 match command {
3337 SearchManagementCommand::SavedSearches { .. } => Ok((
3338 normalize_list_envelope(client.call("search.savedSearches", None)?, "items", None, 0),
3339 JsonStyle::Pretty,
3340 )),
3341 SearchManagementCommand::CreateSaved {
3342 name, condition, dry_run, ..
3343 } => {
3344 let conditions = condition
3345 .iter()
3346 .map(|raw| parse_search_condition(raw))
3347 .collect::<Result<Vec<_>, _>>()?;
3348 let params = serde_json::json!({"name": name, "conditions": conditions});
3349 if dry_run {
3350 Ok((dry_run_value("search.createSavedSearch", params), JsonStyle::PythonCompact))
3351 } else {
3352 Ok((client.call("search.createSavedSearch", Some(params))?, JsonStyle::PythonCompact))
3353 }
3354 }
3355 SearchManagementCommand::DeleteSaved {
3356 search_key, dry_run, ..
3357 } => {
3358 let params = serde_json::json!({"key": search_key});
3359 if dry_run {
3360 Ok((dry_run_value("search.deleteSavedSearch", params), JsonStyle::PythonCompact))
3361 } else {
3362 Ok((client.call("search.deleteSavedSearch", Some(params))?, JsonStyle::PythonCompact))
3363 }
3364 }
3365 }
3366}
3367
3368fn filter_search_artifacts(mut value: Value) -> Value {
3369 let Some(items) = value.get_mut("items").and_then(Value::as_array_mut) else {
3370 return value;
3371 };
3372 items.retain(|item| match item.get("title").and_then(Value::as_str) {
3373 Some(title) => !is_zotron_evidence_artifact(title),
3374 None => true,
3375 });
3376 let total_items = items.len() as u64;
3377 if let Some(total) = value.get_mut("total") {
3378 *total = Value::from(total_items);
3379 }
3380 value
3381}
3382
3383fn collection_quick_search_response(response: &Value, query: &str, limit: u64) -> Value {
3384 let mut matched = collection_items(response)
3385 .into_iter()
3386 .filter(|item| !item_is_evidence_artifact(item))
3387 .filter(|item| quick_item_matches(item, query))
3388 .collect::<Vec<_>>();
3389 let total = matched.len() as u64;
3390 let limit = usize::try_from(limit).unwrap_or(usize::MAX);
3391 if matched.len() > limit {
3392 matched.truncate(limit);
3393 }
3394 serde_json::json!({"items": matched, "total": total})
3395}
3396
3397fn item_is_evidence_artifact(item: &Value) -> bool {
3398 item.get("title")
3399 .and_then(Value::as_str)
3400 .is_some_and(is_zotron_evidence_artifact)
3401}
3402
3403fn quick_item_matches(item: &Value, query: &str) -> bool {
3404 let terms = query
3405 .split_whitespace()
3406 .map(|term| term.to_lowercase())
3407 .filter(|term| !term.is_empty())
3408 .collect::<Vec<_>>();
3409 if terms.is_empty() {
3410 return true;
3411 }
3412 let mut haystack = String::new();
3413 append_search_text(item, &mut haystack);
3414 let haystack = haystack.to_lowercase();
3415 terms.iter().all(|term| haystack.contains(term))
3416}
3417
3418fn append_search_text(value: &Value, out: &mut String) {
3419 match value {
3420 Value::String(text) => {
3421 out.push(' ');
3422 out.push_str(text);
3423 }
3424 Value::Number(number) => {
3425 out.push(' ');
3426 out.push_str(&number.to_string());
3427 }
3428 Value::Bool(value) => {
3429 out.push(' ');
3430 out.push_str(if *value { "true" } else { "false" });
3431 }
3432 Value::Array(items) => {
3433 for item in items {
3434 append_search_text(item, out);
3435 }
3436 }
3437 Value::Object(map) => {
3438 for item in map.values() {
3439 append_search_text(item, out);
3440 }
3441 }
3442 Value::Null => {}
3443 }
3444}
3445
3446fn parse_search_condition(raw: &str) -> Result<Value, String> {
3447 let mut parts = raw.split_whitespace();
3448 let field = parts.next();
3449 let operator = parts.next();
3450 let value = parts.collect::<Vec<_>>().join(" ");
3451 match (field, operator, value.is_empty()) {
3452 (Some(field), Some(operator), false) => Ok(serde_json::json!({
3453 "field": field,
3454 "operator": operator,
3455 "value": value,
3456 })),
3457 _ => Err(format!(
3458 "INVALID_ARGS: --condition must be 'field operator value', got: {raw:?}"
3459 )),
3460 }
3461}
3462
3463fn normalize_list_envelope(value: Value, list_key: &str, limit: Option<u64>, offset: u64) -> Value {
3464 if let Value::Array(arr) = value {
3465 let total = arr.len() as u64;
3466 let mut obj = serde_json::Map::new();
3467 obj.insert(list_key.to_string(), Value::Array(arr));
3468 obj.insert("total".to_string(), Value::from(total));
3469 if let Some(limit) = limit {
3470 obj.insert("limit".to_string(), Value::from(limit));
3471 }
3472 obj.insert("offset".to_string(), Value::from(offset));
3473 obj.insert("hasMore".to_string(), Value::Bool(false));
3474 return Value::Object(obj);
3475 }
3476
3477 let mut obj = match value {
3478 Value::Object(obj) if obj.contains_key(list_key) => obj,
3479 other => return other,
3480 };
3481
3482 let items_len = obj
3483 .get(list_key)
3484 .and_then(Value::as_array)
3485 .map_or(0, |a| a.len()) as u64;
3486 let total = obj
3487 .get("total")
3488 .and_then(Value::as_u64)
3489 .unwrap_or(items_len);
3490
3491 obj.insert("total".to_string(), Value::from(total));
3492 if let Some(limit) = limit {
3493 obj.insert("limit".to_string(), Value::from(limit));
3494 }
3495 obj.insert("offset".to_string(), Value::from(offset));
3496 obj.insert(
3497 "hasMore".to_string(),
3498 Value::Bool(offset + items_len < total),
3499 );
3500
3501 Value::Object(obj)
3502}
3503
/// Maximum number of rows `paginate_rpc` accumulates; once reached, the
/// collected rows are truncated to this length and returned.
const RPC_PAGINATION_SAFETY_CAP: usize = 10_000;
/// Object keys that `extract_page` probes, in this order, for the array
/// holding one page of rows.
const RPC_PAGE_LIST_KEYS: [&str; 4] = ["items", "tags", "results", "data"];
3506
3507fn paginate_rpc(
3508 client: &mut impl RpcCaller,
3509 method: &str,
3510 params: Value,
3511 page_size: usize,
3512) -> Result<Value, String> {
3513 let base = params
3514 .as_object()
3515 .ok_or_else(|| "params must be a JSON object".to_string())?;
3516 let mut out = Vec::new();
3517 let mut prev_page: Option<Vec<Value>> = None;
3518 let mut offset = 0usize;
3519
3520 loop {
3521 let mut page_params = base.clone();
3522 page_params.insert("offset".to_string(), Value::Number(offset.into()));
3523 page_params.insert("limit".to_string(), Value::Number(page_size.into()));
3524 let response = client.call(method, Some(Value::Object(page_params)))?;
3525
3526 let page = match extract_page(&response) {
3527 Some(page) => page,
3528 None if out.is_empty() => return Ok(response),
3529 None if response.is_object() => {
3530 return Err(format!(
3531 "paginate: {method:?} returned a non-paginated dict after {} accumulated rows; aborting",
3532 out.len()
3533 ));
3534 }
3535 None => {
3536 return Err(format!(
3537 "paginate: {method:?} returned non-list/non-dict shape after {} accumulated rows; aborting",
3538 out.len()
3539 ));
3540 }
3541 };
3542
3543 if prev_page.as_ref() == Some(&page) {
3544 return Err(format!(
3545 "paginate: {method:?} returned identical pages — method likely ignores offset; aborting after {} rows",
3546 out.len()
3547 ));
3548 }
3549
3550 let page_len = page.len();
3551 out.extend(page.clone());
3552 if page_len < page_size {
3553 return Ok(Value::Array(out));
3554 }
3555 if out.len() >= RPC_PAGINATION_SAFETY_CAP {
3556 out.truncate(RPC_PAGINATION_SAFETY_CAP);
3557 return Ok(Value::Array(out));
3558 }
3559 prev_page = Some(page);
3560 offset += page_size;
3561 }
3562}
3563
3564fn extract_page(response: &Value) -> Option<Vec<Value>> {
3565 if let Some(page) = response.as_array() {
3566 return Some(page.clone());
3567 }
3568 let object = response.as_object()?;
3569 for key in RPC_PAGE_LIST_KEYS {
3570 if let Some(page) = object.get(key).and_then(Value::as_array) {
3571 return Some(page.clone());
3572 }
3573 }
3574 None
3575}
3576
3577fn run_find_pdfs_command(
3578 client: &mut impl RpcCaller,
3579 collection: String,
3580 limit: usize,
3581) -> Result<(Value, JsonStyle), String> {
3582 let collection_key = resolve_collection(client, &collection)?;
3583 let response = client.call(
3584 "collections.getItems",
3585 Some(serde_json::json!({"key": collection_key})),
3586 )?;
3587 let items = collection_items(&response);
3588
3589 let mut missing = Vec::new();
3590 for item in &items {
3591 let Some(item_key) = item.get("key").and_then(Value::as_str) else {
3592 continue;
3593 };
3594 let attachments = client.call(
3595 "attachments.list",
3596 Some(serde_json::json!({"parentKey": item_key})),
3597 )?;
3598 if !has_pdf_attachment(&attachments) {
3599 missing.push(item.clone());
3600 }
3601 if limit > 0 && missing.len() >= limit {
3602 break;
3603 }
3604 }
3605
3606 let mut results = Vec::new();
3607 for item in &missing {
3608 let item_key = item
3609 .get("key")
3610 .and_then(Value::as_str)
3611 .ok_or_else(|| "missing item lacks key".to_string())?;
3612 let response = client.call(
3613 "attachments.findPDF",
3614 Some(serde_json::json!({"parentKey": item_key})),
3615 )?;
3616 let attachment = response.get("attachment").filter(|value| !value.is_null());
3617 results.push(serde_json::json!({
3618 "item_key": item_key,
3619 "title": item.get("title").cloned().unwrap_or(Value::Null),
3620 "found": attachment.is_some(),
3621 "attachment_key": attachment
3622 .and_then(|attachment| attachment.get("key"))
3623 .cloned()
3624 .unwrap_or(Value::Null),
3625 }));
3626 }
3627
3628 Ok((
3629 serde_json::json!({
3630 "scanned": items.len(),
3631 "attempted": missing.len(),
3632 "results": results,
3633 }),
3634 JsonStyle::Pretty,
3635 ))
3636}
3637
3638fn collection_items(response: &Value) -> Vec<Value> {
3639 if let Some(items) = response.get("items").and_then(Value::as_array) {
3640 return items.clone();
3641 }
3642 response.as_array().cloned().unwrap_or_default()
3643}
3644
3645fn has_pdf_attachment(attachments: &Value) -> bool {
3646 attachments
3647 .as_array()
3648 .is_some_and(|attachments| attachments.iter().any(is_pdf_attachment))
3649}
3650
3651fn is_pdf_attachment(attachment: &Value) -> bool {
3652 let content_type = attachment
3653 .get("contentType")
3654 .and_then(Value::as_str)
3655 .unwrap_or_default()
3656 .to_lowercase();
3657 let path = attachment
3658 .get("path")
3659 .and_then(Value::as_str)
3660 .unwrap_or_default()
3661 .to_lowercase();
3662 matches!(
3663 content_type.as_str(),
3664 "application/pdf" | "application/x-pdf"
3665 ) || path.ends_with(".pdf")
3666}
3667
3668fn call_json(
3669 client: &mut impl RpcCaller,
3670 method: &str,
3671 params: Option<Value>,
3672) -> Result<Value, String> {
3673 client.call(method, params)
3674}
3675
3676fn run_system_command(
3677 command: SystemCommand,
3678 client: &mut impl RpcCaller,
3679) -> Result<(Value, JsonStyle), String> {
3680 let value = match command {
3681 SystemCommand::Version { .. } => client.call("system.version", None)?,
3682 SystemCommand::Libraries { .. } => client.call("system.libraries", None)?,
3683 SystemCommand::LibraryStats { library, .. } => {
3684 let params = library.map(|id| serde_json::json!({"id": id}));
3685 client.call("system.libraryStats", params)?
3686 }
3687 SystemCommand::Schema { item_type, .. } => {
3688 if let Some(item_type) = item_type {
3689 let fields = client.call("system.itemFields", Some(serde_json::json!({"itemType": item_type})))?;
3690 let creators = client.call("system.creatorTypes", Some(serde_json::json!({"itemType": item_type})))?;
3691 let field_names: Vec<Value> = fields.as_array().unwrap_or(&vec![])
3692 .iter()
3693 .filter_map(|f| f.get("field").cloned())
3694 .collect();
3695 let creator_names: Vec<Value> = creators.as_array().unwrap_or(&vec![])
3696 .iter()
3697 .filter_map(|c| c.get("creatorType").cloned())
3698 .collect();
3699 serde_json::json!({
3700 "itemType": item_type,
3701 "fields": field_names,
3702 "creatorTypes": creator_names,
3703 })
3704 } else {
3705 let types = client.call("system.itemTypes", None)?;
3706 let type_names: Vec<Value> = types.as_array().unwrap_or(&vec![])
3707 .iter()
3708 .filter_map(|t| t.get("itemType").cloned())
3709 .collect();
3710 Value::Array(type_names)
3711 }
3712 }
3713 SystemCommand::CurrentCollection { .. } => client.call("system.currentCollection", None)?,
3714 SystemCommand::ListMethods { .. } => client.call("system.listMethods", None)?,
3715 SystemCommand::Describe { method, .. } => {
3716 let params = method.map(|method| serde_json::json!({"method": method}));
3717 client.call("system.describe", params)?
3718 }
3719 };
3720 Ok((value, JsonStyle::Pretty))
3721}
3722
3723fn run_items_command(
3724 command: ItemsCommand,
3725 client: &mut impl RpcCaller,
3726) -> Result<(Value, JsonStyle), String> {
3727 let (value, style) = match command {
3728 ItemsCommand::Add {
3729 doi,
3730 isbn,
3731 from_url,
3732 file,
3733 collection,
3734 dry_run,
3735 ..
3736 } => {
3737 if let Some(doi) = doi {
3738 run_add_identifier_command(client, "items.addByDOI", "doi", doi, collection, dry_run)?
3739 } else if let Some(isbn) = isbn {
3740 run_add_identifier_command(client, "items.addByISBN", "isbn", isbn, collection, dry_run)?
3741 } else if let Some(from_url) = from_url {
3742 run_add_identifier_command(client, "items.addByURL", "url", from_url, collection, dry_run)?
3743 } else if let Some(file) = file {
3744 let mut params = serde_json::json!({"path": zotero_path(&file)});
3745 maybe_insert_collection(client, &mut params, collection)?;
3746 run_mutation_command(client, "items.addFromFile", params, dry_run)?
3747 } else {
3748 return Err("INVALID_ARGS: provide one of --doi, --isbn, --from-url, or --file".into());
3749 }
3750 }
3751 ItemsCommand::Create {
3752 item_type,
3753 fields,
3754 dry_run,
3755 ..
3756 } => {
3757 let parsed_fields = parse_field_options(&fields)?;
3758 let mut params = serde_json::json!({"itemType": item_type});
3759 if !parsed_fields.is_empty() {
3760 if let Some(map) = params.as_object_mut() {
3761 map.insert("fields".to_string(), Value::Object(parsed_fields));
3762 }
3763 }
3764 run_mutation_command(client, "items.create", params, dry_run)?
3765 }
3766 ItemsCommand::Update {
3767 key,
3768 fields,
3769 dry_run,
3770 ..
3771 } => {
3772 let parsed_fields = parse_field_options(&fields)?;
3773 let mut params = serde_json::json!({"key": key});
3774 if !parsed_fields.is_empty() {
3775 if let Some(map) = params.as_object_mut() {
3776 map.insert("fields".to_string(), Value::Object(parsed_fields));
3777 }
3778 }
3779 run_mutation_command(client, "items.update", params, dry_run)?
3780 }
3781 ItemsCommand::Delete { key, dry_run, .. } => run_mutation_command(
3782 client,
3783 "items.delete",
3784 serde_json::json!({"key": key}),
3785 dry_run,
3786 )?,
3787 ItemsCommand::Trash {
3788 items, dry_run, ..
3789 } => {
3790 if items.len() == 1 {
3791 run_mutation_command(
3792 client,
3793 "items.trash",
3794 serde_json::json!({"key": items[0]}),
3795 dry_run,
3796 )?
3797 } else {
3798 run_mutation_command(
3799 client,
3800 "items.batchTrash",
3801 serde_json::json!({"keys": items}),
3802 dry_run,
3803 )?
3804 }
3805 }
3806 ItemsCommand::Restore { item, dry_run, .. } => run_mutation_command(
3807 client,
3808 "items.restore",
3809 serde_json::json!({"key": item}),
3810 dry_run,
3811 )?,
3812 ItemsCommand::MergeDuplicates { keys, dry_run, .. } => {
3813 if keys.len() < 2 {
3814 return Err("INVALID_ARGS: need at least 2 keys to merge".to_string());
3815 }
3816 run_mutation_command(
3817 client,
3818 "items.mergeDuplicates",
3819 serde_json::json!({"keys": keys}),
3820 dry_run,
3821 )?
3822 }
3823 ItemsCommand::AddRelated {
3824 key,
3825 target,
3826 dry_run,
3827 ..
3828 } => run_mutation_command(
3829 client,
3830 "items.addRelated",
3831 serde_json::json!({"key": key, "targetKey": target}),
3832 dry_run,
3833 )?,
3834 ItemsCommand::RemoveRelated {
3835 key,
3836 target,
3837 dry_run,
3838 ..
3839 } => run_mutation_command(
3840 client,
3841 "items.removeRelated",
3842 serde_json::json!({"key": key, "targetKey": target}),
3843 dry_run,
3844 )?,
3845 ItemsCommand::Get { item, .. } => (
3846 client.call("items.get", Some(serde_json::json!({"key": item})))?,
3847 JsonStyle::Pretty,
3848 ),
3849 ItemsCommand::List {
3850 limit,
3851 offset,
3852 sort,
3853 direction,
3854 trash,
3855 ..
3856 } => {
3857 if trash {
3858 let value = client.call(
3859 "items.getTrash",
3860 Some(serde_json::json!({"limit": limit, "offset": offset})),
3861 )?;
3862 (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
3863 } else {
3864 let mut params = serde_json::json!({
3865 "limit": limit,
3866 "offset": offset,
3867 "direction": direction,
3868 });
3869 if let (Some(sort), Some(map)) = (sort, params.as_object_mut()) {
3870 map.insert("sort".to_string(), Value::String(sort));
3871 }
3872 let value = client.call("items.list", Some(params))?;
3873 (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
3874 }
3875 }
3876 ItemsCommand::FindDuplicates { .. } => (
3877 client.call("items.findDuplicates", None)?,
3878 JsonStyle::Pretty,
3879 ),
3880 ItemsCommand::Recent {
3881 limit,
3882 offset,
3883 recent_type,
3884 ..
3885 } => {
3886 if recent_type != "added" && recent_type != "modified" {
3887 return Err(format!(
3888 "--type must be added or modified, got {recent_type:?}"
3889 ));
3890 }
3891 let value = client.call(
3892 "items.getRecent",
3893 Some(
3894 serde_json::json!({"limit": limit, "offset": offset, "type": recent_type}),
3895 ),
3896 )?;
3897 (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
3898 }
3899 ItemsCommand::Fulltext { key, .. } => (
3900 client.call("items.getFullText", Some(serde_json::json!({"key": key})))?,
3901 JsonStyle::Pretty,
3902 ),
3903 ItemsCommand::Related { key, .. } => (
3904 normalize_list_envelope(
3905 client.call("items.getRelated", Some(serde_json::json!({"key": key})))?,
3906 "items",
3907 None,
3908 0,
3909 ),
3910 JsonStyle::Pretty,
3911 ),
3912 ItemsCommand::CitationKey { key, .. } => (
3913 client.call("items.citationKey", Some(serde_json::json!({"key": key})))?,
3914 JsonStyle::Pretty,
3915 ),
3916 };
3917 Ok((value, style))
3918}
3919
3920fn run_add_identifier_command(
3921 client: &mut impl RpcCaller,
3922 method: &str,
3923 param_name: &str,
3924 param_value: String,
3925 collection: Option<String>,
3926 dry_run: bool,
3927) -> Result<(Value, JsonStyle), String> {
3928 let mut params = Value::Object(serde_json::Map::from_iter([(
3929 param_name.to_string(),
3930 Value::String(param_value),
3931 )]));
3932 maybe_insert_collection(client, &mut params, collection)?;
3933 run_mutation_command(client, method, params, dry_run)
3934}
3935
3936fn run_mutation_command(
3937 client: &mut impl RpcCaller,
3938 method: &str,
3939 params: Value,
3940 dry_run: bool,
3941) -> Result<(Value, JsonStyle), String> {
3942 let value = if dry_run {
3943 serde_json::json!({
3944 "ok": true,
3945 "dryRun": true,
3946 "wouldCall": method,
3947 "wouldCallParams": params,
3948 })
3949 } else {
3950 client.call(method, Some(params))?
3951 };
3952 Ok((value, JsonStyle::PythonCompact))
3953}
3954
3955fn parse_field_options(fields: &[String]) -> Result<serde_json::Map<String, Value>, String> {
3956 let mut parsed = serde_json::Map::new();
3957 for field in fields {
3958 let (key, value) = field
3959 .split_once('=')
3960 .ok_or_else(|| format!("INVALID_ARGS: --field must be key=value, got: {field:?}"))?;
3961 parsed.insert(key.to_string(), Value::String(value.to_string()));
3962 }
3963 Ok(parsed)
3964}
3965
3966fn maybe_insert_collection(
3967 client: &mut impl RpcCaller,
3968 params: &mut Value,
3969 collection: Option<String>,
3970) -> Result<(), String> {
3971 let Some(collection) = collection else {
3972 return Ok(());
3973 };
3974 let collection = resolve_collection(client, &collection)?;
3975 let include = match &collection {
3976 Value::Null => false,
3977 Value::Number(number) => number.as_i64() != Some(0),
3978 _ => true,
3979 };
3980 if include {
3981 params
3982 .as_object_mut()
3983 .expect("mutation params are always objects")
3984 .insert("collection".to_string(), collection);
3985 }
3986 Ok(())
3987}
3988
3989fn run_settings_command(
3990 command: SettingsCommand,
3991 client: &mut impl RpcCaller,
3992) -> Result<(Value, JsonStyle), String> {
3993 let (value, style) = match command {
3994 SettingsCommand::Get { key, .. } => (
3995 client.call("settings.get", Some(serde_json::json!({"key": key})))?,
3996 JsonStyle::Pretty,
3997 ),
3998 SettingsCommand::List { .. } => (client.call("settings.getAll", None)?, JsonStyle::Pretty),
3999 SettingsCommand::Set {
4000 pairs,
4001 file,
4002 dry_run,
4003 ..
4004 } => {
4005 if let Some(file) = file {
4006 let raw = fs::read_to_string(&file)
4008 .map_err(|err| format!("INVALID_JSON: Could not read JSON: {err}"))?;
4009 let settings: Value = serde_json::from_str(&raw)
4010 .map_err(|err| format!("INVALID_JSON: Could not parse JSON: {err}"))?;
4011 if dry_run {
4012 (
4013 dry_run_value("settings.setAll", settings),
4014 JsonStyle::PythonCompact,
4015 )
4016 } else {
4017 (
4018 client.call("settings.setAll", Some(settings))?,
4019 JsonStyle::PythonCompact,
4020 )
4021 }
4022 } else if pairs.len() == 2 {
4023 let key = &pairs[0];
4025 let value = &pairs[1];
4026 let parsed_value = serde_json::from_str::<Value>(value)
4027 .unwrap_or(Value::String(value.clone()));
4028 let params = serde_json::json!({"key": key, "value": parsed_value});
4029 if dry_run {
4030 (
4031 dry_run_value("settings.set", params),
4032 JsonStyle::PythonCompact,
4033 )
4034 } else {
4035 (
4036 client.call("settings.set", Some(params))?,
4037 JsonStyle::PythonCompact,
4038 )
4039 }
4040 } else if pairs.len() > 2 && pairs.len() % 2 == 0 {
4041 let mut map = serde_json::Map::new();
4043 for chunk in pairs.chunks(2) {
4044 let parsed = serde_json::from_str::<Value>(&chunk[1])
4045 .unwrap_or(Value::String(chunk[1].clone()));
4046 map.insert(chunk[0].clone(), parsed);
4047 }
4048 let settings = Value::Object(map);
4049 if dry_run {
4050 (
4051 dry_run_value("settings.setAll", settings),
4052 JsonStyle::PythonCompact,
4053 )
4054 } else {
4055 (
4056 client.call("settings.setAll", Some(settings))?,
4057 JsonStyle::PythonCompact,
4058 )
4059 }
4060 } else {
4061 return Err(
4062 "INVALID_ARGS: provide key value pairs (even number of args) or --file".into(),
4063 );
4064 }
4065 }
4066 };
4067 Ok((value, style))
4068}
4069
4070fn run_tags_command(
4071 command: TagsCommand,
4072 client: &mut impl RpcCaller,
4073) -> Result<(Value, JsonStyle), String> {
4074 let (value, style) = match command {
4075 TagsCommand::List { limit, .. } => {
4076 let value = client.call("tags.list", Some(serde_json::json!({"limit": limit})))?;
4077 (normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty)
4078 }
4079 TagsCommand::Rename {
4080 old, new, dry_run, ..
4081 } => run_tag_mutation(
4082 client,
4083 "tags.rename",
4084 serde_json::json!({"oldName": old, "newName": new}),
4085 dry_run,
4086 )?,
4087 TagsCommand::Delete { tag, dry_run, .. } => run_tag_mutation(
4088 client,
4089 "tags.delete",
4090 serde_json::json!({"tag": tag}),
4091 dry_run,
4092 )?,
4093 TagsCommand::Add {
4094 keys, tags, dry_run, ..
4095 } => {
4096 if keys.len() == 1 {
4097 run_tag_mutation(
4098 client,
4099 "tags.add",
4100 serde_json::json!({"key": keys[0], "tags": tags}),
4101 dry_run,
4102 )?
4103 } else {
4104 run_tag_mutation(
4105 client,
4106 "tags.batchUpdate",
4107 serde_json::json!({"keys": keys, "add": tags}),
4108 dry_run,
4109 )?
4110 }
4111 }
4112 TagsCommand::Remove {
4113 keys, tags, dry_run, ..
4114 } => {
4115 if keys.len() == 1 {
4116 run_tag_mutation(
4117 client,
4118 "tags.remove",
4119 serde_json::json!({"key": keys[0], "tags": tags}),
4120 dry_run,
4121 )?
4122 } else {
4123 run_tag_mutation(
4124 client,
4125 "tags.batchUpdate",
4126 serde_json::json!({"keys": keys, "remove": tags}),
4127 dry_run,
4128 )?
4129 }
4130 }
4131 };
4132 Ok((value, style))
4133}
4134
4135fn run_tag_mutation(
4136 client: &mut impl RpcCaller,
4137 method: &str,
4138 params: Value,
4139 dry_run: bool,
4140) -> Result<(Value, JsonStyle), String> {
4141 if dry_run {
4142 Ok((dry_run_value(method, params), JsonStyle::PythonCompact))
4143 } else {
4144 Ok((client.call(method, Some(params))?, JsonStyle::PythonCompact))
4145 }
4146}
4147
4148fn dry_run_value(method: &str, params: Value) -> Value {
4149 serde_json::json!({
4150 "ok": true,
4151 "dryRun": true,
4152 "wouldCall": method,
4153 "wouldCallParams": params,
4154 })
4155}
4156
4157fn run_annotations_command(
4158 command: AnnotationsCommand,
4159 client: &mut impl RpcCaller,
4160) -> Result<(Value, JsonStyle), String> {
4161 let (value, style) = match command {
4162 AnnotationsCommand::List { parent, .. } => {
4163 let value = client.call(
4164 "annotations.list",
4165 Some(serde_json::json!({"parentKey": parent})),
4166 )?;
4167 (normalize_list_envelope(value, "items", None, 0), JsonStyle::Pretty)
4168 }
4169 AnnotationsCommand::Create {
4170 parent,
4171 annotation_type,
4172 position,
4173 sort_index,
4174 text,
4175 comment,
4176 color,
4177 dry_run,
4178 ..
4179 } => {
4180 if !matches!(
4181 annotation_type.as_str(),
4182 "highlight" | "note" | "underline" | "image" | "ink"
4183 ) {
4184 return Err(format!(
4185 "INVALID_ARGS: --type must be highlight|note|underline|image|ink, got {annotation_type:?}"
4186 ));
4187 }
4188 let position = position
4189 .ok_or_else(|| "INVALID_ARGS: --position JSON is required".to_string())
4190 .and_then(|raw| {
4191 serde_json::from_str::<Value>(&raw)
4192 .map_err(|err| format!("INVALID_JSON: Could not parse --position: {err}"))
4193 })?;
4194 validate_annotation_position(annotation_type.as_str(), &position)?;
4195 let mut params = serde_json::Map::new();
4196 params.insert("parentKey".to_string(), Value::String(parent));
4197 params.insert("type".to_string(), Value::String(annotation_type));
4198 params.insert("color".to_string(), Value::String(color));
4199 params.insert("position".to_string(), position);
4200 if let Some(sort_index) = sort_index {
4201 params.insert(
4202 "sortIndex".to_string(),
4203 parse_annotation_sort_index(sort_index)?,
4204 );
4205 }
4206 if let Some(text) = text {
4207 params.insert("text".to_string(), Value::String(text));
4208 }
4209 if let Some(comment) = comment {
4210 params.insert("comment".to_string(), Value::String(comment));
4211 }
4212 run_mutating_command(client, "annotations.create", Value::Object(params), dry_run)?
4213 }
4214 AnnotationsCommand::Delete {
4215 annotation_key,
4216 dry_run,
4217 ..
4218 } => run_mutating_command(
4219 client,
4220 "annotations.delete",
4221 serde_json::json!({"key": annotation_key}),
4222 dry_run,
4223 )?,
4224 };
4225 Ok((value, style))
4226}
4227
4228fn validate_annotation_position(annotation_type: &str, position: &Value) -> Result<(), String> {
4229 position
4230 .get("pageIndex")
4231 .and_then(Value::as_i64)
4232 .filter(|value| *value >= 0)
4233 .ok_or_else(|| {
4234 "INVALID_ARGS: --position must include a non-negative integer pageIndex".to_string()
4235 })?;
4236
4237 if annotation_type == "ink" {
4238 let has_paths = position
4239 .get("paths")
4240 .and_then(Value::as_array)
4241 .is_some_and(|paths| !paths.is_empty());
4242 if !has_paths {
4243 return Err("INVALID_ARGS: ink --position must include non-empty paths".to_string());
4244 }
4245 return Ok(());
4246 }
4247
4248 let valid_rects = position
4249 .get("rects")
4250 .and_then(Value::as_array)
4251 .is_some_and(|rects| !rects.is_empty() && rects.iter().all(is_annotation_rect));
4252 if !valid_rects {
4253 return Err(
4254 "INVALID_ARGS: --position must include non-empty rects of [x1, y1, x2, y2]".to_string(),
4255 );
4256 }
4257 Ok(())
4258}
4259
4260fn is_annotation_rect(value: &Value) -> bool {
4261 value.as_array().is_some_and(|coords| {
4262 coords.len() == 4
4263 && coords
4264 .iter()
4265 .all(|coord| coord.as_f64().is_some_and(f64::is_finite))
4266 })
4267}
4268
4269fn parse_annotation_sort_index(raw: String) -> Result<Value, String> {
4270 let parsed = serde_json::from_str::<Value>(&raw).unwrap_or_else(|_| Value::String(raw));
4271 let valid = match &parsed {
4272 Value::Number(number) => number.as_f64().is_some_and(f64::is_finite),
4273 Value::String(value) => {
4274 is_zotero_pdf_sort_index(value.trim())
4275 || (!value.trim().is_empty()
4276 && value.trim().parse::<f64>().is_ok_and(f64::is_finite))
4277 }
4278 _ => false,
4279 };
4280 if valid {
4281 Ok(parsed)
4282 } else {
4283 Err(format!(
4284 "INVALID_ARGS: --sort-index must be a finite number or numeric string, got {parsed}"
4285 ))
4286 }
4287}
4288
/// Reports whether `value` has the shape `ddddd|dddddd|ddddd`: exactly three
/// `|`-separated segments of 5, 6 and 5 ASCII digits.
fn is_zotero_pdf_sort_index(value: &str) -> bool {
    const SEGMENT_WIDTHS: [usize; 3] = [5, 6, 5];

    let segments: Vec<&str> = value.split('|').collect();
    segments.len() == SEGMENT_WIDTHS.len()
        && segments
            .iter()
            .zip(SEGMENT_WIDTHS.iter())
            .all(|(segment, width)| {
                // Any byte of a non-ASCII character fails the digit test, so a
                // byte-wise check is equivalent to a per-character one here.
                segment.len() == *width && segment.bytes().all(|byte| byte.is_ascii_digit())
            })
}
4302
4303fn run_attachments_command(
4304 command: AttachmentsCommand,
4305 client: &mut impl RpcCaller,
4306) -> Result<(Value, JsonStyle), String> {
4307 let value = match command {
4308 AttachmentsCommand::List {
4309 parent,
4310 limit,
4311 offset,
4312 ..
4313 } => normalize_list_envelope(
4314 client.call(
4315 "attachments.list",
4316 Some(serde_json::json!({"parentKey": parent})),
4317 )?,
4318 "items",
4319 Some(limit),
4320 offset,
4321 ),
4322 AttachmentsCommand::Get { key, .. } => {
4323 client.call("attachments.get", Some(serde_json::json!({"key": key})))?
4324 }
4325 AttachmentsCommand::Fulltext { key, .. } => client.call(
4326 "attachments.getFulltext",
4327 Some(serde_json::json!({"key": key})),
4328 )?,
4329 AttachmentsCommand::Path { key, .. } => localize_attachment_path_response(
4330 client.call("attachments.getPath", Some(serde_json::json!({"key": key})))?,
4331 ),
4332 AttachmentsCommand::Add {
4333 parent,
4334 path,
4335 from_url,
4336 title,
4337 dry_run,
4338 ..
4339 } => {
4340 match (path, from_url) {
4341 (Some(p), None) => {
4342 let mut params = serde_json::json!({"parentKey": parent, "path": zotero_path(&p)});
4343 insert_optional_string(&mut params, "title", title);
4344 if dry_run {
4345 return Ok((
4346 dry_run_value("attachments.add", params),
4347 JsonStyle::PythonCompact,
4348 ));
4349 }
4350 return Ok((
4351 client.call("attachments.add", Some(params))?,
4352 JsonStyle::PythonCompact,
4353 ));
4354 }
4355 (None, Some(u)) => {
4356 let mut params = serde_json::json!({"parentKey": parent, "url": u});
4357 insert_optional_string(&mut params, "title", title);
4358 if dry_run {
4359 return Ok((
4360 dry_run_value("attachments.addByURL", params),
4361 JsonStyle::PythonCompact,
4362 ));
4363 }
4364 return Ok((
4365 client.call("attachments.addByURL", Some(params))?,
4366 JsonStyle::PythonCompact,
4367 ));
4368 }
4369 (Some(_), Some(_)) => {
4370 return Err("INVALID_ARGS: --path and --from-url are mutually exclusive".to_string());
4371 }
4372 (None, None) => {
4373 return Err("INVALID_ARGS: either --path or --from-url is required".to_string());
4374 }
4375 }
4376 }
4377 AttachmentsCommand::Delete { key, dry_run, .. } => {
4378 let params = serde_json::json!({"key": key});
4379 if dry_run {
4380 return Ok((
4381 dry_run_value("attachments.delete", params),
4382 JsonStyle::PythonCompact,
4383 ));
4384 }
4385 return Ok((
4386 client.call("attachments.delete", Some(params))?,
4387 JsonStyle::PythonCompact,
4388 ));
4389 }
4390 AttachmentsCommand::FindPdf { parent, .. } => client.call(
4391 "attachments.findPDF",
4392 Some(serde_json::json!({"parentKey": parent})),
4393 )?,
4394 };
4395 Ok((value, JsonStyle::Pretty))
4396}
4397
4398fn localize_attachment_path_response(mut value: Value) -> Value {
4399 if let Some(path) = value.get("path").and_then(Value::as_str) {
4400 let local = local_path_from_zotero_path(path);
4401 if let Some(map) = value.as_object_mut() {
4402 map.insert("path".to_string(), Value::String(local));
4403 }
4404 }
4405 value
4406}
4407
4408fn run_notes_command(
4409 command: NotesCommand,
4410 client: &mut impl RpcCaller,
4411) -> Result<(Value, JsonStyle), String> {
4412 let (value, style) = match command {
4413 NotesCommand::List {
4414 parent,
4415 limit,
4416 offset,
4417 ..
4418 } => {
4419 let value = client.call(
4420 "notes.list",
4421 Some(serde_json::json!({"parentKey": parent})),
4422 )?;
4423 (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
4424 }
4425 NotesCommand::Get { note_key, .. } => {
4426 let value = client.call("notes.get", Some(serde_json::json!({"key": note_key})))?;
4427 (value, JsonStyle::Pretty)
4428 }
4429 NotesCommand::Create {
4430 parent,
4431 content,
4432 tags,
4433 dry_run,
4434 ..
4435 } => {
4436 let mut params = serde_json::Map::new();
4437 params.insert("parentKey".to_string(), Value::String(parent));
4438 params.insert("content".to_string(), Value::String(content));
4439 if !tags.is_empty() {
4440 params.insert(
4441 "tags".to_string(),
4442 Value::Array(tags.into_iter().map(Value::String).collect()),
4443 );
4444 }
4445 run_mutating_command(client, "notes.create", Value::Object(params), dry_run)?
4446 }
4447 NotesCommand::Update {
4448 note_key,
4449 content,
4450 dry_run,
4451 ..
4452 } => run_mutating_command(
4453 client,
4454 "notes.update",
4455 serde_json::json!({"key": note_key, "content": content}),
4456 dry_run,
4457 )?,
4458 NotesCommand::Delete {
4459 note_key, dry_run, ..
4460 } => {
4461 run_mutating_command(
4463 client,
4464 "items.delete",
4465 serde_json::json!({"key": note_key}),
4466 dry_run,
4467 )?
4468 }
4469 NotesCommand::Search { query, limit, .. } => {
4470 let value = client.call(
4471 "notes.search",
4472 Some(serde_json::json!({"query": query, "limit": limit})),
4473 )?;
4474 (normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty)
4475 }
4476 };
4477 Ok((value, style))
4478}
4479
4480fn run_mutating_command(
4481 client: &mut impl RpcCaller,
4482 method: &str,
4483 params: Value,
4484 dry_run: bool,
4485) -> Result<(Value, JsonStyle), String> {
4486 if dry_run {
4487 Ok((
4488 serde_json::json!({
4489 "ok": true,
4490 "dryRun": true,
4491 "wouldCall": method,
4492 "wouldCallParams": params,
4493 }),
4494 JsonStyle::PythonCompact,
4495 ))
4496 } else {
4497 client
4498 .call(method, Some(params))
4499 .map(|value| (value, JsonStyle::PythonCompact))
4500 }
4501}
4502
4503fn run_collections_command(
4504 command: CollectionsCommand,
4505 client: &mut impl RpcCaller,
4506) -> Result<(Value, JsonStyle), String> {
4507 let value = match command {
4508 CollectionsCommand::List { .. } => normalize_list_envelope(
4509 client.call("collections.list", None)?,
4510 "items",
4511 None,
4512 0,
4513 ),
4514 CollectionsCommand::Tree { .. } => client.call("collections.tree", None)?,
4515 CollectionsCommand::Get { name_or_id, .. } => {
4516 let key = resolve_collection(client, &name_or_id)?;
4517 client.call("collections.get", Some(serde_json::json!({"key": key})))?
4518 }
4519 CollectionsCommand::GetItems {
4520 name_or_id,
4521 limit,
4522 offset,
4523 ..
4524 } => {
4525 let key = resolve_collection(client, &name_or_id)?;
4526 let mut params = serde_json::json!({"key": key});
4527 if let Some(map) = params.as_object_mut() {
4528 if let Some(limit) = limit {
4529 map.insert("limit".to_string(), Value::Number(limit.into()));
4530 }
4531 if offset > 0 {
4532 map.insert("offset".to_string(), Value::Number(offset.into()));
4533 }
4534 }
4535 normalize_list_envelope(
4536 client.call("collections.getItems", Some(params))?,
4537 "items",
4538 limit,
4539 offset,
4540 )
4541 }
4542 CollectionsCommand::Stats { name_or_id, .. } => {
4543 let key = resolve_collection(client, &name_or_id)?;
4544 client.call("collections.stats", Some(serde_json::json!({"key": key})))?
4545 }
4546 CollectionsCommand::Rename {
4547 old_name,
4548 new_name,
4549 dry_run,
4550 ..
4551 } => {
4552 let key = resolve_mutable_collection(client, &old_name, "rename")?;
4553 let params = serde_json::json!({"key": key, "name": new_name});
4554 if dry_run {
4555 return Ok((
4556 dry_run_value("collections.rename", params),
4557 JsonStyle::PythonCompact,
4558 ));
4559 }
4560 return Ok((
4561 client.call("collections.rename", Some(params))?,
4562 JsonStyle::PythonCompact,
4563 ));
4564 }
4565 CollectionsCommand::Create {
4566 name,
4567 parent,
4568 dry_run,
4569 ..
4570 } => {
4571 let mut params = serde_json::json!({"name": name});
4572 if let Some(parent) = parent {
4573 let parent_key = resolve_mutable_collection(client, &parent, "use as parent")?;
4574 if let Some(map) = params.as_object_mut() {
4575 map.insert("parentKey".to_string(), parent_key);
4576 }
4577 }
4578 if dry_run {
4579 return Ok((
4580 dry_run_value("collections.create", params),
4581 JsonStyle::PythonCompact,
4582 ));
4583 }
4584 return Ok((
4585 client.call("collections.create", Some(params))?,
4586 JsonStyle::PythonCompact,
4587 ));
4588 }
4589 CollectionsCommand::Delete {
4590 name_or_id,
4591 dry_run,
4592 ..
4593 } => {
4594 let key = resolve_mutable_collection(client, &name_or_id, "delete")?;
4595 let params = serde_json::json!({"key": key});
4596 if dry_run {
4597 return Ok((
4598 dry_run_value("collections.delete", params),
4599 JsonStyle::PythonCompact,
4600 ));
4601 }
4602 return Ok((
4603 client.call("collections.delete", Some(params))?,
4604 JsonStyle::PythonCompact,
4605 ));
4606 }
4607 CollectionsCommand::AddItems {
4608 collection,
4609 item_keys,
4610 dry_run,
4611 ..
4612 } => {
4613 let key = resolve_mutable_collection(client, &collection, "add to")?;
4614 let params = serde_json::json!({"key": key, "keys": item_keys});
4615 if dry_run {
4616 return Ok((
4617 dry_run_value("collections.addItems", params),
4618 JsonStyle::PythonCompact,
4619 ));
4620 }
4621 return Ok((
4622 client.call("collections.addItems", Some(params))?,
4623 JsonStyle::PythonCompact,
4624 ));
4625 }
4626 CollectionsCommand::RemoveItems {
4627 collection,
4628 item_keys,
4629 dry_run,
4630 ..
4631 } => {
4632 let key = resolve_mutable_collection(client, &collection, "operate on")?;
4633 let params = serde_json::json!({"key": key, "keys": item_keys});
4634 if dry_run {
4635 return Ok((
4636 dry_run_value("collections.removeItems", params),
4637 JsonStyle::PythonCompact,
4638 ));
4639 }
4640 return Ok((
4641 client.call("collections.removeItems", Some(params))?,
4642 JsonStyle::PythonCompact,
4643 ));
4644 }
4645 };
4646 Ok((value, JsonStyle::Pretty))
4647}
4648
4649fn resolve_export_keys(
4650 client: &mut impl RpcCaller,
4651 mut keys: Vec<String>,
4652 collection: Option<String>,
4653) -> Result<Vec<String>, String> {
4654 if let Some(name) = collection {
4655 let col_key = resolve_collection(client, &name)?;
4656 let response = client.call(
4657 "collections.getItems",
4658 Some(serde_json::json!({"key": col_key})),
4659 )?;
4660 let items = collection_items(&response);
4661 for item in items {
4662 if let Some(key) = item.get("key").and_then(Value::as_str) {
4663 if !keys.contains(&key.to_string()) {
4664 keys.push(key.to_string());
4665 }
4666 }
4667 }
4668 }
4669 if keys.is_empty() {
4670 return Err("No item keys provided. Pass positional keys and/or --collection.".to_string());
4671 }
4672 Ok(keys)
4673}
4674
4675fn run_export(args: ExportArgs, client: &mut impl RpcCaller) -> Result<String, String> {
4676 let keys = resolve_export_keys(client, args.keys, args.collection)?;
4677 match args.format.as_str() {
4678 "bibtex" => run_export_content_command(client, "export.bibtex", keys),
4679 "ris" => run_export_content_command(client, "export.ris", keys),
4680 "csl-json" => {
4681 let response =
4682 client.call("export.cslJson", Some(serde_json::json!({"keys": keys})))?;
4683 if let Some(content) = response.get("content") {
4684 format_json(content, JsonStyle::Pretty)
4685 } else {
4686 format_json(&response, JsonStyle::PythonCompact)
4687 }
4688 }
4689 "bibliography" => {
4690 let response = client.call(
4691 "export.bibliography",
4692 Some(serde_json::json!({"keys": keys, "style": args.style})),
4693 )?;
4694 if let Some(object) = response.as_object() {
4695 let field = if args.html { "html" } else { "text" };
4696 if object.contains_key("html") || object.contains_key("text") {
4697 return raw_value_output(
4698 object.get(field).unwrap_or(&Value::String(String::new())),
4699 );
4700 }
4701 }
4702 format_json(&response, JsonStyle::PythonCompact)
4703 }
4704 other => Err(format!(
4705 "INVALID_ARGS: unknown format {other:?}, expected bibtex/ris/csl-json/bibliography"
4706 )),
4707 }
4708}
4709
4710fn run_export_content_command(
4711 client: &mut impl RpcCaller,
4712 method: &str,
4713 keys: Vec<String>,
4714) -> Result<String, String> {
4715 let response = client.call(method, Some(serde_json::json!({"keys": keys})))?;
4716 if let Some(content) = response.get("content") {
4717 raw_value_output(content)
4718 } else {
4719 format_json(&response, JsonStyle::PythonCompact)
4720 }
4721}
4722
4723fn raw_value_output(value: &Value) -> Result<String, String> {
4724 let mut out = match value {
4725 Value::Null => String::new(),
4726 Value::String(content) => content.clone(),
4727 other => to_python_repr(other),
4728 };
4729 out.push('\n');
4730 Ok(out)
4731}
4732
4733fn to_python_repr(value: &Value) -> String {
4734 match value {
4735 Value::Null => "None".to_string(),
4736 Value::Bool(value) => {
4737 if *value {
4738 "True".to_string()
4739 } else {
4740 "False".to_string()
4741 }
4742 }
4743 Value::Number(value) => value.to_string(),
4744 Value::String(value) => format!("'{}'", value.replace('\\', "\\\\").replace('\'', "\\'")),
4745 Value::Array(values) => {
4746 let inner = values
4747 .iter()
4748 .map(to_python_repr)
4749 .collect::<Vec<_>>()
4750 .join(", ");
4751 format!("[{inner}]")
4752 }
4753 Value::Object(entries) => {
4754 let inner = entries
4755 .iter()
4756 .map(|(key, value)| {
4757 format!("'{}': {}", key.replace('\'', "\\'"), to_python_repr(value))
4758 })
4759 .collect::<Vec<_>>()
4760 .join(", ");
4761 format!("{{{inner}}}")
4762 }
4763 }
4764}
4765
4766fn resolve_collection(client: &mut impl RpcCaller, name_or_id: &str) -> Result<Value, String> {
4767 let trimmed = name_or_id.trim();
4768 if let Ok(id) = trimmed.parse::<i64>() {
4769 return Ok(Value::Number(id.into()));
4770 }
4771
4772 let collections = client.call("collections.list", None)?;
4773 let items = collections
4774 .get("items")
4775 .and_then(Value::as_array)
4776 .or_else(|| collections.as_array())
4777 .ok_or_else(|| "collections.list returned non-array result".to_string())?;
4778
4779 if let Some(collection) = items
4780 .iter()
4781 .find(|collection| collection.get("key").and_then(Value::as_str) == Some(trimmed))
4782 {
4783 return collection_key(collection);
4784 }
4785
4786 let exact = items
4787 .iter()
4788 .filter(|collection| collection.get("name").and_then(Value::as_str) == Some(trimmed))
4789 .collect::<Vec<_>>();
4790 if exact.len() == 1 {
4791 return collection_key(exact[0]);
4792 }
4793
4794 let needle = normalize_collection_name(trimmed);
4795 let fuzzy = items
4796 .iter()
4797 .filter(|collection| {
4798 collection
4799 .get("name")
4800 .and_then(Value::as_str)
4801 .map(normalize_collection_name)
4802 .is_some_and(|name| name.contains(&needle))
4803 })
4804 .collect::<Vec<_>>();
4805
4806 match fuzzy.len() {
4807 1 => collection_key(fuzzy[0]),
4808 0 => Err(format!(
4809 "COLLECTION_NOT_FOUND: No collection named {trimmed:?}"
4810 )),
4811 _ => Err(format!(
4812 "COLLECTION_AMBIGUOUS: Multiple collections match {trimmed:?}"
4813 )),
4814 }
4815}
4816
4817fn collection_key(collection: &Value) -> Result<Value, String> {
4818 collection
4819 .get("key")
4820 .cloned()
4821 .ok_or_else(|| "collection result is missing key".to_string())
4822}
4823
4824fn resolve_mutable_collection(
4825 client: &mut impl RpcCaller,
4826 name_or_id: &str,
4827 operation: &str,
4828) -> Result<Value, String> {
4829 let key = resolve_collection(client, name_or_id)?;
4830 if key.as_i64() == Some(0) {
4831 return Err(format!(
4832 "COLLECTION_NOT_FOUND: {name_or_id:?} resolved to library root (cannot {operation})"
4833 ));
4834 }
4835 Ok(key)
4836}
4837
4838fn insert_optional_string(value: &mut Value, key: &str, maybe: Option<String>) {
4839 if let (Some(map), Some(content)) = (value.as_object_mut(), maybe) {
4840 map.insert(key.to_string(), Value::String(content));
4841 }
4842}
4843
/// Produces the comparison form of a collection name: leading/trailing
/// whitespace removed, internal whitespace runs collapsed to a single space,
/// and the result lowercased.
fn normalize_collection_name(name: &str) -> String {
    let mut collapsed = String::with_capacity(name.len());
    for word in name.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    // Lowercase the joined string as a whole so context-sensitive case
    // mappings see the same neighbours as they would in the final text.
    collapsed.to_lowercase()
}
4850
4851fn format_json(value: &Value, style: JsonStyle) -> Result<String, String> {
4852 let mut out = match style {
4853 JsonStyle::PythonCompact => to_python_compact_json(value),
4854 JsonStyle::Pretty => serde_json::to_string_pretty(value).map_err(|err| err.to_string())?,
4855 };
4856 out.push('\n');
4857 Ok(out)
4858}
4859
4860fn to_python_compact_json(value: &Value) -> String {
4861 match value {
4862 Value::Null => "null".to_string(),
4863 Value::Bool(value) => value.to_string(),
4864 Value::Number(value) => value.to_string(),
4865 Value::String(value) => {
4866 serde_json::to_string(value).expect("string serialization cannot fail")
4867 }
4868 Value::Array(values) => {
4869 let inner = values
4870 .iter()
4871 .map(to_python_compact_json)
4872 .collect::<Vec<_>>()
4873 .join(", ");
4874 format!("[{inner}]")
4875 }
4876 Value::Object(entries) => {
4877 let inner = entries
4878 .iter()
4879 .map(|(key, value)| {
4880 let key = serde_json::to_string(key).expect("string serialization cannot fail");
4881 format!("{key}: {}", to_python_compact_json(value))
4882 })
4883 .collect::<Vec<_>>()
4884 .join(", ");
4885 format!("{{{inner}}}")
4886 }
4887 }
4888}