Skip to main content

zotron_cli/
lib.rs

1//! Minimal typed CLI surface for the Rust migration scaffold.
2
3use std::{
4    env, fs,
5    io::{self, Read},
6    path::{Path, PathBuf},
7    process::Command as ProcessCommand,
8    thread,
9    time::{Duration, Instant, SystemTime, UNIX_EPOCH},
10};
11
12use clap::{error::ErrorKind, Parser, Subcommand};
13use serde_json::Value;
14use zotron_rpc::{StdProviderCommandRunner, UreqProviderHttpTransport, ZoteroRpc};
15use zotron_types::{
16    bm25_score_chunks, build_embedding_provider_request, build_ocr_provider_request,
17    builtin_ocr_provider_specs, cosine_similarity, execute_embedding_provider_request,
18    is_zotron_evidence_artifact, machine_artifact_exists_for_item,
19    machine_artifact_exists_in_sidecar, machine_artifact_store_root,
20    ocr_provider_spec as raw_ocr_provider_spec, parse_embedding_provider_response,
21    parse_ocr_provider_response, read_machine_artifact_sidecar, rrf_merge,
22    write_machine_artifact_sidecar, ArtifactStorePlatform, EmbeddingChunkInput,
23    EmbeddingRequestInput, EmbeddingVector, MachineArtifactKind, OcrRequestInput,
24    ProviderCommandRunner, ProviderHttpInvocation, ProviderHttpTransport, StructureChunk,
25    DEFAULT_RPC_URL,
26};
27
/// Minimal abstraction over "call an RPC method and get JSON back".
///
/// Within this file it is implemented for `ZoteroRpc`; the error side is a
/// plain `String` so callers do not depend on a concrete transport error type.
28pub trait RpcCaller {
    /// Invokes `method` with optional JSON `params` and returns the JSON result,
    /// or a human-readable error message.
29    fn call(&mut self, method: &str, params: Option<Value>) -> Result<Value, String>;
30}
31
/// Serializable, CLI-facing description of one OCR provider.
///
/// Values are produced by `cli_ocr_provider_spec` from a
/// `zotron_types::OcrProviderSpec`; every string field is `'static`.
32#[derive(Debug, Clone, PartialEq, serde::Serialize)]
33pub struct CliOcrProviderSpec {
    /// Populated from the upstream `provider_key` (same value as `provider`).
34    pub id: &'static str,
    /// Populated from the upstream `provider_key`.
35    pub provider: &'static str,
    /// String form of the upstream request style (`request_style.as_str()`).
36    pub request_style: &'static str,
    /// Copied verbatim from the upstream spec.
37    pub auth: &'static str,
    /// Copied verbatim from the upstream spec.
38    pub auth_header: &'static str,
    /// Copied verbatim from the upstream spec.
39    pub supports_pdf_direct: bool,
    /// Copied verbatim from the upstream spec.
40    pub key_field: &'static str,
41}
42
/// Serializable, CLI-facing description of one embedding provider.
///
/// Values are produced by `embedding_provider_spec` from the spec returned by
/// `zotron_types::embedding_provider_spec`.
43#[derive(Debug, Clone, PartialEq, serde::Serialize)]
44pub struct CliEmbeddingProviderSpec {
    /// Copied from the upstream `id`.
45    pub id: &'static str,
    /// Populated from the upstream `provider_key`.
46    pub provider: &'static str,
    /// Upstream request style as a string, except that the provider key
    /// `"alibaba"` is reported as `"dashscope"`.
47    pub request_style: &'static str,
    /// Upstream default URL, or the empty string when the upstream spec has none.
48    pub default_url: String,
    /// Copied from the upstream spec.
49    pub default_model: &'static str,
    /// Copied from the upstream spec.
50    pub auth: &'static str,
    /// Copied from the upstream spec.
51    pub key_field: &'static str,
52}
53
54pub fn ocr_provider_specs() -> Vec<CliOcrProviderSpec> {
55    builtin_ocr_provider_specs()
56        .into_iter()
57        .map(cli_ocr_provider_spec)
58        .collect()
59}
60
61pub fn ocr_provider_spec(provider: &str) -> Result<CliOcrProviderSpec, String> {
62    zotron_types::ocr_provider_spec(provider).map(cli_ocr_provider_spec)
63}
64
65pub fn embedding_provider_spec(provider: &str) -> Result<CliEmbeddingProviderSpec, String> {
66    let spec = zotron_types::embedding_provider_spec(provider)?;
67    Ok(CliEmbeddingProviderSpec {
68        id: spec.id,
69        provider: spec.provider_key,
70        request_style: if spec.provider_key == "alibaba" {
71            "dashscope"
72        } else {
73            spec.request_style.as_str()
74        },
75        default_url: spec.default_url.unwrap_or("").to_string(),
76        default_model: spec.default_model,
77        auth: spec.auth,
78        key_field: spec.key_field,
79    })
80}
81
82pub fn chunks_from_blocks(blocks: &[Value], max_chars: usize) -> Result<Vec<Value>, String> {
83    let typed = blocks
84        .iter()
85        .map(json_block_to_pdf_block)
86        .collect::<Result<Vec<_>, _>>()?;
87    let chunks = zotron_types::chunks_from_blocks(&typed, max_chars);
88    chunks
89        .into_iter()
90        .map(|chunk| chunk_to_cli_value(&chunk, &typed))
91        .collect()
92}
93
94fn cli_ocr_provider_spec(spec: zotron_types::OcrProviderSpec) -> CliOcrProviderSpec {
95    CliOcrProviderSpec {
96        id: spec.provider_key,
97        provider: spec.provider_key,
98        request_style: spec.request_style.as_str(),
99        auth: spec.auth,
100        auth_header: spec.auth_header,
101        supports_pdf_direct: spec.supports_pdf_direct,
102        key_field: spec.key_field,
103    }
104}
105
106fn json_block_to_pdf_block(value: &Value) -> Result<zotron_types::PdfEvidenceBlock, String> {
107    let block_key = value
108        .get("block_key")
109        .and_then(Value::as_str)
110        .ok_or_else(|| "block missing block_key".to_string())?
111        .to_string();
112    let item_key = value
113        .get("item_key")
114        .and_then(Value::as_str)
115        .ok_or_else(|| "block missing item_key".to_string())?
116        .to_string();
117    let attachment_key = value
118        .get("attachment_key")
119        .and_then(Value::as_str)
120        .ok_or_else(|| "block missing attachment_key".to_string())?
121        .to_string();
122    let page_idx = value
123        .get("page_idx")
124        .or_else(|| value.get("page"))
125        .and_then(Value::as_u64)
126        .unwrap_or(1);
127    let block_type = value
128        .get("type")
129        .or_else(|| value.get("block_type"))
130        .and_then(Value::as_str)
131        .unwrap_or("paragraph")
132        .to_string();
133    let section_path = value
134        .get("section_path")
135        .and_then(Value::as_array)
136        .map(|items| {
137            items
138                .iter()
139                .filter_map(Value::as_str)
140                .map(ToString::to_string)
141                .collect::<Vec<_>>()
142        })
143        .unwrap_or_default();
144    let text = value
145        .get("text")
146        .and_then(Value::as_str)
147        .unwrap_or("")
148        .to_string();
149    let bbox = value.get("bbox").and_then(value_bbox4);
150
151    Ok(zotron_types::PdfEvidenceBlock {
152        block_key,
153        item_key,
154        attachment_key,
155        page_idx,
156        block_type,
157        bbox,
158        section_path,
159        text,
160    })
161}
162
163fn chunk_to_cli_value(
164    chunk: &zotron_types::StructureChunk,
165    blocks: &[zotron_types::PdfEvidenceBlock],
166) -> Result<Value, String> {
167    let refs = chunk
168        .block_keys
169        .iter()
170        .filter_map(|key| blocks.iter().find(|block| &block.block_key == key))
171        .map(|block| {
172            serde_json::json!({
173                "block_key": block.block_key,
174                "page_idx": block.page_idx,
175                "bbox": block.bbox.map(|bbox| bbox.iter().map(|n| {
176                    if n.fract() == 0.0 {
177                        Value::from(*n as i64)
178                    } else {
179                        Value::from(*n)
180                    }
181                }).collect::<Vec<_>>()),
182            })
183        })
184        .collect::<Vec<_>>();
185    Ok(serde_json::json!({
186        "chunk_key": chunk.chunk_key,
187        "item_key": chunk.item_key,
188        "attachment_key": chunk.attachment_key,
189        "block_keys": chunk.block_keys,
190        "section_path": chunk.section_path,
191        "text": chunk.text,
192        "page_start": chunk.page_start,
193        "page_end": chunk.page_end,
194        "evidence_refs": refs,
195    }))
196}
197
198fn value_bbox4(value: &Value) -> Option<[f64; 4]> {
199    let arr = value.as_array()?;
200    if arr.len() != 4 {
201        return None;
202    }
203    Some([
204        arr[0].as_f64()?,
205        arr[1].as_f64()?,
206        arr[2].as_f64()?,
207        arr[3].as_f64()?,
208    ])
209}
210
211impl RpcCaller for ZoteroRpc {
212    fn call(&mut self, method: &str, params: Option<Value>) -> Result<Value, String> {
213        self.call(method, params).map_err(|err| err.to_string())
214    }
215}
216
// Root clap parser for the binary. The displayed program name is `zotron`;
// all functionality is reached through the `Command` subcommand tree.
// NOTE: `///` comments on clap-derived items become user-visible help text,
// so maintainer notes in these types use plain `//` comments.
217#[derive(Debug, Parser)]
218#[command(name = "zotron", about = "Rust client + CLI for the Zotron XPI")]
219struct Cli {
220    #[command(subcommand)]
221    command: Command,
222}
223
// Subcommands of the `ocr` group (mounted via `Command::Ocr`).
// `///` comments below are clap help text; keep maintainer notes in `//`.
224#[derive(Debug, Subcommand)]
225enum OcrCommand {
226    /// Print supported OCR provider contracts.
227    Providers,
228    /// Execute an OCR provider request from JSON and emit normalized blocks.
229    #[command(name = "run")]
230    Run {
231        #[arg(long)]
232        provider: String,
        // NOTE(review): `--input` and `--file` are both optional at the parser
        // level; presumably the dispatcher enforces that one is given — confirm.
233        /// Path to an OcrRequestInput JSON file, or "-" to read stdin.
234        #[arg(long)]
235        input: Option<String>,
236        /// Local PDF/image file to encode into an OcrRequestInput.
237        #[arg(long)]
238        file: Option<String>,
239        /// Zotero item key used when --file builds the OCR request.
240        #[arg(long = "item-key")]
241        item_key: Option<String>,
242        /// Zotero attachment key used when --file builds the OCR request.
243        #[arg(long = "attachment-key")]
244        attachment_key: Option<String>,
245        /// MIME type used when --file builds the OCR request.
246        #[arg(long = "mime-type")]
247        mime_type: Option<String>,
248        /// Override the provider endpoint, required for service-hosted PaddleOCR-VL.
249        #[arg(long)]
250        endpoint: Option<String>,
251        /// Environment variable containing the provider bearer token.
252        #[arg(long = "api-key-env")]
253        api_key_env: Option<String>,
254    },
255    /// Show OCR statistics for a collection.
256    Status {
257        #[arg(long)]
258        collection: String,
259        #[arg(long, default_value = DEFAULT_RPC_URL)]
260        url: String,
261    },
262    /// Parse a Zotero PDF through MinerU and write hidden sidecar OCR/RAG artifacts.
263    #[command(name = "process")]
264    Process {
        // Unlike `Run`, the provider here has a default ("mineru").
265        #[arg(long, default_value = "mineru")]
266        provider: String,
267        /// Parent Zotero item key.
268        #[arg(long)]
269        parent: String,
270        /// Zotero PDF attachment key (auto-resolved from --parent when omitted).
271        #[arg(long)]
272        attachment: Option<String>,
273        /// Public URL for MinerU cloud parsing. Use --result-dir/--result-zip for offline ingestion.
274        #[arg(long = "source-url")]
275        source_url: Option<String>,
276        /// Already-extracted MinerU result directory, used by tests/offline replay.
277        #[arg(long = "result-dir")]
278        result_dir: Option<String>,
279        /// Already-downloaded MinerU result zip, used by tests/offline replay.
280        #[arg(long = "result-zip")]
281        result_zip: Option<String>,
282        /// Override MinerU submit endpoint.
283        #[arg(long = "provider-endpoint")]
284        provider_endpoint: Option<String>,
285        /// Environment variable containing the MinerU bearer token.
286        #[arg(long = "api-key-env", default_value = "ZOTRON_MINERU_API_KEY")]
287        api_key_env: String,
        // Polling cadence and overall deadline, both in seconds (per the flag names).
288        #[arg(long = "poll-interval-seconds", default_value_t = 5)]
289        poll_interval_seconds: u64,
290        #[arg(long = "timeout-seconds", default_value_t = 900)]
291        timeout_seconds: u64,
        // NOTE(review): presumably forwarded as the `max_chars` chunk size — confirm.
292        #[arg(long = "chunk-chars", default_value_t = 1200)]
293        chunk_chars: usize,
294        #[arg(long, default_value = DEFAULT_RPC_URL)]
295        url: String,
296    },
297}
298
// Top-level subcommand tree of the CLI. Leaf commands carry their own `--url`
// (defaulting to `DEFAULT_RPC_URL`); group commands delegate to a nested
// subcommand enum. `///` comments are clap help text.
299#[derive(Debug, Subcommand)]
300enum Command {
301    /// Check that Zotero is running with the Zotron XPI enabled.
302    Ping {
303        #[arg(long, default_value = DEFAULT_RPC_URL)]
304        url: String,
305    },
306    /// Generic RPC escape hatch.
307    Rpc {
        // Positional: RPC method name, then an optional JSON params string
        // that defaults to "{}".
308        method: String,
309        #[arg(default_value = "{}")]
310        params_json: String,
311        #[arg(long, default_value = DEFAULT_RPC_URL)]
312        url: String,
313        #[arg(long)]
314        paginate: bool,
315        #[arg(long, default_value_t = 100)]
316        page_size: usize,
317    },
318    /// Push prepared Zotero JSON (from file or stdin) to Zotero.
319    Push {
320        /// Path to a JSON file, or "-" to read from stdin.
321        json_file: String,
322        /// Optional PDF attachment path.
323        #[arg(long)]
324        pdf: Option<String>,
325        /// Collection name (fuzzy) or key.
326        #[arg(long)]
327        collection: Option<String>,
        // NOTE(review): accepted values are only listed in the help text; the
        // parser itself does not restrict them — confirm validation happens later.
328        /// Duplicate handling: skip | update | create.
329        #[arg(long = "on-duplicate", default_value = "skip")]
330        on_duplicate: String,
331        #[arg(long, default_value = DEFAULT_RPC_URL)]
332        url: String,
333        /// Parse input + resolve collection only; do not push to Zotero.
334        #[arg(long = "dry-run")]
335        dry_run: bool,
336    },
337    /// System and plugin introspection commands.
338    System {
339        #[command(subcommand)]
340        command: SystemCommand,
341    },
342    /// Search items by text, tag, identifier, or structured conditions.
343    Search(SearchArgs),
344    /// Inspect and manage Zotero items.
345    Items {
346        #[command(subcommand)]
347        command: ItemsCommand,
348    },
349    /// Inspect Zotero collections.
350    Collections {
351        #[command(subcommand)]
352        command: CollectionsCommand,
353    },
354    /// Inspect Zotero notes.
355    Notes {
356        #[command(subcommand)]
357        command: NotesCommand,
358    },
359    /// Inspect Zotero attachments.
360    Attachments {
361        #[command(subcommand)]
362        command: AttachmentsCommand,
363    },
364    /// Inspect Zotero preferences.
365    Settings {
366        #[command(subcommand)]
367        command: SettingsCommand,
368    },
369    /// Inspect and manage Zotero tags.
370    Tags {
371        #[command(subcommand)]
372        command: TagsCommand,
373    },
374    /// Export items as BibTeX, RIS, CSL-JSON, or formatted bibliography.
375    Export(ExportArgs),
376    /// List, create, and delete PDF annotations.
377    Annotations {
378        #[command(subcommand)]
379        command: AnnotationsCommand,
380    },
381    /// OCR PDFs and manage raw/block/chunk evidence artifacts.
382    Ocr {
383        #[command(subcommand)]
384        command: OcrCommand,
385    },
386    /// Build and search retrieval artifacts.
387    Rag {
388        #[command(subcommand)]
389        command: RagCommand,
390    },
391    /// Batch fill missing PDFs in a collection via Zotero's resolver chain.
392    #[command(name = "find-pdfs")]
393    FindPdfs {
394        #[arg(long)]
395        collection: String,
        // NOTE(review): the default of 0 presumably means "no limit" — confirm.
396        #[arg(long, default_value_t = 0)]
397        limit: usize,
398        #[arg(long, default_value = DEFAULT_RPC_URL)]
399        url: String,
400    },
401}
402
/// Plain bundle of retrieval options. Its fields mirror those of
/// `RagCommand::Search` with the RPC `url` left out.
// NOTE(review): presumably built from that variant before running the search — confirm.
403struct RagSearchOptions {
404    query: String,
405    collection: Option<String>,
406    keys: Vec<String>,
407    zotero: bool,
408    top_spans_per_item: u64,
409    include_fulltext_spans: bool,
410    top_k: u64,
    // Restricted to "json" or "jsonl" when it comes from the CLI parser.
411    output: String,
412}
413
// Subcommands of the `rag` group (mounted via `Command::Rag`).
// `///` comments are clap help text; keep maintainer notes in `//`.
414#[derive(Debug, Subcommand)]
415enum RagCommand {
416    /// Print supported embedding provider contracts.
417    #[command(name = "providers")]
418    Providers,
419    /// Execute an embedding provider request from JSON and emit vectors.
420    #[command(name = "embed")]
421    Embed {
422        #[arg(long)]
423        provider: String,
        // Required here, unlike the optional `--input` of `ocr run`.
424        /// Path to an EmbeddingRequestInput JSON file, or "-" to read stdin.
425        #[arg(long)]
426        input: String,
427        /// Override the embedding endpoint.
428        #[arg(long)]
429        endpoint: Option<String>,
430        /// Override the embedding model.
431        #[arg(long)]
432        model: Option<String>,
433        /// Override provider input type, for example document or query.
434        #[arg(long = "input-type")]
435        input_type: Option<String>,
436        /// Environment variable containing the provider bearer token.
437        #[arg(long = "api-key-env")]
438        api_key_env: Option<String>,
439    },
440    /// Show index status for a collection.
441    Status {
442        #[arg(long)]
443        collection: String,
444        #[arg(long, default_value = DEFAULT_RPC_URL)]
445        url: String,
446    },
447    /// Emit academic-zh retrieval hits with item_key/title/text provenance.
448    #[command(name = "search")]
449    Search {
450        query: String,
451        #[arg(long)]
452        collection: Option<String>,
453        /// Limit retrieval to one or more Zotero item keys.
454        #[arg(long = "key", alias = "keys")]
455        keys: Vec<String>,
456        #[arg(long)]
457        zotero: bool,
458        #[arg(long = "top-spans-per-item", default_value_t = 3)]
459        top_spans_per_item: u64,
460        #[arg(long = "include-fulltext-spans")]
461        include_fulltext_spans: bool,
        // Exposed as `--limit`; `--top-k` is accepted as a hidden alias.
462        #[arg(long = "limit", alias = "top-k", default_value_t = 50)]
463        top_k: u64,
        // The parser rejects anything other than "json" or "jsonl".
464        #[arg(long, default_value = "json", value_parser = ["json", "jsonl"])]
465        output: String,
466        #[arg(long, default_value = DEFAULT_RPC_URL)]
467        url: String,
468    },
469}
470
// Subcommands of the `system` group (mounted via `Command::System`).
// Every variant takes `--url`, defaulting to `DEFAULT_RPC_URL`.
// `///` comments are clap help text.
471#[derive(Debug, Subcommand)]
472enum SystemCommand {
473    /// Show XPI version and exposed method metadata.
474    Version {
475        #[arg(long, default_value = DEFAULT_RPC_URL)]
476        url: String,
477    },
478    /// List all libraries (user + groups).
479    Libraries {
480        #[arg(long, default_value = DEFAULT_RPC_URL)]
481        url: String,
482    },
483    /// Get statistics for the current (or specified) library.
484    #[command(name = "library-stats")]
485    LibraryStats {
        // Numeric library identifier; omitted means the current library
        // (per the help text above).
486        #[arg(long)]
487        library: Option<i64>,
488        #[arg(long, default_value = DEFAULT_RPC_URL)]
489        url: String,
490    },
491    /// Show item type schema. Without --type, lists all types. With --type, shows fields and creator types.
492    Schema {
        // Field is `item_type` because `type` is a Rust keyword; the flag is `--type`.
493        #[arg(long = "type")]
494        item_type: Option<String>,
495        #[arg(long, default_value = DEFAULT_RPC_URL)]
496        url: String,
497    },
498    /// Get the currently selected Zotero collection (or null).
499    #[command(name = "current-collection")]
500    CurrentCollection {
501        #[arg(long, default_value = DEFAULT_RPC_URL)]
502        url: String,
503    },
504    /// List all RPC methods exposed by the XPI.
505    #[command(name = "list-methods")]
506    ListMethods {
507        #[arg(long, default_value = DEFAULT_RPC_URL)]
508        url: String,
509    },
510    /// Describe one or all RPC methods (schema / signatures).
511    Describe {
        // Optional positional; omitted means "all methods" per the help text.
512        method: Option<String>,
513        #[arg(long, default_value = DEFAULT_RPC_URL)]
514        url: String,
515    },
516}
517
// Arguments of the top-level `search` command (`Command::Search`).
// It doubles as a group: besides the query and filters it accepts an optional
// saved-search management subcommand. `///` comments are clap help text.
518#[derive(Debug, clap::Args)]
519struct SearchArgs {
520    /// Search query (title/creator/year by default; PDF content with --fulltext).
521    query: Option<String>,
522    /// Search inside PDF full-text content instead of metadata.
523    #[arg(long)]
524    fulltext: bool,
525    /// Filter by author/creator name (contains match).
526    #[arg(long)]
527    author: Option<String>,
528    /// Filter by date after (YYYY or YYYY-MM-DD).
529    #[arg(long)]
530    after: Option<String>,
531    /// Filter by date before (YYYY or YYYY-MM-DD).
532    #[arg(long)]
533    before: Option<String>,
534    /// Filter by journal/publication title (contains match).
535    #[arg(long)]
536    journal: Option<String>,
537    /// Filter by tag (exact match).
538    #[arg(long)]
539    tag: Option<String>,
540    /// Find by DOI.
541    #[arg(long)]
542    doi: Option<String>,
543    /// Find by ISBN.
544    #[arg(long)]
545    isbn: Option<String>,
546    /// Find by ISSN.
547    #[arg(long)]
548    issn: Option<String>,
549    /// Limit results to a collection name or key.
550    #[arg(long)]
551    collection: Option<String>,
552    #[arg(long, default_value_t = 50)]
553    limit: u64,
554    #[arg(long, default_value_t = 0)]
555    offset: u64,
556    #[arg(long, default_value = DEFAULT_RPC_URL)]
557    url: String,
    // NOTE(review): each management subcommand declares its own `--url` in
    // addition to the one above; which one wins is decided by the dispatcher,
    // not visible here — confirm.
558    #[command(subcommand)]
559    management: Option<SearchManagementCommand>,
560}
561
// Optional subcommands nested under `search` (see `SearchArgs::management`)
// for managing saved searches. `///` comments are clap help text.
562#[derive(Debug, Subcommand)]
563enum SearchManagementCommand {
564    /// List all saved searches in the library.
565    #[command(name = "saved-searches")]
566    SavedSearches {
567        #[arg(long, default_value = DEFAULT_RPC_URL)]
568        url: String,
569    },
570    /// Create a saved search with one or more conditions.
571    #[command(name = "create-saved")]
572    CreateSaved {
573        name: String,
        // Repeatable `--condition`; at least one is required by the parser.
        // The condition string format is not defined in this type.
574        #[arg(long = "condition", required = true)]
575        condition: Vec<String>,
576        #[arg(long)]
577        dry_run: bool,
578        #[arg(long, default_value = DEFAULT_RPC_URL)]
579        url: String,
580    },
581    /// Delete a saved search by key.
582    #[command(name = "delete-saved")]
583    DeleteSaved {
584        search_key: String,
585        #[arg(long)]
586        dry_run: bool,
587        #[arg(long, default_value = DEFAULT_RPC_URL)]
588        url: String,
589    },
590}
591
// Subcommands of the `items` group (mounted via `Command::Items`).
// Mutating variants carry a `dry_run` flag; every variant takes `--url`
// defaulting to `DEFAULT_RPC_URL`. `///` comments are clap help text.
592#[derive(Debug, Subcommand)]
593enum ItemsCommand {
594    /// Add an item by DOI, ISBN, URL, or local file.
595    Add {
        // NOTE(review): the four sources are all optional and not declared
        // mutually exclusive at the parser level — presumably checked at
        // dispatch; confirm.
596        #[arg(long)]
597        doi: Option<String>,
598        #[arg(long)]
599        isbn: Option<String>,
600        /// Web page URL to add from.
601        #[arg(long = "from-url")]
602        from_url: Option<String>,
603        /// Local file path to add from.
604        #[arg(long)]
605        file: Option<String>,
606        #[arg(long)]
607        collection: Option<String>,
608        #[arg(long)]
609        dry_run: bool,
610        #[arg(long, default_value = DEFAULT_RPC_URL)]
611        url: String,
612    },
613    /// Create a new item of the given type.
614    Create {
615        #[arg(long = "type")]
616        item_type: String,
        // Repeatable `--field`; the expected string format is not defined here.
617        #[arg(long = "field")]
618        fields: Vec<String>,
619        #[arg(long)]
620        dry_run: bool,
621        #[arg(long, default_value = DEFAULT_RPC_URL)]
622        url: String,
623    },
624    /// Update fields on an existing item.
625    Update {
626        key: String,
627        #[arg(long = "field")]
628        fields: Vec<String>,
629        #[arg(long)]
630        dry_run: bool,
631        #[arg(long, default_value = DEFAULT_RPC_URL)]
632        url: String,
633    },
634    /// Permanently delete an item.
635    Delete {
636        key: String,
637        #[arg(long)]
638        dry_run: bool,
639        #[arg(long, default_value = DEFAULT_RPC_URL)]
640        url: String,
641    },
642    /// Move one or more items to trash.
643    Trash {
644        items: Vec<String>,
645        #[arg(long)]
646        dry_run: bool,
647        #[arg(long, default_value = DEFAULT_RPC_URL)]
648        url: String,
649    },
650    /// Restore a trashed item.
651    Restore {
652        item: String,
653        #[arg(long)]
654        dry_run: bool,
655        #[arg(long, default_value = DEFAULT_RPC_URL)]
656        url: String,
657    },
658    /// Merge a group of duplicate items.
659    #[command(name = "merge-duplicates")]
660    MergeDuplicates {
661        keys: Vec<String>,
662        #[arg(long)]
663        dry_run: bool,
664        #[arg(long, default_value = DEFAULT_RPC_URL)]
665        url: String,
666    },
667    /// Add a related-item link between two items.
668    #[command(name = "add-related")]
669    AddRelated {
670        key: String,
671        #[arg(long)]
672        target: String,
673        #[arg(long)]
674        dry_run: bool,
675        #[arg(long, default_value = DEFAULT_RPC_URL)]
676        url: String,
677    },
678    /// Remove a related-item link between two items.
679    #[command(name = "remove-related")]
680    RemoveRelated {
681        key: String,
682        #[arg(long)]
683        target: String,
684        #[arg(long)]
685        dry_run: bool,
686        #[arg(long, default_value = DEFAULT_RPC_URL)]
687        url: String,
688    },
689    /// Print the full serialization of an item by key.
690    Get {
691        item: String,
692        #[arg(long, default_value = DEFAULT_RPC_URL)]
693        url: String,
694    },
695    /// List items in the library with optional sorting and pagination.
696    List {
697        #[arg(long, default_value_t = 50)]
698        limit: u64,
699        #[arg(long, default_value_t = 0)]
700        offset: u64,
701        #[arg(long)]
702        sort: Option<String>,
        // Free-form string at the parser level; defaults to "asc".
703        #[arg(long, default_value = "asc")]
704        direction: String,
705        /// List trashed items instead of regular items.
706        #[arg(long)]
707        trash: bool,
708        #[arg(long, default_value = DEFAULT_RPC_URL)]
709        url: String,
710    },
711    /// Run Zotero's duplicate scan and print groups.
712    #[command(name = "find-duplicates")]
713    FindDuplicates {
714        #[arg(long, default_value = DEFAULT_RPC_URL)]
715        url: String,
716    },
717    /// List recently added or modified items.
718    Recent {
719        #[arg(long, default_value_t = 20)]
720        limit: u64,
721        #[arg(long, default_value_t = 0)]
722        offset: u64,
        // Exposed as `--type`; defaults to "added".
723        #[arg(long = "type", default_value = "added")]
724        recent_type: String,
725        #[arg(long, default_value = DEFAULT_RPC_URL)]
726        url: String,
727    },
728    /// Retrieve the full-text content of an item's attachment.
729    Fulltext {
730        key: String,
731        #[arg(long, default_value = DEFAULT_RPC_URL)]
732        url: String,
733    },
734    /// List items related to the given item.
735    Related {
736        key: String,
737        #[arg(long, default_value = DEFAULT_RPC_URL)]
738        url: String,
739    },
740    /// Get the citation key for an item.
741    #[command(name = "citation-key")]
742    CitationKey {
743        key: String,
744        #[arg(long, default_value = DEFAULT_RPC_URL)]
745        url: String,
746    },
747}
748
// Subcommands of the `settings` group (mounted via `Command::Settings`).
// `///` comments are clap help text.
749#[derive(Debug, Subcommand)]
750enum SettingsCommand {
751    /// Get a single Zotero preference value.
752    Get {
753        key: String,
754        #[arg(long, default_value = DEFAULT_RPC_URL)]
755        url: String,
756    },
757    /// List all Zotero preferences as a key->value dict.
    // `get-all` is a visible alias, so it shows up in help output alongside `list`.
758    #[command(visible_alias = "get-all")]
759    List {
760        #[arg(long, default_value = DEFAULT_RPC_URL)]
761        url: String,
762    },
763    /// Set one or more Zotero preferences (key value pairs), or bulk-set from a JSON file.
764    Set {
        // NOTE(review): the parser accepts any number of positionals, including
        // an odd count; pairing is presumably validated at dispatch — confirm.
765        /// key value key value ... (pairs of positional args)
766        pairs: Vec<String>,
767        /// Bulk-set from a JSON file.
768        #[arg(long)]
769        file: Option<String>,
770        #[arg(long)]
771        dry_run: bool,
772        #[arg(long, default_value = DEFAULT_RPC_URL)]
773        url: String,
774    },
775}
776
// Subcommands of the `tags` group (mounted via `Command::Tags`).
// `///` comments are clap help text.
777#[derive(Debug, Subcommand)]
778enum TagsCommand {
779    /// List all tags in the library (flat).
780    List {
781        #[arg(long, default_value_t = 200)]
782        limit: u64,
783        #[arg(long, default_value = DEFAULT_RPC_URL)]
784        url: String,
785    },
786    /// Rename a tag across all items.
787    Rename {
        // Two positionals, in this order: old name, then new name.
788        old: String,
789        new: String,
790        #[arg(long)]
791        dry_run: bool,
792        #[arg(long, default_value = DEFAULT_RPC_URL)]
793        url: String,
794    },
795    /// Delete a tag library-wide.
796    Delete {
797        tag: String,
798        #[arg(long)]
799        dry_run: bool,
800        #[arg(long, default_value = DEFAULT_RPC_URL)]
801        url: String,
802    },
803    /// Add tags to one or more items.
804    Add {
        // Item keys are positional; tags come from repeatable `--tag`
        // (at least one required by the parser).
805        keys: Vec<String>,
806        #[arg(long = "tag", required = true)]
807        tags: Vec<String>,
808        #[arg(long)]
809        dry_run: bool,
810        #[arg(long, default_value = DEFAULT_RPC_URL)]
811        url: String,
812    },
813    /// Remove tags from one or more items.
814    Remove {
815        keys: Vec<String>,
816        #[arg(long = "tag", required = true)]
817        tags: Vec<String>,
818        #[arg(long)]
819        dry_run: bool,
820        #[arg(long, default_value = DEFAULT_RPC_URL)]
821        url: String,
822    },
823}
824
// Arguments of the top-level `export` command (`Command::Export`).
// `///` comments are clap help text.
825#[derive(Debug, clap::Args)]
826struct ExportArgs {
827    /// Item keys to export.
828    keys: Vec<String>,
    // Free-form string at the parser level; the accepted values are only
    // listed in the help text below.
829    /// Output format: bibtex, ris, csl-json, bibliography.
830    #[arg(long, default_value = "bibtex")]
831    format: String,
832    /// Export all items from this collection (name or key).
833    #[arg(long)]
834    collection: Option<String>,
835    /// Citation style URL (only for bibliography format).
836    #[arg(long, default_value = "http://www.zotero.org/styles/gb-t-7714-2015-numeric")]
837    style: String,
838    /// Output HTML instead of plain text (only for bibliography format).
839    #[arg(long)]
840    html: bool,
841    #[arg(long, default_value = DEFAULT_RPC_URL)]
842    url: String,
843}
844
// Subcommands of the `annotations` group (mounted via `Command::Annotations`).
// `///` comments are clap help text.
845#[derive(Debug, Subcommand)]
846enum AnnotationsCommand {
847    /// List annotations on a PDF attachment.
848    List {
849        #[arg(long)]
850        parent: String,
851        #[arg(long, default_value = DEFAULT_RPC_URL)]
852        url: String,
853    },
854    /// Create a new annotation on a PDF attachment.
855    Create {
856        #[arg(long)]
857        parent: String,
        // Exposed as `--type`; required, free-form string at the parser level.
858        #[arg(long = "type")]
859        annotation_type: String,
860        /// JSON annotation position, for example '{"pageIndex":0,"rects":[[10,20,30,40]]}'.
861        #[arg(long)]
862        position: Option<String>,
863        /// Zotero annotation sort index.
864        #[arg(long = "sort-index")]
865        sort_index: Option<String>,
866        #[arg(long)]
867        text: Option<String>,
868        #[arg(long)]
869        comment: Option<String>,
        // Default annotation colour is the hex string "#ffd400".
870        #[arg(long, default_value = "#ffd400")]
871        color: String,
872        #[arg(long)]
873        dry_run: bool,
874        #[arg(long, default_value = DEFAULT_RPC_URL)]
875        url: String,
876    },
877    /// Delete an annotation by key.
878    Delete {
879        annotation_key: String,
880        #[arg(long)]
881        dry_run: bool,
882        #[arg(long, default_value = DEFAULT_RPC_URL)]
883        url: String,
884    },
885}
886
// Subcommands of the `attachments` group (mounted via `Command::Attachments`).
// `///` comments are clap help text.
887#[derive(Debug, Subcommand)]
888enum AttachmentsCommand {
889    /// List attachments belonging to a parent item.
890    List {
891        #[arg(long)]
892        parent: String,
893        #[arg(long, default_value_t = 50)]
894        limit: u64,
895        #[arg(long, default_value_t = 0)]
896        offset: u64,
897        #[arg(long, default_value = DEFAULT_RPC_URL)]
898        url: String,
899    },
900    /// Get a single attachment by key.
901    Get {
902        key: String,
903        #[arg(long, default_value = DEFAULT_RPC_URL)]
904        url: String,
905    },
906    /// Get full-text content of an attachment.
907    Fulltext {
908        key: String,
909        #[arg(long, default_value = DEFAULT_RPC_URL)]
910        url: String,
911    },
912    /// Get the local filesystem path of an attachment.
913    Path {
914        key: String,
915        #[arg(long, default_value = DEFAULT_RPC_URL)]
916        url: String,
917    },
918    /// Attach a local file or remote URL to an item.
919    Add {
920        #[arg(long)]
921        parent: String,
        // NOTE(review): `--path` and `--from-url` are both optional and not
        // declared exclusive here; presumably checked at dispatch — confirm.
922        /// Local file path to attach.
923        #[arg(long)]
924        path: Option<String>,
925        /// Remote URL to attach.
926        #[arg(long = "from-url")]
927        from_url: Option<String>,
928        #[arg(long)]
929        title: Option<String>,
930        #[arg(long)]
931        dry_run: bool,
932        #[arg(long, default_value = DEFAULT_RPC_URL)]
933        url: String,
934    },
935    /// Delete an attachment.
936    Delete {
937        key: String,
938        #[arg(long, default_value = DEFAULT_RPC_URL)]
939        url: String,
940        #[arg(long)]
941        dry_run: bool,
942    },
943    /// Trigger Zotero's Find Available PDF for a parent item.
944    #[command(name = "find-pdf")]
945    FindPdf {
946        #[arg(long)]
947        parent: String,
948        #[arg(long, default_value = DEFAULT_RPC_URL)]
949        url: String,
950    },
951}
952
// Subcommands of the `notes` group (mounted via `Command::Notes`).
// `///` comments are clap help text.
953#[derive(Debug, Subcommand)]
954enum NotesCommand {
955    /// List notes attached to a parent item.
956    List {
957        #[arg(long)]
958        parent: String,
959        #[arg(long, default_value_t = 50)]
960        limit: u64,
961        #[arg(long, default_value_t = 0)]
962        offset: u64,
963        #[arg(long, default_value = DEFAULT_RPC_URL)]
964        url: String,
965    },
966    /// Get a single note by key.
967    Get {
968        note_key: String,
969        #[arg(long, default_value = DEFAULT_RPC_URL)]
970        url: String,
971    },
972    /// Create a note attached to a parent item.
973    Create {
974        #[arg(long)]
975        parent: String,
976        #[arg(long)]
977        content: String,
        // Repeatable `--tag`; optional here (no `required = true`).
978        #[arg(long = "tag")]
979        tags: Vec<String>,
980        #[arg(long)]
981        dry_run: bool,
982        #[arg(long, default_value = DEFAULT_RPC_URL)]
983        url: String,
984    },
985    /// Update the content of an existing note.
986    Update {
987        note_key: String,
988        #[arg(long)]
989        content: String,
990        #[arg(long)]
991        dry_run: bool,
992        #[arg(long, default_value = DEFAULT_RPC_URL)]
993        url: String,
994    },
995    /// Delete a note by key.
996    Delete {
997        note_key: String,
998        #[arg(long)]
999        dry_run: bool,
1000        #[arg(long, default_value = DEFAULT_RPC_URL)]
1001        url: String,
1002    },
1003    /// Search notes by text content.
1004    Search {
1005        query: String,
1006        #[arg(long, default_value_t = 50)]
1007        limit: u64,
1008        #[arg(long, default_value = DEFAULT_RPC_URL)]
1009        url: String,
1010    },
1011}
1012
// Subcommands of the `collections` namespace, parsed through clap's `Subcommand` derive.
// Every variant carries its own `--url` option (default `DEFAULT_RPC_URL`);
// `command_url` reads that field to pick the RPC endpoint.
// Reviewer notes use `//` rather than `///` because clap turns `///` comments into help text.
1013#[derive(Debug, Subcommand)]
1014enum CollectionsCommand {
1015    /// List all collections in the user library (flat).
1016    List {
1017        #[arg(long, default_value = DEFAULT_RPC_URL)]
1018        url: String,
1019    },
1020    /// Print the collection hierarchy as a tree.
1021    Tree {
1022        #[arg(long, default_value = DEFAULT_RPC_URL)]
1023        url: String,
1024    },
1025    /// Get a single collection's metadata.
1026    Get {
        // Positional; the field name suggests either a collection name or an id is accepted.
        // NOTE(review): the resolution logic is not visible in this part of the file.
1027        name_or_id: String,
1028        #[arg(long, default_value = DEFAULT_RPC_URL)]
1029        url: String,
1030    },
1031    /// List all items in a collection.
    // Invoked as `get-items`; `items` is a visible alias for the same subcommand.
1032    #[command(name = "get-items", visible_alias = "items")]
1033    GetItems {
1034        name_or_id: String,
        // Optional here (no default), unlike the paging limits of other namespaces.
1035        #[arg(long)]
1036        limit: Option<u64>,
1037        #[arg(long, default_value_t = 0)]
1038        offset: u64,
1039        #[arg(long, default_value = DEFAULT_RPC_URL)]
1040        url: String,
1041    },
1042    /// Show item/attachment/note/subcollection counts for a collection.
1043    Stats {
1044        name_or_id: String,
1045        #[arg(long, default_value = DEFAULT_RPC_URL)]
1046        url: String,
1047    },
1048    /// Rename a collection.
1049    Rename {
        // Two positionals, parsed in declaration order: old name first, new name second.
1050        old_name: String,
1051        new_name: String,
1052        #[arg(long, default_value = DEFAULT_RPC_URL)]
1053        url: String,
        // NOTE(review): presumably previews the change without applying it — the handler is
        // not visible in this part of the file; the same applies to every `dry_run` below.
1054        #[arg(long)]
1055        dry_run: bool,
1056    },
1057    /// Create a collection, optionally nested under a parent.
1058    Create {
1059        name: String,
        // Optional parent collection, given as `--parent`.
1060        #[arg(long)]
1061        parent: Option<String>,
1062        #[arg(long, default_value = DEFAULT_RPC_URL)]
1063        url: String,
1064        #[arg(long)]
1065        dry_run: bool,
1066    },
1067    /// Delete a collection.
1068    Delete {
1069        name_or_id: String,
1070        #[arg(long, default_value = DEFAULT_RPC_URL)]
1071        url: String,
1072        #[arg(long)]
1073        dry_run: bool,
1074    },
1075    /// Add existing items to a collection.
1076    #[command(name = "add-items")]
1077    AddItems {
        // First positional is the collection; the remaining positionals are item keys.
1078        collection: String,
1079        item_keys: Vec<String>,
1080        #[arg(long, default_value = DEFAULT_RPC_URL)]
1081        url: String,
1082        #[arg(long)]
1083        dry_run: bool,
1084    },
1085    /// Remove items from a collection.
1086    #[command(name = "remove-items")]
1087    RemoveItems {
        // Same positional layout as `add-items`.
1088        collection: String,
1089        item_keys: Vec<String>,
1090        #[arg(long, default_value = DEFAULT_RPC_URL)]
1091        url: String,
1092        #[arg(long)]
1093        dry_run: bool,
1094    },
1095}
1096
/// Selects how a JSON value is rendered; passed as the second argument to `format_json`
/// (for example `run_ocr_command` uses `PythonCompact`).
1097#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1098enum JsonStyle {
1099    /// Matches Python's top-level `ping` command (`json.dumps` defaults).
1100    PythonCompact,
1101    /// Matches namespace commands that route through Python `emit(..., indent=2)`.
1102    Pretty,
1103}
1104
/// Result of a successful `parse_cli` call.
1105enum ParseOutcome<T> {
    /// Arguments parsed into the typed CLI value.
1106    Command(T),
    /// clap produced help or version text; `run` returns it as ordinary output.
1107    Display(String),
1108}
1109
1110fn parse_cli<T>(
1111    args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1112) -> Result<ParseOutcome<T>, String>
1113where
1114    T: Parser,
1115{
1116    match T::try_parse_from(args) {
1117        Ok(cli) => Ok(ParseOutcome::Command(cli)),
1118        Err(err)
1119            if matches!(
1120                err.kind(),
1121                ErrorKind::DisplayHelp | ErrorKind::DisplayVersion
1122            ) =>
1123        {
1124            Ok(ParseOutcome::Display(err.to_string()))
1125        }
1126        Err(err) => Err(err.to_string()),
1127    }
1128}
1129
1130pub fn format_error_json(message: &str) -> String {
1131    let message = message.trim_end();
1132    let (code, message) = split_error_code(message).unwrap_or(("RUNTIME_ERROR", message));
1133    serde_json::json!({"error": {"code": code, "message": message}}).to_string()
1134}
1135
/// Splits a `CODE: message` string into its code and message parts.
///
/// The text before the first `:` counts as a code only when it
/// - is longer than one character (so a drive letter such as `C:` is not a code),
/// - starts with an ASCII uppercase letter (so `404:` or `12:30` is not a code), and
/// - consists solely of ASCII uppercase letters, ASCII digits and `_`.
///
/// Returns `Some((code, message))` with leading whitespace stripped from the message, or
/// `None` when there is no `:` or the prefix is not a well-formed code.
fn split_error_code(message: &str) -> Option<(&str, &str)> {
    let (code, rest) = message.split_once(':')?;
    let starts_with_letter = code.starts_with(|ch: char| ch.is_ascii_uppercase());
    let only_code_chars = code
        .chars()
        .all(|ch| ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '_');
    let well_formed = code.len() > 1 && starts_with_letter && only_code_chars;
    well_formed.then(|| (code, rest.trim_start()))
}
1148
1149pub fn run(
1150    args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1151) -> Result<String, String> {
1152    let cli = match parse_cli::<Cli>(args)? {
1153        ParseOutcome::Command(cli) => cli,
1154        ParseOutcome::Display(output) => return Ok(output),
1155    };
1156    let url = command_url(&cli.command);
1157    let mut client = ZoteroRpc::new(url);
1158    run_command(cli.command, &mut client)
1159}
1160
1161pub fn run_with_client(
1162    args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1163    client: &mut impl RpcCaller,
1164) -> Result<String, String> {
1165    let cli = match parse_cli::<Cli>(args)? {
1166        ParseOutcome::Command(cli) => cli,
1167        ParseOutcome::Display(output) => return Ok(output),
1168    };
1169    run_command(cli.command, client)
1170}
1171
1172fn rag_command_url(command: &RagCommand) -> String {
1173    match command {
1174        RagCommand::Providers => DEFAULT_RPC_URL.to_string(),
1175        RagCommand::Embed { .. } => DEFAULT_RPC_URL.to_string(),
1176        RagCommand::Status { url, .. } => url.clone(),
1177        RagCommand::Search { url, .. } => url.clone(),
1178    }
1179}
1180
1181fn command_url(command: &Command) -> String {
1182    match command {
1183        Command::Ping { url }
1184        | Command::Rpc { url, .. }
1185        | Command::Push { url, .. }
1186        | Command::FindPdfs { url, .. } => url.clone(),
1187        Command::Ocr { command } => match command {
1188            OcrCommand::Providers => DEFAULT_RPC_URL.to_string(),
1189            OcrCommand::Run { .. } => DEFAULT_RPC_URL.to_string(),
1190            OcrCommand::Status { url, .. } => url.clone(),
1191            OcrCommand::Process { url, .. } => url.clone(),
1192        },
1193        Command::Rag { command } => rag_command_url(command),
1194        Command::System { command } => match command {
1195            SystemCommand::Version { url }
1196            | SystemCommand::Libraries { url }
1197            | SystemCommand::LibraryStats { url, .. }
1198            | SystemCommand::Schema { url, .. }
1199            | SystemCommand::CurrentCollection { url }
1200            | SystemCommand::ListMethods { url }
1201            | SystemCommand::Describe { url, .. } => url.clone(),
1202        },
1203        Command::Search(ref args) => match &args.management {
1204            Some(SearchManagementCommand::SavedSearches { url })
1205            | Some(SearchManagementCommand::CreateSaved { url, .. })
1206            | Some(SearchManagementCommand::DeleteSaved { url, .. }) => url.clone(),
1207            None => args.url.clone(),
1208        },
1209        Command::Items { command } => match command {
1210            ItemsCommand::Add { url, .. }
1211            | ItemsCommand::Create { url, .. }
1212            | ItemsCommand::Update { url, .. }
1213            | ItemsCommand::Delete { url, .. }
1214            | ItemsCommand::Trash { url, .. }
1215            | ItemsCommand::Restore { url, .. }
1216            | ItemsCommand::MergeDuplicates { url, .. }
1217            | ItemsCommand::AddRelated { url, .. }
1218            | ItemsCommand::RemoveRelated { url, .. }
1219            | ItemsCommand::Get { url, .. }
1220            | ItemsCommand::List { url, .. }
1221            | ItemsCommand::FindDuplicates { url }
1222            | ItemsCommand::Recent { url, .. }
1223            | ItemsCommand::Fulltext { url, .. }
1224            | ItemsCommand::Related { url, .. }
1225            | ItemsCommand::CitationKey { url, .. } => url.clone(),
1226        },
1227        Command::Collections { command } => match command {
1228            CollectionsCommand::List { url }
1229            | CollectionsCommand::Tree { url }
1230            | CollectionsCommand::Get { url, .. }
1231            | CollectionsCommand::GetItems { url, .. }
1232            | CollectionsCommand::Stats { url, .. }
1233            | CollectionsCommand::Rename { url, .. }
1234            | CollectionsCommand::Create { url, .. }
1235            | CollectionsCommand::Delete { url, .. }
1236            | CollectionsCommand::AddItems { url, .. }
1237            | CollectionsCommand::RemoveItems { url, .. } => url.clone(),
1238        },
1239        Command::Notes { command } => match command {
1240            NotesCommand::List { url, .. }
1241            | NotesCommand::Get { url, .. }
1242            | NotesCommand::Create { url, .. }
1243            | NotesCommand::Update { url, .. }
1244            | NotesCommand::Delete { url, .. }
1245            | NotesCommand::Search { url, .. } => url.clone(),
1246        },
1247        Command::Attachments { command } => match command {
1248            AttachmentsCommand::List { url, .. }
1249            | AttachmentsCommand::Get { url, .. }
1250            | AttachmentsCommand::Fulltext { url, .. }
1251            | AttachmentsCommand::Path { url, .. }
1252            | AttachmentsCommand::Add { url, .. }
1253            | AttachmentsCommand::Delete { url, .. }
1254            | AttachmentsCommand::FindPdf { url, .. } => url.clone(),
1255        },
1256        Command::Settings { command } => match command {
1257            SettingsCommand::Get { url, .. }
1258            | SettingsCommand::List { url }
1259            | SettingsCommand::Set { url, .. } => url.clone(),
1260        },
1261        Command::Tags { command } => match command {
1262            TagsCommand::List { url, .. }
1263            | TagsCommand::Rename { url, .. }
1264            | TagsCommand::Delete { url, .. }
1265            | TagsCommand::Add { url, .. }
1266            | TagsCommand::Remove { url, .. } => url.clone(),
1267        },
1268        Command::Export(ref args) => args.url.clone(),
1269        Command::Annotations { command } => match command {
1270            AnnotationsCommand::List { url, .. }
1271            | AnnotationsCommand::Create { url, .. }
1272            | AnnotationsCommand::Delete { url, .. } => url.clone(),
1273        },
1274    }
1275}
1276
1277fn run_ocr_command(command: OcrCommand, client: &mut impl RpcCaller) -> Result<String, String> {
1278    let value = match command {
1279        OcrCommand::Providers => serde_json::json!({
1280            "providers": ocr_provider_specs(),
1281        }),
1282        OcrCommand::Run {
1283            provider,
1284            input,
1285            file,
1286            item_key,
1287            attachment_key,
1288            mime_type,
1289            endpoint,
1290            api_key_env,
1291        } => run_ocr_run_command(OcrRunOptions {
1292            provider,
1293            input,
1294            file,
1295            item_key,
1296            attachment_key,
1297            mime_type,
1298            endpoint,
1299            api_key_env,
1300        })?,
1301        OcrCommand::Status { collection, .. } => run_ocr_status_command(client, collection)?,
1302        OcrCommand::Process {
1303            provider,
1304            parent,
1305            attachment,
1306            source_url,
1307            result_dir,
1308            result_zip,
1309            provider_endpoint,
1310            api_key_env,
1311            poll_interval_seconds,
1312            timeout_seconds,
1313            chunk_chars,
1314            ..
1315        } => run_ocr_process_command(
1316            client,
1317            OcrProcessOptions {
1318                provider,
1319                parent,
1320                attachment,
1321                source_url,
1322                result_dir,
1323                result_zip,
1324                provider_endpoint,
1325                api_key_env,
1326                poll_interval_seconds,
1327                timeout_seconds,
1328                chunk_chars,
1329            },
1330        )?,
1331    };
1332    format_json(&value, JsonStyle::PythonCompact)
1333}
1334
/// Arguments of `ocr process`, bundled by `run_ocr_command` for `run_ocr_process_command`.
1335struct OcrProcessOptions {
    /// Provider id, looked up with `raw_ocr_provider_spec`.
1336    provider: String,
    /// Key of the parent item; reported as `item_key` in the result JSON.
1337    parent: String,
    /// Attachment key; when `None`, the first PDF attachment of `parent` is resolved over RPC.
1338    attachment: Option<String>,
    /// Remote document URL submitted to MinerU instead of uploading the local file.
    /// Cannot be combined with `result_dir` or `result_zip`.
1339    source_url: Option<String>,
    /// Directory holding an existing MinerU result; mutually exclusive with `result_zip`.
1340    result_dir: Option<String>,
    /// Zip archive holding an existing MinerU result; mutually exclusive with `result_dir`.
1341    result_zip: Option<String>,
    /// Endpoint override. In the non-MinerU path it replaces the `ocr.apiUrl` setting.
1342    provider_endpoint: Option<String>,
    /// Name of the environment variable that holds the provider API key.
1343    api_key_env: String,
    /// Forwarded to the MinerU pollers (`poll_mineru_task` / `poll_mineru_batch`).
1344    poll_interval_seconds: u64,
    /// Forwarded to the MinerU pollers alongside the poll interval.
1345    timeout_seconds: u64,
    /// Chunk size passed to the chunking step (for example `chunks_from_blocks`).
1346    chunk_chars: usize,
1347}
1348
/// Arguments of `ocr run`, bundled by `run_ocr_command` for `run_ocr_run_command`.
1349struct OcrRunOptions {
    /// Provider id passed to `build_ocr_provider_request`.
1350    provider: String,
    /// JSON request input, read with `read_json_input`; mutually exclusive with `file`.
1351    input: Option<String>,
    /// Document to OCR, turned into a request by `ocr_input_from_file`; mutually exclusive
    /// with `input`.
1352    file: Option<String>,
    /// Forwarded to `ocr_input_from_file`; only consulted when `file` is used.
1353    item_key: Option<String>,
    /// Forwarded to `ocr_input_from_file`; only consulted when `file` is used.
1354    attachment_key: Option<String>,
    /// Forwarded to `ocr_input_from_file`; only consulted when `file` is used.
1355    mime_type: Option<String>,
    /// Overrides the URL from the provider request for HTTP providers.
1356    endpoint: Option<String>,
    /// Passed to `provider_http_transport_with_auth` when building the HTTP transport.
1357    api_key_env: Option<String>,
1358}
1359
1360fn run_ocr_run_command(options: OcrRunOptions) -> Result<Value, String> {
1361    let input: OcrRequestInput = match (options.input, options.file) {
1362        (Some(input), None) => read_json_input(&input)?,
1363        (None, Some(file)) => ocr_input_from_file(
1364            file,
1365            options.item_key,
1366            options.attachment_key,
1367            options.mime_type,
1368        )?,
1369        (Some(_), Some(_)) => {
1370            return Err("INVALID_ARGS: use either --input or --file, not both".to_string())
1371        }
1372        (None, None) => return Err("INVALID_ARGS: provide --input JSON or --file".to_string()),
1373    };
1374    let request = build_ocr_provider_request(&options.provider, &input)?;
1375    let payload = if request.command.is_empty() {
1376        let method = request
1377            .method
1378            .ok_or_else(|| format!("OCR provider {} missing HTTP method", request.provider))?;
1379        let auth_scheme = raw_ocr_provider_spec(&options.provider)?.auth;
1380        let mut transport =
1381            provider_http_transport_with_auth(options.api_key_env.as_deref(), auth_scheme)?;
1382        transport.post_json(&ProviderHttpInvocation {
1383            provider: request.provider.to_string(),
1384            style: request.style.to_string(),
1385            method: method.to_string(),
1386            url: options
1387                .endpoint
1388                .or_else(|| request.url.map(ToString::to_string)),
1389            auth_header_name: request.auth_header.map(ToString::to_string),
1390            auth_header_value: None,
1391            body: request.body,
1392        })?
1393    } else {
1394        let mut command_runner = StdProviderCommandRunner;
1395        command_runner.run_json(&request.command)?
1396    };
1397    let blocks = match parse_ocr_provider_response(
1398        request.provider,
1399        &payload,
1400        &input.item_key,
1401        &input.attachment_key,
1402    ) {
1403        Ok(blocks) => blocks,
1404        Err(err) => {
1405            if let Some(task) = ocr_async_task_result(request.provider, &payload) {
1406                return Ok(task);
1407            }
1408            return Err(err);
1409        }
1410    };
1411
1412    Ok(serde_json::json!({
1413        "provider": request.provider,
1414        "blocks": blocks,
1415    }))
1416}
1417
1418fn run_ocr_process_command(
1419    client: &mut impl RpcCaller,
1420    mut options: OcrProcessOptions,
1421) -> Result<Value, String> {
1422    let spec = raw_ocr_provider_spec(&options.provider)?;
1423
1424    let attachment = match options.attachment.take() {
1425        Some(key) => key,
1426        None => resolve_first_pdf_attachment_key(client, &options.parent)?,
1427    };
1428    options.attachment = Some(attachment.clone());
1429
1430    let attachment_path = resolve_attachment_path(client, &attachment)?;
1431    let storage_dir = attachment_path
1432        .parent()
1433        .ok_or_else(|| {
1434            format!(
1435                "ATTACHMENT_PATH_INVALID: attachment path has no parent directory: {}",
1436                attachment_path.display()
1437            )
1438        })?
1439        .to_path_buf();
1440
1441    match spec.provider_key {
1442        "mineru" | "mineru-cli" => {
1443            if options.result_dir.is_some() && options.result_zip.is_some() {
1444                return Err("INVALID_ARGS: use either --result-dir or --result-zip, not both".to_string());
1445            }
1446            if options.source_url.is_some()
1447                && (options.result_dir.is_some() || options.result_zip.is_some())
1448            {
1449                return Err(
1450                    "INVALID_ARGS: --source-url cannot be combined with --result-dir/--result-zip"
1451                        .to_string(),
1452                );
1453            }
1454            let file_name = attachment_path
1455                .file_name()
1456                .and_then(|name| name.to_str())
1457                .unwrap_or("document.pdf")
1458                .to_string();
1459            let source = load_mineru_result_source(&options, &attachment_path, &file_name)?;
1460            let artifacts = persist_mineru_result_sidecars(
1461                &storage_dir, &options.parent, &attachment,
1462                &options.provider, &source, options.chunk_chars,
1463            )?;
1464            Ok(serde_json::json!({
1465                "provider": spec.provider_key,
1466                "status": "indexed",
1467                "item_key": options.parent,
1468                "attachment_key": attachment,
1469                "attachment_path": attachment_path,
1470                "storage_dir": storage_dir,
1471                "task_id": source.task_id,
1472                "state": source.state,
1473                "blocks": artifacts.block_count,
1474                "chunks": artifacts.chunk_count,
1475                "artifacts": artifacts.artifacts,
1476            }))
1477        }
1478        _ => {
1479            run_ocr_process_sync(
1480                client, &options, spec.provider_key,
1481                &attachment, &attachment_path, &storage_dir,
1482            )
1483        }
1484    }
1485}
1486
1487fn run_ocr_process_sync(
1488    client: &mut impl RpcCaller,
1489    options: &OcrProcessOptions,
1490    provider: &str,
1491    attachment_key: &str,
1492    attachment_path: &Path,
1493    storage_dir: &Path,
1494) -> Result<Value, String> {
1495    let api_url = if let Some(endpoint) = &options.provider_endpoint {
1496        endpoint.clone()
1497    } else {
1498        let settings = client.call("settings.getAll", None)?;
1499        settings.get("ocr.apiUrl")
1500            .and_then(Value::as_str)
1501            .unwrap_or("")
1502            .to_string()
1503    };
1504    if api_url.is_empty() {
1505        return Err(format!("MISSING_CONFIG: ocr.apiUrl not configured for provider {provider}"));
1506    }
1507
1508    let api_key = {
1509        let from_env = if !options.api_key_env.is_empty() {
1510            env::var(&options.api_key_env).ok().filter(|v| !v.is_empty())
1511        } else {
1512            None
1513        };
1514        from_env.unwrap_or_else(|| {
1515            client.call("settings.getRaw", Some(serde_json::json!({"key": "ocr.apiKey"})))
1516                .ok()
1517                .and_then(|raw| raw.get("ocr.apiKey").and_then(Value::as_str).map(String::from))
1518                .unwrap_or_default()
1519        })
1520    };
1521
1522    let pdf_bytes = fs::read(attachment_path)
1523        .map_err(|e| format!("READ_PDF_FAILED: {}: {e}", attachment_path.display()))?;
1524    let base64_pdf = format!("data:application/pdf;base64,{}", base64_encode(&pdf_bytes));
1525
1526    let model = {
1527        let settings = client.call("settings.getAll", None)?;
1528        settings.get("ocr.model")
1529            .and_then(Value::as_str)
1530            .unwrap_or("glm-ocr")
1531            .to_string()
1532    };
1533
1534    let body = serde_json::json!({
1535        "model": model,
1536        "file": base64_pdf,
1537    });
1538
1539    let mut req = ureq::post(&api_url).set("Content-Type", "application/json");
1540    if !api_key.is_empty() {
1541        req = req.set("Authorization", &format!("Bearer {api_key}"));
1542    }
1543    let response = req.send_json(&body)
1544        .map_err(|e| format!("OCR_REQUEST_FAILED: {provider}: {e}"))?;
1545    let payload: Value = response.into_json()
1546        .map_err(|e| format!("OCR_RESPONSE_PARSE_FAILED: {e}"))?;
1547
1548    let blocks = parse_ocr_provider_response(provider, &payload, &options.parent, attachment_key)?;
1549    let chunks = zotron_types::chunks_from_blocks(&blocks, options.chunk_chars);
1550
1551    let mut artifacts = Vec::new();
1552    artifacts.push(write_sidecar_json(
1553        storage_dir, &options.parent, attachment_key,
1554        MachineArtifactKind::OcrRaw, &payload,
1555    )?);
1556    artifacts.push(write_sidecar_jsonl(
1557        storage_dir, &options.parent, attachment_key,
1558        MachineArtifactKind::Blocks, &blocks,
1559    )?);
1560    artifacts.push(write_sidecar_jsonl(
1561        storage_dir, &options.parent, attachment_key,
1562        MachineArtifactKind::Chunks, &chunks,
1563    )?);
1564
1565    let embedding_count = embed_sidecar_chunks(client, storage_dir, &options.parent, attachment_key, &chunks);
1566
1567    Ok(serde_json::json!({
1568        "provider": provider,
1569        "status": "indexed",
1570        "item_key": options.parent,
1571        "attachment_key": attachment_key,
1572        "embeddings": embedding_count,
1573        "attachment_path": attachment_path,
1574        "storage_dir": storage_dir,
1575        "blocks": blocks.len(),
1576        "chunks": chunks.len(),
1577        "artifacts": artifacts,
1578    }))
1579}
1580
1581fn embed_sidecar_chunks(
1582    client: &mut impl RpcCaller,
1583    storage_dir: &Path,
1584    item_key: &str,
1585    attachment_key: &str,
1586    chunks: &[zotron_types::StructureChunk],
1587) -> usize {
1588    let Ok((provider, model, api_url, api_key)) = fetch_embedding_settings(client) else {
1589        return 0;
1590    };
1591    if provider.is_empty() || (api_key.is_empty() && provider != "ollama") {
1592        return 0;
1593    }
1594    let emb_chunks: Vec<EmbeddingChunkInput> = chunks
1595        .iter()
1596        .map(|c| EmbeddingChunkInput {
1597            chunk_key: c.chunk_key.clone(),
1598            text: c.text.clone(),
1599        })
1600        .collect();
1601    if emb_chunks.is_empty() {
1602        return 0;
1603    }
1604    // Batch in groups of 20 to avoid API limits
1605    let batch_size = 20;
1606    let mut all_vectors: Vec<EmbeddingVector> = Vec::new();
1607    for batch in emb_chunks.chunks(batch_size) {
1608        let input = EmbeddingRequestInput {
1609            item_key: item_key.to_string(),
1610            chunks: batch.to_vec(),
1611            model: if model.is_empty() { None } else { Some(model.clone()) },
1612            url: if api_url.is_empty() { None } else { Some(api_url.clone()) },
1613            input_type: Some("document".to_string()),
1614        };
1615        let Ok(request) = build_embedding_provider_request(&provider, &input) else {
1616            break;
1617        };
1618        let Some(url) = request.url.as_deref() else { break };
1619        let mut http = ureq::post(url).set("Content-Type", "application/json");
1620        if let Some(auth) = request.auth_header {
1621            if !api_key.is_empty() {
1622                http = http.set(auth, &format!("Bearer {api_key}"));
1623            }
1624        }
1625        let Ok(resp) = http.send_json(&request.body) else { break };
1626        let Ok(payload): Result<Value, _> = resp.into_json() else { break };
1627        let Ok(vectors) = parse_embedding_provider_response(&provider, &payload, item_key, batch)
1628        else {
1629            break;
1630        };
1631        all_vectors.extend(vectors);
1632    }
1633    let count = all_vectors.len();
1634    if count > 0 {
1635        let filename = embedding_vector_filename(&provider, &model);
1636        let vectors_dir = storage_dir.join(".zotron").join("embeddings");
1637        let _ = fs::create_dir_all(&vectors_dir);
1638        let vectors_path = vectors_dir.join(&filename);
1639        let mut out = String::new();
1640        for v in &all_vectors {
1641            if let Ok(line) = serde_json::to_string(v) {
1642                out.push_str(&line);
1643                out.push('\n');
1644            }
1645        }
1646        let _ = fs::write(&vectors_path, &out);
1647    }
1648    count
1649}
1650
/// A MinerU OCR result that has been located or downloaded and is ready to be persisted.
/// Built by `mineru_result_source_from_dir` (defined elsewhere in this file) and handed
/// to `persist_mineru_result_sidecars`.
1651struct MineruResultSource {
    /// Reported as `task_id` in the `ocr process` result. Callers pass the MinerU task or
    /// batch id when the result came from the remote service, `None` for local results.
1652    task_id: Option<String>,
    /// Reported as `state` in the `ocr process` result.
1653    state: String,
    /// NOTE(review): presumably the directory holding the extracted MinerU output — confirm
    /// in `mineru_result_source_from_dir`.
1654    result_dir: PathBuf,
    /// NOTE(review): presumably the original result archive when one was read or
    /// downloaded — confirm in the constructor.
1655    raw_zip_bytes: Option<Vec<u8>>,
    /// NOTE(review): presumably the final polling response from MinerU — confirm in the
    /// constructor.
1656    task_status: Option<Value>,
    /// NOTE(review): meaning not visible in this part of the file.
1657    payload: Value,
    /// NOTE(review): presumably the path of MinerU's content-list file, if found.
1658    content_list_file: Option<PathBuf>,
    /// NOTE(review): presumably the Markdown rendering of the document, if found.
1659    markdown: Option<String>,
1660}
1661
/// Summary returned by `persist_mineru_result_sidecars`; `run_ocr_process_command` copies
/// each field into its result JSON.
1662struct PersistedOcrArtifacts {
    /// Reported as `blocks`.
1663    block_count: usize,
    /// Reported as `chunks`.
1664    chunk_count: usize,
    /// Reported as `artifacts`.
1665    artifacts: Vec<Value>,
1666}
1667
1668fn resolve_attachment_path(
1669    client: &mut impl RpcCaller,
1670    attachment_key: &str,
1671) -> Result<PathBuf, String> {
1672    let payload = client.call(
1673        "attachments.getPath",
1674        Some(serde_json::json!({"key": attachment_key})),
1675    )?;
1676    let raw_path = payload
1677        .get("path")
1678        .and_then(Value::as_str)
1679        .filter(|path| !path.trim().is_empty())
1680        .ok_or_else(|| {
1681            format!("ATTACHMENT_PATH_NOT_FOUND: attachment {attachment_key} has no local PDF path")
1682        })?;
1683    Ok(PathBuf::from(local_path_from_zotero_path(raw_path)))
1684}
1685
1686/// Resolve the first PDF attachment key for a parent item via `attachments.list`.
1687fn resolve_first_pdf_attachment_key(
1688    client: &mut impl RpcCaller,
1689    parent_key: &str,
1690) -> Result<String, String> {
1691    let response = client.call(
1692        "attachments.list",
1693        Some(serde_json::json!({"parentKey": parent_key})),
1694    )?;
1695    // The XPI returns {items: [...], total: N}.
1696    let attachments = response
1697        .get("items")
1698        .and_then(Value::as_array)
1699        .or_else(|| response.as_array())
1700        .ok_or_else(|| {
1701            format!("NO_PDF_ATTACHMENT: no attachments found for item {parent_key}")
1702        })?;
1703    for attachment in attachments {
1704        if is_pdf_attachment(attachment) {
1705            if let Some(key) = attachment.get("key").and_then(Value::as_str) {
1706                return Ok(key.to_string());
1707            }
1708        }
1709    }
1710    Err(format!(
1711        "NO_PDF_ATTACHMENT: no PDF attachment found for item {parent_key}"
1712    ))
1713}
1714
1715fn load_mineru_result_source(
1716    options: &OcrProcessOptions,
1717    attachment_path: &Path,
1718    file_name: &str,
1719) -> Result<MineruResultSource, String> {
1720    if let Some(result_dir) = options.result_dir.as_deref() {
1721        return mineru_result_source_from_dir(PathBuf::from(result_dir), None, None, None);
1722    }
1723    if let Some(result_zip) = options.result_zip.as_deref() {
1724        let zip_path = PathBuf::from(result_zip);
1725        let zip_bytes = fs::read(&zip_path)
1726            .map_err(|err| format!("read MinerU result zip {}: {err}", zip_path.display()))?;
1727        let result_dir = extract_zip_bytes_to_temp("zotron-mineru-result", &zip_bytes)?;
1728        return mineru_result_source_from_dir(result_dir, Some(zip_bytes), None, None);
1729    }
1730
1731    let Some(source_url) = options
1732        .source_url
1733        .as_deref()
1734        .filter(|value| !value.trim().is_empty())
1735    else {
1736        return submit_mineru_local_file(options, attachment_path, file_name);
1737    };
1738    let input = OcrRequestInput {
1739        item_key: options.parent.clone(),
1740        attachment_key: options.attachment.clone().expect("attachment resolved"),
1741        file_name: file_name.to_string(),
1742        mime_type: "application/pdf".to_string(),
1743        content_base64: format!("url:{source_url}"),
1744        source_url: Some(source_url.to_string()),
1745        local_path: None,
1746        output_dir: None,
1747    };
1748    let task = submit_mineru_task(
1749        &options.provider,
1750        &input,
1751        options.provider_endpoint.clone(),
1752        &options.api_key_env,
1753    )?;
1754    let task_id = task
1755        .get("data")
1756        .and_then(|data| data.get("task_id"))
1757        .and_then(Value::as_str)
1758        .ok_or_else(|| "MinerU submit response missing data.task_id".to_string())?
1759        .to_string();
1760    let auth_header = provider_auth_header_value(&options.api_key_env, "bearer")?;
1761    let status = poll_mineru_task(
1762        options.provider_endpoint.as_deref(),
1763        &task_id,
1764        &auth_header,
1765        options.poll_interval_seconds,
1766        options.timeout_seconds,
1767    )?;
1768    let zip_url = status
1769        .pointer("/data/full_zip_url")
1770        .or_else(|| status.pointer("/data/result/full_zip_url"))
1771        .and_then(Value::as_str)
1772        .ok_or_else(|| "MinerU completed task missing data.full_zip_url".to_string())?;
1773    let zip_bytes = download_bytes(zip_url)?;
1774    let result_dir = extract_zip_bytes_to_temp("zotron-mineru-result", &zip_bytes)?;
1775    mineru_result_source_from_dir(result_dir, Some(zip_bytes), Some(status), Some(task_id))
1776}
1777
1778fn submit_mineru_local_file(
1779    options: &OcrProcessOptions,
1780    attachment_path: &Path,
1781    file_name: &str,
1782) -> Result<MineruResultSource, String> {
1783    let auth_header = provider_auth_header_value(&options.api_key_env, "bearer")?;
1784    let upload_request = create_mineru_file_upload(
1785        options.provider_endpoint.as_deref(),
1786        file_name,
1787        options.attachment.as_deref().expect("attachment resolved"),
1788        &auth_header,
1789    )?;
1790    let upload_url = upload_request
1791        .pointer("/data/file_urls/0")
1792        .or_else(|| upload_request.pointer("/data/fileUrls/0"))
1793        .and_then(Value::as_str)
1794        .ok_or_else(|| "MinerU upload URL response missing data.file_urls[0]".to_string())?;
1795    let batch_id = upload_request
1796        .pointer("/data/batch_id")
1797        .or_else(|| upload_request.pointer("/data/batchId"))
1798        .and_then(Value::as_str)
1799        .ok_or_else(|| "MinerU upload URL response missing data.batch_id".to_string())?
1800        .to_string();
1801    let bytes = fs::read(attachment_path)
1802        .map_err(|err| format!("read attachment PDF {}: {err}", attachment_path.display()))?;
1803    put_bytes(upload_url, &bytes)?;
1804    let status = poll_mineru_batch(
1805        options.provider_endpoint.as_deref(),
1806        &batch_id,
1807        &auth_header,
1808        options.poll_interval_seconds,
1809        options.timeout_seconds,
1810    )?;
1811    let zip_url = mineru_batch_zip_url(&status)
1812        .ok_or_else(|| "MinerU completed batch missing full_zip_url".to_string())?;
1813    let zip_bytes = download_bytes(&zip_url)?;
1814    let result_dir = extract_zip_bytes_to_temp("zotron-mineru-result", &zip_bytes)?;
1815    mineru_result_source_from_dir(result_dir, Some(zip_bytes), Some(status), Some(batch_id))
1816}
1817
1818fn create_mineru_file_upload(
1819    endpoint: Option<&str>,
1820    file_name: &str,
1821    data_id: &str,
1822    auth_header: &str,
1823) -> Result<Value, String> {
1824    let url = mineru_file_urls_url(endpoint);
1825    let body = serde_json::json!({
1826        "files": [{"name": file_name, "data_id": data_id}],
1827        "model_version": "vlm",
1828        "is_ocr": false,
1829        "enable_formula": true,
1830        "enable_table": true,
1831        "language": "ch",
1832        "page_ranges": "1-200",
1833    });
1834    ureq::post(&url)
1835        .set("Authorization", auth_header)
1836        .send_json(body)
1837        .map_err(|err| format!("POST {url} failed: {err}"))?
1838        .into_json::<Value>()
1839        .map_err(|err| format!("POST {url} returned invalid JSON: {err}"))
1840}
1841
1842fn put_bytes(url: &str, bytes: &[u8]) -> Result<(), String> {
1843    ureq::put(url)
1844        .send_bytes(bytes)
1845        .map_err(|err| format!("PUT {url} failed: {err}"))?;
1846    Ok(())
1847}
1848
1849fn submit_mineru_task(
1850    provider: &str,
1851    input: &OcrRequestInput,
1852    endpoint: Option<String>,
1853    api_key_env: &str,
1854) -> Result<Value, String> {
1855    let request = build_ocr_provider_request(provider, input)?;
1856    let method = request
1857        .method
1858        .ok_or_else(|| "MinerU provider missing HTTP method".to_string())?;
1859    let mut transport = provider_http_transport_with_auth(Some(api_key_env), "bearer")?;
1860    transport.post_json(&ProviderHttpInvocation {
1861        provider: request.provider.to_string(),
1862        style: request.style.to_string(),
1863        method: method.to_string(),
1864        url: endpoint.or_else(|| request.url.map(ToString::to_string)),
1865        auth_header_name: request.auth_header.map(ToString::to_string),
1866        auth_header_value: None,
1867        body: request.body,
1868    })
1869}
1870
1871fn poll_mineru_task(
1872    endpoint: Option<&str>,
1873    task_id: &str,
1874    auth_header: &str,
1875    poll_interval_seconds: u64,
1876    timeout_seconds: u64,
1877) -> Result<Value, String> {
1878    let url = mineru_task_status_url(endpoint, task_id);
1879    let started = Instant::now();
1880    loop {
1881        let status = get_json_with_auth(&url, auth_header)?;
1882        let state = status
1883            .pointer("/data/state")
1884            .or_else(|| status.pointer("/data/status"))
1885            .and_then(Value::as_str)
1886            .unwrap_or("unknown");
1887        match state {
1888            "done" | "finished" | "success" => return Ok(status),
1889            "failed" | "error" => return Err(format!("MinerU task {task_id} failed: {status}")),
1890            _ => {
1891                if started.elapsed() >= Duration::from_secs(timeout_seconds) {
1892                    return Err(format!(
1893                        "MinerU task {task_id} timed out after {timeout_seconds}s with state {state}"
1894                    ));
1895                }
1896                thread::sleep(Duration::from_secs(poll_interval_seconds.max(1)));
1897            }
1898        }
1899    }
1900}
1901
/// Builds the status URL for `task_id`.
///
/// Trailing slashes on the endpoint are ignored. When the endpoint already ends in
/// `/extract/task` the id is appended directly; otherwise `/extract/task/` is inserted first.
fn mineru_task_status_url(endpoint: Option<&str>, task_id: &str) -> String {
    const TASK_SUFFIX: &str = "/extract/task";
    let base = endpoint
        .unwrap_or("https://mineru.net/api/v4/extract/task")
        .trim_end_matches('/');
    // Normalise to "base without the suffix" so a single format covers both cases.
    let root = base.strip_suffix(TASK_SUFFIX).unwrap_or(base);
    format!("{root}{TASK_SUFFIX}/{task_id}")
}
1912
1913fn mineru_file_urls_url(endpoint: Option<&str>) -> String {
1914    let base = mineru_api_base(endpoint);
1915    format!("{base}/file-urls/batch")
1916}
1917
1918fn mineru_batch_status_url(endpoint: Option<&str>, batch_id: &str) -> String {
1919    let base = mineru_api_base(endpoint);
1920    format!("{base}/extract-results/batch/{batch_id}")
1921}
1922
/// Derives the API base URL from an optional endpoint.
///
/// Trailing slashes are dropped, then a trailing `/extract/task` (checked first) or
/// `/extract` segment is removed. Anything else is returned unchanged.
fn mineru_api_base(endpoint: Option<&str>) -> String {
    let trimmed = endpoint
        .unwrap_or("https://mineru.net/api/v4/extract/task")
        .trim_end_matches('/');
    ["/extract/task", "/extract"]
        .iter()
        .find_map(|suffix| trimmed.strip_suffix(*suffix))
        .unwrap_or(trimmed)
        .to_string()
}
1935
1936fn poll_mineru_batch(
1937    endpoint: Option<&str>,
1938    batch_id: &str,
1939    auth_header: &str,
1940    poll_interval_seconds: u64,
1941    timeout_seconds: u64,
1942) -> Result<Value, String> {
1943    let url = mineru_batch_status_url(endpoint, batch_id);
1944    let started = Instant::now();
1945    loop {
1946        let status = get_json_with_auth(&url, auth_header)?;
1947        let state = mineru_batch_state(&status).unwrap_or("unknown");
1948        match state {
1949            "done" | "finished" | "success" => return Ok(status),
1950            "failed" | "error" => return Err(format!("MinerU batch {batch_id} failed: {status}")),
1951            _ => {
1952                if started.elapsed() >= Duration::from_secs(timeout_seconds) {
1953                    return Err(format!(
1954                        "MinerU batch {batch_id} timed out after {timeout_seconds}s with state {state}"
1955                    ));
1956                }
1957                thread::sleep(Duration::from_secs(poll_interval_seconds.max(1)));
1958            }
1959        }
1960    }
1961}
1962
1963fn mineru_batch_state(status: &Value) -> Option<&str> {
1964    status
1965        .pointer("/data/extract_result/0/state")
1966        .or_else(|| status.pointer("/data/extractResult/0/state"))
1967        .or_else(|| status.pointer("/data/state"))
1968        .and_then(Value::as_str)
1969}
1970
1971fn mineru_batch_zip_url(status: &Value) -> Option<String> {
1972    status
1973        .pointer("/data/extract_result/0/full_zip_url")
1974        .or_else(|| status.pointer("/data/extractResult/0/full_zip_url"))
1975        .or_else(|| status.pointer("/data/full_zip_url"))
1976        .and_then(Value::as_str)
1977        .map(ToString::to_string)
1978}
1979
/// Reads a credential from the environment and formats it as an `Authorization` value.
///
/// The value is trimmed. For the `"bearer"` scheme it is prefixed with `Bearer ` and for
/// `"token"` with `token `, unless it already starts with that prefix. Any other scheme
/// returns the trimmed value as is.
///
/// # Errors
/// Returns a message when the variable cannot be read or is empty after trimming.
fn provider_auth_header_value(api_key_env: &str, auth_scheme: &str) -> Result<String, String> {
    let raw = match env::var(api_key_env) {
        Ok(value) => value,
        Err(_) => return Err(format!("missing provider credential env var {api_key_env}")),
    };
    let token = raw.trim();
    if token.is_empty() {
        return Err(format!(
            "provider credential env var {api_key_env} is empty"
        ));
    }

    let scheme_prefix = match auth_scheme {
        "bearer" => "Bearer ",
        "token" => "token ",
        _ => return Ok(token.to_string()),
    };
    if token.starts_with(scheme_prefix) {
        Ok(token.to_string())
    } else {
        Ok(format!("{scheme_prefix}{token}"))
    }
}
1997
1998fn get_json_with_auth(url: &str, auth_header: &str) -> Result<Value, String> {
1999    ureq::get(url)
2000        .set("Authorization", auth_header)
2001        .call()
2002        .map_err(|err| format!("GET {url} failed: {err}"))?
2003        .into_json::<Value>()
2004        .map_err(|err| format!("GET {url} returned invalid JSON: {err}"))
2005}
2006
2007fn download_bytes(url: &str) -> Result<Vec<u8>, String> {
2008    let response = ureq::get(url)
2009        .call()
2010        .map_err(|err| format!("download {url} failed: {err}"))?;
2011    let mut bytes = Vec::new();
2012    response
2013        .into_reader()
2014        .read_to_end(&mut bytes)
2015        .map_err(|err| format!("read download {url}: {err}"))?;
2016    Ok(bytes)
2017}
2018
2019fn extract_zip_bytes_to_temp(prefix: &str, zip_bytes: &[u8]) -> Result<PathBuf, String> {
2020    let dir = unique_temp_path(prefix);
2021    fs::create_dir_all(&dir).map_err(|err| format!("create temp dir {}: {err}", dir.display()))?;
2022    let zip_path = dir.with_extension("zip");
2023    fs::write(&zip_path, zip_bytes)
2024        .map_err(|err| format!("write temp zip {}: {err}", zip_path.display()))?;
2025    let output = ProcessCommand::new("unzip")
2026        .arg("-q")
2027        .arg("-o")
2028        .arg(&zip_path)
2029        .arg("-d")
2030        .arg(&dir)
2031        .output()
2032        .map_err(|err| format!("run unzip: {err}"))?;
2033    if !output.status.success() {
2034        return Err(format!(
2035            "unzip {} failed: {}",
2036            zip_path.display(),
2037            String::from_utf8_lossy(&output.stderr).trim()
2038        ));
2039    }
2040    Ok(dir)
2041}
2042
/// Returns a path under the system temp directory that is unique within this process.
///
/// The file name is `<prefix>-<pid>-<nanos since epoch>-<sequence>`. The sequence number
/// is a process-wide counter, so two calls can never collide even when the clock returns
/// the same reading twice (coarse timers, or a clock before the epoch, which reads as 0).
/// Nothing is created on disk.
fn unique_temp_path(prefix: &str) -> PathBuf {
    use std::sync::atomic::{AtomicU64, Ordering};

    static SEQUENCE: AtomicU64 = AtomicU64::new(0);

    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|duration| duration.as_nanos())
        .unwrap_or(0);
    // Only uniqueness matters here, so relaxed ordering is sufficient.
    let sequence = SEQUENCE.fetch_add(1, Ordering::Relaxed);
    env::temp_dir().join(format!(
        "{prefix}-{}-{nanos}-{sequence}",
        std::process::id()
    ))
}
2050
2051fn mineru_result_source_from_dir(
2052    result_dir: PathBuf,
2053    raw_zip_bytes: Option<Vec<u8>>,
2054    task_status: Option<Value>,
2055    task_id: Option<String>,
2056) -> Result<MineruResultSource, String> {
2057    let (payload, content_list_file) = mineru_payload_from_result_dir(&result_dir)?;
2058    let markdown = find_first_file_by_name(&result_dir, "full.md")
2059        .map(|path| {
2060            fs::read_to_string(&path)
2061                .map_err(|err| format!("read native markdown {}: {err}", path.display()))
2062        })
2063        .transpose()?;
2064    Ok(MineruResultSource {
2065        task_id,
2066        state: "done".to_string(),
2067        result_dir,
2068        raw_zip_bytes,
2069        task_status,
2070        payload,
2071        content_list_file,
2072        markdown,
2073    })
2074}
2075
2076fn mineru_payload_from_result_dir(result_dir: &Path) -> Result<(Value, Option<PathBuf>), String> {
2077    let v2 = find_first_file_with_suffix(result_dir, "_content_list_v2.json");
2078    if let Some(path) = v2 {
2079        let value = read_json_file(&path)?;
2080        return Ok((serde_json::json!({"content_list_v2": value}), Some(path)));
2081    }
2082    let content_list = find_first_file_with_suffix(result_dir, "_content_list.json");
2083    if let Some(path) = content_list {
2084        let value = read_json_file(&path)?;
2085        return Ok((serde_json::json!({"content_list": value}), Some(path)));
2086    }
2087    let layout = find_first_file_by_name(result_dir, "layout.json");
2088    if let Some(path) = layout {
2089        return Ok((read_json_file(&path)?, Some(path)));
2090    }
2091    let markdown = find_first_file_by_name(result_dir, "full.md");
2092    if let Some(path) = markdown {
2093        let text = fs::read_to_string(&path)
2094            .map_err(|err| format!("read native markdown {}: {err}", path.display()))?;
2095        return Ok((serde_json::json!({"result": text}), Some(path)));
2096    }
2097    Err(format!(
2098        "MinerU result directory {} missing content_list_v2/content_list/layout/full.md",
2099        result_dir.display()
2100    ))
2101}
2102
2103fn read_json_file(path: &Path) -> Result<Value, String> {
2104    let raw = fs::read_to_string(path).map_err(|err| format!("read {}: {err}", path.display()))?;
2105    serde_json::from_str(&raw).map_err(|err| format!("parse JSON {}: {err}", path.display()))
2106}
2107
2108fn persist_mineru_result_sidecars(
2109    storage_dir: &Path,
2110    item_key: &str,
2111    attachment_key: &str,
2112    provider: &str,
2113    source: &MineruResultSource,
2114    chunk_chars: usize,
2115) -> Result<PersistedOcrArtifacts, String> {
2116    let blocks = parse_ocr_provider_response(provider, &source.payload, item_key, attachment_key)?;
2117    let chunks = zotron_types::chunks_from_blocks(&blocks, chunk_chars);
2118    let assets = copy_mineru_assets(&source.result_dir, storage_dir)?;
2119    let raw_bundle = serde_json::json!({
2120        "provider": provider,
2121        "item_key": item_key,
2122        "attachment_key": attachment_key,
2123        "task_id": source.task_id,
2124        "state": source.state,
2125        "task_status": source.task_status,
2126        "content_list_file": source.content_list_file,
2127        "payload": source.payload,
2128    });
2129
2130    let mut artifacts = Vec::new();
2131    artifacts.push(write_sidecar_json(
2132        storage_dir,
2133        item_key,
2134        attachment_key,
2135        MachineArtifactKind::OcrRaw,
2136        &raw_bundle,
2137    )?);
2138    artifacts.push(write_sidecar_jsonl(
2139        storage_dir,
2140        item_key,
2141        attachment_key,
2142        MachineArtifactKind::Blocks,
2143        &blocks,
2144    )?);
2145    artifacts.push(write_sidecar_jsonl(
2146        storage_dir,
2147        item_key,
2148        attachment_key,
2149        MachineArtifactKind::Chunks,
2150        &chunks,
2151    )?);
2152    if let Some(markdown) = source.markdown.as_deref() {
2153        artifacts.push(write_sidecar_bytes(
2154            storage_dir,
2155            item_key,
2156            attachment_key,
2157            MachineArtifactKind::OcrNativeMarkdown,
2158            markdown.as_bytes(),
2159        )?);
2160    }
2161    artifacts.push(write_sidecar_json(
2162        storage_dir,
2163        item_key,
2164        attachment_key,
2165        MachineArtifactKind::OcrNativeAssets,
2166        &assets,
2167    )?);
2168    if let Some(bytes) = source.raw_zip_bytes.as_deref() {
2169        artifacts.push(write_extra_sidecar_bytes(
2170            storage_dir,
2171            ".zotron/ocr/latest.raw.zip",
2172            bytes,
2173        )?);
2174    }
2175
2176    Ok(PersistedOcrArtifacts {
2177        block_count: blocks.len(),
2178        chunk_count: chunks.len(),
2179        artifacts,
2180    })
2181}
2182
2183fn write_sidecar_json(
2184    storage_dir: &Path,
2185    item_key: &str,
2186    attachment_key: &str,
2187    kind: MachineArtifactKind,
2188    value: &Value,
2189) -> Result<Value, String> {
2190    let bytes = serde_json::to_vec_pretty(value).map_err(|err| err.to_string())?;
2191    write_sidecar_bytes(storage_dir, item_key, attachment_key, kind, &bytes)
2192}
2193
2194fn write_sidecar_jsonl<T: serde::Serialize>(
2195    storage_dir: &Path,
2196    item_key: &str,
2197    attachment_key: &str,
2198    kind: MachineArtifactKind,
2199    values: &[T],
2200) -> Result<Value, String> {
2201    let mut out = String::new();
2202    for value in values {
2203        out.push_str(&serde_json::to_string(value).map_err(|err| err.to_string())?);
2204        out.push('\n');
2205    }
2206    write_sidecar_bytes(storage_dir, item_key, attachment_key, kind, out.as_bytes())
2207}
2208
2209fn write_sidecar_bytes(
2210    storage_dir: &Path,
2211    item_key: &str,
2212    attachment_key: &str,
2213    kind: MachineArtifactKind,
2214    bytes: &[u8],
2215) -> Result<Value, String> {
2216    let record = write_machine_artifact_sidecar(storage_dir, item_key, attachment_key, kind, bytes)
2217        .map_err(|err| format!("write sidecar {:?}: {err}", kind))?;
2218    Ok(serde_json::json!({
2219        "kind": kind,
2220        "relative_path": record.relative_path,
2221        "absolute_path": record.absolute_path,
2222    }))
2223}
2224
2225fn write_extra_sidecar_bytes(
2226    storage_dir: &Path,
2227    relative_path: &str,
2228    bytes: &[u8],
2229) -> Result<Value, String> {
2230    let absolute_path = storage_dir.join(relative_path);
2231    if let Some(parent) = absolute_path.parent() {
2232        fs::create_dir_all(parent).map_err(|err| format!("create {}: {err}", parent.display()))?;
2233    }
2234    fs::write(&absolute_path, bytes)
2235        .map_err(|err| format!("write sidecar {}: {err}", absolute_path.display()))?;
2236    Ok(serde_json::json!({
2237        "kind": "ocr_raw_zip",
2238        "relative_path": relative_path,
2239        "absolute_path": absolute_path,
2240    }))
2241}
2242
2243fn copy_mineru_assets(result_dir: &Path, storage_dir: &Path) -> Result<Value, String> {
2244    let mut images = Vec::new();
2245    for file in collect_files(result_dir)? {
2246        if !is_image_file(&file) {
2247            continue;
2248        }
2249        let relative = file.strip_prefix(result_dir).unwrap_or(&file).to_path_buf();
2250        let destination = storage_dir.join(".zotron").join("ocr").join(&relative);
2251        if let Some(parent) = destination.parent() {
2252            fs::create_dir_all(parent)
2253                .map_err(|err| format!("create {}: {err}", parent.display()))?;
2254        }
2255        fs::copy(&file, &destination).map_err(|err| {
2256            format!(
2257                "copy MinerU asset {} to {}: {err}",
2258                file.display(),
2259                destination.display()
2260            )
2261        })?;
2262        images.push(serde_json::json!({
2263            "source_relative": relative,
2264            "sidecar_relative": PathBuf::from(".zotron").join("ocr").join(&relative),
2265            "absolute_path": destination,
2266        }));
2267    }
2268    Ok(serde_json::json!({
2269        "provider": "mineru",
2270        "images": images,
2271    }))
2272}
2273
/// Reports whether the path's extension is `png`, `jpg`, `jpeg`, `webp` or `gif`,
/// compared without regard to ASCII case. Paths without a UTF-8 extension are not images.
fn is_image_file(path: &Path) -> bool {
    const IMAGE_EXTENSIONS: [&str; 5] = ["png", "jpg", "jpeg", "webp", "gif"];
    path.extension()
        .and_then(|ext| ext.to_str())
        .is_some_and(|ext| {
            IMAGE_EXTENSIONS
                .iter()
                .any(|known| ext.eq_ignore_ascii_case(known))
        })
}
2284
2285fn find_first_file_with_suffix(root: &Path, suffix: &str) -> Option<PathBuf> {
2286    collect_files(root).ok()?.into_iter().find(|path| {
2287        path.file_name()
2288            .and_then(|name| name.to_str())
2289            .is_some_and(|name| name.ends_with(suffix))
2290    })
2291}
2292
2293fn find_first_file_by_name(root: &Path, name: &str) -> Option<PathBuf> {
2294    collect_files(root).ok()?.into_iter().find(|path| {
2295        path.file_name()
2296            .and_then(|file_name| file_name.to_str())
2297            .is_some_and(|file_name| file_name == name)
2298    })
2299}
2300
2301fn collect_files(root: &Path) -> Result<Vec<PathBuf>, String> {
2302    let mut files = Vec::new();
2303    collect_files_into(root, &mut files)?;
2304    files.sort();
2305    Ok(files)
2306}
2307
/// Appends every regular file below `root` to `files`, descending into subdirectories.
///
/// Entries that are neither a regular file nor a directory are ignored.
///
/// # Errors
/// Returns a message when a directory cannot be listed or an entry cannot be inspected.
fn collect_files_into(root: &Path, files: &mut Vec<PathBuf>) -> Result<(), String> {
    let listing =
        fs::read_dir(root).map_err(|err| format!("read dir {}: {err}", root.display()))?;
    for next in listing {
        let entry = next.map_err(|err| format!("read dir entry {}: {err}", root.display()))?;
        let path = entry.path();
        let kind = match entry.file_type() {
            Ok(kind) => kind,
            Err(err) => return Err(format!("stat {}: {err}", path.display())),
        };
        if kind.is_file() {
            files.push(path);
            continue;
        }
        if kind.is_dir() {
            collect_files_into(&path, files)?;
        }
    }
    Ok(())
}
2323
2324fn ocr_async_task_result(provider: &str, payload: &Value) -> Option<Value> {
2325    let data = payload.get("data")?;
2326    let task_id = data.get("task_id").and_then(Value::as_str)?;
2327    Some(serde_json::json!({
2328        "provider": provider,
2329        "status": "submitted",
2330        "task_id": task_id,
2331        "state": data.get("state").and_then(Value::as_str).unwrap_or("submitted"),
2332        "result_url": data.get("full_zip_url").or_else(|| data.get("markdown_url")).cloned(),
2333        "raw": payload,
2334    }))
2335}
2336
2337fn ocr_input_from_file(
2338    file: String,
2339    item_key: Option<String>,
2340    attachment_key: Option<String>,
2341    mime_type: Option<String>,
2342) -> Result<OcrRequestInput, String> {
2343    let item_key = item_key
2344        .ok_or_else(|| "INVALID_ARGS: --item-key is required when using --file".to_string())?;
2345    let attachment_key = attachment_key.ok_or_else(|| {
2346        "INVALID_ARGS: --attachment-key is required when using --file".to_string()
2347    })?;
2348    let path = PathBuf::from(&file);
2349    let bytes = fs::read(&path).map_err(|err| format!("read {file}: {err}"))?;
2350    let file_name = path
2351        .file_name()
2352        .and_then(|name| name.to_str())
2353        .unwrap_or("document.pdf")
2354        .to_string();
2355    let mime_type = mime_type.unwrap_or_else(|| guess_mime_type(&path).to_string());
2356    Ok(OcrRequestInput {
2357        item_key,
2358        attachment_key,
2359        file_name,
2360        mime_type,
2361        content_base64: base64_encode(&bytes),
2362        source_url: None,
2363        local_path: Some(file),
2364        output_dir: None,
2365    })
2366}
2367
/// Maps a path's extension (ASCII case-insensitive) to a MIME type.
///
/// `png`, `jpg`/`jpeg` and `webp` map to their image types; everything else, including
/// paths without an extension, is reported as `application/pdf`.
fn guess_mime_type(path: &Path) -> &'static str {
    let extension = path
        .extension()
        .and_then(|ext| ext.to_str())
        .map(str::to_ascii_lowercase);
    match extension.as_deref() {
        Some("png") => "image/png",
        Some("jpg") | Some("jpeg") => "image/jpeg",
        Some("webp") => "image/webp",
        _ => "application/pdf",
    }
}
2382
/// Encodes `bytes` with the standard base64 alphabet (`A-Z a-z 0-9 + /`), padding the
/// output with `=` to a multiple of four characters.
fn base64_encode(bytes: &[u8]) -> String {
    const ALPHABET: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    // Picks the 6-bit group that starts `shift` bits from the bottom of a 24-bit word.
    let sextet = |word: u32, shift: u32| ALPHABET[((word >> shift) & 0x3f) as usize] as char;

    let mut encoded = String::with_capacity(bytes.len().div_ceil(3) * 4);
    for group in bytes.chunks(3) {
        // Pack up to three bytes into the top of a 24-bit word; missing bytes stay zero.
        let word = group
            .iter()
            .enumerate()
            .fold(0u32, |acc, (index, &byte)| {
                acc | (u32::from(byte) << (16 - 8 * index))
            });
        encoded.push(sextet(word, 18));
        encoded.push(sextet(word, 12));
        encoded.push(if group.len() > 1 { sextet(word, 6) } else { '=' });
        encoded.push(if group.len() > 2 { sextet(word, 0) } else { '=' });
    }
    encoded
}
2405
2406fn run_ocr_status_command(
2407    client: &mut impl RpcCaller,
2408    collection: String,
2409) -> Result<Value, String> {
2410    let collection_key = find_collection_in_tree(client, &collection)?
2411        .and_then(|node| node.get("key").cloned())
2412        .ok_or_else(|| format!("COLLECTION_NOT_FOUND: Collection not found: {collection:?}"))?;
2413    let raw = paginate_rpc(
2414        client,
2415        "collections.getItems",
2416        serde_json::json!({"key": collection_key}),
2417        500,
2418    )?;
2419    let items = raw
2420        .get("items")
2421        .and_then(Value::as_array)
2422        .or_else(|| raw.as_array())
2423        .ok_or_else(|| "collections.getItems returned non-array/non-items result".to_string())?
2424        .clone();
2425
2426    let mut has_ocr = 0usize;
2427    for item in &items {
2428        let item_key = item.get("key").cloned().unwrap_or(Value::Null);
2429        if has_ocr_artifact(client, &item_key)? || has_ocr_note(client, &item_key)? {
2430            has_ocr += 1;
2431        }
2432    }
2433
2434    Ok(serde_json::json!({
2435        "collection": collection,
2436        "total": items.len(),
2437        "has_ocr": has_ocr,
2438        "missing_ocr": items.len() - has_ocr,
2439    }))
2440}
2441
2442fn has_ocr_artifact(client: &mut impl RpcCaller, item_key: &Value) -> Result<bool, String> {
2443    if let Some(item_key) = item_key.as_str() {
2444        // Legacy external store lookup for artifacts produced before the
2445        // per-attachment hidden sidecar became the default.
2446        if machine_artifact_exists_for_item(
2447            machine_artifact_store_root(),
2448            item_key,
2449            MachineArtifactKind::Chunks,
2450        ) {
2451            return Ok(true);
2452        }
2453    }
2454
2455    let attachments = client.call(
2456        "attachments.list",
2457        Some(serde_json::json!({"parentKey": item_key.clone()})),
2458    )?;
2459    Ok(attachments.as_array().is_some_and(|attachments| {
2460        attachments.iter().any(|attachment| {
2461            let has_sidecar_chunks = attachment
2462                .get("path")
2463                .and_then(Value::as_str)
2464                .map(local_path_from_zotero_path)
2465                .as_deref()
2466                .map(Path::new)
2467                .and_then(Path::parent)
2468                .is_some_and(|dir| {
2469                    machine_artifact_exists_in_sidecar(dir, MachineArtifactKind::Chunks)
2470                });
2471            if has_sidecar_chunks {
2472                return true;
2473            }
2474
2475            // Read-only fallback for old Zotero-visible artifact attachments.
2476            attachment
2477                .get("title")
2478                .and_then(Value::as_str)
2479                .is_some_and(|title| title.ends_with("zotron-chunks.jsonl"))
2480        })
2481    }))
2482}
2483
2484fn local_path_from_zotero_path(path: &str) -> String {
2485    if is_wsl() && path.as_bytes().get(1) == Some(&b':') {
2486        return ProcessCommand::new("wslpath")
2487            .arg("-u")
2488            .arg(path)
2489            .output()
2490            .ok()
2491            .filter(|output| output.status.success())
2492            .and_then(|output| String::from_utf8(output.stdout).ok())
2493            .map(|converted| converted.trim().to_string())
2494            .filter(|converted| !converted.is_empty())
2495            .unwrap_or_else(|| path.to_string());
2496    }
2497    path.to_string()
2498}
2499
2500fn has_ocr_note(client: &mut impl RpcCaller, item_key: &Value) -> Result<bool, String> {
2501    let notes = client.call(
2502        "notes.list",
2503        Some(serde_json::json!({"parentKey": item_key.clone()})),
2504    )?;
2505    Ok(notes.as_array().is_some_and(|notes| {
2506        notes.iter().any(|note| {
2507            note.get("tags")
2508                .and_then(Value::as_array)
2509                .is_some_and(|tags| tags.iter().any(tag_is_ocr))
2510        })
2511    }))
2512}
2513
2514fn tag_is_ocr(tag: &Value) -> bool {
2515    tag.as_str() == Some("ocr")
2516        || tag
2517            .get("tag")
2518            .and_then(Value::as_str)
2519            .is_some_and(|tag| tag == "ocr")
2520}
2521
2522fn find_collection_in_tree(
2523    client: &mut impl RpcCaller,
2524    collection: &str,
2525) -> Result<Option<Value>, String> {
2526    let tree = client.call("collections.tree", None)?;
2527    let nodes = tree
2528        .as_array()
2529        .ok_or_else(|| "collections.tree returned non-array result".to_string())?;
2530    Ok(search_collection_tree(nodes, collection).cloned())
2531}
2532
2533fn search_collection_tree<'a>(nodes: &'a [Value], collection: &str) -> Option<&'a Value> {
2534    for node in nodes {
2535        if node.get("key").and_then(Value::as_str) == Some(collection)
2536            || node.get("name").and_then(Value::as_str) == Some(collection)
2537        {
2538            return Some(node);
2539        }
2540        if let Some(children) = node.get("children").and_then(Value::as_array) {
2541            if let Some(found) = search_collection_tree(children, collection) {
2542                return Some(found);
2543            }
2544        }
2545    }
2546    None
2547}
2548
2549fn run_command(command: Command, client: &mut impl RpcCaller) -> Result<String, String> {
2550    if let Command::Export(args) = command {
2551        return run_export(args, client);
2552    }
2553
2554    let (value, style) = match command {
2555        Command::Ping { .. } => (
2556            call_json(client, "system.ping", None)?,
2557            JsonStyle::PythonCompact,
2558        ),
2559        Command::Rpc {
2560            method,
2561            params_json,
2562            paginate,
2563            page_size,
2564            ..
2565        } => {
2566            let params = serde_json::from_str::<Value>(&params_json)
2567                .map_err(|err| format!("INVALID_JSON: params must be a JSON object: {err}"))?;
2568            if !params.is_object() {
2569                return Err("INVALID_JSON: params must be a JSON object".to_string());
2570            }
2571            if paginate {
2572                (
2573                    paginate_rpc(client, &method, params, page_size)?,
2574                    JsonStyle::Pretty,
2575                )
2576            } else {
2577                (call_json(client, &method, Some(params))?, JsonStyle::Pretty)
2578            }
2579        }
2580        Command::Push {
2581            json_file,
2582            pdf,
2583            collection,
2584            on_duplicate,
2585            dry_run,
2586            ..
2587        } => return run_push_command(json_file, pdf, collection, on_duplicate, dry_run, client),
2588        Command::System { command } => run_system_command(command, client)?,
2589        Command::Search(args) => {
2590            if let Some(mgmt) = args.management {
2591                run_search_management_command(mgmt, client)?
2592            } else {
2593                run_search(args, client)?
2594            }
2595        }
2596        Command::Items { command } => run_items_command(command, client)?,
2597        Command::Collections { command } => run_collections_command(command, client)?,
2598        Command::Notes { command } => run_notes_command(command, client)?,
2599        Command::Attachments { command } => run_attachments_command(command, client)?,
2600        Command::Settings { command } => run_settings_command(command, client)?,
2601        Command::Tags { command } => run_tags_command(command, client)?,
2602        Command::Annotations { command } => run_annotations_command(command, client)?,
2603        Command::Ocr { command } => {
2604            return run_ocr_command(command, client);
2605        }
2606        Command::Rag { command } => {
2607            return run_rag_command(command, client);
2608        }
2609        Command::Export(_) => unreachable!("export commands return raw output above"),
2610        Command::FindPdfs {
2611            collection, limit, ..
2612        } => run_find_pdfs_command(client, collection, limit)?,
2613    };
2614
2615    format_json(&value, style)
2616}
2617
2618fn run_rag_command(command: RagCommand, client: &mut impl RpcCaller) -> Result<String, String> {
2619    match command {
2620        RagCommand::Providers => format_json(
2621            &serde_json::json!({
2622                "providers": [
2623                    embedding_provider_spec("volcengine")?,
2624                    embedding_provider_spec("alibaba")?,
2625                    embedding_provider_spec("custom")?,
2626                ],
2627            }),
2628            JsonStyle::Pretty,
2629        ),
2630        RagCommand::Embed {
2631            provider,
2632            input,
2633            endpoint,
2634            model,
2635            input_type,
2636            api_key_env,
2637        } => {
2638            let value = run_embedding_provider_json_command(
2639                provider,
2640                input,
2641                endpoint,
2642                model,
2643                input_type,
2644                api_key_env,
2645            )?;
2646            format_json(&value, JsonStyle::PythonCompact)
2647        }
2648        RagCommand::Status { collection, .. } => {
2649            let value = rag_status_value(client, &collection)?;
2650            format_json(&value, JsonStyle::PythonCompact)
2651        }
2652        RagCommand::Search {
2653            query,
2654            collection,
2655            keys,
2656            zotero,
2657            top_spans_per_item,
2658            include_fulltext_spans,
2659            top_k,
2660            output,
2661            ..
2662        } => run_rag_search_command(
2663            client,
2664            RagSearchOptions {
2665                query,
2666                collection,
2667                keys,
2668                zotero,
2669                top_spans_per_item,
2670                include_fulltext_spans,
2671                top_k,
2672                output,
2673            },
2674        ),
2675    }
2676}
2677
2678fn run_embedding_provider_json_command(
2679    provider: String,
2680    input: String,
2681    endpoint: Option<String>,
2682    model: Option<String>,
2683    input_type: Option<String>,
2684    api_key_env: Option<String>,
2685) -> Result<Value, String> {
2686    let mut input: EmbeddingRequestInput = read_json_input(&input)?;
2687    if endpoint.is_some() {
2688        input.url = endpoint;
2689    }
2690    if model.is_some() {
2691        input.model = model;
2692    }
2693    if input_type.is_some() {
2694        input.input_type = input_type;
2695    }
2696    let mut transport = provider_http_transport(api_key_env.as_deref())?;
2697    let vectors = execute_embedding_provider_request(&provider, &input, &mut transport)?;
2698
2699    Ok(serde_json::json!({
2700        "provider": provider,
2701        "vectors": vectors,
2702    }))
2703}
2704
2705fn provider_http_transport(api_key_env: Option<&str>) -> Result<UreqProviderHttpTransport, String> {
2706    provider_http_transport_with_auth(api_key_env, "bearer")
2707}
2708
2709fn provider_http_transport_with_auth(
2710    api_key_env: Option<&str>,
2711    auth_scheme: &str,
2712) -> Result<UreqProviderHttpTransport, String> {
2713    let Some(env_name) = api_key_env else {
2714        return Ok(UreqProviderHttpTransport::new());
2715    };
2716    let token = env::var(env_name)
2717        .map_err(|_| format!("missing provider credential env var {env_name}"))?;
2718    if token.trim().is_empty() {
2719        return Err(format!("provider credential env var {env_name} is empty"));
2720    }
2721    let token = token.trim();
2722    match auth_scheme {
2723        "token" if token.starts_with("token ") => {
2724            Ok(UreqProviderHttpTransport::with_api_key(token.to_string()))
2725        }
2726        "token" => Ok(UreqProviderHttpTransport::with_api_key(format!(
2727            "token {token}"
2728        ))),
2729        "bearer" if token.starts_with("Bearer ") => {
2730            Ok(UreqProviderHttpTransport::with_api_key(token.to_string()))
2731        }
2732        "bearer" => Ok(UreqProviderHttpTransport::with_bearer_token(token)),
2733        "none" => Ok(UreqProviderHttpTransport::new()),
2734        other => Err(format!("unsupported provider auth scheme {other}")),
2735    }
2736}
2737
2738fn read_json_input<T: serde::de::DeserializeOwned>(path: &str) -> Result<T, String> {
2739    let payload = if path == "-" {
2740        let mut input = String::new();
2741        io::stdin()
2742            .read_to_string(&mut input)
2743            .map_err(|err| format!("read stdin: {err}"))?;
2744        input
2745    } else {
2746        fs::read_to_string(path).map_err(|err| format!("read {path}: {err}"))?
2747    };
2748    serde_json::from_str::<T>(&payload)
2749        .map_err(|err| format!("INVALID_JSON: Could not parse JSON: {err}"))
2750}
2751
2752fn fetch_embedding_settings(
2753    client: &mut impl RpcCaller,
2754) -> Result<(String, String, String, String), String> {
2755    let settings = client.call("settings.getAll", None)?;
2756    let provider = settings
2757        .get("embedding.provider")
2758        .and_then(Value::as_str)
2759        .unwrap_or("ollama")
2760        .to_string();
2761    let model = settings
2762        .get("embedding.model")
2763        .and_then(Value::as_str)
2764        .unwrap_or("")
2765        .to_string();
2766    let api_url = settings
2767        .get("embedding.apiUrl")
2768        .and_then(Value::as_str)
2769        .unwrap_or("")
2770        .to_string();
2771    let raw = client.call("settings.getRaw", Some(serde_json::json!({"key": "embedding.apiKey"})))?;
2772    let api_key = raw
2773        .get("embedding.apiKey")
2774        .and_then(Value::as_str)
2775        .unwrap_or("")
2776        .to_string();
2777    Ok((provider, model, api_url, api_key))
2778}
2779
2780fn fetch_retrieval_mode(client: &mut impl RpcCaller) -> String {
2781    client
2782        .call(
2783            "settings.get",
2784            Some(serde_json::json!({"key": "rag.retrievalMode"})),
2785        )
2786        .ok()
2787        .and_then(|v| {
2788            v.get("rag.retrievalMode")
2789                .and_then(Value::as_str)
2790                .map(String::from)
2791        })
2792        .unwrap_or_else(|| "hybrid".to_string())
2793}
2794
2795fn resolve_sidecar_paths(
2796    client: &mut impl RpcCaller,
2797    collection: Option<&str>,
2798    keys: &[String],
2799) -> Result<Vec<(String, String, PathBuf)>, String> {
2800    let items = if !keys.is_empty() {
2801        let mut items = Vec::new();
2802        for key in keys {
2803            let item = client.call("items.get", Some(serde_json::json!({"key": key})))?;
2804            items.push(item);
2805        }
2806        items
2807    } else if let Some(col) = collection {
2808        let col_key = resolve_collection(client, col)?;
2809        let response = client.call(
2810            "collections.getItems",
2811            Some(serde_json::json!({"key": col_key})),
2812        )?;
2813        collection_items(&response)
2814    } else {
2815        return Err("INVALID_ARGS: --collection or --key required".into());
2816    };
2817
2818    let mut results = Vec::new();
2819    for item in &items {
2820        let item_key = item.get("key").and_then(Value::as_str).unwrap_or_default();
2821        let attachments = client.call(
2822            "attachments.list",
2823            Some(serde_json::json!({"parentKey": item_key})),
2824        )?;
2825        let att_list = attachments
2826            .get("items")
2827            .and_then(Value::as_array)
2828            .or_else(|| attachments.as_array())
2829            .cloned()
2830            .unwrap_or_default();
2831        for att in &att_list {
2832            let content_type = att
2833                .get("contentType")
2834                .and_then(Value::as_str)
2835                .unwrap_or("");
2836            if content_type != "application/pdf" {
2837                continue;
2838            }
2839            let att_key = att.get("key").and_then(Value::as_str).unwrap_or_default();
2840            let path = att.get("path").and_then(Value::as_str).unwrap_or_default();
2841            if path.is_empty() {
2842                continue;
2843            }
2844            let local_path = local_path_from_zotero_path(path);
2845            let pdf_path = PathBuf::from(&local_path);
2846            if let Some(parent) = pdf_path.parent() {
2847                let sidecar_root = parent.join(".zotron");
2848                if sidecar_root.exists() {
2849                    results.push((item_key.to_string(), att_key.to_string(), sidecar_root));
2850                }
2851            }
2852        }
2853    }
2854    Ok(results)
2855}
2856
2857fn load_sidecar_chunks(sidecar_root: &Path) -> Vec<StructureChunk> {
2858    let chunks_path = sidecar_root.join("chunks").join("chunks.v1.jsonl");
2859    let Ok(content) = fs::read_to_string(&chunks_path) else {
2860        return Vec::new();
2861    };
2862    content
2863        .lines()
2864        .filter(|line| !line.trim().is_empty())
2865        .filter_map(|line| serde_json::from_str::<StructureChunk>(line).ok())
2866        .collect()
2867}
2868
/// Derives the per-provider/model vector file name inside an `embeddings` directory.
///
/// Both parts are trimmed, lower-cased and have `/` replaced by `-`, then
/// joined as `<provider>--<model>.jsonl`. When both parts normalize to the
/// empty string the name is `vectors.jsonl`.
fn embedding_vector_filename(provider: &str, model: &str) -> String {
    let normalize = |raw: &str| raw.trim().to_lowercase().replace('/', "-");
    let provider_part = normalize(provider);
    let model_part = normalize(model);

    match (provider_part.is_empty(), model_part.is_empty()) {
        (true, true) => String::from("vectors.jsonl"),
        _ => [provider_part.as_str(), "--", model_part.as_str(), ".jsonl"].concat(),
    }
}
2877
2878fn load_sidecar_vectors(sidecar_root: &Path, provider: &str, model: &str) -> Vec<EmbeddingVector> {
2879    let embeddings_dir = sidecar_root.join("embeddings");
2880    let target = embedding_vector_filename(provider, model);
2881    let target_path = embeddings_dir.join(&target);
2882    if let Ok(content) = fs::read_to_string(&target_path) {
2883        let vecs: Vec<EmbeddingVector> = content
2884            .lines()
2885            .filter(|line| !line.trim().is_empty())
2886            .filter_map(|line| serde_json::from_str(line).ok())
2887            .collect();
2888        if !vecs.is_empty() {
2889            return vecs;
2890        }
2891    }
2892    // Fallback: try legacy vectors.v1.jsonl / vectors.jsonl with provider match
2893    for legacy in &["vectors.v1.jsonl", "vectors.jsonl"] {
2894        let path = embeddings_dir.join(legacy);
2895        if let Ok(content) = fs::read_to_string(&path) {
2896            let vecs: Vec<EmbeddingVector> = content
2897                .lines()
2898                .filter(|line| !line.trim().is_empty())
2899                .filter_map(|line| serde_json::from_str::<EmbeddingVector>(line).ok())
2900                .filter(|v| v.source_provider == provider || provider.is_empty())
2901                .collect();
2902            if !vecs.is_empty() {
2903                return vecs;
2904            }
2905        }
2906    }
2907    Vec::new()
2908}
2909
2910fn embed_query_text(
2911    query: &str,
2912    provider: &str,
2913    model: &str,
2914    api_url: &str,
2915    api_key: &str,
2916) -> Result<Vec<f64>, String> {
2917    let input = EmbeddingRequestInput {
2918        item_key: "query".to_string(),
2919        chunks: vec![EmbeddingChunkInput {
2920            chunk_key: "q0".to_string(),
2921            text: query.to_string(),
2922        }],
2923        model: if model.is_empty() {
2924            None
2925        } else {
2926            Some(model.to_string())
2927        },
2928        url: if api_url.is_empty() {
2929            None
2930        } else {
2931            Some(api_url.to_string())
2932        },
2933        input_type: Some("query".to_string()),
2934    };
2935    let request = build_embedding_provider_request(provider, &input)?;
2936    let url = request
2937        .url
2938        .as_deref()
2939        .ok_or("no embedding URL configured")?;
2940    let mut http = ureq::post(url).set("Content-Type", "application/json");
2941    if let Some(auth) = request.auth_header {
2942        if !api_key.is_empty() {
2943            http = http.set(auth, &format!("Bearer {api_key}"));
2944        }
2945    }
2946    let resp = http
2947        .send_json(&request.body)
2948        .map_err(|e| format!("embedding request failed: {e}"))?;
2949    let payload: Value = resp
2950        .into_json()
2951        .map_err(|e| format!("embedding response parse: {e}"))?;
2952    let vectors =
2953        parse_embedding_provider_response(provider, &payload, "query", &input.chunks)?;
2954    vectors
2955        .into_iter()
2956        .next()
2957        .map(|v| v.vector)
2958        .ok_or_else(|| "no embedding vector returned".to_string())
2959}
2960
2961fn run_rag_search_xpi_fallback(
2962    client: &mut impl RpcCaller,
2963    options: &RagSearchOptions,
2964) -> Result<String, String> {
2965    let mut params = serde_json::json!({
2966        "query": options.query,
2967        "limit": options.top_k,
2968        "top_spans_per_item": options.top_spans_per_item,
2969        "include_fulltext_spans": options.include_fulltext_spans,
2970    });
2971    if let Some(map) = params.as_object_mut() {
2972        if let Some(col) = &options.collection {
2973            map.insert("collection".into(), Value::String(col.clone()));
2974        }
2975        if !options.keys.is_empty() {
2976            map.insert(
2977                "keys".into(),
2978                Value::Array(options.keys.iter().map(|k| Value::String(k.clone())).collect()),
2979            );
2980        }
2981    }
2982    let payload = client.call("rag.searchHits", Some(params))?;
2983    let hits = payload
2984        .get("hits")
2985        .and_then(Value::as_array)
2986        .cloned()
2987        .unwrap_or_default();
2988    if options.output == "jsonl" {
2989        let mut out = String::new();
2990        for hit in &hits {
2991            out.push_str(&serde_json::to_string(hit).map_err(|e| e.to_string())?);
2992            out.push('\n');
2993        }
2994        Ok(out)
2995    } else {
2996        let total = hits.len() as u64;
2997        format_json(
2998            &normalize_list_envelope(
2999                serde_json::json!({"items": hits, "total": total}),
3000                "items",
3001                Some(options.top_k),
3002                0,
3003            ),
3004            JsonStyle::Pretty,
3005        )
3006    }
3007}
3008
3009fn run_rag_search_command(
3010    client: &mut impl RpcCaller,
3011    options: RagSearchOptions,
3012) -> Result<String, String> {
3013    // When --zotero is explicitly passed, use XPI fallback directly (backward compat).
3014    if options.zotero {
3015        if options.collection.is_none() && options.keys.is_empty() {
3016            return Err(
3017                "INVALID_ARGS: --collection or --key is required".to_string(),
3018            );
3019        }
3020        return run_rag_search_xpi_fallback(client, &options);
3021    }
3022
3023    // Hybrid path: require scope.
3024    if options.collection.is_none() && options.keys.is_empty() {
3025        return Err("INVALID_ARGS: --collection or --key required".to_string());
3026    }
3027
3028    // Step 1: resolve sidecar paths from collection/keys.
3029    let sidecars = resolve_sidecar_paths(
3030        client,
3031        options.collection.as_deref(),
3032        &options.keys,
3033    );
3034
3035    // If sidecar resolution fails or returns empty, fall back to XPI.
3036    let sidecars = match sidecars {
3037        Ok(ref s) if !s.is_empty() => s,
3038        _ => return run_rag_search_xpi_fallback(client, &options),
3039    };
3040
3041    // Step 2: load all chunks and vectors from sidecars.
3042    let (emb_provider, emb_model, emb_url, emb_key) = fetch_embedding_settings(client)?;
3043    let mut all_chunks: Vec<StructureChunk> = Vec::new();
3044    let mut all_vectors: Vec<EmbeddingVector> = Vec::new();
3045    for (_item_key, _att_key, sidecar_root) in sidecars {
3046        all_chunks.extend(load_sidecar_chunks(sidecar_root));
3047        all_vectors.extend(load_sidecar_vectors(sidecar_root, &emb_provider, &emb_model));
3048    }
3049
3050    if all_chunks.is_empty() {
3051        return run_rag_search_xpi_fallback(client, &options);
3052    }
3053
3054    // Step 3: determine retrieval mode.
3055    let mode = fetch_retrieval_mode(client);
3056
3057    // Step 4: BM25 scoring (unless mode is "dense").
3058    let bm25_ranked = if mode != "dense" {
3059        bm25_score_chunks(&all_chunks, &options.query, 1.2, 0.75)
3060    } else {
3061        Vec::new()
3062    };
3063
3064    // Step 5: dense vector scoring (unless mode is "lexical" or no vectors).
3065    let dense_ranked = if mode != "lexical" && !all_vectors.is_empty() {
3066        match embed_query_text(&options.query, &emb_provider, &emb_model, &emb_url, &emb_key) {
3067            Ok(query_vec) => {
3068                let vec_map: std::collections::HashMap<&str, &[f64]> = all_vectors
3069                    .iter()
3070                    .map(|v| (v.chunk_key.as_str(), v.vector.as_slice()))
3071                    .collect();
3072                let mut scores: Vec<(usize, f64)> = all_chunks
3073                    .iter()
3074                    .enumerate()
3075                    .filter_map(|(i, chunk)| {
3076                        vec_map.get(chunk.chunk_key.as_str()).map(|stored| {
3077                            (i, cosine_similarity(&query_vec, stored))
3078                        })
3079                    })
3080                    .filter(|(_, s)| *s > 0.0)
3081                    .collect();
3082                scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
3083                scores
3084            }
3085            Err(_) => Vec::new(),
3086        }
3087    } else {
3088        Vec::new()
3089    };
3090
3091    // Step 6: merge results.
3092    let limit = options.top_k as usize;
3093    let ranked = if !bm25_ranked.is_empty() && !dense_ranked.is_empty() {
3094        rrf_merge(&bm25_ranked, &dense_ranked, 60.0, limit)
3095    } else if !bm25_ranked.is_empty() {
3096        bm25_ranked.into_iter().take(limit).collect()
3097    } else {
3098        dense_ranked.into_iter().take(limit).collect()
3099    };
3100
3101    // Step 7: apply per-item span limit.
3102    let mut per_item_count: std::collections::HashMap<&str, u64> =
3103        std::collections::HashMap::new();
3104    let mut selected: Vec<(usize, f64)> = Vec::new();
3105    for (idx, score) in &ranked {
3106        let item_key = all_chunks[*idx].item_key.as_str();
3107        let count = per_item_count.entry(item_key).or_insert(0);
3108        if *count < options.top_spans_per_item {
3109            *count += 1;
3110            selected.push((*idx, *score));
3111        }
3112    }
3113
3114    // Step 8: enrich hits with item metadata.
3115    let mut meta_cache: std::collections::HashMap<String, Value> =
3116        std::collections::HashMap::new();
3117    let mut hits: Vec<Value> = Vec::new();
3118    for (idx, score) in &selected {
3119        let chunk = &all_chunks[*idx];
3120        let meta = if let Some(cached) = meta_cache.get(&chunk.item_key) {
3121            cached.clone()
3122        } else {
3123            let fetched = client
3124                .call(
3125                    "items.get",
3126                    Some(serde_json::json!({"key": chunk.item_key})),
3127                )
3128                .unwrap_or(Value::Null);
3129            meta_cache.insert(chunk.item_key.clone(), fetched.clone());
3130            fetched
3131        };
3132        let title = meta
3133            .get("title")
3134            .and_then(Value::as_str)
3135            .unwrap_or("")
3136            .to_string();
3137        let authors = meta
3138            .get("creators")
3139            .and_then(Value::as_array)
3140            .map(|creators| {
3141                creators
3142                    .iter()
3143                    .filter_map(|c| {
3144                        let last = c.get("lastName").and_then(Value::as_str).unwrap_or("");
3145                        let first = c.get("firstName").and_then(Value::as_str).unwrap_or("");
3146                        if last.is_empty() && first.is_empty() {
3147                            None
3148                        } else {
3149                            Some(format!("{last}{first}"))
3150                        }
3151                    })
3152                    .collect::<Vec<_>>()
3153                    .join(", ")
3154            })
3155            .unwrap_or_default();
3156        let year = meta.get("date").and_then(Value::as_str).unwrap_or("");
3157        let mut hit = serde_json::json!({
3158            "item_key": chunk.item_key,
3159            "chunk_key": chunk.chunk_key,
3160            "title": title,
3161            "authors": authors,
3162            "year": year,
3163            "text": chunk.text,
3164            "page_range": chunk.page_range,
3165            "section_path": chunk.section_path,
3166            "score": score,
3167        });
3168        if options.include_fulltext_spans {
3169            hit.as_object_mut().unwrap().insert(
3170                "attachment_key".to_string(),
3171                Value::String(chunk.attachment_key.clone()),
3172            );
3173        }
3174        hits.push(hit);
3175    }
3176
3177    // Step 9: format output.
3178    if options.output == "jsonl" {
3179        let mut out = String::new();
3180        for hit in &hits {
3181            out.push_str(&serde_json::to_string(hit).map_err(|e| e.to_string())?);
3182            out.push('\n');
3183        }
3184        Ok(out)
3185    } else {
3186        let total = hits.len() as u64;
3187        format_json(
3188            &normalize_list_envelope(
3189                serde_json::json!({"items": hits, "total": total}),
3190                "items",
3191                Some(options.top_k),
3192                0,
3193            ),
3194            JsonStyle::Pretty,
3195        )
3196    }
3197}
3198
3199fn rag_status_value(client: &mut impl RpcCaller, collection: &str) -> Result<Value, String> {
3200    let raw_store_path = rag_store_path(collection);
3201    if raw_store_path.exists() {
3202        return rag_status_from_store(collection, &raw_store_path);
3203    }
3204
3205    let mut store_candidates = Vec::new();
3206    let collection_match = find_collection_in_tree(client, collection)?;
3207    if let Some(collection_node) = collection_match.as_ref() {
3208        if let Some(name) = collection_node.get("name").and_then(Value::as_str) {
3209            store_candidates.push(rag_store_path(name));
3210        }
3211        if let Some(key) = collection_node.get("key").and_then(Value::as_str) {
3212            store_candidates.push(rag_store_path(key));
3213        }
3214    }
3215    for store_path in unique_paths(store_candidates) {
3216        if store_path.exists() {
3217            return rag_status_from_store(collection, &store_path);
3218        }
3219    }
3220
3221    rag_status_from_zotero_sidecars(client, collection, collection_match)
3222}
3223
/// Removes duplicate paths while keeping the first occurrence of each, in order.
fn unique_paths(paths: Vec<PathBuf>) -> Vec<PathBuf> {
    paths.into_iter().fold(Vec::new(), |mut kept, candidate| {
        if !kept.contains(&candidate) {
            kept.push(candidate);
        }
        kept
    })
}
3233
3234fn rag_status_from_store(collection: &str, store_path: &Path) -> Result<Value, String> {
3235    let raw = fs::read_to_string(store_path)
3236        .map_err(|err| format!("read RAG store {}: {err}", store_path.display()))?;
3237    let store: Value = serde_json::from_str(&raw)
3238        .map_err(|err| format!("parse RAG store {}: {err}", store_path.display()))?;
3239    let chunks = store
3240        .get("chunks")
3241        .and_then(Value::as_array)
3242        .cloned()
3243        .unwrap_or_default();
3244    let mut item_keys = Vec::<Value>::new();
3245    for chunk in &chunks {
3246        let Some(item_key) = chunk.get("item_key") else {
3247            continue;
3248        };
3249        if !item_keys.iter().any(|seen| seen == item_key) {
3250            item_keys.push(item_key.clone());
3251        }
3252    }
3253    Ok(serde_json::json!({
3254        "status": "indexed",
3255        "collection": store.get("collection").and_then(Value::as_str).unwrap_or(collection),
3256        "collection_key": store.get("collection_key").cloned().unwrap_or(Value::Null),
3257        "model": store.get("model").cloned().unwrap_or(Value::String("unknown".to_string())),
3258        "total_chunks": chunks.len(),
3259        "total_items": item_keys.len(),
3260        "store_path": store_path.to_string_lossy(),
3261    }))
3262}
3263
3264fn rag_status_from_zotero_sidecars(
3265    client: &mut impl RpcCaller,
3266    collection: &str,
3267    collection_match: Option<Value>,
3268) -> Result<Value, String> {
3269    let collection_key = collection_match
3270        .as_ref()
3271        .and_then(|node| node.get("key").cloned())
3272        .ok_or_else(|| format!("COLLECTION_NOT_FOUND: Collection not found: {collection:?}"))?;
3273    let raw = paginate_rpc(
3274        client,
3275        "collections.getItems",
3276        serde_json::json!({"key": collection_key}),
3277        500,
3278    )?;
3279    let items = raw
3280        .get("items")
3281        .and_then(Value::as_array)
3282        .or_else(|| raw.as_array())
3283        .ok_or_else(|| "collections.getItems returned non-array/non-items result".to_string())?
3284        .clone();
3285
3286    let mut indexed_items = 0usize;
3287    let mut total_chunks = 0usize;
3288    for item in &items {
3289        let item_key = item.get("key").cloned().unwrap_or(Value::Null);
3290        let chunk_count = sidecar_chunk_count_for_item(client, &item_key)?;
3291        if chunk_count > 0 {
3292            indexed_items += 1;
3293            total_chunks += chunk_count;
3294        }
3295    }
3296
3297    if indexed_items == 0 {
3298        return Ok(serde_json::json!({
3299            "status": "not indexed",
3300            "collection": collection,
3301            "total_items": items.len(),
3302            "indexed_items": 0,
3303        }));
3304    }
3305
3306    Ok(serde_json::json!({
3307        "status": "indexed",
3308        "collection": collection,
3309        "total_chunks": total_chunks,
3310        "total_items": indexed_items,
3311        "collection_items": items.len(),
3312        "source": "zotero-sidecar",
3313    }))
3314}
3315
3316fn sidecar_chunk_count_for_item(
3317    client: &mut impl RpcCaller,
3318    item_key: &Value,
3319) -> Result<usize, String> {
3320    let attachments = client.call(
3321        "attachments.list",
3322        Some(serde_json::json!({"parentKey": item_key.clone()})),
3323    )?;
3324    let Some(attachments) = attachments.as_array() else {
3325        return Ok(0);
3326    };
3327
3328    let mut count = 0usize;
3329    for attachment in attachments {
3330        let Some(path) = attachment.get("path").and_then(Value::as_str) else {
3331            continue;
3332        };
3333        let local = local_path_from_zotero_path(path);
3334        let Some(dir) = Path::new(&local).parent() else {
3335            continue;
3336        };
3337        let Ok(bytes) = read_machine_artifact_sidecar(dir, MachineArtifactKind::Chunks) else {
3338            continue;
3339        };
3340        let text = String::from_utf8_lossy(&bytes);
3341        count += text.lines().filter(|line| !line.trim().is_empty()).count();
3342    }
3343    Ok(count)
3344}
3345
3346fn rag_store_path(collection: &str) -> PathBuf {
3347    rag_store_root().join(format!("{collection}.json"))
3348}
3349
3350fn rag_store_root() -> PathBuf {
3351    let xdg_data_home = env::var_os("XDG_DATA_HOME")
3352        .filter(|path| !path.is_empty())
3353        .map(PathBuf::from);
3354    let appdata = env::var_os("APPDATA")
3355        .filter(|path| !path.is_empty())
3356        .map(PathBuf::from);
3357    let userprofile = env::var_os("USERPROFILE")
3358        .filter(|path| !path.is_empty())
3359        .map(PathBuf::from);
3360    let home = env::var_os("HOME")
3361        .filter(|path| !path.is_empty())
3362        .map(PathBuf::from);
3363
3364    rag_store_root_for_platform(
3365        ArtifactStorePlatform::current(),
3366        xdg_data_home.as_deref(),
3367        appdata.as_deref(),
3368        userprofile.as_deref(),
3369        home.as_deref(),
3370    )
3371}
3372
3373fn rag_store_root_for_platform(
3374    platform: ArtifactStorePlatform,
3375    xdg_data_home: Option<&Path>,
3376    appdata: Option<&Path>,
3377    userprofile: Option<&Path>,
3378    home: Option<&Path>,
3379) -> PathBuf {
3380    match platform {
3381        ArtifactStorePlatform::Windows => {
3382            if let Some(path) = appdata {
3383                return path.join("Zotron").join("rag");
3384            }
3385            if let Some(path) = userprofile {
3386                return path
3387                    .join("AppData")
3388                    .join("Roaming")
3389                    .join("Zotron")
3390                    .join("rag");
3391            }
3392            if let Some(path) = home {
3393                return path
3394                    .join("AppData")
3395                    .join("Roaming")
3396                    .join("Zotron")
3397                    .join("rag");
3398            }
3399            PathBuf::from(".zotron").join("rag")
3400        }
3401        ArtifactStorePlatform::Macos => {
3402            if let Some(path) = home {
3403                return path
3404                    .join("Library")
3405                    .join("Application Support")
3406                    .join("Zotron")
3407                    .join("rag");
3408            }
3409            if let Some(path) = xdg_data_home {
3410                return path.join("zotron").join("rag");
3411            }
3412            PathBuf::from(".zotron").join("rag")
3413        }
3414        ArtifactStorePlatform::Linux | ArtifactStorePlatform::Other => xdg_data_home
3415            .map(|path| path.join("zotron").join("rag"))
3416            .or_else(|| {
3417                home.map(|path| path.join(".local").join("share").join("zotron").join("rag"))
3418            })
3419            .unwrap_or_else(|| PathBuf::from(".zotron").join("rag")),
3420    }
3421}
3422
3423fn run_push_command(
3424    json_file: String,
3425    pdf: Option<String>,
3426    collection: Option<String>,
3427    on_duplicate: String,
3428    dry_run: bool,
3429    client: &mut impl RpcCaller,
3430) -> Result<String, String> {
3431    if !matches!(on_duplicate.as_str(), "skip" | "update" | "create") {
3432        return Err(format!(
3433            "INVALID_ARGS: --on-duplicate must be skip|update|create, got {on_duplicate:?}"
3434        ));
3435    }
3436
3437    let payload = if json_file == "-" {
3438        let mut input = String::new();
3439        io::stdin()
3440            .read_to_string(&mut input)
3441            .map_err(|err| format!("read stdin: {err}"))?;
3442        input
3443    } else {
3444        fs::read_to_string(&json_file).map_err(|err| format!("read {json_file}: {err}"))?
3445    };
3446    let item_json = serde_json::from_str::<Value>(&payload)
3447        .map_err(|err| format!("INVALID_JSON: Could not parse JSON: {err}"))?;
3448
3449    if dry_run {
3450        let collection_key = collection
3451            .as_deref()
3452            .map(|name| resolve_collection(client, name))
3453            .transpose()?;
3454        return format_json(
3455            &serde_json::json!({
3456                "ok": true,
3457                "dryRun": true,
3458                "wouldPush": {
3459                    "title": item_json.get("title").cloned().unwrap_or(Value::Null),
3460                    "itemType": item_json.get("itemType").cloned().unwrap_or(Value::Null),
3461                    "collectionKey": collection_key,
3462                    "pdfPath": pdf,
3463                    "onDuplicate": on_duplicate,
3464                }
3465            }),
3466            JsonStyle::PythonCompact,
3467        );
3468    }
3469
3470    let result = push_item(
3471        client,
3472        &item_json,
3473        pdf.as_deref(),
3474        collection.as_deref(),
3475        &on_duplicate,
3476    )?;
3477    format_json(&result, JsonStyle::PythonCompact)
3478}
3479
3480fn push_item(
3481    client: &mut impl RpcCaller,
3482    item_json: &Value,
3483    pdf_path: Option<&str>,
3484    collection: Option<&str>,
3485    on_duplicate: &str,
3486) -> Result<Value, String> {
3487    let pdf_size = if let Some(path) = pdf_path {
3488        validate_pdf_magic(path)?
3489    } else {
3490        0
3491    };
3492
3493    let collection_key = match collection {
3494        Some(name) => resolve_collection(client, name)?,
3495        None => resolve_current_collection(client)?,
3496    };
3497
3498    let dup_id = find_duplicate(client, item_json)?;
3499    if let Some(dup_id) = dup_id.as_deref().filter(|_| on_duplicate == "skip") {
3500        if !is_library_root(&collection_key) {
3501            client.call(
3502                "collections.addItems",
3503                Some(serde_json::json!({"key": collection_key, "keys": [dup_id]})),
3504            )?;
3505        }
3506        let mut pdf_attached = false;
3507        if let Some(path) = pdf_path {
3508            if !item_has_pdf_attachment(client, dup_id)? {
3509                attach_pdf(client, dup_id, path)?;
3510                pdf_attached = true;
3511            }
3512        }
3513        return Ok(push_result(
3514            "skipped_duplicate",
3515            Some(dup_id.to_string()),
3516            pdf_attached,
3517            if pdf_attached { pdf_size } else { 0 },
3518            Value::Null,
3519        ));
3520    }
3521
3522    let xpi_payload = to_xpi_payload(item_json, Some(&collection_key));
3523    let (item_key, status) =
3524        if let Some(dup_id) = dup_id.as_deref().filter(|_| on_duplicate == "update") {
3525            let mut params = serde_json::Map::new();
3526            params.insert("key".to_string(), Value::String(dup_id.to_string()));
3527            params.insert(
3528                "fields".to_string(),
3529                xpi_payload
3530                    .get("fields")
3531                    .cloned()
3532                    .unwrap_or_else(|| serde_json::json!({})),
3533            );
3534            if let Some(creators) = xpi_payload.get("creators") {
3535                params.insert("creators".to_string(), creators.clone());
3536            }
3537            if let Some(tags) = xpi_payload.get("tags") {
3538                params.insert("tags".to_string(), tags.clone());
3539            }
3540            client.call("items.update", Some(Value::Object(params)))?;
3541            (dup_id.to_string(), "updated")
3542        } else {
3543            let created = client.call("items.create", Some(xpi_payload))?;
3544            let key = created
3545                .get("key")
3546                .and_then(Value::as_str)
3547                .ok_or_else(|| format!("items.create returned unexpected shape: {created:?}"))?;
3548            (key.to_string(), "created")
3549        };
3550
3551    let mut pdf_attached = false;
3552    if let Some(path) = pdf_path {
3553        if status != "updated" || !item_has_pdf_attachment(client, &item_key)? {
3554            attach_pdf(client, &item_key, path)?;
3555            pdf_attached = true;
3556        }
3557    }
3558
3559    if status == "updated" && !is_library_root(&collection_key) {
3560        client.call(
3561            "collections.addItems",
3562            Some(serde_json::json!({"key": collection_key, "keys": [item_key]})),
3563        )?;
3564    }
3565
3566    Ok(push_result(
3567        status,
3568        Some(item_key),
3569        pdf_attached,
3570        if pdf_attached { pdf_size } else { 0 },
3571        Value::Null,
3572    ))
3573}
3574
/// Checks that `path` names a readable file beginning with the `%PDF-` magic
/// bytes and returns its size in bytes.
///
/// Only the five-byte header is read; the size comes from the file metadata,
/// so large PDFs are not loaded into memory just to be validated.
///
/// # Errors
///
/// Returns the same `INVALID_PDF: ...` message whether the file cannot be
/// opened or read, is shorter than the magic, or starts with different bytes.
fn validate_pdf_magic(path: &str) -> Result<u64, String> {
    const MAGIC: &[u8; 5] = b"%PDF-";
    let invalid = || format!("INVALID_PDF: {path} does not start with %PDF- magic bytes");

    let mut file = fs::File::open(path).map_err(|_| invalid())?;
    let mut header = [0u8; 5];
    // `read_exact` fails on files shorter than the magic, which is also invalid.
    file.read_exact(&mut header).map_err(|_| invalid())?;
    if &header != MAGIC {
        return Err(invalid());
    }
    let size = file.metadata().map_err(|_| invalid())?.len();
    Ok(size)
}
3585
3586fn resolve_current_collection(client: &mut impl RpcCaller) -> Result<Value, String> {
3587    let selected = client.call("system.currentCollection", None)?;
3588    Ok(selected
3589        .get("key")
3590        .cloned()
3591        .unwrap_or_else(|| Value::Number(0.into())))
3592}
3593
3594fn find_duplicate(
3595    client: &mut impl RpcCaller,
3596    item_json: &Value,
3597) -> Result<Option<String>, String> {
3598    if let Some(doi) = item_json
3599        .get("DOI")
3600        .and_then(Value::as_str)
3601        .filter(|doi| !doi.is_empty())
3602    {
3603        let hits = client.call("search.byIdentifier", Some(serde_json::json!({"doi": doi})))?;
3604        if let Some(key) = first_hit_key(&hits) {
3605            return Ok(Some(key));
3606        }
3607    }
3608
3609    if let Some(title) = item_json
3610        .get("title")
3611        .and_then(Value::as_str)
3612        .filter(|title| title.len() >= 10)
3613    {
3614        let hits = client.call(
3615            "search.quick",
3616            Some(serde_json::json!({"query": title, "limit": 20})),
3617        )?;
3618        if let Some(items) = response_items(&hits) {
3619            for item in items {
3620                if item.get("title").and_then(Value::as_str) == Some(title) {
3621                    if let Some(key) = item.get("key").and_then(Value::as_str) {
3622                        return Ok(Some(key.to_string()));
3623                    }
3624                }
3625            }
3626        }
3627    }
3628
3629    Ok(None)
3630}
3631
3632fn first_hit_key(response: &Value) -> Option<String> {
3633    response_items(response)?
3634        .first()?
3635        .get("key")?
3636        .as_str()
3637        .map(ToString::to_string)
3638}
3639
3640fn response_items(response: &Value) -> Option<&Vec<Value>> {
3641    response
3642        .get("items")
3643        .and_then(Value::as_array)
3644        .or_else(|| response.as_array())
3645}
3646
3647fn to_xpi_payload(item_json: &Value, collection_key: Option<&Value>) -> Value {
3648    const NON_FIELD_KEYS: &[&str] = &[
3649        "itemType",
3650        "creators",
3651        "tags",
3652        "collections",
3653        "attachments",
3654        "relations",
3655        "notes",
3656        "id",
3657        "key",
3658        "version",
3659    ];
3660
3661    let mut fields = serde_json::Map::new();
3662    if let Some(item) = item_json.as_object() {
3663        for (key, value) in item {
3664            if !NON_FIELD_KEYS.contains(&key.as_str()) && !value.is_null() && value != "" {
3665                fields.insert(key.clone(), value.clone());
3666            }
3667        }
3668    }
3669
3670    let mut payload = serde_json::Map::new();
3671    payload.insert(
3672        "itemType".to_string(),
3673        item_json
3674            .get("itemType")
3675            .cloned()
3676            .unwrap_or_else(|| Value::String("journalArticle".to_string())),
3677    );
3678    payload.insert("fields".to_string(), Value::Object(fields));
3679
3680    if let Some(creators) = item_json.get("creators").and_then(Value::as_array) {
3681        if !creators.is_empty() {
3682            payload.insert(
3683                "creators".to_string(),
3684                Value::Array(
3685                    creators
3686                        .iter()
3687                        .map(|creator| {
3688                            serde_json::json!({
3689                                "firstName": creator.get("firstName").and_then(Value::as_str).unwrap_or(""),
3690                                "lastName": creator.get("lastName").and_then(Value::as_str).unwrap_or(""),
3691                                "creatorType": creator.get("creatorType").and_then(Value::as_str).unwrap_or("author"),
3692                            })
3693                        })
3694                        .collect(),
3695                ),
3696            );
3697        }
3698    }
3699
3700    if let Some(tags) = item_json.get("tags").and_then(Value::as_array) {
3701        if !tags.is_empty() {
3702            payload.insert(
3703                "tags".to_string(),
3704                Value::Array(
3705                    tags.iter()
3706                        .map(|tag| tag.get("tag").cloned().unwrap_or_else(|| tag.clone()))
3707                        .collect(),
3708                ),
3709            );
3710        }
3711    }
3712
3713    if let Some(collection_key) = collection_key.filter(|key| !is_library_root(key)) {
3714        payload.insert(
3715            "collections".to_string(),
3716            Value::Array(vec![collection_key.clone()]),
3717        );
3718    }
3719
3720    Value::Object(payload)
3721}
3722
3723fn item_has_pdf_attachment(client: &mut impl RpcCaller, item_key: &str) -> Result<bool, String> {
3724    let attachments = client.call(
3725        "attachments.list",
3726        Some(serde_json::json!({"parentKey": item_key})),
3727    )?;
3728    Ok(has_pdf_attachment(&attachments))
3729}
3730
3731fn attach_pdf(client: &mut impl RpcCaller, item_key: &str, path: &str) -> Result<(), String> {
3732    client.call(
3733        "attachments.add",
3734        Some(serde_json::json!({
3735            "parentKey": item_key,
3736            "path": zotero_path(path),
3737            "title": "Full Text PDF",
3738        })),
3739    )?;
3740    Ok(())
3741}
3742
3743fn zotero_path(path: &str) -> String {
3744    let path = Path::new(path)
3745        .canonicalize()
3746        .unwrap_or_else(|_| Path::new(path).to_path_buf())
3747        .to_string_lossy()
3748        .into_owned();
3749    if is_wsl() {
3750        return ProcessCommand::new("wslpath")
3751            .arg("-w")
3752            .arg(&path)
3753            .output()
3754            .ok()
3755            .filter(|output| output.status.success())
3756            .and_then(|output| String::from_utf8(output.stdout).ok())
3757            .map(|converted| converted.trim().to_string())
3758            .filter(|converted| !converted.is_empty())
3759            .unwrap_or(path);
3760    }
3761    path
3762}
3763
/// Reports whether the process appears to run under WSL: either the
/// `WSL_DISTRO_NAME` environment variable is set, or
/// `/proc/sys/kernel/osrelease` is readable and contains "microsoft"
/// (ASCII case-insensitive).
fn is_wsl() -> bool {
    let distro_env_set = env::var_os("WSL_DISTRO_NAME").is_some();
    distro_env_set
        || match fs::read_to_string("/proc/sys/kernel/osrelease") {
            Ok(release) => release.to_ascii_lowercase().contains("microsoft"),
            Err(_) => false,
        }
}
3772
3773fn is_library_root(value: &Value) -> bool {
3774    value.as_i64() == Some(0) || value.as_u64() == Some(0)
3775}
3776
3777fn push_result(
3778    status: &str,
3779    zotero_item_key: Option<String>,
3780    pdf_attached: bool,
3781    pdf_size_bytes: u64,
3782    error: Value,
3783) -> Value {
3784    serde_json::json!({
3785        "status": status,
3786        "zotero_item_key": zotero_item_key,
3787        "pdf_attached": pdf_attached,
3788        "pdf_size_bytes": pdf_size_bytes,
3789        "error": error,
3790    })
3791}
3792
3793fn run_search(
3794    args: SearchArgs,
3795    client: &mut impl RpcCaller,
3796) -> Result<(Value, JsonStyle), String> {
3797    let SearchArgs {
3798        query, fulltext, author, after, before, journal, tag,
3799        doi, isbn, issn, collection, limit, offset, ..
3800    } = args;
3801
3802    let has_identifier = doi.is_some() || isbn.is_some() || issn.is_some();
3803    if has_identifier {
3804        let mut params = serde_json::Map::new();
3805        if let Some(doi) = doi { params.insert("doi".into(), Value::String(doi)); }
3806        if let Some(isbn) = isbn { params.insert("isbn".into(), Value::String(isbn)); }
3807        if let Some(issn) = issn { params.insert("issn".into(), Value::String(issn)); }
3808        let value = client.call("search.byIdentifier", Some(Value::Object(params)))?;
3809        return Ok((normalize_list_envelope(value, "items", None, 0), JsonStyle::Pretty));
3810    }
3811
3812    if fulltext {
3813        let query = query.ok_or("INVALID_ARGS: --fulltext requires a search query")?;
3814        let mut params = serde_json::json!({"query": query, "limit": limit});
3815        if let (Some(col), Some(map)) = (collection, params.as_object_mut()) {
3816            map.insert("collection".into(), resolve_collection(client, &col)?);
3817        }
3818        let value = client.call("search.fulltext", Some(params))?;
3819        return Ok((normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty));
3820    }
3821
3822    let has_filters = author.is_some() || after.is_some() || before.is_some()
3823        || journal.is_some() || tag.is_some();
3824    if has_filters {
3825        let mut conditions: Vec<Value> = Vec::new();
3826        if let Some(query) = &query {
3827            conditions.push(serde_json::json!({
3828                "field": "quicksearch-titleCreatorYear",
3829                "operator": "contains",
3830                "value": query,
3831            }));
3832        }
3833        if let Some(author) = author {
3834            conditions.push(serde_json::json!({
3835                "field": "creator", "operator": "contains", "value": author,
3836            }));
3837        }
3838        if let Some(after) = after {
3839            conditions.push(serde_json::json!({
3840                "field": "date", "operator": "isAfter", "value": after,
3841            }));
3842        }
3843        if let Some(before) = before {
3844            conditions.push(serde_json::json!({
3845                "field": "date", "operator": "isBefore", "value": before,
3846            }));
3847        }
3848        if let Some(journal) = journal {
3849            conditions.push(serde_json::json!({
3850                "field": "publicationTitle", "operator": "contains", "value": journal,
3851            }));
3852        }
3853        if let Some(tag) = tag {
3854            conditions.push(serde_json::json!({
3855                "field": "tag", "operator": "is", "value": tag,
3856            }));
3857        }
3858        let value = client.call(
3859            "search.advanced",
3860            Some(serde_json::json!({
3861                "conditions": conditions,
3862                "operator": "and",
3863                "limit": limit,
3864                "offset": offset,
3865            })),
3866        )?;
3867        return Ok((normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty));
3868    }
3869
3870    let query = query.ok_or(
3871        "INVALID_ARGS: provide a search query, or use --doi/--isbn/--issn for identifier lookup"
3872    )?;
3873    let value = if let Some(col) = collection {
3874        let key = resolve_collection(client, &col)?;
3875        let response = client.call(
3876            "collections.getItems",
3877            Some(serde_json::json!({"key": key})),
3878        )?;
3879        collection_quick_search_response(&response, &query, limit)
3880    } else {
3881        filter_search_artifacts(client.call(
3882            "search.quick",
3883            Some(serde_json::json!({"query": query, "limit": limit})),
3884        )?)
3885    };
3886    Ok((normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty))
3887}
3888
3889fn run_search_management_command(
3890    command: SearchManagementCommand,
3891    client: &mut impl RpcCaller,
3892) -> Result<(Value, JsonStyle), String> {
3893    match command {
3894        SearchManagementCommand::SavedSearches { .. } => Ok((
3895            normalize_list_envelope(client.call("search.savedSearches", None)?, "items", None, 0),
3896            JsonStyle::Pretty,
3897        )),
3898        SearchManagementCommand::CreateSaved {
3899            name, condition, dry_run, ..
3900        } => {
3901            let conditions = condition
3902                .iter()
3903                .map(|raw| parse_search_condition(raw))
3904                .collect::<Result<Vec<_>, _>>()?;
3905            let params = serde_json::json!({"name": name, "conditions": conditions});
3906            if dry_run {
3907                Ok((dry_run_value("search.createSavedSearch", params), JsonStyle::PythonCompact))
3908            } else {
3909                Ok((client.call("search.createSavedSearch", Some(params))?, JsonStyle::PythonCompact))
3910            }
3911        }
3912        SearchManagementCommand::DeleteSaved {
3913            search_key, dry_run, ..
3914        } => {
3915            let params = serde_json::json!({"key": search_key});
3916            if dry_run {
3917                Ok((dry_run_value("search.deleteSavedSearch", params), JsonStyle::PythonCompact))
3918            } else {
3919                Ok((client.call("search.deleteSavedSearch", Some(params))?, JsonStyle::PythonCompact))
3920            }
3921        }
3922    }
3923}
3924
3925fn filter_search_artifacts(mut value: Value) -> Value {
3926    let Some(items) = value.get_mut("items").and_then(Value::as_array_mut) else {
3927        return value;
3928    };
3929    items.retain(|item| match item.get("title").and_then(Value::as_str) {
3930        Some(title) => !is_zotron_evidence_artifact(title),
3931        None => true,
3932    });
3933    let total_items = items.len() as u64;
3934    if let Some(total) = value.get_mut("total") {
3935        *total = Value::from(total_items);
3936    }
3937    value
3938}
3939
3940fn collection_quick_search_response(response: &Value, query: &str, limit: u64) -> Value {
3941    let mut matched = collection_items(response)
3942        .into_iter()
3943        .filter(|item| !item_is_evidence_artifact(item))
3944        .filter(|item| quick_item_matches(item, query))
3945        .collect::<Vec<_>>();
3946    let total = matched.len() as u64;
3947    let limit = usize::try_from(limit).unwrap_or(usize::MAX);
3948    if matched.len() > limit {
3949        matched.truncate(limit);
3950    }
3951    serde_json::json!({"items": matched, "total": total})
3952}
3953
3954fn item_is_evidence_artifact(item: &Value) -> bool {
3955    item.get("title")
3956        .and_then(Value::as_str)
3957        .is_some_and(is_zotron_evidence_artifact)
3958}
3959
3960fn quick_item_matches(item: &Value, query: &str) -> bool {
3961    let terms = query
3962        .split_whitespace()
3963        .map(|term| term.to_lowercase())
3964        .filter(|term| !term.is_empty())
3965        .collect::<Vec<_>>();
3966    if terms.is_empty() {
3967        return true;
3968    }
3969    let mut haystack = String::new();
3970    append_search_text(item, &mut haystack);
3971    let haystack = haystack.to_lowercase();
3972    terms.iter().all(|term| haystack.contains(term))
3973}
3974
3975fn append_search_text(value: &Value, out: &mut String) {
3976    match value {
3977        Value::String(text) => {
3978            out.push(' ');
3979            out.push_str(text);
3980        }
3981        Value::Number(number) => {
3982            out.push(' ');
3983            out.push_str(&number.to_string());
3984        }
3985        Value::Bool(value) => {
3986            out.push(' ');
3987            out.push_str(if *value { "true" } else { "false" });
3988        }
3989        Value::Array(items) => {
3990            for item in items {
3991                append_search_text(item, out);
3992            }
3993        }
3994        Value::Object(map) => {
3995            for item in map.values() {
3996                append_search_text(item, out);
3997            }
3998        }
3999        Value::Null => {}
4000    }
4001}
4002
4003fn parse_search_condition(raw: &str) -> Result<Value, String> {
4004    let mut parts = raw.split_whitespace();
4005    let field = parts.next();
4006    let operator = parts.next();
4007    let value = parts.collect::<Vec<_>>().join(" ");
4008    match (field, operator, value.is_empty()) {
4009        (Some(field), Some(operator), false) => Ok(serde_json::json!({
4010            "field": field,
4011            "operator": operator,
4012            "value": value,
4013        })),
4014        _ => Err(format!(
4015            "INVALID_ARGS: --condition must be 'field operator value', got: {raw:?}"
4016        )),
4017    }
4018}
4019
4020fn normalize_list_envelope(value: Value, list_key: &str, limit: Option<u64>, offset: u64) -> Value {
4021    if let Value::Array(arr) = value {
4022        let total = arr.len() as u64;
4023        let mut obj = serde_json::Map::new();
4024        obj.insert(list_key.to_string(), Value::Array(arr));
4025        obj.insert("total".to_string(), Value::from(total));
4026        if let Some(limit) = limit {
4027            obj.insert("limit".to_string(), Value::from(limit));
4028        }
4029        obj.insert("offset".to_string(), Value::from(offset));
4030        obj.insert("hasMore".to_string(), Value::Bool(false));
4031        return Value::Object(obj);
4032    }
4033
4034    let mut obj = match value {
4035        Value::Object(obj) if obj.contains_key(list_key) => obj,
4036        other => return other,
4037    };
4038
4039    let items_len = obj
4040        .get(list_key)
4041        .and_then(Value::as_array)
4042        .map_or(0, |a| a.len()) as u64;
4043    let total = obj
4044        .get("total")
4045        .and_then(Value::as_u64)
4046        .unwrap_or(items_len);
4047
4048    obj.insert("total".to_string(), Value::from(total));
4049    if let Some(limit) = limit {
4050        obj.insert("limit".to_string(), Value::from(limit));
4051    }
4052    obj.insert("offset".to_string(), Value::from(offset));
4053    obj.insert(
4054        "hasMore".to_string(),
4055        Value::Bool(offset + items_len < total),
4056    );
4057
4058    Value::Object(obj)
4059}
4060
4061const RPC_PAGINATION_SAFETY_CAP: usize = 10_000;
4062const RPC_PAGE_LIST_KEYS: [&str; 4] = ["items", "tags", "results", "data"];
4063
4064fn paginate_rpc(
4065    client: &mut impl RpcCaller,
4066    method: &str,
4067    params: Value,
4068    page_size: usize,
4069) -> Result<Value, String> {
4070    let base = params
4071        .as_object()
4072        .ok_or_else(|| "params must be a JSON object".to_string())?;
4073    let mut out = Vec::new();
4074    let mut prev_page: Option<Vec<Value>> = None;
4075    let mut offset = 0usize;
4076
4077    loop {
4078        let mut page_params = base.clone();
4079        page_params.insert("offset".to_string(), Value::Number(offset.into()));
4080        page_params.insert("limit".to_string(), Value::Number(page_size.into()));
4081        let response = client.call(method, Some(Value::Object(page_params)))?;
4082
4083        let page = match extract_page(&response) {
4084            Some(page) => page,
4085            None if out.is_empty() => return Ok(response),
4086            None if response.is_object() => {
4087                return Err(format!(
4088                    "paginate: {method:?} returned a non-paginated dict after {} accumulated rows; aborting",
4089                    out.len()
4090                ));
4091            }
4092            None => {
4093                return Err(format!(
4094                    "paginate: {method:?} returned non-list/non-dict shape after {} accumulated rows; aborting",
4095                    out.len()
4096                ));
4097            }
4098        };
4099
4100        if prev_page.as_ref() == Some(&page) {
4101            return Err(format!(
4102                "paginate: {method:?} returned identical pages — method likely ignores offset; aborting after {} rows",
4103                out.len()
4104            ));
4105        }
4106
4107        let page_len = page.len();
4108        out.extend(page.clone());
4109        if page_len < page_size {
4110            return Ok(Value::Array(out));
4111        }
4112        if out.len() >= RPC_PAGINATION_SAFETY_CAP {
4113            out.truncate(RPC_PAGINATION_SAFETY_CAP);
4114            return Ok(Value::Array(out));
4115        }
4116        prev_page = Some(page);
4117        offset += page_size;
4118    }
4119}
4120
4121fn extract_page(response: &Value) -> Option<Vec<Value>> {
4122    if let Some(page) = response.as_array() {
4123        return Some(page.clone());
4124    }
4125    let object = response.as_object()?;
4126    for key in RPC_PAGE_LIST_KEYS {
4127        if let Some(page) = object.get(key).and_then(Value::as_array) {
4128            return Some(page.clone());
4129        }
4130    }
4131    None
4132}
4133
4134fn run_find_pdfs_command(
4135    client: &mut impl RpcCaller,
4136    collection: String,
4137    limit: usize,
4138) -> Result<(Value, JsonStyle), String> {
4139    let collection_key = resolve_collection(client, &collection)?;
4140    let response = client.call(
4141        "collections.getItems",
4142        Some(serde_json::json!({"key": collection_key})),
4143    )?;
4144    let items = collection_items(&response);
4145
4146    let mut missing = Vec::new();
4147    for item in &items {
4148        let Some(item_key) = item.get("key").and_then(Value::as_str) else {
4149            continue;
4150        };
4151        let attachments = client.call(
4152            "attachments.list",
4153            Some(serde_json::json!({"parentKey": item_key})),
4154        )?;
4155        if !has_pdf_attachment(&attachments) {
4156            missing.push(item.clone());
4157        }
4158        if limit > 0 && missing.len() >= limit {
4159            break;
4160        }
4161    }
4162
4163    let mut results = Vec::new();
4164    for item in &missing {
4165        let item_key = item
4166            .get("key")
4167            .and_then(Value::as_str)
4168            .ok_or_else(|| "missing item lacks key".to_string())?;
4169        let response = client.call(
4170            "attachments.findPDF",
4171            Some(serde_json::json!({"parentKey": item_key})),
4172        )?;
4173        let attachment = response.get("attachment").filter(|value| !value.is_null());
4174        results.push(serde_json::json!({
4175            "item_key": item_key,
4176            "title": item.get("title").cloned().unwrap_or(Value::Null),
4177            "found": attachment.is_some(),
4178            "attachment_key": attachment
4179                .and_then(|attachment| attachment.get("key"))
4180                .cloned()
4181                .unwrap_or(Value::Null),
4182        }));
4183    }
4184
4185    Ok((
4186        serde_json::json!({
4187            "scanned": items.len(),
4188            "attempted": missing.len(),
4189            "results": results,
4190        }),
4191        JsonStyle::Pretty,
4192    ))
4193}
4194
4195fn collection_items(response: &Value) -> Vec<Value> {
4196    if let Some(items) = response.get("items").and_then(Value::as_array) {
4197        return items.clone();
4198    }
4199    response.as_array().cloned().unwrap_or_default()
4200}
4201
4202fn has_pdf_attachment(attachments: &Value) -> bool {
4203    attachments
4204        .as_array()
4205        .is_some_and(|attachments| attachments.iter().any(is_pdf_attachment))
4206}
4207
4208fn is_pdf_attachment(attachment: &Value) -> bool {
4209    let content_type = attachment
4210        .get("contentType")
4211        .and_then(Value::as_str)
4212        .unwrap_or_default()
4213        .to_lowercase();
4214    let path = attachment
4215        .get("path")
4216        .and_then(Value::as_str)
4217        .unwrap_or_default()
4218        .to_lowercase();
4219    matches!(
4220        content_type.as_str(),
4221        "application/pdf" | "application/x-pdf"
4222    ) || path.ends_with(".pdf")
4223}
4224
4225fn call_json(
4226    client: &mut impl RpcCaller,
4227    method: &str,
4228    params: Option<Value>,
4229) -> Result<Value, String> {
4230    client.call(method, params)
4231}
4232
4233fn run_system_command(
4234    command: SystemCommand,
4235    client: &mut impl RpcCaller,
4236) -> Result<(Value, JsonStyle), String> {
4237    let value = match command {
4238        SystemCommand::Version { .. } => client.call("system.version", None)?,
4239        SystemCommand::Libraries { .. } => client.call("system.libraries", None)?,
4240        SystemCommand::LibraryStats { library, .. } => {
4241            let params = library.map(|id| serde_json::json!({"id": id}));
4242            client.call("system.libraryStats", params)?
4243        }
4244        SystemCommand::Schema { item_type, .. } => {
4245            if let Some(item_type) = item_type {
4246                let fields = client.call("system.itemFields", Some(serde_json::json!({"itemType": item_type})))?;
4247                let creators = client.call("system.creatorTypes", Some(serde_json::json!({"itemType": item_type})))?;
4248                let field_names: Vec<Value> = fields.as_array().unwrap_or(&vec![])
4249                    .iter()
4250                    .filter_map(|f| f.get("field").cloned())
4251                    .collect();
4252                let creator_names: Vec<Value> = creators.as_array().unwrap_or(&vec![])
4253                    .iter()
4254                    .filter_map(|c| c.get("creatorType").cloned())
4255                    .collect();
4256                serde_json::json!({
4257                    "itemType": item_type,
4258                    "fields": field_names,
4259                    "creatorTypes": creator_names,
4260                })
4261            } else {
4262                let types = client.call("system.itemTypes", None)?;
4263                let type_names: Vec<Value> = types.as_array().unwrap_or(&vec![])
4264                    .iter()
4265                    .filter_map(|t| t.get("itemType").cloned())
4266                    .collect();
4267                Value::Array(type_names)
4268            }
4269        }
4270        SystemCommand::CurrentCollection { .. } => client.call("system.currentCollection", None)?,
4271        SystemCommand::ListMethods { .. } => client.call("system.listMethods", None)?,
4272        SystemCommand::Describe { method, .. } => {
4273            let params = method.map(|method| serde_json::json!({"method": method}));
4274            client.call("system.describe", params)?
4275        }
4276    };
4277    Ok((value, JsonStyle::Pretty))
4278}
4279
4280fn run_items_command(
4281    command: ItemsCommand,
4282    client: &mut impl RpcCaller,
4283) -> Result<(Value, JsonStyle), String> {
4284    let (value, style) = match command {
4285        ItemsCommand::Add {
4286            doi,
4287            isbn,
4288            from_url,
4289            file,
4290            collection,
4291            dry_run,
4292            ..
4293        } => {
4294            if let Some(doi) = doi {
4295                run_add_identifier_command(client, "items.addByDOI", "doi", doi, collection, dry_run)?
4296            } else if let Some(isbn) = isbn {
4297                run_add_identifier_command(client, "items.addByISBN", "isbn", isbn, collection, dry_run)?
4298            } else if let Some(from_url) = from_url {
4299                run_add_identifier_command(client, "items.addByURL", "url", from_url, collection, dry_run)?
4300            } else if let Some(file) = file {
4301                let mut params = serde_json::json!({"path": zotero_path(&file)});
4302                maybe_insert_collection(client, &mut params, collection)?;
4303                run_mutation_command(client, "items.addFromFile", params, dry_run)?
4304            } else {
4305                return Err("INVALID_ARGS: provide one of --doi, --isbn, --from-url, or --file".into());
4306            }
4307        }
4308        ItemsCommand::Create {
4309            item_type,
4310            fields,
4311            dry_run,
4312            ..
4313        } => {
4314            let parsed_fields = parse_field_options(&fields)?;
4315            let mut params = serde_json::json!({"itemType": item_type});
4316            if !parsed_fields.is_empty() {
4317                if let Some(map) = params.as_object_mut() {
4318                    map.insert("fields".to_string(), Value::Object(parsed_fields));
4319                }
4320            }
4321            run_mutation_command(client, "items.create", params, dry_run)?
4322        }
4323        ItemsCommand::Update {
4324            key,
4325            fields,
4326            dry_run,
4327            ..
4328        } => {
4329            let parsed_fields = parse_field_options(&fields)?;
4330            let mut params = serde_json::json!({"key": key});
4331            if !parsed_fields.is_empty() {
4332                if let Some(map) = params.as_object_mut() {
4333                    map.insert("fields".to_string(), Value::Object(parsed_fields));
4334                }
4335            }
4336            run_mutation_command(client, "items.update", params, dry_run)?
4337        }
4338        ItemsCommand::Delete { key, dry_run, .. } => run_mutation_command(
4339            client,
4340            "items.delete",
4341            serde_json::json!({"key": key}),
4342            dry_run,
4343        )?,
4344        ItemsCommand::Trash {
4345            items, dry_run, ..
4346        } => {
4347            if items.len() == 1 {
4348                run_mutation_command(
4349                    client,
4350                    "items.trash",
4351                    serde_json::json!({"key": items[0]}),
4352                    dry_run,
4353                )?
4354            } else {
4355                run_mutation_command(
4356                    client,
4357                    "items.batchTrash",
4358                    serde_json::json!({"keys": items}),
4359                    dry_run,
4360                )?
4361            }
4362        }
4363        ItemsCommand::Restore { item, dry_run, .. } => run_mutation_command(
4364            client,
4365            "items.restore",
4366            serde_json::json!({"key": item}),
4367            dry_run,
4368        )?,
4369        ItemsCommand::MergeDuplicates { keys, dry_run, .. } => {
4370            if keys.len() < 2 {
4371                return Err("INVALID_ARGS: need at least 2 keys to merge".to_string());
4372            }
4373            run_mutation_command(
4374                client,
4375                "items.mergeDuplicates",
4376                serde_json::json!({"keys": keys}),
4377                dry_run,
4378            )?
4379        }
4380        ItemsCommand::AddRelated {
4381            key,
4382            target,
4383            dry_run,
4384            ..
4385        } => run_mutation_command(
4386            client,
4387            "items.addRelated",
4388            serde_json::json!({"key": key, "targetKey": target}),
4389            dry_run,
4390        )?,
4391        ItemsCommand::RemoveRelated {
4392            key,
4393            target,
4394            dry_run,
4395            ..
4396        } => run_mutation_command(
4397            client,
4398            "items.removeRelated",
4399            serde_json::json!({"key": key, "targetKey": target}),
4400            dry_run,
4401        )?,
4402        ItemsCommand::Get { item, .. } => (
4403            client.call("items.get", Some(serde_json::json!({"key": item})))?,
4404            JsonStyle::Pretty,
4405        ),
4406        ItemsCommand::List {
4407            limit,
4408            offset,
4409            sort,
4410            direction,
4411            trash,
4412            ..
4413        } => {
4414            if trash {
4415                let value = client.call(
4416                    "items.getTrash",
4417                    Some(serde_json::json!({"limit": limit, "offset": offset})),
4418                )?;
4419                (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
4420            } else {
4421                let mut params = serde_json::json!({
4422                    "limit": limit,
4423                    "offset": offset,
4424                    "direction": direction,
4425                });
4426                if let (Some(sort), Some(map)) = (sort, params.as_object_mut()) {
4427                    map.insert("sort".to_string(), Value::String(sort));
4428                }
4429                let value = client.call("items.list", Some(params))?;
4430                (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
4431            }
4432        }
4433        ItemsCommand::FindDuplicates { .. } => (
4434            client.call("items.findDuplicates", None)?,
4435            JsonStyle::Pretty,
4436        ),
4437        ItemsCommand::Recent {
4438            limit,
4439            offset,
4440            recent_type,
4441            ..
4442        } => {
4443            if recent_type != "added" && recent_type != "modified" {
4444                return Err(format!(
4445                    "--type must be added or modified, got {recent_type:?}"
4446                ));
4447            }
4448            let value = client.call(
4449                "items.getRecent",
4450                Some(
4451                    serde_json::json!({"limit": limit, "offset": offset, "type": recent_type}),
4452                ),
4453            )?;
4454            (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
4455        }
4456        ItemsCommand::Fulltext { key, .. } => (
4457            client.call("items.getFullText", Some(serde_json::json!({"key": key})))?,
4458            JsonStyle::Pretty,
4459        ),
4460        ItemsCommand::Related { key, .. } => (
4461            normalize_list_envelope(
4462                client.call("items.getRelated", Some(serde_json::json!({"key": key})))?,
4463                "items",
4464                None,
4465                0,
4466            ),
4467            JsonStyle::Pretty,
4468        ),
4469        ItemsCommand::CitationKey { key, .. } => (
4470            client.call("items.citationKey", Some(serde_json::json!({"key": key})))?,
4471            JsonStyle::Pretty,
4472        ),
4473    };
4474    Ok((value, style))
4475}
4476
4477fn run_add_identifier_command(
4478    client: &mut impl RpcCaller,
4479    method: &str,
4480    param_name: &str,
4481    param_value: String,
4482    collection: Option<String>,
4483    dry_run: bool,
4484) -> Result<(Value, JsonStyle), String> {
4485    let mut params = Value::Object(serde_json::Map::from_iter([(
4486        param_name.to_string(),
4487        Value::String(param_value),
4488    )]));
4489    maybe_insert_collection(client, &mut params, collection)?;
4490    run_mutation_command(client, method, params, dry_run)
4491}
4492
4493fn run_mutation_command(
4494    client: &mut impl RpcCaller,
4495    method: &str,
4496    params: Value,
4497    dry_run: bool,
4498) -> Result<(Value, JsonStyle), String> {
4499    let value = if dry_run {
4500        serde_json::json!({
4501            "ok": true,
4502            "dryRun": true,
4503            "wouldCall": method,
4504            "wouldCallParams": params,
4505        })
4506    } else {
4507        client.call(method, Some(params))?
4508    };
4509    Ok((value, JsonStyle::PythonCompact))
4510}
4511
4512fn parse_field_options(fields: &[String]) -> Result<serde_json::Map<String, Value>, String> {
4513    let mut parsed = serde_json::Map::new();
4514    for field in fields {
4515        let (key, value) = field
4516            .split_once('=')
4517            .ok_or_else(|| format!("INVALID_ARGS: --field must be key=value, got: {field:?}"))?;
4518        parsed.insert(key.to_string(), Value::String(value.to_string()));
4519    }
4520    Ok(parsed)
4521}
4522
4523fn maybe_insert_collection(
4524    client: &mut impl RpcCaller,
4525    params: &mut Value,
4526    collection: Option<String>,
4527) -> Result<(), String> {
4528    let Some(collection) = collection else {
4529        return Ok(());
4530    };
4531    let collection = resolve_collection(client, &collection)?;
4532    let include = match &collection {
4533        Value::Null => false,
4534        Value::Number(number) => number.as_i64() != Some(0),
4535        _ => true,
4536    };
4537    if include {
4538        params
4539            .as_object_mut()
4540            .expect("mutation params are always objects")
4541            .insert("collection".to_string(), collection);
4542    }
4543    Ok(())
4544}
4545
4546fn run_settings_command(
4547    command: SettingsCommand,
4548    client: &mut impl RpcCaller,
4549) -> Result<(Value, JsonStyle), String> {
4550    let (value, style) = match command {
4551        SettingsCommand::Get { key, .. } => (
4552            client.call("settings.get", Some(serde_json::json!({"key": key})))?,
4553            JsonStyle::Pretty,
4554        ),
4555        SettingsCommand::List { .. } => (client.call("settings.getAll", None)?, JsonStyle::Pretty),
4556        SettingsCommand::Set {
4557            pairs,
4558            file,
4559            dry_run,
4560            ..
4561        } => {
4562            if let Some(file) = file {
4563                // --file mode: read JSON and call settings.setAll
4564                let raw = fs::read_to_string(&file)
4565                    .map_err(|err| format!("INVALID_JSON: Could not read JSON: {err}"))?;
4566                let settings: Value = serde_json::from_str(&raw)
4567                    .map_err(|err| format!("INVALID_JSON: Could not parse JSON: {err}"))?;
4568                if dry_run {
4569                    (
4570                        dry_run_value("settings.setAll", settings),
4571                        JsonStyle::PythonCompact,
4572                    )
4573                } else {
4574                    (
4575                        client.call("settings.setAll", Some(settings))?,
4576                        JsonStyle::PythonCompact,
4577                    )
4578                }
4579            } else if pairs.len() == 2 {
4580                // Single key=value: settings.set
4581                let key = &pairs[0];
4582                let value = &pairs[1];
4583                let parsed_value = serde_json::from_str::<Value>(value)
4584                    .unwrap_or(Value::String(value.clone()));
4585                let params = serde_json::json!({"key": key, "value": parsed_value});
4586                if dry_run {
4587                    (
4588                        dry_run_value("settings.set", params),
4589                        JsonStyle::PythonCompact,
4590                    )
4591                } else {
4592                    (
4593                        client.call("settings.set", Some(params))?,
4594                        JsonStyle::PythonCompact,
4595                    )
4596                }
4597            } else if pairs.len() > 2 && pairs.len() % 2 == 0 {
4598                // Multiple pairs: build a map and call settings.setAll
4599                let mut map = serde_json::Map::new();
4600                for chunk in pairs.chunks(2) {
4601                    let parsed = serde_json::from_str::<Value>(&chunk[1])
4602                        .unwrap_or(Value::String(chunk[1].clone()));
4603                    map.insert(chunk[0].clone(), parsed);
4604                }
4605                let settings = Value::Object(map);
4606                if dry_run {
4607                    (
4608                        dry_run_value("settings.setAll", settings),
4609                        JsonStyle::PythonCompact,
4610                    )
4611                } else {
4612                    (
4613                        client.call("settings.setAll", Some(settings))?,
4614                        JsonStyle::PythonCompact,
4615                    )
4616                }
4617            } else {
4618                return Err(
4619                    "INVALID_ARGS: provide key value pairs (even number of args) or --file".into(),
4620                );
4621            }
4622        }
4623    };
4624    Ok((value, style))
4625}
4626
4627fn run_tags_command(
4628    command: TagsCommand,
4629    client: &mut impl RpcCaller,
4630) -> Result<(Value, JsonStyle), String> {
4631    let (value, style) = match command {
4632        TagsCommand::List { limit, .. } => {
4633            let value = client.call("tags.list", Some(serde_json::json!({"limit": limit})))?;
4634            (normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty)
4635        }
4636        TagsCommand::Rename {
4637            old, new, dry_run, ..
4638        } => run_tag_mutation(
4639            client,
4640            "tags.rename",
4641            serde_json::json!({"oldName": old, "newName": new}),
4642            dry_run,
4643        )?,
4644        TagsCommand::Delete { tag, dry_run, .. } => run_tag_mutation(
4645            client,
4646            "tags.delete",
4647            serde_json::json!({"tag": tag}),
4648            dry_run,
4649        )?,
4650        TagsCommand::Add {
4651            keys, tags, dry_run, ..
4652        } => {
4653            if keys.len() == 1 {
4654                run_tag_mutation(
4655                    client,
4656                    "tags.add",
4657                    serde_json::json!({"key": keys[0], "tags": tags}),
4658                    dry_run,
4659                )?
4660            } else {
4661                run_tag_mutation(
4662                    client,
4663                    "tags.batchUpdate",
4664                    serde_json::json!({"keys": keys, "add": tags}),
4665                    dry_run,
4666                )?
4667            }
4668        }
4669        TagsCommand::Remove {
4670            keys, tags, dry_run, ..
4671        } => {
4672            if keys.len() == 1 {
4673                run_tag_mutation(
4674                    client,
4675                    "tags.remove",
4676                    serde_json::json!({"key": keys[0], "tags": tags}),
4677                    dry_run,
4678                )?
4679            } else {
4680                run_tag_mutation(
4681                    client,
4682                    "tags.batchUpdate",
4683                    serde_json::json!({"keys": keys, "remove": tags}),
4684                    dry_run,
4685                )?
4686            }
4687        }
4688    };
4689    Ok((value, style))
4690}
4691
4692fn run_tag_mutation(
4693    client: &mut impl RpcCaller,
4694    method: &str,
4695    params: Value,
4696    dry_run: bool,
4697) -> Result<(Value, JsonStyle), String> {
4698    if dry_run {
4699        Ok((dry_run_value(method, params), JsonStyle::PythonCompact))
4700    } else {
4701        Ok((client.call(method, Some(params))?, JsonStyle::PythonCompact))
4702    }
4703}
4704
4705fn dry_run_value(method: &str, params: Value) -> Value {
4706    serde_json::json!({
4707        "ok": true,
4708        "dryRun": true,
4709        "wouldCall": method,
4710        "wouldCallParams": params,
4711    })
4712}
4713
4714fn run_annotations_command(
4715    command: AnnotationsCommand,
4716    client: &mut impl RpcCaller,
4717) -> Result<(Value, JsonStyle), String> {
4718    let (value, style) = match command {
4719        AnnotationsCommand::List { parent, .. } => {
4720            let value = client.call(
4721                "annotations.list",
4722                Some(serde_json::json!({"parentKey": parent})),
4723            )?;
4724            (normalize_list_envelope(value, "items", None, 0), JsonStyle::Pretty)
4725        }
4726        AnnotationsCommand::Create {
4727            parent,
4728            annotation_type,
4729            position,
4730            sort_index,
4731            text,
4732            comment,
4733            color,
4734            dry_run,
4735            ..
4736        } => {
4737            if !matches!(
4738                annotation_type.as_str(),
4739                "highlight" | "note" | "underline" | "image" | "ink"
4740            ) {
4741                return Err(format!(
4742                    "INVALID_ARGS: --type must be highlight|note|underline|image|ink, got {annotation_type:?}"
4743                ));
4744            }
4745            let position = position
4746                .ok_or_else(|| "INVALID_ARGS: --position JSON is required".to_string())
4747                .and_then(|raw| {
4748                    serde_json::from_str::<Value>(&raw)
4749                        .map_err(|err| format!("INVALID_JSON: Could not parse --position: {err}"))
4750                })?;
4751            validate_annotation_position(annotation_type.as_str(), &position)?;
4752            let mut params = serde_json::Map::new();
4753            params.insert("parentKey".to_string(), Value::String(parent));
4754            params.insert("type".to_string(), Value::String(annotation_type));
4755            params.insert("color".to_string(), Value::String(color));
4756            params.insert("position".to_string(), position);
4757            if let Some(sort_index) = sort_index {
4758                params.insert(
4759                    "sortIndex".to_string(),
4760                    parse_annotation_sort_index(sort_index)?,
4761                );
4762            }
4763            if let Some(text) = text {
4764                params.insert("text".to_string(), Value::String(text));
4765            }
4766            if let Some(comment) = comment {
4767                params.insert("comment".to_string(), Value::String(comment));
4768            }
4769            run_mutating_command(client, "annotations.create", Value::Object(params), dry_run)?
4770        }
4771        AnnotationsCommand::Delete {
4772            annotation_key,
4773            dry_run,
4774            ..
4775        } => run_mutating_command(
4776            client,
4777            "annotations.delete",
4778            serde_json::json!({"key": annotation_key}),
4779            dry_run,
4780        )?,
4781    };
4782    Ok((value, style))
4783}
4784
4785fn validate_annotation_position(annotation_type: &str, position: &Value) -> Result<(), String> {
4786    position
4787        .get("pageIndex")
4788        .and_then(Value::as_i64)
4789        .filter(|value| *value >= 0)
4790        .ok_or_else(|| {
4791            "INVALID_ARGS: --position must include a non-negative integer pageIndex".to_string()
4792        })?;
4793
4794    if annotation_type == "ink" {
4795        let has_paths = position
4796            .get("paths")
4797            .and_then(Value::as_array)
4798            .is_some_and(|paths| !paths.is_empty());
4799        if !has_paths {
4800            return Err("INVALID_ARGS: ink --position must include non-empty paths".to_string());
4801        }
4802        return Ok(());
4803    }
4804
4805    let valid_rects = position
4806        .get("rects")
4807        .and_then(Value::as_array)
4808        .is_some_and(|rects| !rects.is_empty() && rects.iter().all(is_annotation_rect));
4809    if !valid_rects {
4810        return Err(
4811            "INVALID_ARGS: --position must include non-empty rects of [x1, y1, x2, y2]".to_string(),
4812        );
4813    }
4814    Ok(())
4815}
4816
4817fn is_annotation_rect(value: &Value) -> bool {
4818    value.as_array().is_some_and(|coords| {
4819        coords.len() == 4
4820            && coords
4821                .iter()
4822                .all(|coord| coord.as_f64().is_some_and(f64::is_finite))
4823    })
4824}
4825
4826fn parse_annotation_sort_index(raw: String) -> Result<Value, String> {
4827    let parsed = serde_json::from_str::<Value>(&raw).unwrap_or_else(|_| Value::String(raw));
4828    let valid = match &parsed {
4829        Value::Number(number) => number.as_f64().is_some_and(f64::is_finite),
4830        Value::String(value) => {
4831            is_zotero_pdf_sort_index(value.trim())
4832                || (!value.trim().is_empty()
4833                    && value.trim().parse::<f64>().is_ok_and(f64::is_finite))
4834        }
4835        _ => false,
4836    };
4837    if valid {
4838        Ok(parsed)
4839    } else {
4840        Err(format!(
4841            "INVALID_ARGS: --sort-index must be a finite number or numeric string, got {parsed}"
4842        ))
4843    }
4844}
4845
/// Returns `true` when `value` is exactly three `|`-separated groups of ASCII digits with
/// widths 5, 6 and 5 (for example `00001|000123|00045`).
fn is_zotero_pdf_sort_index(value: &str) -> bool {
    const WIDTHS: [usize; 3] = [5, 6, 5];

    let segments: Vec<&str> = value.split('|').collect();
    segments.len() == WIDTHS.len()
        && segments.iter().zip(WIDTHS).all(|(segment, width)| {
            // Byte length equals character count here because only ASCII digits pass.
            segment.len() == width && segment.bytes().all(|byte| byte.is_ascii_digit())
        })
}
4859
4860fn run_attachments_command(
4861    command: AttachmentsCommand,
4862    client: &mut impl RpcCaller,
4863) -> Result<(Value, JsonStyle), String> {
4864    let value = match command {
4865        AttachmentsCommand::List {
4866            parent,
4867            limit,
4868            offset,
4869            ..
4870        } => normalize_list_envelope(
4871            client.call(
4872                "attachments.list",
4873                Some(serde_json::json!({"parentKey": parent})),
4874            )?,
4875            "items",
4876            Some(limit),
4877            offset,
4878        ),
4879        AttachmentsCommand::Get { key, .. } => {
4880            client.call("attachments.get", Some(serde_json::json!({"key": key})))?
4881        }
4882        AttachmentsCommand::Fulltext { key, .. } => client.call(
4883            "attachments.getFulltext",
4884            Some(serde_json::json!({"key": key})),
4885        )?,
4886        AttachmentsCommand::Path { key, .. } => localize_attachment_path_response(
4887            client.call("attachments.getPath", Some(serde_json::json!({"key": key})))?,
4888        ),
4889        AttachmentsCommand::Add {
4890            parent,
4891            path,
4892            from_url,
4893            title,
4894            dry_run,
4895            ..
4896        } => {
4897            match (path, from_url) {
4898                (Some(p), None) => {
4899                    let mut params = serde_json::json!({"parentKey": parent, "path": zotero_path(&p)});
4900                    insert_optional_string(&mut params, "title", title);
4901                    if dry_run {
4902                        return Ok((
4903                            dry_run_value("attachments.add", params),
4904                            JsonStyle::PythonCompact,
4905                        ));
4906                    }
4907                    return Ok((
4908                        client.call("attachments.add", Some(params))?,
4909                        JsonStyle::PythonCompact,
4910                    ));
4911                }
4912                (None, Some(u)) => {
4913                    let mut params = serde_json::json!({"parentKey": parent, "url": u});
4914                    insert_optional_string(&mut params, "title", title);
4915                    if dry_run {
4916                        return Ok((
4917                            dry_run_value("attachments.addByURL", params),
4918                            JsonStyle::PythonCompact,
4919                        ));
4920                    }
4921                    return Ok((
4922                        client.call("attachments.addByURL", Some(params))?,
4923                        JsonStyle::PythonCompact,
4924                    ));
4925                }
4926                (Some(_), Some(_)) => {
4927                    return Err("INVALID_ARGS: --path and --from-url are mutually exclusive".to_string());
4928                }
4929                (None, None) => {
4930                    return Err("INVALID_ARGS: either --path or --from-url is required".to_string());
4931                }
4932            }
4933        }
4934        AttachmentsCommand::Delete { key, dry_run, .. } => {
4935            let params = serde_json::json!({"key": key});
4936            if dry_run {
4937                return Ok((
4938                    dry_run_value("attachments.delete", params),
4939                    JsonStyle::PythonCompact,
4940                ));
4941            }
4942            return Ok((
4943                client.call("attachments.delete", Some(params))?,
4944                JsonStyle::PythonCompact,
4945            ));
4946        }
4947        AttachmentsCommand::FindPdf { parent, .. } => client.call(
4948            "attachments.findPDF",
4949            Some(serde_json::json!({"parentKey": parent})),
4950        )?,
4951    };
4952    Ok((value, JsonStyle::Pretty))
4953}
4954
4955fn localize_attachment_path_response(mut value: Value) -> Value {
4956    if let Some(path) = value.get("path").and_then(Value::as_str) {
4957        let local = local_path_from_zotero_path(path);
4958        if let Some(map) = value.as_object_mut() {
4959            map.insert("path".to_string(), Value::String(local));
4960        }
4961    }
4962    value
4963}
4964
4965fn run_notes_command(
4966    command: NotesCommand,
4967    client: &mut impl RpcCaller,
4968) -> Result<(Value, JsonStyle), String> {
4969    let (value, style) = match command {
4970        NotesCommand::List {
4971            parent,
4972            limit,
4973            offset,
4974            ..
4975        } => {
4976            let value = client.call(
4977                "notes.list",
4978                Some(serde_json::json!({"parentKey": parent})),
4979            )?;
4980            (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
4981        }
4982        NotesCommand::Get { note_key, .. } => {
4983            let value = client.call("notes.get", Some(serde_json::json!({"key": note_key})))?;
4984            (value, JsonStyle::Pretty)
4985        }
4986        NotesCommand::Create {
4987            parent,
4988            content,
4989            tags,
4990            dry_run,
4991            ..
4992        } => {
4993            let mut params = serde_json::Map::new();
4994            params.insert("parentKey".to_string(), Value::String(parent));
4995            params.insert("content".to_string(), Value::String(content));
4996            if !tags.is_empty() {
4997                params.insert(
4998                    "tags".to_string(),
4999                    Value::Array(tags.into_iter().map(Value::String).collect()),
5000                );
5001            }
5002            run_mutating_command(client, "notes.create", Value::Object(params), dry_run)?
5003        }
5004        NotesCommand::Update {
5005            note_key,
5006            content,
5007            dry_run,
5008            ..
5009        } => run_mutating_command(
5010            client,
5011            "notes.update",
5012            serde_json::json!({"key": note_key, "content": content}),
5013            dry_run,
5014        )?,
5015        NotesCommand::Delete {
5016            note_key, dry_run, ..
5017        } => {
5018            // Python CLI intentionally routes note deletion through items.delete.
5019            run_mutating_command(
5020                client,
5021                "items.delete",
5022                serde_json::json!({"key": note_key}),
5023                dry_run,
5024            )?
5025        }
5026        NotesCommand::Search { query, limit, .. } => {
5027            let value = client.call(
5028                "notes.search",
5029                Some(serde_json::json!({"query": query, "limit": limit})),
5030            )?;
5031            (normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty)
5032        }
5033    };
5034    Ok((value, style))
5035}
5036
5037fn run_mutating_command(
5038    client: &mut impl RpcCaller,
5039    method: &str,
5040    params: Value,
5041    dry_run: bool,
5042) -> Result<(Value, JsonStyle), String> {
5043    if dry_run {
5044        Ok((
5045            serde_json::json!({
5046                "ok": true,
5047                "dryRun": true,
5048                "wouldCall": method,
5049                "wouldCallParams": params,
5050            }),
5051            JsonStyle::PythonCompact,
5052        ))
5053    } else {
5054        client
5055            .call(method, Some(params))
5056            .map(|value| (value, JsonStyle::PythonCompact))
5057    }
5058}
5059
5060fn run_collections_command(
5061    command: CollectionsCommand,
5062    client: &mut impl RpcCaller,
5063) -> Result<(Value, JsonStyle), String> {
5064    let value = match command {
5065        CollectionsCommand::List { .. } => normalize_list_envelope(
5066            client.call("collections.list", None)?,
5067            "items",
5068            None,
5069            0,
5070        ),
5071        CollectionsCommand::Tree { .. } => client.call("collections.tree", None)?,
5072        CollectionsCommand::Get { name_or_id, .. } => {
5073            let key = resolve_collection(client, &name_or_id)?;
5074            client.call("collections.get", Some(serde_json::json!({"key": key})))?
5075        }
5076        CollectionsCommand::GetItems {
5077            name_or_id,
5078            limit,
5079            offset,
5080            ..
5081        } => {
5082            let key = resolve_collection(client, &name_or_id)?;
5083            let mut params = serde_json::json!({"key": key});
5084            if let Some(map) = params.as_object_mut() {
5085                if let Some(limit) = limit {
5086                    map.insert("limit".to_string(), Value::Number(limit.into()));
5087                }
5088                if offset > 0 {
5089                    map.insert("offset".to_string(), Value::Number(offset.into()));
5090                }
5091            }
5092            normalize_list_envelope(
5093                client.call("collections.getItems", Some(params))?,
5094                "items",
5095                limit,
5096                offset,
5097            )
5098        }
5099        CollectionsCommand::Stats { name_or_id, .. } => {
5100            let key = resolve_collection(client, &name_or_id)?;
5101            client.call("collections.stats", Some(serde_json::json!({"key": key})))?
5102        }
5103        CollectionsCommand::Rename {
5104            old_name,
5105            new_name,
5106            dry_run,
5107            ..
5108        } => {
5109            let key = resolve_mutable_collection(client, &old_name, "rename")?;
5110            let params = serde_json::json!({"key": key, "name": new_name});
5111            if dry_run {
5112                return Ok((
5113                    dry_run_value("collections.rename", params),
5114                    JsonStyle::PythonCompact,
5115                ));
5116            }
5117            return Ok((
5118                client.call("collections.rename", Some(params))?,
5119                JsonStyle::PythonCompact,
5120            ));
5121        }
5122        CollectionsCommand::Create {
5123            name,
5124            parent,
5125            dry_run,
5126            ..
5127        } => {
5128            let mut params = serde_json::json!({"name": name});
5129            if let Some(parent) = parent {
5130                let parent_key = resolve_mutable_collection(client, &parent, "use as parent")?;
5131                if let Some(map) = params.as_object_mut() {
5132                    map.insert("parentKey".to_string(), parent_key);
5133                }
5134            }
5135            if dry_run {
5136                return Ok((
5137                    dry_run_value("collections.create", params),
5138                    JsonStyle::PythonCompact,
5139                ));
5140            }
5141            return Ok((
5142                client.call("collections.create", Some(params))?,
5143                JsonStyle::PythonCompact,
5144            ));
5145        }
5146        CollectionsCommand::Delete {
5147            name_or_id,
5148            dry_run,
5149            ..
5150        } => {
5151            let key = resolve_mutable_collection(client, &name_or_id, "delete")?;
5152            let params = serde_json::json!({"key": key});
5153            if dry_run {
5154                return Ok((
5155                    dry_run_value("collections.delete", params),
5156                    JsonStyle::PythonCompact,
5157                ));
5158            }
5159            return Ok((
5160                client.call("collections.delete", Some(params))?,
5161                JsonStyle::PythonCompact,
5162            ));
5163        }
5164        CollectionsCommand::AddItems {
5165            collection,
5166            item_keys,
5167            dry_run,
5168            ..
5169        } => {
5170            let key = resolve_mutable_collection(client, &collection, "add to")?;
5171            let params = serde_json::json!({"key": key, "keys": item_keys});
5172            if dry_run {
5173                return Ok((
5174                    dry_run_value("collections.addItems", params),
5175                    JsonStyle::PythonCompact,
5176                ));
5177            }
5178            return Ok((
5179                client.call("collections.addItems", Some(params))?,
5180                JsonStyle::PythonCompact,
5181            ));
5182        }
5183        CollectionsCommand::RemoveItems {
5184            collection,
5185            item_keys,
5186            dry_run,
5187            ..
5188        } => {
5189            let key = resolve_mutable_collection(client, &collection, "operate on")?;
5190            let params = serde_json::json!({"key": key, "keys": item_keys});
5191            if dry_run {
5192                return Ok((
5193                    dry_run_value("collections.removeItems", params),
5194                    JsonStyle::PythonCompact,
5195                ));
5196            }
5197            return Ok((
5198                client.call("collections.removeItems", Some(params))?,
5199                JsonStyle::PythonCompact,
5200            ));
5201        }
5202    };
5203    Ok((value, JsonStyle::Pretty))
5204}
5205
5206fn resolve_export_keys(
5207    client: &mut impl RpcCaller,
5208    mut keys: Vec<String>,
5209    collection: Option<String>,
5210) -> Result<Vec<String>, String> {
5211    if let Some(name) = collection {
5212        let col_key = resolve_collection(client, &name)?;
5213        let response = client.call(
5214            "collections.getItems",
5215            Some(serde_json::json!({"key": col_key})),
5216        )?;
5217        let items = collection_items(&response);
5218        for item in items {
5219            if let Some(key) = item.get("key").and_then(Value::as_str) {
5220                if !keys.contains(&key.to_string()) {
5221                    keys.push(key.to_string());
5222                }
5223            }
5224        }
5225    }
5226    if keys.is_empty() {
5227        return Err("No item keys provided. Pass positional keys and/or --collection.".to_string());
5228    }
5229    Ok(keys)
5230}
5231
5232fn run_export(args: ExportArgs, client: &mut impl RpcCaller) -> Result<String, String> {
5233    let keys = resolve_export_keys(client, args.keys, args.collection)?;
5234    match args.format.as_str() {
5235        "bibtex" => run_export_content_command(client, "export.bibtex", keys),
5236        "ris" => run_export_content_command(client, "export.ris", keys),
5237        "csl-json" => {
5238            let response =
5239                client.call("export.cslJson", Some(serde_json::json!({"keys": keys})))?;
5240            if let Some(content) = response.get("content") {
5241                format_json(content, JsonStyle::Pretty)
5242            } else {
5243                format_json(&response, JsonStyle::PythonCompact)
5244            }
5245        }
5246        "bibliography" => {
5247            let response = client.call(
5248                "export.bibliography",
5249                Some(serde_json::json!({"keys": keys, "style": args.style})),
5250            )?;
5251            if let Some(object) = response.as_object() {
5252                let field = if args.html { "html" } else { "text" };
5253                if object.contains_key("html") || object.contains_key("text") {
5254                    return raw_value_output(
5255                        object.get(field).unwrap_or(&Value::String(String::new())),
5256                    );
5257                }
5258            }
5259            format_json(&response, JsonStyle::PythonCompact)
5260        }
5261        other => Err(format!(
5262            "INVALID_ARGS: unknown format {other:?}, expected bibtex/ris/csl-json/bibliography"
5263        )),
5264    }
5265}
5266
5267fn run_export_content_command(
5268    client: &mut impl RpcCaller,
5269    method: &str,
5270    keys: Vec<String>,
5271) -> Result<String, String> {
5272    let response = client.call(method, Some(serde_json::json!({"keys": keys})))?;
5273    if let Some(content) = response.get("content") {
5274        raw_value_output(content)
5275    } else {
5276        format_json(&response, JsonStyle::PythonCompact)
5277    }
5278}
5279
5280fn raw_value_output(value: &Value) -> Result<String, String> {
5281    let mut out = match value {
5282        Value::Null => String::new(),
5283        Value::String(content) => content.clone(),
5284        other => to_python_repr(other),
5285    };
5286    out.push('\n');
5287    Ok(out)
5288}
5289
5290fn to_python_repr(value: &Value) -> String {
5291    match value {
5292        Value::Null => "None".to_string(),
5293        Value::Bool(value) => {
5294            if *value {
5295                "True".to_string()
5296            } else {
5297                "False".to_string()
5298            }
5299        }
5300        Value::Number(value) => value.to_string(),
5301        Value::String(value) => format!("'{}'", value.replace('\\', "\\\\").replace('\'', "\\'")),
5302        Value::Array(values) => {
5303            let inner = values
5304                .iter()
5305                .map(to_python_repr)
5306                .collect::<Vec<_>>()
5307                .join(", ");
5308            format!("[{inner}]")
5309        }
5310        Value::Object(entries) => {
5311            let inner = entries
5312                .iter()
5313                .map(|(key, value)| {
5314                    format!("'{}': {}", key.replace('\'', "\\'"), to_python_repr(value))
5315                })
5316                .collect::<Vec<_>>()
5317                .join(", ");
5318            format!("{{{inner}}}")
5319        }
5320    }
5321}
5322
5323fn resolve_collection(client: &mut impl RpcCaller, name_or_id: &str) -> Result<Value, String> {
5324    let trimmed = name_or_id.trim();
5325    if let Ok(id) = trimmed.parse::<i64>() {
5326        return Ok(Value::Number(id.into()));
5327    }
5328
5329    let collections = client.call("collections.list", None)?;
5330    let items = collections
5331        .get("items")
5332        .and_then(Value::as_array)
5333        .or_else(|| collections.as_array())
5334        .ok_or_else(|| "collections.list returned non-array result".to_string())?;
5335
5336    if let Some(collection) = items
5337        .iter()
5338        .find(|collection| collection.get("key").and_then(Value::as_str) == Some(trimmed))
5339    {
5340        return collection_key(collection);
5341    }
5342
5343    let exact = items
5344        .iter()
5345        .filter(|collection| collection.get("name").and_then(Value::as_str) == Some(trimmed))
5346        .collect::<Vec<_>>();
5347    if exact.len() == 1 {
5348        return collection_key(exact[0]);
5349    }
5350
5351    let needle = normalize_collection_name(trimmed);
5352    let fuzzy = items
5353        .iter()
5354        .filter(|collection| {
5355            collection
5356                .get("name")
5357                .and_then(Value::as_str)
5358                .map(normalize_collection_name)
5359                .is_some_and(|name| name.contains(&needle))
5360        })
5361        .collect::<Vec<_>>();
5362
5363    match fuzzy.len() {
5364        1 => collection_key(fuzzy[0]),
5365        0 => Err(format!(
5366            "COLLECTION_NOT_FOUND: No collection named {trimmed:?}"
5367        )),
5368        _ => Err(format!(
5369            "COLLECTION_AMBIGUOUS: Multiple collections match {trimmed:?}"
5370        )),
5371    }
5372}
5373
5374fn collection_key(collection: &Value) -> Result<Value, String> {
5375    collection
5376        .get("key")
5377        .cloned()
5378        .ok_or_else(|| "collection result is missing key".to_string())
5379}
5380
5381fn resolve_mutable_collection(
5382    client: &mut impl RpcCaller,
5383    name_or_id: &str,
5384    operation: &str,
5385) -> Result<Value, String> {
5386    let key = resolve_collection(client, name_or_id)?;
5387    if key.as_i64() == Some(0) {
5388        return Err(format!(
5389            "COLLECTION_NOT_FOUND: {name_or_id:?} resolved to library root (cannot {operation})"
5390        ));
5391    }
5392    Ok(key)
5393}
5394
5395fn insert_optional_string(value: &mut Value, key: &str, maybe: Option<String>) {
5396    if let (Some(map), Some(content)) = (value.as_object_mut(), maybe) {
5397        map.insert(key.to_string(), Value::String(content));
5398    }
5399}
5400
/// Normalizes a collection name for fuzzy comparison.
///
/// Leading and trailing whitespace is dropped, every internal whitespace run collapses to a
/// single space, and the result is lowercased.
fn normalize_collection_name(name: &str) -> String {
    let mut collapsed = String::with_capacity(name.len());
    for word in name.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    // Lowercase the joined text as a whole, exactly as a join-then-lowercase would.
    collapsed.to_lowercase()
}
5407
5408fn format_json(value: &Value, style: JsonStyle) -> Result<String, String> {
5409    let mut out = match style {
5410        JsonStyle::PythonCompact => to_python_compact_json(value),
5411        JsonStyle::Pretty => serde_json::to_string_pretty(value).map_err(|err| err.to_string())?,
5412    };
5413    out.push('\n');
5414    Ok(out)
5415}
5416
5417fn to_python_compact_json(value: &Value) -> String {
5418    match value {
5419        Value::Null => "null".to_string(),
5420        Value::Bool(value) => value.to_string(),
5421        Value::Number(value) => value.to_string(),
5422        Value::String(value) => {
5423            serde_json::to_string(value).expect("string serialization cannot fail")
5424        }
5425        Value::Array(values) => {
5426            let inner = values
5427                .iter()
5428                .map(to_python_compact_json)
5429                .collect::<Vec<_>>()
5430                .join(", ");
5431            format!("[{inner}]")
5432        }
5433        Value::Object(entries) => {
5434            let inner = entries
5435                .iter()
5436                .map(|(key, value)| {
5437                    let key = serde_json::to_string(key).expect("string serialization cannot fail");
5438                    format!("{key}: {}", to_python_compact_json(value))
5439                })
5440                .collect::<Vec<_>>()
5441                .join(", ");
5442            format!("{{{inner}}}")
5443        }
5444    }
5445}