Skip to main content

zotron_cli/
lib.rs

1//! Minimal typed CLI surface for the Rust migration scaffold.
2
3use std::{
4    env, fs,
5    io::{self, Read},
6    path::{Path, PathBuf},
7    process::Command as ProcessCommand,
8    thread,
9    time::{Duration, Instant, SystemTime, UNIX_EPOCH},
10};
11
12use clap::{error::ErrorKind, Parser, Subcommand};
13use serde_json::Value;
14use zotron_rpc::{StdProviderCommandRunner, UreqProviderHttpTransport, ZoteroRpc};
15use zotron_types::{
16    bm25_score_chunks, build_embedding_provider_request, build_ocr_provider_request,
17    builtin_ocr_provider_specs, cosine_similarity, execute_embedding_provider_request,
18    is_zotron_evidence_artifact, machine_artifact_exists_for_item,
19    machine_artifact_exists_in_sidecar, machine_artifact_store_root,
20    ocr_provider_spec as raw_ocr_provider_spec, parse_embedding_provider_response,
21    parse_ocr_provider_response, read_machine_artifact_sidecar, rrf_merge,
22    write_machine_artifact_sidecar, ArtifactStorePlatform, EmbeddingChunkInput,
23    EmbeddingRequestInput, EmbeddingVector, MachineArtifactKind, OcrRequestInput,
24    ProviderCommandRunner, ProviderHttpInvocation, ProviderHttpTransport, StructureChunk,
25    DEFAULT_RPC_URL,
26};
27
/// Abstraction over a single RPC call: a method name plus optional JSON
/// params go in, a JSON value or an error message comes out.
///
/// `ZoteroRpc` implements this trait in this file. Presumably the trait exists
/// so other callers (for example test doubles) can be substituted — confirm
/// against the call sites.
pub trait RpcCaller {
    /// Invokes `method` with the optional `params`.
    ///
    /// Returns the resulting JSON value on success, or the failure rendered
    /// as a `String`.
    fn call(&mut self, method: &str, params: Option<Value>) -> Result<Value, String>;
}
31
/// Serializable, CLI-facing description of one OCR provider.
///
/// Built from `zotron_types::OcrProviderSpec` by `cli_ocr_provider_spec`.
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
pub struct CliOcrProviderSpec {
    /// Provider identifier; filled from the upstream `provider_key`.
    pub id: &'static str,
    /// Filled from the same upstream `provider_key` as `id`.
    pub provider: &'static str,
    /// String form of the upstream request style (`request_style.as_str()`).
    pub request_style: &'static str,
    /// Copied verbatim from the upstream spec's `auth` field.
    pub auth: &'static str,
    /// Copied verbatim from the upstream spec's `auth_header` field.
    pub auth_header: &'static str,
    /// Copied verbatim from the upstream spec's `supports_pdf_direct` field.
    pub supports_pdf_direct: bool,
    /// Copied verbatim from the upstream spec's `key_field` field.
    pub key_field: &'static str,
}
42
/// Serializable, CLI-facing description of one embedding provider.
///
/// Built by `embedding_provider_spec` from the `zotron_types` spec.
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
pub struct CliEmbeddingProviderSpec {
    /// Copied from the upstream spec's `id`.
    pub id: &'static str,
    /// Copied from the upstream spec's `provider_key`.
    pub provider: &'static str,
    /// Upstream request style as a string, except that the provider key
    /// `"alibaba"` is reported as `"dashscope"`.
    pub request_style: &'static str,
    /// Upstream default URL, or the empty string when upstream has none.
    pub default_url: String,
    /// Copied from the upstream spec's `default_model`.
    pub default_model: &'static str,
    /// Copied from the upstream spec's `auth`.
    pub auth: &'static str,
    /// Copied from the upstream spec's `key_field`.
    pub key_field: &'static str,
}
53
54pub fn ocr_provider_specs() -> Vec<CliOcrProviderSpec> {
55    builtin_ocr_provider_specs()
56        .into_iter()
57        .map(cli_ocr_provider_spec)
58        .collect()
59}
60
61pub fn ocr_provider_spec(provider: &str) -> Result<CliOcrProviderSpec, String> {
62    zotron_types::ocr_provider_spec(provider).map(cli_ocr_provider_spec)
63}
64
65pub fn embedding_provider_spec(provider: &str) -> Result<CliEmbeddingProviderSpec, String> {
66    let spec = zotron_types::embedding_provider_spec(provider)?;
67    Ok(CliEmbeddingProviderSpec {
68        id: spec.id,
69        provider: spec.provider_key,
70        request_style: if spec.provider_key == "alibaba" {
71            "dashscope"
72        } else {
73            spec.request_style.as_str()
74        },
75        default_url: spec.default_url.unwrap_or("").to_string(),
76        default_model: spec.default_model,
77        auth: spec.auth,
78        key_field: spec.key_field,
79    })
80}
81
82pub fn chunks_from_blocks(blocks: &[Value], max_chars: usize) -> Result<Vec<Value>, String> {
83    let typed = blocks
84        .iter()
85        .map(json_block_to_pdf_block)
86        .collect::<Result<Vec<_>, _>>()?;
87    let chunks = zotron_types::chunks_from_blocks(&typed, max_chars);
88    chunks
89        .into_iter()
90        .map(|chunk| chunk_to_cli_value(&chunk, &typed))
91        .collect()
92}
93
94fn cli_ocr_provider_spec(spec: zotron_types::OcrProviderSpec) -> CliOcrProviderSpec {
95    CliOcrProviderSpec {
96        id: spec.provider_key,
97        provider: spec.provider_key,
98        request_style: spec.request_style.as_str(),
99        auth: spec.auth,
100        auth_header: spec.auth_header,
101        supports_pdf_direct: spec.supports_pdf_direct,
102        key_field: spec.key_field,
103    }
104}
105
106fn json_block_to_pdf_block(value: &Value) -> Result<zotron_types::PdfEvidenceBlock, String> {
107    let block_key = value
108        .get("block_key")
109        .and_then(Value::as_str)
110        .ok_or_else(|| "block missing block_key".to_string())?
111        .to_string();
112    let item_key = value
113        .get("item_key")
114        .and_then(Value::as_str)
115        .ok_or_else(|| "block missing item_key".to_string())?
116        .to_string();
117    let attachment_key = value
118        .get("attachment_key")
119        .and_then(Value::as_str)
120        .ok_or_else(|| "block missing attachment_key".to_string())?
121        .to_string();
122    let page_idx = value
123        .get("page_idx")
124        .or_else(|| value.get("page"))
125        .and_then(Value::as_u64)
126        .unwrap_or(1);
127    let block_type = value
128        .get("type")
129        .or_else(|| value.get("block_type"))
130        .and_then(Value::as_str)
131        .unwrap_or("paragraph")
132        .to_string();
133    let section_path = value
134        .get("section_path")
135        .and_then(Value::as_array)
136        .map(|items| {
137            items
138                .iter()
139                .filter_map(Value::as_str)
140                .map(ToString::to_string)
141                .collect::<Vec<_>>()
142        })
143        .unwrap_or_default();
144    let text = value
145        .get("text")
146        .and_then(Value::as_str)
147        .unwrap_or("")
148        .to_string();
149    let bbox = value.get("bbox").and_then(value_bbox4);
150
151    Ok(zotron_types::PdfEvidenceBlock {
152        block_key,
153        item_key,
154        attachment_key,
155        page_idx,
156        block_type,
157        bbox,
158        section_path,
159        text,
160    })
161}
162
163fn chunk_to_cli_value(
164    chunk: &zotron_types::StructureChunk,
165    blocks: &[zotron_types::PdfEvidenceBlock],
166) -> Result<Value, String> {
167    let refs = chunk
168        .block_keys
169        .iter()
170        .filter_map(|key| blocks.iter().find(|block| &block.block_key == key))
171        .map(|block| {
172            serde_json::json!({
173                "block_key": block.block_key,
174                "page_idx": block.page_idx,
175                "bbox": block.bbox.map(|bbox| bbox.iter().map(|n| {
176                    if n.fract() == 0.0 {
177                        Value::from(*n as i64)
178                    } else {
179                        Value::from(*n)
180                    }
181                }).collect::<Vec<_>>()),
182            })
183        })
184        .collect::<Vec<_>>();
185    Ok(serde_json::json!({
186        "chunk_key": chunk.chunk_key,
187        "item_key": chunk.item_key,
188        "attachment_key": chunk.attachment_key,
189        "block_keys": chunk.block_keys,
190        "section_path": chunk.section_path,
191        "text": chunk.text,
192        "page_start": chunk.page_start,
193        "page_end": chunk.page_end,
194        "evidence_refs": refs,
195    }))
196}
197
198fn value_bbox4(value: &Value) -> Option<[f64; 4]> {
199    let arr = value.as_array()?;
200    if arr.len() != 4 {
201        return None;
202    }
203    Some([
204        arr[0].as_f64()?,
205        arr[1].as_f64()?,
206        arr[2].as_f64()?,
207        arr[3].as_f64()?,
208    ])
209}
210
211impl RpcCaller for ZoteroRpc {
212    fn call(&mut self, method: &str, params: Option<Value>) -> Result<Value, String> {
213        self.call(method, params).map_err(|err| err.to_string())
214    }
215}
216
// Top-level clap parser for the `zotron` binary; it only carries the selected
// subcommand.
// NOTE: clap-derived items use `///` doc comments as help text, so notes here
// are plain `//` comments to leave the CLI output unchanged.
#[derive(Debug, Parser)]
#[command(name = "zotron", about = "Rust client + CLI for the Zotron XPI")]
struct Cli {
    // The subcommand chosen on the command line (see `Command`).
    #[command(subcommand)]
    command: Command,
}
223
// Subcommands under `ocr` (wired in through `Command::Ocr`).
// NOTE: clap-derived items use `///` doc comments as help text, so notes here
// are plain `//` comments to leave the CLI output unchanged.
#[derive(Debug, Subcommand)]
enum OcrCommand {
    /// Print supported OCR provider contracts.
    Providers,
    /// Execute an OCR provider request from JSON and emit normalized blocks.
    #[command(name = "run")]
    Run {
        // Required provider name; no default is declared.
        #[arg(long)]
        provider: String,
        // `--input` and `--file` are both optional at the parser level;
        // presumably exactly one is expected — validation is not visible here.
        /// Path to an OcrRequestInput JSON file, or "-" to read stdin.
        #[arg(long)]
        input: Option<String>,
        /// Local PDF/image file to encode into an OcrRequestInput.
        #[arg(long)]
        file: Option<String>,
        /// Zotero item key used when --file builds the OCR request.
        #[arg(long = "item-key")]
        item_key: Option<String>,
        /// Zotero attachment key used when --file builds the OCR request.
        #[arg(long = "attachment-key")]
        attachment_key: Option<String>,
        /// MIME type used when --file builds the OCR request.
        #[arg(long = "mime-type")]
        mime_type: Option<String>,
        /// Override the provider endpoint, required for service-hosted PaddleOCR-VL.
        #[arg(long)]
        endpoint: Option<String>,
        /// Environment variable containing the provider bearer token.
        #[arg(long = "api-key-env")]
        api_key_env: Option<String>,
    },
    /// Show OCR statistics for a collection.
    Status {
        #[arg(long)]
        collection: String,
        // Defaults to `DEFAULT_RPC_URL` from `zotron_types`.
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Parse a Zotero PDF through MinerU and write hidden sidecar OCR/RAG artifacts.
    #[command(name = "process")]
    Process {
        // Provider name; defaults to "mineru".
        #[arg(long, default_value = "mineru")]
        provider: String,
        /// Parent Zotero item key.
        #[arg(long)]
        parent: String,
        /// Zotero PDF attachment key (auto-resolved from --parent when omitted).
        #[arg(long)]
        attachment: Option<String>,
        /// Public URL for MinerU cloud parsing. Use --result-dir/--result-zip for offline ingestion.
        #[arg(long = "source-url")]
        source_url: Option<String>,
        /// Already-extracted MinerU result directory, used by tests/offline replay.
        #[arg(long = "result-dir")]
        result_dir: Option<String>,
        /// Already-downloaded MinerU result zip, used by tests/offline replay.
        #[arg(long = "result-zip")]
        result_zip: Option<String>,
        /// Override MinerU submit endpoint.
        #[arg(long = "provider-endpoint")]
        provider_endpoint: Option<String>,
        /// Environment variable containing the MinerU bearer token.
        #[arg(long = "api-key-env", default_value = "ZOTRON_MINERU_API_KEY")]
        api_key_env: String,
        // Seconds, per the flag name; defaults to 5.
        #[arg(long = "poll-interval-seconds", default_value_t = 5)]
        poll_interval_seconds: u64,
        // Seconds, per the flag name; defaults to 900.
        #[arg(long = "timeout-seconds", default_value_t = 900)]
        timeout_seconds: u64,
        // Defaults to 1200; presumably the per-chunk character budget passed
        // to chunking — confirm in the handler.
        #[arg(long = "chunk-chars", default_value_t = 1200)]
        chunk_chars: usize,
        // Defaults to `DEFAULT_RPC_URL` from `zotron_types`.
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
298
// Top-level subcommands of the `zotron` CLI. Most variants either carry their
// own arguments or delegate to a nested subcommand enum.
// NOTE: clap-derived items use `///` doc comments as help text, so notes here
// are plain `//` comments to leave the CLI output unchanged.
#[derive(Debug, Subcommand)]
enum Command {
    /// Check that Zotero is running with the Zotron XPI enabled.
    Ping {
        // Defaults to `DEFAULT_RPC_URL` from `zotron_types`.
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Generic RPC escape hatch.
    Rpc {
        // Positional: RPC method name.
        method: String,
        // Positional: params as a JSON string; defaults to "{}".
        #[arg(default_value = "{}")]
        params_json: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        // Boolean flag; pagination behavior lives in the handler (not shown).
        #[arg(long)]
        paginate: bool,
        // Defaults to 100; presumably only relevant with `--paginate`.
        #[arg(long, default_value_t = 100)]
        page_size: usize,
    },
    /// Push prepared Zotero JSON (from file or stdin) to Zotero.
    Push {
        /// Path to a JSON file, or "-" to read from stdin.
        json_file: String,
        /// Optional PDF attachment path.
        #[arg(long)]
        pdf: Option<String>,
        /// Collection name (fuzzy) or key.
        #[arg(long)]
        collection: Option<String>,
        /// Duplicate handling: skip | update | create.
        #[arg(long = "on-duplicate", default_value = "skip")]
        on_duplicate: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        /// Parse input + resolve collection only; do not push to Zotero.
        #[arg(long = "dry-run")]
        dry_run: bool,
    },
    /// System and plugin introspection commands.
    System {
        #[command(subcommand)]
        command: SystemCommand,
    },
    /// Search items by text, tag, identifier, or structured conditions.
    Search(SearchArgs),
    /// Inspect and manage Zotero items.
    Items {
        #[command(subcommand)]
        command: ItemsCommand,
    },
    /// Inspect Zotero collections.
    Collections {
        #[command(subcommand)]
        command: CollectionsCommand,
    },
    /// Inspect Zotero notes.
    Notes {
        #[command(subcommand)]
        command: NotesCommand,
    },
    /// Inspect Zotero preferences.
    Settings {
        #[command(subcommand)]
        command: SettingsCommand,
    },
    /// Inspect and manage Zotero tags.
    Tags {
        #[command(subcommand)]
        command: TagsCommand,
    },
    /// Export items as BibTeX, RIS, CSL-JSON, or formatted bibliography.
    Export(ExportArgs),
    /// List, create, and delete PDF annotations.
    Annotations {
        #[command(subcommand)]
        command: AnnotationsCommand,
    },
    /// OCR PDFs and manage raw/block/chunk evidence artifacts.
    Ocr {
        #[command(subcommand)]
        command: OcrCommand,
    },
    /// Build and search retrieval artifacts.
    Rag {
        #[command(subcommand)]
        command: RagCommand,
    },
}
387
/// Plain bundle of retrieval-search options.
///
/// The field names and types mirror the arguments of `RagCommand::Search`
/// except for `url`; presumably the handler copies them over — confirm at the
/// construction site.
struct RagSearchOptions {
    /// Search query text.
    query: String,
    /// Optional collection restriction.
    collection: Option<String>,
    /// Item keys to restrict to; empty when none were given.
    keys: Vec<String>,
    /// Mirrors the `--zotero` flag; semantics are defined by the consumer.
    zotero: bool,
    /// Mirrors `--top-spans-per-item`.
    top_spans_per_item: u64,
    /// Mirrors `--include-fulltext-spans`.
    include_fulltext_spans: bool,
    /// Mirrors `--limit` / `--top-k`.
    top_k: u64,
    /// Output format name; the CLI restricts it to "json" or "jsonl".
    output: String,
}
398
// Subcommands under `rag` (wired in through `Command::Rag`).
// NOTE: clap-derived items use `///` doc comments as help text, so notes here
// are plain `//` comments to leave the CLI output unchanged.
#[derive(Debug, Subcommand)]
enum RagCommand {
    /// Print supported embedding provider contracts.
    #[command(name = "providers")]
    Providers,
    /// Execute an embedding provider request from JSON and emit vectors.
    #[command(name = "embed")]
    Embed {
        // Required provider name; no default is declared.
        #[arg(long)]
        provider: String,
        /// Path to an EmbeddingRequestInput JSON file, or "-" to read stdin.
        #[arg(long)]
        input: String,
        /// Override the embedding endpoint.
        #[arg(long)]
        endpoint: Option<String>,
        /// Override the embedding model.
        #[arg(long)]
        model: Option<String>,
        /// Override provider input type, for example document or query.
        #[arg(long = "input-type")]
        input_type: Option<String>,
        /// Environment variable containing the provider bearer token.
        #[arg(long = "api-key-env")]
        api_key_env: Option<String>,
    },
    /// Show index status for a collection.
    Status {
        #[arg(long)]
        collection: String,
        // Defaults to `DEFAULT_RPC_URL` from `zotron_types`.
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Emit academic-zh retrieval hits with item_key/title/text provenance.
    #[command(name = "search")]
    Search {
        // Positional: the query text.
        query: String,
        #[arg(long)]
        collection: Option<String>,
        /// Limit retrieval to one or more Zotero item keys.
        #[arg(long = "key", alias = "keys")]
        keys: Vec<String>,
        // Boolean flag; its effect is defined in the handler (not shown).
        #[arg(long)]
        zotero: bool,
        // Defaults to 3.
        #[arg(long = "top-spans-per-item", default_value_t = 3)]
        top_spans_per_item: u64,
        #[arg(long = "include-fulltext-spans")]
        include_fulltext_spans: bool,
        // Exposed as `--limit` with alias `--top-k`; defaults to 50.
        #[arg(long = "limit", alias = "top-k", default_value_t = 50)]
        top_k: u64,
        // Restricted by the parser to "json" or "jsonl"; defaults to "json".
        #[arg(long, default_value = "json", value_parser = ["json", "jsonl"])]
        output: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
455
// Subcommands under `system` (wired in through `Command::System`). Every
// variant takes `--url`, which defaults to `DEFAULT_RPC_URL`.
// NOTE: clap-derived items use `///` doc comments as help text, so notes here
// are plain `//` comments to leave the CLI output unchanged.
#[derive(Debug, Subcommand)]
enum SystemCommand {
    /// Show XPI version and exposed method metadata.
    Version {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// List all libraries (user + groups).
    Libraries {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Get statistics for the current (or specified) library.
    #[command(name = "library-stats")]
    LibraryStats {
        // Optional numeric library selector; omitted means "current" per the
        // variant's help text.
        #[arg(long)]
        library: Option<i64>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Show item type schema. Without --type, lists all types. With --type, shows fields and creator types.
    Schema {
        // Exposed on the command line as `--type`.
        #[arg(long = "type")]
        item_type: Option<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Get the currently selected Zotero collection (or null).
    #[command(name = "current-collection")]
    CurrentCollection {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// List RPC methods, or describe a specific method.
    Methods {
        /// Method name to describe. Omit to list all methods.
        method: Option<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
497
// Arguments of the top-level `search` command (`Command::Search`). Besides the
// filter flags it may carry an optional nested saved-search subcommand.
// NOTE: clap-derived items use `///` doc comments as help text, so notes here
// are plain `//` comments to leave the CLI output unchanged.
#[derive(Debug, clap::Args)]
struct SearchArgs {
    /// Search query (title/creator/year by default; PDF content with --fulltext).
    query: Option<String>,
    /// Search inside PDF full-text content instead of metadata.
    #[arg(long)]
    fulltext: bool,
    /// Filter by author/creator name (contains match).
    #[arg(long)]
    author: Option<String>,
    /// Filter by date after (YYYY or YYYY-MM-DD).
    #[arg(long)]
    after: Option<String>,
    /// Filter by date before (YYYY or YYYY-MM-DD).
    #[arg(long)]
    before: Option<String>,
    /// Filter by journal/publication title (contains match).
    #[arg(long)]
    journal: Option<String>,
    /// Filter by tag (exact match).
    #[arg(long)]
    tag: Option<String>,
    /// Find by DOI.
    #[arg(long)]
    doi: Option<String>,
    /// Find by ISBN.
    #[arg(long)]
    isbn: Option<String>,
    /// Find by ISSN.
    #[arg(long)]
    issn: Option<String>,
    /// Limit results to a collection name or key.
    #[arg(long)]
    collection: Option<String>,
    // Defaults to 50.
    #[arg(long, default_value_t = 50)]
    limit: u64,
    // Defaults to 0.
    #[arg(long, default_value_t = 0)]
    offset: u64,
    // Defaults to `DEFAULT_RPC_URL` from `zotron_types`.
    #[arg(long, default_value = DEFAULT_RPC_URL)]
    url: String,
    // Optional nested subcommand; `None` when a plain search was requested.
    #[command(subcommand)]
    management: Option<SearchManagementCommand>,
}
541
// Saved-search subcommands nested under `search` (see
// `SearchArgs::management`).
// NOTE: clap-derived items use `///` doc comments as help text, so notes here
// are plain `//` comments to leave the CLI output unchanged.
#[derive(Debug, Subcommand)]
enum SearchManagementCommand {
    /// List all saved searches in the library.
    #[command(name = "saved-searches")]
    SavedSearches {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Create a saved search with one or more conditions.
    #[command(name = "create-saved")]
    CreateSaved {
        // Positional: name of the saved search.
        name: String,
        // Repeatable `--condition`; at least one is required. The expected
        // condition syntax is defined by the handler (not shown).
        #[arg(long = "condition", required = true)]
        condition: Vec<String>,
        // Boolean flag; presumably previews without applying — confirm in the
        // handler.
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Delete a saved search by key.
    #[command(name = "delete-saved")]
    DeleteSaved {
        search_key: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
571
// Subcommands under `items` (wired in through `Command::Items`). Every variant
// takes `--url`, which defaults to `DEFAULT_RPC_URL`. Variants with a
// `dry_run` flag presumably preview the action without applying it — the
// handlers are not visible here.
// NOTE: clap-derived items use `///` doc comments as help text, so notes here
// are plain `//` comments to leave the CLI output unchanged.
#[derive(Debug, Subcommand)]
enum ItemsCommand {
    /// Add an item by DOI, ISBN, URL, local file, or manual entry (--type + --field).
    Add {
        // The source flags below are all optional at the parser level; which
        // combinations are valid is decided by the handler (not shown).
        #[arg(long)]
        doi: Option<String>,
        #[arg(long)]
        isbn: Option<String>,
        /// Web page URL to add from.
        #[arg(long = "from-url")]
        from_url: Option<String>,
        /// Local file path to add from.
        #[arg(long)]
        file: Option<String>,
        /// Item type for manual creation (e.g. journalArticle).
        #[arg(long = "type")]
        item_type: Option<String>,
        /// Field values for manual creation (e.g. title="My Paper").
        #[arg(long = "field")]
        fields: Vec<String>,
        #[arg(long)]
        collection: Option<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Update fields on an existing item.
    Update {
        // Positional: item key.
        key: String,
        // Repeatable `--field`.
        #[arg(long = "field")]
        fields: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Permanently delete an item.
    Delete {
        key: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Move one or more items to trash.
    Trash {
        // Positional, repeatable.
        items: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Restore a trashed item.
    Restore {
        item: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Merge a group of duplicate items.
    #[command(name = "merge-duplicates")]
    MergeDuplicates {
        // Positional, repeatable.
        keys: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Add a related-item link between two items.
    #[command(name = "add-related")]
    AddRelated {
        // Positional key plus a required `--target`.
        key: String,
        #[arg(long)]
        target: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Remove a related-item link between two items.
    #[command(name = "remove-related")]
    RemoveRelated {
        key: String,
        #[arg(long)]
        target: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Print the full serialization of an item by key.
    Get {
        item: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// List items in the library with optional sorting and pagination.
    List {
        // Defaults to 50.
        #[arg(long, default_value_t = 50)]
        limit: u64,
        #[arg(long, default_value_t = 0)]
        offset: u64,
        #[arg(long)]
        sort: Option<String>,
        // Free-form string at the parser level; defaults to "asc".
        #[arg(long, default_value = "asc")]
        direction: String,
        /// List trashed items instead of regular items.
        #[arg(long)]
        trash: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Run Zotero's duplicate scan and print groups.
    #[command(name = "find-duplicates")]
    FindDuplicates {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// List recently added or modified items.
    Recent {
        // Defaults to 20.
        #[arg(long, default_value_t = 20)]
        limit: u64,
        #[arg(long, default_value_t = 0)]
        offset: u64,
        // Exposed as `--type`; defaults to "added".
        #[arg(long = "type", default_value = "added")]
        recent_type: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Retrieve the full-text content of an item's attachment.
    Fulltext {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// List items related to the given item.
    Related {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Get the citation key for an item.
    #[command(name = "citation-key")]
    CitationKey {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Get the local filesystem path of an item's PDF attachment.
    Path {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// List attachments belonging to an item.
    Attachments {
        key: String,
        #[arg(long, default_value_t = 0)]
        offset: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Batch find missing PDFs in a collection via Zotero's resolver chain.
    #[command(name = "find-pdfs")]
    FindPdfs {
        #[arg(long)]
        collection: String,
        // Defaults to 0; NOTE(review): 0 presumably means "no limit" —
        // confirm in the handler.
        #[arg(long, default_value_t = 0)]
        limit: usize,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
747
// Subcommands under `settings` (wired in through `Command::Settings`).
// NOTE: clap-derived items use `///` doc comments as help text, so notes here
// are plain `//` comments to leave the CLI output unchanged.
#[derive(Debug, Subcommand)]
enum SettingsCommand {
    /// Get a single Zotero preference value.
    Get {
        // Positional: preference key.
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// List all Zotero preferences as a key->value dict.
    // Also reachable (and shown in help) under the alias `get-all`.
    #[command(visible_alias = "get-all")]
    List {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Set one or more Zotero preferences (key value pairs), or bulk-set from a JSON file.
    Set {
        // The parser accepts any number of positionals; pairing them up (and
        // rejecting an odd count) is left to the handler (not shown).
        /// key value key value ... (pairs of positional args)
        pairs: Vec<String>,
        /// Bulk-set from a JSON file.
        #[arg(long)]
        file: Option<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
775
// Subcommands under `tags` (wired in through `Command::Tags`). Every variant
// takes `--url`, which defaults to `DEFAULT_RPC_URL`.
// NOTE: clap-derived items use `///` doc comments as help text, so notes here
// are plain `//` comments to leave the CLI output unchanged.
#[derive(Debug, Subcommand)]
enum TagsCommand {
    /// List all tags in the library (flat).
    List {
        // Defaults to 200.
        #[arg(long, default_value_t = 200)]
        limit: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Rename a tag across all items.
    Rename {
        // Two positionals, in this order: old name, then new name.
        old: String,
        new: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Delete a tag library-wide.
    Delete {
        tag: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Add tags to one or more items.
    Add {
        // Positional item keys; tags come from the repeatable, required
        // `--tag` flag.
        keys: Vec<String>,
        #[arg(long = "tag", required = true)]
        tags: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Remove tags from one or more items.
    Remove {
        keys: Vec<String>,
        #[arg(long = "tag", required = true)]
        tags: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
823
// Arguments of the top-level `export` command (`Command::Export`).
// NOTE: clap-derived items use `///` doc comments as help text, so notes here
// are plain `//` comments to leave the CLI output unchanged.
#[derive(Debug, clap::Args)]
struct ExportArgs {
    /// Item keys to export.
    keys: Vec<String>,
    // Free-form string at the parser level (no `value_parser` restriction);
    // defaults to "bibtex".
    /// Output format: bibtex, ris, csl-json, bibliography.
    #[arg(long, default_value = "bibtex")]
    format: String,
    /// Export all items from this collection (name or key).
    #[arg(long)]
    collection: Option<String>,
    /// Citation style URL (only for bibliography format).
    #[arg(long, default_value = "http://www.zotero.org/styles/apa")]
    style: String,
    /// Output HTML instead of plain text (only for bibliography format).
    #[arg(long)]
    html: bool,
    // Defaults to `DEFAULT_RPC_URL` from `zotron_types`.
    #[arg(long, default_value = DEFAULT_RPC_URL)]
    url: String,
}
843
// Subcommands under `annotations` (wired in through `Command::Annotations`).
// Every variant takes `--url`, which defaults to `DEFAULT_RPC_URL`.
// NOTE: clap-derived items use `///` doc comments as help text, so notes here
// are plain `//` comments to leave the CLI output unchanged.
#[derive(Debug, Subcommand)]
enum AnnotationsCommand {
    /// List annotations on a PDF. Accepts an item key (auto-resolves to PDF) or attachment key.
    List {
        /// Item key or attachment key
        parent: String,
        /// Use a specific attachment when the item has multiple PDFs
        #[arg(long)]
        attachment: Option<String>,
        /// Include N characters of surrounding text for each annotation
        #[arg(long)]
        context: Option<u32>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Create a new annotation on a PDF. Accepts an item key (auto-resolves to PDF) or attachment key.
    Create {
        /// Item key or attachment key
        parent: String,
        /// Use a specific attachment when the item has multiple PDFs
        #[arg(long)]
        attachment: Option<String>,
        // Exposed as `--type`; optional with no parser-level default.
        #[arg(long = "type")]
        annotation_type: Option<String>,
        /// JSON annotation position, for example '{"pageIndex":0,"rects":[[10,20,30,40]]}'.
        /// Not required when --quote is given.
        #[arg(long)]
        position: Option<String>,
        /// Text to locate in the PDF and highlight. Resolves to rects automatically.
        /// Locates text headlessly (no PDF viewer required).
        #[arg(long)]
        quote: Option<String>,
        /// Restrict quote search to a specific page (0-indexed).
        #[arg(long)]
        page: Option<u32>,
        /// Zotero annotation sort index.
        #[arg(long = "sort-index")]
        sort_index: Option<String>,
        #[arg(long)]
        text: Option<String>,
        #[arg(long)]
        comment: Option<String>,
        // Defaults to "#ffd400".
        #[arg(long, default_value = "#ffd400")]
        color: String,
        // Boolean flag; presumably previews without creating — confirm in the
        // handler.
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Batch-create annotations from a JSON array on stdin or --file.
    /// Each entry: {"quote": "...", "color": "#hex", "comment": "...", "type": "highlight"}
    CreateBatch {
        /// Item key or attachment key
        parent: String,
        /// Use a specific attachment when the item has multiple PDFs
        #[arg(long)]
        attachment: Option<String>,
        /// Read annotations from a JSON file instead of stdin
        #[arg(long)]
        file: Option<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Locate a text quote in a PDF without creating an annotation.
    /// Returns page index and rects if found.
    Locate {
        /// Item key or attachment key
        parent: String,
        /// Use a specific attachment when the item has multiple PDFs
        #[arg(long)]
        attachment: Option<String>,
        /// Text to locate in the PDF
        #[arg(long)]
        quote: String,
        /// Restrict search to a specific page (0-indexed)
        #[arg(long)]
        page: Option<u32>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Delete an annotation by key.
    Delete {
        annotation_key: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
935
// CLI subcommands of the `notes` namespace, parsed through clap's `Subcommand` derive.
//
// Every variant carries its own `--url` option (defaulting to `DEFAULT_RPC_URL`);
// `command_url` reads it to decide which RPC endpoint the client talks to.
//
// NOTE: the `///` comments on the variants are picked up by clap as help text, so
// they are user-facing strings. Maintainer notes in this enum therefore use `//`.
#[derive(Debug, Subcommand)]
enum NotesCommand {
    /// List notes attached to a parent item.
    List {
        // Required `--parent` option naming the parent item.
        #[arg(long)]
        parent: String,
        // `--limit` / `--offset`, defaulting to 50 and 0.
        // NOTE(review): presumably paging controls forwarded to the RPC call —
        // the handler is not visible here, confirm there.
        #[arg(long, default_value_t = 50)]
        limit: u64,
        #[arg(long, default_value_t = 0)]
        offset: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Get a single note by key.
    Get {
        // Positional note key.
        note_key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Create a note attached to a parent item.
    Create {
        #[arg(long)]
        parent: String,
        #[arg(long)]
        content: String,
        // Exposed as a repeatable `--tag` flag; each occurrence appends one entry.
        #[arg(long = "tag")]
        tags: Vec<String>,
        // `--dry-run` switch shared by the mutating subcommands of this enum.
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Update the content of an existing note.
    Update {
        note_key: String,
        #[arg(long)]
        content: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Delete a note by key.
    Delete {
        note_key: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Search notes by text content.
    Search {
        // Positional search text.
        query: String,
        #[arg(long, default_value_t = 50)]
        limit: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
995
// CLI subcommands of the `collections` namespace, parsed through clap's
// `Subcommand` derive.
//
// Every variant carries its own `--url` option (defaulting to `DEFAULT_RPC_URL`);
// `command_url` reads it to decide which RPC endpoint the client talks to.
//
// NOTE: the `///` comments on the variants are picked up by clap as help text, so
// they are user-facing strings. Maintainer notes in this enum therefore use `//`.
#[derive(Debug, Subcommand)]
enum CollectionsCommand {
    /// List all collections in the user library (flat).
    List {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Print the collection hierarchy as a tree.
    Tree {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Get a single collection's metadata.
    Get {
        // Positional selector; the field name suggests either a collection name or
        // an id is accepted — resolution happens in the handler, not shown here.
        name_or_id: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// List all items in a collection.
    // Invoked as `get-items`, with `items` as a visible alias.
    #[command(name = "get-items", visible_alias = "items")]
    GetItems {
        name_or_id: String,
        // Optional `--limit`; `None` when the flag is omitted.
        #[arg(long)]
        limit: Option<u64>,
        #[arg(long, default_value_t = 0)]
        offset: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Show item/attachment/note/subcollection counts for a collection.
    Stats {
        name_or_id: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Rename a collection.
    Rename {
        // Two positionals, in this order: current name, then replacement name.
        old_name: String,
        new_name: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        // `--dry-run` switch shared by the mutating subcommands of this enum.
        #[arg(long)]
        dry_run: bool,
    },
    /// Create a collection, optionally nested under a parent.
    Create {
        name: String,
        #[arg(long)]
        parent: Option<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
    /// Delete a collection.
    Delete {
        name_or_id: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
    /// Add existing items to a collection.
    #[command(name = "add-items")]
    AddItems {
        collection: String,
        // Trailing positional list of item keys.
        // NOTE(review): no `required` attribute is set, so an empty list looks
        // like it parses successfully — confirm the handler copes with that.
        item_keys: Vec<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
    /// Remove items from a collection.
    #[command(name = "remove-items")]
    RemoveItems {
        collection: String,
        // Trailing positional list of item keys (same shape as `AddItems`).
        item_keys: Vec<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
}
1079
/// Output style selector handed to `format_json` when rendering a command result.
///
/// In this file the OCR provider listing is rendered with `Pretty`, while the
/// other OCR command results are rendered with `PythonCompact`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum JsonStyle {
    /// Matches Python's top-level `ping` command (`json.dumps` defaults).
    PythonCompact,
    /// Matches namespace commands that route through Python `emit(..., indent=2)`.
    Pretty,
}
1087
/// Result of a successful `parse_cli` call.
enum ParseOutcome<T> {
    /// The arguments parsed into a runnable command value.
    Command(T),
    /// Help or version output was requested; holds the text clap rendered for it.
    Display(String),
}
1092
1093fn parse_cli<T>(
1094    args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1095) -> Result<ParseOutcome<T>, String>
1096where
1097    T: Parser,
1098{
1099    match T::try_parse_from(args) {
1100        Ok(cli) => Ok(ParseOutcome::Command(cli)),
1101        Err(err)
1102            if matches!(
1103                err.kind(),
1104                ErrorKind::DisplayHelp | ErrorKind::DisplayVersion
1105            ) =>
1106        {
1107            Ok(ParseOutcome::Display(err.to_string()))
1108        }
1109        Err(err) => Err(err.to_string()),
1110    }
1111}
1112
1113pub fn format_error_json(message: &str) -> String {
1114    let message = message.trim_end();
1115    let (code, message) = split_error_code(message).unwrap_or(("RUNTIME_ERROR", message));
1116    serde_json::json!({"error": {"code": code, "message": message}}).to_string()
1117}
1118
/// Splits `message` at its first `:` into an error code and the remaining text.
///
/// Returns `Some((code, rest))` only when the part before the colon is non-empty
/// and consists solely of ASCII uppercase letters, ASCII digits and `_`; `rest`
/// has its leading whitespace removed. Returns `None` when there is no colon or
/// the prefix does not look like a code.
fn split_error_code(message: &str) -> Option<(&str, &str)> {
    let colon = message.find(':')?;
    let prefix = &message[..colon];
    let tail = &message[colon + 1..];

    // Non-ASCII characters encode to bytes >= 0x80, so a byte-wise check rejects
    // them exactly like a per-character ASCII check would.
    let looks_like_code = !prefix.is_empty()
        && prefix
            .bytes()
            .all(|byte| matches!(byte, b'A'..=b'Z' | b'0'..=b'9' | b'_'));

    looks_like_code.then(|| (prefix, tail.trim_start()))
}
1131
1132pub fn run(
1133    args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1134) -> Result<String, String> {
1135    // Emit plugin hint for Claude Code discovery
1136    if std::env::var("CLAUDECODE").as_deref() == Ok("1") {
1137        eprintln!(r#"<claude-code-hint v="1" type="plugin" value="zotron@dianzuan/zotron" />"#);
1138    }
1139
1140    let cli = match parse_cli::<Cli>(args)? {
1141        ParseOutcome::Command(cli) => cli,
1142        ParseOutcome::Display(output) => return Ok(output),
1143    };
1144    let url = command_url(&cli.command);
1145    let mut client = ZoteroRpc::new(url);
1146    run_command(cli.command, &mut client)
1147}
1148
1149pub fn run_with_client(
1150    args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1151    client: &mut impl RpcCaller,
1152) -> Result<String, String> {
1153    let cli = match parse_cli::<Cli>(args)? {
1154        ParseOutcome::Command(cli) => cli,
1155        ParseOutcome::Display(output) => return Ok(output),
1156    };
1157    run_command(cli.command, client)
1158}
1159
1160fn rag_command_url(command: &RagCommand) -> String {
1161    match command {
1162        RagCommand::Providers => DEFAULT_RPC_URL.to_string(),
1163        RagCommand::Embed { .. } => DEFAULT_RPC_URL.to_string(),
1164        RagCommand::Status { url, .. } => url.clone(),
1165        RagCommand::Search { url, .. } => url.clone(),
1166    }
1167}
1168
1169fn command_url(command: &Command) -> String {
1170    match command {
1171        Command::Ping { url }
1172        | Command::Rpc { url, .. }
1173        | Command::Push { url, .. }
1174        => url.clone(),
1175        Command::Ocr { command } => match command {
1176            OcrCommand::Providers => DEFAULT_RPC_URL.to_string(),
1177            OcrCommand::Run { .. } => DEFAULT_RPC_URL.to_string(),
1178            OcrCommand::Status { url, .. } => url.clone(),
1179            OcrCommand::Process { url, .. } => url.clone(),
1180        },
1181        Command::Rag { command } => rag_command_url(command),
1182        Command::System { command } => match command {
1183            SystemCommand::Version { url }
1184            | SystemCommand::Libraries { url }
1185            | SystemCommand::LibraryStats { url, .. }
1186            | SystemCommand::Schema { url, .. }
1187            | SystemCommand::CurrentCollection { url }
1188            | SystemCommand::Methods { url, .. } => url.clone(),
1189        },
1190        Command::Search(ref args) => match &args.management {
1191            Some(SearchManagementCommand::SavedSearches { url })
1192            | Some(SearchManagementCommand::CreateSaved { url, .. })
1193            | Some(SearchManagementCommand::DeleteSaved { url, .. }) => url.clone(),
1194            None => args.url.clone(),
1195        },
1196        Command::Items { command } => match command {
1197            ItemsCommand::Add { url, .. }
1198            | ItemsCommand::Update { url, .. }
1199            | ItemsCommand::Delete { url, .. }
1200            | ItemsCommand::Trash { url, .. }
1201            | ItemsCommand::Restore { url, .. }
1202            | ItemsCommand::MergeDuplicates { url, .. }
1203            | ItemsCommand::AddRelated { url, .. }
1204            | ItemsCommand::RemoveRelated { url, .. }
1205            | ItemsCommand::Get { url, .. }
1206            | ItemsCommand::List { url, .. }
1207            | ItemsCommand::FindDuplicates { url }
1208            | ItemsCommand::Recent { url, .. }
1209            | ItemsCommand::Fulltext { url, .. }
1210            | ItemsCommand::Related { url, .. }
1211            | ItemsCommand::CitationKey { url, .. }
1212            | ItemsCommand::Path { url, .. }
1213            | ItemsCommand::Attachments { url, .. }
1214            | ItemsCommand::FindPdfs { url, .. } => url.clone(),
1215        },
1216        Command::Collections { command } => match command {
1217            CollectionsCommand::List { url }
1218            | CollectionsCommand::Tree { url }
1219            | CollectionsCommand::Get { url, .. }
1220            | CollectionsCommand::GetItems { url, .. }
1221            | CollectionsCommand::Stats { url, .. }
1222            | CollectionsCommand::Rename { url, .. }
1223            | CollectionsCommand::Create { url, .. }
1224            | CollectionsCommand::Delete { url, .. }
1225            | CollectionsCommand::AddItems { url, .. }
1226            | CollectionsCommand::RemoveItems { url, .. } => url.clone(),
1227        },
1228        Command::Notes { command } => match command {
1229            NotesCommand::List { url, .. }
1230            | NotesCommand::Get { url, .. }
1231            | NotesCommand::Create { url, .. }
1232            | NotesCommand::Update { url, .. }
1233            | NotesCommand::Delete { url, .. }
1234            | NotesCommand::Search { url, .. } => url.clone(),
1235        },
1236        Command::Settings { command } => match command {
1237            SettingsCommand::Get { url, .. }
1238            | SettingsCommand::List { url }
1239            | SettingsCommand::Set { url, .. } => url.clone(),
1240        },
1241        Command::Tags { command } => match command {
1242            TagsCommand::List { url, .. }
1243            | TagsCommand::Rename { url, .. }
1244            | TagsCommand::Delete { url, .. }
1245            | TagsCommand::Add { url, .. }
1246            | TagsCommand::Remove { url, .. } => url.clone(),
1247        },
1248        Command::Export(ref args) => args.url.clone(),
1249        Command::Annotations { command } => match command {
1250            AnnotationsCommand::List { url, .. }
1251            | AnnotationsCommand::Create { url, .. }
1252            | AnnotationsCommand::CreateBatch { url, .. }
1253            | AnnotationsCommand::Locate { url, .. }
1254            | AnnotationsCommand::Delete { url, .. } => url.clone(),
1255        },
1256    }
1257}
1258
1259fn run_ocr_command(command: OcrCommand, client: &mut impl RpcCaller) -> Result<String, String> {
1260    if let OcrCommand::Providers = &command {
1261        return format_json(
1262            &serde_json::json!({ "providers": ocr_provider_specs() }),
1263            JsonStyle::Pretty,
1264        );
1265    }
1266    let value = match command {
1267        OcrCommand::Providers => unreachable!(),
1268        OcrCommand::Run {
1269            provider,
1270            input,
1271            file,
1272            item_key,
1273            attachment_key,
1274            mime_type,
1275            endpoint,
1276            api_key_env,
1277        } => run_ocr_run_command(OcrRunOptions {
1278            provider,
1279            input,
1280            file,
1281            item_key,
1282            attachment_key,
1283            mime_type,
1284            endpoint,
1285            api_key_env,
1286        })?,
1287        OcrCommand::Status { collection, .. } => run_ocr_status_command(client, collection)?,
1288        OcrCommand::Process {
1289            provider,
1290            parent,
1291            attachment,
1292            source_url,
1293            result_dir,
1294            result_zip,
1295            provider_endpoint,
1296            api_key_env,
1297            poll_interval_seconds,
1298            timeout_seconds,
1299            chunk_chars,
1300            ..
1301        } => run_ocr_process_command(
1302            client,
1303            OcrProcessOptions {
1304                provider,
1305                parent,
1306                attachment,
1307                source_url,
1308                result_dir,
1309                result_zip,
1310                provider_endpoint,
1311                api_key_env,
1312                poll_interval_seconds,
1313                timeout_seconds,
1314                chunk_chars,
1315            },
1316        )?,
1317    };
1318    format_json(&value, JsonStyle::PythonCompact)
1319}
1320
/// Arguments of `ocr process`, bundled so they can be passed around as one value.
struct OcrProcessOptions {
    /// OCR provider identifier, looked up via `raw_ocr_provider_spec`.
    provider: String,
    /// Key of the parent item; reported as `item_key` in the command output.
    parent: String,
    /// Attachment key. When `None`, `run_ocr_process_command` fills it in with the
    /// item's first PDF attachment before any helper reads it.
    attachment: Option<String>,
    /// Remote URL of the document (MinerU path only); cannot be combined with
    /// `result_dir` / `result_zip`.
    source_url: Option<String>,
    /// Path to an existing MinerU result directory to import.
    result_dir: Option<String>,
    /// Path to an existing MinerU result zip to import; mutually exclusive with
    /// `result_dir`.
    result_zip: Option<String>,
    /// Overrides the provider endpoint; the synchronous path otherwise falls back
    /// to the `ocr.apiUrl` setting.
    provider_endpoint: Option<String>,
    /// Name of the environment variable holding the API key (not the key itself).
    api_key_env: String,
    /// Passed to `poll_mineru_task` as the polling interval, in seconds.
    poll_interval_seconds: u64,
    /// Passed to `poll_mineru_task` as the overall timeout, in seconds.
    timeout_seconds: u64,
    /// Chunk size handed to the chunking step (`chunks_from_blocks`).
    chunk_chars: usize,
}
1334
/// Arguments of `ocr run`, bundled so they can be passed around as one value.
struct OcrRunOptions {
    /// OCR provider identifier used to build the provider request.
    provider: String,
    /// `--input`: JSON request input, read through `read_json_input`. Exactly one
    /// of `input` and `file` must be set.
    input: Option<String>,
    /// `--file`: document to build the request input from.
    file: Option<String>,
    /// Item key forwarded to `ocr_input_from_file`; only read on the `file` path.
    item_key: Option<String>,
    /// Attachment key forwarded to `ocr_input_from_file`; only read on the `file` path.
    attachment_key: Option<String>,
    /// MIME type forwarded to `ocr_input_from_file`; only read on the `file` path.
    mime_type: Option<String>,
    /// Overrides the URL that comes with the built provider request.
    endpoint: Option<String>,
    /// Passed to `provider_http_transport_with_auth` when building the transport.
    /// NOTE(review): by its name, the environment variable holding the API key —
    /// confirm against that helper.
    api_key_env: Option<String>,
}
1345
1346fn run_ocr_run_command(options: OcrRunOptions) -> Result<Value, String> {
1347    let input: OcrRequestInput = match (options.input, options.file) {
1348        (Some(input), None) => read_json_input(&input)?,
1349        (None, Some(file)) => ocr_input_from_file(
1350            file,
1351            options.item_key,
1352            options.attachment_key,
1353            options.mime_type,
1354        )?,
1355        (Some(_), Some(_)) => {
1356            return Err("INVALID_ARGS: use either --input or --file, not both".to_string())
1357        }
1358        (None, None) => return Err("INVALID_ARGS: provide --input JSON or --file".to_string()),
1359    };
1360    let request = build_ocr_provider_request(&options.provider, &input)?;
1361    let payload = if request.command.is_empty() {
1362        let method = request
1363            .method
1364            .ok_or_else(|| format!("OCR provider {} missing HTTP method", request.provider))?;
1365        let auth_scheme = raw_ocr_provider_spec(&options.provider)?.auth;
1366        let mut transport =
1367            provider_http_transport_with_auth(options.api_key_env.as_deref(), auth_scheme)?;
1368        transport.post_json(&ProviderHttpInvocation {
1369            provider: request.provider.to_string(),
1370            style: request.style.to_string(),
1371            method: method.to_string(),
1372            url: options
1373                .endpoint
1374                .or_else(|| request.url.map(ToString::to_string)),
1375            auth_header_name: request.auth_header.map(ToString::to_string),
1376            auth_header_value: None,
1377            body: request.body,
1378        })?
1379    } else {
1380        let mut command_runner = StdProviderCommandRunner;
1381        command_runner.run_json(&request.command)?
1382    };
1383    let blocks = match parse_ocr_provider_response(
1384        request.provider,
1385        &payload,
1386        &input.item_key,
1387        &input.attachment_key,
1388    ) {
1389        Ok(blocks) => blocks,
1390        Err(err) => {
1391            if let Some(task) = ocr_async_task_result(request.provider, &payload) {
1392                return Ok(task);
1393            }
1394            return Err(err);
1395        }
1396    };
1397
1398    Ok(serde_json::json!({
1399        "provider": request.provider,
1400        "blocks": blocks,
1401    }))
1402}
1403
1404fn run_ocr_process_command(
1405    client: &mut impl RpcCaller,
1406    mut options: OcrProcessOptions,
1407) -> Result<Value, String> {
1408    let spec = raw_ocr_provider_spec(&options.provider)?;
1409
1410    let attachment = match options.attachment.take() {
1411        Some(key) => key,
1412        None => resolve_first_pdf_attachment_key(client, &options.parent)?,
1413    };
1414    options.attachment = Some(attachment.clone());
1415
1416    let attachment_path = resolve_attachment_path(client, &attachment)?;
1417    let storage_dir = attachment_path
1418        .parent()
1419        .ok_or_else(|| {
1420            format!(
1421                "ATTACHMENT_PATH_INVALID: attachment path has no parent directory: {}",
1422                attachment_path.display()
1423            )
1424        })?
1425        .to_path_buf();
1426
1427    match spec.provider_key {
1428        "mineru" | "mineru-cli" => {
1429            if options.result_dir.is_some() && options.result_zip.is_some() {
1430                return Err("INVALID_ARGS: use either --result-dir or --result-zip, not both".to_string());
1431            }
1432            if options.source_url.is_some()
1433                && (options.result_dir.is_some() || options.result_zip.is_some())
1434            {
1435                return Err(
1436                    "INVALID_ARGS: --source-url cannot be combined with --result-dir/--result-zip"
1437                        .to_string(),
1438                );
1439            }
1440            let file_name = attachment_path
1441                .file_name()
1442                .and_then(|name| name.to_str())
1443                .unwrap_or("document.pdf")
1444                .to_string();
1445            let source = load_mineru_result_source(&options, &attachment_path, &file_name)?;
1446            let artifacts = persist_mineru_result_sidecars(
1447                &storage_dir, &options.parent, &attachment,
1448                &options.provider, &source, options.chunk_chars,
1449            )?;
1450            Ok(serde_json::json!({
1451                "provider": spec.provider_key,
1452                "status": "indexed",
1453                "item_key": options.parent,
1454                "attachment_key": attachment,
1455                "attachment_path": attachment_path,
1456                "storage_dir": storage_dir,
1457                "task_id": source.task_id,
1458                "state": source.state,
1459                "blocks": artifacts.block_count,
1460                "chunks": artifacts.chunk_count,
1461                "artifacts": artifacts.artifacts,
1462            }))
1463        }
1464        _ => {
1465            run_ocr_process_sync(
1466                client, &options, spec.provider_key,
1467                &attachment, &attachment_path, &storage_dir,
1468            )
1469        }
1470    }
1471}
1472
1473fn run_ocr_process_sync(
1474    client: &mut impl RpcCaller,
1475    options: &OcrProcessOptions,
1476    provider: &str,
1477    attachment_key: &str,
1478    attachment_path: &Path,
1479    storage_dir: &Path,
1480) -> Result<Value, String> {
1481    let api_url = if let Some(endpoint) = &options.provider_endpoint {
1482        endpoint.clone()
1483    } else {
1484        let settings = client.call("settings.getAll", None)?;
1485        settings.get("ocr.apiUrl")
1486            .and_then(Value::as_str)
1487            .unwrap_or("")
1488            .to_string()
1489    };
1490    if api_url.is_empty() {
1491        return Err(format!("MISSING_CONFIG: ocr.apiUrl not configured for provider {provider}"));
1492    }
1493
1494    let api_key = {
1495        let from_env = if !options.api_key_env.is_empty() {
1496            env::var(&options.api_key_env).ok().filter(|v| !v.is_empty())
1497        } else {
1498            None
1499        };
1500        from_env.unwrap_or_else(|| {
1501            client.call("settings.getRaw", Some(serde_json::json!({"key": "ocr.apiKey"})))
1502                .ok()
1503                .and_then(|raw| raw.get("ocr.apiKey").and_then(Value::as_str).map(String::from))
1504                .unwrap_or_default()
1505        })
1506    };
1507
1508    let pdf_bytes = fs::read(attachment_path)
1509        .map_err(|e| format!("READ_PDF_FAILED: {}: {e}", attachment_path.display()))?;
1510
1511    const MAX_PDF_SIZE: usize = 100 * 1024 * 1024; // 100 MB
1512    if pdf_bytes.len() > MAX_PDF_SIZE {
1513        return Err(format!(
1514            "PDF_TOO_LARGE: {} is {} MB, max {} MB",
1515            attachment_path.display(),
1516            pdf_bytes.len() / (1024 * 1024),
1517            MAX_PDF_SIZE / (1024 * 1024),
1518        ));
1519    }
1520
1521    let base64_pdf = format!("data:application/pdf;base64,{}", base64_encode(&pdf_bytes));
1522
1523    let input = OcrRequestInput {
1524        content_base64: base64_pdf,
1525        file_name: attachment_path
1526            .file_name()
1527            .and_then(|n| n.to_str())
1528            .unwrap_or("document.pdf")
1529            .to_string(),
1530        mime_type: "application/pdf".to_string(),
1531        item_key: options.parent.clone(),
1532        attachment_key: attachment_key.to_string(),
1533        source_url: None,
1534        local_path: Some(attachment_path.to_string_lossy().to_string()),
1535        output_dir: None,
1536    };
1537    let request = build_ocr_provider_request(provider, &input)?;
1538
1539    let payload = if request.command.is_empty() {
1540        let method = request
1541            .method
1542            .ok_or_else(|| format!("OCR provider {provider} missing HTTP method"))?;
1543        let spec = raw_ocr_provider_spec(provider)?;
1544
1545        let mut transport = if !api_key.is_empty() {
1546            match spec.auth {
1547                "bearer" => UreqProviderHttpTransport::with_bearer_token(&api_key),
1548                "token" => UreqProviderHttpTransport::with_api_key(format!("token {api_key}")),
1549                _ => UreqProviderHttpTransport::new(),
1550            }
1551        } else {
1552            UreqProviderHttpTransport::new()
1553        };
1554
1555        transport.post_json(&ProviderHttpInvocation {
1556            provider: request.provider.to_string(),
1557            style: request.style.to_string(),
1558            method: method.to_string(),
1559            url: Some(api_url),
1560            auth_header_name: request.auth_header.map(ToString::to_string),
1561            auth_header_value: None,
1562            body: request.body,
1563        })?
1564    } else {
1565        let mut runner = StdProviderCommandRunner;
1566        runner.run_json(&request.command)?
1567    };
1568
1569    let blocks = parse_ocr_provider_response(provider, &payload, &options.parent, attachment_key)?;
1570    let chunks = zotron_types::chunks_from_blocks(&blocks, options.chunk_chars);
1571
1572    let artifacts = vec![
1573        write_sidecar_json(
1574            storage_dir, &options.parent, attachment_key,
1575            MachineArtifactKind::OcrRaw, &payload,
1576        )?,
1577        write_sidecar_jsonl(
1578            storage_dir, &options.parent, attachment_key,
1579            MachineArtifactKind::Blocks, &blocks,
1580        )?,
1581        write_sidecar_jsonl(
1582            storage_dir, &options.parent, attachment_key,
1583            MachineArtifactKind::Chunks, &chunks,
1584        )?,
1585    ];
1586
1587    let embedding_count = embed_sidecar_chunks(client, storage_dir, &options.parent, attachment_key, &chunks);
1588
1589    Ok(serde_json::json!({
1590        "provider": provider,
1591        "status": "indexed",
1592        "item_key": options.parent,
1593        "attachment_key": attachment_key,
1594        "embeddings": embedding_count,
1595        "attachment_path": attachment_path,
1596        "storage_dir": storage_dir,
1597        "blocks": blocks.len(),
1598        "chunks": chunks.len(),
1599        "artifacts": artifacts,
1600    }))
1601}
1602
1603fn embed_sidecar_chunks(
1604    client: &mut impl RpcCaller,
1605    storage_dir: &Path,
1606    item_key: &str,
1607    _attachment_key: &str,
1608    chunks: &[zotron_types::StructureChunk],
1609) -> usize {
1610    let Ok((provider, model, api_url, api_key)) = fetch_embedding_settings(client) else {
1611        return 0;
1612    };
1613    if provider.is_empty() || (api_key.is_empty() && provider != "ollama") {
1614        return 0;
1615    }
1616    let emb_chunks: Vec<EmbeddingChunkInput> = chunks
1617        .iter()
1618        .map(|c| EmbeddingChunkInput {
1619            chunk_key: c.chunk_key.clone(),
1620            text: c.text.clone(),
1621        })
1622        .collect();
1623    if emb_chunks.is_empty() {
1624        return 0;
1625    }
1626    // Batch in groups of 20 to avoid API limits
1627    let batch_size = 20;
1628    let mut all_vectors: Vec<EmbeddingVector> = Vec::new();
1629    for batch in emb_chunks.chunks(batch_size) {
1630        let input = EmbeddingRequestInput {
1631            item_key: item_key.to_string(),
1632            chunks: batch.to_vec(),
1633            model: if model.is_empty() { None } else { Some(model.clone()) },
1634            url: if api_url.is_empty() { None } else { Some(api_url.clone()) },
1635            input_type: Some("document".to_string()),
1636        };
1637        let Ok(request) = build_embedding_provider_request(&provider, &input) else {
1638            break;
1639        };
1640        let Some(url) = request.url.as_deref() else { break };
1641        let mut http = ureq::post(url).set("Content-Type", "application/json");
1642        if let Some(auth) = request.auth_header {
1643            if !api_key.is_empty() {
1644                http = http.set(auth, &format!("Bearer {api_key}"));
1645            }
1646        }
1647        let Ok(resp) = http.send_json(&request.body) else { break };
1648        let Ok(payload): Result<Value, _> = resp.into_json() else { break };
1649        let Ok(vectors) = parse_embedding_provider_response(&provider, &payload, item_key, batch)
1650        else {
1651            break;
1652        };
1653        all_vectors.extend(vectors);
1654    }
1655    let count = all_vectors.len();
1656    if count > 0 {
1657        let filename = embedding_vector_filename(&provider, &model);
1658        let vectors_dir = storage_dir.join(".zotron").join("embeddings");
1659        fs::create_dir_all(&vectors_dir).map_err(|e| {
1660            eprintln!("warning: cannot create embeddings dir {}: {e}", vectors_dir.display());
1661            e
1662        }).ok();
1663        let vectors_path = vectors_dir.join(&filename);
1664        let mut out = String::new();
1665        for v in &all_vectors {
1666            if let Ok(line) = serde_json::to_string(v) {
1667                out.push_str(&line);
1668                out.push('\n');
1669            }
1670        }
1671        if let Err(e) = fs::write(&vectors_path, &out) {
1672            eprintln!("warning: failed to persist embeddings to {}: {e}", vectors_path.display());
1673        }
1674    }
1675    count
1676}
1677
/// A MinerU OCR result ready to be persisted, as produced by
/// `mineru_result_source_from_dir`.
///
/// NOTE(review): only `task_id` and `state` are read in the visible part of this
/// file; the remaining field notes are inferred from names and constructor
/// arguments and should be confirmed against `mineru_result_source_from_dir`.
struct MineruResultSource {
    /// MinerU task id, when the result came from a submitted task; reported as
    /// `task_id` in the `ocr process` output.
    task_id: Option<String>,
    /// Reported as `state` in the `ocr process` output.
    state: String,
    /// Directory holding the (extracted) MinerU result files.
    result_dir: PathBuf,
    /// Presumably the original result zip, when the result was loaded from one.
    raw_zip_bytes: Option<Vec<u8>>,
    /// Presumably the final task status response, when the task was polled.
    task_status: Option<Value>,
    /// Result payload as JSON (exact shape not visible here).
    payload: Value,
    /// Presumably the path of MinerU's content-list file inside `result_dir`.
    content_list_file: Option<PathBuf>,
    /// Presumably the Markdown rendition of the document, when present.
    markdown: Option<String>,
}
1688
/// Summary returned by `persist_mineru_result_sidecars`; its fields are copied
/// into the `ocr process` JSON output.
struct PersistedOcrArtifacts {
    /// Reported as `blocks` in the command output.
    block_count: usize,
    /// Reported as `chunks` in the command output.
    chunk_count: usize,
    /// Reported as `artifacts` in the command output (one JSON value per artifact).
    artifacts: Vec<Value>,
}
1694
1695fn resolve_attachment_path(
1696    client: &mut impl RpcCaller,
1697    attachment_key: &str,
1698) -> Result<PathBuf, String> {
1699    let payload = client.call(
1700        "attachments.getPath",
1701        Some(serde_json::json!({"key": attachment_key})),
1702    )?;
1703    let raw_path = payload
1704        .get("path")
1705        .and_then(Value::as_str)
1706        .filter(|path| !path.trim().is_empty())
1707        .ok_or_else(|| {
1708            format!("ATTACHMENT_PATH_NOT_FOUND: attachment {attachment_key} has no local PDF path")
1709        })?;
1710    Ok(PathBuf::from(local_path_from_zotero_path(raw_path)))
1711}
1712
1713/// Resolve the first PDF attachment key for a parent item via `attachments.list`.
1714fn resolve_first_pdf_attachment_key(
1715    client: &mut impl RpcCaller,
1716    parent_key: &str,
1717) -> Result<String, String> {
1718    let response = client.call(
1719        "attachments.list",
1720        Some(serde_json::json!({"parentKey": parent_key})),
1721    )?;
1722    // The XPI returns {items: [...], total: N}.
1723    let attachments = response
1724        .get("items")
1725        .and_then(Value::as_array)
1726        .or_else(|| response.as_array())
1727        .ok_or_else(|| {
1728            format!("NO_PDF_ATTACHMENT: no attachments found for item {parent_key}")
1729        })?;
1730    for attachment in attachments {
1731        if is_pdf_attachment(attachment) {
1732            if let Some(key) = attachment.get("key").and_then(Value::as_str) {
1733                return Ok(key.to_string());
1734            }
1735        }
1736    }
1737    Err(format!(
1738        "NO_PDF_ATTACHMENT: no PDF attachment found for item {parent_key}"
1739    ))
1740}
1741
1742fn load_mineru_result_source(
1743    options: &OcrProcessOptions,
1744    attachment_path: &Path,
1745    file_name: &str,
1746) -> Result<MineruResultSource, String> {
1747    if let Some(result_dir) = options.result_dir.as_deref() {
1748        return mineru_result_source_from_dir(PathBuf::from(result_dir), None, None, None);
1749    }
1750    if let Some(result_zip) = options.result_zip.as_deref() {
1751        let zip_path = PathBuf::from(result_zip);
1752        let zip_bytes = fs::read(&zip_path)
1753            .map_err(|err| format!("read MinerU result zip {}: {err}", zip_path.display()))?;
1754        let result_dir = extract_zip_bytes_to_temp("zotron-mineru-result", &zip_bytes)?;
1755        return mineru_result_source_from_dir(result_dir, Some(zip_bytes), None, None);
1756    }
1757
1758    let Some(source_url) = options
1759        .source_url
1760        .as_deref()
1761        .filter(|value| !value.trim().is_empty())
1762    else {
1763        return submit_mineru_local_file(options, attachment_path, file_name);
1764    };
1765    let input = OcrRequestInput {
1766        item_key: options.parent.clone(),
1767        attachment_key: options.attachment.clone().expect("attachment resolved"),
1768        file_name: file_name.to_string(),
1769        mime_type: "application/pdf".to_string(),
1770        content_base64: format!("url:{source_url}"),
1771        source_url: Some(source_url.to_string()),
1772        local_path: None,
1773        output_dir: None,
1774    };
1775    let task = submit_mineru_task(
1776        &options.provider,
1777        &input,
1778        options.provider_endpoint.clone(),
1779        &options.api_key_env,
1780    )?;
1781    let task_id = task
1782        .get("data")
1783        .and_then(|data| data.get("task_id"))
1784        .and_then(Value::as_str)
1785        .ok_or_else(|| "MinerU submit response missing data.task_id".to_string())?
1786        .to_string();
1787    let auth_header = provider_auth_header_value(&options.api_key_env, "bearer")?;
1788    let status = poll_mineru_task(
1789        options.provider_endpoint.as_deref(),
1790        &task_id,
1791        &auth_header,
1792        options.poll_interval_seconds,
1793        options.timeout_seconds,
1794    )?;
1795    let zip_url = status
1796        .pointer("/data/full_zip_url")
1797        .or_else(|| status.pointer("/data/result/full_zip_url"))
1798        .and_then(Value::as_str)
1799        .ok_or_else(|| "MinerU completed task missing data.full_zip_url".to_string())?;
1800    let zip_bytes = download_bytes(zip_url)?;
1801    let result_dir = extract_zip_bytes_to_temp("zotron-mineru-result", &zip_bytes)?;
1802    mineru_result_source_from_dir(result_dir, Some(zip_bytes), Some(status), Some(task_id))
1803}
1804
1805fn submit_mineru_local_file(
1806    options: &OcrProcessOptions,
1807    attachment_path: &Path,
1808    file_name: &str,
1809) -> Result<MineruResultSource, String> {
1810    let auth_header = provider_auth_header_value(&options.api_key_env, "bearer")?;
1811    let upload_request = create_mineru_file_upload(
1812        options.provider_endpoint.as_deref(),
1813        file_name,
1814        options.attachment.as_deref().expect("attachment resolved"),
1815        &auth_header,
1816    )?;
1817    let upload_url = upload_request
1818        .pointer("/data/file_urls/0")
1819        .or_else(|| upload_request.pointer("/data/fileUrls/0"))
1820        .and_then(Value::as_str)
1821        .ok_or_else(|| "MinerU upload URL response missing data.file_urls[0]".to_string())?;
1822    let batch_id = upload_request
1823        .pointer("/data/batch_id")
1824        .or_else(|| upload_request.pointer("/data/batchId"))
1825        .and_then(Value::as_str)
1826        .ok_or_else(|| "MinerU upload URL response missing data.batch_id".to_string())?
1827        .to_string();
1828    let bytes = fs::read(attachment_path)
1829        .map_err(|err| format!("read attachment PDF {}: {err}", attachment_path.display()))?;
1830    put_bytes(upload_url, &bytes)?;
1831    let status = poll_mineru_batch(
1832        options.provider_endpoint.as_deref(),
1833        &batch_id,
1834        &auth_header,
1835        options.poll_interval_seconds,
1836        options.timeout_seconds,
1837    )?;
1838    let zip_url = mineru_batch_zip_url(&status)
1839        .ok_or_else(|| "MinerU completed batch missing full_zip_url".to_string())?;
1840    let zip_bytes = download_bytes(&zip_url)?;
1841    let result_dir = extract_zip_bytes_to_temp("zotron-mineru-result", &zip_bytes)?;
1842    mineru_result_source_from_dir(result_dir, Some(zip_bytes), Some(status), Some(batch_id))
1843}
1844
1845fn create_mineru_file_upload(
1846    endpoint: Option<&str>,
1847    file_name: &str,
1848    data_id: &str,
1849    auth_header: &str,
1850) -> Result<Value, String> {
1851    let url = mineru_file_urls_url(endpoint);
1852    let body = serde_json::json!({
1853        "files": [{"name": file_name, "data_id": data_id}],
1854        "model_version": "vlm",
1855        "is_ocr": false,
1856        "enable_formula": true,
1857        "enable_table": true,
1858        "language": "ch",
1859        "page_ranges": "1-200",
1860    });
1861    ureq::post(&url)
1862        .set("Authorization", auth_header)
1863        .send_json(body)
1864        .map_err(|err| format!("POST {url} failed: {err}"))?
1865        .into_json::<Value>()
1866        .map_err(|err| format!("POST {url} returned invalid JSON: {err}"))
1867}
1868
1869fn put_bytes(url: &str, bytes: &[u8]) -> Result<(), String> {
1870    ureq::put(url)
1871        .send_bytes(bytes)
1872        .map_err(|err| format!("PUT {url} failed: {err}"))?;
1873    Ok(())
1874}
1875
1876fn submit_mineru_task(
1877    provider: &str,
1878    input: &OcrRequestInput,
1879    endpoint: Option<String>,
1880    api_key_env: &str,
1881) -> Result<Value, String> {
1882    let request = build_ocr_provider_request(provider, input)?;
1883    let method = request
1884        .method
1885        .ok_or_else(|| "MinerU provider missing HTTP method".to_string())?;
1886    let mut transport = provider_http_transport_with_auth(Some(api_key_env), "bearer")?;
1887    transport.post_json(&ProviderHttpInvocation {
1888        provider: request.provider.to_string(),
1889        style: request.style.to_string(),
1890        method: method.to_string(),
1891        url: endpoint.or_else(|| request.url.map(ToString::to_string)),
1892        auth_header_name: request.auth_header.map(ToString::to_string),
1893        auth_header_value: None,
1894        body: request.body,
1895    })
1896}
1897
1898fn poll_mineru_task(
1899    endpoint: Option<&str>,
1900    task_id: &str,
1901    auth_header: &str,
1902    poll_interval_seconds: u64,
1903    timeout_seconds: u64,
1904) -> Result<Value, String> {
1905    let url = mineru_task_status_url(endpoint, task_id);
1906    let started = Instant::now();
1907    loop {
1908        let status = get_json_with_auth(&url, auth_header)?;
1909        let state = status
1910            .pointer("/data/state")
1911            .or_else(|| status.pointer("/data/status"))
1912            .and_then(Value::as_str)
1913            .unwrap_or("unknown");
1914        match state {
1915            "done" | "finished" | "success" => return Ok(status),
1916            "failed" | "error" => return Err(format!("MinerU task {task_id} failed: {status}")),
1917            _ => {
1918                if started.elapsed() >= Duration::from_secs(timeout_seconds) {
1919                    return Err(format!(
1920                        "MinerU task {task_id} timed out after {timeout_seconds}s with state {state}"
1921                    ));
1922                }
1923                thread::sleep(Duration::from_secs(poll_interval_seconds.max(1)));
1924            }
1925        }
1926    }
1927}
1928
/// Builds the status URL for a single MinerU task.
///
/// `endpoint` may be the API root, the `/extract` prefix, or the full
/// `/extract/task` URL, with or without trailing slashes. All three forms
/// resolve to `<api root>/extract/task/<task_id>`. Without an endpoint the
/// public MinerU v4 URL is used.
///
/// An endpoint ending in `/extract` previously produced
/// `/extract/extract/task/<id>`; it is now normalized with the same suffix
/// rules as `mineru_api_base`.
fn mineru_task_status_url(endpoint: Option<&str>, task_id: &str) -> String {
    let base = endpoint
        .unwrap_or("https://mineru.net/api/v4/extract/task")
        .trim_end_matches('/');
    let api_root = base
        .strip_suffix("/extract/task")
        .or_else(|| base.strip_suffix("/extract"))
        .unwrap_or(base);
    format!("{api_root}/extract/task/{task_id}")
}
1939
1940fn mineru_file_urls_url(endpoint: Option<&str>) -> String {
1941    let base = mineru_api_base(endpoint);
1942    format!("{base}/file-urls/batch")
1943}
1944
1945fn mineru_batch_status_url(endpoint: Option<&str>, batch_id: &str) -> String {
1946    let base = mineru_api_base(endpoint);
1947    format!("{base}/extract-results/batch/{batch_id}")
1948}
1949
/// Reduces a configured MinerU endpoint to its API root.
///
/// Trailing slashes are dropped, then a trailing `/extract/task` or `/extract`
/// segment is removed. Without an endpoint the public MinerU v4 URL is used.
fn mineru_api_base(endpoint: Option<&str>) -> String {
    const ENDPOINT_SUFFIXES: &[&str] = &["/extract/task", "/extract"];
    let trimmed = endpoint
        .unwrap_or("https://mineru.net/api/v4/extract/task")
        .trim_end_matches('/');
    ENDPOINT_SUFFIXES
        .iter()
        .find_map(|suffix| trimmed.strip_suffix(*suffix))
        .unwrap_or(trimmed)
        .to_string()
}
1962
1963fn poll_mineru_batch(
1964    endpoint: Option<&str>,
1965    batch_id: &str,
1966    auth_header: &str,
1967    poll_interval_seconds: u64,
1968    timeout_seconds: u64,
1969) -> Result<Value, String> {
1970    let url = mineru_batch_status_url(endpoint, batch_id);
1971    let started = Instant::now();
1972    loop {
1973        let status = get_json_with_auth(&url, auth_header)?;
1974        let state = mineru_batch_state(&status).unwrap_or("unknown");
1975        match state {
1976            "done" | "finished" | "success" => return Ok(status),
1977            "failed" | "error" => return Err(format!("MinerU batch {batch_id} failed: {status}")),
1978            _ => {
1979                if started.elapsed() >= Duration::from_secs(timeout_seconds) {
1980                    return Err(format!(
1981                        "MinerU batch {batch_id} timed out after {timeout_seconds}s with state {state}"
1982                    ));
1983                }
1984                thread::sleep(Duration::from_secs(poll_interval_seconds.max(1)));
1985            }
1986        }
1987    }
1988}
1989
1990fn mineru_batch_state(status: &Value) -> Option<&str> {
1991    status
1992        .pointer("/data/extract_result/0/state")
1993        .or_else(|| status.pointer("/data/extractResult/0/state"))
1994        .or_else(|| status.pointer("/data/state"))
1995        .and_then(Value::as_str)
1996}
1997
1998fn mineru_batch_zip_url(status: &Value) -> Option<String> {
1999    status
2000        .pointer("/data/extract_result/0/full_zip_url")
2001        .or_else(|| status.pointer("/data/extractResult/0/full_zip_url"))
2002        .or_else(|| status.pointer("/data/full_zip_url"))
2003        .and_then(Value::as_str)
2004        .map(ToString::to_string)
2005}
2006
/// Builds an `Authorization` header value from the credential stored in the
/// environment variable `api_key_env`.
///
/// The credential is trimmed. For the `bearer` and `token` schemes the
/// matching prefix (`"Bearer "` / `"token "`) is added unless the credential
/// already starts with it; any other scheme returns the trimmed credential
/// unchanged.
///
/// # Errors
/// Fails when the variable cannot be read or is blank after trimming.
fn provider_auth_header_value(api_key_env: &str, auth_scheme: &str) -> Result<String, String> {
    let raw = match env::var(api_key_env) {
        Ok(value) => value,
        Err(_) => return Err(format!("missing provider credential env var {api_key_env}")),
    };
    let token = raw.trim();
    if token.is_empty() {
        return Err(format!(
            "provider credential env var {api_key_env} is empty"
        ));
    }
    let prefix = match auth_scheme {
        "bearer" => "Bearer ",
        "token" => "token ",
        _ => return Ok(token.to_string()),
    };
    if token.starts_with(prefix) {
        Ok(token.to_string())
    } else {
        Ok(format!("{prefix}{token}"))
    }
}
2024
2025fn get_json_with_auth(url: &str, auth_header: &str) -> Result<Value, String> {
2026    ureq::get(url)
2027        .set("Authorization", auth_header)
2028        .call()
2029        .map_err(|err| format!("GET {url} failed: {err}"))?
2030        .into_json::<Value>()
2031        .map_err(|err| format!("GET {url} returned invalid JSON: {err}"))
2032}
2033
2034fn download_bytes(url: &str) -> Result<Vec<u8>, String> {
2035    let response = ureq::get(url)
2036        .call()
2037        .map_err(|err| format!("download {url} failed: {err}"))?;
2038    let mut bytes = Vec::new();
2039    response
2040        .into_reader()
2041        .read_to_end(&mut bytes)
2042        .map_err(|err| format!("read download {url}: {err}"))?;
2043    Ok(bytes)
2044}
2045
2046fn extract_zip_bytes_to_temp(prefix: &str, zip_bytes: &[u8]) -> Result<PathBuf, String> {
2047    let dir = unique_temp_path(prefix);
2048    fs::create_dir_all(&dir).map_err(|err| format!("create temp dir {}: {err}", dir.display()))?;
2049    let zip_path = dir.with_extension("zip");
2050    fs::write(&zip_path, zip_bytes)
2051        .map_err(|err| format!("write temp zip {}: {err}", zip_path.display()))?;
2052    let output = ProcessCommand::new("unzip")
2053        .arg("-q")
2054        .arg("-o")
2055        .arg(&zip_path)
2056        .arg("-d")
2057        .arg(&dir)
2058        .output()
2059        .map_err(|err| format!("run unzip: {err}"))?;
2060    if !output.status.success() {
2061        return Err(format!(
2062            "unzip {} failed: {}",
2063            zip_path.display(),
2064            String::from_utf8_lossy(&output.stderr).trim()
2065        ));
2066    }
2067    Ok(dir)
2068}
2069
/// Returns `<temp dir>/<prefix>-<pid>-<nanos>`, where `nanos` is the current
/// time in nanoseconds since the Unix epoch (0 if the clock reads earlier
/// than the epoch). Nothing is created on disk.
fn unique_temp_path(prefix: &str) -> PathBuf {
    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_nanos(),
        Err(_) => 0,
    };
    let pid = std::process::id();
    let mut path = env::temp_dir();
    path.push(format!("{prefix}-{pid}-{nanos}"));
    path
}
2077
2078fn mineru_result_source_from_dir(
2079    result_dir: PathBuf,
2080    raw_zip_bytes: Option<Vec<u8>>,
2081    task_status: Option<Value>,
2082    task_id: Option<String>,
2083) -> Result<MineruResultSource, String> {
2084    let (payload, content_list_file) = mineru_payload_from_result_dir(&result_dir)?;
2085    let markdown = find_first_file_by_name(&result_dir, "full.md")
2086        .map(|path| {
2087            fs::read_to_string(&path)
2088                .map_err(|err| format!("read native markdown {}: {err}", path.display()))
2089        })
2090        .transpose()?;
2091    Ok(MineruResultSource {
2092        task_id,
2093        state: "done".to_string(),
2094        result_dir,
2095        raw_zip_bytes,
2096        task_status,
2097        payload,
2098        content_list_file,
2099        markdown,
2100    })
2101}
2102
2103fn mineru_payload_from_result_dir(result_dir: &Path) -> Result<(Value, Option<PathBuf>), String> {
2104    let v2 = find_first_file_with_suffix(result_dir, "_content_list_v2.json");
2105    if let Some(path) = v2 {
2106        let value = read_json_file(&path)?;
2107        return Ok((serde_json::json!({"content_list_v2": value}), Some(path)));
2108    }
2109    let content_list = find_first_file_with_suffix(result_dir, "_content_list.json");
2110    if let Some(path) = content_list {
2111        let value = read_json_file(&path)?;
2112        return Ok((serde_json::json!({"content_list": value}), Some(path)));
2113    }
2114    let layout = find_first_file_by_name(result_dir, "layout.json");
2115    if let Some(path) = layout {
2116        return Ok((read_json_file(&path)?, Some(path)));
2117    }
2118    let markdown = find_first_file_by_name(result_dir, "full.md");
2119    if let Some(path) = markdown {
2120        let text = fs::read_to_string(&path)
2121            .map_err(|err| format!("read native markdown {}: {err}", path.display()))?;
2122        return Ok((serde_json::json!({"result": text}), Some(path)));
2123    }
2124    Err(format!(
2125        "MinerU result directory {} missing content_list_v2/content_list/layout/full.md",
2126        result_dir.display()
2127    ))
2128}
2129
2130fn read_json_file(path: &Path) -> Result<Value, String> {
2131    let raw = fs::read_to_string(path).map_err(|err| format!("read {}: {err}", path.display()))?;
2132    serde_json::from_str(&raw).map_err(|err| format!("parse JSON {}: {err}", path.display()))
2133}
2134
2135fn persist_mineru_result_sidecars(
2136    storage_dir: &Path,
2137    item_key: &str,
2138    attachment_key: &str,
2139    provider: &str,
2140    source: &MineruResultSource,
2141    chunk_chars: usize,
2142) -> Result<PersistedOcrArtifacts, String> {
2143    let blocks = parse_ocr_provider_response(provider, &source.payload, item_key, attachment_key)?;
2144    let chunks = zotron_types::chunks_from_blocks(&blocks, chunk_chars);
2145    let assets = copy_mineru_assets(&source.result_dir, storage_dir)?;
2146    let raw_bundle = serde_json::json!({
2147        "provider": provider,
2148        "item_key": item_key,
2149        "attachment_key": attachment_key,
2150        "task_id": source.task_id,
2151        "state": source.state,
2152        "task_status": source.task_status,
2153        "content_list_file": source.content_list_file,
2154        "payload": source.payload,
2155    });
2156
2157    let mut artifacts = Vec::new();
2158    artifacts.push(write_sidecar_json(
2159        storage_dir,
2160        item_key,
2161        attachment_key,
2162        MachineArtifactKind::OcrRaw,
2163        &raw_bundle,
2164    )?);
2165    artifacts.push(write_sidecar_jsonl(
2166        storage_dir,
2167        item_key,
2168        attachment_key,
2169        MachineArtifactKind::Blocks,
2170        &blocks,
2171    )?);
2172    artifacts.push(write_sidecar_jsonl(
2173        storage_dir,
2174        item_key,
2175        attachment_key,
2176        MachineArtifactKind::Chunks,
2177        &chunks,
2178    )?);
2179    if let Some(markdown) = source.markdown.as_deref() {
2180        artifacts.push(write_sidecar_bytes(
2181            storage_dir,
2182            item_key,
2183            attachment_key,
2184            MachineArtifactKind::OcrNativeMarkdown,
2185            markdown.as_bytes(),
2186        )?);
2187    }
2188    artifacts.push(write_sidecar_json(
2189        storage_dir,
2190        item_key,
2191        attachment_key,
2192        MachineArtifactKind::OcrNativeAssets,
2193        &assets,
2194    )?);
2195    if let Some(bytes) = source.raw_zip_bytes.as_deref() {
2196        artifacts.push(write_extra_sidecar_bytes(
2197            storage_dir,
2198            ".zotron/ocr/latest.raw.zip",
2199            bytes,
2200        )?);
2201    }
2202
2203    Ok(PersistedOcrArtifacts {
2204        block_count: blocks.len(),
2205        chunk_count: chunks.len(),
2206        artifacts,
2207    })
2208}
2209
2210fn write_sidecar_json(
2211    storage_dir: &Path,
2212    item_key: &str,
2213    attachment_key: &str,
2214    kind: MachineArtifactKind,
2215    value: &Value,
2216) -> Result<Value, String> {
2217    let bytes = serde_json::to_vec_pretty(value).map_err(|err| err.to_string())?;
2218    write_sidecar_bytes(storage_dir, item_key, attachment_key, kind, &bytes)
2219}
2220
2221fn write_sidecar_jsonl<T: serde::Serialize>(
2222    storage_dir: &Path,
2223    item_key: &str,
2224    attachment_key: &str,
2225    kind: MachineArtifactKind,
2226    values: &[T],
2227) -> Result<Value, String> {
2228    let mut out = String::new();
2229    for value in values {
2230        out.push_str(&serde_json::to_string(value).map_err(|err| err.to_string())?);
2231        out.push('\n');
2232    }
2233    write_sidecar_bytes(storage_dir, item_key, attachment_key, kind, out.as_bytes())
2234}
2235
2236fn write_sidecar_bytes(
2237    storage_dir: &Path,
2238    item_key: &str,
2239    attachment_key: &str,
2240    kind: MachineArtifactKind,
2241    bytes: &[u8],
2242) -> Result<Value, String> {
2243    let record = write_machine_artifact_sidecar(storage_dir, item_key, attachment_key, kind, bytes)
2244        .map_err(|err| format!("write sidecar {:?}: {err}", kind))?;
2245    Ok(serde_json::json!({
2246        "kind": kind,
2247        "relative_path": record.relative_path,
2248        "absolute_path": record.absolute_path,
2249    }))
2250}
2251
2252fn write_extra_sidecar_bytes(
2253    storage_dir: &Path,
2254    relative_path: &str,
2255    bytes: &[u8],
2256) -> Result<Value, String> {
2257    let absolute_path = storage_dir.join(relative_path);
2258    if let Some(parent) = absolute_path.parent() {
2259        fs::create_dir_all(parent).map_err(|err| format!("create {}: {err}", parent.display()))?;
2260    }
2261    fs::write(&absolute_path, bytes)
2262        .map_err(|err| format!("write sidecar {}: {err}", absolute_path.display()))?;
2263    Ok(serde_json::json!({
2264        "kind": "ocr_raw_zip",
2265        "relative_path": relative_path,
2266        "absolute_path": absolute_path,
2267    }))
2268}
2269
2270fn copy_mineru_assets(result_dir: &Path, storage_dir: &Path) -> Result<Value, String> {
2271    let mut images = Vec::new();
2272    for file in collect_files(result_dir)? {
2273        if !is_image_file(&file) {
2274            continue;
2275        }
2276        let relative = file.strip_prefix(result_dir).unwrap_or(&file).to_path_buf();
2277        let destination = storage_dir.join(".zotron").join("ocr").join(&relative);
2278        if let Some(parent) = destination.parent() {
2279            fs::create_dir_all(parent)
2280                .map_err(|err| format!("create {}: {err}", parent.display()))?;
2281        }
2282        fs::copy(&file, &destination).map_err(|err| {
2283            format!(
2284                "copy MinerU asset {} to {}: {err}",
2285                file.display(),
2286                destination.display()
2287            )
2288        })?;
2289        images.push(serde_json::json!({
2290            "source_relative": relative,
2291            "sidecar_relative": PathBuf::from(".zotron").join("ocr").join(&relative),
2292            "absolute_path": destination,
2293        }));
2294    }
2295    Ok(serde_json::json!({
2296        "provider": "mineru",
2297        "images": images,
2298    }))
2299}
2300
/// Returns `true` when the path's extension is `png`, `jpg`, `jpeg`, `webp` or
/// `gif`, compared ASCII case-insensitively.
fn is_image_file(path: &Path) -> bool {
    const IMAGE_EXTENSIONS: &[&str] = &["png", "jpg", "jpeg", "webp", "gif"];
    path.extension()
        .and_then(|ext| ext.to_str())
        .is_some_and(|ext| {
            IMAGE_EXTENSIONS
                .iter()
                .any(|known| ext.eq_ignore_ascii_case(known))
        })
}
2311
2312fn find_first_file_with_suffix(root: &Path, suffix: &str) -> Option<PathBuf> {
2313    collect_files(root).ok()?.into_iter().find(|path| {
2314        path.file_name()
2315            .and_then(|name| name.to_str())
2316            .is_some_and(|name| name.ends_with(suffix))
2317    })
2318}
2319
2320fn find_first_file_by_name(root: &Path, name: &str) -> Option<PathBuf> {
2321    collect_files(root).ok()?.into_iter().find(|path| {
2322        path.file_name()
2323            .and_then(|file_name| file_name.to_str())
2324            .is_some_and(|file_name| file_name == name)
2325    })
2326}
2327
2328fn collect_files(root: &Path) -> Result<Vec<PathBuf>, String> {
2329    let mut files = Vec::new();
2330    collect_files_into(root, &mut files)?;
2331    files.sort();
2332    Ok(files)
2333}
2334
/// Appends every regular file below `root` to `files`, descending into
/// subdirectories. Entries that are neither a directory nor a regular file are
/// skipped.
fn collect_files_into(root: &Path, files: &mut Vec<PathBuf>) -> Result<(), String> {
    let entries = match fs::read_dir(root) {
        Ok(entries) => entries,
        Err(err) => return Err(format!("read dir {}: {err}", root.display())),
    };
    for entry in entries {
        let entry = match entry {
            Ok(entry) => entry,
            Err(err) => return Err(format!("read dir entry {}: {err}", root.display())),
        };
        let path = entry.path();
        let kind = match entry.file_type() {
            Ok(kind) => kind,
            Err(err) => return Err(format!("stat {}: {err}", path.display())),
        };
        if kind.is_file() {
            files.push(path);
        } else if kind.is_dir() {
            collect_files_into(&path, files)?;
        }
    }
    Ok(())
}
2350
2351fn ocr_async_task_result(provider: &str, payload: &Value) -> Option<Value> {
2352    let data = payload.get("data")?;
2353    let task_id = data.get("task_id").and_then(Value::as_str)?;
2354    Some(serde_json::json!({
2355        "provider": provider,
2356        "status": "submitted",
2357        "task_id": task_id,
2358        "state": data.get("state").and_then(Value::as_str).unwrap_or("submitted"),
2359        "result_url": data.get("full_zip_url").or_else(|| data.get("markdown_url")).cloned(),
2360        "raw": payload,
2361    }))
2362}
2363
2364fn ocr_input_from_file(
2365    file: String,
2366    item_key: Option<String>,
2367    attachment_key: Option<String>,
2368    mime_type: Option<String>,
2369) -> Result<OcrRequestInput, String> {
2370    let item_key = item_key
2371        .ok_or_else(|| "INVALID_ARGS: --item-key is required when using --file".to_string())?;
2372    let attachment_key = attachment_key.ok_or_else(|| {
2373        "INVALID_ARGS: --attachment-key is required when using --file".to_string()
2374    })?;
2375    let path = PathBuf::from(&file);
2376    let bytes = fs::read(&path).map_err(|err| format!("read {file}: {err}"))?;
2377    let file_name = path
2378        .file_name()
2379        .and_then(|name| name.to_str())
2380        .unwrap_or("document.pdf")
2381        .to_string();
2382    let mime_type = mime_type.unwrap_or_else(|| guess_mime_type(&path).to_string());
2383    Ok(OcrRequestInput {
2384        item_key,
2385        attachment_key,
2386        file_name,
2387        mime_type,
2388        content_base64: base64_encode(&bytes),
2389        source_url: None,
2390        local_path: Some(file),
2391        output_dir: None,
2392    })
2393}
2394
/// Maps a file extension (ASCII case-insensitive) to a MIME type: `png`,
/// `jpg`/`jpeg` and `webp` map to their image types; everything else,
/// including a missing extension, maps to `application/pdf`.
fn guess_mime_type(path: &Path) -> &'static str {
    const KNOWN_TYPES: &[(&str, &str)] = &[
        ("png", "image/png"),
        ("jpg", "image/jpeg"),
        ("jpeg", "image/jpeg"),
        ("webp", "image/webp"),
    ];
    let extension = path
        .extension()
        .and_then(|ext| ext.to_str())
        .unwrap_or_default();
    for (candidate, mime) in KNOWN_TYPES {
        if extension.eq_ignore_ascii_case(candidate) {
            return mime;
        }
    }
    "application/pdf"
}
2409
/// Encodes `bytes` as standard base64 (alphabet `A-Z a-z 0-9 + /`) with `=`
/// padding.
fn base64_encode(bytes: &[u8]) -> String {
    const ALPHABET: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    let mut encoded = String::with_capacity(bytes.len().div_ceil(3) * 4);
    for group in bytes.chunks(3) {
        // Pack up to three bytes into one 24-bit word; missing bytes are zero.
        let mut triple = [0u8; 3];
        triple[..group.len()].copy_from_slice(group);
        let word =
            (u32::from(triple[0]) << 16) | (u32::from(triple[1]) << 8) | u32::from(triple[2]);
        let sextet = |shift: u32| ALPHABET[((word >> shift) & 0x3f) as usize] as char;

        encoded.push(sextet(18));
        encoded.push(sextet(12));
        encoded.push(if group.len() > 1 { sextet(6) } else { '=' });
        encoded.push(if group.len() > 2 { sextet(0) } else { '=' });
    }
    encoded
}
2432
2433fn run_ocr_status_command(
2434    client: &mut impl RpcCaller,
2435    collection: String,
2436) -> Result<Value, String> {
2437    let collection_key = find_collection_in_tree(client, &collection)?
2438        .and_then(|node| node.get("key").cloned())
2439        .ok_or_else(|| format!("COLLECTION_NOT_FOUND: Collection not found: {collection:?}"))?;
2440    let raw = paginate_rpc(
2441        client,
2442        "collections.getItems",
2443        serde_json::json!({"key": collection_key}),
2444        500,
2445    )?;
2446    let items = raw
2447        .get("items")
2448        .and_then(Value::as_array)
2449        .or_else(|| raw.as_array())
2450        .ok_or_else(|| "collections.getItems returned non-array/non-items result".to_string())?
2451        .clone();
2452
2453    let mut has_ocr = 0usize;
2454    for item in &items {
2455        let item_key = item.get("key").cloned().unwrap_or(Value::Null);
2456        if has_ocr_artifact(client, &item_key)? || has_ocr_note(client, &item_key)? {
2457            has_ocr += 1;
2458        }
2459    }
2460
2461    Ok(serde_json::json!({
2462        "collection": collection,
2463        "total": items.len(),
2464        "has_ocr": has_ocr,
2465        "missing_ocr": items.len() - has_ocr,
2466    }))
2467}
2468
2469fn has_ocr_artifact(client: &mut impl RpcCaller, item_key: &Value) -> Result<bool, String> {
2470    if let Some(item_key) = item_key.as_str() {
2471        // Legacy external store lookup for artifacts produced before the
2472        // per-attachment hidden sidecar became the default.
2473        if machine_artifact_exists_for_item(
2474            machine_artifact_store_root(),
2475            item_key,
2476            MachineArtifactKind::Chunks,
2477        ) {
2478            return Ok(true);
2479        }
2480    }
2481
2482    let attachments = client.call(
2483        "attachments.list",
2484        Some(serde_json::json!({"parentKey": item_key.clone()})),
2485    )?;
2486    Ok(attachments.as_array().is_some_and(|attachments| {
2487        attachments.iter().any(|attachment| {
2488            let has_sidecar_chunks = attachment
2489                .get("path")
2490                .and_then(Value::as_str)
2491                .map(local_path_from_zotero_path)
2492                .as_deref()
2493                .map(Path::new)
2494                .and_then(Path::parent)
2495                .is_some_and(|dir| {
2496                    machine_artifact_exists_in_sidecar(dir, MachineArtifactKind::Chunks)
2497                });
2498            if has_sidecar_chunks {
2499                return true;
2500            }
2501
2502            // Read-only fallback for old Zotero-visible artifact attachments.
2503            attachment
2504                .get("title")
2505                .and_then(Value::as_str)
2506                .is_some_and(|title| title.ends_with("zotron-chunks.jsonl"))
2507        })
2508    }))
2509}
2510
2511fn local_path_from_zotero_path(path: &str) -> String {
2512    if is_wsl() && path.as_bytes().get(1) == Some(&b':') {
2513        return ProcessCommand::new("wslpath")
2514            .arg("-u")
2515            .arg(path)
2516            .output()
2517            .ok()
2518            .filter(|output| output.status.success())
2519            .and_then(|output| String::from_utf8(output.stdout).ok())
2520            .map(|converted| converted.trim().to_string())
2521            .filter(|converted| !converted.is_empty())
2522            .unwrap_or_else(|| path.to_string());
2523    }
2524    path.to_string()
2525}
2526
2527fn has_ocr_note(client: &mut impl RpcCaller, item_key: &Value) -> Result<bool, String> {
2528    let notes = client.call(
2529        "notes.list",
2530        Some(serde_json::json!({"parentKey": item_key.clone()})),
2531    )?;
2532    Ok(notes.as_array().is_some_and(|notes| {
2533        notes.iter().any(|note| {
2534            note.get("tags")
2535                .and_then(Value::as_array)
2536                .is_some_and(|tags| tags.iter().any(tag_is_ocr))
2537        })
2538    }))
2539}
2540
2541fn tag_is_ocr(tag: &Value) -> bool {
2542    tag.as_str() == Some("ocr")
2543        || tag
2544            .get("tag")
2545            .and_then(Value::as_str)
2546            .is_some_and(|tag| tag == "ocr")
2547}
2548
2549fn find_collection_in_tree(
2550    client: &mut impl RpcCaller,
2551    collection: &str,
2552) -> Result<Option<Value>, String> {
2553    let tree = client.call("collections.tree", None)?;
2554    let nodes = tree
2555        .as_array()
2556        .ok_or_else(|| "collections.tree returned non-array result".to_string())?;
2557    Ok(search_collection_tree(nodes, collection).cloned())
2558}
2559
2560fn search_collection_tree<'a>(nodes: &'a [Value], collection: &str) -> Option<&'a Value> {
2561    for node in nodes {
2562        if node.get("key").and_then(Value::as_str) == Some(collection)
2563            || node.get("name").and_then(Value::as_str) == Some(collection)
2564        {
2565            return Some(node);
2566        }
2567        if let Some(children) = node.get("children").and_then(Value::as_array) {
2568            if let Some(found) = search_collection_tree(children, collection) {
2569                return Some(found);
2570            }
2571        }
2572    }
2573    None
2574}
2575
2576fn run_command(command: Command, client: &mut impl RpcCaller) -> Result<String, String> {
2577    if let Command::Export(args) = command {
2578        return run_export(args, client);
2579    }
2580
2581    let (value, style) = match command {
2582        Command::Ping { .. } => (
2583            call_json(client, "system.ping", None)?,
2584            JsonStyle::PythonCompact,
2585        ),
2586        Command::Rpc {
2587            method,
2588            params_json,
2589            paginate,
2590            page_size,
2591            ..
2592        } => {
2593            let params = serde_json::from_str::<Value>(&params_json)
2594                .map_err(|err| format!("INVALID_JSON: params must be a JSON object: {err}"))?;
2595            if !params.is_object() {
2596                return Err("INVALID_JSON: params must be a JSON object".to_string());
2597            }
2598            if paginate {
2599                (
2600                    paginate_rpc(client, &method, params, page_size)?,
2601                    JsonStyle::Pretty,
2602                )
2603            } else {
2604                (call_json(client, &method, Some(params))?, JsonStyle::Pretty)
2605            }
2606        }
2607        Command::Push {
2608            json_file,
2609            pdf,
2610            collection,
2611            on_duplicate,
2612            dry_run,
2613            ..
2614        } => return run_push_command(json_file, pdf, collection, on_duplicate, dry_run, client),
2615        Command::System { command } => run_system_command(command, client)?,
2616        Command::Search(args) => {
2617            if let Some(mgmt) = args.management {
2618                run_search_management_command(mgmt, client)?
2619            } else {
2620                run_search(args, client)?
2621            }
2622        }
2623        Command::Items { command } => run_items_command(command, client)?,
2624        Command::Collections { command } => run_collections_command(command, client)?,
2625        Command::Notes { command } => run_notes_command(command, client)?,
2626        Command::Settings { command } => run_settings_command(command, client)?,
2627        Command::Tags { command } => run_tags_command(command, client)?,
2628        Command::Annotations { command } => run_annotations_command(command, client)?,
2629        Command::Ocr { command } => {
2630            return run_ocr_command(command, client);
2631        }
2632        Command::Rag { command } => {
2633            return run_rag_command(command, client);
2634        }
2635        Command::Export(_) => unreachable!("export commands return raw output above"),
2636    };
2637
2638    format_json(&value, style)
2639}
2640
2641fn run_rag_command(command: RagCommand, client: &mut impl RpcCaller) -> Result<String, String> {
2642    match command {
2643        RagCommand::Providers => format_json(
2644            &serde_json::json!({
2645                "providers": [
2646                    embedding_provider_spec("volcengine")?,
2647                    embedding_provider_spec("alibaba")?,
2648                    embedding_provider_spec("custom")?,
2649                ],
2650            }),
2651            JsonStyle::Pretty,
2652        ),
2653        RagCommand::Embed {
2654            provider,
2655            input,
2656            endpoint,
2657            model,
2658            input_type,
2659            api_key_env,
2660        } => {
2661            let value = run_embedding_provider_json_command(
2662                provider,
2663                input,
2664                endpoint,
2665                model,
2666                input_type,
2667                api_key_env,
2668            )?;
2669            format_json(&value, JsonStyle::PythonCompact)
2670        }
2671        RagCommand::Status { collection, .. } => {
2672            let value = rag_status_value(client, &collection)?;
2673            format_json(&value, JsonStyle::PythonCompact)
2674        }
2675        RagCommand::Search {
2676            query,
2677            collection,
2678            keys,
2679            zotero,
2680            top_spans_per_item,
2681            include_fulltext_spans,
2682            top_k,
2683            output,
2684            ..
2685        } => run_rag_search_command(
2686            client,
2687            RagSearchOptions {
2688                query,
2689                collection,
2690                keys,
2691                zotero,
2692                top_spans_per_item,
2693                include_fulltext_spans,
2694                top_k,
2695                output,
2696            },
2697        ),
2698    }
2699}
2700
2701fn run_embedding_provider_json_command(
2702    provider: String,
2703    input: String,
2704    endpoint: Option<String>,
2705    model: Option<String>,
2706    input_type: Option<String>,
2707    api_key_env: Option<String>,
2708) -> Result<Value, String> {
2709    let mut input: EmbeddingRequestInput = read_json_input(&input)?;
2710    if endpoint.is_some() {
2711        input.url = endpoint;
2712    }
2713    if model.is_some() {
2714        input.model = model;
2715    }
2716    if input_type.is_some() {
2717        input.input_type = input_type;
2718    }
2719    let mut transport = provider_http_transport(api_key_env.as_deref())?;
2720    let vectors = execute_embedding_provider_request(&provider, &input, &mut transport)?;
2721
2722    Ok(serde_json::json!({
2723        "provider": provider,
2724        "vectors": vectors,
2725    }))
2726}
2727
2728fn provider_http_transport(api_key_env: Option<&str>) -> Result<UreqProviderHttpTransport, String> {
2729    provider_http_transport_with_auth(api_key_env, "bearer")
2730}
2731
2732fn provider_http_transport_with_auth(
2733    api_key_env: Option<&str>,
2734    auth_scheme: &str,
2735) -> Result<UreqProviderHttpTransport, String> {
2736    let Some(env_name) = api_key_env else {
2737        return Ok(UreqProviderHttpTransport::new());
2738    };
2739    let token = env::var(env_name)
2740        .map_err(|_| format!("missing provider credential env var {env_name}"))?;
2741    if token.trim().is_empty() {
2742        return Err(format!("provider credential env var {env_name} is empty"));
2743    }
2744    let token = token.trim();
2745    match auth_scheme {
2746        "token" if token.starts_with("token ") => {
2747            Ok(UreqProviderHttpTransport::with_api_key(token.to_string()))
2748        }
2749        "token" => Ok(UreqProviderHttpTransport::with_api_key(format!(
2750            "token {token}"
2751        ))),
2752        "bearer" if token.starts_with("Bearer ") => {
2753            Ok(UreqProviderHttpTransport::with_api_key(token.to_string()))
2754        }
2755        "bearer" => Ok(UreqProviderHttpTransport::with_bearer_token(token)),
2756        "none" => Ok(UreqProviderHttpTransport::new()),
2757        other => Err(format!("unsupported provider auth scheme {other}")),
2758    }
2759}
2760
2761fn read_json_input<T: serde::de::DeserializeOwned>(path: &str) -> Result<T, String> {
2762    let payload = if path == "-" {
2763        let mut input = String::new();
2764        io::stdin()
2765            .read_to_string(&mut input)
2766            .map_err(|err| format!("read stdin: {err}"))?;
2767        input
2768    } else {
2769        fs::read_to_string(path).map_err(|err| format!("read {path}: {err}"))?
2770    };
2771    serde_json::from_str::<T>(&payload)
2772        .map_err(|err| format!("INVALID_JSON: Could not parse JSON: {err}"))
2773}
2774
2775fn fetch_embedding_settings(
2776    client: &mut impl RpcCaller,
2777) -> Result<(String, String, String, String), String> {
2778    let settings = client.call("settings.getAll", None)?;
2779    let provider = settings
2780        .get("embedding.provider")
2781        .and_then(Value::as_str)
2782        .unwrap_or("ollama")
2783        .to_string();
2784    let model = settings
2785        .get("embedding.model")
2786        .and_then(Value::as_str)
2787        .unwrap_or("")
2788        .to_string();
2789    let api_url = settings
2790        .get("embedding.apiUrl")
2791        .and_then(Value::as_str)
2792        .unwrap_or("")
2793        .to_string();
2794    let raw = client.call("settings.getRaw", Some(serde_json::json!({"key": "embedding.apiKey"})))?;
2795    let api_key = raw
2796        .get("embedding.apiKey")
2797        .and_then(Value::as_str)
2798        .unwrap_or("")
2799        .to_string();
2800    Ok((provider, model, api_url, api_key))
2801}
2802
2803fn fetch_retrieval_mode(client: &mut impl RpcCaller) -> String {
2804    client
2805        .call(
2806            "settings.get",
2807            Some(serde_json::json!({"key": "rag.retrievalMode"})),
2808        )
2809        .ok()
2810        .and_then(|v| {
2811            v.get("rag.retrievalMode")
2812                .and_then(Value::as_str)
2813                .map(String::from)
2814        })
2815        .unwrap_or_else(|| "hybrid".to_string())
2816}
2817
2818fn resolve_sidecar_paths(
2819    client: &mut impl RpcCaller,
2820    collection: Option<&str>,
2821    keys: &[String],
2822) -> Result<Vec<(String, String, PathBuf)>, String> {
2823    let items = if !keys.is_empty() {
2824        let mut items = Vec::new();
2825        for key in keys {
2826            let item = client.call("items.get", Some(serde_json::json!({"key": key})))?;
2827            items.push(item);
2828        }
2829        items
2830    } else if let Some(col) = collection {
2831        let col_key = resolve_collection(client, col)?;
2832        let response = client.call(
2833            "collections.getItems",
2834            Some(serde_json::json!({"key": col_key})),
2835        )?;
2836        collection_items(&response)
2837    } else {
2838        return Err("INVALID_ARGS: --collection or --key required".into());
2839    };
2840
2841    let mut results = Vec::new();
2842    for item in &items {
2843        let item_key = item.get("key").and_then(Value::as_str).unwrap_or_default();
2844        let attachments = client.call(
2845            "attachments.list",
2846            Some(serde_json::json!({"parentKey": item_key})),
2847        )?;
2848        let att_list = attachments
2849            .get("items")
2850            .and_then(Value::as_array)
2851            .or_else(|| attachments.as_array())
2852            .cloned()
2853            .unwrap_or_default();
2854        for att in &att_list {
2855            let content_type = att
2856                .get("contentType")
2857                .and_then(Value::as_str)
2858                .unwrap_or("");
2859            if content_type != "application/pdf" {
2860                continue;
2861            }
2862            let att_key = att.get("key").and_then(Value::as_str).unwrap_or_default();
2863            let path = att.get("path").and_then(Value::as_str).unwrap_or_default();
2864            if path.is_empty() {
2865                continue;
2866            }
2867            let local_path = local_path_from_zotero_path(path);
2868            let pdf_path = PathBuf::from(&local_path);
2869            if let Some(parent) = pdf_path.parent() {
2870                let sidecar_root = parent.join(".zotron");
2871                if sidecar_root.exists() {
2872                    results.push((item_key.to_string(), att_key.to_string(), sidecar_root));
2873                }
2874            }
2875        }
2876    }
2877    Ok(results)
2878}
2879
2880fn load_sidecar_chunks(sidecar_root: &Path) -> Vec<StructureChunk> {
2881    let chunks_path = sidecar_root.join("chunks").join("chunks.v1.jsonl");
2882    let Ok(content) = fs::read_to_string(&chunks_path) else {
2883        return Vec::new();
2884    };
2885    content
2886        .lines()
2887        .filter(|line| !line.trim().is_empty())
2888        .filter_map(|line| serde_json::from_str::<StructureChunk>(line).ok())
2889        .collect()
2890}
2891
/// Returns the file name used for the vectors of a `provider` / `model` pair.
///
/// Both parts are trimmed, lower-cased and have `/` replaced by `-`, then
/// joined as `<provider>--<model>.jsonl`. When both parts end up empty the
/// name is `vectors.jsonl`.
fn embedding_vector_filename(provider: &str, model: &str) -> String {
    let normalize = |raw: &str| raw.trim().to_lowercase().replace('/', "-");
    let provider_part = normalize(provider);
    let model_part = normalize(model);

    if provider_part.is_empty() && model_part.is_empty() {
        "vectors.jsonl".to_string()
    } else {
        format!("{provider_part}--{model_part}.jsonl")
    }
}
2900
2901fn load_sidecar_vectors(sidecar_root: &Path, provider: &str, model: &str) -> Vec<EmbeddingVector> {
2902    let embeddings_dir = sidecar_root.join("embeddings");
2903    let target = embedding_vector_filename(provider, model);
2904    let target_path = embeddings_dir.join(&target);
2905    if let Ok(content) = fs::read_to_string(&target_path) {
2906        let vecs: Vec<EmbeddingVector> = content
2907            .lines()
2908            .filter(|line| !line.trim().is_empty())
2909            .filter_map(|line| serde_json::from_str(line).ok())
2910            .collect();
2911        if !vecs.is_empty() {
2912            return vecs;
2913        }
2914    }
2915    // Fallback: try legacy vectors.v1.jsonl / vectors.jsonl with provider match
2916    for legacy in &["vectors.v1.jsonl", "vectors.jsonl"] {
2917        let path = embeddings_dir.join(legacy);
2918        if let Ok(content) = fs::read_to_string(&path) {
2919            let vecs: Vec<EmbeddingVector> = content
2920                .lines()
2921                .filter(|line| !line.trim().is_empty())
2922                .filter_map(|line| serde_json::from_str::<EmbeddingVector>(line).ok())
2923                .filter(|v| v.source_provider == provider || provider.is_empty())
2924                .collect();
2925            if !vecs.is_empty() {
2926                return vecs;
2927            }
2928        }
2929    }
2930    Vec::new()
2931}
2932
2933fn embed_query_text(
2934    query: &str,
2935    provider: &str,
2936    model: &str,
2937    api_url: &str,
2938    api_key: &str,
2939) -> Result<Vec<f64>, String> {
2940    let input = EmbeddingRequestInput {
2941        item_key: "query".to_string(),
2942        chunks: vec![EmbeddingChunkInput {
2943            chunk_key: "q0".to_string(),
2944            text: query.to_string(),
2945        }],
2946        model: if model.is_empty() {
2947            None
2948        } else {
2949            Some(model.to_string())
2950        },
2951        url: if api_url.is_empty() {
2952            None
2953        } else {
2954            Some(api_url.to_string())
2955        },
2956        input_type: Some("query".to_string()),
2957    };
2958    let request = build_embedding_provider_request(provider, &input)?;
2959    let url = request
2960        .url
2961        .as_deref()
2962        .ok_or("no embedding URL configured")?;
2963    let mut http = ureq::post(url).set("Content-Type", "application/json");
2964    if let Some(auth) = request.auth_header {
2965        if !api_key.is_empty() {
2966            http = http.set(auth, &format!("Bearer {api_key}"));
2967        }
2968    }
2969    let resp = http
2970        .send_json(&request.body)
2971        .map_err(|e| format!("embedding request failed: {e}"))?;
2972    let payload: Value = resp
2973        .into_json()
2974        .map_err(|e| format!("embedding response parse: {e}"))?;
2975    let vectors =
2976        parse_embedding_provider_response(provider, &payload, "query", &input.chunks)?;
2977    vectors
2978        .into_iter()
2979        .next()
2980        .map(|v| v.vector)
2981        .ok_or_else(|| "no embedding vector returned".to_string())
2982}
2983
2984fn run_rag_search_xpi_fallback(
2985    client: &mut impl RpcCaller,
2986    options: &RagSearchOptions,
2987) -> Result<String, String> {
2988    let mut params = serde_json::json!({
2989        "query": options.query,
2990        "limit": options.top_k,
2991        "top_spans_per_item": options.top_spans_per_item,
2992        "include_fulltext_spans": options.include_fulltext_spans,
2993    });
2994    if let Some(map) = params.as_object_mut() {
2995        if let Some(col) = &options.collection {
2996            map.insert("collection".into(), Value::String(col.clone()));
2997        }
2998        if !options.keys.is_empty() {
2999            map.insert(
3000                "keys".into(),
3001                Value::Array(options.keys.iter().map(|k| Value::String(k.clone())).collect()),
3002            );
3003        }
3004    }
3005    let payload = client.call("rag.searchHits", Some(params))?;
3006    let hits = payload
3007        .get("hits")
3008        .and_then(Value::as_array)
3009        .cloned()
3010        .unwrap_or_default();
3011    if options.output == "jsonl" {
3012        let mut out = String::new();
3013        for hit in &hits {
3014            out.push_str(&serde_json::to_string(hit).map_err(|e| e.to_string())?);
3015            out.push('\n');
3016        }
3017        Ok(out)
3018    } else {
3019        let total = hits.len() as u64;
3020        format_json(
3021            &normalize_list_envelope(
3022                serde_json::json!({"items": hits, "total": total}),
3023                "items",
3024                Some(options.top_k),
3025                0,
3026            ),
3027            JsonStyle::Pretty,
3028        )
3029    }
3030}
3031
3032fn run_rag_search_command(
3033    client: &mut impl RpcCaller,
3034    options: RagSearchOptions,
3035) -> Result<String, String> {
3036    // When --zotero is explicitly passed, use XPI fallback directly (backward compat).
3037    if options.zotero {
3038        if options.collection.is_none() && options.keys.is_empty() {
3039            return Err(
3040                "INVALID_ARGS: --collection or --key is required".to_string(),
3041            );
3042        }
3043        return run_rag_search_xpi_fallback(client, &options);
3044    }
3045
3046    // Hybrid path: require scope.
3047    if options.collection.is_none() && options.keys.is_empty() {
3048        return Err("INVALID_ARGS: --collection or --key required".to_string());
3049    }
3050
3051    // Step 1: resolve sidecar paths from collection/keys.
3052    let sidecars = resolve_sidecar_paths(
3053        client,
3054        options.collection.as_deref(),
3055        &options.keys,
3056    );
3057
3058    // If sidecar resolution fails or returns empty, fall back to XPI.
3059    // But propagate COLLECTION_NOT_FOUND errors directly instead of masking them.
3060    let sidecars = match sidecars {
3061        Ok(ref s) if !s.is_empty() => s,
3062        Err(ref e) if e.contains("COLLECTION_NOT_FOUND") => return Err(e.clone()),
3063        _ => return run_rag_search_xpi_fallback(client, &options),
3064    };
3065
3066    // Step 2: load all chunks and vectors from sidecars.
3067    let (emb_provider, emb_model, emb_url, emb_key) = fetch_embedding_settings(client)?;
3068    let mut all_chunks: Vec<StructureChunk> = Vec::new();
3069    let mut all_vectors: Vec<EmbeddingVector> = Vec::new();
3070    for (_item_key, _att_key, sidecar_root) in sidecars {
3071        all_chunks.extend(load_sidecar_chunks(sidecar_root));
3072        all_vectors.extend(load_sidecar_vectors(sidecar_root, &emb_provider, &emb_model));
3073    }
3074
3075    if all_chunks.is_empty() {
3076        return run_rag_search_xpi_fallback(client, &options);
3077    }
3078
3079    // Step 3: determine retrieval mode.
3080    let mode = fetch_retrieval_mode(client);
3081
3082    // Step 4: BM25 scoring (unless mode is "dense").
3083    let bm25_ranked = if mode != "dense" {
3084        bm25_score_chunks(&all_chunks, &options.query, 1.2, 0.75)
3085    } else {
3086        Vec::new()
3087    };
3088
3089    // Step 5: dense vector scoring (unless mode is "lexical" or no vectors).
3090    let dense_ranked = if mode != "lexical" && !all_vectors.is_empty() {
3091        match embed_query_text(&options.query, &emb_provider, &emb_model, &emb_url, &emb_key) {
3092            Ok(query_vec) => {
3093                let vec_map: std::collections::HashMap<&str, &[f64]> = all_vectors
3094                    .iter()
3095                    .map(|v| (v.chunk_key.as_str(), v.vector.as_slice()))
3096                    .collect();
3097                let mut scores: Vec<(usize, f64)> = all_chunks
3098                    .iter()
3099                    .enumerate()
3100                    .filter_map(|(i, chunk)| {
3101                        vec_map.get(chunk.chunk_key.as_str()).map(|stored| {
3102                            (i, cosine_similarity(&query_vec, stored))
3103                        })
3104                    })
3105                    .filter(|(_, s)| *s > 0.0)
3106                    .collect();
3107                scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
3108                scores
3109            }
3110            Err(_) => Vec::new(),
3111        }
3112    } else {
3113        Vec::new()
3114    };
3115
3116    // Step 6: merge results.
3117    let limit = options.top_k as usize;
3118    let ranked = if !bm25_ranked.is_empty() && !dense_ranked.is_empty() {
3119        rrf_merge(&bm25_ranked, &dense_ranked, 60.0, limit)
3120    } else if !bm25_ranked.is_empty() {
3121        bm25_ranked.into_iter().take(limit).collect()
3122    } else {
3123        dense_ranked.into_iter().take(limit).collect()
3124    };
3125
3126    // Step 7: apply per-item span limit.
3127    let mut per_item_count: std::collections::HashMap<&str, u64> =
3128        std::collections::HashMap::new();
3129    let mut selected: Vec<(usize, f64)> = Vec::new();
3130    for (idx, score) in &ranked {
3131        let item_key = all_chunks[*idx].item_key.as_str();
3132        let count = per_item_count.entry(item_key).or_insert(0);
3133        if *count < options.top_spans_per_item {
3134            *count += 1;
3135            selected.push((*idx, *score));
3136        }
3137    }
3138
3139    // Step 8: enrich hits with item metadata.
3140    let mut meta_cache: std::collections::HashMap<String, Value> =
3141        std::collections::HashMap::new();
3142    let mut hits: Vec<Value> = Vec::new();
3143    for (idx, score) in &selected {
3144        let chunk = &all_chunks[*idx];
3145        let meta = if let Some(cached) = meta_cache.get(&chunk.item_key) {
3146            cached.clone()
3147        } else {
3148            let fetched = client
3149                .call(
3150                    "items.get",
3151                    Some(serde_json::json!({"key": chunk.item_key})),
3152                )
3153                .unwrap_or(Value::Null);
3154            meta_cache.insert(chunk.item_key.clone(), fetched.clone());
3155            fetched
3156        };
3157        let title = meta
3158            .get("title")
3159            .and_then(Value::as_str)
3160            .unwrap_or("")
3161            .to_string();
3162        let authors = meta
3163            .get("creators")
3164            .and_then(Value::as_array)
3165            .map(|creators| {
3166                creators
3167                    .iter()
3168                    .filter_map(|c| {
3169                        let last = c.get("lastName").and_then(Value::as_str).unwrap_or("");
3170                        let first = c.get("firstName").and_then(Value::as_str).unwrap_or("");
3171                        if last.is_empty() && first.is_empty() {
3172                            None
3173                        } else {
3174                            Some(format!("{last}{first}"))
3175                        }
3176                    })
3177                    .collect::<Vec<_>>()
3178                    .join(", ")
3179            })
3180            .unwrap_or_default();
3181        let year = meta.get("date").and_then(Value::as_str).unwrap_or("");
3182        let mut hit = serde_json::json!({
3183            "item_key": chunk.item_key,
3184            "chunk_key": chunk.chunk_key,
3185            "title": title,
3186            "authors": authors,
3187            "year": year,
3188            "text": chunk.text,
3189            "page_range": chunk.page_range,
3190            "section_path": chunk.section_path,
3191            "score": score,
3192        });
3193        if options.include_fulltext_spans {
3194            hit.as_object_mut().unwrap().insert(
3195                "attachment_key".to_string(),
3196                Value::String(chunk.attachment_key.clone()),
3197            );
3198        }
3199        hits.push(hit);
3200    }
3201
3202    // Step 9: format output.
3203    if options.output == "jsonl" {
3204        let mut out = String::new();
3205        for hit in &hits {
3206            out.push_str(&serde_json::to_string(hit).map_err(|e| e.to_string())?);
3207            out.push('\n');
3208        }
3209        Ok(out)
3210    } else {
3211        let total = hits.len() as u64;
3212        format_json(
3213            &normalize_list_envelope(
3214                serde_json::json!({"items": hits, "total": total}),
3215                "items",
3216                Some(options.top_k),
3217                0,
3218            ),
3219            JsonStyle::Pretty,
3220        )
3221    }
3222}
3223
3224fn rag_status_value(client: &mut impl RpcCaller, collection: &str) -> Result<Value, String> {
3225    let raw_store_path = rag_store_path(collection);
3226    if raw_store_path.exists() {
3227        return rag_status_from_store(collection, &raw_store_path);
3228    }
3229
3230    let mut store_candidates = Vec::new();
3231    let collection_match = find_collection_in_tree(client, collection)?;
3232    if let Some(collection_node) = collection_match.as_ref() {
3233        if let Some(name) = collection_node.get("name").and_then(Value::as_str) {
3234            store_candidates.push(rag_store_path(name));
3235        }
3236        if let Some(key) = collection_node.get("key").and_then(Value::as_str) {
3237            store_candidates.push(rag_store_path(key));
3238        }
3239    }
3240    for store_path in unique_paths(store_candidates) {
3241        if store_path.exists() {
3242            return rag_status_from_store(collection, &store_path);
3243        }
3244    }
3245
3246    rag_status_from_zotero_sidecars(client, collection, collection_match)
3247}
3248
/// Removes duplicate paths, keeping the first occurrence of each and the
/// original order.
fn unique_paths(paths: Vec<PathBuf>) -> Vec<PathBuf> {
    paths.into_iter().fold(Vec::new(), |mut kept, candidate| {
        if !kept.contains(&candidate) {
            kept.push(candidate);
        }
        kept
    })
}
3258
3259fn rag_status_from_store(collection: &str, store_path: &Path) -> Result<Value, String> {
3260    let raw = fs::read_to_string(store_path)
3261        .map_err(|err| format!("read RAG store {}: {err}", store_path.display()))?;
3262    let store: Value = serde_json::from_str(&raw)
3263        .map_err(|err| format!("parse RAG store {}: {err}", store_path.display()))?;
3264    let chunks = store
3265        .get("chunks")
3266        .and_then(Value::as_array)
3267        .cloned()
3268        .unwrap_or_default();
3269    let mut item_keys = Vec::<Value>::new();
3270    for chunk in &chunks {
3271        let Some(item_key) = chunk.get("item_key") else {
3272            continue;
3273        };
3274        if !item_keys.iter().any(|seen| seen == item_key) {
3275            item_keys.push(item_key.clone());
3276        }
3277    }
3278    Ok(serde_json::json!({
3279        "status": "indexed",
3280        "collection": store.get("collection").and_then(Value::as_str).unwrap_or(collection),
3281        "collection_key": store.get("collection_key").cloned().unwrap_or(Value::Null),
3282        "model": store.get("model").cloned().unwrap_or(Value::String("unknown".to_string())),
3283        "total_chunks": chunks.len(),
3284        "total_items": item_keys.len(),
3285        "store_path": store_path.to_string_lossy(),
3286    }))
3287}
3288
3289fn rag_status_from_zotero_sidecars(
3290    client: &mut impl RpcCaller,
3291    collection: &str,
3292    collection_match: Option<Value>,
3293) -> Result<Value, String> {
3294    let collection_key = collection_match
3295        .as_ref()
3296        .and_then(|node| node.get("key").cloned())
3297        .ok_or_else(|| format!("COLLECTION_NOT_FOUND: Collection not found: {collection:?}"))?;
3298    let raw = paginate_rpc(
3299        client,
3300        "collections.getItems",
3301        serde_json::json!({"key": collection_key}),
3302        500,
3303    )?;
3304    let items = raw
3305        .get("items")
3306        .and_then(Value::as_array)
3307        .or_else(|| raw.as_array())
3308        .ok_or_else(|| "collections.getItems returned non-array/non-items result".to_string())?
3309        .clone();
3310
3311    let mut indexed_items = 0usize;
3312    let mut total_chunks = 0usize;
3313    for item in &items {
3314        let item_key = item.get("key").cloned().unwrap_or(Value::Null);
3315        let chunk_count = sidecar_chunk_count_for_item(client, &item_key)?;
3316        if chunk_count > 0 {
3317            indexed_items += 1;
3318            total_chunks += chunk_count;
3319        }
3320    }
3321
3322    if indexed_items == 0 {
3323        return Ok(serde_json::json!({
3324            "status": "not indexed",
3325            "collection": collection,
3326            "total_items": items.len(),
3327            "indexed_items": 0,
3328        }));
3329    }
3330
3331    Ok(serde_json::json!({
3332        "status": "indexed",
3333        "collection": collection,
3334        "total_chunks": total_chunks,
3335        "total_items": indexed_items,
3336        "collection_items": items.len(),
3337        "source": "zotero-sidecar",
3338    }))
3339}
3340
3341fn sidecar_chunk_count_for_item(
3342    client: &mut impl RpcCaller,
3343    item_key: &Value,
3344) -> Result<usize, String> {
3345    let attachments = client.call(
3346        "attachments.list",
3347        Some(serde_json::json!({"parentKey": item_key.clone()})),
3348    )?;
3349    let Some(attachments) = attachments.as_array() else {
3350        return Ok(0);
3351    };
3352
3353    let mut count = 0usize;
3354    for attachment in attachments {
3355        let Some(path) = attachment.get("path").and_then(Value::as_str) else {
3356            continue;
3357        };
3358        let local = local_path_from_zotero_path(path);
3359        let Some(dir) = Path::new(&local).parent() else {
3360            continue;
3361        };
3362        let Ok(bytes) = read_machine_artifact_sidecar(dir, MachineArtifactKind::Chunks) else {
3363            continue;
3364        };
3365        let text = String::from_utf8_lossy(&bytes);
3366        count += text.lines().filter(|line| !line.trim().is_empty()).count();
3367    }
3368    Ok(count)
3369}
3370
3371fn rag_store_path(collection: &str) -> PathBuf {
3372    rag_store_root().join(format!("{collection}.json"))
3373}
3374
3375fn rag_store_root() -> PathBuf {
3376    let xdg_data_home = env::var_os("XDG_DATA_HOME")
3377        .filter(|path| !path.is_empty())
3378        .map(PathBuf::from);
3379    let appdata = env::var_os("APPDATA")
3380        .filter(|path| !path.is_empty())
3381        .map(PathBuf::from);
3382    let userprofile = env::var_os("USERPROFILE")
3383        .filter(|path| !path.is_empty())
3384        .map(PathBuf::from);
3385    let home = env::var_os("HOME")
3386        .filter(|path| !path.is_empty())
3387        .map(PathBuf::from);
3388
3389    rag_store_root_for_platform(
3390        ArtifactStorePlatform::current(),
3391        xdg_data_home.as_deref(),
3392        appdata.as_deref(),
3393        userprofile.as_deref(),
3394        home.as_deref(),
3395    )
3396}
3397
3398fn rag_store_root_for_platform(
3399    platform: ArtifactStorePlatform,
3400    xdg_data_home: Option<&Path>,
3401    appdata: Option<&Path>,
3402    userprofile: Option<&Path>,
3403    home: Option<&Path>,
3404) -> PathBuf {
3405    match platform {
3406        ArtifactStorePlatform::Windows => {
3407            if let Some(path) = appdata {
3408                return path.join("Zotron").join("rag");
3409            }
3410            if let Some(path) = userprofile {
3411                return path
3412                    .join("AppData")
3413                    .join("Roaming")
3414                    .join("Zotron")
3415                    .join("rag");
3416            }
3417            if let Some(path) = home {
3418                return path
3419                    .join("AppData")
3420                    .join("Roaming")
3421                    .join("Zotron")
3422                    .join("rag");
3423            }
3424            PathBuf::from(".zotron").join("rag")
3425        }
3426        ArtifactStorePlatform::Macos => {
3427            if let Some(path) = home {
3428                return path
3429                    .join("Library")
3430                    .join("Application Support")
3431                    .join("Zotron")
3432                    .join("rag");
3433            }
3434            if let Some(path) = xdg_data_home {
3435                return path.join("zotron").join("rag");
3436            }
3437            PathBuf::from(".zotron").join("rag")
3438        }
3439        ArtifactStorePlatform::Linux | ArtifactStorePlatform::Other => xdg_data_home
3440            .map(|path| path.join("zotron").join("rag"))
3441            .or_else(|| {
3442                home.map(|path| path.join(".local").join("share").join("zotron").join("rag"))
3443            })
3444            .unwrap_or_else(|| PathBuf::from(".zotron").join("rag")),
3445    }
3446}
3447
3448fn run_push_command(
3449    json_file: String,
3450    pdf: Option<String>,
3451    collection: Option<String>,
3452    on_duplicate: String,
3453    dry_run: bool,
3454    client: &mut impl RpcCaller,
3455) -> Result<String, String> {
3456    if !matches!(on_duplicate.as_str(), "skip" | "update" | "create") {
3457        return Err(format!(
3458            "INVALID_ARGS: --on-duplicate must be skip|update|create, got {on_duplicate:?}"
3459        ));
3460    }
3461
3462    let payload = if json_file == "-" {
3463        let mut input = String::new();
3464        io::stdin()
3465            .read_to_string(&mut input)
3466            .map_err(|err| format!("read stdin: {err}"))?;
3467        input
3468    } else {
3469        fs::read_to_string(&json_file).map_err(|err| format!("read {json_file}: {err}"))?
3470    };
3471    let item_json = serde_json::from_str::<Value>(&payload)
3472        .map_err(|err| format!("INVALID_JSON: Could not parse JSON: {err}"))?;
3473
3474    // Validate required fields
3475    match item_json.get("itemType").and_then(Value::as_str) {
3476        Some(s) if !s.is_empty() => {}
3477        _ => return Err("INVALID_ARGS: input JSON must include a non-empty \"itemType\" field".to_string()),
3478    }
3479
3480    if dry_run {
3481        let collection_key = collection
3482            .as_deref()
3483            .map(|name| resolve_collection(client, name))
3484            .transpose()?;
3485        return format_json(
3486            &serde_json::json!({
3487                "ok": true,
3488                "dryRun": true,
3489                "wouldPush": {
3490                    "title": item_json.get("title").cloned().unwrap_or(Value::Null),
3491                    "itemType": item_json.get("itemType").cloned().unwrap_or(Value::Null),
3492                    "collectionKey": collection_key,
3493                    "pdfPath": pdf,
3494                    "onDuplicate": on_duplicate,
3495                }
3496            }),
3497            JsonStyle::PythonCompact,
3498        );
3499    }
3500
3501    let result = push_item(
3502        client,
3503        &item_json,
3504        pdf.as_deref(),
3505        collection.as_deref(),
3506        &on_duplicate,
3507    )?;
3508    format_json(&result, JsonStyle::PythonCompact)
3509}
3510
3511fn push_item(
3512    client: &mut impl RpcCaller,
3513    item_json: &Value,
3514    pdf_path: Option<&str>,
3515    collection: Option<&str>,
3516    on_duplicate: &str,
3517) -> Result<Value, String> {
3518    let pdf_size = if let Some(path) = pdf_path {
3519        validate_pdf_magic(path)?
3520    } else {
3521        0
3522    };
3523
3524    let collection_key = match collection {
3525        Some(name) => resolve_collection(client, name)?,
3526        None => resolve_current_collection(client)?,
3527    };
3528
3529    let dup_id = find_duplicate(client, item_json)?;
3530    if let Some(dup_id) = dup_id.as_deref().filter(|_| on_duplicate == "skip") {
3531        if !is_library_root(&collection_key) {
3532            client.call(
3533                "collections.addItems",
3534                Some(serde_json::json!({"key": collection_key, "keys": [dup_id]})),
3535            )?;
3536        }
3537        let mut pdf_attached = false;
3538        if let Some(path) = pdf_path {
3539            if !item_has_pdf_attachment(client, dup_id)? {
3540                attach_pdf(client, dup_id, path)?;
3541                pdf_attached = true;
3542            }
3543        }
3544        return Ok(push_result(
3545            "skipped_duplicate",
3546            Some(dup_id.to_string()),
3547            pdf_attached,
3548            if pdf_attached { pdf_size } else { 0 },
3549            Value::Null,
3550        ));
3551    }
3552
3553    let xpi_payload = to_xpi_payload(item_json, Some(&collection_key));
3554    let (item_key, status) =
3555        if let Some(dup_id) = dup_id.as_deref().filter(|_| on_duplicate == "update") {
3556            let mut params = serde_json::Map::new();
3557            params.insert("key".to_string(), Value::String(dup_id.to_string()));
3558            params.insert(
3559                "fields".to_string(),
3560                xpi_payload
3561                    .get("fields")
3562                    .cloned()
3563                    .unwrap_or_else(|| serde_json::json!({})),
3564            );
3565            if let Some(creators) = xpi_payload.get("creators") {
3566                params.insert("creators".to_string(), creators.clone());
3567            }
3568            if let Some(tags) = xpi_payload.get("tags") {
3569                params.insert("tags".to_string(), tags.clone());
3570            }
3571            client.call("items.update", Some(Value::Object(params)))?;
3572            (dup_id.to_string(), "updated")
3573        } else {
3574            let created = client.call("items.create", Some(xpi_payload))?;
3575            let key = created
3576                .get("key")
3577                .and_then(Value::as_str)
3578                .ok_or_else(|| format!("items.create returned unexpected shape: {created:?}"))?;
3579            (key.to_string(), "created")
3580        };
3581
3582    let mut pdf_attached = false;
3583    if let Some(path) = pdf_path {
3584        if status != "updated" || !item_has_pdf_attachment(client, &item_key)? {
3585            attach_pdf(client, &item_key, path)?;
3586            pdf_attached = true;
3587        }
3588    }
3589
3590    if status == "updated" && !is_library_root(&collection_key) {
3591        client.call(
3592            "collections.addItems",
3593            Some(serde_json::json!({"key": collection_key, "keys": [item_key]})),
3594        )?;
3595    }
3596
3597    Ok(push_result(
3598        status,
3599        Some(item_key),
3600        pdf_attached,
3601        if pdf_attached { pdf_size } else { 0 },
3602        Value::Null,
3603    ))
3604}
3605
/// Checks that `path` points at a readable file starting with the `%PDF-`
/// magic bytes and returns the file size in bytes.
///
/// Only the first few bytes are read; the size comes from the file metadata,
/// so large PDFs are not loaded into memory just to be validated.
///
/// # Errors
/// Returns an `INVALID_PDF: cannot read ...` message when the file cannot be
/// opened, read, or stat'ed, and an `INVALID_PDF: ... magic bytes` message
/// when the header does not match (including files shorter than the header).
fn validate_pdf_magic(path: &str) -> Result<u64, String> {
    const PDF_MAGIC: &[u8] = b"%PDF-";
    let cannot_read = |e: io::Error| format!("INVALID_PDF: cannot read {path}: {e}");

    let mut file = fs::File::open(path).map_err(cannot_read)?;

    // `take` + `read_to_end` tolerates short files: they yield a short header
    // that fails the comparison below instead of an I/O error.
    let mut header = Vec::with_capacity(PDF_MAGIC.len());
    file.by_ref()
        .take(PDF_MAGIC.len() as u64)
        .read_to_end(&mut header)
        .map_err(cannot_read)?;
    if header.as_slice() != PDF_MAGIC {
        return Err(format!(
            "INVALID_PDF: {path} does not start with %PDF- magic bytes"
        ));
    }

    let size = file.metadata().map_err(cannot_read)?.len();
    Ok(size)
}
3616
3617fn resolve_current_collection(client: &mut impl RpcCaller) -> Result<Value, String> {
3618    let selected = client.call("system.currentCollection", None)?;
3619    Ok(selected
3620        .get("key")
3621        .cloned()
3622        .unwrap_or_else(|| Value::Number(0.into())))
3623}
3624
3625fn find_duplicate(
3626    client: &mut impl RpcCaller,
3627    item_json: &Value,
3628) -> Result<Option<String>, String> {
3629    if let Some(doi) = item_json
3630        .get("DOI")
3631        .and_then(Value::as_str)
3632        .filter(|doi| !doi.is_empty())
3633    {
3634        let hits = client.call("search.byIdentifier", Some(serde_json::json!({"doi": doi})))?;
3635        if let Some(key) = first_hit_key(&hits) {
3636            return Ok(Some(key));
3637        }
3638    }
3639
3640    if let Some(title) = item_json
3641        .get("title")
3642        .and_then(Value::as_str)
3643        .filter(|title| title.len() >= 10)
3644    {
3645        let hits = client.call(
3646            "search.quick",
3647            Some(serde_json::json!({"query": title, "limit": 20})),
3648        )?;
3649        if let Some(items) = response_items(&hits) {
3650            for item in items {
3651                if item.get("title").and_then(Value::as_str) == Some(title) {
3652                    if let Some(key) = item.get("key").and_then(Value::as_str) {
3653                        return Ok(Some(key.to_string()));
3654                    }
3655                }
3656            }
3657        }
3658    }
3659
3660    Ok(None)
3661}
3662
3663fn first_hit_key(response: &Value) -> Option<String> {
3664    response_items(response)?
3665        .first()?
3666        .get("key")?
3667        .as_str()
3668        .map(ToString::to_string)
3669}
3670
3671fn response_items(response: &Value) -> Option<&Vec<Value>> {
3672    response
3673        .get("items")
3674        .and_then(Value::as_array)
3675        .or_else(|| response.as_array())
3676}
3677
3678fn to_xpi_payload(item_json: &Value, collection_key: Option<&Value>) -> Value {
3679    const NON_FIELD_KEYS: &[&str] = &[
3680        "itemType",
3681        "creators",
3682        "tags",
3683        "collections",
3684        "attachments",
3685        "relations",
3686        "notes",
3687        "id",
3688        "key",
3689        "version",
3690    ];
3691
3692    let mut fields = serde_json::Map::new();
3693    if let Some(item) = item_json.as_object() {
3694        for (key, value) in item {
3695            if !NON_FIELD_KEYS.contains(&key.as_str()) && !value.is_null() && value != "" {
3696                fields.insert(key.clone(), value.clone());
3697            }
3698        }
3699    }
3700
3701    let mut payload = serde_json::Map::new();
3702    payload.insert(
3703        "itemType".to_string(),
3704        item_json
3705            .get("itemType")
3706            .cloned()
3707            .unwrap_or_else(|| Value::String("journalArticle".to_string())),
3708    );
3709    payload.insert("fields".to_string(), Value::Object(fields));
3710
3711    if let Some(creators) = item_json.get("creators").and_then(Value::as_array) {
3712        if !creators.is_empty() {
3713            payload.insert(
3714                "creators".to_string(),
3715                Value::Array(
3716                    creators
3717                        .iter()
3718                        .map(|creator| {
3719                            let mut c = serde_json::json!({
3720                                "firstName": creator.get("firstName").and_then(Value::as_str).unwrap_or(""),
3721                                "lastName": creator.get("lastName").and_then(Value::as_str).unwrap_or(""),
3722                                "creatorType": creator.get("creatorType").and_then(Value::as_str).unwrap_or("author"),
3723                            });
3724                            if let Some(fm) = creator.get("fieldMode").and_then(Value::as_u64) {
3725                                c["fieldMode"] = Value::from(fm);
3726                            }
3727                            c
3728                        })
3729                        .collect(),
3730                ),
3731            );
3732        }
3733    }
3734
3735    if let Some(tags) = item_json.get("tags").and_then(Value::as_array) {
3736        if !tags.is_empty() {
3737            payload.insert(
3738                "tags".to_string(),
3739                Value::Array(
3740                    tags.iter()
3741                        .map(|tag| tag.get("tag").cloned().unwrap_or_else(|| tag.clone()))
3742                        .collect(),
3743                ),
3744            );
3745        }
3746    }
3747
3748    if let Some(collection_key) = collection_key.filter(|key| !is_library_root(key)) {
3749        payload.insert(
3750            "collections".to_string(),
3751            Value::Array(vec![collection_key.clone()]),
3752        );
3753    }
3754
3755    Value::Object(payload)
3756}
3757
3758fn item_has_pdf_attachment(client: &mut impl RpcCaller, item_key: &str) -> Result<bool, String> {
3759    let attachments = client.call(
3760        "attachments.list",
3761        Some(serde_json::json!({"parentKey": item_key})),
3762    )?;
3763    Ok(has_pdf_attachment(&attachments))
3764}
3765
3766fn attach_pdf(client: &mut impl RpcCaller, item_key: &str, path: &str) -> Result<(), String> {
3767    client.call(
3768        "attachments.add",
3769        Some(serde_json::json!({
3770            "parentKey": item_key,
3771            "path": zotero_path(path),
3772            "title": "Full Text PDF",
3773        })),
3774    )?;
3775    Ok(())
3776}
3777
3778fn zotero_path(path: &str) -> String {
3779    let path = Path::new(path)
3780        .canonicalize()
3781        .unwrap_or_else(|_| Path::new(path).to_path_buf())
3782        .to_string_lossy()
3783        .into_owned();
3784    if is_wsl() {
3785        return ProcessCommand::new("wslpath")
3786            .arg("-w")
3787            .arg(&path)
3788            .output()
3789            .ok()
3790            .filter(|output| output.status.success())
3791            .and_then(|output| String::from_utf8(output.stdout).ok())
3792            .map(|converted| converted.trim().to_string())
3793            .filter(|converted| !converted.is_empty())
3794            .unwrap_or(path);
3795    }
3796    path
3797}
3798
/// Reports whether the process appears to run under WSL: either the
/// `WSL_DISTRO_NAME` environment variable is set, or the kernel release string
/// in `/proc/sys/kernel/osrelease` contains "microsoft" (case-insensitive).
fn is_wsl() -> bool {
    let has_distro_marker = env::var_os("WSL_DISTRO_NAME").is_some();
    has_distro_marker
        || match fs::read_to_string("/proc/sys/kernel/osrelease") {
            Ok(release) => release.to_ascii_lowercase().contains("microsoft"),
            Err(_) => false,
        }
}
3807
3808fn is_library_root(value: &Value) -> bool {
3809    value.as_i64() == Some(0) || value.as_u64() == Some(0)
3810}
3811
3812fn push_result(
3813    status: &str,
3814    zotero_item_key: Option<String>,
3815    pdf_attached: bool,
3816    pdf_size_bytes: u64,
3817    error: Value,
3818) -> Value {
3819    serde_json::json!({
3820        "status": status,
3821        "zotero_item_key": zotero_item_key,
3822        "pdf_attached": pdf_attached,
3823        "pdf_size_bytes": pdf_size_bytes,
3824        "error": error,
3825    })
3826}
3827
3828fn run_search(
3829    args: SearchArgs,
3830    client: &mut impl RpcCaller,
3831) -> Result<(Value, JsonStyle), String> {
3832    let SearchArgs {
3833        query, fulltext, author, after, before, journal, tag,
3834        doi, isbn, issn, collection, limit, offset, ..
3835    } = args;
3836
3837    let has_identifier = doi.is_some() || isbn.is_some() || issn.is_some();
3838    if has_identifier {
3839        let mut params = serde_json::Map::new();
3840        if let Some(doi) = doi { params.insert("doi".into(), Value::String(doi)); }
3841        if let Some(isbn) = isbn { params.insert("isbn".into(), Value::String(isbn)); }
3842        if let Some(issn) = issn { params.insert("issn".into(), Value::String(issn)); }
3843        let value = client.call("search.byIdentifier", Some(Value::Object(params)))?;
3844        return Ok((normalize_list_envelope(value, "items", None, 0), JsonStyle::Pretty));
3845    }
3846
3847    if fulltext {
3848        let query = query.ok_or("INVALID_ARGS: --fulltext requires a search query")?;
3849        let mut params = serde_json::json!({"query": query, "limit": limit});
3850        if let (Some(col), Some(map)) = (collection, params.as_object_mut()) {
3851            map.insert("collection".into(), resolve_collection(client, &col)?);
3852        }
3853        let value = client.call("search.fulltext", Some(params))?;
3854        return Ok((normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty));
3855    }
3856
3857    let has_filters = author.is_some() || after.is_some() || before.is_some()
3858        || journal.is_some() || tag.is_some();
3859    if has_filters {
3860        let mut conditions: Vec<Value> = Vec::new();
3861        if let Some(query) = &query {
3862            conditions.push(serde_json::json!({
3863                "field": "quicksearch-titleCreatorYear",
3864                "operator": "contains",
3865                "value": query,
3866            }));
3867        }
3868        if let Some(author) = author {
3869            conditions.push(serde_json::json!({
3870                "field": "creator", "operator": "contains", "value": author,
3871            }));
3872        }
3873        if let Some(after) = after {
3874            conditions.push(serde_json::json!({
3875                "field": "date", "operator": "isAfter", "value": after,
3876            }));
3877        }
3878        if let Some(before) = before {
3879            conditions.push(serde_json::json!({
3880                "field": "date", "operator": "isBefore", "value": before,
3881            }));
3882        }
3883        if let Some(journal) = journal {
3884            conditions.push(serde_json::json!({
3885                "field": "publicationTitle", "operator": "contains", "value": journal,
3886            }));
3887        }
3888        if let Some(tag) = tag {
3889            conditions.push(serde_json::json!({
3890                "field": "tag", "operator": "is", "value": tag,
3891            }));
3892        }
3893        let value = client.call(
3894            "search.advanced",
3895            Some(serde_json::json!({
3896                "conditions": conditions,
3897                "operator": "and",
3898                "limit": limit,
3899                "offset": offset,
3900            })),
3901        )?;
3902        return Ok((normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty));
3903    }
3904
3905    let query = query.ok_or(
3906        "INVALID_ARGS: provide a search query, or use --doi/--isbn/--issn for identifier lookup"
3907    )?;
3908    let value = if let Some(col) = collection {
3909        let key = resolve_collection(client, &col)?;
3910        let response = client.call(
3911            "collections.getItems",
3912            Some(serde_json::json!({"key": key})),
3913        )?;
3914        collection_quick_search_response(&response, &query, limit)
3915    } else {
3916        filter_search_artifacts(client.call(
3917            "search.quick",
3918            Some(serde_json::json!({"query": query, "limit": limit})),
3919        )?)
3920    };
3921    Ok((normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty))
3922}
3923
3924fn run_search_management_command(
3925    command: SearchManagementCommand,
3926    client: &mut impl RpcCaller,
3927) -> Result<(Value, JsonStyle), String> {
3928    match command {
3929        SearchManagementCommand::SavedSearches { .. } => Ok((
3930            normalize_list_envelope(client.call("search.savedSearches", None)?, "items", None, 0),
3931            JsonStyle::Pretty,
3932        )),
3933        SearchManagementCommand::CreateSaved {
3934            name, condition, dry_run, ..
3935        } => {
3936            let conditions = condition
3937                .iter()
3938                .map(|raw| parse_search_condition(raw))
3939                .collect::<Result<Vec<_>, _>>()?;
3940            let params = serde_json::json!({"name": name, "conditions": conditions});
3941            if dry_run {
3942                Ok((dry_run_value("search.createSavedSearch", params), JsonStyle::PythonCompact))
3943            } else {
3944                Ok((client.call("search.createSavedSearch", Some(params))?, JsonStyle::PythonCompact))
3945            }
3946        }
3947        SearchManagementCommand::DeleteSaved {
3948            search_key, dry_run, ..
3949        } => {
3950            let params = serde_json::json!({"key": search_key});
3951            if dry_run {
3952                Ok((dry_run_value("search.deleteSavedSearch", params), JsonStyle::PythonCompact))
3953            } else {
3954                Ok((client.call("search.deleteSavedSearch", Some(params))?, JsonStyle::PythonCompact))
3955            }
3956        }
3957    }
3958}
3959
3960fn filter_search_artifacts(mut value: Value) -> Value {
3961    let Some(items) = value.get_mut("items").and_then(Value::as_array_mut) else {
3962        return value;
3963    };
3964    items.retain(|item| match item.get("title").and_then(Value::as_str) {
3965        Some(title) => !is_zotron_evidence_artifact(title),
3966        None => true,
3967    });
3968    let total_items = items.len() as u64;
3969    if let Some(total) = value.get_mut("total") {
3970        *total = Value::from(total_items);
3971    }
3972    value
3973}
3974
3975fn collection_quick_search_response(response: &Value, query: &str, limit: u64) -> Value {
3976    let mut matched = collection_items(response)
3977        .into_iter()
3978        .filter(|item| !item_is_evidence_artifact(item))
3979        .filter(|item| quick_item_matches(item, query))
3980        .collect::<Vec<_>>();
3981    let total = matched.len() as u64;
3982    let limit = usize::try_from(limit).unwrap_or(usize::MAX);
3983    if matched.len() > limit {
3984        matched.truncate(limit);
3985    }
3986    serde_json::json!({"items": matched, "total": total})
3987}
3988
3989fn item_is_evidence_artifact(item: &Value) -> bool {
3990    item.get("title")
3991        .and_then(Value::as_str)
3992        .is_some_and(is_zotron_evidence_artifact)
3993}
3994
3995fn quick_item_matches(item: &Value, query: &str) -> bool {
3996    let terms = query
3997        .split_whitespace()
3998        .map(|term| term.to_lowercase())
3999        .filter(|term| !term.is_empty())
4000        .collect::<Vec<_>>();
4001    if terms.is_empty() {
4002        return true;
4003    }
4004    let mut haystack = String::new();
4005    append_search_text(item, &mut haystack);
4006    let haystack = haystack.to_lowercase();
4007    terms.iter().all(|term| haystack.contains(term))
4008}
4009
4010fn append_search_text(value: &Value, out: &mut String) {
4011    match value {
4012        Value::String(text) => {
4013            out.push(' ');
4014            out.push_str(text);
4015        }
4016        Value::Number(number) => {
4017            out.push(' ');
4018            out.push_str(&number.to_string());
4019        }
4020        Value::Bool(value) => {
4021            out.push(' ');
4022            out.push_str(if *value { "true" } else { "false" });
4023        }
4024        Value::Array(items) => {
4025            for item in items {
4026                append_search_text(item, out);
4027            }
4028        }
4029        Value::Object(map) => {
4030            for item in map.values() {
4031                append_search_text(item, out);
4032            }
4033        }
4034        Value::Null => {}
4035    }
4036}
4037
4038fn parse_search_condition(raw: &str) -> Result<Value, String> {
4039    let mut parts = raw.split_whitespace();
4040    let field = parts.next();
4041    let operator = parts.next();
4042    let value = parts.collect::<Vec<_>>().join(" ");
4043    match (field, operator, value.is_empty()) {
4044        (Some(field), Some(operator), false) => Ok(serde_json::json!({
4045            "field": field,
4046            "operator": operator,
4047            "value": value,
4048        })),
4049        _ => Err(format!(
4050            "INVALID_ARGS: --condition must be 'field operator value', got: {raw:?}"
4051        )),
4052    }
4053}
4054
4055fn normalize_list_envelope(value: Value, list_key: &str, limit: Option<u64>, offset: u64) -> Value {
4056    if let Value::Array(arr) = value {
4057        let total = arr.len() as u64;
4058        let mut obj = serde_json::Map::new();
4059        obj.insert(list_key.to_string(), Value::Array(arr));
4060        obj.insert("total".to_string(), Value::from(total));
4061        if let Some(limit) = limit {
4062            obj.insert("limit".to_string(), Value::from(limit));
4063        }
4064        obj.insert("offset".to_string(), Value::from(offset));
4065        obj.insert("hasMore".to_string(), Value::Bool(false));
4066        return Value::Object(obj);
4067    }
4068
4069    let mut obj = match value {
4070        Value::Object(obj) if obj.contains_key(list_key) => obj,
4071        other => return other,
4072    };
4073
4074    let items_len = obj
4075        .get(list_key)
4076        .and_then(Value::as_array)
4077        .map_or(0, |a| a.len()) as u64;
4078    let total = obj
4079        .get("total")
4080        .and_then(Value::as_u64)
4081        .unwrap_or(items_len);
4082
4083    obj.insert("total".to_string(), Value::from(total));
4084    if let Some(limit) = limit {
4085        obj.insert("limit".to_string(), Value::from(limit));
4086    }
4087    obj.insert("offset".to_string(), Value::from(offset));
4088    obj.insert(
4089        "hasMore".to_string(),
4090        Value::Bool(offset + items_len < total),
4091    );
4092
4093    Value::Object(obj)
4094}
4095
4096const RPC_PAGINATION_SAFETY_CAP: usize = 10_000;
4097const RPC_PAGE_LIST_KEYS: [&str; 4] = ["items", "tags", "results", "data"];
4098
4099fn paginate_rpc(
4100    client: &mut impl RpcCaller,
4101    method: &str,
4102    params: Value,
4103    page_size: usize,
4104) -> Result<Value, String> {
4105    let base = params
4106        .as_object()
4107        .ok_or_else(|| "params must be a JSON object".to_string())?;
4108    let mut out = Vec::new();
4109    let mut prev_page: Option<Vec<Value>> = None;
4110    let mut offset = 0usize;
4111
4112    loop {
4113        let mut page_params = base.clone();
4114        page_params.insert("offset".to_string(), Value::Number(offset.into()));
4115        page_params.insert("limit".to_string(), Value::Number(page_size.into()));
4116        let response = client.call(method, Some(Value::Object(page_params)))?;
4117
4118        let page = match extract_page(&response) {
4119            Some(page) => page,
4120            None if out.is_empty() => return Ok(response),
4121            None if response.is_object() => {
4122                return Err(format!(
4123                    "paginate: {method:?} returned a non-paginated dict after {} accumulated rows; aborting",
4124                    out.len()
4125                ));
4126            }
4127            None => {
4128                return Err(format!(
4129                    "paginate: {method:?} returned non-list/non-dict shape after {} accumulated rows; aborting",
4130                    out.len()
4131                ));
4132            }
4133        };
4134
4135        if prev_page.as_ref() == Some(&page) {
4136            return Err(format!(
4137                "paginate: {method:?} returned identical pages — method likely ignores offset; aborting after {} rows",
4138                out.len()
4139            ));
4140        }
4141
4142        let page_len = page.len();
4143        out.extend(page.clone());
4144        if page_len < page_size {
4145            return Ok(Value::Array(out));
4146        }
4147        if out.len() >= RPC_PAGINATION_SAFETY_CAP {
4148            out.truncate(RPC_PAGINATION_SAFETY_CAP);
4149            return Ok(Value::Array(out));
4150        }
4151        prev_page = Some(page);
4152        offset += page_size;
4153    }
4154}
4155
4156fn extract_page(response: &Value) -> Option<Vec<Value>> {
4157    if let Some(page) = response.as_array() {
4158        return Some(page.clone());
4159    }
4160    let object = response.as_object()?;
4161    for key in RPC_PAGE_LIST_KEYS {
4162        if let Some(page) = object.get(key).and_then(Value::as_array) {
4163            return Some(page.clone());
4164        }
4165    }
4166    None
4167}
4168
4169fn run_find_pdfs_command(
4170    client: &mut impl RpcCaller,
4171    collection: String,
4172    limit: usize,
4173) -> Result<(Value, JsonStyle), String> {
4174    let collection_key = resolve_collection(client, &collection)?;
4175    let response = client.call(
4176        "collections.getItems",
4177        Some(serde_json::json!({"key": collection_key})),
4178    )?;
4179    let items = collection_items(&response);
4180
4181    let mut missing = Vec::new();
4182    for item in &items {
4183        let Some(item_key) = item.get("key").and_then(Value::as_str) else {
4184            continue;
4185        };
4186        let attachments = client.call(
4187            "attachments.list",
4188            Some(serde_json::json!({"parentKey": item_key})),
4189        )?;
4190        if !has_pdf_attachment(&attachments) {
4191            missing.push(item.clone());
4192        }
4193        if limit > 0 && missing.len() >= limit {
4194            break;
4195        }
4196    }
4197
4198    let mut results = Vec::new();
4199    for item in &missing {
4200        let item_key = item
4201            .get("key")
4202            .and_then(Value::as_str)
4203            .ok_or_else(|| "missing item lacks key".to_string())?;
4204        let response = client.call(
4205            "attachments.findPDF",
4206            Some(serde_json::json!({"parentKey": item_key})),
4207        )?;
4208        let attachment = response.get("attachment").filter(|value| !value.is_null());
4209        results.push(serde_json::json!({
4210            "item_key": item_key,
4211            "title": item.get("title").cloned().unwrap_or(Value::Null),
4212            "found": attachment.is_some(),
4213            "attachment_key": attachment
4214                .and_then(|attachment| attachment.get("key"))
4215                .cloned()
4216                .unwrap_or(Value::Null),
4217        }));
4218    }
4219
4220    Ok((
4221        serde_json::json!({
4222            "scanned": items.len(),
4223            "attempted": missing.len(),
4224            "results": results,
4225        }),
4226        JsonStyle::Pretty,
4227    ))
4228}
4229
4230fn collection_items(response: &Value) -> Vec<Value> {
4231    if let Some(items) = response.get("items").and_then(Value::as_array) {
4232        return items.clone();
4233    }
4234    response.as_array().cloned().unwrap_or_default()
4235}
4236
4237fn has_pdf_attachment(attachments: &Value) -> bool {
4238    attachments
4239        .as_array()
4240        .is_some_and(|attachments| attachments.iter().any(is_pdf_attachment))
4241}
4242
4243fn is_pdf_attachment(attachment: &Value) -> bool {
4244    let content_type = attachment
4245        .get("contentType")
4246        .and_then(Value::as_str)
4247        .unwrap_or_default()
4248        .to_lowercase();
4249    let path = attachment
4250        .get("path")
4251        .and_then(Value::as_str)
4252        .unwrap_or_default()
4253        .to_lowercase();
4254    matches!(
4255        content_type.as_str(),
4256        "application/pdf" | "application/x-pdf"
4257    ) || path.ends_with(".pdf")
4258}
4259
4260fn call_json(
4261    client: &mut impl RpcCaller,
4262    method: &str,
4263    params: Option<Value>,
4264) -> Result<Value, String> {
4265    client.call(method, params)
4266}
4267
4268fn run_system_command(
4269    command: SystemCommand,
4270    client: &mut impl RpcCaller,
4271) -> Result<(Value, JsonStyle), String> {
4272    let value = match command {
4273        SystemCommand::Version { .. } => client.call("system.version", None)?,
4274        SystemCommand::Libraries { .. } => client.call("system.libraries", None)?,
4275        SystemCommand::LibraryStats { library, .. } => {
4276            let params = library.map(|id| serde_json::json!({"id": id}));
4277            client.call("system.libraryStats", params)?
4278        }
4279        SystemCommand::Schema { item_type, .. } => {
4280            if let Some(item_type) = item_type {
4281                let fields = client.call("system.itemFields", Some(serde_json::json!({"itemType": item_type})))?;
4282                let creators = client.call("system.creatorTypes", Some(serde_json::json!({"itemType": item_type})))?;
4283                let field_names: Vec<Value> = fields.as_array().unwrap_or(&vec![])
4284                    .iter()
4285                    .filter_map(|f| f.get("field").cloned())
4286                    .collect();
4287                let creator_names: Vec<Value> = creators.as_array().unwrap_or(&vec![])
4288                    .iter()
4289                    .filter_map(|c| c.get("creatorType").cloned())
4290                    .collect();
4291                serde_json::json!({
4292                    "itemType": item_type,
4293                    "fields": field_names,
4294                    "creatorTypes": creator_names,
4295                })
4296            } else {
4297                let types = client.call("system.itemTypes", None)?;
4298                let type_names: Vec<Value> = types.as_array().unwrap_or(&vec![])
4299                    .iter()
4300                    .filter_map(|t| t.get("itemType").cloned())
4301                    .collect();
4302                Value::Array(type_names)
4303            }
4304        }
4305        SystemCommand::CurrentCollection { .. } => client.call("system.currentCollection", None)?,
4306        SystemCommand::Methods { method, .. } => {
4307            if let Some(method) = method {
4308                client.call("system.describe", Some(serde_json::json!({"method": method})))?
4309            } else {
4310                client.call("system.listMethods", None)?
4311            }
4312        }
4313    };
4314    Ok((value, JsonStyle::Pretty))
4315}
4316
4317fn run_items_command(
4318    command: ItemsCommand,
4319    client: &mut impl RpcCaller,
4320) -> Result<(Value, JsonStyle), String> {
4321    let (value, style) = match command {
4322        ItemsCommand::Add {
4323            doi,
4324            isbn,
4325            from_url,
4326            file,
4327            item_type,
4328            fields,
4329            collection,
4330            dry_run,
4331            ..
4332        } => {
4333            if let Some(doi) = doi {
4334                run_add_identifier_command(client, "items.addByDOI", "doi", doi, collection, dry_run)?
4335            } else if let Some(isbn) = isbn {
4336                run_add_identifier_command(client, "items.addByISBN", "isbn", isbn, collection, dry_run)?
4337            } else if let Some(from_url) = from_url {
4338                run_add_identifier_command(client, "items.addByURL", "url", from_url, collection, dry_run)?
4339            } else if let Some(file) = file {
4340                let mut params = serde_json::json!({"path": zotero_path(&file)});
4341                maybe_insert_collection(client, &mut params, collection)?;
4342                run_mutation_command(client, "items.addFromFile", params, dry_run)?
4343            } else if let Some(item_type) = item_type {
4344                let parsed_fields = parse_field_options(&fields)?;
4345                let mut params = serde_json::json!({"itemType": item_type});
4346                if !parsed_fields.is_empty() {
4347                    if let Some(map) = params.as_object_mut() {
4348                        map.insert("fields".to_string(), Value::Object(parsed_fields));
4349                    }
4350                }
4351                run_mutation_command(client, "items.create", params, dry_run)?
4352            } else {
4353                return Err("INVALID_ARGS: provide one of --doi, --isbn, --from-url, --file, or --type".into());
4354            }
4355        }
4356        ItemsCommand::Update {
4357            key,
4358            fields,
4359            dry_run,
4360            ..
4361        } => {
4362            let parsed_fields = parse_field_options(&fields)?;
4363            let mut params = serde_json::json!({"key": key});
4364            if !parsed_fields.is_empty() {
4365                if let Some(map) = params.as_object_mut() {
4366                    map.insert("fields".to_string(), Value::Object(parsed_fields));
4367                }
4368            }
4369            run_mutation_command(client, "items.update", params, dry_run)?
4370        }
4371        ItemsCommand::Delete { key, dry_run, .. } => run_mutation_command(
4372            client,
4373            "items.delete",
4374            serde_json::json!({"key": key}),
4375            dry_run,
4376        )?,
4377        ItemsCommand::Trash {
4378            items, dry_run, ..
4379        } => {
4380            if items.len() == 1 {
4381                run_mutation_command(
4382                    client,
4383                    "items.trash",
4384                    serde_json::json!({"key": items[0]}),
4385                    dry_run,
4386                )?
4387            } else {
4388                run_mutation_command(
4389                    client,
4390                    "items.batchTrash",
4391                    serde_json::json!({"keys": items}),
4392                    dry_run,
4393                )?
4394            }
4395        }
4396        ItemsCommand::Restore { item, dry_run, .. } => run_mutation_command(
4397            client,
4398            "items.restore",
4399            serde_json::json!({"key": item}),
4400            dry_run,
4401        )?,
4402        ItemsCommand::MergeDuplicates { keys, dry_run, .. } => {
4403            if keys.len() < 2 {
4404                return Err("INVALID_ARGS: need at least 2 keys to merge".to_string());
4405            }
4406            run_mutation_command(
4407                client,
4408                "items.mergeDuplicates",
4409                serde_json::json!({"keys": keys}),
4410                dry_run,
4411            )?
4412        }
4413        ItemsCommand::AddRelated {
4414            key,
4415            target,
4416            dry_run,
4417            ..
4418        } => run_mutation_command(
4419            client,
4420            "items.addRelated",
4421            serde_json::json!({"key": key, "targetKey": target}),
4422            dry_run,
4423        )?,
4424        ItemsCommand::RemoveRelated {
4425            key,
4426            target,
4427            dry_run,
4428            ..
4429        } => run_mutation_command(
4430            client,
4431            "items.removeRelated",
4432            serde_json::json!({"key": key, "targetKey": target}),
4433            dry_run,
4434        )?,
4435        ItemsCommand::Get { item, .. } => (
4436            client.call("items.get", Some(serde_json::json!({"key": item})))?,
4437            JsonStyle::Pretty,
4438        ),
4439        ItemsCommand::List {
4440            limit,
4441            offset,
4442            sort,
4443            direction,
4444            trash,
4445            ..
4446        } => {
4447            if trash {
4448                let value = client.call(
4449                    "items.getTrash",
4450                    Some(serde_json::json!({"limit": limit, "offset": offset})),
4451                )?;
4452                (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
4453            } else {
4454                let mut params = serde_json::json!({
4455                    "limit": limit,
4456                    "offset": offset,
4457                    "direction": direction,
4458                });
4459                if let (Some(sort), Some(map)) = (sort, params.as_object_mut()) {
4460                    map.insert("sort".to_string(), Value::String(sort));
4461                }
4462                let value = client.call("items.list", Some(params))?;
4463                (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
4464            }
4465        }
4466        ItemsCommand::FindDuplicates { .. } => (
4467            client.call("items.findDuplicates", None)?,
4468            JsonStyle::Pretty,
4469        ),
4470        ItemsCommand::Recent {
4471            limit,
4472            offset,
4473            recent_type,
4474            ..
4475        } => {
4476            if recent_type != "added" && recent_type != "modified" {
4477                return Err(format!(
4478                    "--type must be added or modified, got {recent_type:?}"
4479                ));
4480            }
4481            let value = client.call(
4482                "items.getRecent",
4483                Some(
4484                    serde_json::json!({"limit": limit, "offset": offset, "type": recent_type}),
4485                ),
4486            )?;
4487            (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
4488        }
4489        ItemsCommand::Fulltext { key, .. } => (
4490            client.call("items.getFullText", Some(serde_json::json!({"key": key})))?,
4491            JsonStyle::Pretty,
4492        ),
4493        ItemsCommand::Related { key, .. } => (
4494            normalize_list_envelope(
4495                client.call("items.getRelated", Some(serde_json::json!({"key": key})))?,
4496                "items",
4497                None,
4498                0,
4499            ),
4500            JsonStyle::Pretty,
4501        ),
4502        ItemsCommand::CitationKey { key, .. } => (
4503            client.call("items.citationKey", Some(serde_json::json!({"key": key})))?,
4504            JsonStyle::Pretty,
4505        ),
4506        ItemsCommand::Path { key, .. } => (
4507            localize_attachment_path_response(
4508                client.call("attachments.getPath", Some(serde_json::json!({"key": key})))?,
4509            ),
4510            JsonStyle::Pretty,
4511        ),
4512        ItemsCommand::Attachments { key, offset, .. } => {
4513            let value = client.call(
4514                "attachments.list",
4515                Some(serde_json::json!({"parentKey": key})),
4516            )?;
4517            let total = value
4518                .get("items")
4519                .and_then(Value::as_array)
4520                .map_or(0, |a| a.len()) as u64;
4521            (normalize_list_envelope(value, "items", Some(total), offset), JsonStyle::Pretty)
4522        }
4523        ItemsCommand::FindPdfs { collection, limit, .. } => {
4524            run_find_pdfs_command(client, collection, limit)?
4525        }
4526    };
4527    Ok((value, style))
4528}
4529
4530fn run_add_identifier_command(
4531    client: &mut impl RpcCaller,
4532    method: &str,
4533    param_name: &str,
4534    param_value: String,
4535    collection: Option<String>,
4536    dry_run: bool,
4537) -> Result<(Value, JsonStyle), String> {
4538    let mut params = Value::Object(serde_json::Map::from_iter([(
4539        param_name.to_string(),
4540        Value::String(param_value),
4541    )]));
4542    maybe_insert_collection(client, &mut params, collection)?;
4543    run_mutation_command(client, method, params, dry_run)
4544}
4545
4546fn run_mutation_command(
4547    client: &mut impl RpcCaller,
4548    method: &str,
4549    params: Value,
4550    dry_run: bool,
4551) -> Result<(Value, JsonStyle), String> {
4552    let value = if dry_run {
4553        serde_json::json!({
4554            "ok": true,
4555            "dryRun": true,
4556            "wouldCall": method,
4557            "wouldCallParams": params,
4558        })
4559    } else {
4560        client.call(method, Some(params))?
4561    };
4562    Ok((value, JsonStyle::PythonCompact))
4563}
4564
4565fn parse_field_options(fields: &[String]) -> Result<serde_json::Map<String, Value>, String> {
4566    let mut parsed = serde_json::Map::new();
4567    for field in fields {
4568        let (key, value) = field
4569            .split_once('=')
4570            .ok_or_else(|| format!("INVALID_ARGS: --field must be key=value, got: {field:?}"))?;
4571        parsed.insert(key.to_string(), Value::String(value.to_string()));
4572    }
4573    Ok(parsed)
4574}
4575
4576fn maybe_insert_collection(
4577    client: &mut impl RpcCaller,
4578    params: &mut Value,
4579    collection: Option<String>,
4580) -> Result<(), String> {
4581    let Some(collection) = collection else {
4582        return Ok(());
4583    };
4584    let collection = resolve_collection(client, &collection)?;
4585    let include = match &collection {
4586        Value::Null => false,
4587        Value::Number(number) => number.as_i64() != Some(0),
4588        _ => true,
4589    };
4590    if include {
4591        params
4592            .as_object_mut()
4593            .expect("mutation params are always objects")
4594            .insert("collection".to_string(), collection);
4595    }
4596    Ok(())
4597}
4598
4599fn run_settings_command(
4600    command: SettingsCommand,
4601    client: &mut impl RpcCaller,
4602) -> Result<(Value, JsonStyle), String> {
4603    let (value, style) = match command {
4604        SettingsCommand::Get { key, .. } => (
4605            client.call("settings.get", Some(serde_json::json!({"key": key})))?,
4606            JsonStyle::Pretty,
4607        ),
4608        SettingsCommand::List { .. } => (client.call("settings.getAll", None)?, JsonStyle::Pretty),
4609        SettingsCommand::Set {
4610            pairs,
4611            file,
4612            dry_run,
4613            ..
4614        } => {
4615            if let Some(file) = file {
4616                // --file mode: read JSON and call settings.setAll
4617                let raw = fs::read_to_string(&file)
4618                    .map_err(|err| format!("INVALID_JSON: Could not read JSON: {err}"))?;
4619                let settings: Value = serde_json::from_str(&raw)
4620                    .map_err(|err| format!("INVALID_JSON: Could not parse JSON: {err}"))?;
4621                if dry_run {
4622                    (
4623                        dry_run_value("settings.setAll", settings),
4624                        JsonStyle::PythonCompact,
4625                    )
4626                } else {
4627                    (
4628                        client.call("settings.setAll", Some(settings))?,
4629                        JsonStyle::PythonCompact,
4630                    )
4631                }
4632            } else if pairs.len() == 2 {
4633                // Single key=value: settings.set
4634                let key = &pairs[0];
4635                let value = &pairs[1];
4636                let parsed_value = serde_json::from_str::<Value>(value)
4637                    .unwrap_or(Value::String(value.clone()));
4638                let params = serde_json::json!({"key": key, "value": parsed_value});
4639                if dry_run {
4640                    (
4641                        dry_run_value("settings.set", params),
4642                        JsonStyle::PythonCompact,
4643                    )
4644                } else {
4645                    (
4646                        client.call("settings.set", Some(params))?,
4647                        JsonStyle::PythonCompact,
4648                    )
4649                }
4650            } else if pairs.len() > 2 && pairs.len() % 2 == 0 {
4651                // Multiple pairs: build a map and call settings.setAll
4652                let mut map = serde_json::Map::new();
4653                for chunk in pairs.chunks(2) {
4654                    let parsed = serde_json::from_str::<Value>(&chunk[1])
4655                        .unwrap_or(Value::String(chunk[1].clone()));
4656                    map.insert(chunk[0].clone(), parsed);
4657                }
4658                let settings = Value::Object(map);
4659                if dry_run {
4660                    (
4661                        dry_run_value("settings.setAll", settings),
4662                        JsonStyle::PythonCompact,
4663                    )
4664                } else {
4665                    (
4666                        client.call("settings.setAll", Some(settings))?,
4667                        JsonStyle::PythonCompact,
4668                    )
4669                }
4670            } else {
4671                return Err(
4672                    "INVALID_ARGS: provide key value pairs (even number of args) or --file".into(),
4673                );
4674            }
4675        }
4676    };
4677    Ok((value, style))
4678}
4679
4680fn run_tags_command(
4681    command: TagsCommand,
4682    client: &mut impl RpcCaller,
4683) -> Result<(Value, JsonStyle), String> {
4684    let (value, style) = match command {
4685        TagsCommand::List { limit, .. } => {
4686            let value = client.call("tags.list", Some(serde_json::json!({"limit": limit})))?;
4687            (normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty)
4688        }
4689        TagsCommand::Rename {
4690            old, new, dry_run, ..
4691        } => run_tag_mutation(
4692            client,
4693            "tags.rename",
4694            serde_json::json!({"oldName": old, "newName": new}),
4695            dry_run,
4696        )?,
4697        TagsCommand::Delete { tag, dry_run, .. } => run_tag_mutation(
4698            client,
4699            "tags.delete",
4700            serde_json::json!({"tag": tag}),
4701            dry_run,
4702        )?,
4703        TagsCommand::Add {
4704            keys, tags, dry_run, ..
4705        } => {
4706            if keys.len() == 1 {
4707                run_tag_mutation(
4708                    client,
4709                    "tags.add",
4710                    serde_json::json!({"key": keys[0], "tags": tags}),
4711                    dry_run,
4712                )?
4713            } else {
4714                run_tag_mutation(
4715                    client,
4716                    "tags.batchUpdate",
4717                    serde_json::json!({"keys": keys, "add": tags}),
4718                    dry_run,
4719                )?
4720            }
4721        }
4722        TagsCommand::Remove {
4723            keys, tags, dry_run, ..
4724        } => {
4725            if keys.len() == 1 {
4726                run_tag_mutation(
4727                    client,
4728                    "tags.remove",
4729                    serde_json::json!({"key": keys[0], "tags": tags}),
4730                    dry_run,
4731                )?
4732            } else {
4733                run_tag_mutation(
4734                    client,
4735                    "tags.batchUpdate",
4736                    serde_json::json!({"keys": keys, "remove": tags}),
4737                    dry_run,
4738                )?
4739            }
4740        }
4741    };
4742    Ok((value, style))
4743}
4744
4745fn run_tag_mutation(
4746    client: &mut impl RpcCaller,
4747    method: &str,
4748    params: Value,
4749    dry_run: bool,
4750) -> Result<(Value, JsonStyle), String> {
4751    if dry_run {
4752        Ok((dry_run_value(method, params), JsonStyle::PythonCompact))
4753    } else {
4754        Ok((client.call(method, Some(params))?, JsonStyle::PythonCompact))
4755    }
4756}
4757
4758fn dry_run_value(method: &str, params: Value) -> Value {
4759    serde_json::json!({
4760        "ok": true,
4761        "dryRun": true,
4762        "wouldCall": method,
4763        "wouldCallParams": params,
4764    })
4765}
4766
4767fn run_annotations_command(
4768    command: AnnotationsCommand,
4769    client: &mut impl RpcCaller,
4770) -> Result<(Value, JsonStyle), String> {
4771    let (value, style) = match command {
4772        AnnotationsCommand::List { parent, attachment, context, .. } => {
4773            let mut params = serde_json::json!({"parentKey": parent});
4774            if let Some(att) = attachment {
4775                params["attachmentKey"] = Value::String(att);
4776            }
4777            if let Some(ctx) = context {
4778                params["context"] = Value::Number(ctx.into());
4779            }
4780            let value = client.call("annotations.list", Some(params))?;
4781            let total = value
4782                .get("items")
4783                .and_then(Value::as_array)
4784                .map_or(0, |a| a.len()) as u64;
4785            (normalize_list_envelope(value, "items", Some(total), 0), JsonStyle::Pretty)
4786        }
4787        AnnotationsCommand::Create {
4788            parent,
4789            attachment,
4790            annotation_type,
4791            position,
4792            quote,
4793            page,
4794            sort_index,
4795            text,
4796            comment,
4797            color,
4798            dry_run,
4799            ..
4800        } => {
4801            let annotation_type = annotation_type.unwrap_or_else(|| "highlight".to_string());
4802            if !matches!(
4803                annotation_type.as_str(),
4804                "highlight" | "note" | "underline" | "image" | "ink"
4805            ) {
4806                return Err(format!(
4807                    "INVALID_ARGS: --type must be highlight|note|underline|image|ink, got {annotation_type:?}"
4808                ));
4809            }
4810            let mut params = serde_json::Map::new();
4811            params.insert("parentKey".to_string(), Value::String(parent));
4812            if let Some(att) = attachment {
4813                params.insert("attachmentKey".to_string(), Value::String(att));
4814            }
4815            params.insert("type".to_string(), Value::String(annotation_type.clone()));
4816            params.insert("color".to_string(), Value::String(color));
4817
4818            if let Some(ref quote_text) = quote {
4819                if !matches!(annotation_type.as_str(), "highlight" | "underline") {
4820                    return Err(format!(
4821                        "INVALID_ARGS: --quote is only valid for highlight|underline, got {annotation_type:?}"
4822                    ));
4823                }
4824                params.insert("quote".to_string(), Value::String(quote_text.clone()));
4825                if let Some(page_idx) = page {
4826                    params.insert(
4827                        "pageIndex".to_string(),
4828                        Value::Number(page_idx.into()),
4829                    );
4830                }
4831                // When --quote is given, --position is optional
4832                if let Some(raw) = position {
4833                    let pos = serde_json::from_str::<Value>(&raw)
4834                        .map_err(|err| format!("INVALID_JSON: Could not parse --position: {err}"))?;
4835                    validate_annotation_position(annotation_type.as_str(), &pos)?;
4836                    params.insert("position".to_string(), pos);
4837                }
4838            } else {
4839                let position = position
4840                    .ok_or_else(|| "INVALID_ARGS: --position JSON is required (or use --quote)".to_string())
4841                    .and_then(|raw| {
4842                        serde_json::from_str::<Value>(&raw)
4843                            .map_err(|err| format!("INVALID_JSON: Could not parse --position: {err}"))
4844                    })?;
4845                validate_annotation_position(annotation_type.as_str(), &position)?;
4846                params.insert("position".to_string(), position);
4847            }
4848
4849            if let Some(sort_index) = sort_index {
4850                params.insert(
4851                    "sortIndex".to_string(),
4852                    parse_annotation_sort_index(sort_index)?,
4853                );
4854            }
4855            if let Some(text) = text {
4856                params.insert("text".to_string(), Value::String(text));
4857            }
4858            if let Some(comment) = comment {
4859                params.insert("comment".to_string(), Value::String(comment));
4860            }
4861            run_mutating_command(client, "annotations.create", Value::Object(params), dry_run)?
4862        }
4863        AnnotationsCommand::CreateBatch {
4864            parent,
4865            attachment,
4866            file,
4867            dry_run,
4868            ..
4869        } => {
4870            let input = if let Some(ref path) = file {
4871                std::fs::read_to_string(path)
4872                    .map_err(|e| format!("INVALID_ARGS: cannot read file {path}: {e}"))?
4873            } else {
4874                let mut buf = String::new();
4875                std::io::Read::read_to_string(&mut std::io::stdin(), &mut buf)
4876                    .map_err(|e| format!("INVALID_ARGS: cannot read stdin: {e}"))?;
4877                buf
4878            };
4879            let annotations: Value = serde_json::from_str(&input)
4880                .map_err(|e| format!("INVALID_JSON: {e}"))?;
4881            if !annotations.is_array() {
4882                return Err("INVALID_ARGS: input must be a JSON array".to_string());
4883            }
4884            let mut params = serde_json::json!({
4885                "parentKey": parent,
4886                "annotations": annotations,
4887            });
4888            if let Some(att) = attachment {
4889                params["attachmentKey"] = Value::String(att);
4890            }
4891            run_mutating_command(client, "annotations.createBatch", params, dry_run)?
4892        }
4893        AnnotationsCommand::Locate {
4894            parent,
4895            attachment,
4896            quote,
4897            page,
4898            ..
4899        } => {
4900            let mut params = serde_json::json!({
4901                "parentKey": parent,
4902                "quote": quote,
4903            });
4904            if let Some(att) = attachment {
4905                params["attachmentKey"] = Value::String(att);
4906            }
4907            if let Some(page_idx) = page {
4908                params["pageIndex"] = Value::Number(page_idx.into());
4909            }
4910            let value = client.call("annotations.locate", Some(params))?;
4911            (value, JsonStyle::Pretty)
4912        }
4913        AnnotationsCommand::Delete {
4914            annotation_key,
4915            dry_run,
4916            ..
4917        } => run_mutating_command(
4918            client,
4919            "annotations.delete",
4920            serde_json::json!({"key": annotation_key}),
4921            dry_run,
4922        )?,
4923    };
4924    Ok((value, style))
4925}
4926
4927fn validate_annotation_position(annotation_type: &str, position: &Value) -> Result<(), String> {
4928    position
4929        .get("pageIndex")
4930        .and_then(Value::as_i64)
4931        .filter(|value| *value >= 0)
4932        .ok_or_else(|| {
4933            "INVALID_ARGS: --position must include a non-negative integer pageIndex".to_string()
4934        })?;
4935
4936    if annotation_type == "ink" {
4937        let has_paths = position
4938            .get("paths")
4939            .and_then(Value::as_array)
4940            .is_some_and(|paths| !paths.is_empty());
4941        if !has_paths {
4942            return Err("INVALID_ARGS: ink --position must include non-empty paths".to_string());
4943        }
4944        return Ok(());
4945    }
4946
4947    let valid_rects = position
4948        .get("rects")
4949        .and_then(Value::as_array)
4950        .is_some_and(|rects| !rects.is_empty() && rects.iter().all(is_annotation_rect));
4951    if !valid_rects {
4952        return Err(
4953            "INVALID_ARGS: --position must include non-empty rects of [x1, y1, x2, y2]".to_string(),
4954        );
4955    }
4956    Ok(())
4957}
4958
4959fn is_annotation_rect(value: &Value) -> bool {
4960    value.as_array().is_some_and(|coords| {
4961        coords.len() == 4
4962            && coords
4963                .iter()
4964                .all(|coord| coord.as_f64().is_some_and(f64::is_finite))
4965    })
4966}
4967
4968fn parse_annotation_sort_index(raw: String) -> Result<Value, String> {
4969    let parsed = serde_json::from_str::<Value>(&raw).unwrap_or_else(|_| Value::String(raw));
4970    let valid = match &parsed {
4971        Value::Number(number) => number.as_f64().is_some_and(f64::is_finite),
4972        Value::String(value) => {
4973            is_zotero_pdf_sort_index(value.trim())
4974                || (!value.trim().is_empty()
4975                    && value.trim().parse::<f64>().is_ok_and(f64::is_finite))
4976        }
4977        _ => false,
4978    };
4979    if valid {
4980        Ok(parsed)
4981    } else {
4982        Err(format!(
4983            "INVALID_ARGS: --sort-index must be a finite number or numeric string, got {parsed}"
4984        ))
4985    }
4986}
4987
/// Reports whether `value` has the form `ddddd|dddddd|ddddd`.
///
/// That is exactly three `|`-separated segments of 5, 6 and 5 ASCII digits.
/// No trimming is performed here.
fn is_zotero_pdf_sort_index(value: &str) -> bool {
    const SEGMENT_WIDTHS: [usize; 3] = [5, 6, 5];

    let mut segments = value.split('|');
    let shaped = SEGMENT_WIDTHS.iter().all(|&width| {
        segments.next().is_some_and(|segment| {
            segment.len() == width && segment.bytes().all(|byte| byte.is_ascii_digit())
        })
    });
    // A fourth segment (even an empty one after a trailing `|`) disqualifies the value.
    shaped && segments.next().is_none()
}
5001
5002
5003fn localize_attachment_path_response(mut value: Value) -> Value {
5004    if let Some(path) = value.get("path").and_then(Value::as_str) {
5005        let local = local_path_from_zotero_path(path);
5006        if let Some(map) = value.as_object_mut() {
5007            map.insert("path".to_string(), Value::String(local));
5008        }
5009    }
5010    value
5011}
5012
5013fn run_notes_command(
5014    command: NotesCommand,
5015    client: &mut impl RpcCaller,
5016) -> Result<(Value, JsonStyle), String> {
5017    let (value, style) = match command {
5018        NotesCommand::List {
5019            parent,
5020            limit,
5021            offset,
5022            ..
5023        } => {
5024            let value = client.call(
5025                "notes.list",
5026                Some(serde_json::json!({"parentKey": parent})),
5027            )?;
5028            (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
5029        }
5030        NotesCommand::Get { note_key, .. } => {
5031            let value = client.call("notes.get", Some(serde_json::json!({"key": note_key})))?;
5032            (value, JsonStyle::Pretty)
5033        }
5034        NotesCommand::Create {
5035            parent,
5036            content,
5037            tags,
5038            dry_run,
5039            ..
5040        } => {
5041            let mut params = serde_json::Map::new();
5042            params.insert("parentKey".to_string(), Value::String(parent));
5043            params.insert("content".to_string(), Value::String(content));
5044            if !tags.is_empty() {
5045                params.insert(
5046                    "tags".to_string(),
5047                    Value::Array(tags.into_iter().map(Value::String).collect()),
5048                );
5049            }
5050            run_mutating_command(client, "notes.create", Value::Object(params), dry_run)?
5051        }
5052        NotesCommand::Update {
5053            note_key,
5054            content,
5055            dry_run,
5056            ..
5057        } => run_mutating_command(
5058            client,
5059            "notes.update",
5060            serde_json::json!({"key": note_key, "content": content}),
5061            dry_run,
5062        )?,
5063        NotesCommand::Delete {
5064            note_key, dry_run, ..
5065        } => {
5066            // Python CLI intentionally routes note deletion through items.delete.
5067            run_mutating_command(
5068                client,
5069                "items.delete",
5070                serde_json::json!({"key": note_key}),
5071                dry_run,
5072            )?
5073        }
5074        NotesCommand::Search { query, limit, .. } => {
5075            let value = client.call(
5076                "notes.search",
5077                Some(serde_json::json!({"query": query, "limit": limit})),
5078            )?;
5079            (normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty)
5080        }
5081    };
5082    Ok((value, style))
5083}
5084
5085fn run_mutating_command(
5086    client: &mut impl RpcCaller,
5087    method: &str,
5088    params: Value,
5089    dry_run: bool,
5090) -> Result<(Value, JsonStyle), String> {
5091    if dry_run {
5092        Ok((
5093            serde_json::json!({
5094                "ok": true,
5095                "dryRun": true,
5096                "wouldCall": method,
5097                "wouldCallParams": params,
5098            }),
5099            JsonStyle::PythonCompact,
5100        ))
5101    } else {
5102        client
5103            .call(method, Some(params))
5104            .map(|value| (value, JsonStyle::PythonCompact))
5105    }
5106}
5107
5108fn run_collections_command(
5109    command: CollectionsCommand,
5110    client: &mut impl RpcCaller,
5111) -> Result<(Value, JsonStyle), String> {
5112    let value = match command {
5113        CollectionsCommand::List { .. } => normalize_list_envelope(
5114            client.call("collections.list", None)?,
5115            "items",
5116            None,
5117            0,
5118        ),
5119        CollectionsCommand::Tree { .. } => client.call("collections.tree", None)?,
5120        CollectionsCommand::Get { name_or_id, .. } => {
5121            let key = resolve_collection(client, &name_or_id)?;
5122            client.call("collections.get", Some(serde_json::json!({"key": key})))?
5123        }
5124        CollectionsCommand::GetItems {
5125            name_or_id,
5126            limit,
5127            offset,
5128            ..
5129        } => {
5130            let key = resolve_collection(client, &name_or_id)?;
5131            let mut params = serde_json::json!({"key": key});
5132            if let Some(map) = params.as_object_mut() {
5133                if let Some(limit) = limit {
5134                    map.insert("limit".to_string(), Value::Number(limit.into()));
5135                }
5136                if offset > 0 {
5137                    map.insert("offset".to_string(), Value::Number(offset.into()));
5138                }
5139            }
5140            normalize_list_envelope(
5141                client.call("collections.getItems", Some(params))?,
5142                "items",
5143                limit,
5144                offset,
5145            )
5146        }
5147        CollectionsCommand::Stats { name_or_id, .. } => {
5148            let key = resolve_collection(client, &name_or_id)?;
5149            client.call("collections.stats", Some(serde_json::json!({"key": key})))?
5150        }
5151        CollectionsCommand::Rename {
5152            old_name,
5153            new_name,
5154            dry_run,
5155            ..
5156        } => {
5157            let key = resolve_mutable_collection(client, &old_name, "rename")?;
5158            let params = serde_json::json!({"key": key, "name": new_name});
5159            if dry_run {
5160                return Ok((
5161                    dry_run_value("collections.rename", params),
5162                    JsonStyle::PythonCompact,
5163                ));
5164            }
5165            return Ok((
5166                client.call("collections.rename", Some(params))?,
5167                JsonStyle::PythonCompact,
5168            ));
5169        }
5170        CollectionsCommand::Create {
5171            name,
5172            parent,
5173            dry_run,
5174            ..
5175        } => {
5176            let mut params = serde_json::json!({"name": name});
5177            if let Some(parent) = parent {
5178                let parent_key = resolve_mutable_collection(client, &parent, "use as parent")?;
5179                if let Some(map) = params.as_object_mut() {
5180                    map.insert("parentKey".to_string(), parent_key);
5181                }
5182            }
5183            if dry_run {
5184                return Ok((
5185                    dry_run_value("collections.create", params),
5186                    JsonStyle::PythonCompact,
5187                ));
5188            }
5189            return Ok((
5190                client.call("collections.create", Some(params))?,
5191                JsonStyle::PythonCompact,
5192            ));
5193        }
5194        CollectionsCommand::Delete {
5195            name_or_id,
5196            dry_run,
5197            ..
5198        } => {
5199            let key = resolve_mutable_collection(client, &name_or_id, "delete")?;
5200            let params = serde_json::json!({"key": key});
5201            if dry_run {
5202                return Ok((
5203                    dry_run_value("collections.delete", params),
5204                    JsonStyle::PythonCompact,
5205                ));
5206            }
5207            return Ok((
5208                client.call("collections.delete", Some(params))?,
5209                JsonStyle::PythonCompact,
5210            ));
5211        }
5212        CollectionsCommand::AddItems {
5213            collection,
5214            item_keys,
5215            dry_run,
5216            ..
5217        } => {
5218            let key = resolve_mutable_collection(client, &collection, "add to")?;
5219            let params = serde_json::json!({"key": key, "keys": item_keys});
5220            if dry_run {
5221                return Ok((
5222                    dry_run_value("collections.addItems", params),
5223                    JsonStyle::PythonCompact,
5224                ));
5225            }
5226            return Ok((
5227                client.call("collections.addItems", Some(params))?,
5228                JsonStyle::PythonCompact,
5229            ));
5230        }
5231        CollectionsCommand::RemoveItems {
5232            collection,
5233            item_keys,
5234            dry_run,
5235            ..
5236        } => {
5237            let key = resolve_mutable_collection(client, &collection, "operate on")?;
5238            let params = serde_json::json!({"key": key, "keys": item_keys});
5239            if dry_run {
5240                return Ok((
5241                    dry_run_value("collections.removeItems", params),
5242                    JsonStyle::PythonCompact,
5243                ));
5244            }
5245            return Ok((
5246                client.call("collections.removeItems", Some(params))?,
5247                JsonStyle::PythonCompact,
5248            ));
5249        }
5250    };
5251    Ok((value, JsonStyle::Pretty))
5252}
5253
5254fn resolve_export_keys(
5255    client: &mut impl RpcCaller,
5256    mut keys: Vec<String>,
5257    collection: Option<String>,
5258) -> Result<Vec<String>, String> {
5259    if let Some(name) = collection {
5260        let col_key = resolve_collection(client, &name)?;
5261        let response = client.call(
5262            "collections.getItems",
5263            Some(serde_json::json!({"key": col_key})),
5264        )?;
5265        let items = collection_items(&response);
5266        for item in items {
5267            if let Some(key) = item.get("key").and_then(Value::as_str) {
5268                if !keys.contains(&key.to_string()) {
5269                    keys.push(key.to_string());
5270                }
5271            }
5272        }
5273    }
5274    if keys.is_empty() {
5275        return Err("No item keys provided. Pass positional keys and/or --collection.".to_string());
5276    }
5277    Ok(keys)
5278}
5279
5280fn run_export(args: ExportArgs, client: &mut impl RpcCaller) -> Result<String, String> {
5281    let keys = resolve_export_keys(client, args.keys, args.collection)?;
5282    match args.format.as_str() {
5283        "bibtex" => run_export_content_command(client, "export.bibtex", keys),
5284        "ris" => run_export_content_command(client, "export.ris", keys),
5285        "csl-json" => {
5286            let response =
5287                client.call("export.cslJson", Some(serde_json::json!({"keys": keys})))?;
5288            if let Some(content) = response.get("content") {
5289                format_json(content, JsonStyle::Pretty)
5290            } else {
5291                format_json(&response, JsonStyle::PythonCompact)
5292            }
5293        }
5294        "bibliography" => {
5295            let response = client.call(
5296                "export.bibliography",
5297                Some(serde_json::json!({"keys": keys, "style": args.style})),
5298            )?;
5299            if let Some(object) = response.as_object() {
5300                let field = if args.html { "html" } else { "text" };
5301                if object.contains_key("html") || object.contains_key("text") {
5302                    return raw_value_output(
5303                        object.get(field).unwrap_or(&Value::String(String::new())),
5304                    );
5305                }
5306            }
5307            format_json(&response, JsonStyle::PythonCompact)
5308        }
5309        other => Err(format!(
5310            "INVALID_ARGS: unknown format {other:?}, expected bibtex/ris/csl-json/bibliography"
5311        )),
5312    }
5313}
5314
5315fn run_export_content_command(
5316    client: &mut impl RpcCaller,
5317    method: &str,
5318    keys: Vec<String>,
5319) -> Result<String, String> {
5320    let response = client.call(method, Some(serde_json::json!({"keys": keys})))?;
5321    if let Some(content) = response.get("content") {
5322        raw_value_output(content)
5323    } else {
5324        format_json(&response, JsonStyle::PythonCompact)
5325    }
5326}
5327
5328fn raw_value_output(value: &Value) -> Result<String, String> {
5329    let mut out = match value {
5330        Value::Null => String::new(),
5331        Value::String(content) => content.clone(),
5332        other => to_python_repr(other),
5333    };
5334    out.push('\n');
5335    Ok(out)
5336}
5337
5338fn to_python_repr(value: &Value) -> String {
5339    match value {
5340        Value::Null => "None".to_string(),
5341        Value::Bool(value) => {
5342            if *value {
5343                "True".to_string()
5344            } else {
5345                "False".to_string()
5346            }
5347        }
5348        Value::Number(value) => value.to_string(),
5349        Value::String(value) => format!("'{}'", value.replace('\\', "\\\\").replace('\'', "\\'")),
5350        Value::Array(values) => {
5351            let inner = values
5352                .iter()
5353                .map(to_python_repr)
5354                .collect::<Vec<_>>()
5355                .join(", ");
5356            format!("[{inner}]")
5357        }
5358        Value::Object(entries) => {
5359            let inner = entries
5360                .iter()
5361                .map(|(key, value)| {
5362                    format!("'{}': {}", key.replace('\'', "\\'"), to_python_repr(value))
5363                })
5364                .collect::<Vec<_>>()
5365                .join(", ");
5366            format!("{{{inner}}}")
5367        }
5368    }
5369}
5370
5371fn resolve_collection(client: &mut impl RpcCaller, name_or_id: &str) -> Result<Value, String> {
5372    let trimmed = name_or_id.trim();
5373    if let Ok(id) = trimmed.parse::<i64>() {
5374        return Ok(Value::Number(id.into()));
5375    }
5376
5377    let collections = client.call("collections.list", None)?;
5378    let items = collections
5379        .get("items")
5380        .and_then(Value::as_array)
5381        .or_else(|| collections.as_array())
5382        .ok_or_else(|| "collections.list returned non-array result".to_string())?;
5383
5384    if let Some(collection) = items
5385        .iter()
5386        .find(|collection| collection.get("key").and_then(Value::as_str) == Some(trimmed))
5387    {
5388        return collection_key(collection);
5389    }
5390
5391    let exact = items
5392        .iter()
5393        .filter(|collection| collection.get("name").and_then(Value::as_str) == Some(trimmed))
5394        .collect::<Vec<_>>();
5395    if exact.len() == 1 {
5396        return collection_key(exact[0]);
5397    }
5398
5399    let needle = normalize_collection_name(trimmed);
5400    let fuzzy = items
5401        .iter()
5402        .filter(|collection| {
5403            collection
5404                .get("name")
5405                .and_then(Value::as_str)
5406                .map(normalize_collection_name)
5407                .is_some_and(|name| name.contains(&needle))
5408        })
5409        .collect::<Vec<_>>();
5410
5411    match fuzzy.len() {
5412        1 => collection_key(fuzzy[0]),
5413        0 => Err(format!(
5414            "COLLECTION_NOT_FOUND: No collection named {trimmed:?}"
5415        )),
5416        _ => Err(format!(
5417            "COLLECTION_AMBIGUOUS: Multiple collections match {trimmed:?}"
5418        )),
5419    }
5420}
5421
5422fn collection_key(collection: &Value) -> Result<Value, String> {
5423    collection
5424        .get("key")
5425        .cloned()
5426        .ok_or_else(|| "collection result is missing key".to_string())
5427}
5428
5429fn resolve_mutable_collection(
5430    client: &mut impl RpcCaller,
5431    name_or_id: &str,
5432    operation: &str,
5433) -> Result<Value, String> {
5434    let key = resolve_collection(client, name_or_id)?;
5435    if key.as_i64() == Some(0) {
5436        return Err(format!(
5437            "COLLECTION_NOT_FOUND: {name_or_id:?} resolved to library root (cannot {operation})"
5438        ));
5439    }
5440    Ok(key)
5441}
5442
/// Canonicalizes a collection name for fuzzy comparison.
///
/// Leading and trailing whitespace is dropped, every internal whitespace run is
/// collapsed to a single space, and the result is lowercased.
fn normalize_collection_name(name: &str) -> String {
    let mut collapsed = String::with_capacity(name.len());
    for word in name.split_whitespace() {
        // Words are never empty, so a non-empty buffer means a separator is due.
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed.to_lowercase()
}
5449
5450fn format_json(value: &Value, style: JsonStyle) -> Result<String, String> {
5451    let mut out = match style {
5452        JsonStyle::PythonCompact => to_python_compact_json(value),
5453        JsonStyle::Pretty => serde_json::to_string_pretty(value).map_err(|err| err.to_string())?,
5454    };
5455    out.push('\n');
5456    Ok(out)
5457}
5458
5459fn to_python_compact_json(value: &Value) -> String {
5460    match value {
5461        Value::Null => "null".to_string(),
5462        Value::Bool(value) => value.to_string(),
5463        Value::Number(value) => value.to_string(),
5464        Value::String(value) => {
5465            serde_json::to_string(value).expect("string serialization cannot fail")
5466        }
5467        Value::Array(values) => {
5468            let inner = values
5469                .iter()
5470                .map(to_python_compact_json)
5471                .collect::<Vec<_>>()
5472                .join(", ");
5473            format!("[{inner}]")
5474        }
5475        Value::Object(entries) => {
5476            let inner = entries
5477                .iter()
5478                .map(|(key, value)| {
5479                    let key = serde_json::to_string(key).expect("string serialization cannot fail");
5480                    format!("{key}: {}", to_python_compact_json(value))
5481                })
5482                .collect::<Vec<_>>()
5483                .join(", ");
5484            format!("{{{inner}}}")
5485        }
5486    }
5487}