Skip to main content

zotron_cli/
lib.rs

//! Minimal typed CLI surface for the Rust migration scaffold.
2
3use std::{
4    env, fs,
5    io::{self, Read},
6    path::{Path, PathBuf},
7    process::Command as ProcessCommand,
8    thread,
9    time::{Duration, Instant, SystemTime, UNIX_EPOCH},
10};
11
12use clap::{error::ErrorKind, Parser, Subcommand};
13use serde_json::Value;
14use zotron_rpc::{StdProviderCommandRunner, UreqProviderHttpTransport, ZoteroRpc};
15use zotron_types::{
16    build_ocr_provider_request, builtin_ocr_provider_specs, execute_embedding_provider_request,
17    is_zotron_evidence_artifact, machine_artifact_exists_for_item,
18    machine_artifact_exists_in_sidecar, machine_artifact_store_root,
19    ocr_provider_spec as raw_ocr_provider_spec, parse_ocr_provider_response,
20    read_machine_artifact_sidecar, write_machine_artifact_sidecar, ArtifactStorePlatform,
21    EmbeddingRequestInput, MachineArtifactKind, OcrRequestInput, ProviderCommandRunner,
22    ProviderHttpInvocation, ProviderHttpTransport, DEFAULT_RPC_URL,
23};
24
25pub trait RpcCaller {
26    fn call(&mut self, method: &str, params: Option<Value>) -> Result<Value, String>;
27}
28
29#[derive(Debug, Clone, PartialEq, serde::Serialize)]
30pub struct CliOcrProviderSpec {
31    pub id: &'static str,
32    pub provider: &'static str,
33    pub request_style: &'static str,
34    pub auth: &'static str,
35    pub auth_header: &'static str,
36    pub supports_pdf_direct: bool,
37    pub key_field: &'static str,
38}
39
40#[derive(Debug, Clone, PartialEq, serde::Serialize)]
41pub struct CliEmbeddingProviderSpec {
42    pub id: &'static str,
43    pub provider: &'static str,
44    pub request_style: &'static str,
45    pub default_url: String,
46    pub default_model: &'static str,
47    pub auth: &'static str,
48    pub key_field: &'static str,
49}
50
51pub fn ocr_provider_specs() -> Vec<CliOcrProviderSpec> {
52    builtin_ocr_provider_specs()
53        .into_iter()
54        .map(cli_ocr_provider_spec)
55        .collect()
56}
57
58pub fn ocr_provider_spec(provider: &str) -> Result<CliOcrProviderSpec, String> {
59    zotron_types::ocr_provider_spec(provider).map(cli_ocr_provider_spec)
60}
61
62pub fn embedding_provider_spec(provider: &str) -> Result<CliEmbeddingProviderSpec, String> {
63    let spec = zotron_types::embedding_provider_spec(provider)?;
64    Ok(CliEmbeddingProviderSpec {
65        id: spec.id,
66        provider: spec.provider_key,
67        request_style: if spec.provider_key == "alibaba" {
68            "dashscope"
69        } else {
70            spec.request_style.as_str()
71        },
72        default_url: spec.default_url.unwrap_or("").to_string(),
73        default_model: spec.default_model,
74        auth: spec.auth,
75        key_field: spec.key_field,
76    })
77}
78
79pub fn chunks_from_blocks(blocks: &[Value], max_chars: usize) -> Result<Vec<Value>, String> {
80    let typed = blocks
81        .iter()
82        .map(json_block_to_pdf_block)
83        .collect::<Result<Vec<_>, _>>()?;
84    let chunks = zotron_types::chunks_from_blocks(&typed, max_chars);
85    chunks
86        .into_iter()
87        .map(|chunk| chunk_to_cli_value(&chunk, &typed))
88        .collect()
89}
90
91fn cli_ocr_provider_spec(spec: zotron_types::OcrProviderSpec) -> CliOcrProviderSpec {
92    CliOcrProviderSpec {
93        id: spec.provider_key,
94        provider: spec.provider_key,
95        request_style: spec.request_style.as_str(),
96        auth: spec.auth,
97        auth_header: spec.auth_header,
98        supports_pdf_direct: spec.supports_pdf_direct,
99        key_field: spec.key_field,
100    }
101}
102
103fn json_block_to_pdf_block(value: &Value) -> Result<zotron_types::PdfEvidenceBlock, String> {
104    let block_key = value
105        .get("block_key")
106        .and_then(Value::as_str)
107        .ok_or_else(|| "block missing block_key".to_string())?
108        .to_string();
109    let item_key = value
110        .get("item_key")
111        .and_then(Value::as_str)
112        .ok_or_else(|| "block missing item_key".to_string())?
113        .to_string();
114    let attachment_key = value
115        .get("attachment_key")
116        .and_then(Value::as_str)
117        .ok_or_else(|| "block missing attachment_key".to_string())?
118        .to_string();
119    let page_idx = value
120        .get("page_idx")
121        .or_else(|| value.get("page"))
122        .and_then(Value::as_u64)
123        .unwrap_or(1);
124    let block_type = value
125        .get("type")
126        .or_else(|| value.get("block_type"))
127        .and_then(Value::as_str)
128        .unwrap_or("paragraph")
129        .to_string();
130    let section_path = value
131        .get("section_path")
132        .and_then(Value::as_array)
133        .map(|items| {
134            items
135                .iter()
136                .filter_map(Value::as_str)
137                .map(ToString::to_string)
138                .collect::<Vec<_>>()
139        })
140        .unwrap_or_default();
141    let text = value
142        .get("text")
143        .and_then(Value::as_str)
144        .unwrap_or("")
145        .to_string();
146    let bbox = value.get("bbox").and_then(value_bbox4);
147
148    Ok(zotron_types::PdfEvidenceBlock {
149        block_key,
150        item_key,
151        attachment_key,
152        page_idx,
153        block_type,
154        bbox,
155        section_path,
156        text,
157    })
158}
159
160fn chunk_to_cli_value(
161    chunk: &zotron_types::StructureChunk,
162    blocks: &[zotron_types::PdfEvidenceBlock],
163) -> Result<Value, String> {
164    let refs = chunk
165        .block_keys
166        .iter()
167        .filter_map(|key| blocks.iter().find(|block| &block.block_key == key))
168        .map(|block| {
169            serde_json::json!({
170                "block_key": block.block_key,
171                "page_idx": block.page_idx,
172                "bbox": block.bbox.map(|bbox| bbox.iter().map(|n| {
173                    if n.fract() == 0.0 {
174                        Value::from(*n as i64)
175                    } else {
176                        Value::from(*n)
177                    }
178                }).collect::<Vec<_>>()),
179            })
180        })
181        .collect::<Vec<_>>();
182    Ok(serde_json::json!({
183        "chunk_key": chunk.chunk_key,
184        "item_key": chunk.item_key,
185        "attachment_key": chunk.attachment_key,
186        "block_keys": chunk.block_keys,
187        "section_path": chunk.section_path,
188        "text": chunk.text,
189        "page_start": chunk.page_start,
190        "page_end": chunk.page_end,
191        "evidence_refs": refs,
192    }))
193}
194
195fn value_bbox4(value: &Value) -> Option<[f64; 4]> {
196    let arr = value.as_array()?;
197    if arr.len() != 4 {
198        return None;
199    }
200    Some([
201        arr[0].as_f64()?,
202        arr[1].as_f64()?,
203        arr[2].as_f64()?,
204        arr[3].as_f64()?,
205    ])
206}
207
208impl RpcCaller for ZoteroRpc {
209    fn call(&mut self, method: &str, params: Option<Value>) -> Result<Value, String> {
210        self.call(method, params).map_err(|err| err.to_string())
211    }
212}
213
214#[derive(Debug, Parser)]
215#[command(name = "zotron", about = "Rust client + CLI for the Zotron XPI")]
216struct Cli {
217    #[command(subcommand)]
218    command: Command,
219}
220
221#[derive(Debug, Subcommand)]
222enum OcrCommand {
223    /// Print supported OCR provider contracts.
224    Providers,
225    /// Execute an OCR provider request from JSON and emit normalized blocks.
226    #[command(name = "run")]
227    Run {
228        #[arg(long)]
229        provider: String,
230        /// Path to an OcrRequestInput JSON file, or "-" to read stdin.
231        #[arg(long)]
232        input: Option<String>,
233        /// Local PDF/image file to encode into an OcrRequestInput.
234        #[arg(long)]
235        file: Option<String>,
236        /// Zotero item key used when --file builds the OCR request.
237        #[arg(long = "item-key")]
238        item_key: Option<String>,
239        /// Zotero attachment key used when --file builds the OCR request.
240        #[arg(long = "attachment-key")]
241        attachment_key: Option<String>,
242        /// MIME type used when --file builds the OCR request.
243        #[arg(long = "mime-type")]
244        mime_type: Option<String>,
245        /// Override the provider endpoint, required for service-hosted PaddleOCR-VL.
246        #[arg(long)]
247        endpoint: Option<String>,
248        /// Environment variable containing the provider bearer token.
249        #[arg(long = "api-key-env")]
250        api_key_env: Option<String>,
251    },
252    /// Show OCR statistics for a collection.
253    Status {
254        #[arg(long)]
255        collection: String,
256        #[arg(long, default_value = DEFAULT_RPC_URL)]
257        url: String,
258    },
259    /// Parse a Zotero PDF through MinerU and write hidden sidecar OCR/RAG artifacts.
260    #[command(name = "process")]
261    Process {
262        #[arg(long, default_value = "mineru")]
263        provider: String,
264        /// Parent Zotero item key.
265        #[arg(long)]
266        parent: String,
267        /// Zotero PDF attachment key (auto-resolved from --parent when omitted).
268        #[arg(long)]
269        attachment: Option<String>,
270        /// Public URL for MinerU cloud parsing. Use --result-dir/--result-zip for offline ingestion.
271        #[arg(long = "source-url")]
272        source_url: Option<String>,
273        /// Already-extracted MinerU result directory, used by tests/offline replay.
274        #[arg(long = "result-dir")]
275        result_dir: Option<String>,
276        /// Already-downloaded MinerU result zip, used by tests/offline replay.
277        #[arg(long = "result-zip")]
278        result_zip: Option<String>,
279        /// Override MinerU submit endpoint.
280        #[arg(long = "provider-endpoint")]
281        provider_endpoint: Option<String>,
282        /// Environment variable containing the MinerU bearer token.
283        #[arg(long = "api-key-env", default_value = "ZOTRON_MINERU_API_KEY")]
284        api_key_env: String,
285        #[arg(long = "poll-interval-seconds", default_value_t = 5)]
286        poll_interval_seconds: u64,
287        #[arg(long = "timeout-seconds", default_value_t = 900)]
288        timeout_seconds: u64,
289        #[arg(long = "chunk-chars", default_value_t = 1200)]
290        chunk_chars: usize,
291        #[arg(long, default_value = DEFAULT_RPC_URL)]
292        url: String,
293    },
294}
295
296#[derive(Debug, Subcommand)]
297enum Command {
298    /// Check that Zotero is running with the Zotron XPI enabled.
299    Ping {
300        #[arg(long, default_value = DEFAULT_RPC_URL)]
301        url: String,
302    },
303    /// Generic RPC escape hatch.
304    Rpc {
305        method: String,
306        #[arg(default_value = "{}")]
307        params_json: String,
308        #[arg(long, default_value = DEFAULT_RPC_URL)]
309        url: String,
310        #[arg(long)]
311        paginate: bool,
312        #[arg(long, default_value_t = 100)]
313        page_size: usize,
314    },
315    /// Push prepared Zotero JSON (from file or stdin) to Zotero.
316    Push {
317        /// Path to a JSON file, or "-" to read from stdin.
318        json_file: String,
319        /// Optional PDF attachment path.
320        #[arg(long)]
321        pdf: Option<String>,
322        /// Collection name (fuzzy) or key.
323        #[arg(long)]
324        collection: Option<String>,
325        /// Duplicate handling: skip | update | create.
326        #[arg(long = "on-duplicate", default_value = "skip")]
327        on_duplicate: String,
328        #[arg(long, default_value = DEFAULT_RPC_URL)]
329        url: String,
330        /// Parse input + resolve collection only; do not push to Zotero.
331        #[arg(long = "dry-run")]
332        dry_run: bool,
333    },
334    /// System and plugin introspection commands.
335    System {
336        #[command(subcommand)]
337        command: SystemCommand,
338    },
339    /// Search items by text, tag, identifier, or structured conditions.
340    Search(SearchArgs),
341    /// Inspect and manage Zotero items.
342    Items {
343        #[command(subcommand)]
344        command: ItemsCommand,
345    },
346    /// Inspect Zotero collections.
347    Collections {
348        #[command(subcommand)]
349        command: CollectionsCommand,
350    },
351    /// Inspect Zotero notes.
352    Notes {
353        #[command(subcommand)]
354        command: NotesCommand,
355    },
356    /// Inspect Zotero attachments.
357    Attachments {
358        #[command(subcommand)]
359        command: AttachmentsCommand,
360    },
361    /// Inspect Zotero preferences.
362    Settings {
363        #[command(subcommand)]
364        command: SettingsCommand,
365    },
366    /// Inspect and manage Zotero tags.
367    Tags {
368        #[command(subcommand)]
369        command: TagsCommand,
370    },
371    /// Export items as BibTeX, RIS, CSL-JSON, or formatted bibliography.
372    Export(ExportArgs),
373    /// List, create, and delete PDF annotations.
374    Annotations {
375        #[command(subcommand)]
376        command: AnnotationsCommand,
377    },
378    /// OCR PDFs and manage raw/block/chunk evidence artifacts.
379    Ocr {
380        #[command(subcommand)]
381        command: OcrCommand,
382    },
383    /// Build and search retrieval artifacts.
384    Rag {
385        #[command(subcommand)]
386        command: RagCommand,
387    },
388    /// Batch fill missing PDFs in a collection via Zotero's resolver chain.
389    #[command(name = "find-pdfs")]
390    FindPdfs {
391        #[arg(long)]
392        collection: String,
393        #[arg(long, default_value_t = 0)]
394        limit: usize,
395        #[arg(long, default_value = DEFAULT_RPC_URL)]
396        url: String,
397    },
398}
399
/// Option bundle for a retrieval-hits query.
///
/// NOTE(review): the fields appear to mirror the arguments of `RagCommand::Search`
/// (everything except `url`); confirm against the code that constructs this struct.
///
/// Derives `Debug` and `Clone` for consistency with the other types in this file, so the
/// options can be logged and copied.
#[derive(Debug, Clone)]
struct RagHitsOptions {
    /// Query text.
    query: String,
    /// Optional collection to restrict the query to.
    collection: Option<String>,
    /// Item keys to restrict the query to; may be empty.
    keys: Vec<String>,
    zotero: bool,
    top_spans_per_item: u64,
    include_fulltext_spans: bool,
    top_k: u64,
    /// Output format name (presumably `"json"` or `"jsonl"`, as accepted by `RagCommand::Search`).
    output: String,
}
410
411#[derive(Debug, Subcommand)]
412enum RagCommand {
413    /// Print supported embedding provider contracts.
414    #[command(name = "providers")]
415    Providers,
416    /// Execute an embedding provider request from JSON and emit vectors.
417    #[command(name = "embed")]
418    Embed {
419        #[arg(long)]
420        provider: String,
421        /// Path to an EmbeddingRequestInput JSON file, or "-" to read stdin.
422        #[arg(long)]
423        input: String,
424        /// Override the embedding endpoint.
425        #[arg(long)]
426        endpoint: Option<String>,
427        /// Override the embedding model.
428        #[arg(long)]
429        model: Option<String>,
430        /// Override provider input type, for example document or query.
431        #[arg(long = "input-type")]
432        input_type: Option<String>,
433        /// Environment variable containing the provider bearer token.
434        #[arg(long = "api-key-env")]
435        api_key_env: Option<String>,
436    },
437    /// Show index status for a collection.
438    Status {
439        #[arg(long)]
440        collection: String,
441        #[arg(long, default_value = DEFAULT_RPC_URL)]
442        url: String,
443    },
444    /// Emit academic-zh retrieval hits with item_key/title/text provenance.
445    #[command(name = "search")]
446    Search {
447        query: String,
448        #[arg(long)]
449        collection: Option<String>,
450        /// Limit retrieval to one or more Zotero item keys.
451        #[arg(long = "key", alias = "keys")]
452        keys: Vec<String>,
453        #[arg(long)]
454        zotero: bool,
455        #[arg(long = "top-spans-per-item", default_value_t = 3)]
456        top_spans_per_item: u64,
457        #[arg(long = "include-fulltext-spans")]
458        include_fulltext_spans: bool,
459        #[arg(long = "limit", alias = "top-k", default_value_t = 50)]
460        top_k: u64,
461        #[arg(long, default_value = "json", value_parser = ["json", "jsonl"])]
462        output: String,
463        #[arg(long, default_value = DEFAULT_RPC_URL)]
464        url: String,
465    },
466}
467
468#[derive(Debug, Subcommand)]
469enum SystemCommand {
470    /// Show XPI version and exposed method metadata.
471    Version {
472        #[arg(long, default_value = DEFAULT_RPC_URL)]
473        url: String,
474    },
475    /// List all libraries (user + groups).
476    Libraries {
477        #[arg(long, default_value = DEFAULT_RPC_URL)]
478        url: String,
479    },
480    /// Get statistics for the current (or specified) library.
481    #[command(name = "library-stats")]
482    LibraryStats {
483        #[arg(long)]
484        library: Option<i64>,
485        #[arg(long, default_value = DEFAULT_RPC_URL)]
486        url: String,
487    },
488    /// Show item type schema. Without --type, lists all types. With --type, shows fields and creator types.
489    Schema {
490        #[arg(long = "type")]
491        item_type: Option<String>,
492        #[arg(long, default_value = DEFAULT_RPC_URL)]
493        url: String,
494    },
495    /// Get the currently selected Zotero collection (or null).
496    #[command(name = "current-collection")]
497    CurrentCollection {
498        #[arg(long, default_value = DEFAULT_RPC_URL)]
499        url: String,
500    },
501    /// List all RPC methods exposed by the XPI.
502    #[command(name = "list-methods")]
503    ListMethods {
504        #[arg(long, default_value = DEFAULT_RPC_URL)]
505        url: String,
506    },
507    /// Describe one or all RPC methods (schema / signatures).
508    Describe {
509        method: Option<String>,
510        #[arg(long, default_value = DEFAULT_RPC_URL)]
511        url: String,
512    },
513}
514
515#[derive(Debug, clap::Args)]
516struct SearchArgs {
517    /// Search query (title/creator/year by default; PDF content with --fulltext).
518    query: Option<String>,
519    /// Search inside PDF full-text content instead of metadata.
520    #[arg(long)]
521    fulltext: bool,
522    /// Filter by author/creator name (contains match).
523    #[arg(long)]
524    author: Option<String>,
525    /// Filter by date after (YYYY or YYYY-MM-DD).
526    #[arg(long)]
527    after: Option<String>,
528    /// Filter by date before (YYYY or YYYY-MM-DD).
529    #[arg(long)]
530    before: Option<String>,
531    /// Filter by journal/publication title (contains match).
532    #[arg(long)]
533    journal: Option<String>,
534    /// Filter by tag (exact match).
535    #[arg(long)]
536    tag: Option<String>,
537    /// Find by DOI.
538    #[arg(long)]
539    doi: Option<String>,
540    /// Find by ISBN.
541    #[arg(long)]
542    isbn: Option<String>,
543    /// Find by ISSN.
544    #[arg(long)]
545    issn: Option<String>,
546    /// Limit results to a collection name or key.
547    #[arg(long)]
548    collection: Option<String>,
549    #[arg(long, default_value_t = 50)]
550    limit: u64,
551    #[arg(long, default_value_t = 0)]
552    offset: u64,
553    #[arg(long, default_value = DEFAULT_RPC_URL)]
554    url: String,
555    #[command(subcommand)]
556    management: Option<SearchManagementCommand>,
557}
558
559#[derive(Debug, Subcommand)]
560enum SearchManagementCommand {
561    /// List all saved searches in the library.
562    #[command(name = "saved-searches")]
563    SavedSearches {
564        #[arg(long, default_value = DEFAULT_RPC_URL)]
565        url: String,
566    },
567    /// Create a saved search with one or more conditions.
568    #[command(name = "create-saved")]
569    CreateSaved {
570        name: String,
571        #[arg(long = "condition", required = true)]
572        condition: Vec<String>,
573        #[arg(long)]
574        dry_run: bool,
575        #[arg(long, default_value = DEFAULT_RPC_URL)]
576        url: String,
577    },
578    /// Delete a saved search by key.
579    #[command(name = "delete-saved")]
580    DeleteSaved {
581        search_key: String,
582        #[arg(long)]
583        dry_run: bool,
584        #[arg(long, default_value = DEFAULT_RPC_URL)]
585        url: String,
586    },
587}
588
589#[derive(Debug, Subcommand)]
590enum ItemsCommand {
591    /// Add an item by DOI, ISBN, URL, or local file.
592    Add {
593        #[arg(long)]
594        doi: Option<String>,
595        #[arg(long)]
596        isbn: Option<String>,
597        /// Web page URL to add from.
598        #[arg(long = "from-url")]
599        from_url: Option<String>,
600        /// Local file path to add from.
601        #[arg(long)]
602        file: Option<String>,
603        #[arg(long)]
604        collection: Option<String>,
605        #[arg(long)]
606        dry_run: bool,
607        #[arg(long, default_value = DEFAULT_RPC_URL)]
608        url: String,
609    },
610    /// Create a new item of the given type.
611    Create {
612        #[arg(long = "type")]
613        item_type: String,
614        #[arg(long = "field")]
615        fields: Vec<String>,
616        #[arg(long)]
617        dry_run: bool,
618        #[arg(long, default_value = DEFAULT_RPC_URL)]
619        url: String,
620    },
621    /// Update fields on an existing item.
622    Update {
623        key: String,
624        #[arg(long = "field")]
625        fields: Vec<String>,
626        #[arg(long)]
627        dry_run: bool,
628        #[arg(long, default_value = DEFAULT_RPC_URL)]
629        url: String,
630    },
631    /// Permanently delete an item.
632    Delete {
633        key: String,
634        #[arg(long)]
635        dry_run: bool,
636        #[arg(long, default_value = DEFAULT_RPC_URL)]
637        url: String,
638    },
639    /// Move one or more items to trash.
640    Trash {
641        items: Vec<String>,
642        #[arg(long)]
643        dry_run: bool,
644        #[arg(long, default_value = DEFAULT_RPC_URL)]
645        url: String,
646    },
647    /// Restore a trashed item.
648    Restore {
649        item: String,
650        #[arg(long)]
651        dry_run: bool,
652        #[arg(long, default_value = DEFAULT_RPC_URL)]
653        url: String,
654    },
655    /// Merge a group of duplicate items.
656    #[command(name = "merge-duplicates")]
657    MergeDuplicates {
658        keys: Vec<String>,
659        #[arg(long)]
660        dry_run: bool,
661        #[arg(long, default_value = DEFAULT_RPC_URL)]
662        url: String,
663    },
664    /// Add a related-item link between two items.
665    #[command(name = "add-related")]
666    AddRelated {
667        key: String,
668        #[arg(long)]
669        target: String,
670        #[arg(long)]
671        dry_run: bool,
672        #[arg(long, default_value = DEFAULT_RPC_URL)]
673        url: String,
674    },
675    /// Remove a related-item link between two items.
676    #[command(name = "remove-related")]
677    RemoveRelated {
678        key: String,
679        #[arg(long)]
680        target: String,
681        #[arg(long)]
682        dry_run: bool,
683        #[arg(long, default_value = DEFAULT_RPC_URL)]
684        url: String,
685    },
686    /// Print the full serialization of an item by key.
687    Get {
688        item: String,
689        #[arg(long, default_value = DEFAULT_RPC_URL)]
690        url: String,
691    },
692    /// List items in the library with optional sorting and pagination.
693    List {
694        #[arg(long, default_value_t = 50)]
695        limit: u64,
696        #[arg(long, default_value_t = 0)]
697        offset: u64,
698        #[arg(long)]
699        sort: Option<String>,
700        #[arg(long, default_value = "asc")]
701        direction: String,
702        /// List trashed items instead of regular items.
703        #[arg(long)]
704        trash: bool,
705        #[arg(long, default_value = DEFAULT_RPC_URL)]
706        url: String,
707    },
708    /// Run Zotero's duplicate scan and print groups.
709    #[command(name = "find-duplicates")]
710    FindDuplicates {
711        #[arg(long, default_value = DEFAULT_RPC_URL)]
712        url: String,
713    },
714    /// List recently added or modified items.
715    Recent {
716        #[arg(long, default_value_t = 20)]
717        limit: u64,
718        #[arg(long, default_value_t = 0)]
719        offset: u64,
720        #[arg(long = "type", default_value = "added")]
721        recent_type: String,
722        #[arg(long, default_value = DEFAULT_RPC_URL)]
723        url: String,
724    },
725    /// Retrieve the full-text content of an item's attachment.
726    Fulltext {
727        key: String,
728        #[arg(long, default_value = DEFAULT_RPC_URL)]
729        url: String,
730    },
731    /// List items related to the given item.
732    Related {
733        key: String,
734        #[arg(long, default_value = DEFAULT_RPC_URL)]
735        url: String,
736    },
737    /// Get the citation key for an item.
738    #[command(name = "citation-key")]
739    CitationKey {
740        key: String,
741        #[arg(long, default_value = DEFAULT_RPC_URL)]
742        url: String,
743    },
744}
745
746#[derive(Debug, Subcommand)]
747enum SettingsCommand {
748    /// Get a single Zotero preference value.
749    Get {
750        key: String,
751        #[arg(long, default_value = DEFAULT_RPC_URL)]
752        url: String,
753    },
754    /// List all Zotero preferences as a key->value dict.
755    #[command(visible_alias = "get-all")]
756    List {
757        #[arg(long, default_value = DEFAULT_RPC_URL)]
758        url: String,
759    },
760    /// Set one or more Zotero preferences (key value pairs), or bulk-set from a JSON file.
761    Set {
762        /// key value key value ... (pairs of positional args)
763        pairs: Vec<String>,
764        /// Bulk-set from a JSON file.
765        #[arg(long)]
766        file: Option<String>,
767        #[arg(long)]
768        dry_run: bool,
769        #[arg(long, default_value = DEFAULT_RPC_URL)]
770        url: String,
771    },
772}
773
774#[derive(Debug, Subcommand)]
775enum TagsCommand {
776    /// List all tags in the library (flat).
777    List {
778        #[arg(long, default_value_t = 200)]
779        limit: u64,
780        #[arg(long, default_value = DEFAULT_RPC_URL)]
781        url: String,
782    },
783    /// Rename a tag across all items.
784    Rename {
785        old: String,
786        new: String,
787        #[arg(long)]
788        dry_run: bool,
789        #[arg(long, default_value = DEFAULT_RPC_URL)]
790        url: String,
791    },
792    /// Delete a tag library-wide.
793    Delete {
794        tag: String,
795        #[arg(long)]
796        dry_run: bool,
797        #[arg(long, default_value = DEFAULT_RPC_URL)]
798        url: String,
799    },
800    /// Add tags to one or more items.
801    Add {
802        keys: Vec<String>,
803        #[arg(long = "tag", required = true)]
804        tags: Vec<String>,
805        #[arg(long)]
806        dry_run: bool,
807        #[arg(long, default_value = DEFAULT_RPC_URL)]
808        url: String,
809    },
810    /// Remove tags from one or more items.
811    Remove {
812        keys: Vec<String>,
813        #[arg(long = "tag", required = true)]
814        tags: Vec<String>,
815        #[arg(long)]
816        dry_run: bool,
817        #[arg(long, default_value = DEFAULT_RPC_URL)]
818        url: String,
819    },
820}
821
822#[derive(Debug, clap::Args)]
823struct ExportArgs {
824    /// Item keys to export.
825    keys: Vec<String>,
826    /// Output format: bibtex, ris, csl-json, bibliography.
827    #[arg(long, default_value = "bibtex")]
828    format: String,
829    /// Export all items from this collection (name or key).
830    #[arg(long)]
831    collection: Option<String>,
832    /// Citation style URL (only for bibliography format).
833    #[arg(long, default_value = "http://www.zotero.org/styles/gb-t-7714-2015-numeric")]
834    style: String,
835    /// Output HTML instead of plain text (only for bibliography format).
836    #[arg(long)]
837    html: bool,
838    #[arg(long, default_value = DEFAULT_RPC_URL)]
839    url: String,
840}
841
842#[derive(Debug, Subcommand)]
843enum AnnotationsCommand {
844    /// List annotations on a PDF attachment.
845    List {
846        #[arg(long)]
847        parent: String,
848        #[arg(long, default_value = DEFAULT_RPC_URL)]
849        url: String,
850    },
851    /// Create a new annotation on a PDF attachment.
852    Create {
853        #[arg(long)]
854        parent: String,
855        #[arg(long = "type")]
856        annotation_type: String,
857        /// JSON annotation position, for example '{"pageIndex":0,"rects":[[10,20,30,40]]}'.
858        #[arg(long)]
859        position: Option<String>,
860        /// Zotero annotation sort index.
861        #[arg(long = "sort-index")]
862        sort_index: Option<String>,
863        #[arg(long)]
864        text: Option<String>,
865        #[arg(long)]
866        comment: Option<String>,
867        #[arg(long, default_value = "#ffd400")]
868        color: String,
869        #[arg(long)]
870        dry_run: bool,
871        #[arg(long, default_value = DEFAULT_RPC_URL)]
872        url: String,
873    },
874    /// Delete an annotation by key.
875    Delete {
876        annotation_key: String,
877        #[arg(long)]
878        dry_run: bool,
879        #[arg(long, default_value = DEFAULT_RPC_URL)]
880        url: String,
881    },
882}
883
884#[derive(Debug, Subcommand)]
885enum AttachmentsCommand {
886    /// List attachments belonging to a parent item.
887    List {
888        #[arg(long)]
889        parent: String,
890        #[arg(long, default_value_t = 50)]
891        limit: u64,
892        #[arg(long, default_value_t = 0)]
893        offset: u64,
894        #[arg(long, default_value = DEFAULT_RPC_URL)]
895        url: String,
896    },
897    /// Get a single attachment by key.
898    Get {
899        key: String,
900        #[arg(long, default_value = DEFAULT_RPC_URL)]
901        url: String,
902    },
903    /// Get full-text content of an attachment.
904    Fulltext {
905        key: String,
906        #[arg(long, default_value = DEFAULT_RPC_URL)]
907        url: String,
908    },
909    /// Get the local filesystem path of an attachment.
910    Path {
911        key: String,
912        #[arg(long, default_value = DEFAULT_RPC_URL)]
913        url: String,
914    },
915    /// Attach a local file or remote URL to an item.
916    Add {
917        #[arg(long)]
918        parent: String,
919        /// Local file path to attach.
920        #[arg(long)]
921        path: Option<String>,
922        /// Remote URL to attach.
923        #[arg(long = "from-url")]
924        from_url: Option<String>,
925        #[arg(long)]
926        title: Option<String>,
927        #[arg(long)]
928        dry_run: bool,
929        #[arg(long, default_value = DEFAULT_RPC_URL)]
930        url: String,
931    },
932    /// Delete an attachment.
933    Delete {
934        key: String,
935        #[arg(long, default_value = DEFAULT_RPC_URL)]
936        url: String,
937        #[arg(long)]
938        dry_run: bool,
939    },
940    /// Trigger Zotero's Find Available PDF for a parent item.
941    #[command(name = "find-pdf")]
942    FindPdf {
943        #[arg(long)]
944        parent: String,
945        #[arg(long, default_value = DEFAULT_RPC_URL)]
946        url: String,
947    },
948}
949
// Subcommands dispatched through `Command::Notes`.
//
// Every variant carries its own `--url` option, defaulting to
// `DEFAULT_RPC_URL`; `command_url` reads it to pick the RPC endpoint.
//
// NOTE: the `///` comments inside this enum are consumed by clap's derive as
// user-facing help text, so maintainer notes here use plain `//` comments.
#[derive(Debug, Subcommand)]
enum NotesCommand {
    /// List notes attached to a parent item.
    List {
        #[arg(long)]
        parent: String,
        // Paging window: at most `limit` notes, starting at `offset`.
        #[arg(long, default_value_t = 50)]
        limit: u64,
        #[arg(long, default_value_t = 0)]
        offset: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Get a single note by key.
    Get {
        // Positional argument (no `#[arg(long)]`).
        note_key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Create a note attached to a parent item.
    Create {
        #[arg(long)]
        parent: String,
        #[arg(long)]
        content: String,
        // Exposed as `--tag`; being a `Vec`, the flag may be repeated.
        #[arg(long = "tag")]
        tags: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Update the content of an existing note.
    Update {
        // Positional argument (no `#[arg(long)]`).
        note_key: String,
        #[arg(long)]
        content: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Delete a note by key.
    Delete {
        // Positional argument (no `#[arg(long)]`).
        note_key: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Search notes by text content.
    Search {
        // Positional search text.
        query: String,
        // Unlike `List`, this variant has a `limit` but no `offset`.
        #[arg(long, default_value_t = 50)]
        limit: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
1009
// Subcommands dispatched through `Command::Collections`.
//
// Every variant carries its own `--url` option, defaulting to
// `DEFAULT_RPC_URL`; `command_url` reads it to pick the RPC endpoint.
//
// NOTE: the `///` comments inside this enum are consumed by clap's derive as
// user-facing help text, so maintainer notes here use plain `//` comments.
#[derive(Debug, Subcommand)]
enum CollectionsCommand {
    /// List all collections in the user library (flat).
    List {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Print the collection hierarchy as a tree.
    Tree {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Get a single collection's metadata.
    Get {
        // Positional; the field name suggests either a collection name or an
        // id is accepted — resolution happens outside this enum.
        name_or_id: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// List all items in a collection.
    // Invoked as `get-items`, with `items` as a visible alias.
    #[command(name = "get-items", visible_alias = "items")]
    GetItems {
        name_or_id: String,
        // No default: `None` when `--limit` is not given.
        #[arg(long)]
        limit: Option<u64>,
        #[arg(long, default_value_t = 0)]
        offset: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Show item/attachment/note/subcollection counts for a collection.
    Stats {
        name_or_id: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Rename a collection.
    Rename {
        // Two positionals, in this order: current name, then new name.
        old_name: String,
        new_name: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
    /// Create a collection, optionally nested under a parent.
    Create {
        name: String,
        // `None` when `--parent` is omitted.
        #[arg(long)]
        parent: Option<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
    /// Delete a collection.
    Delete {
        name_or_id: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
    /// Add existing items to a collection.
    #[command(name = "add-items")]
    AddItems {
        collection: String,
        // Trailing positional list of item keys.
        // NOTE(review): nothing here marks the list as required, so it can
        // presumably be empty — confirm the handler copes with that.
        item_keys: Vec<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
    /// Remove items from a collection.
    #[command(name = "remove-items")]
    RemoveItems {
        collection: String,
        // Trailing positional list of item keys (same caveat as `AddItems`).
        item_keys: Vec<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
}
1093
/// Output layout selector handed to `format_json` when a command result is
/// rendered (for example, `run_ocr_command` uses [`JsonStyle::PythonCompact`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum JsonStyle {
    /// Matches Python's top-level `ping` command (`json.dumps` defaults).
    PythonCompact,
    /// Matches namespace commands that route through Python `emit(..., indent=2)`.
    Pretty,
}
1101
/// Result of a successful `parse_cli` call.
enum ParseOutcome<T> {
    /// Arguments parsed into a runnable command value.
    Command(T),
    /// clap asked to display help or version text; the payload is that
    /// rendered text, which `run` / `run_with_client` return as normal output.
    Display(String),
}
1106
1107fn parse_cli<T>(
1108    args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1109) -> Result<ParseOutcome<T>, String>
1110where
1111    T: Parser,
1112{
1113    match T::try_parse_from(args) {
1114        Ok(cli) => Ok(ParseOutcome::Command(cli)),
1115        Err(err)
1116            if matches!(
1117                err.kind(),
1118                ErrorKind::DisplayHelp | ErrorKind::DisplayVersion
1119            ) =>
1120        {
1121            Ok(ParseOutcome::Display(err.to_string()))
1122        }
1123        Err(err) => Err(err.to_string()),
1124    }
1125}
1126
1127pub fn format_error_json(message: &str) -> String {
1128    let message = message.trim_end();
1129    let (code, message) = split_error_code(message).unwrap_or(("RUNTIME_ERROR", message));
1130    serde_json::json!({"error": {"code": code, "message": message}}).to_string()
1131}
1132
/// Splits `CODE: rest` at the first colon.
///
/// Returns `Some((code, rest))` only when the text before the colon is
/// non-empty and made solely of ASCII uppercase letters, ASCII digits and
/// underscores; leading whitespace of `rest` is removed. Returns `None` when
/// there is no colon or the prefix does not look like a code.
fn split_error_code(message: &str) -> Option<(&str, &str)> {
    let (prefix, remainder) = message.split_once(':')?;
    if prefix.is_empty() {
        return None;
    }
    let looks_like_code = prefix
        .chars()
        .all(|ch| matches!(ch, 'A'..='Z' | '0'..='9' | '_'));
    looks_like_code.then(|| (prefix, remainder.trim_start()))
}
1145
1146pub fn run(
1147    args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1148) -> Result<String, String> {
1149    let cli = match parse_cli::<Cli>(args)? {
1150        ParseOutcome::Command(cli) => cli,
1151        ParseOutcome::Display(output) => return Ok(output),
1152    };
1153    let url = command_url(&cli.command);
1154    let mut client = ZoteroRpc::new(url);
1155    run_command(cli.command, &mut client)
1156}
1157
1158pub fn run_with_client(
1159    args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1160    client: &mut impl RpcCaller,
1161) -> Result<String, String> {
1162    let cli = match parse_cli::<Cli>(args)? {
1163        ParseOutcome::Command(cli) => cli,
1164        ParseOutcome::Display(output) => return Ok(output),
1165    };
1166    run_command(cli.command, client)
1167}
1168
1169fn rag_command_url(command: &RagCommand) -> String {
1170    match command {
1171        RagCommand::Providers => DEFAULT_RPC_URL.to_string(),
1172        RagCommand::Embed { .. } => DEFAULT_RPC_URL.to_string(),
1173        RagCommand::Status { url, .. } => url.clone(),
1174        RagCommand::Search { url, .. } => url.clone(),
1175    }
1176}
1177
1178fn command_url(command: &Command) -> String {
1179    match command {
1180        Command::Ping { url }
1181        | Command::Rpc { url, .. }
1182        | Command::Push { url, .. }
1183        | Command::FindPdfs { url, .. } => url.clone(),
1184        Command::Ocr { command } => match command {
1185            OcrCommand::Providers => DEFAULT_RPC_URL.to_string(),
1186            OcrCommand::Run { .. } => DEFAULT_RPC_URL.to_string(),
1187            OcrCommand::Status { url, .. } => url.clone(),
1188            OcrCommand::Process { url, .. } => url.clone(),
1189        },
1190        Command::Rag { command } => rag_command_url(command),
1191        Command::System { command } => match command {
1192            SystemCommand::Version { url }
1193            | SystemCommand::Libraries { url }
1194            | SystemCommand::LibraryStats { url, .. }
1195            | SystemCommand::Schema { url, .. }
1196            | SystemCommand::CurrentCollection { url }
1197            | SystemCommand::ListMethods { url }
1198            | SystemCommand::Describe { url, .. } => url.clone(),
1199        },
1200        Command::Search(ref args) => match &args.management {
1201            Some(SearchManagementCommand::SavedSearches { url })
1202            | Some(SearchManagementCommand::CreateSaved { url, .. })
1203            | Some(SearchManagementCommand::DeleteSaved { url, .. }) => url.clone(),
1204            None => args.url.clone(),
1205        },
1206        Command::Items { command } => match command {
1207            ItemsCommand::Add { url, .. }
1208            | ItemsCommand::Create { url, .. }
1209            | ItemsCommand::Update { url, .. }
1210            | ItemsCommand::Delete { url, .. }
1211            | ItemsCommand::Trash { url, .. }
1212            | ItemsCommand::Restore { url, .. }
1213            | ItemsCommand::MergeDuplicates { url, .. }
1214            | ItemsCommand::AddRelated { url, .. }
1215            | ItemsCommand::RemoveRelated { url, .. }
1216            | ItemsCommand::Get { url, .. }
1217            | ItemsCommand::List { url, .. }
1218            | ItemsCommand::FindDuplicates { url }
1219            | ItemsCommand::Recent { url, .. }
1220            | ItemsCommand::Fulltext { url, .. }
1221            | ItemsCommand::Related { url, .. }
1222            | ItemsCommand::CitationKey { url, .. } => url.clone(),
1223        },
1224        Command::Collections { command } => match command {
1225            CollectionsCommand::List { url }
1226            | CollectionsCommand::Tree { url }
1227            | CollectionsCommand::Get { url, .. }
1228            | CollectionsCommand::GetItems { url, .. }
1229            | CollectionsCommand::Stats { url, .. }
1230            | CollectionsCommand::Rename { url, .. }
1231            | CollectionsCommand::Create { url, .. }
1232            | CollectionsCommand::Delete { url, .. }
1233            | CollectionsCommand::AddItems { url, .. }
1234            | CollectionsCommand::RemoveItems { url, .. } => url.clone(),
1235        },
1236        Command::Notes { command } => match command {
1237            NotesCommand::List { url, .. }
1238            | NotesCommand::Get { url, .. }
1239            | NotesCommand::Create { url, .. }
1240            | NotesCommand::Update { url, .. }
1241            | NotesCommand::Delete { url, .. }
1242            | NotesCommand::Search { url, .. } => url.clone(),
1243        },
1244        Command::Attachments { command } => match command {
1245            AttachmentsCommand::List { url, .. }
1246            | AttachmentsCommand::Get { url, .. }
1247            | AttachmentsCommand::Fulltext { url, .. }
1248            | AttachmentsCommand::Path { url, .. }
1249            | AttachmentsCommand::Add { url, .. }
1250            | AttachmentsCommand::Delete { url, .. }
1251            | AttachmentsCommand::FindPdf { url, .. } => url.clone(),
1252        },
1253        Command::Settings { command } => match command {
1254            SettingsCommand::Get { url, .. }
1255            | SettingsCommand::List { url }
1256            | SettingsCommand::Set { url, .. } => url.clone(),
1257        },
1258        Command::Tags { command } => match command {
1259            TagsCommand::List { url, .. }
1260            | TagsCommand::Rename { url, .. }
1261            | TagsCommand::Delete { url, .. }
1262            | TagsCommand::Add { url, .. }
1263            | TagsCommand::Remove { url, .. } => url.clone(),
1264        },
1265        Command::Export(ref args) => args.url.clone(),
1266        Command::Annotations { command } => match command {
1267            AnnotationsCommand::List { url, .. }
1268            | AnnotationsCommand::Create { url, .. }
1269            | AnnotationsCommand::Delete { url, .. } => url.clone(),
1270        },
1271    }
1272}
1273
1274fn run_ocr_command(command: OcrCommand, client: &mut impl RpcCaller) -> Result<String, String> {
1275    let value = match command {
1276        OcrCommand::Providers => serde_json::json!({
1277            "providers": ocr_provider_specs(),
1278        }),
1279        OcrCommand::Run {
1280            provider,
1281            input,
1282            file,
1283            item_key,
1284            attachment_key,
1285            mime_type,
1286            endpoint,
1287            api_key_env,
1288        } => run_ocr_provider_json_command(OcrProviderJsonOptions {
1289            provider,
1290            input,
1291            file,
1292            item_key,
1293            attachment_key,
1294            mime_type,
1295            endpoint,
1296            api_key_env,
1297        })?,
1298        OcrCommand::Status { collection, .. } => run_ocr_status_command(client, collection)?,
1299        OcrCommand::Process {
1300            provider,
1301            parent,
1302            attachment,
1303            source_url,
1304            result_dir,
1305            result_zip,
1306            provider_endpoint,
1307            api_key_env,
1308            poll_interval_seconds,
1309            timeout_seconds,
1310            chunk_chars,
1311            ..
1312        } => run_ocr_parse_pdf_command(
1313            client,
1314            OcrParsePdfOptions {
1315                provider,
1316                parent,
1317                attachment,
1318                source_url,
1319                result_dir,
1320                result_zip,
1321                provider_endpoint,
1322                api_key_env,
1323                poll_interval_seconds,
1324                timeout_seconds,
1325                chunk_chars,
1326            },
1327        )?,
1328    };
1329    format_json(&value, JsonStyle::PythonCompact)
1330}
1331
/// Options for `run_ocr_parse_pdf_command`, filled from `OcrCommand::Process`.
struct OcrParsePdfOptions {
    /// OCR provider identifier; the command rejects anything whose spec does
    /// not resolve to the `mineru` provider key.
    provider: String,
    /// Key of the parent item; reported as `item_key` in the command output.
    parent: String,
    /// Attachment key. When `None`, the command resolves the parent's first
    /// PDF attachment and stores the resolved key back here.
    attachment: Option<String>,
    /// Remote URL of the document to submit; blank values are treated as
    /// absent. Cannot be combined with `result_dir` / `result_zip`.
    source_url: Option<String>,
    /// Directory holding an already available MinerU result. Mutually
    /// exclusive with `result_zip`.
    result_dir: Option<String>,
    /// Path of a MinerU result zip to read instead of contacting the provider.
    result_zip: Option<String>,
    /// Optional override for the provider endpoint used when building the
    /// submit and status URLs.
    provider_endpoint: Option<String>,
    /// Name of the environment variable that holds the provider credential.
    api_key_env: String,
    /// Seconds between status polls; the polling loops use at least one second.
    poll_interval_seconds: u64,
    /// Seconds after which polling gives up with a timeout error.
    timeout_seconds: u64,
    /// Forwarded to `persist_mineru_result_sidecars`.
    /// NOTE(review): presumably the maximum characters per chunk — confirm.
    chunk_chars: usize,
}
1345
/// Options for `run_ocr_provider_json_command`, filled from `OcrCommand::Run`.
struct OcrProviderJsonOptions {
    /// OCR provider identifier passed to `build_ocr_provider_request`.
    provider: String,
    /// Source handed to `read_json_input` to obtain an `OcrRequestInput`.
    /// Exactly one of `input` and `file` must be set.
    input: Option<String>,
    /// File handed to `ocr_input_from_file` to build the request input.
    file: Option<String>,
    /// Passed to `ocr_input_from_file`; only read in the `file` branch.
    item_key: Option<String>,
    /// Passed to `ocr_input_from_file`; only read in the `file` branch.
    attachment_key: Option<String>,
    /// Passed to `ocr_input_from_file`; only read in the `file` branch.
    mime_type: Option<String>,
    /// Overrides the request URL for HTTP providers when set.
    endpoint: Option<String>,
    /// Forwarded to `provider_http_transport_with_auth` for HTTP providers.
    /// NOTE(review): presumably the name of the credential env var — confirm.
    api_key_env: Option<String>,
}
1356
1357fn run_ocr_provider_json_command(options: OcrProviderJsonOptions) -> Result<Value, String> {
1358    let input: OcrRequestInput = match (options.input, options.file) {
1359        (Some(input), None) => read_json_input(&input)?,
1360        (None, Some(file)) => ocr_input_from_file(
1361            file,
1362            options.item_key,
1363            options.attachment_key,
1364            options.mime_type,
1365        )?,
1366        (Some(_), Some(_)) => {
1367            return Err("INVALID_ARGS: use either --input or --file, not both".to_string())
1368        }
1369        (None, None) => return Err("INVALID_ARGS: provide --input JSON or --file".to_string()),
1370    };
1371    let request = build_ocr_provider_request(&options.provider, &input)?;
1372    let payload = if request.command.is_empty() {
1373        let method = request
1374            .method
1375            .ok_or_else(|| format!("OCR provider {} missing HTTP method", request.provider))?;
1376        let auth_scheme = raw_ocr_provider_spec(&options.provider)?.auth;
1377        let mut transport =
1378            provider_http_transport_with_auth(options.api_key_env.as_deref(), auth_scheme)?;
1379        transport.post_json(&ProviderHttpInvocation {
1380            provider: request.provider.to_string(),
1381            style: request.style.to_string(),
1382            method: method.to_string(),
1383            url: options
1384                .endpoint
1385                .or_else(|| request.url.map(ToString::to_string)),
1386            auth_header_name: request.auth_header.map(ToString::to_string),
1387            auth_header_value: None,
1388            body: request.body,
1389        })?
1390    } else {
1391        let mut command_runner = StdProviderCommandRunner;
1392        command_runner.run_json(&request.command)?
1393    };
1394    let blocks = match parse_ocr_provider_response(
1395        request.provider,
1396        &payload,
1397        &input.item_key,
1398        &input.attachment_key,
1399    ) {
1400        Ok(blocks) => blocks,
1401        Err(err) => {
1402            if let Some(task) = ocr_async_task_result(request.provider, &payload) {
1403                return Ok(task);
1404            }
1405            return Err(err);
1406        }
1407    };
1408
1409    Ok(serde_json::json!({
1410        "provider": request.provider,
1411        "blocks": blocks,
1412    }))
1413}
1414
1415fn run_ocr_parse_pdf_command(
1416    client: &mut impl RpcCaller,
1417    mut options: OcrParsePdfOptions,
1418) -> Result<Value, String> {
1419    let spec = raw_ocr_provider_spec(&options.provider)?;
1420    if spec.provider_key != "mineru" {
1421        return Err(
1422            "INVALID_ARGS: ocr parse-pdf currently supports only --provider mineru".to_string(),
1423        );
1424    }
1425    if options.result_dir.is_some() && options.result_zip.is_some() {
1426        return Err("INVALID_ARGS: use either --result-dir or --result-zip, not both".to_string());
1427    }
1428    if options.source_url.is_some()
1429        && (options.result_dir.is_some() || options.result_zip.is_some())
1430    {
1431        return Err(
1432            "INVALID_ARGS: --source-url cannot be combined with --result-dir/--result-zip"
1433                .to_string(),
1434        );
1435    }
1436
1437    let attachment = match options.attachment.take() {
1438        Some(key) => key,
1439        None => resolve_first_pdf_attachment_key(client, &options.parent)?,
1440    };
1441    options.attachment = Some(attachment.clone());
1442
1443    let attachment_path = resolve_attachment_path(client, &attachment)?;
1444    let storage_dir = attachment_path
1445        .parent()
1446        .ok_or_else(|| {
1447            format!(
1448                "ATTACHMENT_PATH_INVALID: attachment path has no parent directory: {}",
1449                attachment_path.display()
1450            )
1451        })?
1452        .to_path_buf();
1453    let file_name = attachment_path
1454        .file_name()
1455        .and_then(|name| name.to_str())
1456        .unwrap_or("document.pdf")
1457        .to_string();
1458
1459    let source = load_mineru_result_source(&options, &attachment_path, &file_name)?;
1460    let artifacts = persist_mineru_result_sidecars(
1461        &storage_dir,
1462        &options.parent,
1463        &attachment,
1464        &options.provider,
1465        &source,
1466        options.chunk_chars,
1467    )?;
1468
1469    Ok(serde_json::json!({
1470        "provider": "mineru",
1471        "status": "indexed",
1472        "item_key": options.parent,
1473        "attachment_key": attachment,
1474        "attachment_path": attachment_path,
1475        "storage_dir": storage_dir,
1476        "task_id": source.task_id,
1477        "state": source.state,
1478        "blocks": artifacts.block_count,
1479        "chunks": artifacts.chunk_count,
1480        "artifacts": artifacts.artifacts,
1481    }))
1482}
1483
/// A MinerU result, as produced by `mineru_result_source_from_dir` and
/// consumed by `persist_mineru_result_sidecars`.
struct MineruResultSource {
    /// Reported as `task_id` in the command output. The callers in this file
    /// pass a task id (URL submissions), a batch id (local uploads) or `None`
    /// (pre-fetched results) when building the source.
    task_id: Option<String>,
    /// Reported as `state` in the command output.
    state: String,
    // NOTE(review): the remaining fields are populated by
    // `mineru_result_source_from_dir`, which is not visible here; the notes
    // below are inferred from the names and call sites — confirm.
    /// Presumably the directory holding the extracted result files.
    result_dir: PathBuf,
    /// Presumably the raw result archive, when one was read or downloaded.
    raw_zip_bytes: Option<Vec<u8>>,
    /// Presumably the final status payload from polling, when polling occurred.
    task_status: Option<Value>,
    payload: Value,
    content_list_file: Option<PathBuf>,
    markdown: Option<String>,
}
1494
/// Summary returned by `persist_mineru_result_sidecars`; each field is copied
/// into the JSON output of `run_ocr_parse_pdf_command`.
struct PersistedOcrArtifacts {
    /// Emitted under the `blocks` key.
    block_count: usize,
    /// Emitted under the `chunks` key.
    chunk_count: usize,
    /// Emitted under the `artifacts` key.
    artifacts: Vec<Value>,
}
1500
1501fn resolve_attachment_path(
1502    client: &mut impl RpcCaller,
1503    attachment_key: &str,
1504) -> Result<PathBuf, String> {
1505    let payload = client.call(
1506        "attachments.getPath",
1507        Some(serde_json::json!({"key": attachment_key})),
1508    )?;
1509    let raw_path = payload
1510        .get("path")
1511        .and_then(Value::as_str)
1512        .filter(|path| !path.trim().is_empty())
1513        .ok_or_else(|| {
1514            format!("ATTACHMENT_PATH_NOT_FOUND: attachment {attachment_key} has no local PDF path")
1515        })?;
1516    Ok(PathBuf::from(local_path_from_zotero_path(raw_path)))
1517}
1518
1519/// Resolve the first PDF attachment key for a parent item via `attachments.list`.
1520fn resolve_first_pdf_attachment_key(
1521    client: &mut impl RpcCaller,
1522    parent_key: &str,
1523) -> Result<String, String> {
1524    let response = client.call(
1525        "attachments.list",
1526        Some(serde_json::json!({"parentKey": parent_key})),
1527    )?;
1528    // The XPI returns {items: [...], total: N}.
1529    let attachments = response
1530        .get("items")
1531        .and_then(Value::as_array)
1532        .or_else(|| response.as_array())
1533        .ok_or_else(|| {
1534            format!("NO_PDF_ATTACHMENT: no attachments found for item {parent_key}")
1535        })?;
1536    for attachment in attachments {
1537        if is_pdf_attachment(attachment) {
1538            if let Some(key) = attachment.get("key").and_then(Value::as_str) {
1539                return Ok(key.to_string());
1540            }
1541        }
1542    }
1543    Err(format!(
1544        "NO_PDF_ATTACHMENT: no PDF attachment found for item {parent_key}"
1545    ))
1546}
1547
1548fn load_mineru_result_source(
1549    options: &OcrParsePdfOptions,
1550    attachment_path: &Path,
1551    file_name: &str,
1552) -> Result<MineruResultSource, String> {
1553    if let Some(result_dir) = options.result_dir.as_deref() {
1554        return mineru_result_source_from_dir(PathBuf::from(result_dir), None, None, None);
1555    }
1556    if let Some(result_zip) = options.result_zip.as_deref() {
1557        let zip_path = PathBuf::from(result_zip);
1558        let zip_bytes = fs::read(&zip_path)
1559            .map_err(|err| format!("read MinerU result zip {}: {err}", zip_path.display()))?;
1560        let result_dir = extract_zip_bytes_to_temp("zotron-mineru-result", &zip_bytes)?;
1561        return mineru_result_source_from_dir(result_dir, Some(zip_bytes), None, None);
1562    }
1563
1564    let Some(source_url) = options
1565        .source_url
1566        .as_deref()
1567        .filter(|value| !value.trim().is_empty())
1568    else {
1569        return submit_mineru_local_file(options, attachment_path, file_name);
1570    };
1571    let input = OcrRequestInput {
1572        item_key: options.parent.clone(),
1573        attachment_key: options.attachment.clone().expect("attachment resolved"),
1574        file_name: file_name.to_string(),
1575        mime_type: "application/pdf".to_string(),
1576        content_base64: format!("url:{source_url}"),
1577        source_url: Some(source_url.to_string()),
1578        local_path: None,
1579        output_dir: None,
1580    };
1581    let task = submit_mineru_task(
1582        &options.provider,
1583        &input,
1584        options.provider_endpoint.clone(),
1585        &options.api_key_env,
1586    )?;
1587    let task_id = task
1588        .get("data")
1589        .and_then(|data| data.get("task_id"))
1590        .and_then(Value::as_str)
1591        .ok_or_else(|| "MinerU submit response missing data.task_id".to_string())?
1592        .to_string();
1593    let auth_header = provider_auth_header_value(&options.api_key_env, "bearer")?;
1594    let status = poll_mineru_task(
1595        options.provider_endpoint.as_deref(),
1596        &task_id,
1597        &auth_header,
1598        options.poll_interval_seconds,
1599        options.timeout_seconds,
1600    )?;
1601    let zip_url = status
1602        .pointer("/data/full_zip_url")
1603        .or_else(|| status.pointer("/data/result/full_zip_url"))
1604        .and_then(Value::as_str)
1605        .ok_or_else(|| "MinerU completed task missing data.full_zip_url".to_string())?;
1606    let zip_bytes = download_bytes(zip_url)?;
1607    let result_dir = extract_zip_bytes_to_temp("zotron-mineru-result", &zip_bytes)?;
1608    mineru_result_source_from_dir(result_dir, Some(zip_bytes), Some(status), Some(task_id))
1609}
1610
1611fn submit_mineru_local_file(
1612    options: &OcrParsePdfOptions,
1613    attachment_path: &Path,
1614    file_name: &str,
1615) -> Result<MineruResultSource, String> {
1616    let auth_header = provider_auth_header_value(&options.api_key_env, "bearer")?;
1617    let upload_request = create_mineru_file_upload(
1618        options.provider_endpoint.as_deref(),
1619        file_name,
1620        options.attachment.as_deref().expect("attachment resolved"),
1621        &auth_header,
1622    )?;
1623    let upload_url = upload_request
1624        .pointer("/data/file_urls/0")
1625        .or_else(|| upload_request.pointer("/data/fileUrls/0"))
1626        .and_then(Value::as_str)
1627        .ok_or_else(|| "MinerU upload URL response missing data.file_urls[0]".to_string())?;
1628    let batch_id = upload_request
1629        .pointer("/data/batch_id")
1630        .or_else(|| upload_request.pointer("/data/batchId"))
1631        .and_then(Value::as_str)
1632        .ok_or_else(|| "MinerU upload URL response missing data.batch_id".to_string())?
1633        .to_string();
1634    let bytes = fs::read(attachment_path)
1635        .map_err(|err| format!("read attachment PDF {}: {err}", attachment_path.display()))?;
1636    put_bytes(upload_url, &bytes)?;
1637    let status = poll_mineru_batch(
1638        options.provider_endpoint.as_deref(),
1639        &batch_id,
1640        &auth_header,
1641        options.poll_interval_seconds,
1642        options.timeout_seconds,
1643    )?;
1644    let zip_url = mineru_batch_zip_url(&status)
1645        .ok_or_else(|| "MinerU completed batch missing full_zip_url".to_string())?;
1646    let zip_bytes = download_bytes(&zip_url)?;
1647    let result_dir = extract_zip_bytes_to_temp("zotron-mineru-result", &zip_bytes)?;
1648    mineru_result_source_from_dir(result_dir, Some(zip_bytes), Some(status), Some(batch_id))
1649}
1650
1651fn create_mineru_file_upload(
1652    endpoint: Option<&str>,
1653    file_name: &str,
1654    data_id: &str,
1655    auth_header: &str,
1656) -> Result<Value, String> {
1657    let url = mineru_file_urls_url(endpoint);
1658    let body = serde_json::json!({
1659        "files": [{"name": file_name, "data_id": data_id}],
1660        "model_version": "vlm",
1661        "is_ocr": false,
1662        "enable_formula": true,
1663        "enable_table": true,
1664        "language": "ch",
1665        "page_ranges": "1-200",
1666    });
1667    ureq::post(&url)
1668        .set("Authorization", auth_header)
1669        .send_json(body)
1670        .map_err(|err| format!("POST {url} failed: {err}"))?
1671        .into_json::<Value>()
1672        .map_err(|err| format!("POST {url} returned invalid JSON: {err}"))
1673}
1674
1675fn put_bytes(url: &str, bytes: &[u8]) -> Result<(), String> {
1676    ureq::put(url)
1677        .send_bytes(bytes)
1678        .map_err(|err| format!("PUT {url} failed: {err}"))?;
1679    Ok(())
1680}
1681
1682fn submit_mineru_task(
1683    provider: &str,
1684    input: &OcrRequestInput,
1685    endpoint: Option<String>,
1686    api_key_env: &str,
1687) -> Result<Value, String> {
1688    let request = build_ocr_provider_request(provider, input)?;
1689    let method = request
1690        .method
1691        .ok_or_else(|| "MinerU provider missing HTTP method".to_string())?;
1692    let mut transport = provider_http_transport_with_auth(Some(api_key_env), "bearer")?;
1693    transport.post_json(&ProviderHttpInvocation {
1694        provider: request.provider.to_string(),
1695        style: request.style.to_string(),
1696        method: method.to_string(),
1697        url: endpoint.or_else(|| request.url.map(ToString::to_string)),
1698        auth_header_name: request.auth_header.map(ToString::to_string),
1699        auth_header_value: None,
1700        body: request.body,
1701    })
1702}
1703
1704fn poll_mineru_task(
1705    endpoint: Option<&str>,
1706    task_id: &str,
1707    auth_header: &str,
1708    poll_interval_seconds: u64,
1709    timeout_seconds: u64,
1710) -> Result<Value, String> {
1711    let url = mineru_task_status_url(endpoint, task_id);
1712    let started = Instant::now();
1713    loop {
1714        let status = get_json_with_auth(&url, auth_header)?;
1715        let state = status
1716            .pointer("/data/state")
1717            .or_else(|| status.pointer("/data/status"))
1718            .and_then(Value::as_str)
1719            .unwrap_or("unknown");
1720        match state {
1721            "done" | "finished" | "success" => return Ok(status),
1722            "failed" | "error" => return Err(format!("MinerU task {task_id} failed: {status}")),
1723            _ => {
1724                if started.elapsed() >= Duration::from_secs(timeout_seconds) {
1725                    return Err(format!(
1726                        "MinerU task {task_id} timed out after {timeout_seconds}s with state {state}"
1727                    ));
1728                }
1729                thread::sleep(Duration::from_secs(poll_interval_seconds.max(1)));
1730            }
1731        }
1732    }
1733}
1734
/// Builds the status URL for a MinerU extraction task.
///
/// `endpoint` may be the API root, the `/extract` prefix, or the full
/// `/extract/task` submit URL (trailing slashes are ignored); all three map to
/// `<root>/extract/task/<task_id>`. Without an endpoint the public MinerU v4
/// URL is used.
///
/// The `/extract` form is accepted so this stays consistent with
/// `mineru_api_base`; it previously produced `/extract/extract/task/...`.
fn mineru_task_status_url(endpoint: Option<&str>, task_id: &str) -> String {
    let base = endpoint
        .unwrap_or("https://mineru.net/api/v4/extract/task")
        .trim_end_matches('/');
    let api_root = base
        .strip_suffix("/extract/task")
        .or_else(|| base.strip_suffix("/extract"))
        .unwrap_or(base);
    format!("{api_root}/extract/task/{task_id}")
}
1745
1746fn mineru_file_urls_url(endpoint: Option<&str>) -> String {
1747    let base = mineru_api_base(endpoint);
1748    format!("{base}/file-urls/batch")
1749}
1750
1751fn mineru_batch_status_url(endpoint: Option<&str>, batch_id: &str) -> String {
1752    let base = mineru_api_base(endpoint);
1753    format!("{base}/extract-results/batch/{batch_id}")
1754}
1755
/// Derives the MinerU API root from an endpoint.
///
/// Trailing slashes are dropped, then a trailing `/extract/task` or `/extract`
/// segment is removed when present. Without an endpoint the public MinerU v4
/// submit URL is used as the starting point.
fn mineru_api_base(endpoint: Option<&str>) -> String {
    let trimmed = endpoint
        .unwrap_or("https://mineru.net/api/v4/extract/task")
        .trim_end_matches('/');
    // Longest suffix first, so `/extract/task` is not left as `.../task`-less
    // garbage by the shorter pattern.
    ["/extract/task", "/extract"]
        .iter()
        .find_map(|suffix| trimmed.strip_suffix(*suffix))
        .unwrap_or(trimmed)
        .to_string()
}
1768
1769fn poll_mineru_batch(
1770    endpoint: Option<&str>,
1771    batch_id: &str,
1772    auth_header: &str,
1773    poll_interval_seconds: u64,
1774    timeout_seconds: u64,
1775) -> Result<Value, String> {
1776    let url = mineru_batch_status_url(endpoint, batch_id);
1777    let started = Instant::now();
1778    loop {
1779        let status = get_json_with_auth(&url, auth_header)?;
1780        let state = mineru_batch_state(&status).unwrap_or("unknown");
1781        match state {
1782            "done" | "finished" | "success" => return Ok(status),
1783            "failed" | "error" => return Err(format!("MinerU batch {batch_id} failed: {status}")),
1784            _ => {
1785                if started.elapsed() >= Duration::from_secs(timeout_seconds) {
1786                    return Err(format!(
1787                        "MinerU batch {batch_id} timed out after {timeout_seconds}s with state {state}"
1788                    ));
1789                }
1790                thread::sleep(Duration::from_secs(poll_interval_seconds.max(1)));
1791            }
1792        }
1793    }
1794}
1795
1796fn mineru_batch_state(status: &Value) -> Option<&str> {
1797    status
1798        .pointer("/data/extract_result/0/state")
1799        .or_else(|| status.pointer("/data/extractResult/0/state"))
1800        .or_else(|| status.pointer("/data/state"))
1801        .and_then(Value::as_str)
1802}
1803
1804fn mineru_batch_zip_url(status: &Value) -> Option<String> {
1805    status
1806        .pointer("/data/extract_result/0/full_zip_url")
1807        .or_else(|| status.pointer("/data/extractResult/0/full_zip_url"))
1808        .or_else(|| status.pointer("/data/full_zip_url"))
1809        .and_then(Value::as_str)
1810        .map(ToString::to_string)
1811}
1812
/// Builds the `Authorization` header value from the credential stored in the
/// environment variable `api_key_env`.
///
/// The credential is trimmed. For the `bearer` scheme it is prefixed with
/// `Bearer ` and for the `token` scheme with `token `, unless it already starts
/// with that exact prefix. Any other scheme yields the trimmed credential as is.
///
/// # Errors
///
/// Fails when the variable cannot be read or holds only whitespace.
fn provider_auth_header_value(api_key_env: &str, auth_scheme: &str) -> Result<String, String> {
    let Ok(raw) = env::var(api_key_env) else {
        return Err(format!(
            "missing provider credential env var {api_key_env}"
        ));
    };
    let credential = raw.trim();
    if credential.is_empty() {
        return Err(format!(
            "provider credential env var {api_key_env} is empty"
        ));
    }
    let scheme_prefix = match auth_scheme {
        "bearer" => Some("Bearer "),
        "token" => Some("token "),
        _ => None,
    };
    let header = match scheme_prefix {
        Some(prefix) if !credential.starts_with(prefix) => format!("{prefix}{credential}"),
        _ => credential.to_string(),
    };
    Ok(header)
}
1830
1831fn get_json_with_auth(url: &str, auth_header: &str) -> Result<Value, String> {
1832    ureq::get(url)
1833        .set("Authorization", auth_header)
1834        .call()
1835        .map_err(|err| format!("GET {url} failed: {err}"))?
1836        .into_json::<Value>()
1837        .map_err(|err| format!("GET {url} returned invalid JSON: {err}"))
1838}
1839
1840fn download_bytes(url: &str) -> Result<Vec<u8>, String> {
1841    let response = ureq::get(url)
1842        .call()
1843        .map_err(|err| format!("download {url} failed: {err}"))?;
1844    let mut bytes = Vec::new();
1845    response
1846        .into_reader()
1847        .read_to_end(&mut bytes)
1848        .map_err(|err| format!("read download {url}: {err}"))?;
1849    Ok(bytes)
1850}
1851
1852fn extract_zip_bytes_to_temp(prefix: &str, zip_bytes: &[u8]) -> Result<PathBuf, String> {
1853    let dir = unique_temp_path(prefix);
1854    fs::create_dir_all(&dir).map_err(|err| format!("create temp dir {}: {err}", dir.display()))?;
1855    let zip_path = dir.with_extension("zip");
1856    fs::write(&zip_path, zip_bytes)
1857        .map_err(|err| format!("write temp zip {}: {err}", zip_path.display()))?;
1858    let output = ProcessCommand::new("unzip")
1859        .arg("-q")
1860        .arg("-o")
1861        .arg(&zip_path)
1862        .arg("-d")
1863        .arg(&dir)
1864        .output()
1865        .map_err(|err| format!("run unzip: {err}"))?;
1866    if !output.status.success() {
1867        return Err(format!(
1868            "unzip {} failed: {}",
1869            zip_path.display(),
1870            String::from_utf8_lossy(&output.stderr).trim()
1871        ));
1872    }
1873    Ok(dir)
1874}
1875
/// Returns a not-yet-created path inside the system temp directory, named
/// `<prefix>-<pid>-<nanos>-<sequence>`.
///
/// `nanos` is the wall-clock time since the Unix epoch (0 when the clock is
/// before the epoch). `sequence` is a process-wide counter, so two calls from
/// the same process never return the same path even when they land on the same
/// clock tick or the clock reading is unavailable.
fn unique_temp_path(prefix: &str) -> PathBuf {
    static SEQUENCE: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|duration| duration.as_nanos())
        .unwrap_or(0);
    // Only uniqueness matters here, not ordering with other memory operations.
    let sequence = SEQUENCE.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    env::temp_dir().join(format!(
        "{prefix}-{}-{nanos}-{sequence}",
        std::process::id()
    ))
}
1883
1884fn mineru_result_source_from_dir(
1885    result_dir: PathBuf,
1886    raw_zip_bytes: Option<Vec<u8>>,
1887    task_status: Option<Value>,
1888    task_id: Option<String>,
1889) -> Result<MineruResultSource, String> {
1890    let (payload, content_list_file) = mineru_payload_from_result_dir(&result_dir)?;
1891    let markdown = find_first_file_by_name(&result_dir, "full.md")
1892        .map(|path| {
1893            fs::read_to_string(&path)
1894                .map_err(|err| format!("read native markdown {}: {err}", path.display()))
1895        })
1896        .transpose()?;
1897    Ok(MineruResultSource {
1898        task_id,
1899        state: "done".to_string(),
1900        result_dir,
1901        raw_zip_bytes,
1902        task_status,
1903        payload,
1904        content_list_file,
1905        markdown,
1906    })
1907}
1908
1909fn mineru_payload_from_result_dir(result_dir: &Path) -> Result<(Value, Option<PathBuf>), String> {
1910    let v2 = find_first_file_with_suffix(result_dir, "_content_list_v2.json");
1911    if let Some(path) = v2 {
1912        let value = read_json_file(&path)?;
1913        return Ok((serde_json::json!({"content_list_v2": value}), Some(path)));
1914    }
1915    let content_list = find_first_file_with_suffix(result_dir, "_content_list.json");
1916    if let Some(path) = content_list {
1917        let value = read_json_file(&path)?;
1918        return Ok((serde_json::json!({"content_list": value}), Some(path)));
1919    }
1920    let layout = find_first_file_by_name(result_dir, "layout.json");
1921    if let Some(path) = layout {
1922        return Ok((read_json_file(&path)?, Some(path)));
1923    }
1924    let markdown = find_first_file_by_name(result_dir, "full.md");
1925    if let Some(path) = markdown {
1926        let text = fs::read_to_string(&path)
1927            .map_err(|err| format!("read native markdown {}: {err}", path.display()))?;
1928        return Ok((serde_json::json!({"result": text}), Some(path)));
1929    }
1930    Err(format!(
1931        "MinerU result directory {} missing content_list_v2/content_list/layout/full.md",
1932        result_dir.display()
1933    ))
1934}
1935
1936fn read_json_file(path: &Path) -> Result<Value, String> {
1937    let raw = fs::read_to_string(path).map_err(|err| format!("read {}: {err}", path.display()))?;
1938    serde_json::from_str(&raw).map_err(|err| format!("parse JSON {}: {err}", path.display()))
1939}
1940
1941fn persist_mineru_result_sidecars(
1942    storage_dir: &Path,
1943    item_key: &str,
1944    attachment_key: &str,
1945    provider: &str,
1946    source: &MineruResultSource,
1947    chunk_chars: usize,
1948) -> Result<PersistedOcrArtifacts, String> {
1949    let blocks = parse_ocr_provider_response(provider, &source.payload, item_key, attachment_key)?;
1950    let chunks = zotron_types::chunks_from_blocks(&blocks, chunk_chars);
1951    let assets = copy_mineru_assets(&source.result_dir, storage_dir)?;
1952    let raw_bundle = serde_json::json!({
1953        "provider": provider,
1954        "item_key": item_key,
1955        "attachment_key": attachment_key,
1956        "task_id": source.task_id,
1957        "state": source.state,
1958        "task_status": source.task_status,
1959        "content_list_file": source.content_list_file,
1960        "payload": source.payload,
1961    });
1962
1963    let mut artifacts = Vec::new();
1964    artifacts.push(write_sidecar_json(
1965        storage_dir,
1966        item_key,
1967        attachment_key,
1968        MachineArtifactKind::OcrRaw,
1969        &raw_bundle,
1970    )?);
1971    artifacts.push(write_sidecar_jsonl(
1972        storage_dir,
1973        item_key,
1974        attachment_key,
1975        MachineArtifactKind::Blocks,
1976        &blocks,
1977    )?);
1978    artifacts.push(write_sidecar_jsonl(
1979        storage_dir,
1980        item_key,
1981        attachment_key,
1982        MachineArtifactKind::Chunks,
1983        &chunks,
1984    )?);
1985    if let Some(markdown) = source.markdown.as_deref() {
1986        artifacts.push(write_sidecar_bytes(
1987            storage_dir,
1988            item_key,
1989            attachment_key,
1990            MachineArtifactKind::OcrNativeMarkdown,
1991            markdown.as_bytes(),
1992        )?);
1993    }
1994    artifacts.push(write_sidecar_json(
1995        storage_dir,
1996        item_key,
1997        attachment_key,
1998        MachineArtifactKind::OcrNativeAssets,
1999        &assets,
2000    )?);
2001    if let Some(bytes) = source.raw_zip_bytes.as_deref() {
2002        artifacts.push(write_extra_sidecar_bytes(
2003            storage_dir,
2004            ".zotron/ocr/latest.raw.zip",
2005            bytes,
2006        )?);
2007    }
2008
2009    Ok(PersistedOcrArtifacts {
2010        block_count: blocks.len(),
2011        chunk_count: chunks.len(),
2012        artifacts,
2013    })
2014}
2015
2016fn write_sidecar_json(
2017    storage_dir: &Path,
2018    item_key: &str,
2019    attachment_key: &str,
2020    kind: MachineArtifactKind,
2021    value: &Value,
2022) -> Result<Value, String> {
2023    let bytes = serde_json::to_vec_pretty(value).map_err(|err| err.to_string())?;
2024    write_sidecar_bytes(storage_dir, item_key, attachment_key, kind, &bytes)
2025}
2026
2027fn write_sidecar_jsonl<T: serde::Serialize>(
2028    storage_dir: &Path,
2029    item_key: &str,
2030    attachment_key: &str,
2031    kind: MachineArtifactKind,
2032    values: &[T],
2033) -> Result<Value, String> {
2034    let mut out = String::new();
2035    for value in values {
2036        out.push_str(&serde_json::to_string(value).map_err(|err| err.to_string())?);
2037        out.push('\n');
2038    }
2039    write_sidecar_bytes(storage_dir, item_key, attachment_key, kind, out.as_bytes())
2040}
2041
2042fn write_sidecar_bytes(
2043    storage_dir: &Path,
2044    item_key: &str,
2045    attachment_key: &str,
2046    kind: MachineArtifactKind,
2047    bytes: &[u8],
2048) -> Result<Value, String> {
2049    let record = write_machine_artifact_sidecar(storage_dir, item_key, attachment_key, kind, bytes)
2050        .map_err(|err| format!("write sidecar {:?}: {err}", kind))?;
2051    Ok(serde_json::json!({
2052        "kind": kind,
2053        "relative_path": record.relative_path,
2054        "absolute_path": record.absolute_path,
2055    }))
2056}
2057
2058fn write_extra_sidecar_bytes(
2059    storage_dir: &Path,
2060    relative_path: &str,
2061    bytes: &[u8],
2062) -> Result<Value, String> {
2063    let absolute_path = storage_dir.join(relative_path);
2064    if let Some(parent) = absolute_path.parent() {
2065        fs::create_dir_all(parent).map_err(|err| format!("create {}: {err}", parent.display()))?;
2066    }
2067    fs::write(&absolute_path, bytes)
2068        .map_err(|err| format!("write sidecar {}: {err}", absolute_path.display()))?;
2069    Ok(serde_json::json!({
2070        "kind": "ocr_raw_zip",
2071        "relative_path": relative_path,
2072        "absolute_path": absolute_path,
2073    }))
2074}
2075
2076fn copy_mineru_assets(result_dir: &Path, storage_dir: &Path) -> Result<Value, String> {
2077    let mut images = Vec::new();
2078    for file in collect_files(result_dir)? {
2079        if !is_image_file(&file) {
2080            continue;
2081        }
2082        let relative = file.strip_prefix(result_dir).unwrap_or(&file).to_path_buf();
2083        let destination = storage_dir.join(".zotron").join("ocr").join(&relative);
2084        if let Some(parent) = destination.parent() {
2085            fs::create_dir_all(parent)
2086                .map_err(|err| format!("create {}: {err}", parent.display()))?;
2087        }
2088        fs::copy(&file, &destination).map_err(|err| {
2089            format!(
2090                "copy MinerU asset {} to {}: {err}",
2091                file.display(),
2092                destination.display()
2093            )
2094        })?;
2095        images.push(serde_json::json!({
2096            "source_relative": relative,
2097            "sidecar_relative": PathBuf::from(".zotron").join("ocr").join(&relative),
2098            "absolute_path": destination,
2099        }));
2100    }
2101    Ok(serde_json::json!({
2102        "provider": "mineru",
2103        "images": images,
2104    }))
2105}
2106
/// Reports whether the path's extension is one of the image extensions
/// `png`, `jpg`, `jpeg`, `webp` or `gif`, compared ASCII case-insensitively.
///
/// Paths without an extension, or with an extension that is not valid UTF-8,
/// are not images.
fn is_image_file(path: &Path) -> bool {
    const IMAGE_EXTENSIONS: [&str; 5] = ["png", "jpg", "jpeg", "webp", "gif"];
    let Some(extension) = path.extension().and_then(|ext| ext.to_str()) else {
        return false;
    };
    IMAGE_EXTENSIONS
        .iter()
        .any(|known| extension.eq_ignore_ascii_case(known))
}
2117
2118fn find_first_file_with_suffix(root: &Path, suffix: &str) -> Option<PathBuf> {
2119    collect_files(root).ok()?.into_iter().find(|path| {
2120        path.file_name()
2121            .and_then(|name| name.to_str())
2122            .is_some_and(|name| name.ends_with(suffix))
2123    })
2124}
2125
2126fn find_first_file_by_name(root: &Path, name: &str) -> Option<PathBuf> {
2127    collect_files(root).ok()?.into_iter().find(|path| {
2128        path.file_name()
2129            .and_then(|file_name| file_name.to_str())
2130            .is_some_and(|file_name| file_name == name)
2131    })
2132}
2133
2134fn collect_files(root: &Path) -> Result<Vec<PathBuf>, String> {
2135    let mut files = Vec::new();
2136    collect_files_into(root, &mut files)?;
2137    files.sort();
2138    Ok(files)
2139}
2140
/// Appends every regular file under `root` to `files`, descending into
/// subdirectories recursively.
///
/// Entries that are neither directories nor regular files are ignored. The
/// order of appended paths follows the directory iteration order.
///
/// # Errors
///
/// Fails when a directory cannot be opened, an entry cannot be read, or an
/// entry's file type cannot be determined.
fn collect_files_into(root: &Path, files: &mut Vec<PathBuf>) -> Result<(), String> {
    let entries = match fs::read_dir(root) {
        Ok(entries) => entries,
        Err(err) => return Err(format!("read dir {}: {err}", root.display())),
    };
    for next in entries {
        let entry = match next {
            Ok(entry) => entry,
            Err(err) => return Err(format!("read dir entry {}: {err}", root.display())),
        };
        let path = entry.path();
        let kind = match entry.file_type() {
            Ok(kind) => kind,
            Err(err) => return Err(format!("stat {}: {err}", path.display())),
        };
        if kind.is_file() {
            files.push(path);
        } else if kind.is_dir() {
            collect_files_into(&path, files)?;
        }
    }
    Ok(())
}
2156
2157fn ocr_async_task_result(provider: &str, payload: &Value) -> Option<Value> {
2158    let data = payload.get("data")?;
2159    let task_id = data.get("task_id").and_then(Value::as_str)?;
2160    Some(serde_json::json!({
2161        "provider": provider,
2162        "status": "submitted",
2163        "task_id": task_id,
2164        "state": data.get("state").and_then(Value::as_str).unwrap_or("submitted"),
2165        "result_url": data.get("full_zip_url").or_else(|| data.get("markdown_url")).cloned(),
2166        "raw": payload,
2167    }))
2168}
2169
2170fn ocr_input_from_file(
2171    file: String,
2172    item_key: Option<String>,
2173    attachment_key: Option<String>,
2174    mime_type: Option<String>,
2175) -> Result<OcrRequestInput, String> {
2176    let item_key = item_key
2177        .ok_or_else(|| "INVALID_ARGS: --item-key is required when using --file".to_string())?;
2178    let attachment_key = attachment_key.ok_or_else(|| {
2179        "INVALID_ARGS: --attachment-key is required when using --file".to_string()
2180    })?;
2181    let path = PathBuf::from(&file);
2182    let bytes = fs::read(&path).map_err(|err| format!("read {file}: {err}"))?;
2183    let file_name = path
2184        .file_name()
2185        .and_then(|name| name.to_str())
2186        .unwrap_or("document.pdf")
2187        .to_string();
2188    let mime_type = mime_type.unwrap_or_else(|| guess_mime_type(&path).to_string());
2189    Ok(OcrRequestInput {
2190        item_key,
2191        attachment_key,
2192        file_name,
2193        mime_type,
2194        content_base64: base64_encode(&bytes),
2195        source_url: None,
2196        local_path: Some(file),
2197        output_dir: None,
2198    })
2199}
2200
/// Guesses a MIME type from the path's extension (ASCII case-insensitive).
///
/// Known raster image extensions map to their registered image types,
/// including `gif`, which `is_image_file` also treats as an image. Everything
/// else, including paths without an extension, is assumed to be a PDF.
fn guess_mime_type(path: &Path) -> &'static str {
    match path
        .extension()
        .and_then(|ext| ext.to_str())
        .unwrap_or_default()
        .to_ascii_lowercase()
        .as_str()
    {
        "png" => "image/png",
        "jpg" | "jpeg" => "image/jpeg",
        "webp" => "image/webp",
        "gif" => "image/gif",
        "bmp" => "image/bmp",
        "tif" | "tiff" => "image/tiff",
        // PDF is the default document type for OCR input.
        _ => "application/pdf",
    }
}
2215
/// Encodes `bytes` with the standard base64 alphabet (`A-Z a-z 0-9 + /`),
/// padding the output with `=` to a multiple of four characters.
fn base64_encode(bytes: &[u8]) -> String {
    const ALPHABET: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    const PAD: char = '=';

    let mut encoded = String::with_capacity(bytes.len().div_ceil(3) * 4);
    for group in bytes.chunks(3) {
        // Pack up to three bytes into the top 24 bits of a word; missing
        // bytes stay zero, exactly like zero-filled input.
        let mut word = 0u32;
        for (index, byte) in group.iter().enumerate() {
            word |= u32::from(*byte) << (16 - 8 * index);
        }
        let sextet = |shift: u32| ALPHABET[((word >> shift) & 0x3f) as usize] as char;

        encoded.push(sextet(18));
        encoded.push(sextet(12));
        encoded.push(if group.len() > 1 { sextet(6) } else { PAD });
        encoded.push(if group.len() > 2 { sextet(0) } else { PAD });
    }
    encoded
}
2238
2239fn run_ocr_status_command(
2240    client: &mut impl RpcCaller,
2241    collection: String,
2242) -> Result<Value, String> {
2243    let collection_key = find_collection_in_tree(client, &collection)?
2244        .and_then(|node| node.get("key").cloned())
2245        .ok_or_else(|| format!("COLLECTION_NOT_FOUND: Collection not found: {collection:?}"))?;
2246    let raw = paginate_rpc(
2247        client,
2248        "collections.getItems",
2249        serde_json::json!({"key": collection_key}),
2250        500,
2251    )?;
2252    let items = raw
2253        .get("items")
2254        .and_then(Value::as_array)
2255        .or_else(|| raw.as_array())
2256        .ok_or_else(|| "collections.getItems returned non-array/non-items result".to_string())?
2257        .clone();
2258
2259    let mut has_ocr = 0usize;
2260    for item in &items {
2261        let item_key = item.get("key").cloned().unwrap_or(Value::Null);
2262        if has_ocr_artifact(client, &item_key)? || has_ocr_note(client, &item_key)? {
2263            has_ocr += 1;
2264        }
2265    }
2266
2267    Ok(serde_json::json!({
2268        "collection": collection,
2269        "total": items.len(),
2270        "has_ocr": has_ocr,
2271        "missing_ocr": items.len() - has_ocr,
2272    }))
2273}
2274
2275fn has_ocr_artifact(client: &mut impl RpcCaller, item_key: &Value) -> Result<bool, String> {
2276    if let Some(item_key) = item_key.as_str() {
2277        // Legacy external store lookup for artifacts produced before the
2278        // per-attachment hidden sidecar became the default.
2279        if machine_artifact_exists_for_item(
2280            machine_artifact_store_root(),
2281            item_key,
2282            MachineArtifactKind::Chunks,
2283        ) {
2284            return Ok(true);
2285        }
2286    }
2287
2288    let attachments = client.call(
2289        "attachments.list",
2290        Some(serde_json::json!({"parentKey": item_key.clone()})),
2291    )?;
2292    Ok(attachments.as_array().is_some_and(|attachments| {
2293        attachments.iter().any(|attachment| {
2294            let has_sidecar_chunks = attachment
2295                .get("path")
2296                .and_then(Value::as_str)
2297                .map(local_path_from_zotero_path)
2298                .as_deref()
2299                .map(Path::new)
2300                .and_then(Path::parent)
2301                .is_some_and(|dir| {
2302                    machine_artifact_exists_in_sidecar(dir, MachineArtifactKind::Chunks)
2303                });
2304            if has_sidecar_chunks {
2305                return true;
2306            }
2307
2308            // Read-only fallback for old Zotero-visible artifact attachments.
2309            attachment
2310                .get("title")
2311                .and_then(Value::as_str)
2312                .is_some_and(|title| title.ends_with("zotron-chunks.jsonl"))
2313        })
2314    }))
2315}
2316
2317fn local_path_from_zotero_path(path: &str) -> String {
2318    if is_wsl() && path.as_bytes().get(1) == Some(&b':') {
2319        return ProcessCommand::new("wslpath")
2320            .arg("-u")
2321            .arg(path)
2322            .output()
2323            .ok()
2324            .filter(|output| output.status.success())
2325            .and_then(|output| String::from_utf8(output.stdout).ok())
2326            .map(|converted| converted.trim().to_string())
2327            .filter(|converted| !converted.is_empty())
2328            .unwrap_or_else(|| path.to_string());
2329    }
2330    path.to_string()
2331}
2332
2333fn has_ocr_note(client: &mut impl RpcCaller, item_key: &Value) -> Result<bool, String> {
2334    let notes = client.call(
2335        "notes.list",
2336        Some(serde_json::json!({"parentKey": item_key.clone()})),
2337    )?;
2338    Ok(notes.as_array().is_some_and(|notes| {
2339        notes.iter().any(|note| {
2340            note.get("tags")
2341                .and_then(Value::as_array)
2342                .is_some_and(|tags| tags.iter().any(tag_is_ocr))
2343        })
2344    }))
2345}
2346
2347fn tag_is_ocr(tag: &Value) -> bool {
2348    tag.as_str() == Some("ocr")
2349        || tag
2350            .get("tag")
2351            .and_then(Value::as_str)
2352            .is_some_and(|tag| tag == "ocr")
2353}
2354
2355fn find_collection_in_tree(
2356    client: &mut impl RpcCaller,
2357    collection: &str,
2358) -> Result<Option<Value>, String> {
2359    let tree = client.call("collections.tree", None)?;
2360    let nodes = tree
2361        .as_array()
2362        .ok_or_else(|| "collections.tree returned non-array result".to_string())?;
2363    Ok(search_collection_tree(nodes, collection).cloned())
2364}
2365
2366fn search_collection_tree<'a>(nodes: &'a [Value], collection: &str) -> Option<&'a Value> {
2367    for node in nodes {
2368        if node.get("key").and_then(Value::as_str) == Some(collection)
2369            || node.get("name").and_then(Value::as_str) == Some(collection)
2370        {
2371            return Some(node);
2372        }
2373        if let Some(children) = node.get("children").and_then(Value::as_array) {
2374            if let Some(found) = search_collection_tree(children, collection) {
2375                return Some(found);
2376            }
2377        }
2378    }
2379    None
2380}
2381
2382fn run_command(command: Command, client: &mut impl RpcCaller) -> Result<String, String> {
2383    if let Command::Export(args) = command {
2384        return run_export(args, client);
2385    }
2386
2387    let (value, style) = match command {
2388        Command::Ping { .. } => (
2389            call_json(client, "system.ping", None)?,
2390            JsonStyle::PythonCompact,
2391        ),
2392        Command::Rpc {
2393            method,
2394            params_json,
2395            paginate,
2396            page_size,
2397            ..
2398        } => {
2399            let params = serde_json::from_str::<Value>(&params_json)
2400                .map_err(|err| format!("INVALID_JSON: params must be a JSON object: {err}"))?;
2401            if !params.is_object() {
2402                return Err("INVALID_JSON: params must be a JSON object".to_string());
2403            }
2404            if paginate {
2405                (
2406                    paginate_rpc(client, &method, params, page_size)?,
2407                    JsonStyle::Pretty,
2408                )
2409            } else {
2410                (call_json(client, &method, Some(params))?, JsonStyle::Pretty)
2411            }
2412        }
2413        Command::Push {
2414            json_file,
2415            pdf,
2416            collection,
2417            on_duplicate,
2418            dry_run,
2419            ..
2420        } => return run_push_command(json_file, pdf, collection, on_duplicate, dry_run, client),
2421        Command::System { command } => run_system_command(command, client)?,
2422        Command::Search(args) => {
2423            if let Some(mgmt) = args.management {
2424                run_search_management_command(mgmt, client)?
2425            } else {
2426                run_search(args, client)?
2427            }
2428        }
2429        Command::Items { command } => run_items_command(command, client)?,
2430        Command::Collections { command } => run_collections_command(command, client)?,
2431        Command::Notes { command } => run_notes_command(command, client)?,
2432        Command::Attachments { command } => run_attachments_command(command, client)?,
2433        Command::Settings { command } => run_settings_command(command, client)?,
2434        Command::Tags { command } => run_tags_command(command, client)?,
2435        Command::Annotations { command } => run_annotations_command(command, client)?,
2436        Command::Ocr { command } => {
2437            return run_ocr_command(command, client);
2438        }
2439        Command::Rag { command } => {
2440            return run_rag_command(command, client);
2441        }
2442        Command::Export(_) => unreachable!("export commands return raw output above"),
2443        Command::FindPdfs {
2444            collection, limit, ..
2445        } => run_find_pdfs_command(client, collection, limit)?,
2446    };
2447
2448    format_json(&value, style)
2449}
2450
2451fn run_rag_command(command: RagCommand, client: &mut impl RpcCaller) -> Result<String, String> {
2452    match command {
2453        RagCommand::Providers => format_json(
2454            &serde_json::json!({
2455                "providers": [
2456                    embedding_provider_spec("volcengine")?,
2457                    embedding_provider_spec("alibaba")?,
2458                    embedding_provider_spec("custom")?,
2459                ],
2460            }),
2461            JsonStyle::Pretty,
2462        ),
2463        RagCommand::Embed {
2464            provider,
2465            input,
2466            endpoint,
2467            model,
2468            input_type,
2469            api_key_env,
2470        } => {
2471            let value = run_embedding_provider_json_command(
2472                provider,
2473                input,
2474                endpoint,
2475                model,
2476                input_type,
2477                api_key_env,
2478            )?;
2479            format_json(&value, JsonStyle::PythonCompact)
2480        }
2481        RagCommand::Status { collection, .. } => {
2482            let value = rag_status_value(client, &collection)?;
2483            format_json(&value, JsonStyle::PythonCompact)
2484        }
2485        RagCommand::Search {
2486            query,
2487            collection,
2488            keys,
2489            zotero,
2490            top_spans_per_item,
2491            include_fulltext_spans,
2492            top_k,
2493            output,
2494            ..
2495        } => run_rag_hits_command(
2496            client,
2497            RagHitsOptions {
2498                query,
2499                collection,
2500                keys,
2501                zotero,
2502                top_spans_per_item,
2503                include_fulltext_spans,
2504                top_k,
2505                output,
2506            },
2507        ),
2508    }
2509}
2510
2511fn run_embedding_provider_json_command(
2512    provider: String,
2513    input: String,
2514    endpoint: Option<String>,
2515    model: Option<String>,
2516    input_type: Option<String>,
2517    api_key_env: Option<String>,
2518) -> Result<Value, String> {
2519    let mut input: EmbeddingRequestInput = read_json_input(&input)?;
2520    if endpoint.is_some() {
2521        input.url = endpoint;
2522    }
2523    if model.is_some() {
2524        input.model = model;
2525    }
2526    if input_type.is_some() {
2527        input.input_type = input_type;
2528    }
2529    let mut transport = provider_http_transport(api_key_env.as_deref())?;
2530    let vectors = execute_embedding_provider_request(&provider, &input, &mut transport)?;
2531
2532    Ok(serde_json::json!({
2533        "provider": provider,
2534        "vectors": vectors,
2535    }))
2536}
2537
2538fn provider_http_transport(api_key_env: Option<&str>) -> Result<UreqProviderHttpTransport, String> {
2539    provider_http_transport_with_auth(api_key_env, "bearer")
2540}
2541
2542fn provider_http_transport_with_auth(
2543    api_key_env: Option<&str>,
2544    auth_scheme: &str,
2545) -> Result<UreqProviderHttpTransport, String> {
2546    let Some(env_name) = api_key_env else {
2547        return Ok(UreqProviderHttpTransport::new());
2548    };
2549    let token = env::var(env_name)
2550        .map_err(|_| format!("missing provider credential env var {env_name}"))?;
2551    if token.trim().is_empty() {
2552        return Err(format!("provider credential env var {env_name} is empty"));
2553    }
2554    let token = token.trim();
2555    match auth_scheme {
2556        "token" if token.starts_with("token ") => {
2557            Ok(UreqProviderHttpTransport::with_api_key(token.to_string()))
2558        }
2559        "token" => Ok(UreqProviderHttpTransport::with_api_key(format!(
2560            "token {token}"
2561        ))),
2562        "bearer" if token.starts_with("Bearer ") => {
2563            Ok(UreqProviderHttpTransport::with_api_key(token.to_string()))
2564        }
2565        "bearer" => Ok(UreqProviderHttpTransport::with_bearer_token(token)),
2566        "none" => Ok(UreqProviderHttpTransport::new()),
2567        other => Err(format!("unsupported provider auth scheme {other}")),
2568    }
2569}
2570
2571fn read_json_input<T: serde::de::DeserializeOwned>(path: &str) -> Result<T, String> {
2572    let payload = if path == "-" {
2573        let mut input = String::new();
2574        io::stdin()
2575            .read_to_string(&mut input)
2576            .map_err(|err| format!("read stdin: {err}"))?;
2577        input
2578    } else {
2579        fs::read_to_string(path).map_err(|err| format!("read {path}: {err}"))?
2580    };
2581    serde_json::from_str::<T>(&payload)
2582        .map_err(|err| format!("INVALID_JSON: Could not parse JSON: {err}"))
2583}
2584
2585fn run_rag_hits_command(
2586    client: &mut impl RpcCaller,
2587    options: RagHitsOptions,
2588) -> Result<String, String> {
2589    if !options.zotero {
2590        return Err(
2591            "zotron rag hits currently supports only the fixture-covered --zotero backend in Rust"
2592                .to_string(),
2593        );
2594    }
2595    if options.collection.is_none() && options.keys.is_empty() {
2596        return Err(
2597            "INVALID_ARGS: --collection or --key is required when --zotero is used".to_string(),
2598        );
2599    }
2600    let mut params = serde_json::json!({
2601        "query": options.query,
2602        "limit": options.top_k,
2603        "top_spans_per_item": options.top_spans_per_item,
2604        "include_fulltext_spans": options.include_fulltext_spans,
2605    });
2606    if let Some(map) = params.as_object_mut() {
2607        if let Some(collection) = options.collection {
2608            map.insert("collection".to_string(), Value::String(collection));
2609        }
2610        if !options.keys.is_empty() {
2611            map.insert(
2612                "keys".to_string(),
2613                Value::Array(options.keys.into_iter().map(Value::String).collect()),
2614            );
2615        }
2616    }
2617    let payload = client.call("rag.searchHits", Some(params))?;
2618    let hits = payload
2619        .get("hits")
2620        .and_then(Value::as_array)
2621        .cloned()
2622        .unwrap_or_default();
2623    if options.output == "jsonl" {
2624        let mut out = String::new();
2625        for hit in &hits {
2626            out.push_str(&serde_json::to_string(hit).map_err(|err| err.to_string())?);
2627            out.push('\n');
2628        }
2629        Ok(out)
2630    } else {
2631        let total = hits.len() as u64;
2632        let value = normalize_list_envelope(
2633            serde_json::json!({"items": hits, "total": total}),
2634            "items",
2635            Some(options.top_k),
2636            0,
2637        );
2638        format_json(&value, JsonStyle::Pretty)
2639    }
2640}
2641
2642fn rag_status_value(client: &mut impl RpcCaller, collection: &str) -> Result<Value, String> {
2643    let raw_store_path = rag_store_path(collection);
2644    if raw_store_path.exists() {
2645        return rag_status_from_store(collection, &raw_store_path);
2646    }
2647
2648    let mut store_candidates = Vec::new();
2649    let collection_match = find_collection_in_tree(client, collection)?;
2650    if let Some(collection_node) = collection_match.as_ref() {
2651        if let Some(name) = collection_node.get("name").and_then(Value::as_str) {
2652            store_candidates.push(rag_store_path(name));
2653        }
2654        if let Some(key) = collection_node.get("key").and_then(Value::as_str) {
2655            store_candidates.push(rag_store_path(key));
2656        }
2657    }
2658    for store_path in unique_paths(store_candidates) {
2659        if store_path.exists() {
2660            return rag_status_from_store(collection, &store_path);
2661        }
2662    }
2663
2664    rag_status_from_zotero_sidecars(client, collection, collection_match)
2665}
2666
/// Removes duplicate paths while keeping the first occurrence of each, in order.
fn unique_paths(paths: Vec<PathBuf>) -> Vec<PathBuf> {
    paths.into_iter().fold(Vec::new(), |mut kept, candidate| {
        if !kept.contains(&candidate) {
            kept.push(candidate);
        }
        kept
    })
}
2676
2677fn rag_status_from_store(collection: &str, store_path: &Path) -> Result<Value, String> {
2678    let raw = fs::read_to_string(store_path)
2679        .map_err(|err| format!("read RAG store {}: {err}", store_path.display()))?;
2680    let store: Value = serde_json::from_str(&raw)
2681        .map_err(|err| format!("parse RAG store {}: {err}", store_path.display()))?;
2682    let chunks = store
2683        .get("chunks")
2684        .and_then(Value::as_array)
2685        .cloned()
2686        .unwrap_or_default();
2687    let mut item_keys = Vec::<Value>::new();
2688    for chunk in &chunks {
2689        let Some(item_key) = chunk.get("item_key") else {
2690            continue;
2691        };
2692        if !item_keys.iter().any(|seen| seen == item_key) {
2693            item_keys.push(item_key.clone());
2694        }
2695    }
2696    Ok(serde_json::json!({
2697        "status": "indexed",
2698        "collection": store.get("collection").and_then(Value::as_str).unwrap_or(collection),
2699        "collection_key": store.get("collection_key").cloned().unwrap_or(Value::Null),
2700        "model": store.get("model").cloned().unwrap_or(Value::String("unknown".to_string())),
2701        "total_chunks": chunks.len(),
2702        "total_items": item_keys.len(),
2703        "store_path": store_path.to_string_lossy(),
2704    }))
2705}
2706
2707fn rag_status_from_zotero_sidecars(
2708    client: &mut impl RpcCaller,
2709    collection: &str,
2710    collection_match: Option<Value>,
2711) -> Result<Value, String> {
2712    let collection_key = collection_match
2713        .as_ref()
2714        .and_then(|node| node.get("key").cloned())
2715        .ok_or_else(|| format!("COLLECTION_NOT_FOUND: Collection not found: {collection:?}"))?;
2716    let raw = paginate_rpc(
2717        client,
2718        "collections.getItems",
2719        serde_json::json!({"key": collection_key}),
2720        500,
2721    )?;
2722    let items = raw
2723        .get("items")
2724        .and_then(Value::as_array)
2725        .or_else(|| raw.as_array())
2726        .ok_or_else(|| "collections.getItems returned non-array/non-items result".to_string())?
2727        .clone();
2728
2729    let mut indexed_items = 0usize;
2730    let mut total_chunks = 0usize;
2731    for item in &items {
2732        let item_key = item.get("key").cloned().unwrap_or(Value::Null);
2733        let chunk_count = sidecar_chunk_count_for_item(client, &item_key)?;
2734        if chunk_count > 0 {
2735            indexed_items += 1;
2736            total_chunks += chunk_count;
2737        }
2738    }
2739
2740    if indexed_items == 0 {
2741        return Ok(serde_json::json!({
2742            "status": "not indexed",
2743            "collection": collection,
2744            "total_items": items.len(),
2745            "indexed_items": 0,
2746        }));
2747    }
2748
2749    Ok(serde_json::json!({
2750        "status": "indexed",
2751        "collection": collection,
2752        "total_chunks": total_chunks,
2753        "total_items": indexed_items,
2754        "collection_items": items.len(),
2755        "source": "zotero-sidecar",
2756    }))
2757}
2758
2759fn sidecar_chunk_count_for_item(
2760    client: &mut impl RpcCaller,
2761    item_key: &Value,
2762) -> Result<usize, String> {
2763    let attachments = client.call(
2764        "attachments.list",
2765        Some(serde_json::json!({"parentKey": item_key.clone()})),
2766    )?;
2767    let Some(attachments) = attachments.as_array() else {
2768        return Ok(0);
2769    };
2770
2771    let mut count = 0usize;
2772    for attachment in attachments {
2773        let Some(path) = attachment.get("path").and_then(Value::as_str) else {
2774            continue;
2775        };
2776        let local = local_path_from_zotero_path(path);
2777        let Some(dir) = Path::new(&local).parent() else {
2778            continue;
2779        };
2780        let Ok(bytes) = read_machine_artifact_sidecar(dir, MachineArtifactKind::Chunks) else {
2781            continue;
2782        };
2783        let text = String::from_utf8_lossy(&bytes);
2784        count += text.lines().filter(|line| !line.trim().is_empty()).count();
2785    }
2786    Ok(count)
2787}
2788
2789fn rag_store_path(collection: &str) -> PathBuf {
2790    rag_store_root().join(format!("{collection}.json"))
2791}
2792
2793fn rag_store_root() -> PathBuf {
2794    let xdg_data_home = env::var_os("XDG_DATA_HOME")
2795        .filter(|path| !path.is_empty())
2796        .map(PathBuf::from);
2797    let appdata = env::var_os("APPDATA")
2798        .filter(|path| !path.is_empty())
2799        .map(PathBuf::from);
2800    let userprofile = env::var_os("USERPROFILE")
2801        .filter(|path| !path.is_empty())
2802        .map(PathBuf::from);
2803    let home = env::var_os("HOME")
2804        .filter(|path| !path.is_empty())
2805        .map(PathBuf::from);
2806
2807    rag_store_root_for_platform(
2808        ArtifactStorePlatform::current(),
2809        xdg_data_home.as_deref(),
2810        appdata.as_deref(),
2811        userprofile.as_deref(),
2812        home.as_deref(),
2813    )
2814}
2815
2816fn rag_store_root_for_platform(
2817    platform: ArtifactStorePlatform,
2818    xdg_data_home: Option<&Path>,
2819    appdata: Option<&Path>,
2820    userprofile: Option<&Path>,
2821    home: Option<&Path>,
2822) -> PathBuf {
2823    match platform {
2824        ArtifactStorePlatform::Windows => {
2825            if let Some(path) = appdata {
2826                return path.join("Zotron").join("rag");
2827            }
2828            if let Some(path) = userprofile {
2829                return path
2830                    .join("AppData")
2831                    .join("Roaming")
2832                    .join("Zotron")
2833                    .join("rag");
2834            }
2835            if let Some(path) = home {
2836                return path
2837                    .join("AppData")
2838                    .join("Roaming")
2839                    .join("Zotron")
2840                    .join("rag");
2841            }
2842            PathBuf::from(".zotron").join("rag")
2843        }
2844        ArtifactStorePlatform::Macos => {
2845            if let Some(path) = home {
2846                return path
2847                    .join("Library")
2848                    .join("Application Support")
2849                    .join("Zotron")
2850                    .join("rag");
2851            }
2852            if let Some(path) = xdg_data_home {
2853                return path.join("zotron").join("rag");
2854            }
2855            PathBuf::from(".zotron").join("rag")
2856        }
2857        ArtifactStorePlatform::Linux | ArtifactStorePlatform::Other => xdg_data_home
2858            .map(|path| path.join("zotron").join("rag"))
2859            .or_else(|| {
2860                home.map(|path| path.join(".local").join("share").join("zotron").join("rag"))
2861            })
2862            .unwrap_or_else(|| PathBuf::from(".zotron").join("rag")),
2863    }
2864}
2865
2866fn run_push_command(
2867    json_file: String,
2868    pdf: Option<String>,
2869    collection: Option<String>,
2870    on_duplicate: String,
2871    dry_run: bool,
2872    client: &mut impl RpcCaller,
2873) -> Result<String, String> {
2874    if !matches!(on_duplicate.as_str(), "skip" | "update" | "create") {
2875        return Err(format!(
2876            "INVALID_ARGS: --on-duplicate must be skip|update|create, got {on_duplicate:?}"
2877        ));
2878    }
2879
2880    let payload = if json_file == "-" {
2881        let mut input = String::new();
2882        io::stdin()
2883            .read_to_string(&mut input)
2884            .map_err(|err| format!("read stdin: {err}"))?;
2885        input
2886    } else {
2887        fs::read_to_string(&json_file).map_err(|err| format!("read {json_file}: {err}"))?
2888    };
2889    let item_json = serde_json::from_str::<Value>(&payload)
2890        .map_err(|err| format!("INVALID_JSON: Could not parse JSON: {err}"))?;
2891
2892    if dry_run {
2893        let collection_key = collection
2894            .as_deref()
2895            .map(|name| resolve_collection(client, name))
2896            .transpose()?;
2897        return format_json(
2898            &serde_json::json!({
2899                "ok": true,
2900                "dryRun": true,
2901                "wouldPush": {
2902                    "title": item_json.get("title").cloned().unwrap_or(Value::Null),
2903                    "itemType": item_json.get("itemType").cloned().unwrap_or(Value::Null),
2904                    "collectionKey": collection_key,
2905                    "pdfPath": pdf,
2906                    "onDuplicate": on_duplicate,
2907                }
2908            }),
2909            JsonStyle::PythonCompact,
2910        );
2911    }
2912
2913    let result = push_item(
2914        client,
2915        &item_json,
2916        pdf.as_deref(),
2917        collection.as_deref(),
2918        &on_duplicate,
2919    )?;
2920    format_json(&result, JsonStyle::PythonCompact)
2921}
2922
/// Pushes one item to Zotero, handling duplicates and an optional PDF.
///
/// * `item_json` — the item to push.
/// * `pdf_path` — optional PDF; it is validated before any RPC call is made.
/// * `collection` — target collection name; when `None` the collection
///   reported by `system.currentCollection` is used.
/// * `on_duplicate` — `"skip"` and `"update"` act on a detected duplicate;
///   any other value falls through to creating a new item.
///
/// Returns the object built by `push_result`, whose `status` is
/// `skipped_duplicate`, `updated`, or `created`. `pdf_size_bytes` is non-zero
/// only when a PDF was actually attached by this call.
///
/// # Errors
/// Propagates PDF validation, collection resolution, and RPC failures, and
/// reports an unexpected `items.create` response shape.
fn push_item(
    client: &mut impl RpcCaller,
    item_json: &Value,
    pdf_path: Option<&str>,
    collection: Option<&str>,
    on_duplicate: &str,
) -> Result<Value, String> {
    // Fail fast on a bad PDF so nothing is written to the library first.
    let pdf_size = if let Some(path) = pdf_path {
        validate_pdf_magic(path)?
    } else {
        0
    };

    let collection_key = match collection {
        Some(name) => resolve_collection(client, name)?,
        None => resolve_current_collection(client)?,
    };

    let dup_id = find_duplicate(client, item_json)?;
    // "skip": keep the existing item untouched, but still file it into the
    // target collection and attach the PDF if it has none yet.
    if let Some(dup_id) = dup_id.as_deref().filter(|_| on_duplicate == "skip") {
        if !is_library_root(&collection_key) {
            client.call(
                "collections.addItems",
                Some(serde_json::json!({"key": collection_key, "keys": [dup_id]})),
            )?;
        }
        let mut pdf_attached = false;
        if let Some(path) = pdf_path {
            if !item_has_pdf_attachment(client, dup_id)? {
                attach_pdf(client, dup_id, path)?;
                pdf_attached = true;
            }
        }
        return Ok(push_result(
            "skipped_duplicate",
            Some(dup_id.to_string()),
            pdf_attached,
            if pdf_attached { pdf_size } else { 0 },
            Value::Null,
        ));
    }

    let xpi_payload = to_xpi_payload(item_json, Some(&collection_key));
    let (item_key, status) =
        // "update": overwrite fields (and creators/tags when present) on the duplicate.
        if let Some(dup_id) = dup_id.as_deref().filter(|_| on_duplicate == "update") {
            let mut params = serde_json::Map::new();
            params.insert("key".to_string(), Value::String(dup_id.to_string()));
            params.insert(
                "fields".to_string(),
                xpi_payload
                    .get("fields")
                    .cloned()
                    .unwrap_or_else(|| serde_json::json!({})),
            );
            if let Some(creators) = xpi_payload.get("creators") {
                params.insert("creators".to_string(), creators.clone());
            }
            if let Some(tags) = xpi_payload.get("tags") {
                params.insert("tags".to_string(), tags.clone());
            }
            client.call("items.update", Some(Value::Object(params)))?;
            (dup_id.to_string(), "updated")
        } else {
            // No duplicate, or the caller asked for a fresh item regardless.
            let created = client.call("items.create", Some(xpi_payload))?;
            let key = created
                .get("key")
                .and_then(Value::as_str)
                .ok_or_else(|| format!("items.create returned unexpected shape: {created:?}"))?;
            (key.to_string(), "created")
        };

    let mut pdf_attached = false;
    if let Some(path) = pdf_path {
        // A freshly created item is attached to without checking; an updated
        // item only gets the PDF when it has none yet.
        if status != "updated" || !item_has_pdf_attachment(client, &item_key)? {
            attach_pdf(client, &item_key, path)?;
            pdf_attached = true;
        }
    }

    // Created items carry the collection in the create payload; updated items
    // need an explicit membership call.
    if status == "updated" && !is_library_root(&collection_key) {
        client.call(
            "collections.addItems",
            Some(serde_json::json!({"key": collection_key, "keys": [item_key]})),
        )?;
    }

    Ok(push_result(
        status,
        Some(item_key),
        pdf_attached,
        if pdf_attached { pdf_size } else { 0 },
        Value::Null,
    ))
}
3017
/// Checks that the file at `path` starts with the `%PDF-` magic bytes and
/// returns its size in bytes.
///
/// Only the five header bytes are read; the size comes from file metadata, so
/// large PDFs are not loaded into memory just to be validated.
///
/// # Errors
/// Returns the same `INVALID_PDF: ...` message whether the file is missing,
/// unreadable, shorter than the magic, or starts with different bytes.
fn validate_pdf_magic(path: &str) -> Result<u64, String> {
    const PDF_MAGIC: &[u8; 5] = b"%PDF-";
    let invalid = || format!("INVALID_PDF: {path} does not start with %PDF- magic bytes");

    let mut file = fs::File::open(path).map_err(|_| invalid())?;
    let mut header = [0u8; 5];
    // `read_exact` fails on files shorter than the magic, which is also invalid.
    file.read_exact(&mut header).map_err(|_| invalid())?;
    if &header != PDF_MAGIC {
        return Err(invalid());
    }
    file.metadata()
        .map(|meta| meta.len())
        .map_err(|_| invalid())
}
3028
3029fn resolve_current_collection(client: &mut impl RpcCaller) -> Result<Value, String> {
3030    let selected = client.call("system.currentCollection", None)?;
3031    Ok(selected
3032        .get("key")
3033        .cloned()
3034        .unwrap_or_else(|| Value::Number(0.into())))
3035}
3036
3037fn find_duplicate(
3038    client: &mut impl RpcCaller,
3039    item_json: &Value,
3040) -> Result<Option<String>, String> {
3041    if let Some(doi) = item_json
3042        .get("DOI")
3043        .and_then(Value::as_str)
3044        .filter(|doi| !doi.is_empty())
3045    {
3046        let hits = client.call("search.byIdentifier", Some(serde_json::json!({"doi": doi})))?;
3047        if let Some(key) = first_hit_key(&hits) {
3048            return Ok(Some(key));
3049        }
3050    }
3051
3052    if let Some(title) = item_json
3053        .get("title")
3054        .and_then(Value::as_str)
3055        .filter(|title| title.len() >= 10)
3056    {
3057        let hits = client.call(
3058            "search.quick",
3059            Some(serde_json::json!({"query": title, "limit": 20})),
3060        )?;
3061        if let Some(items) = response_items(&hits) {
3062            for item in items {
3063                if item.get("title").and_then(Value::as_str) == Some(title) {
3064                    if let Some(key) = item.get("key").and_then(Value::as_str) {
3065                        return Ok(Some(key.to_string()));
3066                    }
3067                }
3068            }
3069        }
3070    }
3071
3072    Ok(None)
3073}
3074
3075fn first_hit_key(response: &Value) -> Option<String> {
3076    response_items(response)?
3077        .first()?
3078        .get("key")?
3079        .as_str()
3080        .map(ToString::to_string)
3081}
3082
3083fn response_items(response: &Value) -> Option<&Vec<Value>> {
3084    response
3085        .get("items")
3086        .and_then(Value::as_array)
3087        .or_else(|| response.as_array())
3088}
3089
3090fn to_xpi_payload(item_json: &Value, collection_key: Option<&Value>) -> Value {
3091    const NON_FIELD_KEYS: &[&str] = &[
3092        "itemType",
3093        "creators",
3094        "tags",
3095        "collections",
3096        "attachments",
3097        "relations",
3098        "notes",
3099        "id",
3100        "key",
3101        "version",
3102    ];
3103
3104    let mut fields = serde_json::Map::new();
3105    if let Some(item) = item_json.as_object() {
3106        for (key, value) in item {
3107            if !NON_FIELD_KEYS.contains(&key.as_str()) && !value.is_null() && value != "" {
3108                fields.insert(key.clone(), value.clone());
3109            }
3110        }
3111    }
3112
3113    let mut payload = serde_json::Map::new();
3114    payload.insert(
3115        "itemType".to_string(),
3116        item_json
3117            .get("itemType")
3118            .cloned()
3119            .unwrap_or_else(|| Value::String("journalArticle".to_string())),
3120    );
3121    payload.insert("fields".to_string(), Value::Object(fields));
3122
3123    if let Some(creators) = item_json.get("creators").and_then(Value::as_array) {
3124        if !creators.is_empty() {
3125            payload.insert(
3126                "creators".to_string(),
3127                Value::Array(
3128                    creators
3129                        .iter()
3130                        .map(|creator| {
3131                            serde_json::json!({
3132                                "firstName": creator.get("firstName").and_then(Value::as_str).unwrap_or(""),
3133                                "lastName": creator.get("lastName").and_then(Value::as_str).unwrap_or(""),
3134                                "creatorType": creator.get("creatorType").and_then(Value::as_str).unwrap_or("author"),
3135                            })
3136                        })
3137                        .collect(),
3138                ),
3139            );
3140        }
3141    }
3142
3143    if let Some(tags) = item_json.get("tags").and_then(Value::as_array) {
3144        if !tags.is_empty() {
3145            payload.insert(
3146                "tags".to_string(),
3147                Value::Array(
3148                    tags.iter()
3149                        .map(|tag| tag.get("tag").cloned().unwrap_or_else(|| tag.clone()))
3150                        .collect(),
3151                ),
3152            );
3153        }
3154    }
3155
3156    if let Some(collection_key) = collection_key.filter(|key| !is_library_root(key)) {
3157        payload.insert(
3158            "collections".to_string(),
3159            Value::Array(vec![collection_key.clone()]),
3160        );
3161    }
3162
3163    Value::Object(payload)
3164}
3165
3166fn item_has_pdf_attachment(client: &mut impl RpcCaller, item_key: &str) -> Result<bool, String> {
3167    let attachments = client.call(
3168        "attachments.list",
3169        Some(serde_json::json!({"parentKey": item_key})),
3170    )?;
3171    Ok(has_pdf_attachment(&attachments))
3172}
3173
3174fn attach_pdf(client: &mut impl RpcCaller, item_key: &str, path: &str) -> Result<(), String> {
3175    client.call(
3176        "attachments.add",
3177        Some(serde_json::json!({
3178            "parentKey": item_key,
3179            "path": zotero_path(path),
3180            "title": "Full Text PDF",
3181        })),
3182    )?;
3183    Ok(())
3184}
3185
3186fn zotero_path(path: &str) -> String {
3187    let path = Path::new(path)
3188        .canonicalize()
3189        .unwrap_or_else(|_| Path::new(path).to_path_buf())
3190        .to_string_lossy()
3191        .into_owned();
3192    if is_wsl() {
3193        return ProcessCommand::new("wslpath")
3194            .arg("-w")
3195            .arg(&path)
3196            .output()
3197            .ok()
3198            .filter(|output| output.status.success())
3199            .and_then(|output| String::from_utf8(output.stdout).ok())
3200            .map(|converted| converted.trim().to_string())
3201            .filter(|converted| !converted.is_empty())
3202            .unwrap_or(path);
3203    }
3204    path
3205}
3206
/// Detects whether the process runs under Windows Subsystem for Linux.
///
/// True when `WSL_DISTRO_NAME` is set, or when `/proc/sys/kernel/osrelease`
/// is readable and contains "microsoft" (case-insensitively).
fn is_wsl() -> bool {
    env::var_os("WSL_DISTRO_NAME").is_some()
        || fs::read_to_string("/proc/sys/kernel/osrelease")
            .is_ok_and(|release| release.to_ascii_lowercase().contains("microsoft"))
}
3215
3216fn is_library_root(value: &Value) -> bool {
3217    value.as_i64() == Some(0) || value.as_u64() == Some(0)
3218}
3219
3220fn push_result(
3221    status: &str,
3222    zotero_item_key: Option<String>,
3223    pdf_attached: bool,
3224    pdf_size_bytes: u64,
3225    error: Value,
3226) -> Value {
3227    serde_json::json!({
3228        "status": status,
3229        "zotero_item_key": zotero_item_key,
3230        "pdf_attached": pdf_attached,
3231        "pdf_size_bytes": pdf_size_bytes,
3232        "error": error,
3233    })
3234}
3235
/// Implements the `search` command by dispatching to one of four RPC strategies.
///
/// The first matching mode wins, in this order:
/// 1. identifier lookup when any of `doi` / `isbn` / `issn` is given
///    (`search.byIdentifier`);
/// 2. full-text search when `fulltext` is set (`search.fulltext`), which
///    requires a query and optionally scopes to a collection;
/// 3. advanced search when any of `author` / `after` / `before` / `journal` /
///    `tag` is given (`search.advanced`, all conditions AND-ed);
/// 4. quick search by query, either filtered locally over a collection's items
///    or via `search.quick`.
///
/// Every result is passed through `normalize_list_envelope` under the `items`
/// key and returned with `JsonStyle::Pretty`.
///
/// # Errors
/// `INVALID_ARGS` when a required query is missing; collection resolution and
/// RPC failures are propagated.
fn run_search(
    args: SearchArgs,
    client: &mut impl RpcCaller,
) -> Result<(Value, JsonStyle), String> {
    let SearchArgs {
        query, fulltext, author, after, before, journal, tag,
        doi, isbn, issn, collection, limit, offset, ..
    } = args;

    // Mode 1: identifier lookup. Query, filters, collection and paging are not used here.
    let has_identifier = doi.is_some() || isbn.is_some() || issn.is_some();
    if has_identifier {
        let mut params = serde_json::Map::new();
        if let Some(doi) = doi { params.insert("doi".into(), Value::String(doi)); }
        if let Some(isbn) = isbn { params.insert("isbn".into(), Value::String(isbn)); }
        if let Some(issn) = issn { params.insert("issn".into(), Value::String(issn)); }
        let value = client.call("search.byIdentifier", Some(Value::Object(params)))?;
        return Ok((normalize_list_envelope(value, "items", None, 0), JsonStyle::Pretty));
    }

    // Mode 2: full-text search, optionally restricted to a resolved collection.
    if fulltext {
        let query = query.ok_or("INVALID_ARGS: --fulltext requires a search query")?;
        let mut params = serde_json::json!({"query": query, "limit": limit});
        if let (Some(col), Some(map)) = (collection, params.as_object_mut()) {
            map.insert("collection".into(), resolve_collection(client, &col)?);
        }
        let value = client.call("search.fulltext", Some(params))?;
        return Ok((normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty));
    }

    // Mode 3: advanced search. The free-text query, when present, becomes one
    // more condition alongside the explicit filters.
    let has_filters = author.is_some() || after.is_some() || before.is_some()
        || journal.is_some() || tag.is_some();
    if has_filters {
        let mut conditions: Vec<Value> = Vec::new();
        if let Some(query) = &query {
            conditions.push(serde_json::json!({
                "field": "quicksearch-titleCreatorYear",
                "operator": "contains",
                "value": query,
            }));
        }
        if let Some(author) = author {
            conditions.push(serde_json::json!({
                "field": "creator", "operator": "contains", "value": author,
            }));
        }
        if let Some(after) = after {
            conditions.push(serde_json::json!({
                "field": "date", "operator": "isAfter", "value": after,
            }));
        }
        if let Some(before) = before {
            conditions.push(serde_json::json!({
                "field": "date", "operator": "isBefore", "value": before,
            }));
        }
        if let Some(journal) = journal {
            conditions.push(serde_json::json!({
                "field": "publicationTitle", "operator": "contains", "value": journal,
            }));
        }
        if let Some(tag) = tag {
            conditions.push(serde_json::json!({
                "field": "tag", "operator": "is", "value": tag,
            }));
        }
        let value = client.call(
            "search.advanced",
            Some(serde_json::json!({
                "conditions": conditions,
                "operator": "and",
                "limit": limit,
                "offset": offset,
            })),
        )?;
        return Ok((normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty));
    }

    // Mode 4: quick search. A query is mandatory from here on.
    let query = query.ok_or(
        "INVALID_ARGS: provide a search query, or use --doi/--isbn/--issn for identifier lookup"
    )?;
    let value = if let Some(col) = collection {
        // Collection-scoped: fetch the collection's items and match them locally.
        let key = resolve_collection(client, &col)?;
        let response = client.call(
            "collections.getItems",
            Some(serde_json::json!({"key": key})),
        )?;
        collection_quick_search_response(&response, &query, limit)
    } else {
        // Library-wide: delegate to the server and drop evidence artifacts from the hits.
        filter_search_artifacts(client.call(
            "search.quick",
            Some(serde_json::json!({"query": query, "limit": limit})),
        )?)
    };
    Ok((normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty))
}
3331
3332fn run_search_management_command(
3333    command: SearchManagementCommand,
3334    client: &mut impl RpcCaller,
3335) -> Result<(Value, JsonStyle), String> {
3336    match command {
3337        SearchManagementCommand::SavedSearches { .. } => Ok((
3338            normalize_list_envelope(client.call("search.savedSearches", None)?, "items", None, 0),
3339            JsonStyle::Pretty,
3340        )),
3341        SearchManagementCommand::CreateSaved {
3342            name, condition, dry_run, ..
3343        } => {
3344            let conditions = condition
3345                .iter()
3346                .map(|raw| parse_search_condition(raw))
3347                .collect::<Result<Vec<_>, _>>()?;
3348            let params = serde_json::json!({"name": name, "conditions": conditions});
3349            if dry_run {
3350                Ok((dry_run_value("search.createSavedSearch", params), JsonStyle::PythonCompact))
3351            } else {
3352                Ok((client.call("search.createSavedSearch", Some(params))?, JsonStyle::PythonCompact))
3353            }
3354        }
3355        SearchManagementCommand::DeleteSaved {
3356            search_key, dry_run, ..
3357        } => {
3358            let params = serde_json::json!({"key": search_key});
3359            if dry_run {
3360                Ok((dry_run_value("search.deleteSavedSearch", params), JsonStyle::PythonCompact))
3361            } else {
3362                Ok((client.call("search.deleteSavedSearch", Some(params))?, JsonStyle::PythonCompact))
3363            }
3364        }
3365    }
3366}
3367
3368fn filter_search_artifacts(mut value: Value) -> Value {
3369    let Some(items) = value.get_mut("items").and_then(Value::as_array_mut) else {
3370        return value;
3371    };
3372    items.retain(|item| match item.get("title").and_then(Value::as_str) {
3373        Some(title) => !is_zotron_evidence_artifact(title),
3374        None => true,
3375    });
3376    let total_items = items.len() as u64;
3377    if let Some(total) = value.get_mut("total") {
3378        *total = Value::from(total_items);
3379    }
3380    value
3381}
3382
3383fn collection_quick_search_response(response: &Value, query: &str, limit: u64) -> Value {
3384    let mut matched = collection_items(response)
3385        .into_iter()
3386        .filter(|item| !item_is_evidence_artifact(item))
3387        .filter(|item| quick_item_matches(item, query))
3388        .collect::<Vec<_>>();
3389    let total = matched.len() as u64;
3390    let limit = usize::try_from(limit).unwrap_or(usize::MAX);
3391    if matched.len() > limit {
3392        matched.truncate(limit);
3393    }
3394    serde_json::json!({"items": matched, "total": total})
3395}
3396
3397fn item_is_evidence_artifact(item: &Value) -> bool {
3398    item.get("title")
3399        .and_then(Value::as_str)
3400        .is_some_and(is_zotron_evidence_artifact)
3401}
3402
3403fn quick_item_matches(item: &Value, query: &str) -> bool {
3404    let terms = query
3405        .split_whitespace()
3406        .map(|term| term.to_lowercase())
3407        .filter(|term| !term.is_empty())
3408        .collect::<Vec<_>>();
3409    if terms.is_empty() {
3410        return true;
3411    }
3412    let mut haystack = String::new();
3413    append_search_text(item, &mut haystack);
3414    let haystack = haystack.to_lowercase();
3415    terms.iter().all(|term| haystack.contains(term))
3416}
3417
3418fn append_search_text(value: &Value, out: &mut String) {
3419    match value {
3420        Value::String(text) => {
3421            out.push(' ');
3422            out.push_str(text);
3423        }
3424        Value::Number(number) => {
3425            out.push(' ');
3426            out.push_str(&number.to_string());
3427        }
3428        Value::Bool(value) => {
3429            out.push(' ');
3430            out.push_str(if *value { "true" } else { "false" });
3431        }
3432        Value::Array(items) => {
3433            for item in items {
3434                append_search_text(item, out);
3435            }
3436        }
3437        Value::Object(map) => {
3438            for item in map.values() {
3439                append_search_text(item, out);
3440            }
3441        }
3442        Value::Null => {}
3443    }
3444}
3445
3446fn parse_search_condition(raw: &str) -> Result<Value, String> {
3447    let mut parts = raw.split_whitespace();
3448    let field = parts.next();
3449    let operator = parts.next();
3450    let value = parts.collect::<Vec<_>>().join(" ");
3451    match (field, operator, value.is_empty()) {
3452        (Some(field), Some(operator), false) => Ok(serde_json::json!({
3453            "field": field,
3454            "operator": operator,
3455            "value": value,
3456        })),
3457        _ => Err(format!(
3458            "INVALID_ARGS: --condition must be 'field operator value', got: {raw:?}"
3459        )),
3460    }
3461}
3462
3463fn normalize_list_envelope(value: Value, list_key: &str, limit: Option<u64>, offset: u64) -> Value {
3464    if let Value::Array(arr) = value {
3465        let total = arr.len() as u64;
3466        let mut obj = serde_json::Map::new();
3467        obj.insert(list_key.to_string(), Value::Array(arr));
3468        obj.insert("total".to_string(), Value::from(total));
3469        if let Some(limit) = limit {
3470            obj.insert("limit".to_string(), Value::from(limit));
3471        }
3472        obj.insert("offset".to_string(), Value::from(offset));
3473        obj.insert("hasMore".to_string(), Value::Bool(false));
3474        return Value::Object(obj);
3475    }
3476
3477    let mut obj = match value {
3478        Value::Object(obj) if obj.contains_key(list_key) => obj,
3479        other => return other,
3480    };
3481
3482    let items_len = obj
3483        .get(list_key)
3484        .and_then(Value::as_array)
3485        .map_or(0, |a| a.len()) as u64;
3486    let total = obj
3487        .get("total")
3488        .and_then(Value::as_u64)
3489        .unwrap_or(items_len);
3490
3491    obj.insert("total".to_string(), Value::from(total));
3492    if let Some(limit) = limit {
3493        obj.insert("limit".to_string(), Value::from(limit));
3494    }
3495    obj.insert("offset".to_string(), Value::from(offset));
3496    obj.insert(
3497        "hasMore".to_string(),
3498        Value::Bool(offset + items_len < total),
3499    );
3500
3501    Value::Object(obj)
3502}
3503
/// Upper bound on the number of rows `paginate_rpc` accumulates before it
/// stops fetching and returns what it has (truncated to this size).
const RPC_PAGINATION_SAFETY_CAP: usize = 10_000;
/// Object members probed, in this order, by `extract_page` when looking for
/// the page array inside an object-shaped response.
const RPC_PAGE_LIST_KEYS: [&str; 4] = ["items", "tags", "results", "data"];
3506
3507fn paginate_rpc(
3508    client: &mut impl RpcCaller,
3509    method: &str,
3510    params: Value,
3511    page_size: usize,
3512) -> Result<Value, String> {
3513    let base = params
3514        .as_object()
3515        .ok_or_else(|| "params must be a JSON object".to_string())?;
3516    let mut out = Vec::new();
3517    let mut prev_page: Option<Vec<Value>> = None;
3518    let mut offset = 0usize;
3519
3520    loop {
3521        let mut page_params = base.clone();
3522        page_params.insert("offset".to_string(), Value::Number(offset.into()));
3523        page_params.insert("limit".to_string(), Value::Number(page_size.into()));
3524        let response = client.call(method, Some(Value::Object(page_params)))?;
3525
3526        let page = match extract_page(&response) {
3527            Some(page) => page,
3528            None if out.is_empty() => return Ok(response),
3529            None if response.is_object() => {
3530                return Err(format!(
3531                    "paginate: {method:?} returned a non-paginated dict after {} accumulated rows; aborting",
3532                    out.len()
3533                ));
3534            }
3535            None => {
3536                return Err(format!(
3537                    "paginate: {method:?} returned non-list/non-dict shape after {} accumulated rows; aborting",
3538                    out.len()
3539                ));
3540            }
3541        };
3542
3543        if prev_page.as_ref() == Some(&page) {
3544            return Err(format!(
3545                "paginate: {method:?} returned identical pages — method likely ignores offset; aborting after {} rows",
3546                out.len()
3547            ));
3548        }
3549
3550        let page_len = page.len();
3551        out.extend(page.clone());
3552        if page_len < page_size {
3553            return Ok(Value::Array(out));
3554        }
3555        if out.len() >= RPC_PAGINATION_SAFETY_CAP {
3556            out.truncate(RPC_PAGINATION_SAFETY_CAP);
3557            return Ok(Value::Array(out));
3558        }
3559        prev_page = Some(page);
3560        offset += page_size;
3561    }
3562}
3563
3564fn extract_page(response: &Value) -> Option<Vec<Value>> {
3565    if let Some(page) = response.as_array() {
3566        return Some(page.clone());
3567    }
3568    let object = response.as_object()?;
3569    for key in RPC_PAGE_LIST_KEYS {
3570        if let Some(page) = object.get(key).and_then(Value::as_array) {
3571            return Some(page.clone());
3572        }
3573    }
3574    None
3575}
3576
3577fn run_find_pdfs_command(
3578    client: &mut impl RpcCaller,
3579    collection: String,
3580    limit: usize,
3581) -> Result<(Value, JsonStyle), String> {
3582    let collection_key = resolve_collection(client, &collection)?;
3583    let response = client.call(
3584        "collections.getItems",
3585        Some(serde_json::json!({"key": collection_key})),
3586    )?;
3587    let items = collection_items(&response);
3588
3589    let mut missing = Vec::new();
3590    for item in &items {
3591        let Some(item_key) = item.get("key").and_then(Value::as_str) else {
3592            continue;
3593        };
3594        let attachments = client.call(
3595            "attachments.list",
3596            Some(serde_json::json!({"parentKey": item_key})),
3597        )?;
3598        if !has_pdf_attachment(&attachments) {
3599            missing.push(item.clone());
3600        }
3601        if limit > 0 && missing.len() >= limit {
3602            break;
3603        }
3604    }
3605
3606    let mut results = Vec::new();
3607    for item in &missing {
3608        let item_key = item
3609            .get("key")
3610            .and_then(Value::as_str)
3611            .ok_or_else(|| "missing item lacks key".to_string())?;
3612        let response = client.call(
3613            "attachments.findPDF",
3614            Some(serde_json::json!({"parentKey": item_key})),
3615        )?;
3616        let attachment = response.get("attachment").filter(|value| !value.is_null());
3617        results.push(serde_json::json!({
3618            "item_key": item_key,
3619            "title": item.get("title").cloned().unwrap_or(Value::Null),
3620            "found": attachment.is_some(),
3621            "attachment_key": attachment
3622                .and_then(|attachment| attachment.get("key"))
3623                .cloned()
3624                .unwrap_or(Value::Null),
3625        }));
3626    }
3627
3628    Ok((
3629        serde_json::json!({
3630            "scanned": items.len(),
3631            "attempted": missing.len(),
3632            "results": results,
3633        }),
3634        JsonStyle::Pretty,
3635    ))
3636}
3637
3638fn collection_items(response: &Value) -> Vec<Value> {
3639    if let Some(items) = response.get("items").and_then(Value::as_array) {
3640        return items.clone();
3641    }
3642    response.as_array().cloned().unwrap_or_default()
3643}
3644
3645fn has_pdf_attachment(attachments: &Value) -> bool {
3646    attachments
3647        .as_array()
3648        .is_some_and(|attachments| attachments.iter().any(is_pdf_attachment))
3649}
3650
3651fn is_pdf_attachment(attachment: &Value) -> bool {
3652    let content_type = attachment
3653        .get("contentType")
3654        .and_then(Value::as_str)
3655        .unwrap_or_default()
3656        .to_lowercase();
3657    let path = attachment
3658        .get("path")
3659        .and_then(Value::as_str)
3660        .unwrap_or_default()
3661        .to_lowercase();
3662    matches!(
3663        content_type.as_str(),
3664        "application/pdf" | "application/x-pdf"
3665    ) || path.ends_with(".pdf")
3666}
3667
3668fn call_json(
3669    client: &mut impl RpcCaller,
3670    method: &str,
3671    params: Option<Value>,
3672) -> Result<Value, String> {
3673    client.call(method, params)
3674}
3675
3676fn run_system_command(
3677    command: SystemCommand,
3678    client: &mut impl RpcCaller,
3679) -> Result<(Value, JsonStyle), String> {
3680    let value = match command {
3681        SystemCommand::Version { .. } => client.call("system.version", None)?,
3682        SystemCommand::Libraries { .. } => client.call("system.libraries", None)?,
3683        SystemCommand::LibraryStats { library, .. } => {
3684            let params = library.map(|id| serde_json::json!({"id": id}));
3685            client.call("system.libraryStats", params)?
3686        }
3687        SystemCommand::Schema { item_type, .. } => {
3688            if let Some(item_type) = item_type {
3689                let fields = client.call("system.itemFields", Some(serde_json::json!({"itemType": item_type})))?;
3690                let creators = client.call("system.creatorTypes", Some(serde_json::json!({"itemType": item_type})))?;
3691                let field_names: Vec<Value> = fields.as_array().unwrap_or(&vec![])
3692                    .iter()
3693                    .filter_map(|f| f.get("field").cloned())
3694                    .collect();
3695                let creator_names: Vec<Value> = creators.as_array().unwrap_or(&vec![])
3696                    .iter()
3697                    .filter_map(|c| c.get("creatorType").cloned())
3698                    .collect();
3699                serde_json::json!({
3700                    "itemType": item_type,
3701                    "fields": field_names,
3702                    "creatorTypes": creator_names,
3703                })
3704            } else {
3705                let types = client.call("system.itemTypes", None)?;
3706                let type_names: Vec<Value> = types.as_array().unwrap_or(&vec![])
3707                    .iter()
3708                    .filter_map(|t| t.get("itemType").cloned())
3709                    .collect();
3710                Value::Array(type_names)
3711            }
3712        }
3713        SystemCommand::CurrentCollection { .. } => client.call("system.currentCollection", None)?,
3714        SystemCommand::ListMethods { .. } => client.call("system.listMethods", None)?,
3715        SystemCommand::Describe { method, .. } => {
3716            let params = method.map(|method| serde_json::json!({"method": method}));
3717            client.call("system.describe", params)?
3718        }
3719    };
3720    Ok((value, JsonStyle::Pretty))
3721}
3722
3723fn run_items_command(
3724    command: ItemsCommand,
3725    client: &mut impl RpcCaller,
3726) -> Result<(Value, JsonStyle), String> {
3727    let (value, style) = match command {
3728        ItemsCommand::Add {
3729            doi,
3730            isbn,
3731            from_url,
3732            file,
3733            collection,
3734            dry_run,
3735            ..
3736        } => {
3737            if let Some(doi) = doi {
3738                run_add_identifier_command(client, "items.addByDOI", "doi", doi, collection, dry_run)?
3739            } else if let Some(isbn) = isbn {
3740                run_add_identifier_command(client, "items.addByISBN", "isbn", isbn, collection, dry_run)?
3741            } else if let Some(from_url) = from_url {
3742                run_add_identifier_command(client, "items.addByURL", "url", from_url, collection, dry_run)?
3743            } else if let Some(file) = file {
3744                let mut params = serde_json::json!({"path": zotero_path(&file)});
3745                maybe_insert_collection(client, &mut params, collection)?;
3746                run_mutation_command(client, "items.addFromFile", params, dry_run)?
3747            } else {
3748                return Err("INVALID_ARGS: provide one of --doi, --isbn, --from-url, or --file".into());
3749            }
3750        }
3751        ItemsCommand::Create {
3752            item_type,
3753            fields,
3754            dry_run,
3755            ..
3756        } => {
3757            let parsed_fields = parse_field_options(&fields)?;
3758            let mut params = serde_json::json!({"itemType": item_type});
3759            if !parsed_fields.is_empty() {
3760                if let Some(map) = params.as_object_mut() {
3761                    map.insert("fields".to_string(), Value::Object(parsed_fields));
3762                }
3763            }
3764            run_mutation_command(client, "items.create", params, dry_run)?
3765        }
3766        ItemsCommand::Update {
3767            key,
3768            fields,
3769            dry_run,
3770            ..
3771        } => {
3772            let parsed_fields = parse_field_options(&fields)?;
3773            let mut params = serde_json::json!({"key": key});
3774            if !parsed_fields.is_empty() {
3775                if let Some(map) = params.as_object_mut() {
3776                    map.insert("fields".to_string(), Value::Object(parsed_fields));
3777                }
3778            }
3779            run_mutation_command(client, "items.update", params, dry_run)?
3780        }
3781        ItemsCommand::Delete { key, dry_run, .. } => run_mutation_command(
3782            client,
3783            "items.delete",
3784            serde_json::json!({"key": key}),
3785            dry_run,
3786        )?,
3787        ItemsCommand::Trash {
3788            items, dry_run, ..
3789        } => {
3790            if items.len() == 1 {
3791                run_mutation_command(
3792                    client,
3793                    "items.trash",
3794                    serde_json::json!({"key": items[0]}),
3795                    dry_run,
3796                )?
3797            } else {
3798                run_mutation_command(
3799                    client,
3800                    "items.batchTrash",
3801                    serde_json::json!({"keys": items}),
3802                    dry_run,
3803                )?
3804            }
3805        }
3806        ItemsCommand::Restore { item, dry_run, .. } => run_mutation_command(
3807            client,
3808            "items.restore",
3809            serde_json::json!({"key": item}),
3810            dry_run,
3811        )?,
3812        ItemsCommand::MergeDuplicates { keys, dry_run, .. } => {
3813            if keys.len() < 2 {
3814                return Err("INVALID_ARGS: need at least 2 keys to merge".to_string());
3815            }
3816            run_mutation_command(
3817                client,
3818                "items.mergeDuplicates",
3819                serde_json::json!({"keys": keys}),
3820                dry_run,
3821            )?
3822        }
3823        ItemsCommand::AddRelated {
3824            key,
3825            target,
3826            dry_run,
3827            ..
3828        } => run_mutation_command(
3829            client,
3830            "items.addRelated",
3831            serde_json::json!({"key": key, "targetKey": target}),
3832            dry_run,
3833        )?,
3834        ItemsCommand::RemoveRelated {
3835            key,
3836            target,
3837            dry_run,
3838            ..
3839        } => run_mutation_command(
3840            client,
3841            "items.removeRelated",
3842            serde_json::json!({"key": key, "targetKey": target}),
3843            dry_run,
3844        )?,
3845        ItemsCommand::Get { item, .. } => (
3846            client.call("items.get", Some(serde_json::json!({"key": item})))?,
3847            JsonStyle::Pretty,
3848        ),
3849        ItemsCommand::List {
3850            limit,
3851            offset,
3852            sort,
3853            direction,
3854            trash,
3855            ..
3856        } => {
3857            if trash {
3858                let value = client.call(
3859                    "items.getTrash",
3860                    Some(serde_json::json!({"limit": limit, "offset": offset})),
3861                )?;
3862                (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
3863            } else {
3864                let mut params = serde_json::json!({
3865                    "limit": limit,
3866                    "offset": offset,
3867                    "direction": direction,
3868                });
3869                if let (Some(sort), Some(map)) = (sort, params.as_object_mut()) {
3870                    map.insert("sort".to_string(), Value::String(sort));
3871                }
3872                let value = client.call("items.list", Some(params))?;
3873                (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
3874            }
3875        }
3876        ItemsCommand::FindDuplicates { .. } => (
3877            client.call("items.findDuplicates", None)?,
3878            JsonStyle::Pretty,
3879        ),
3880        ItemsCommand::Recent {
3881            limit,
3882            offset,
3883            recent_type,
3884            ..
3885        } => {
3886            if recent_type != "added" && recent_type != "modified" {
3887                return Err(format!(
3888                    "--type must be added or modified, got {recent_type:?}"
3889                ));
3890            }
3891            let value = client.call(
3892                "items.getRecent",
3893                Some(
3894                    serde_json::json!({"limit": limit, "offset": offset, "type": recent_type}),
3895                ),
3896            )?;
3897            (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
3898        }
3899        ItemsCommand::Fulltext { key, .. } => (
3900            client.call("items.getFullText", Some(serde_json::json!({"key": key})))?,
3901            JsonStyle::Pretty,
3902        ),
3903        ItemsCommand::Related { key, .. } => (
3904            normalize_list_envelope(
3905                client.call("items.getRelated", Some(serde_json::json!({"key": key})))?,
3906                "items",
3907                None,
3908                0,
3909            ),
3910            JsonStyle::Pretty,
3911        ),
3912        ItemsCommand::CitationKey { key, .. } => (
3913            client.call("items.citationKey", Some(serde_json::json!({"key": key})))?,
3914            JsonStyle::Pretty,
3915        ),
3916    };
3917    Ok((value, style))
3918}
3919
3920fn run_add_identifier_command(
3921    client: &mut impl RpcCaller,
3922    method: &str,
3923    param_name: &str,
3924    param_value: String,
3925    collection: Option<String>,
3926    dry_run: bool,
3927) -> Result<(Value, JsonStyle), String> {
3928    let mut params = Value::Object(serde_json::Map::from_iter([(
3929        param_name.to_string(),
3930        Value::String(param_value),
3931    )]));
3932    maybe_insert_collection(client, &mut params, collection)?;
3933    run_mutation_command(client, method, params, dry_run)
3934}
3935
3936fn run_mutation_command(
3937    client: &mut impl RpcCaller,
3938    method: &str,
3939    params: Value,
3940    dry_run: bool,
3941) -> Result<(Value, JsonStyle), String> {
3942    let value = if dry_run {
3943        serde_json::json!({
3944            "ok": true,
3945            "dryRun": true,
3946            "wouldCall": method,
3947            "wouldCallParams": params,
3948        })
3949    } else {
3950        client.call(method, Some(params))?
3951    };
3952    Ok((value, JsonStyle::PythonCompact))
3953}
3954
3955fn parse_field_options(fields: &[String]) -> Result<serde_json::Map<String, Value>, String> {
3956    let mut parsed = serde_json::Map::new();
3957    for field in fields {
3958        let (key, value) = field
3959            .split_once('=')
3960            .ok_or_else(|| format!("INVALID_ARGS: --field must be key=value, got: {field:?}"))?;
3961        parsed.insert(key.to_string(), Value::String(value.to_string()));
3962    }
3963    Ok(parsed)
3964}
3965
3966fn maybe_insert_collection(
3967    client: &mut impl RpcCaller,
3968    params: &mut Value,
3969    collection: Option<String>,
3970) -> Result<(), String> {
3971    let Some(collection) = collection else {
3972        return Ok(());
3973    };
3974    let collection = resolve_collection(client, &collection)?;
3975    let include = match &collection {
3976        Value::Null => false,
3977        Value::Number(number) => number.as_i64() != Some(0),
3978        _ => true,
3979    };
3980    if include {
3981        params
3982            .as_object_mut()
3983            .expect("mutation params are always objects")
3984            .insert("collection".to_string(), collection);
3985    }
3986    Ok(())
3987}
3988
3989fn run_settings_command(
3990    command: SettingsCommand,
3991    client: &mut impl RpcCaller,
3992) -> Result<(Value, JsonStyle), String> {
3993    let (value, style) = match command {
3994        SettingsCommand::Get { key, .. } => (
3995            client.call("settings.get", Some(serde_json::json!({"key": key})))?,
3996            JsonStyle::Pretty,
3997        ),
3998        SettingsCommand::List { .. } => (client.call("settings.getAll", None)?, JsonStyle::Pretty),
3999        SettingsCommand::Set {
4000            pairs,
4001            file,
4002            dry_run,
4003            ..
4004        } => {
4005            if let Some(file) = file {
4006                // --file mode: read JSON and call settings.setAll
4007                let raw = fs::read_to_string(&file)
4008                    .map_err(|err| format!("INVALID_JSON: Could not read JSON: {err}"))?;
4009                let settings: Value = serde_json::from_str(&raw)
4010                    .map_err(|err| format!("INVALID_JSON: Could not parse JSON: {err}"))?;
4011                if dry_run {
4012                    (
4013                        dry_run_value("settings.setAll", settings),
4014                        JsonStyle::PythonCompact,
4015                    )
4016                } else {
4017                    (
4018                        client.call("settings.setAll", Some(settings))?,
4019                        JsonStyle::PythonCompact,
4020                    )
4021                }
4022            } else if pairs.len() == 2 {
4023                // Single key=value: settings.set
4024                let key = &pairs[0];
4025                let value = &pairs[1];
4026                let parsed_value = serde_json::from_str::<Value>(value)
4027                    .unwrap_or(Value::String(value.clone()));
4028                let params = serde_json::json!({"key": key, "value": parsed_value});
4029                if dry_run {
4030                    (
4031                        dry_run_value("settings.set", params),
4032                        JsonStyle::PythonCompact,
4033                    )
4034                } else {
4035                    (
4036                        client.call("settings.set", Some(params))?,
4037                        JsonStyle::PythonCompact,
4038                    )
4039                }
4040            } else if pairs.len() > 2 && pairs.len() % 2 == 0 {
4041                // Multiple pairs: build a map and call settings.setAll
4042                let mut map = serde_json::Map::new();
4043                for chunk in pairs.chunks(2) {
4044                    let parsed = serde_json::from_str::<Value>(&chunk[1])
4045                        .unwrap_or(Value::String(chunk[1].clone()));
4046                    map.insert(chunk[0].clone(), parsed);
4047                }
4048                let settings = Value::Object(map);
4049                if dry_run {
4050                    (
4051                        dry_run_value("settings.setAll", settings),
4052                        JsonStyle::PythonCompact,
4053                    )
4054                } else {
4055                    (
4056                        client.call("settings.setAll", Some(settings))?,
4057                        JsonStyle::PythonCompact,
4058                    )
4059                }
4060            } else {
4061                return Err(
4062                    "INVALID_ARGS: provide key value pairs (even number of args) or --file".into(),
4063                );
4064            }
4065        }
4066    };
4067    Ok((value, style))
4068}
4069
4070fn run_tags_command(
4071    command: TagsCommand,
4072    client: &mut impl RpcCaller,
4073) -> Result<(Value, JsonStyle), String> {
4074    let (value, style) = match command {
4075        TagsCommand::List { limit, .. } => {
4076            let value = client.call("tags.list", Some(serde_json::json!({"limit": limit})))?;
4077            (normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty)
4078        }
4079        TagsCommand::Rename {
4080            old, new, dry_run, ..
4081        } => run_tag_mutation(
4082            client,
4083            "tags.rename",
4084            serde_json::json!({"oldName": old, "newName": new}),
4085            dry_run,
4086        )?,
4087        TagsCommand::Delete { tag, dry_run, .. } => run_tag_mutation(
4088            client,
4089            "tags.delete",
4090            serde_json::json!({"tag": tag}),
4091            dry_run,
4092        )?,
4093        TagsCommand::Add {
4094            keys, tags, dry_run, ..
4095        } => {
4096            if keys.len() == 1 {
4097                run_tag_mutation(
4098                    client,
4099                    "tags.add",
4100                    serde_json::json!({"key": keys[0], "tags": tags}),
4101                    dry_run,
4102                )?
4103            } else {
4104                run_tag_mutation(
4105                    client,
4106                    "tags.batchUpdate",
4107                    serde_json::json!({"keys": keys, "add": tags}),
4108                    dry_run,
4109                )?
4110            }
4111        }
4112        TagsCommand::Remove {
4113            keys, tags, dry_run, ..
4114        } => {
4115            if keys.len() == 1 {
4116                run_tag_mutation(
4117                    client,
4118                    "tags.remove",
4119                    serde_json::json!({"key": keys[0], "tags": tags}),
4120                    dry_run,
4121                )?
4122            } else {
4123                run_tag_mutation(
4124                    client,
4125                    "tags.batchUpdate",
4126                    serde_json::json!({"keys": keys, "remove": tags}),
4127                    dry_run,
4128                )?
4129            }
4130        }
4131    };
4132    Ok((value, style))
4133}
4134
4135fn run_tag_mutation(
4136    client: &mut impl RpcCaller,
4137    method: &str,
4138    params: Value,
4139    dry_run: bool,
4140) -> Result<(Value, JsonStyle), String> {
4141    if dry_run {
4142        Ok((dry_run_value(method, params), JsonStyle::PythonCompact))
4143    } else {
4144        Ok((client.call(method, Some(params))?, JsonStyle::PythonCompact))
4145    }
4146}
4147
4148fn dry_run_value(method: &str, params: Value) -> Value {
4149    serde_json::json!({
4150        "ok": true,
4151        "dryRun": true,
4152        "wouldCall": method,
4153        "wouldCallParams": params,
4154    })
4155}
4156
4157fn run_annotations_command(
4158    command: AnnotationsCommand,
4159    client: &mut impl RpcCaller,
4160) -> Result<(Value, JsonStyle), String> {
4161    let (value, style) = match command {
4162        AnnotationsCommand::List { parent, .. } => {
4163            let value = client.call(
4164                "annotations.list",
4165                Some(serde_json::json!({"parentKey": parent})),
4166            )?;
4167            (normalize_list_envelope(value, "items", None, 0), JsonStyle::Pretty)
4168        }
4169        AnnotationsCommand::Create {
4170            parent,
4171            annotation_type,
4172            position,
4173            sort_index,
4174            text,
4175            comment,
4176            color,
4177            dry_run,
4178            ..
4179        } => {
4180            if !matches!(
4181                annotation_type.as_str(),
4182                "highlight" | "note" | "underline" | "image" | "ink"
4183            ) {
4184                return Err(format!(
4185                    "INVALID_ARGS: --type must be highlight|note|underline|image|ink, got {annotation_type:?}"
4186                ));
4187            }
4188            let position = position
4189                .ok_or_else(|| "INVALID_ARGS: --position JSON is required".to_string())
4190                .and_then(|raw| {
4191                    serde_json::from_str::<Value>(&raw)
4192                        .map_err(|err| format!("INVALID_JSON: Could not parse --position: {err}"))
4193                })?;
4194            validate_annotation_position(annotation_type.as_str(), &position)?;
4195            let mut params = serde_json::Map::new();
4196            params.insert("parentKey".to_string(), Value::String(parent));
4197            params.insert("type".to_string(), Value::String(annotation_type));
4198            params.insert("color".to_string(), Value::String(color));
4199            params.insert("position".to_string(), position);
4200            if let Some(sort_index) = sort_index {
4201                params.insert(
4202                    "sortIndex".to_string(),
4203                    parse_annotation_sort_index(sort_index)?,
4204                );
4205            }
4206            if let Some(text) = text {
4207                params.insert("text".to_string(), Value::String(text));
4208            }
4209            if let Some(comment) = comment {
4210                params.insert("comment".to_string(), Value::String(comment));
4211            }
4212            run_mutating_command(client, "annotations.create", Value::Object(params), dry_run)?
4213        }
4214        AnnotationsCommand::Delete {
4215            annotation_key,
4216            dry_run,
4217            ..
4218        } => run_mutating_command(
4219            client,
4220            "annotations.delete",
4221            serde_json::json!({"key": annotation_key}),
4222            dry_run,
4223        )?,
4224    };
4225    Ok((value, style))
4226}
4227
4228fn validate_annotation_position(annotation_type: &str, position: &Value) -> Result<(), String> {
4229    position
4230        .get("pageIndex")
4231        .and_then(Value::as_i64)
4232        .filter(|value| *value >= 0)
4233        .ok_or_else(|| {
4234            "INVALID_ARGS: --position must include a non-negative integer pageIndex".to_string()
4235        })?;
4236
4237    if annotation_type == "ink" {
4238        let has_paths = position
4239            .get("paths")
4240            .and_then(Value::as_array)
4241            .is_some_and(|paths| !paths.is_empty());
4242        if !has_paths {
4243            return Err("INVALID_ARGS: ink --position must include non-empty paths".to_string());
4244        }
4245        return Ok(());
4246    }
4247
4248    let valid_rects = position
4249        .get("rects")
4250        .and_then(Value::as_array)
4251        .is_some_and(|rects| !rects.is_empty() && rects.iter().all(is_annotation_rect));
4252    if !valid_rects {
4253        return Err(
4254            "INVALID_ARGS: --position must include non-empty rects of [x1, y1, x2, y2]".to_string(),
4255        );
4256    }
4257    Ok(())
4258}
4259
4260fn is_annotation_rect(value: &Value) -> bool {
4261    value.as_array().is_some_and(|coords| {
4262        coords.len() == 4
4263            && coords
4264                .iter()
4265                .all(|coord| coord.as_f64().is_some_and(f64::is_finite))
4266    })
4267}
4268
4269fn parse_annotation_sort_index(raw: String) -> Result<Value, String> {
4270    let parsed = serde_json::from_str::<Value>(&raw).unwrap_or_else(|_| Value::String(raw));
4271    let valid = match &parsed {
4272        Value::Number(number) => number.as_f64().is_some_and(f64::is_finite),
4273        Value::String(value) => {
4274            is_zotero_pdf_sort_index(value.trim())
4275                || (!value.trim().is_empty()
4276                    && value.trim().parse::<f64>().is_ok_and(f64::is_finite))
4277        }
4278        _ => false,
4279    };
4280    if valid {
4281        Ok(parsed)
4282    } else {
4283        Err(format!(
4284            "INVALID_ARGS: --sort-index must be a finite number or numeric string, got {parsed}"
4285        ))
4286    }
4287}
4288
/// Recognises the `NNNNN|NNNNNN|NNNNN` sort-index form: exactly three
/// `|`-separated segments of 5, 6 and 5 ASCII digits.
fn is_zotero_pdf_sort_index(value: &str) -> bool {
    const SEGMENT_WIDTHS: [usize; 3] = [5, 6, 5];

    let segments: Vec<&str> = value.split('|').collect();
    if segments.len() != SEGMENT_WIDTHS.len() {
        return false;
    }
    segments
        .iter()
        .zip(SEGMENT_WIDTHS.iter())
        .all(|(segment, width)| {
            // Byte length equals digit count once every byte is an ASCII digit.
            segment.len() == *width && segment.bytes().all(|byte| byte.is_ascii_digit())
        })
}
4302
4303fn run_attachments_command(
4304    command: AttachmentsCommand,
4305    client: &mut impl RpcCaller,
4306) -> Result<(Value, JsonStyle), String> {
4307    let value = match command {
4308        AttachmentsCommand::List {
4309            parent,
4310            limit,
4311            offset,
4312            ..
4313        } => normalize_list_envelope(
4314            client.call(
4315                "attachments.list",
4316                Some(serde_json::json!({"parentKey": parent})),
4317            )?,
4318            "items",
4319            Some(limit),
4320            offset,
4321        ),
4322        AttachmentsCommand::Get { key, .. } => {
4323            client.call("attachments.get", Some(serde_json::json!({"key": key})))?
4324        }
4325        AttachmentsCommand::Fulltext { key, .. } => client.call(
4326            "attachments.getFulltext",
4327            Some(serde_json::json!({"key": key})),
4328        )?,
4329        AttachmentsCommand::Path { key, .. } => localize_attachment_path_response(
4330            client.call("attachments.getPath", Some(serde_json::json!({"key": key})))?,
4331        ),
4332        AttachmentsCommand::Add {
4333            parent,
4334            path,
4335            from_url,
4336            title,
4337            dry_run,
4338            ..
4339        } => {
4340            match (path, from_url) {
4341                (Some(p), None) => {
4342                    let mut params = serde_json::json!({"parentKey": parent, "path": zotero_path(&p)});
4343                    insert_optional_string(&mut params, "title", title);
4344                    if dry_run {
4345                        return Ok((
4346                            dry_run_value("attachments.add", params),
4347                            JsonStyle::PythonCompact,
4348                        ));
4349                    }
4350                    return Ok((
4351                        client.call("attachments.add", Some(params))?,
4352                        JsonStyle::PythonCompact,
4353                    ));
4354                }
4355                (None, Some(u)) => {
4356                    let mut params = serde_json::json!({"parentKey": parent, "url": u});
4357                    insert_optional_string(&mut params, "title", title);
4358                    if dry_run {
4359                        return Ok((
4360                            dry_run_value("attachments.addByURL", params),
4361                            JsonStyle::PythonCompact,
4362                        ));
4363                    }
4364                    return Ok((
4365                        client.call("attachments.addByURL", Some(params))?,
4366                        JsonStyle::PythonCompact,
4367                    ));
4368                }
4369                (Some(_), Some(_)) => {
4370                    return Err("INVALID_ARGS: --path and --from-url are mutually exclusive".to_string());
4371                }
4372                (None, None) => {
4373                    return Err("INVALID_ARGS: either --path or --from-url is required".to_string());
4374                }
4375            }
4376        }
4377        AttachmentsCommand::Delete { key, dry_run, .. } => {
4378            let params = serde_json::json!({"key": key});
4379            if dry_run {
4380                return Ok((
4381                    dry_run_value("attachments.delete", params),
4382                    JsonStyle::PythonCompact,
4383                ));
4384            }
4385            return Ok((
4386                client.call("attachments.delete", Some(params))?,
4387                JsonStyle::PythonCompact,
4388            ));
4389        }
4390        AttachmentsCommand::FindPdf { parent, .. } => client.call(
4391            "attachments.findPDF",
4392            Some(serde_json::json!({"parentKey": parent})),
4393        )?,
4394    };
4395    Ok((value, JsonStyle::Pretty))
4396}
4397
4398fn localize_attachment_path_response(mut value: Value) -> Value {
4399    if let Some(path) = value.get("path").and_then(Value::as_str) {
4400        let local = local_path_from_zotero_path(path);
4401        if let Some(map) = value.as_object_mut() {
4402            map.insert("path".to_string(), Value::String(local));
4403        }
4404    }
4405    value
4406}
4407
4408fn run_notes_command(
4409    command: NotesCommand,
4410    client: &mut impl RpcCaller,
4411) -> Result<(Value, JsonStyle), String> {
4412    let (value, style) = match command {
4413        NotesCommand::List {
4414            parent,
4415            limit,
4416            offset,
4417            ..
4418        } => {
4419            let value = client.call(
4420                "notes.list",
4421                Some(serde_json::json!({"parentKey": parent})),
4422            )?;
4423            (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
4424        }
4425        NotesCommand::Get { note_key, .. } => {
4426            let value = client.call("notes.get", Some(serde_json::json!({"key": note_key})))?;
4427            (value, JsonStyle::Pretty)
4428        }
4429        NotesCommand::Create {
4430            parent,
4431            content,
4432            tags,
4433            dry_run,
4434            ..
4435        } => {
4436            let mut params = serde_json::Map::new();
4437            params.insert("parentKey".to_string(), Value::String(parent));
4438            params.insert("content".to_string(), Value::String(content));
4439            if !tags.is_empty() {
4440                params.insert(
4441                    "tags".to_string(),
4442                    Value::Array(tags.into_iter().map(Value::String).collect()),
4443                );
4444            }
4445            run_mutating_command(client, "notes.create", Value::Object(params), dry_run)?
4446        }
4447        NotesCommand::Update {
4448            note_key,
4449            content,
4450            dry_run,
4451            ..
4452        } => run_mutating_command(
4453            client,
4454            "notes.update",
4455            serde_json::json!({"key": note_key, "content": content}),
4456            dry_run,
4457        )?,
4458        NotesCommand::Delete {
4459            note_key, dry_run, ..
4460        } => {
4461            // Python CLI intentionally routes note deletion through items.delete.
4462            run_mutating_command(
4463                client,
4464                "items.delete",
4465                serde_json::json!({"key": note_key}),
4466                dry_run,
4467            )?
4468        }
4469        NotesCommand::Search { query, limit, .. } => {
4470            let value = client.call(
4471                "notes.search",
4472                Some(serde_json::json!({"query": query, "limit": limit})),
4473            )?;
4474            (normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty)
4475        }
4476    };
4477    Ok((value, style))
4478}
4479
4480fn run_mutating_command(
4481    client: &mut impl RpcCaller,
4482    method: &str,
4483    params: Value,
4484    dry_run: bool,
4485) -> Result<(Value, JsonStyle), String> {
4486    if dry_run {
4487        Ok((
4488            serde_json::json!({
4489                "ok": true,
4490                "dryRun": true,
4491                "wouldCall": method,
4492                "wouldCallParams": params,
4493            }),
4494            JsonStyle::PythonCompact,
4495        ))
4496    } else {
4497        client
4498            .call(method, Some(params))
4499            .map(|value| (value, JsonStyle::PythonCompact))
4500    }
4501}
4502
4503fn run_collections_command(
4504    command: CollectionsCommand,
4505    client: &mut impl RpcCaller,
4506) -> Result<(Value, JsonStyle), String> {
4507    let value = match command {
4508        CollectionsCommand::List { .. } => normalize_list_envelope(
4509            client.call("collections.list", None)?,
4510            "items",
4511            None,
4512            0,
4513        ),
4514        CollectionsCommand::Tree { .. } => client.call("collections.tree", None)?,
4515        CollectionsCommand::Get { name_or_id, .. } => {
4516            let key = resolve_collection(client, &name_or_id)?;
4517            client.call("collections.get", Some(serde_json::json!({"key": key})))?
4518        }
4519        CollectionsCommand::GetItems {
4520            name_or_id,
4521            limit,
4522            offset,
4523            ..
4524        } => {
4525            let key = resolve_collection(client, &name_or_id)?;
4526            let mut params = serde_json::json!({"key": key});
4527            if let Some(map) = params.as_object_mut() {
4528                if let Some(limit) = limit {
4529                    map.insert("limit".to_string(), Value::Number(limit.into()));
4530                }
4531                if offset > 0 {
4532                    map.insert("offset".to_string(), Value::Number(offset.into()));
4533                }
4534            }
4535            normalize_list_envelope(
4536                client.call("collections.getItems", Some(params))?,
4537                "items",
4538                limit,
4539                offset,
4540            )
4541        }
4542        CollectionsCommand::Stats { name_or_id, .. } => {
4543            let key = resolve_collection(client, &name_or_id)?;
4544            client.call("collections.stats", Some(serde_json::json!({"key": key})))?
4545        }
4546        CollectionsCommand::Rename {
4547            old_name,
4548            new_name,
4549            dry_run,
4550            ..
4551        } => {
4552            let key = resolve_mutable_collection(client, &old_name, "rename")?;
4553            let params = serde_json::json!({"key": key, "name": new_name});
4554            if dry_run {
4555                return Ok((
4556                    dry_run_value("collections.rename", params),
4557                    JsonStyle::PythonCompact,
4558                ));
4559            }
4560            return Ok((
4561                client.call("collections.rename", Some(params))?,
4562                JsonStyle::PythonCompact,
4563            ));
4564        }
4565        CollectionsCommand::Create {
4566            name,
4567            parent,
4568            dry_run,
4569            ..
4570        } => {
4571            let mut params = serde_json::json!({"name": name});
4572            if let Some(parent) = parent {
4573                let parent_key = resolve_mutable_collection(client, &parent, "use as parent")?;
4574                if let Some(map) = params.as_object_mut() {
4575                    map.insert("parentKey".to_string(), parent_key);
4576                }
4577            }
4578            if dry_run {
4579                return Ok((
4580                    dry_run_value("collections.create", params),
4581                    JsonStyle::PythonCompact,
4582                ));
4583            }
4584            return Ok((
4585                client.call("collections.create", Some(params))?,
4586                JsonStyle::PythonCompact,
4587            ));
4588        }
4589        CollectionsCommand::Delete {
4590            name_or_id,
4591            dry_run,
4592            ..
4593        } => {
4594            let key = resolve_mutable_collection(client, &name_or_id, "delete")?;
4595            let params = serde_json::json!({"key": key});
4596            if dry_run {
4597                return Ok((
4598                    dry_run_value("collections.delete", params),
4599                    JsonStyle::PythonCompact,
4600                ));
4601            }
4602            return Ok((
4603                client.call("collections.delete", Some(params))?,
4604                JsonStyle::PythonCompact,
4605            ));
4606        }
4607        CollectionsCommand::AddItems {
4608            collection,
4609            item_keys,
4610            dry_run,
4611            ..
4612        } => {
4613            let key = resolve_mutable_collection(client, &collection, "add to")?;
4614            let params = serde_json::json!({"key": key, "keys": item_keys});
4615            if dry_run {
4616                return Ok((
4617                    dry_run_value("collections.addItems", params),
4618                    JsonStyle::PythonCompact,
4619                ));
4620            }
4621            return Ok((
4622                client.call("collections.addItems", Some(params))?,
4623                JsonStyle::PythonCompact,
4624            ));
4625        }
4626        CollectionsCommand::RemoveItems {
4627            collection,
4628            item_keys,
4629            dry_run,
4630            ..
4631        } => {
4632            let key = resolve_mutable_collection(client, &collection, "operate on")?;
4633            let params = serde_json::json!({"key": key, "keys": item_keys});
4634            if dry_run {
4635                return Ok((
4636                    dry_run_value("collections.removeItems", params),
4637                    JsonStyle::PythonCompact,
4638                ));
4639            }
4640            return Ok((
4641                client.call("collections.removeItems", Some(params))?,
4642                JsonStyle::PythonCompact,
4643            ));
4644        }
4645    };
4646    Ok((value, JsonStyle::Pretty))
4647}
4648
4649fn resolve_export_keys(
4650    client: &mut impl RpcCaller,
4651    mut keys: Vec<String>,
4652    collection: Option<String>,
4653) -> Result<Vec<String>, String> {
4654    if let Some(name) = collection {
4655        let col_key = resolve_collection(client, &name)?;
4656        let response = client.call(
4657            "collections.getItems",
4658            Some(serde_json::json!({"key": col_key})),
4659        )?;
4660        let items = collection_items(&response);
4661        for item in items {
4662            if let Some(key) = item.get("key").and_then(Value::as_str) {
4663                if !keys.contains(&key.to_string()) {
4664                    keys.push(key.to_string());
4665                }
4666            }
4667        }
4668    }
4669    if keys.is_empty() {
4670        return Err("No item keys provided. Pass positional keys and/or --collection.".to_string());
4671    }
4672    Ok(keys)
4673}
4674
4675fn run_export(args: ExportArgs, client: &mut impl RpcCaller) -> Result<String, String> {
4676    let keys = resolve_export_keys(client, args.keys, args.collection)?;
4677    match args.format.as_str() {
4678        "bibtex" => run_export_content_command(client, "export.bibtex", keys),
4679        "ris" => run_export_content_command(client, "export.ris", keys),
4680        "csl-json" => {
4681            let response =
4682                client.call("export.cslJson", Some(serde_json::json!({"keys": keys})))?;
4683            if let Some(content) = response.get("content") {
4684                format_json(content, JsonStyle::Pretty)
4685            } else {
4686                format_json(&response, JsonStyle::PythonCompact)
4687            }
4688        }
4689        "bibliography" => {
4690            let response = client.call(
4691                "export.bibliography",
4692                Some(serde_json::json!({"keys": keys, "style": args.style})),
4693            )?;
4694            if let Some(object) = response.as_object() {
4695                let field = if args.html { "html" } else { "text" };
4696                if object.contains_key("html") || object.contains_key("text") {
4697                    return raw_value_output(
4698                        object.get(field).unwrap_or(&Value::String(String::new())),
4699                    );
4700                }
4701            }
4702            format_json(&response, JsonStyle::PythonCompact)
4703        }
4704        other => Err(format!(
4705            "INVALID_ARGS: unknown format {other:?}, expected bibtex/ris/csl-json/bibliography"
4706        )),
4707    }
4708}
4709
4710fn run_export_content_command(
4711    client: &mut impl RpcCaller,
4712    method: &str,
4713    keys: Vec<String>,
4714) -> Result<String, String> {
4715    let response = client.call(method, Some(serde_json::json!({"keys": keys})))?;
4716    if let Some(content) = response.get("content") {
4717        raw_value_output(content)
4718    } else {
4719        format_json(&response, JsonStyle::PythonCompact)
4720    }
4721}
4722
4723fn raw_value_output(value: &Value) -> Result<String, String> {
4724    let mut out = match value {
4725        Value::Null => String::new(),
4726        Value::String(content) => content.clone(),
4727        other => to_python_repr(other),
4728    };
4729    out.push('\n');
4730    Ok(out)
4731}
4732
4733fn to_python_repr(value: &Value) -> String {
4734    match value {
4735        Value::Null => "None".to_string(),
4736        Value::Bool(value) => {
4737            if *value {
4738                "True".to_string()
4739            } else {
4740                "False".to_string()
4741            }
4742        }
4743        Value::Number(value) => value.to_string(),
4744        Value::String(value) => format!("'{}'", value.replace('\\', "\\\\").replace('\'', "\\'")),
4745        Value::Array(values) => {
4746            let inner = values
4747                .iter()
4748                .map(to_python_repr)
4749                .collect::<Vec<_>>()
4750                .join(", ");
4751            format!("[{inner}]")
4752        }
4753        Value::Object(entries) => {
4754            let inner = entries
4755                .iter()
4756                .map(|(key, value)| {
4757                    format!("'{}': {}", key.replace('\'', "\\'"), to_python_repr(value))
4758                })
4759                .collect::<Vec<_>>()
4760                .join(", ");
4761            format!("{{{inner}}}")
4762        }
4763    }
4764}
4765
4766fn resolve_collection(client: &mut impl RpcCaller, name_or_id: &str) -> Result<Value, String> {
4767    let trimmed = name_or_id.trim();
4768    if let Ok(id) = trimmed.parse::<i64>() {
4769        return Ok(Value::Number(id.into()));
4770    }
4771
4772    let collections = client.call("collections.list", None)?;
4773    let items = collections
4774        .get("items")
4775        .and_then(Value::as_array)
4776        .or_else(|| collections.as_array())
4777        .ok_or_else(|| "collections.list returned non-array result".to_string())?;
4778
4779    if let Some(collection) = items
4780        .iter()
4781        .find(|collection| collection.get("key").and_then(Value::as_str) == Some(trimmed))
4782    {
4783        return collection_key(collection);
4784    }
4785
4786    let exact = items
4787        .iter()
4788        .filter(|collection| collection.get("name").and_then(Value::as_str) == Some(trimmed))
4789        .collect::<Vec<_>>();
4790    if exact.len() == 1 {
4791        return collection_key(exact[0]);
4792    }
4793
4794    let needle = normalize_collection_name(trimmed);
4795    let fuzzy = items
4796        .iter()
4797        .filter(|collection| {
4798            collection
4799                .get("name")
4800                .and_then(Value::as_str)
4801                .map(normalize_collection_name)
4802                .is_some_and(|name| name.contains(&needle))
4803        })
4804        .collect::<Vec<_>>();
4805
4806    match fuzzy.len() {
4807        1 => collection_key(fuzzy[0]),
4808        0 => Err(format!(
4809            "COLLECTION_NOT_FOUND: No collection named {trimmed:?}"
4810        )),
4811        _ => Err(format!(
4812            "COLLECTION_AMBIGUOUS: Multiple collections match {trimmed:?}"
4813        )),
4814    }
4815}
4816
4817fn collection_key(collection: &Value) -> Result<Value, String> {
4818    collection
4819        .get("key")
4820        .cloned()
4821        .ok_or_else(|| "collection result is missing key".to_string())
4822}
4823
4824fn resolve_mutable_collection(
4825    client: &mut impl RpcCaller,
4826    name_or_id: &str,
4827    operation: &str,
4828) -> Result<Value, String> {
4829    let key = resolve_collection(client, name_or_id)?;
4830    if key.as_i64() == Some(0) {
4831        return Err(format!(
4832            "COLLECTION_NOT_FOUND: {name_or_id:?} resolved to library root (cannot {operation})"
4833        ));
4834    }
4835    Ok(key)
4836}
4837
4838fn insert_optional_string(value: &mut Value, key: &str, maybe: Option<String>) {
4839    if let (Some(map), Some(content)) = (value.as_object_mut(), maybe) {
4840        map.insert(key.to_string(), Value::String(content));
4841    }
4842}
4843
/// Canonicalizes a collection name for fuzzy comparison.
///
/// Leading and trailing whitespace is dropped, every inner whitespace run
/// becomes a single space, and the result is lowercased.
fn normalize_collection_name(name: &str) -> String {
    let mut collapsed = String::with_capacity(name.len());
    for word in name.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    // Lowercase the joined text in one pass, as a whole string.
    collapsed.to_lowercase()
}
4850
4851fn format_json(value: &Value, style: JsonStyle) -> Result<String, String> {
4852    let mut out = match style {
4853        JsonStyle::PythonCompact => to_python_compact_json(value),
4854        JsonStyle::Pretty => serde_json::to_string_pretty(value).map_err(|err| err.to_string())?,
4855    };
4856    out.push('\n');
4857    Ok(out)
4858}
4859
4860fn to_python_compact_json(value: &Value) -> String {
4861    match value {
4862        Value::Null => "null".to_string(),
4863        Value::Bool(value) => value.to_string(),
4864        Value::Number(value) => value.to_string(),
4865        Value::String(value) => {
4866            serde_json::to_string(value).expect("string serialization cannot fail")
4867        }
4868        Value::Array(values) => {
4869            let inner = values
4870                .iter()
4871                .map(to_python_compact_json)
4872                .collect::<Vec<_>>()
4873                .join(", ");
4874            format!("[{inner}]")
4875        }
4876        Value::Object(entries) => {
4877            let inner = entries
4878                .iter()
4879                .map(|(key, value)| {
4880                    let key = serde_json::to_string(key).expect("string serialization cannot fail");
4881                    format!("{key}: {}", to_python_compact_json(value))
4882                })
4883                .collect::<Vec<_>>()
4884                .join(", ");
4885            format!("{{{inner}}}")
4886        }
4887    }
4888}