Skip to main content

zotron_cli/
lib.rs

1//! Minimal typed CLI surface for the Rust migration scaffold.
2
3use std::{
4    env, fs,
5    io::{self, Read},
6    path::{Path, PathBuf},
7    process::Command as ProcessCommand,
8    thread,
9    time::{Duration, Instant, SystemTime, UNIX_EPOCH},
10};
11
12use clap::{error::ErrorKind, Parser, Subcommand};
13use serde_json::Value;
14use zotron_rpc::{StdProviderCommandRunner, UreqProviderHttpTransport, ZoteroRpc};
15use zotron_types::{
16    bm25_score_chunks, build_embedding_provider_request, build_ocr_provider_request,
17    builtin_ocr_provider_specs, cosine_similarity, execute_embedding_provider_request,
18    is_zotron_evidence_artifact, machine_artifact_exists_for_item,
19    machine_artifact_exists_in_sidecar, machine_artifact_store_root,
20    ocr_provider_spec as raw_ocr_provider_spec, parse_embedding_provider_response,
21    parse_ocr_provider_response, read_machine_artifact_sidecar, rrf_merge,
22    write_machine_artifact_sidecar, ArtifactStorePlatform, EmbeddingChunkInput,
23    EmbeddingRequestInput, EmbeddingVector, MachineArtifactKind, OcrRequestInput,
24    ProviderCommandRunner, ProviderHttpInvocation, ProviderHttpTransport, StructureChunk,
25    DEFAULT_RPC_URL,
26};
27
/// Minimal RPC abstraction: a method name plus optional JSON params in, a JSON value or an
/// error message out.
///
/// This file implements it for `ZoteroRpc`.
28pub trait RpcCaller {
    /// Invokes `method` with `params`; errors are reported as plain strings.
29    fn call(&mut self, method: &str, params: Option<Value>) -> Result<Value, String>;
30}
31
/// Serializable, CLI-facing view of an OCR provider contract.
///
/// Values are produced by `cli_ocr_provider_spec` from a `zotron_types::OcrProviderSpec`.
32#[derive(Debug, Clone, PartialEq, serde::Serialize)]
33pub struct CliOcrProviderSpec {
    /// Provider identifier; filled from the upstream `provider_key`.
34    pub id: &'static str,
    /// Provider key; filled from the same upstream `provider_key` as `id`.
35    pub provider: &'static str,
    /// String form of the upstream request style (`request_style.as_str()`).
36    pub request_style: &'static str,
    /// Copied from the upstream spec's `auth` field.
37    pub auth: &'static str,
    /// Copied from the upstream spec's `auth_header` field.
38    pub auth_header: &'static str,
    /// Copied from the upstream spec; presumably whether PDFs can be sent without
    /// rasterizing first — TODO confirm against `zotron_types`.
39    pub supports_pdf_direct: bool,
    /// Copied from the upstream spec's `key_field` field.
40    pub key_field: &'static str,
41}
42
/// Serializable, CLI-facing view of an embedding provider contract.
///
/// Values are produced by `embedding_provider_spec` from the `zotron_types` spec.
43#[derive(Debug, Clone, PartialEq, serde::Serialize)]
44pub struct CliEmbeddingProviderSpec {
    /// Copied from the upstream spec's `id`.
45    pub id: &'static str,
    /// Copied from the upstream spec's `provider_key`.
46    pub provider: &'static str,
    /// Upstream request style as a string; reported as "dashscope" when the provider key
    /// is "alibaba".
47    pub request_style: &'static str,
    /// Upstream default URL, or an empty string when the upstream spec has none.
48    pub default_url: String,
    /// Copied from the upstream spec's `default_model`.
49    pub default_model: &'static str,
    /// Copied from the upstream spec's `auth`.
50    pub auth: &'static str,
    /// Copied from the upstream spec's `key_field`.
51    pub key_field: &'static str,
52}
53
54pub fn ocr_provider_specs() -> Vec<CliOcrProviderSpec> {
55    builtin_ocr_provider_specs()
56        .into_iter()
57        .map(cli_ocr_provider_spec)
58        .collect()
59}
60
61pub fn ocr_provider_spec(provider: &str) -> Result<CliOcrProviderSpec, String> {
62    zotron_types::ocr_provider_spec(provider).map(cli_ocr_provider_spec)
63}
64
65pub fn embedding_provider_spec(provider: &str) -> Result<CliEmbeddingProviderSpec, String> {
66    let spec = zotron_types::embedding_provider_spec(provider)?;
67    Ok(CliEmbeddingProviderSpec {
68        id: spec.id,
69        provider: spec.provider_key,
70        request_style: if spec.provider_key == "alibaba" {
71            "dashscope"
72        } else {
73            spec.request_style.as_str()
74        },
75        default_url: spec.default_url.unwrap_or("").to_string(),
76        default_model: spec.default_model,
77        auth: spec.auth,
78        key_field: spec.key_field,
79    })
80}
81
82pub fn chunks_from_blocks(blocks: &[Value], max_chars: usize) -> Result<Vec<Value>, String> {
83    let typed = blocks
84        .iter()
85        .map(json_block_to_pdf_block)
86        .collect::<Result<Vec<_>, _>>()?;
87    let chunks = zotron_types::chunks_from_blocks(&typed, max_chars);
88    chunks
89        .into_iter()
90        .map(|chunk| chunk_to_cli_value(&chunk, &typed))
91        .collect()
92}
93
94fn cli_ocr_provider_spec(spec: zotron_types::OcrProviderSpec) -> CliOcrProviderSpec {
95    CliOcrProviderSpec {
96        id: spec.provider_key,
97        provider: spec.provider_key,
98        request_style: spec.request_style.as_str(),
99        auth: spec.auth,
100        auth_header: spec.auth_header,
101        supports_pdf_direct: spec.supports_pdf_direct,
102        key_field: spec.key_field,
103    }
104}
105
106fn json_block_to_pdf_block(value: &Value) -> Result<zotron_types::PdfEvidenceBlock, String> {
107    let block_key = value
108        .get("block_key")
109        .and_then(Value::as_str)
110        .ok_or_else(|| "block missing block_key".to_string())?
111        .to_string();
112    let item_key = value
113        .get("item_key")
114        .and_then(Value::as_str)
115        .ok_or_else(|| "block missing item_key".to_string())?
116        .to_string();
117    let attachment_key = value
118        .get("attachment_key")
119        .and_then(Value::as_str)
120        .ok_or_else(|| "block missing attachment_key".to_string())?
121        .to_string();
122    let page_idx = value
123        .get("page_idx")
124        .or_else(|| value.get("page"))
125        .and_then(Value::as_u64)
126        .unwrap_or(1);
127    let block_type = value
128        .get("type")
129        .or_else(|| value.get("block_type"))
130        .and_then(Value::as_str)
131        .unwrap_or("paragraph")
132        .to_string();
133    let section_path = value
134        .get("section_path")
135        .and_then(Value::as_array)
136        .map(|items| {
137            items
138                .iter()
139                .filter_map(Value::as_str)
140                .map(ToString::to_string)
141                .collect::<Vec<_>>()
142        })
143        .unwrap_or_default();
144    let text = value
145        .get("text")
146        .and_then(Value::as_str)
147        .unwrap_or("")
148        .to_string();
149    let bbox = value.get("bbox").and_then(value_bbox4);
150
151    Ok(zotron_types::PdfEvidenceBlock {
152        block_key,
153        item_key,
154        attachment_key,
155        page_idx,
156        block_type,
157        bbox,
158        section_path,
159        text,
160    })
161}
162
163fn chunk_to_cli_value(
164    chunk: &zotron_types::StructureChunk,
165    blocks: &[zotron_types::PdfEvidenceBlock],
166) -> Result<Value, String> {
167    let refs = chunk
168        .block_keys
169        .iter()
170        .filter_map(|key| blocks.iter().find(|block| &block.block_key == key))
171        .map(|block| {
172            serde_json::json!({
173                "block_key": block.block_key,
174                "page_idx": block.page_idx,
175                "bbox": block.bbox.map(|bbox| bbox.iter().map(|n| {
176                    if n.fract() == 0.0 {
177                        Value::from(*n as i64)
178                    } else {
179                        Value::from(*n)
180                    }
181                }).collect::<Vec<_>>()),
182            })
183        })
184        .collect::<Vec<_>>();
185    Ok(serde_json::json!({
186        "chunk_key": chunk.chunk_key,
187        "item_key": chunk.item_key,
188        "attachment_key": chunk.attachment_key,
189        "block_keys": chunk.block_keys,
190        "section_path": chunk.section_path,
191        "text": chunk.text,
192        "page_start": chunk.page_start,
193        "page_end": chunk.page_end,
194        "evidence_refs": refs,
195    }))
196}
197
198fn value_bbox4(value: &Value) -> Option<[f64; 4]> {
199    let arr = value.as_array()?;
200    if arr.len() != 4 {
201        return None;
202    }
203    Some([
204        arr[0].as_f64()?,
205        arr[1].as_f64()?,
206        arr[2].as_f64()?,
207        arr[3].as_f64()?,
208    ])
209}
210
211impl RpcCaller for ZoteroRpc {
212    fn call(&mut self, method: &str, params: Option<Value>) -> Result<Value, String> {
213        self.call(method, params).map_err(|err| err.to_string())
214    }
215}
216
// Root argument parser for the `zotron` binary; it only holds the top-level subcommand.
// (`//` on purpose: clap's derive would render `///` doc comments as help text.)
217#[derive(Debug, Parser)]
218#[command(name = "zotron", about = "Rust client + CLI for the Zotron XPI")]
219struct Cli {
220    #[command(subcommand)]
221    command: Command,
222}
223
// Subcommands nested under `Command::Ocr`.
// (`//` on purpose: clap's derive would render `///` doc comments as help text.)
224#[derive(Debug, Subcommand)]
225enum OcrCommand {
226    /// Print supported OCR provider contracts.
227    Providers,
228    /// Execute an OCR provider request from JSON and emit normalized blocks.
229    #[command(name = "run")]
230    Run {
231        #[arg(long)]
232        provider: String,
        // NOTE(review): `input` and `file` are both optional and not marked as conflicting
        // here; presumably the handler checks that one of them is supplied — confirm.
233        /// Path to an OcrRequestInput JSON file, or "-" to read stdin.
234        #[arg(long)]
235        input: Option<String>,
236        /// Local PDF/image file to encode into an OcrRequestInput.
237        #[arg(long)]
238        file: Option<String>,
239        /// Zotero item key used when --file builds the OCR request.
240        #[arg(long = "item-key")]
241        item_key: Option<String>,
242        /// Zotero attachment key used when --file builds the OCR request.
243        #[arg(long = "attachment-key")]
244        attachment_key: Option<String>,
245        /// MIME type used when --file builds the OCR request.
246        #[arg(long = "mime-type")]
247        mime_type: Option<String>,
248        /// Override the provider endpoint, required for service-hosted PaddleOCR-VL.
249        #[arg(long)]
250        endpoint: Option<String>,
251        /// Environment variable containing the provider bearer token.
252        #[arg(long = "api-key-env")]
253        api_key_env: Option<String>,
254    },
255    /// Show OCR statistics for a collection.
256    Status {
257        #[arg(long)]
258        collection: String,
259        #[arg(long, default_value = DEFAULT_RPC_URL)]
260        url: String,
261    },
262    /// Parse a Zotero PDF through MinerU and write hidden sidecar OCR/RAG artifacts.
263    #[command(name = "process")]
264    Process {
        // The help text above names MinerU, and "mineru" is the default provider here.
265        #[arg(long, default_value = "mineru")]
266        provider: String,
267        /// Parent Zotero item key.
268        #[arg(long)]
269        parent: String,
270        /// Zotero PDF attachment key (auto-resolved from --parent when omitted).
271        #[arg(long)]
272        attachment: Option<String>,
273        /// Public URL for MinerU cloud parsing. Use --result-dir/--result-zip for offline ingestion.
274        #[arg(long = "source-url")]
275        source_url: Option<String>,
276        /// Already-extracted MinerU result directory, used by tests/offline replay.
277        #[arg(long = "result-dir")]
278        result_dir: Option<String>,
279        /// Already-downloaded MinerU result zip, used by tests/offline replay.
280        #[arg(long = "result-zip")]
281        result_zip: Option<String>,
282        /// Override MinerU submit endpoint.
283        #[arg(long = "provider-endpoint")]
284        provider_endpoint: Option<String>,
285        /// Environment variable containing the MinerU bearer token.
286        #[arg(long = "api-key-env", default_value = "ZOTRON_MINERU_API_KEY")]
287        api_key_env: String,
        // Polling cadence and overall deadline, both in seconds (per the flag names).
288        #[arg(long = "poll-interval-seconds", default_value_t = 5)]
289        poll_interval_seconds: u64,
290        #[arg(long = "timeout-seconds", default_value_t = 900)]
291        timeout_seconds: u64,
        // NOTE(review): presumably forwarded as the `max_chars` chunking limit — confirm
        // in the handler.
292        #[arg(long = "chunk-chars", default_value_t = 1200)]
293        chunk_chars: usize,
294        #[arg(long, default_value = DEFAULT_RPC_URL)]
295        url: String,
296    },
297}
298
// Top-level subcommands of the CLI (the `command` field of `Cli`). Variants either carry
// their arguments inline, wrap an args struct (`Search`, `Export`), or nest a further
// subcommand enum.
// (`//` on purpose: clap's derive would render `///` doc comments as help text.)
299#[derive(Debug, Subcommand)]
300enum Command {
301    /// Check that Zotero is running with the Zotron XPI enabled.
302    Ping {
303        #[arg(long, default_value = DEFAULT_RPC_URL)]
304        url: String,
305    },
306    /// Generic RPC escape hatch.
307    Rpc {
308        method: String,
        // Positional JSON params; defaults to an empty JSON object.
309        #[arg(default_value = "{}")]
310        params_json: String,
311        #[arg(long, default_value = DEFAULT_RPC_URL)]
312        url: String,
313        #[arg(long)]
314        paginate: bool,
315        #[arg(long, default_value_t = 100)]
316        page_size: usize,
317    },
318    /// Push prepared Zotero JSON (from file or stdin) to Zotero.
319    Push {
320        /// Path to a JSON file, or "-" to read from stdin.
321        json_file: String,
322        /// Optional PDF attachment path.
323        #[arg(long)]
324        pdf: Option<String>,
325        /// Collection name (fuzzy) or key.
326        #[arg(long)]
327        collection: Option<String>,
        // NOTE(review): accepted as a free-form string; the allowed values listed in the
        // help text are not enforced by clap here.
328        /// Duplicate handling: skip | update | create.
329        #[arg(long = "on-duplicate", default_value = "skip")]
330        on_duplicate: String,
331        #[arg(long, default_value = DEFAULT_RPC_URL)]
332        url: String,
333        /// Parse input + resolve collection only; do not push to Zotero.
334        #[arg(long = "dry-run")]
335        dry_run: bool,
336    },
337    /// System and plugin introspection commands.
338    System {
339        #[command(subcommand)]
340        command: SystemCommand,
341    },
342    /// Search items by text, tag, identifier, or structured conditions.
343    Search(SearchArgs),
344    /// Inspect and manage Zotero items.
345    Items {
346        #[command(subcommand)]
347        command: ItemsCommand,
348    },
349    /// Inspect Zotero collections.
350    Collections {
351        #[command(subcommand)]
352        command: CollectionsCommand,
353    },
354    /// Inspect Zotero notes.
355    Notes {
356        #[command(subcommand)]
357        command: NotesCommand,
358    },
359    /// Inspect Zotero preferences.
360    Settings {
361        #[command(subcommand)]
362        command: SettingsCommand,
363    },
364    /// Inspect and manage Zotero tags.
365    Tags {
366        #[command(subcommand)]
367        command: TagsCommand,
368    },
369    /// Export items as BibTeX, RIS, CSL-JSON, or formatted bibliography.
370    Export(ExportArgs),
371    /// List, create, and delete PDF annotations.
372    Annotations {
373        #[command(subcommand)]
374        command: AnnotationsCommand,
375    },
376    /// OCR PDFs and manage raw/block/chunk evidence artifacts.
377    Ocr {
378        #[command(subcommand)]
379        command: OcrCommand,
380    },
381    /// Build and search retrieval artifacts.
382    Rag {
383        #[command(subcommand)]
384        command: RagCommand,
385    },
386}
387
/// Plain options bundle whose fields match the arguments of `RagCommand::Search`, minus
/// `url`.
///
/// NOTE(review): presumably filled from the parsed CLI variant and handed to the search
/// implementation — the construction site is not visible here.
388struct RagSearchOptions {
389    query: String,
390    collection: Option<String>,
391    keys: Vec<String>,
392    zotero: bool,
393    top_spans_per_item: u64,
394    include_fulltext_spans: bool,
395    top_k: u64,
396    output: String,
397}
398
// Subcommands nested under `Command::Rag`.
// (`//` on purpose: clap's derive would render `///` doc comments as help text.)
399#[derive(Debug, Subcommand)]
400enum RagCommand {
401    /// Print supported embedding provider contracts.
402    #[command(name = "providers")]
403    Providers,
404    /// Execute an embedding provider request from JSON and emit vectors.
405    #[command(name = "embed")]
406    Embed {
407        #[arg(long)]
408        provider: String,
409        /// Path to an EmbeddingRequestInput JSON file, or "-" to read stdin.
410        #[arg(long)]
411        input: String,
412        /// Override the embedding endpoint.
413        #[arg(long)]
414        endpoint: Option<String>,
415        /// Override the embedding model.
416        #[arg(long)]
417        model: Option<String>,
418        /// Override provider input type, for example document or query.
419        #[arg(long = "input-type")]
420        input_type: Option<String>,
421        /// Environment variable containing the provider bearer token.
422        #[arg(long = "api-key-env")]
423        api_key_env: Option<String>,
424    },
425    /// Show index status for a collection.
426    Status {
427        #[arg(long)]
428        collection: String,
429        #[arg(long, default_value = DEFAULT_RPC_URL)]
430        url: String,
431    },
432    /// Emit academic-zh retrieval hits with item_key/title/text provenance.
433    #[command(name = "search")]
434    Search {
435        query: String,
436        #[arg(long)]
437        collection: Option<String>,
438        /// Limit retrieval to one or more Zotero item keys.
        // Canonical flag is `--key`; `--keys` is accepted as a hidden alias.
439        #[arg(long = "key", alias = "keys")]
440        keys: Vec<String>,
441        #[arg(long)]
442        zotero: bool,
443        #[arg(long = "top-spans-per-item", default_value_t = 3)]
444        top_spans_per_item: u64,
445        #[arg(long = "include-fulltext-spans")]
446        include_fulltext_spans: bool,
        // Exposed as `--limit`, with `--top-k` as a hidden alias; stored as `top_k`.
447        #[arg(long = "limit", alias = "top-k", default_value_t = 50)]
448        top_k: u64,
        // clap restricts the value to "json" or "jsonl".
449        #[arg(long, default_value = "json", value_parser = ["json", "jsonl"])]
450        output: String,
451        #[arg(long, default_value = DEFAULT_RPC_URL)]
452        url: String,
453    },
454}
455
// Subcommands nested under `Command::System`. Every variant takes `--url`, defaulting to
// `DEFAULT_RPC_URL`.
// (`//` on purpose: clap's derive would render `///` doc comments as help text.)
456#[derive(Debug, Subcommand)]
457enum SystemCommand {
458    /// Show XPI version and exposed method metadata.
459    Version {
460        #[arg(long, default_value = DEFAULT_RPC_URL)]
461        url: String,
462    },
463    /// List all libraries (user + groups).
464    Libraries {
465        #[arg(long, default_value = DEFAULT_RPC_URL)]
466        url: String,
467    },
468    /// Get statistics for the current (or specified) library.
469    #[command(name = "library-stats")]
470    LibraryStats {
        // Optional numeric library selector; per the help text, omitting it targets the
        // current library.
471        #[arg(long)]
472        library: Option<i64>,
473        #[arg(long, default_value = DEFAULT_RPC_URL)]
474        url: String,
475    },
476    /// Show item type schema. Without --type, lists all types. With --type, shows fields and creator types.
477    Schema {
        // Flag is `--type`; the field is named `item_type` because `type` is a Rust keyword.
478        #[arg(long = "type")]
479        item_type: Option<String>,
480        #[arg(long, default_value = DEFAULT_RPC_URL)]
481        url: String,
482    },
483    /// Get the currently selected Zotero collection (or null).
484    #[command(name = "current-collection")]
485    CurrentCollection {
486        #[arg(long, default_value = DEFAULT_RPC_URL)]
487        url: String,
488    },
489    /// List RPC methods, or describe a specific method.
490    Methods {
491        /// Method name to describe. Omit to list all methods.
492        method: Option<String>,
493        #[arg(long, default_value = DEFAULT_RPC_URL)]
494        url: String,
495    },
496}
497
// Arguments for `Command::Search`: an optional positional query plus filter flags, and an
// optional management subcommand for saved searches.
// (`//` on purpose: clap's derive would render `///` doc comments as help text.)
498#[derive(Debug, clap::Args)]
499struct SearchArgs {
500    /// Search query (title/creator/year by default; PDF content with --fulltext).
501    query: Option<String>,
502    /// Search inside PDF full-text content instead of metadata.
503    #[arg(long)]
504    fulltext: bool,
505    /// Filter by author/creator name (contains match).
506    #[arg(long)]
507    author: Option<String>,
508    /// Filter by date after (YYYY or YYYY-MM-DD).
509    #[arg(long)]
510    after: Option<String>,
511    /// Filter by date before (YYYY or YYYY-MM-DD).
512    #[arg(long)]
513    before: Option<String>,
514    /// Filter by journal/publication title (contains match).
515    #[arg(long)]
516    journal: Option<String>,
517    /// Filter by tag (exact match).
518    #[arg(long)]
519    tag: Option<String>,
520    /// Find by DOI.
521    #[arg(long)]
522    doi: Option<String>,
523    /// Find by ISBN.
524    #[arg(long)]
525    isbn: Option<String>,
526    /// Find by ISSN.
527    #[arg(long)]
528    issn: Option<String>,
529    /// Limit results to a collection name or key.
530    #[arg(long)]
531    collection: Option<String>,
532    #[arg(long, default_value_t = 50)]
533    limit: u64,
534    #[arg(long, default_value_t = 0)]
535    offset: u64,
536    #[arg(long, default_value = DEFAULT_RPC_URL)]
537    url: String,
    // NOTE(review): an optional positional `query` next to an optional subcommand means a
    // query equal to a subcommand name (e.g. "saved-searches") is presumably parsed as the
    // subcommand — confirm this is acceptable. The subcommands also declare their own
    // `--url`.
538    #[command(subcommand)]
539    management: Option<SearchManagementCommand>,
540}
541
// Saved-search management subcommands, reachable through `SearchArgs::management`.
// (`//` on purpose: clap's derive would render `///` doc comments as help text.)
542#[derive(Debug, Subcommand)]
543enum SearchManagementCommand {
544    /// List all saved searches in the library.
545    #[command(name = "saved-searches")]
546    SavedSearches {
547        #[arg(long, default_value = DEFAULT_RPC_URL)]
548        url: String,
549    },
550    /// Create a saved search with one or more conditions.
551    #[command(name = "create-saved")]
552    CreateSaved {
553        name: String,
        // Repeatable `--condition`; clap requires at least one occurrence.
        // NOTE(review): the condition string syntax is parsed elsewhere and not visible here.
554        #[arg(long = "condition", required = true)]
555        condition: Vec<String>,
556        #[arg(long)]
557        dry_run: bool,
558        #[arg(long, default_value = DEFAULT_RPC_URL)]
559        url: String,
560    },
561    /// Delete a saved search by key.
562    #[command(name = "delete-saved")]
563    DeleteSaved {
564        search_key: String,
565        #[arg(long)]
566        dry_run: bool,
567        #[arg(long, default_value = DEFAULT_RPC_URL)]
568        url: String,
569    },
570}
571
// Subcommands nested under `Command::Items`. Mutating variants carry a `dry_run` flag;
// every variant takes `--url`, defaulting to `DEFAULT_RPC_URL`.
// (`//` on purpose: clap's derive would render `///` doc comments as help text.)
572#[derive(Debug, Subcommand)]
573enum ItemsCommand {
574    /// Add an item by DOI, ISBN, URL, local file, or manual entry (--type + --field).
575    Add {
        // NOTE(review): the source selectors below are all optional and not declared as
        // conflicting; presumably the handler picks or validates exactly one — confirm.
576        #[arg(long)]
577        doi: Option<String>,
578        #[arg(long)]
579        isbn: Option<String>,
580        /// Web page URL to add from.
581        #[arg(long = "from-url")]
582        from_url: Option<String>,
583        /// Local file path to add from.
584        #[arg(long)]
585        file: Option<String>,
586        /// Item type for manual creation (e.g. journalArticle).
587        #[arg(long = "type")]
588        item_type: Option<String>,
589        /// Field values for manual creation (e.g. title="My Paper").
590        #[arg(long = "field")]
591        fields: Vec<String>,
592        #[arg(long)]
593        collection: Option<String>,
594        #[arg(long)]
595        dry_run: bool,
596        #[arg(long, default_value = DEFAULT_RPC_URL)]
597        url: String,
598    },
599    /// Update fields on an existing item.
600    Update {
601        key: String,
        // Repeatable `--field`; not marked required, so an empty list is accepted by clap.
602        #[arg(long = "field")]
603        fields: Vec<String>,
604        #[arg(long)]
605        dry_run: bool,
606        #[arg(long, default_value = DEFAULT_RPC_URL)]
607        url: String,
608    },
609    /// Permanently delete an item.
610    Delete {
611        key: String,
612        #[arg(long)]
613        dry_run: bool,
614        #[arg(long, default_value = DEFAULT_RPC_URL)]
615        url: String,
616    },
617    /// Move one or more items to trash.
618    Trash {
        // Positional list; not marked required, so clap accepts zero items.
619        items: Vec<String>,
620        #[arg(long)]
621        dry_run: bool,
622        #[arg(long, default_value = DEFAULT_RPC_URL)]
623        url: String,
624    },
625    /// Restore a trashed item.
626    Restore {
627        item: String,
628        #[arg(long)]
629        dry_run: bool,
630        #[arg(long, default_value = DEFAULT_RPC_URL)]
631        url: String,
632    },
633    /// Merge a group of duplicate items.
634    #[command(name = "merge-duplicates")]
635    MergeDuplicates {
636        keys: Vec<String>,
637        #[arg(long)]
638        dry_run: bool,
639        #[arg(long, default_value = DEFAULT_RPC_URL)]
640        url: String,
641    },
642    /// Add a related-item link between two items.
643    #[command(name = "add-related")]
644    AddRelated {
645        key: String,
646        #[arg(long)]
647        target: String,
648        #[arg(long)]
649        dry_run: bool,
650        #[arg(long, default_value = DEFAULT_RPC_URL)]
651        url: String,
652    },
653    /// Remove a related-item link between two items.
654    #[command(name = "remove-related")]
655    RemoveRelated {
656        key: String,
657        #[arg(long)]
658        target: String,
659        #[arg(long)]
660        dry_run: bool,
661        #[arg(long, default_value = DEFAULT_RPC_URL)]
662        url: String,
663    },
664    /// Print the full serialization of an item by key.
665    Get {
666        item: String,
667        #[arg(long, default_value = DEFAULT_RPC_URL)]
668        url: String,
669    },
670    /// List items in the library with optional sorting and pagination.
671    List {
672        #[arg(long, default_value_t = 50)]
673        limit: u64,
674        #[arg(long, default_value_t = 0)]
675        offset: u64,
676        #[arg(long)]
677        sort: Option<String>,
        // Free-form string defaulting to "asc"; clap does not restrict the value here.
678        #[arg(long, default_value = "asc")]
679        direction: String,
680        /// List trashed items instead of regular items.
681        #[arg(long)]
682        trash: bool,
683        #[arg(long, default_value = DEFAULT_RPC_URL)]
684        url: String,
685    },
686    /// Run Zotero's duplicate scan and print groups.
687    #[command(name = "find-duplicates")]
688    FindDuplicates {
689        #[arg(long, default_value = DEFAULT_RPC_URL)]
690        url: String,
691    },
692    /// List recently added or modified items.
693    Recent {
694        #[arg(long, default_value_t = 20)]
695        limit: u64,
696        #[arg(long, default_value_t = 0)]
697        offset: u64,
        // Flag is `--type`, defaulting to "added".
        // NOTE(review): the other accepted value is presumably "modified" (per the help
        // text) — confirm in the handler.
698        #[arg(long = "type", default_value = "added")]
699        recent_type: String,
700        #[arg(long, default_value = DEFAULT_RPC_URL)]
701        url: String,
702    },
703    /// Retrieve the full-text content of an item's attachment.
704    Fulltext {
705        key: String,
706        #[arg(long, default_value = DEFAULT_RPC_URL)]
707        url: String,
708    },
709    /// List items related to the given item.
710    Related {
711        key: String,
712        #[arg(long, default_value = DEFAULT_RPC_URL)]
713        url: String,
714    },
715    /// Get the citation key for an item.
716    #[command(name = "citation-key")]
717    CitationKey {
718        key: String,
719        #[arg(long, default_value = DEFAULT_RPC_URL)]
720        url: String,
721    },
722    /// Get the local filesystem path of an item's PDF attachment.
723    Path {
724        key: String,
725        #[arg(long, default_value = DEFAULT_RPC_URL)]
726        url: String,
727    },
728    /// List attachments belonging to an item.
729    Attachments {
730        key: String,
        // NOTE(review): `--offset` is offered without a matching `--limit`, unlike the
        // other paginated listings in this enum — confirm this is intended.
731        #[arg(long, default_value_t = 0)]
732        offset: u64,
733        #[arg(long, default_value = DEFAULT_RPC_URL)]
734        url: String,
735    },
736    /// Batch find missing PDFs in a collection via Zotero's resolver chain.
737    #[command(name = "find-pdfs")]
738    FindPdfs {
739        #[arg(long)]
740        collection: String,
        // NOTE(review): the default of 0 presumably means "no limit" — confirm in the
        // handler. Typed `usize`, unlike the `u64` limits elsewhere in this enum.
741        #[arg(long, default_value_t = 0)]
742        limit: usize,
743        #[arg(long, default_value = DEFAULT_RPC_URL)]
744        url: String,
745    },
746}
747
// Subcommands nested under `Command::Settings`.
// (`//` on purpose: clap's derive would render `///` doc comments as help text.)
748#[derive(Debug, Subcommand)]
749enum SettingsCommand {
750    /// Get a single Zotero preference value.
751    Get {
752        key: String,
753        #[arg(long, default_value = DEFAULT_RPC_URL)]
754        url: String,
755    },
756    /// List all Zotero preferences as a key->value dict.
    // Also reachable as `get-all`, which is shown in help because the alias is visible.
757    #[command(visible_alias = "get-all")]
758    List {
759        #[arg(long, default_value = DEFAULT_RPC_URL)]
760        url: String,
761    },
762    /// Set one or more Zotero preferences (key value pairs), or bulk-set from a JSON file.
763    Set {
        // Flat positional list; clap does not check that the count is even, so pairing
        // must be validated by the handler.
764        /// key value key value ... (pairs of positional args)
765        pairs: Vec<String>,
766        /// Bulk-set from a JSON file.
767        #[arg(long)]
768        file: Option<String>,
769        #[arg(long)]
770        dry_run: bool,
771        #[arg(long, default_value = DEFAULT_RPC_URL)]
772        url: String,
773    },
774}
775
// Subcommands nested under `Command::Tags`.
// (`//` on purpose: clap's derive would render `///` doc comments as help text.)
776#[derive(Debug, Subcommand)]
777enum TagsCommand {
778    /// List all tags in the library (flat).
779    List {
780        #[arg(long, default_value_t = 200)]
781        limit: u64,
782        #[arg(long, default_value = DEFAULT_RPC_URL)]
783        url: String,
784    },
785    /// Rename a tag across all items.
786    Rename {
787        old: String,
788        new: String,
789        #[arg(long)]
790        dry_run: bool,
791        #[arg(long, default_value = DEFAULT_RPC_URL)]
792        url: String,
793    },
794    /// Delete a tag library-wide.
795    Delete {
796        tag: String,
797        #[arg(long)]
798        dry_run: bool,
799        #[arg(long, default_value = DEFAULT_RPC_URL)]
800        url: String,
801    },
802    /// Add tags to one or more items.
803    Add {
        // Item keys are positional; tags come from repeatable `--tag`, at least one required.
804        keys: Vec<String>,
805        #[arg(long = "tag", required = true)]
806        tags: Vec<String>,
807        #[arg(long)]
808        dry_run: bool,
809        #[arg(long, default_value = DEFAULT_RPC_URL)]
810        url: String,
811    },
812    /// Remove tags from one or more items.
813    Remove {
        // Same shape as `Add`: positional item keys plus required, repeatable `--tag`.
814        keys: Vec<String>,
815        #[arg(long = "tag", required = true)]
816        tags: Vec<String>,
817        #[arg(long)]
818        dry_run: bool,
819        #[arg(long, default_value = DEFAULT_RPC_URL)]
820        url: String,
821    },
822}
823
// Arguments for `Command::Export`.
// (`//` on purpose: clap's derive would render `///` doc comments as help text.)
824#[derive(Debug, clap::Args)]
825struct ExportArgs {
    // NOTE(review): `keys` may be empty and `collection` is optional; presumably the
    // handler requires one of the two — confirm.
826    /// Item keys to export.
827    keys: Vec<String>,
    // Free-form string; the formats listed in the help text are not enforced by clap here.
828    /// Output format: bibtex, ris, csl-json, bibliography.
829    #[arg(long, default_value = "bibtex")]
830    format: String,
831    /// Export all items from this collection (name or key).
832    #[arg(long)]
833    collection: Option<String>,
834    /// Citation style URL (only for bibliography format).
835    #[arg(long, default_value = "http://www.zotero.org/styles/apa")]
836    style: String,
837    /// Output HTML instead of plain text (only for bibliography format).
838    #[arg(long)]
839    html: bool,
840    #[arg(long, default_value = DEFAULT_RPC_URL)]
841    url: String,
842}
843
// Subcommands nested under `Command::Annotations`.
// (`//` on purpose: clap's derive would render `///` doc comments as help text.)
844#[derive(Debug, Subcommand)]
845enum AnnotationsCommand {
846    /// List annotations on a PDF. Accepts an item key (auto-resolves to PDF) or attachment key.
847    List {
848        /// Item key or attachment key
849        parent: String,
850        /// Use a specific attachment when the item has multiple PDFs
851        #[arg(long)]
852        attachment: Option<String>,
853        #[arg(long, default_value = DEFAULT_RPC_URL)]
854        url: String,
855    },
856    /// Create a new annotation on a PDF. Accepts an item key (auto-resolves to PDF) or attachment key.
857    Create {
858        /// Item key or attachment key
859        parent: String,
860        /// Use a specific attachment when the item has multiple PDFs
861        #[arg(long)]
862        attachment: Option<String>,
        // Flag is `--type`; the field is named `annotation_type` because `type` is a Rust
        // keyword. Optional, with no default declared here.
863        #[arg(long = "type")]
864        annotation_type: Option<String>,
        // `position` and `quote` are both optional at the clap level; per the help text a
        // position is only needed when no quote is given.
865        /// JSON annotation position, for example '{"pageIndex":0,"rects":[[10,20,30,40]]}'.
866        /// Not required when --quote is given.
867        #[arg(long)]
868        position: Option<String>,
869        /// Text to locate in the PDF and highlight. Resolves to rects automatically.
870        /// Locates text headlessly (no PDF viewer required).
871        /// Use with --dry-run for locate-only mode.
872        #[arg(long)]
873        quote: Option<String>,
874        /// Restrict quote search to a specific page (0-indexed).
875        #[arg(long)]
876        page: Option<u32>,
877        /// Zotero annotation sort index.
878        #[arg(long = "sort-index")]
879        sort_index: Option<String>,
880        #[arg(long)]
881        text: Option<String>,
882        #[arg(long)]
883        comment: Option<String>,
        // Default color is the hex string "#ffd400".
884        #[arg(long, default_value = "#ffd400")]
885        color: String,
886        #[arg(long)]
887        dry_run: bool,
888        #[arg(long, default_value = DEFAULT_RPC_URL)]
889        url: String,
890    },
891    /// Delete an annotation by key.
892    Delete {
893        annotation_key: String,
894        #[arg(long)]
895        dry_run: bool,
896        #[arg(long, default_value = DEFAULT_RPC_URL)]
897        url: String,
898    },
899}
900
// Subcommands nested under `Command::Notes`.
// (`//` on purpose: clap's derive would render `///` doc comments as help text.)
901#[derive(Debug, Subcommand)]
902enum NotesCommand {
903    /// List notes attached to a parent item.
904    List {
        // The parent is a required `--parent` flag here, not a positional argument.
905        #[arg(long)]
906        parent: String,
907        #[arg(long, default_value_t = 50)]
908        limit: u64,
909        #[arg(long, default_value_t = 0)]
910        offset: u64,
911        #[arg(long, default_value = DEFAULT_RPC_URL)]
912        url: String,
913    },
914    /// Get a single note by key.
915    Get {
916        note_key: String,
917        #[arg(long, default_value = DEFAULT_RPC_URL)]
918        url: String,
919    },
920    /// Create a note attached to a parent item.
921    Create {
922        #[arg(long)]
923        parent: String,
924        #[arg(long)]
925        content: String,
        // Repeatable `--tag`; optional, so a note can be created without tags.
926        #[arg(long = "tag")]
927        tags: Vec<String>,
928        #[arg(long)]
929        dry_run: bool,
930        #[arg(long, default_value = DEFAULT_RPC_URL)]
931        url: String,
932    },
933    /// Update the content of an existing note.
934    Update {
935        note_key: String,
936        #[arg(long)]
937        content: String,
938        #[arg(long)]
939        dry_run: bool,
940        #[arg(long, default_value = DEFAULT_RPC_URL)]
941        url: String,
942    },
943    /// Delete a note by key.
944    Delete {
945        note_key: String,
946        #[arg(long)]
947        dry_run: bool,
948        #[arg(long, default_value = DEFAULT_RPC_URL)]
949        url: String,
950    },
951    /// Search notes by text content.
952    Search {
953        query: String,
954        #[arg(long, default_value_t = 50)]
955        limit: u64,
956        #[arg(long, default_value = DEFAULT_RPC_URL)]
957        url: String,
958    },
959}
960
// Subcommands of the `collections` namespace (dispatched via `Command::Collections`).
//
// Every variant carries its own `--url` flag, defaulting to `DEFAULT_RPC_URL`;
// `command_url` reads it to decide which RPC endpoint the client talks to.
// The mutating variants (`Rename`, `Create`, `Delete`, `AddItems`, `RemoveItems`)
// also accept `--dry-run`. `GetItems` is exposed as `get-items` with the visible
// alias `items`; `AddItems`/`RemoveItems` take the collection followed by any
// number of item keys as positional arguments.
//
// NOTE: plain `//` comments are used here on purpose; `///` comments on clap
// derive items become user-visible help text.
961#[derive(Debug, Subcommand)]
962enum CollectionsCommand {
963    /// List all collections in the user library (flat).
964    List {
965        #[arg(long, default_value = DEFAULT_RPC_URL)]
966        url: String,
967    },
968    /// Print the collection hierarchy as a tree.
969    Tree {
970        #[arg(long, default_value = DEFAULT_RPC_URL)]
971        url: String,
972    },
973    /// Get a single collection's metadata.
974    Get {
975        name_or_id: String,
976        #[arg(long, default_value = DEFAULT_RPC_URL)]
977        url: String,
978    },
979    /// List all items in a collection.
980    #[command(name = "get-items", visible_alias = "items")]
981    GetItems {
982        name_or_id: String,
983        #[arg(long)]
984        limit: Option<u64>,
985        #[arg(long, default_value_t = 0)]
986        offset: u64,
987        #[arg(long, default_value = DEFAULT_RPC_URL)]
988        url: String,
989    },
990    /// Show item/attachment/note/subcollection counts for a collection.
991    Stats {
992        name_or_id: String,
993        #[arg(long, default_value = DEFAULT_RPC_URL)]
994        url: String,
995    },
996    /// Rename a collection.
997    Rename {
998        old_name: String,
999        new_name: String,
1000        #[arg(long, default_value = DEFAULT_RPC_URL)]
1001        url: String,
1002        #[arg(long)]
1003        dry_run: bool,
1004    },
1005    /// Create a collection, optionally nested under a parent.
1006    Create {
1007        name: String,
1008        #[arg(long)]
1009        parent: Option<String>,
1010        #[arg(long, default_value = DEFAULT_RPC_URL)]
1011        url: String,
1012        #[arg(long)]
1013        dry_run: bool,
1014    },
1015    /// Delete a collection.
1016    Delete {
1017        name_or_id: String,
1018        #[arg(long, default_value = DEFAULT_RPC_URL)]
1019        url: String,
1020        #[arg(long)]
1021        dry_run: bool,
1022    },
1023    /// Add existing items to a collection.
1024    #[command(name = "add-items")]
1025    AddItems {
1026        collection: String,
1027        item_keys: Vec<String>,
1028        #[arg(long, default_value = DEFAULT_RPC_URL)]
1029        url: String,
1030        #[arg(long)]
1031        dry_run: bool,
1032    },
1033    /// Remove items from a collection.
1034    #[command(name = "remove-items")]
1035    RemoveItems {
1036        collection: String,
1037        item_keys: Vec<String>,
1038        #[arg(long, default_value = DEFAULT_RPC_URL)]
1039        url: String,
1040        #[arg(long)]
1041        dry_run: bool,
1042    },
1043}
1044
/// Output style passed to `format_json` when rendering a command result.
///
/// In this part of the file, `ocr providers` is rendered with `Pretty` while the
/// other OCR subcommands use `PythonCompact`.
1045#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1046enum JsonStyle {
1047    /// Matches Python's top-level `ping` command (`json.dumps` defaults).
1048    PythonCompact,
1049    /// Matches namespace commands that route through Python `emit(..., indent=2)`.
1050    Pretty,
1051}
1052
/// Result of a successful `parse_cli` call.
///
/// * `Command` holds the fully parsed CLI value.
/// * `Display` holds text clap rendered for a help or version request; callers
///   (`run`, `run_with_client`) return it as ordinary output without running a command.
1053enum ParseOutcome<T> {
1054    Command(T),
1055    Display(String),
1056}
1057
1058fn parse_cli<T>(
1059    args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1060) -> Result<ParseOutcome<T>, String>
1061where
1062    T: Parser,
1063{
1064    match T::try_parse_from(args) {
1065        Ok(cli) => Ok(ParseOutcome::Command(cli)),
1066        Err(err)
1067            if matches!(
1068                err.kind(),
1069                ErrorKind::DisplayHelp | ErrorKind::DisplayVersion
1070            ) =>
1071        {
1072            Ok(ParseOutcome::Display(err.to_string()))
1073        }
1074        Err(err) => Err(err.to_string()),
1075    }
1076}
1077
1078pub fn format_error_json(message: &str) -> String {
1079    let message = message.trim_end();
1080    let (code, message) = split_error_code(message).unwrap_or(("RUNTIME_ERROR", message));
1081    serde_json::json!({"error": {"code": code, "message": message}}).to_string()
1082}
1083
/// Splits a `CODE: message` string into `(code, message)`.
///
/// The text before the first `:` counts as a code only when it is non-empty and
/// consists solely of ASCII uppercase letters, ASCII digits and `_`. Leading
/// whitespace of the remainder is stripped. Returns `None` when there is no colon
/// or the prefix does not look like a code.
fn split_error_code(message: &str) -> Option<(&str, &str)> {
    let (code, rest) = message.split_once(':')?;
    let is_code_char = |ch: char| ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '_';
    if code.is_empty() || !code.chars().all(is_code_char) {
        return None;
    }
    Some((code, rest.trim_start()))
}
1096
1097pub fn run(
1098    args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1099) -> Result<String, String> {
1100    // Emit plugin hint for Claude Code discovery
1101    if std::env::var("CLAUDECODE").as_deref() == Ok("1") {
1102        eprintln!(r#"<claude-code-hint v="1" type="plugin" value="zotron@dianzuan/zotron" />"#);
1103    }
1104
1105    let cli = match parse_cli::<Cli>(args)? {
1106        ParseOutcome::Command(cli) => cli,
1107        ParseOutcome::Display(output) => return Ok(output),
1108    };
1109    let url = command_url(&cli.command);
1110    let mut client = ZoteroRpc::new(url);
1111    run_command(cli.command, &mut client)
1112}
1113
1114pub fn run_with_client(
1115    args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1116    client: &mut impl RpcCaller,
1117) -> Result<String, String> {
1118    let cli = match parse_cli::<Cli>(args)? {
1119        ParseOutcome::Command(cli) => cli,
1120        ParseOutcome::Display(output) => return Ok(output),
1121    };
1122    run_command(cli.command, client)
1123}
1124
1125fn rag_command_url(command: &RagCommand) -> String {
1126    match command {
1127        RagCommand::Providers => DEFAULT_RPC_URL.to_string(),
1128        RagCommand::Embed { .. } => DEFAULT_RPC_URL.to_string(),
1129        RagCommand::Status { url, .. } => url.clone(),
1130        RagCommand::Search { url, .. } => url.clone(),
1131    }
1132}
1133
1134fn command_url(command: &Command) -> String {
1135    match command {
1136        Command::Ping { url }
1137        | Command::Rpc { url, .. }
1138        | Command::Push { url, .. }
1139        => url.clone(),
1140        Command::Ocr { command } => match command {
1141            OcrCommand::Providers => DEFAULT_RPC_URL.to_string(),
1142            OcrCommand::Run { .. } => DEFAULT_RPC_URL.to_string(),
1143            OcrCommand::Status { url, .. } => url.clone(),
1144            OcrCommand::Process { url, .. } => url.clone(),
1145        },
1146        Command::Rag { command } => rag_command_url(command),
1147        Command::System { command } => match command {
1148            SystemCommand::Version { url }
1149            | SystemCommand::Libraries { url }
1150            | SystemCommand::LibraryStats { url, .. }
1151            | SystemCommand::Schema { url, .. }
1152            | SystemCommand::CurrentCollection { url }
1153            | SystemCommand::Methods { url, .. } => url.clone(),
1154        },
1155        Command::Search(ref args) => match &args.management {
1156            Some(SearchManagementCommand::SavedSearches { url })
1157            | Some(SearchManagementCommand::CreateSaved { url, .. })
1158            | Some(SearchManagementCommand::DeleteSaved { url, .. }) => url.clone(),
1159            None => args.url.clone(),
1160        },
1161        Command::Items { command } => match command {
1162            ItemsCommand::Add { url, .. }
1163            | ItemsCommand::Update { url, .. }
1164            | ItemsCommand::Delete { url, .. }
1165            | ItemsCommand::Trash { url, .. }
1166            | ItemsCommand::Restore { url, .. }
1167            | ItemsCommand::MergeDuplicates { url, .. }
1168            | ItemsCommand::AddRelated { url, .. }
1169            | ItemsCommand::RemoveRelated { url, .. }
1170            | ItemsCommand::Get { url, .. }
1171            | ItemsCommand::List { url, .. }
1172            | ItemsCommand::FindDuplicates { url }
1173            | ItemsCommand::Recent { url, .. }
1174            | ItemsCommand::Fulltext { url, .. }
1175            | ItemsCommand::Related { url, .. }
1176            | ItemsCommand::CitationKey { url, .. }
1177            | ItemsCommand::Path { url, .. }
1178            | ItemsCommand::Attachments { url, .. }
1179            | ItemsCommand::FindPdfs { url, .. } => url.clone(),
1180        },
1181        Command::Collections { command } => match command {
1182            CollectionsCommand::List { url }
1183            | CollectionsCommand::Tree { url }
1184            | CollectionsCommand::Get { url, .. }
1185            | CollectionsCommand::GetItems { url, .. }
1186            | CollectionsCommand::Stats { url, .. }
1187            | CollectionsCommand::Rename { url, .. }
1188            | CollectionsCommand::Create { url, .. }
1189            | CollectionsCommand::Delete { url, .. }
1190            | CollectionsCommand::AddItems { url, .. }
1191            | CollectionsCommand::RemoveItems { url, .. } => url.clone(),
1192        },
1193        Command::Notes { command } => match command {
1194            NotesCommand::List { url, .. }
1195            | NotesCommand::Get { url, .. }
1196            | NotesCommand::Create { url, .. }
1197            | NotesCommand::Update { url, .. }
1198            | NotesCommand::Delete { url, .. }
1199            | NotesCommand::Search { url, .. } => url.clone(),
1200        },
1201        Command::Settings { command } => match command {
1202            SettingsCommand::Get { url, .. }
1203            | SettingsCommand::List { url }
1204            | SettingsCommand::Set { url, .. } => url.clone(),
1205        },
1206        Command::Tags { command } => match command {
1207            TagsCommand::List { url, .. }
1208            | TagsCommand::Rename { url, .. }
1209            | TagsCommand::Delete { url, .. }
1210            | TagsCommand::Add { url, .. }
1211            | TagsCommand::Remove { url, .. } => url.clone(),
1212        },
1213        Command::Export(ref args) => args.url.clone(),
1214        Command::Annotations { command } => match command {
1215            AnnotationsCommand::List { url, .. }
1216            | AnnotationsCommand::Create { url, .. }
1217            | AnnotationsCommand::Delete { url, .. } => url.clone(),
1218        },
1219    }
1220}
1221
1222fn run_ocr_command(command: OcrCommand, client: &mut impl RpcCaller) -> Result<String, String> {
1223    if let OcrCommand::Providers = &command {
1224        return format_json(
1225            &serde_json::json!({ "providers": ocr_provider_specs() }),
1226            JsonStyle::Pretty,
1227        );
1228    }
1229    let value = match command {
1230        OcrCommand::Providers => unreachable!(),
1231        OcrCommand::Run {
1232            provider,
1233            input,
1234            file,
1235            item_key,
1236            attachment_key,
1237            mime_type,
1238            endpoint,
1239            api_key_env,
1240        } => run_ocr_run_command(OcrRunOptions {
1241            provider,
1242            input,
1243            file,
1244            item_key,
1245            attachment_key,
1246            mime_type,
1247            endpoint,
1248            api_key_env,
1249        })?,
1250        OcrCommand::Status { collection, .. } => run_ocr_status_command(client, collection)?,
1251        OcrCommand::Process {
1252            provider,
1253            parent,
1254            attachment,
1255            source_url,
1256            result_dir,
1257            result_zip,
1258            provider_endpoint,
1259            api_key_env,
1260            poll_interval_seconds,
1261            timeout_seconds,
1262            chunk_chars,
1263            ..
1264        } => run_ocr_process_command(
1265            client,
1266            OcrProcessOptions {
1267                provider,
1268                parent,
1269                attachment,
1270                source_url,
1271                result_dir,
1272                result_zip,
1273                provider_endpoint,
1274                api_key_env,
1275                poll_interval_seconds,
1276                timeout_seconds,
1277                chunk_chars,
1278            },
1279        )?,
1280    };
1281    format_json(&value, JsonStyle::PythonCompact)
1282}
1283
/// Arguments of `ocr process`, bundled so they can be passed to
/// `run_ocr_process_command` and its helpers as one value.
///
/// * `provider` - OCR provider id, looked up with `raw_ocr_provider_spec`.
/// * `parent` - key of the parent item; used as the `item_key` of requests and sidecars.
/// * `attachment` - attachment key; when `None`, `run_ocr_process_command` resolves the
///   first PDF attachment of `parent` and stores it back here.
/// * `source_url` / `result_dir` / `result_zip` - MinerU input sources; the command
///   rejects `result_dir` together with `result_zip`, and `source_url` together with either.
/// * `provider_endpoint` - optional endpoint override.
/// * `api_key_env` - name of the environment variable holding the API key (may be empty).
/// * `poll_interval_seconds` / `timeout_seconds` - forwarded to the MinerU polling helpers.
/// * `chunk_chars` - chunk size forwarded to the chunking step.
1284struct OcrProcessOptions {
1285    provider: String,
1286    parent: String,
1287    attachment: Option<String>,
1288    source_url: Option<String>,
1289    result_dir: Option<String>,
1290    result_zip: Option<String>,
1291    provider_endpoint: Option<String>,
1292    api_key_env: String,
1293    poll_interval_seconds: u64,
1294    timeout_seconds: u64,
1295    chunk_chars: usize,
1296}
1297
/// Arguments of `ocr run`, bundled for `run_ocr_run_command`.
///
/// * `provider` - OCR provider id.
/// * `input` / `file` - exactly one must be set: `input` is handed to `read_json_input`
///   to obtain an `OcrRequestInput`, `file` is handed to `ocr_input_from_file`.
/// * `item_key`, `attachment_key`, `mime_type` - only forwarded on the `file` path.
/// * `endpoint` - when set, used as the request URL instead of the one in the built request.
/// * `api_key_env` - optional value forwarded to `provider_http_transport_with_auth`
///   (presumably the name of the environment variable holding the API key - confirm there).
1298struct OcrRunOptions {
1299    provider: String,
1300    input: Option<String>,
1301    file: Option<String>,
1302    item_key: Option<String>,
1303    attachment_key: Option<String>,
1304    mime_type: Option<String>,
1305    endpoint: Option<String>,
1306    api_key_env: Option<String>,
1307}
1308
1309fn run_ocr_run_command(options: OcrRunOptions) -> Result<Value, String> {
1310    let input: OcrRequestInput = match (options.input, options.file) {
1311        (Some(input), None) => read_json_input(&input)?,
1312        (None, Some(file)) => ocr_input_from_file(
1313            file,
1314            options.item_key,
1315            options.attachment_key,
1316            options.mime_type,
1317        )?,
1318        (Some(_), Some(_)) => {
1319            return Err("INVALID_ARGS: use either --input or --file, not both".to_string())
1320        }
1321        (None, None) => return Err("INVALID_ARGS: provide --input JSON or --file".to_string()),
1322    };
1323    let request = build_ocr_provider_request(&options.provider, &input)?;
1324    let payload = if request.command.is_empty() {
1325        let method = request
1326            .method
1327            .ok_or_else(|| format!("OCR provider {} missing HTTP method", request.provider))?;
1328        let auth_scheme = raw_ocr_provider_spec(&options.provider)?.auth;
1329        let mut transport =
1330            provider_http_transport_with_auth(options.api_key_env.as_deref(), auth_scheme)?;
1331        transport.post_json(&ProviderHttpInvocation {
1332            provider: request.provider.to_string(),
1333            style: request.style.to_string(),
1334            method: method.to_string(),
1335            url: options
1336                .endpoint
1337                .or_else(|| request.url.map(ToString::to_string)),
1338            auth_header_name: request.auth_header.map(ToString::to_string),
1339            auth_header_value: None,
1340            body: request.body,
1341        })?
1342    } else {
1343        let mut command_runner = StdProviderCommandRunner;
1344        command_runner.run_json(&request.command)?
1345    };
1346    let blocks = match parse_ocr_provider_response(
1347        request.provider,
1348        &payload,
1349        &input.item_key,
1350        &input.attachment_key,
1351    ) {
1352        Ok(blocks) => blocks,
1353        Err(err) => {
1354            if let Some(task) = ocr_async_task_result(request.provider, &payload) {
1355                return Ok(task);
1356            }
1357            return Err(err);
1358        }
1359    };
1360
1361    Ok(serde_json::json!({
1362        "provider": request.provider,
1363        "blocks": blocks,
1364    }))
1365}
1366
1367fn run_ocr_process_command(
1368    client: &mut impl RpcCaller,
1369    mut options: OcrProcessOptions,
1370) -> Result<Value, String> {
1371    let spec = raw_ocr_provider_spec(&options.provider)?;
1372
1373    let attachment = match options.attachment.take() {
1374        Some(key) => key,
1375        None => resolve_first_pdf_attachment_key(client, &options.parent)?,
1376    };
1377    options.attachment = Some(attachment.clone());
1378
1379    let attachment_path = resolve_attachment_path(client, &attachment)?;
1380    let storage_dir = attachment_path
1381        .parent()
1382        .ok_or_else(|| {
1383            format!(
1384                "ATTACHMENT_PATH_INVALID: attachment path has no parent directory: {}",
1385                attachment_path.display()
1386            )
1387        })?
1388        .to_path_buf();
1389
1390    match spec.provider_key {
1391        "mineru" | "mineru-cli" => {
1392            if options.result_dir.is_some() && options.result_zip.is_some() {
1393                return Err("INVALID_ARGS: use either --result-dir or --result-zip, not both".to_string());
1394            }
1395            if options.source_url.is_some()
1396                && (options.result_dir.is_some() || options.result_zip.is_some())
1397            {
1398                return Err(
1399                    "INVALID_ARGS: --source-url cannot be combined with --result-dir/--result-zip"
1400                        .to_string(),
1401                );
1402            }
1403            let file_name = attachment_path
1404                .file_name()
1405                .and_then(|name| name.to_str())
1406                .unwrap_or("document.pdf")
1407                .to_string();
1408            let source = load_mineru_result_source(&options, &attachment_path, &file_name)?;
1409            let artifacts = persist_mineru_result_sidecars(
1410                &storage_dir, &options.parent, &attachment,
1411                &options.provider, &source, options.chunk_chars,
1412            )?;
1413            Ok(serde_json::json!({
1414                "provider": spec.provider_key,
1415                "status": "indexed",
1416                "item_key": options.parent,
1417                "attachment_key": attachment,
1418                "attachment_path": attachment_path,
1419                "storage_dir": storage_dir,
1420                "task_id": source.task_id,
1421                "state": source.state,
1422                "blocks": artifacts.block_count,
1423                "chunks": artifacts.chunk_count,
1424                "artifacts": artifacts.artifacts,
1425            }))
1426        }
1427        _ => {
1428            run_ocr_process_sync(
1429                client, &options, spec.provider_key,
1430                &attachment, &attachment_path, &storage_dir,
1431            )
1432        }
1433    }
1434}
1435
1436fn run_ocr_process_sync(
1437    client: &mut impl RpcCaller,
1438    options: &OcrProcessOptions,
1439    provider: &str,
1440    attachment_key: &str,
1441    attachment_path: &Path,
1442    storage_dir: &Path,
1443) -> Result<Value, String> {
1444    let api_url = if let Some(endpoint) = &options.provider_endpoint {
1445        endpoint.clone()
1446    } else {
1447        let settings = client.call("settings.getAll", None)?;
1448        settings.get("ocr.apiUrl")
1449            .and_then(Value::as_str)
1450            .unwrap_or("")
1451            .to_string()
1452    };
1453    if api_url.is_empty() {
1454        return Err(format!("MISSING_CONFIG: ocr.apiUrl not configured for provider {provider}"));
1455    }
1456
1457    let api_key = {
1458        let from_env = if !options.api_key_env.is_empty() {
1459            env::var(&options.api_key_env).ok().filter(|v| !v.is_empty())
1460        } else {
1461            None
1462        };
1463        from_env.unwrap_or_else(|| {
1464            client.call("settings.getRaw", Some(serde_json::json!({"key": "ocr.apiKey"})))
1465                .ok()
1466                .and_then(|raw| raw.get("ocr.apiKey").and_then(Value::as_str).map(String::from))
1467                .unwrap_or_default()
1468        })
1469    };
1470
1471    let pdf_bytes = fs::read(attachment_path)
1472        .map_err(|e| format!("READ_PDF_FAILED: {}: {e}", attachment_path.display()))?;
1473
1474    const MAX_PDF_SIZE: usize = 100 * 1024 * 1024; // 100 MB
1475    if pdf_bytes.len() > MAX_PDF_SIZE {
1476        return Err(format!(
1477            "PDF_TOO_LARGE: {} is {} MB, max {} MB",
1478            attachment_path.display(),
1479            pdf_bytes.len() / (1024 * 1024),
1480            MAX_PDF_SIZE / (1024 * 1024),
1481        ));
1482    }
1483
1484    let base64_pdf = format!("data:application/pdf;base64,{}", base64_encode(&pdf_bytes));
1485
1486    let input = OcrRequestInput {
1487        content_base64: base64_pdf,
1488        file_name: attachment_path
1489            .file_name()
1490            .and_then(|n| n.to_str())
1491            .unwrap_or("document.pdf")
1492            .to_string(),
1493        mime_type: "application/pdf".to_string(),
1494        item_key: options.parent.clone(),
1495        attachment_key: attachment_key.to_string(),
1496        source_url: None,
1497        local_path: Some(attachment_path.to_string_lossy().to_string()),
1498        output_dir: None,
1499    };
1500    let request = build_ocr_provider_request(provider, &input)?;
1501
1502    let payload = if request.command.is_empty() {
1503        let method = request
1504            .method
1505            .ok_or_else(|| format!("OCR provider {provider} missing HTTP method"))?;
1506        let spec = raw_ocr_provider_spec(provider)?;
1507
1508        let mut transport = if !api_key.is_empty() {
1509            match spec.auth {
1510                "bearer" => UreqProviderHttpTransport::with_bearer_token(&api_key),
1511                "token" => UreqProviderHttpTransport::with_api_key(format!("token {api_key}")),
1512                _ => UreqProviderHttpTransport::new(),
1513            }
1514        } else {
1515            UreqProviderHttpTransport::new()
1516        };
1517
1518        transport.post_json(&ProviderHttpInvocation {
1519            provider: request.provider.to_string(),
1520            style: request.style.to_string(),
1521            method: method.to_string(),
1522            url: Some(api_url),
1523            auth_header_name: request.auth_header.map(ToString::to_string),
1524            auth_header_value: None,
1525            body: request.body,
1526        })?
1527    } else {
1528        let mut runner = StdProviderCommandRunner;
1529        runner.run_json(&request.command)?
1530    };
1531
1532    let blocks = parse_ocr_provider_response(provider, &payload, &options.parent, attachment_key)?;
1533    let chunks = zotron_types::chunks_from_blocks(&blocks, options.chunk_chars);
1534
1535    let artifacts = vec![
1536        write_sidecar_json(
1537            storage_dir, &options.parent, attachment_key,
1538            MachineArtifactKind::OcrRaw, &payload,
1539        )?,
1540        write_sidecar_jsonl(
1541            storage_dir, &options.parent, attachment_key,
1542            MachineArtifactKind::Blocks, &blocks,
1543        )?,
1544        write_sidecar_jsonl(
1545            storage_dir, &options.parent, attachment_key,
1546            MachineArtifactKind::Chunks, &chunks,
1547        )?,
1548    ];
1549
1550    let embedding_count = embed_sidecar_chunks(client, storage_dir, &options.parent, attachment_key, &chunks);
1551
1552    Ok(serde_json::json!({
1553        "provider": provider,
1554        "status": "indexed",
1555        "item_key": options.parent,
1556        "attachment_key": attachment_key,
1557        "embeddings": embedding_count,
1558        "attachment_path": attachment_path,
1559        "storage_dir": storage_dir,
1560        "blocks": blocks.len(),
1561        "chunks": chunks.len(),
1562        "artifacts": artifacts,
1563    }))
1564}
1565
1566fn embed_sidecar_chunks(
1567    client: &mut impl RpcCaller,
1568    storage_dir: &Path,
1569    item_key: &str,
1570    _attachment_key: &str,
1571    chunks: &[zotron_types::StructureChunk],
1572) -> usize {
1573    let Ok((provider, model, api_url, api_key)) = fetch_embedding_settings(client) else {
1574        return 0;
1575    };
1576    if provider.is_empty() || (api_key.is_empty() && provider != "ollama") {
1577        return 0;
1578    }
1579    let emb_chunks: Vec<EmbeddingChunkInput> = chunks
1580        .iter()
1581        .map(|c| EmbeddingChunkInput {
1582            chunk_key: c.chunk_key.clone(),
1583            text: c.text.clone(),
1584        })
1585        .collect();
1586    if emb_chunks.is_empty() {
1587        return 0;
1588    }
1589    // Batch in groups of 20 to avoid API limits
1590    let batch_size = 20;
1591    let mut all_vectors: Vec<EmbeddingVector> = Vec::new();
1592    for batch in emb_chunks.chunks(batch_size) {
1593        let input = EmbeddingRequestInput {
1594            item_key: item_key.to_string(),
1595            chunks: batch.to_vec(),
1596            model: if model.is_empty() { None } else { Some(model.clone()) },
1597            url: if api_url.is_empty() { None } else { Some(api_url.clone()) },
1598            input_type: Some("document".to_string()),
1599        };
1600        let Ok(request) = build_embedding_provider_request(&provider, &input) else {
1601            break;
1602        };
1603        let Some(url) = request.url.as_deref() else { break };
1604        let mut http = ureq::post(url).set("Content-Type", "application/json");
1605        if let Some(auth) = request.auth_header {
1606            if !api_key.is_empty() {
1607                http = http.set(auth, &format!("Bearer {api_key}"));
1608            }
1609        }
1610        let Ok(resp) = http.send_json(&request.body) else { break };
1611        let Ok(payload): Result<Value, _> = resp.into_json() else { break };
1612        let Ok(vectors) = parse_embedding_provider_response(&provider, &payload, item_key, batch)
1613        else {
1614            break;
1615        };
1616        all_vectors.extend(vectors);
1617    }
1618    let count = all_vectors.len();
1619    if count > 0 {
1620        let filename = embedding_vector_filename(&provider, &model);
1621        let vectors_dir = storage_dir.join(".zotron").join("embeddings");
1622        fs::create_dir_all(&vectors_dir).map_err(|e| {
1623            eprintln!("warning: cannot create embeddings dir {}: {e}", vectors_dir.display());
1624            e
1625        }).ok();
1626        let vectors_path = vectors_dir.join(&filename);
1627        let mut out = String::new();
1628        for v in &all_vectors {
1629            if let Ok(line) = serde_json::to_string(v) {
1630                out.push_str(&line);
1631                out.push('\n');
1632            }
1633        }
1634        if let Err(e) = fs::write(&vectors_path, &out) {
1635            eprintln!("warning: failed to persist embeddings to {}: {e}", vectors_path.display());
1636        }
1637    }
1638    count
1639}
1640
/// A MinerU OCR result, as returned by `load_mineru_result_source` and consumed by
/// `persist_mineru_result_sidecars`.
///
/// `task_id` and `state` are echoed in the `ocr process` JSON output. For remote runs
/// the callers in this file pass the task id (URL submission) or the batch id (local
/// file upload) to the constructor as the id.
///
/// NOTE(review): the values are populated by `mineru_result_source_from_dir`, which
/// is outside this section. Presumably `result_dir` is the directory holding the
/// extracted result files, `raw_zip_bytes` the downloaded/provided zip, and
/// `task_status` the final polling response - confirm against that function.
1641struct MineruResultSource {
1642    task_id: Option<String>,
1643    state: String,
1644    result_dir: PathBuf,
1645    raw_zip_bytes: Option<Vec<u8>>,
1646    task_status: Option<Value>,
1647    payload: Value,
1648    content_list_file: Option<PathBuf>,
1649    markdown: Option<String>,
1650}
1651
/// Summary returned by `persist_mineru_result_sidecars`.
///
/// `run_ocr_process_command` reports `block_count` as `"blocks"`, `chunk_count` as
/// `"chunks"` and `artifacts` as `"artifacts"` in its JSON output.
1652struct PersistedOcrArtifacts {
1653    block_count: usize,
1654    chunk_count: usize,
1655    artifacts: Vec<Value>,
1656}
1657
1658fn resolve_attachment_path(
1659    client: &mut impl RpcCaller,
1660    attachment_key: &str,
1661) -> Result<PathBuf, String> {
1662    let payload = client.call(
1663        "attachments.getPath",
1664        Some(serde_json::json!({"key": attachment_key})),
1665    )?;
1666    let raw_path = payload
1667        .get("path")
1668        .and_then(Value::as_str)
1669        .filter(|path| !path.trim().is_empty())
1670        .ok_or_else(|| {
1671            format!("ATTACHMENT_PATH_NOT_FOUND: attachment {attachment_key} has no local PDF path")
1672        })?;
1673    Ok(PathBuf::from(local_path_from_zotero_path(raw_path)))
1674}
1675
1676/// Resolve the first PDF attachment key for a parent item via `attachments.list`.
1677fn resolve_first_pdf_attachment_key(
1678    client: &mut impl RpcCaller,
1679    parent_key: &str,
1680) -> Result<String, String> {
1681    let response = client.call(
1682        "attachments.list",
1683        Some(serde_json::json!({"parentKey": parent_key})),
1684    )?;
1685    // The XPI returns {items: [...], total: N}.
1686    let attachments = response
1687        .get("items")
1688        .and_then(Value::as_array)
1689        .or_else(|| response.as_array())
1690        .ok_or_else(|| {
1691            format!("NO_PDF_ATTACHMENT: no attachments found for item {parent_key}")
1692        })?;
1693    for attachment in attachments {
1694        if is_pdf_attachment(attachment) {
1695            if let Some(key) = attachment.get("key").and_then(Value::as_str) {
1696                return Ok(key.to_string());
1697            }
1698        }
1699    }
1700    Err(format!(
1701        "NO_PDF_ATTACHMENT: no PDF attachment found for item {parent_key}"
1702    ))
1703}
1704
1705fn load_mineru_result_source(
1706    options: &OcrProcessOptions,
1707    attachment_path: &Path,
1708    file_name: &str,
1709) -> Result<MineruResultSource, String> {
1710    if let Some(result_dir) = options.result_dir.as_deref() {
1711        return mineru_result_source_from_dir(PathBuf::from(result_dir), None, None, None);
1712    }
1713    if let Some(result_zip) = options.result_zip.as_deref() {
1714        let zip_path = PathBuf::from(result_zip);
1715        let zip_bytes = fs::read(&zip_path)
1716            .map_err(|err| format!("read MinerU result zip {}: {err}", zip_path.display()))?;
1717        let result_dir = extract_zip_bytes_to_temp("zotron-mineru-result", &zip_bytes)?;
1718        return mineru_result_source_from_dir(result_dir, Some(zip_bytes), None, None);
1719    }
1720
1721    let Some(source_url) = options
1722        .source_url
1723        .as_deref()
1724        .filter(|value| !value.trim().is_empty())
1725    else {
1726        return submit_mineru_local_file(options, attachment_path, file_name);
1727    };
1728    let input = OcrRequestInput {
1729        item_key: options.parent.clone(),
1730        attachment_key: options.attachment.clone().expect("attachment resolved"),
1731        file_name: file_name.to_string(),
1732        mime_type: "application/pdf".to_string(),
1733        content_base64: format!("url:{source_url}"),
1734        source_url: Some(source_url.to_string()),
1735        local_path: None,
1736        output_dir: None,
1737    };
1738    let task = submit_mineru_task(
1739        &options.provider,
1740        &input,
1741        options.provider_endpoint.clone(),
1742        &options.api_key_env,
1743    )?;
1744    let task_id = task
1745        .get("data")
1746        .and_then(|data| data.get("task_id"))
1747        .and_then(Value::as_str)
1748        .ok_or_else(|| "MinerU submit response missing data.task_id".to_string())?
1749        .to_string();
1750    let auth_header = provider_auth_header_value(&options.api_key_env, "bearer")?;
1751    let status = poll_mineru_task(
1752        options.provider_endpoint.as_deref(),
1753        &task_id,
1754        &auth_header,
1755        options.poll_interval_seconds,
1756        options.timeout_seconds,
1757    )?;
1758    let zip_url = status
1759        .pointer("/data/full_zip_url")
1760        .or_else(|| status.pointer("/data/result/full_zip_url"))
1761        .and_then(Value::as_str)
1762        .ok_or_else(|| "MinerU completed task missing data.full_zip_url".to_string())?;
1763    let zip_bytes = download_bytes(zip_url)?;
1764    let result_dir = extract_zip_bytes_to_temp("zotron-mineru-result", &zip_bytes)?;
1765    mineru_result_source_from_dir(result_dir, Some(zip_bytes), Some(status), Some(task_id))
1766}
1767
1768fn submit_mineru_local_file(
1769    options: &OcrProcessOptions,
1770    attachment_path: &Path,
1771    file_name: &str,
1772) -> Result<MineruResultSource, String> {
1773    let auth_header = provider_auth_header_value(&options.api_key_env, "bearer")?;
1774    let upload_request = create_mineru_file_upload(
1775        options.provider_endpoint.as_deref(),
1776        file_name,
1777        options.attachment.as_deref().expect("attachment resolved"),
1778        &auth_header,
1779    )?;
1780    let upload_url = upload_request
1781        .pointer("/data/file_urls/0")
1782        .or_else(|| upload_request.pointer("/data/fileUrls/0"))
1783        .and_then(Value::as_str)
1784        .ok_or_else(|| "MinerU upload URL response missing data.file_urls[0]".to_string())?;
1785    let batch_id = upload_request
1786        .pointer("/data/batch_id")
1787        .or_else(|| upload_request.pointer("/data/batchId"))
1788        .and_then(Value::as_str)
1789        .ok_or_else(|| "MinerU upload URL response missing data.batch_id".to_string())?
1790        .to_string();
1791    let bytes = fs::read(attachment_path)
1792        .map_err(|err| format!("read attachment PDF {}: {err}", attachment_path.display()))?;
1793    put_bytes(upload_url, &bytes)?;
1794    let status = poll_mineru_batch(
1795        options.provider_endpoint.as_deref(),
1796        &batch_id,
1797        &auth_header,
1798        options.poll_interval_seconds,
1799        options.timeout_seconds,
1800    )?;
1801    let zip_url = mineru_batch_zip_url(&status)
1802        .ok_or_else(|| "MinerU completed batch missing full_zip_url".to_string())?;
1803    let zip_bytes = download_bytes(&zip_url)?;
1804    let result_dir = extract_zip_bytes_to_temp("zotron-mineru-result", &zip_bytes)?;
1805    mineru_result_source_from_dir(result_dir, Some(zip_bytes), Some(status), Some(batch_id))
1806}
1807
1808fn create_mineru_file_upload(
1809    endpoint: Option<&str>,
1810    file_name: &str,
1811    data_id: &str,
1812    auth_header: &str,
1813) -> Result<Value, String> {
1814    let url = mineru_file_urls_url(endpoint);
1815    let body = serde_json::json!({
1816        "files": [{"name": file_name, "data_id": data_id}],
1817        "model_version": "vlm",
1818        "is_ocr": false,
1819        "enable_formula": true,
1820        "enable_table": true,
1821        "language": "ch",
1822        "page_ranges": "1-200",
1823    });
1824    ureq::post(&url)
1825        .set("Authorization", auth_header)
1826        .send_json(body)
1827        .map_err(|err| format!("POST {url} failed: {err}"))?
1828        .into_json::<Value>()
1829        .map_err(|err| format!("POST {url} returned invalid JSON: {err}"))
1830}
1831
1832fn put_bytes(url: &str, bytes: &[u8]) -> Result<(), String> {
1833    ureq::put(url)
1834        .send_bytes(bytes)
1835        .map_err(|err| format!("PUT {url} failed: {err}"))?;
1836    Ok(())
1837}
1838
1839fn submit_mineru_task(
1840    provider: &str,
1841    input: &OcrRequestInput,
1842    endpoint: Option<String>,
1843    api_key_env: &str,
1844) -> Result<Value, String> {
1845    let request = build_ocr_provider_request(provider, input)?;
1846    let method = request
1847        .method
1848        .ok_or_else(|| "MinerU provider missing HTTP method".to_string())?;
1849    let mut transport = provider_http_transport_with_auth(Some(api_key_env), "bearer")?;
1850    transport.post_json(&ProviderHttpInvocation {
1851        provider: request.provider.to_string(),
1852        style: request.style.to_string(),
1853        method: method.to_string(),
1854        url: endpoint.or_else(|| request.url.map(ToString::to_string)),
1855        auth_header_name: request.auth_header.map(ToString::to_string),
1856        auth_header_value: None,
1857        body: request.body,
1858    })
1859}
1860
1861fn poll_mineru_task(
1862    endpoint: Option<&str>,
1863    task_id: &str,
1864    auth_header: &str,
1865    poll_interval_seconds: u64,
1866    timeout_seconds: u64,
1867) -> Result<Value, String> {
1868    let url = mineru_task_status_url(endpoint, task_id);
1869    let started = Instant::now();
1870    loop {
1871        let status = get_json_with_auth(&url, auth_header)?;
1872        let state = status
1873            .pointer("/data/state")
1874            .or_else(|| status.pointer("/data/status"))
1875            .and_then(Value::as_str)
1876            .unwrap_or("unknown");
1877        match state {
1878            "done" | "finished" | "success" => return Ok(status),
1879            "failed" | "error" => return Err(format!("MinerU task {task_id} failed: {status}")),
1880            _ => {
1881                if started.elapsed() >= Duration::from_secs(timeout_seconds) {
1882                    return Err(format!(
1883                        "MinerU task {task_id} timed out after {timeout_seconds}s with state {state}"
1884                    ));
1885                }
1886                thread::sleep(Duration::from_secs(poll_interval_seconds.max(1)));
1887            }
1888        }
1889    }
1890}
1891
/// Builds the status URL for a MinerU extraction task.
///
/// `endpoint` may be given in any of three forms (trailing slashes ignored):
/// the full task URL (`…/extract/task`), the extract prefix (`…/extract`), or
/// the API root. When `endpoint` is `None` the public MinerU task URL is used.
/// All forms resolve to `<root>/extract/task/<task_id>`.
///
/// The `…/extract` form is handled explicitly so that it does not produce a
/// doubled `…/extract/extract/task/…` path.
fn mineru_task_status_url(endpoint: Option<&str>, task_id: &str) -> String {
    let base = endpoint
        .unwrap_or("https://mineru.net/api/v4/extract/task")
        .trim_end_matches('/');
    if base.ends_with("/extract/task") {
        format!("{base}/{task_id}")
    } else if base.ends_with("/extract") {
        // Only the final `/task` segment is missing.
        format!("{base}/task/{task_id}")
    } else {
        format!("{base}/extract/task/{task_id}")
    }
}
1902
1903fn mineru_file_urls_url(endpoint: Option<&str>) -> String {
1904    let base = mineru_api_base(endpoint);
1905    format!("{base}/file-urls/batch")
1906}
1907
1908fn mineru_batch_status_url(endpoint: Option<&str>, batch_id: &str) -> String {
1909    let base = mineru_api_base(endpoint);
1910    format!("{base}/extract-results/batch/{batch_id}")
1911}
1912
/// Derives the MinerU API base from a configured endpoint.
///
/// Trailing slashes are dropped, then a trailing `/extract/task` or, failing
/// that, a trailing `/extract` is removed. Anything else is returned as is.
/// With no endpoint, the public MinerU task URL is used as the starting point.
fn mineru_api_base(endpoint: Option<&str>) -> String {
    let trimmed = endpoint
        .unwrap_or("https://mineru.net/api/v4/extract/task")
        .trim_end_matches('/');
    // Longest suffix first so `/extract/task` is not partially matched.
    ["/extract/task", "/extract"]
        .iter()
        .find_map(|&suffix| trimmed.strip_suffix(suffix))
        .unwrap_or(trimmed)
        .to_string()
}
1925
1926fn poll_mineru_batch(
1927    endpoint: Option<&str>,
1928    batch_id: &str,
1929    auth_header: &str,
1930    poll_interval_seconds: u64,
1931    timeout_seconds: u64,
1932) -> Result<Value, String> {
1933    let url = mineru_batch_status_url(endpoint, batch_id);
1934    let started = Instant::now();
1935    loop {
1936        let status = get_json_with_auth(&url, auth_header)?;
1937        let state = mineru_batch_state(&status).unwrap_or("unknown");
1938        match state {
1939            "done" | "finished" | "success" => return Ok(status),
1940            "failed" | "error" => return Err(format!("MinerU batch {batch_id} failed: {status}")),
1941            _ => {
1942                if started.elapsed() >= Duration::from_secs(timeout_seconds) {
1943                    return Err(format!(
1944                        "MinerU batch {batch_id} timed out after {timeout_seconds}s with state {state}"
1945                    ));
1946                }
1947                thread::sleep(Duration::from_secs(poll_interval_seconds.max(1)));
1948            }
1949        }
1950    }
1951}
1952
1953fn mineru_batch_state(status: &Value) -> Option<&str> {
1954    status
1955        .pointer("/data/extract_result/0/state")
1956        .or_else(|| status.pointer("/data/extractResult/0/state"))
1957        .or_else(|| status.pointer("/data/state"))
1958        .and_then(Value::as_str)
1959}
1960
1961fn mineru_batch_zip_url(status: &Value) -> Option<String> {
1962    status
1963        .pointer("/data/extract_result/0/full_zip_url")
1964        .or_else(|| status.pointer("/data/extractResult/0/full_zip_url"))
1965        .or_else(|| status.pointer("/data/full_zip_url"))
1966        .and_then(Value::as_str)
1967        .map(ToString::to_string)
1968}
1969
/// Reads a credential from the environment and formats it as an
/// `Authorization` header value.
///
/// The variable named by `api_key_env` is trimmed. For the `"bearer"` scheme
/// the value is prefixed with `Bearer ` unless it already starts with that
/// prefix; for `"token"` the same is done with `token `. Any other scheme
/// returns the trimmed value unchanged.
///
/// # Errors
/// Returns a message when the variable cannot be read or is empty after
/// trimming.
fn provider_auth_header_value(api_key_env: &str, auth_scheme: &str) -> Result<String, String> {
    let Ok(raw) = env::var(api_key_env) else {
        return Err(format!(
            "missing provider credential env var {api_key_env}"
        ));
    };
    let token = raw.trim();
    if token.is_empty() {
        return Err(format!(
            "provider credential env var {api_key_env} is empty"
        ));
    }
    let prefix = match auth_scheme {
        "bearer" => "Bearer ",
        "token" => "token ",
        _ => return Ok(token.to_string()),
    };
    if token.starts_with(prefix) {
        Ok(token.to_string())
    } else {
        Ok(format!("{prefix}{token}"))
    }
}
1987
1988fn get_json_with_auth(url: &str, auth_header: &str) -> Result<Value, String> {
1989    ureq::get(url)
1990        .set("Authorization", auth_header)
1991        .call()
1992        .map_err(|err| format!("GET {url} failed: {err}"))?
1993        .into_json::<Value>()
1994        .map_err(|err| format!("GET {url} returned invalid JSON: {err}"))
1995}
1996
1997fn download_bytes(url: &str) -> Result<Vec<u8>, String> {
1998    let response = ureq::get(url)
1999        .call()
2000        .map_err(|err| format!("download {url} failed: {err}"))?;
2001    let mut bytes = Vec::new();
2002    response
2003        .into_reader()
2004        .read_to_end(&mut bytes)
2005        .map_err(|err| format!("read download {url}: {err}"))?;
2006    Ok(bytes)
2007}
2008
2009fn extract_zip_bytes_to_temp(prefix: &str, zip_bytes: &[u8]) -> Result<PathBuf, String> {
2010    let dir = unique_temp_path(prefix);
2011    fs::create_dir_all(&dir).map_err(|err| format!("create temp dir {}: {err}", dir.display()))?;
2012    let zip_path = dir.with_extension("zip");
2013    fs::write(&zip_path, zip_bytes)
2014        .map_err(|err| format!("write temp zip {}: {err}", zip_path.display()))?;
2015    let output = ProcessCommand::new("unzip")
2016        .arg("-q")
2017        .arg("-o")
2018        .arg(&zip_path)
2019        .arg("-d")
2020        .arg(&dir)
2021        .output()
2022        .map_err(|err| format!("run unzip: {err}"))?;
2023    if !output.status.success() {
2024        return Err(format!(
2025            "unzip {} failed: {}",
2026            zip_path.display(),
2027            String::from_utf8_lossy(&output.stderr).trim()
2028        ));
2029    }
2030    Ok(dir)
2031}
2032
/// Builds a path under the system temp directory named
/// `<prefix>-<process id>-<nanoseconds since the Unix epoch>`.
///
/// The nanosecond component falls back to `0` if the system clock reads
/// earlier than the epoch. Nothing is created on disk.
fn unique_temp_path(prefix: &str) -> PathBuf {
    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_nanos(),
        Err(_) => 0,
    };
    let pid = std::process::id();
    let mut path = env::temp_dir();
    path.push(format!("{prefix}-{pid}-{nanos}"));
    path
}
2040
2041fn mineru_result_source_from_dir(
2042    result_dir: PathBuf,
2043    raw_zip_bytes: Option<Vec<u8>>,
2044    task_status: Option<Value>,
2045    task_id: Option<String>,
2046) -> Result<MineruResultSource, String> {
2047    let (payload, content_list_file) = mineru_payload_from_result_dir(&result_dir)?;
2048    let markdown = find_first_file_by_name(&result_dir, "full.md")
2049        .map(|path| {
2050            fs::read_to_string(&path)
2051                .map_err(|err| format!("read native markdown {}: {err}", path.display()))
2052        })
2053        .transpose()?;
2054    Ok(MineruResultSource {
2055        task_id,
2056        state: "done".to_string(),
2057        result_dir,
2058        raw_zip_bytes,
2059        task_status,
2060        payload,
2061        content_list_file,
2062        markdown,
2063    })
2064}
2065
2066fn mineru_payload_from_result_dir(result_dir: &Path) -> Result<(Value, Option<PathBuf>), String> {
2067    let v2 = find_first_file_with_suffix(result_dir, "_content_list_v2.json");
2068    if let Some(path) = v2 {
2069        let value = read_json_file(&path)?;
2070        return Ok((serde_json::json!({"content_list_v2": value}), Some(path)));
2071    }
2072    let content_list = find_first_file_with_suffix(result_dir, "_content_list.json");
2073    if let Some(path) = content_list {
2074        let value = read_json_file(&path)?;
2075        return Ok((serde_json::json!({"content_list": value}), Some(path)));
2076    }
2077    let layout = find_first_file_by_name(result_dir, "layout.json");
2078    if let Some(path) = layout {
2079        return Ok((read_json_file(&path)?, Some(path)));
2080    }
2081    let markdown = find_first_file_by_name(result_dir, "full.md");
2082    if let Some(path) = markdown {
2083        let text = fs::read_to_string(&path)
2084            .map_err(|err| format!("read native markdown {}: {err}", path.display()))?;
2085        return Ok((serde_json::json!({"result": text}), Some(path)));
2086    }
2087    Err(format!(
2088        "MinerU result directory {} missing content_list_v2/content_list/layout/full.md",
2089        result_dir.display()
2090    ))
2091}
2092
2093fn read_json_file(path: &Path) -> Result<Value, String> {
2094    let raw = fs::read_to_string(path).map_err(|err| format!("read {}: {err}", path.display()))?;
2095    serde_json::from_str(&raw).map_err(|err| format!("parse JSON {}: {err}", path.display()))
2096}
2097
2098fn persist_mineru_result_sidecars(
2099    storage_dir: &Path,
2100    item_key: &str,
2101    attachment_key: &str,
2102    provider: &str,
2103    source: &MineruResultSource,
2104    chunk_chars: usize,
2105) -> Result<PersistedOcrArtifacts, String> {
2106    let blocks = parse_ocr_provider_response(provider, &source.payload, item_key, attachment_key)?;
2107    let chunks = zotron_types::chunks_from_blocks(&blocks, chunk_chars);
2108    let assets = copy_mineru_assets(&source.result_dir, storage_dir)?;
2109    let raw_bundle = serde_json::json!({
2110        "provider": provider,
2111        "item_key": item_key,
2112        "attachment_key": attachment_key,
2113        "task_id": source.task_id,
2114        "state": source.state,
2115        "task_status": source.task_status,
2116        "content_list_file": source.content_list_file,
2117        "payload": source.payload,
2118    });
2119
2120    let mut artifacts = Vec::new();
2121    artifacts.push(write_sidecar_json(
2122        storage_dir,
2123        item_key,
2124        attachment_key,
2125        MachineArtifactKind::OcrRaw,
2126        &raw_bundle,
2127    )?);
2128    artifacts.push(write_sidecar_jsonl(
2129        storage_dir,
2130        item_key,
2131        attachment_key,
2132        MachineArtifactKind::Blocks,
2133        &blocks,
2134    )?);
2135    artifacts.push(write_sidecar_jsonl(
2136        storage_dir,
2137        item_key,
2138        attachment_key,
2139        MachineArtifactKind::Chunks,
2140        &chunks,
2141    )?);
2142    if let Some(markdown) = source.markdown.as_deref() {
2143        artifacts.push(write_sidecar_bytes(
2144            storage_dir,
2145            item_key,
2146            attachment_key,
2147            MachineArtifactKind::OcrNativeMarkdown,
2148            markdown.as_bytes(),
2149        )?);
2150    }
2151    artifacts.push(write_sidecar_json(
2152        storage_dir,
2153        item_key,
2154        attachment_key,
2155        MachineArtifactKind::OcrNativeAssets,
2156        &assets,
2157    )?);
2158    if let Some(bytes) = source.raw_zip_bytes.as_deref() {
2159        artifacts.push(write_extra_sidecar_bytes(
2160            storage_dir,
2161            ".zotron/ocr/latest.raw.zip",
2162            bytes,
2163        )?);
2164    }
2165
2166    Ok(PersistedOcrArtifacts {
2167        block_count: blocks.len(),
2168        chunk_count: chunks.len(),
2169        artifacts,
2170    })
2171}
2172
2173fn write_sidecar_json(
2174    storage_dir: &Path,
2175    item_key: &str,
2176    attachment_key: &str,
2177    kind: MachineArtifactKind,
2178    value: &Value,
2179) -> Result<Value, String> {
2180    let bytes = serde_json::to_vec_pretty(value).map_err(|err| err.to_string())?;
2181    write_sidecar_bytes(storage_dir, item_key, attachment_key, kind, &bytes)
2182}
2183
2184fn write_sidecar_jsonl<T: serde::Serialize>(
2185    storage_dir: &Path,
2186    item_key: &str,
2187    attachment_key: &str,
2188    kind: MachineArtifactKind,
2189    values: &[T],
2190) -> Result<Value, String> {
2191    let mut out = String::new();
2192    for value in values {
2193        out.push_str(&serde_json::to_string(value).map_err(|err| err.to_string())?);
2194        out.push('\n');
2195    }
2196    write_sidecar_bytes(storage_dir, item_key, attachment_key, kind, out.as_bytes())
2197}
2198
2199fn write_sidecar_bytes(
2200    storage_dir: &Path,
2201    item_key: &str,
2202    attachment_key: &str,
2203    kind: MachineArtifactKind,
2204    bytes: &[u8],
2205) -> Result<Value, String> {
2206    let record = write_machine_artifact_sidecar(storage_dir, item_key, attachment_key, kind, bytes)
2207        .map_err(|err| format!("write sidecar {:?}: {err}", kind))?;
2208    Ok(serde_json::json!({
2209        "kind": kind,
2210        "relative_path": record.relative_path,
2211        "absolute_path": record.absolute_path,
2212    }))
2213}
2214
2215fn write_extra_sidecar_bytes(
2216    storage_dir: &Path,
2217    relative_path: &str,
2218    bytes: &[u8],
2219) -> Result<Value, String> {
2220    let absolute_path = storage_dir.join(relative_path);
2221    if let Some(parent) = absolute_path.parent() {
2222        fs::create_dir_all(parent).map_err(|err| format!("create {}: {err}", parent.display()))?;
2223    }
2224    fs::write(&absolute_path, bytes)
2225        .map_err(|err| format!("write sidecar {}: {err}", absolute_path.display()))?;
2226    Ok(serde_json::json!({
2227        "kind": "ocr_raw_zip",
2228        "relative_path": relative_path,
2229        "absolute_path": absolute_path,
2230    }))
2231}
2232
2233fn copy_mineru_assets(result_dir: &Path, storage_dir: &Path) -> Result<Value, String> {
2234    let mut images = Vec::new();
2235    for file in collect_files(result_dir)? {
2236        if !is_image_file(&file) {
2237            continue;
2238        }
2239        let relative = file.strip_prefix(result_dir).unwrap_or(&file).to_path_buf();
2240        let destination = storage_dir.join(".zotron").join("ocr").join(&relative);
2241        if let Some(parent) = destination.parent() {
2242            fs::create_dir_all(parent)
2243                .map_err(|err| format!("create {}: {err}", parent.display()))?;
2244        }
2245        fs::copy(&file, &destination).map_err(|err| {
2246            format!(
2247                "copy MinerU asset {} to {}: {err}",
2248                file.display(),
2249                destination.display()
2250            )
2251        })?;
2252        images.push(serde_json::json!({
2253            "source_relative": relative,
2254            "sidecar_relative": PathBuf::from(".zotron").join("ocr").join(&relative),
2255            "absolute_path": destination,
2256        }));
2257    }
2258    Ok(serde_json::json!({
2259        "provider": "mineru",
2260        "images": images,
2261    }))
2262}
2263
/// Reports whether `path` has one of the image extensions `png`, `jpg`,
/// `jpeg`, `webp` or `gif`, compared ASCII case-insensitively.
///
/// Paths without an extension, or with a non-UTF-8 extension, are not images.
fn is_image_file(path: &Path) -> bool {
    const IMAGE_EXTENSIONS: [&str; 5] = ["png", "jpg", "jpeg", "webp", "gif"];
    let Some(extension) = path.extension().and_then(|ext| ext.to_str()) else {
        return false;
    };
    IMAGE_EXTENSIONS
        .iter()
        .any(|known| extension.eq_ignore_ascii_case(known))
}
2274
2275fn find_first_file_with_suffix(root: &Path, suffix: &str) -> Option<PathBuf> {
2276    collect_files(root).ok()?.into_iter().find(|path| {
2277        path.file_name()
2278            .and_then(|name| name.to_str())
2279            .is_some_and(|name| name.ends_with(suffix))
2280    })
2281}
2282
2283fn find_first_file_by_name(root: &Path, name: &str) -> Option<PathBuf> {
2284    collect_files(root).ok()?.into_iter().find(|path| {
2285        path.file_name()
2286            .and_then(|file_name| file_name.to_str())
2287            .is_some_and(|file_name| file_name == name)
2288    })
2289}
2290
2291fn collect_files(root: &Path) -> Result<Vec<PathBuf>, String> {
2292    let mut files = Vec::new();
2293    collect_files_into(root, &mut files)?;
2294    files.sort();
2295    Ok(files)
2296}
2297
/// Appends every regular file under `root` to `files`, descending into
/// subdirectories.
///
/// Entries that are neither directories nor regular files are skipped. Files
/// are appended in directory-iteration order; no sorting is done here.
///
/// # Errors
/// Returns a message when a directory cannot be listed, an entry cannot be
/// read, or an entry's type cannot be determined. Files appended before the
/// failure remain in `files`.
fn collect_files_into(root: &Path, files: &mut Vec<PathBuf>) -> Result<(), String> {
    let entries = match fs::read_dir(root) {
        Ok(entries) => entries,
        Err(err) => return Err(format!("read dir {}: {err}", root.display())),
    };
    for entry in entries {
        let entry = match entry {
            Ok(entry) => entry,
            Err(err) => return Err(format!("read dir entry {}: {err}", root.display())),
        };
        let path = entry.path();
        let kind = match entry.file_type() {
            Ok(kind) => kind,
            Err(err) => return Err(format!("stat {}: {err}", path.display())),
        };
        match (kind.is_dir(), kind.is_file()) {
            (true, _) => collect_files_into(&path, files)?,
            (false, true) => files.push(path),
            (false, false) => {}
        }
    }
    Ok(())
}
2313
2314fn ocr_async_task_result(provider: &str, payload: &Value) -> Option<Value> {
2315    let data = payload.get("data")?;
2316    let task_id = data.get("task_id").and_then(Value::as_str)?;
2317    Some(serde_json::json!({
2318        "provider": provider,
2319        "status": "submitted",
2320        "task_id": task_id,
2321        "state": data.get("state").and_then(Value::as_str).unwrap_or("submitted"),
2322        "result_url": data.get("full_zip_url").or_else(|| data.get("markdown_url")).cloned(),
2323        "raw": payload,
2324    }))
2325}
2326
2327fn ocr_input_from_file(
2328    file: String,
2329    item_key: Option<String>,
2330    attachment_key: Option<String>,
2331    mime_type: Option<String>,
2332) -> Result<OcrRequestInput, String> {
2333    let item_key = item_key
2334        .ok_or_else(|| "INVALID_ARGS: --item-key is required when using --file".to_string())?;
2335    let attachment_key = attachment_key.ok_or_else(|| {
2336        "INVALID_ARGS: --attachment-key is required when using --file".to_string()
2337    })?;
2338    let path = PathBuf::from(&file);
2339    let bytes = fs::read(&path).map_err(|err| format!("read {file}: {err}"))?;
2340    let file_name = path
2341        .file_name()
2342        .and_then(|name| name.to_str())
2343        .unwrap_or("document.pdf")
2344        .to_string();
2345    let mime_type = mime_type.unwrap_or_else(|| guess_mime_type(&path).to_string());
2346    Ok(OcrRequestInput {
2347        item_key,
2348        attachment_key,
2349        file_name,
2350        mime_type,
2351        content_base64: base64_encode(&bytes),
2352        source_url: None,
2353        local_path: Some(file),
2354        output_dir: None,
2355    })
2356}
2357
/// Guesses a MIME type from the extension of `path`, ASCII case-insensitively.
///
/// `png`, `jpg`/`jpeg` and `webp` map to their image types. Every other
/// extension, and a missing or non-UTF-8 one, yields `application/pdf`.
fn guess_mime_type(path: &Path) -> &'static str {
    const KNOWN_TYPES: [(&str, &str); 4] = [
        ("png", "image/png"),
        ("jpg", "image/jpeg"),
        ("jpeg", "image/jpeg"),
        ("webp", "image/webp"),
    ];
    let extension = path
        .extension()
        .and_then(|ext| ext.to_str())
        .unwrap_or_default();
    for (known, mime) in KNOWN_TYPES {
        if extension.eq_ignore_ascii_case(known) {
            return mime;
        }
    }
    "application/pdf"
}
2372
/// Encodes `bytes` as standard base64 (`A–Z a–z 0–9 + /`) with `=` padding.
fn base64_encode(bytes: &[u8]) -> String {
    const ALPHABET: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    let mut encoded = String::with_capacity(bytes.len().div_ceil(3) * 4);
    for group in bytes.chunks(3) {
        // Pack up to three bytes into the top 24 bits of a word; missing
        // trailing bytes stay zero.
        let mut word = 0u32;
        for (index, &byte) in group.iter().enumerate() {
            word |= u32::from(byte) << (16 - 8 * index);
        }
        let sextet = |shift: u32| ALPHABET[((word >> shift) & 0x3f) as usize] as char;
        encoded.push(sextet(18));
        encoded.push(sextet(12));
        encoded.push(if group.len() > 1 { sextet(6) } else { '=' });
        encoded.push(if group.len() > 2 { sextet(0) } else { '=' });
    }
    encoded
}
2395
2396fn run_ocr_status_command(
2397    client: &mut impl RpcCaller,
2398    collection: String,
2399) -> Result<Value, String> {
2400    let collection_key = find_collection_in_tree(client, &collection)?
2401        .and_then(|node| node.get("key").cloned())
2402        .ok_or_else(|| format!("COLLECTION_NOT_FOUND: Collection not found: {collection:?}"))?;
2403    let raw = paginate_rpc(
2404        client,
2405        "collections.getItems",
2406        serde_json::json!({"key": collection_key}),
2407        500,
2408    )?;
2409    let items = raw
2410        .get("items")
2411        .and_then(Value::as_array)
2412        .or_else(|| raw.as_array())
2413        .ok_or_else(|| "collections.getItems returned non-array/non-items result".to_string())?
2414        .clone();
2415
2416    let mut has_ocr = 0usize;
2417    for item in &items {
2418        let item_key = item.get("key").cloned().unwrap_or(Value::Null);
2419        if has_ocr_artifact(client, &item_key)? || has_ocr_note(client, &item_key)? {
2420            has_ocr += 1;
2421        }
2422    }
2423
2424    Ok(serde_json::json!({
2425        "collection": collection,
2426        "total": items.len(),
2427        "has_ocr": has_ocr,
2428        "missing_ocr": items.len() - has_ocr,
2429    }))
2430}
2431
2432fn has_ocr_artifact(client: &mut impl RpcCaller, item_key: &Value) -> Result<bool, String> {
2433    if let Some(item_key) = item_key.as_str() {
2434        // Legacy external store lookup for artifacts produced before the
2435        // per-attachment hidden sidecar became the default.
2436        if machine_artifact_exists_for_item(
2437            machine_artifact_store_root(),
2438            item_key,
2439            MachineArtifactKind::Chunks,
2440        ) {
2441            return Ok(true);
2442        }
2443    }
2444
2445    let attachments = client.call(
2446        "attachments.list",
2447        Some(serde_json::json!({"parentKey": item_key.clone()})),
2448    )?;
2449    Ok(attachments.as_array().is_some_and(|attachments| {
2450        attachments.iter().any(|attachment| {
2451            let has_sidecar_chunks = attachment
2452                .get("path")
2453                .and_then(Value::as_str)
2454                .map(local_path_from_zotero_path)
2455                .as_deref()
2456                .map(Path::new)
2457                .and_then(Path::parent)
2458                .is_some_and(|dir| {
2459                    machine_artifact_exists_in_sidecar(dir, MachineArtifactKind::Chunks)
2460                });
2461            if has_sidecar_chunks {
2462                return true;
2463            }
2464
2465            // Read-only fallback for old Zotero-visible artifact attachments.
2466            attachment
2467                .get("title")
2468                .and_then(Value::as_str)
2469                .is_some_and(|title| title.ends_with("zotron-chunks.jsonl"))
2470        })
2471    }))
2472}
2473
2474fn local_path_from_zotero_path(path: &str) -> String {
2475    if is_wsl() && path.as_bytes().get(1) == Some(&b':') {
2476        return ProcessCommand::new("wslpath")
2477            .arg("-u")
2478            .arg(path)
2479            .output()
2480            .ok()
2481            .filter(|output| output.status.success())
2482            .and_then(|output| String::from_utf8(output.stdout).ok())
2483            .map(|converted| converted.trim().to_string())
2484            .filter(|converted| !converted.is_empty())
2485            .unwrap_or_else(|| path.to_string());
2486    }
2487    path.to_string()
2488}
2489
2490fn has_ocr_note(client: &mut impl RpcCaller, item_key: &Value) -> Result<bool, String> {
2491    let notes = client.call(
2492        "notes.list",
2493        Some(serde_json::json!({"parentKey": item_key.clone()})),
2494    )?;
2495    Ok(notes.as_array().is_some_and(|notes| {
2496        notes.iter().any(|note| {
2497            note.get("tags")
2498                .and_then(Value::as_array)
2499                .is_some_and(|tags| tags.iter().any(tag_is_ocr))
2500        })
2501    }))
2502}
2503
2504fn tag_is_ocr(tag: &Value) -> bool {
2505    tag.as_str() == Some("ocr")
2506        || tag
2507            .get("tag")
2508            .and_then(Value::as_str)
2509            .is_some_and(|tag| tag == "ocr")
2510}
2511
2512fn find_collection_in_tree(
2513    client: &mut impl RpcCaller,
2514    collection: &str,
2515) -> Result<Option<Value>, String> {
2516    let tree = client.call("collections.tree", None)?;
2517    let nodes = tree
2518        .as_array()
2519        .ok_or_else(|| "collections.tree returned non-array result".to_string())?;
2520    Ok(search_collection_tree(nodes, collection).cloned())
2521}
2522
2523fn search_collection_tree<'a>(nodes: &'a [Value], collection: &str) -> Option<&'a Value> {
2524    for node in nodes {
2525        if node.get("key").and_then(Value::as_str) == Some(collection)
2526            || node.get("name").and_then(Value::as_str) == Some(collection)
2527        {
2528            return Some(node);
2529        }
2530        if let Some(children) = node.get("children").and_then(Value::as_array) {
2531            if let Some(found) = search_collection_tree(children, collection) {
2532                return Some(found);
2533            }
2534        }
2535    }
2536    None
2537}
2538
2539fn run_command(command: Command, client: &mut impl RpcCaller) -> Result<String, String> {
2540    if let Command::Export(args) = command {
2541        return run_export(args, client);
2542    }
2543
2544    let (value, style) = match command {
2545        Command::Ping { .. } => (
2546            call_json(client, "system.ping", None)?,
2547            JsonStyle::PythonCompact,
2548        ),
2549        Command::Rpc {
2550            method,
2551            params_json,
2552            paginate,
2553            page_size,
2554            ..
2555        } => {
2556            let params = serde_json::from_str::<Value>(&params_json)
2557                .map_err(|err| format!("INVALID_JSON: params must be a JSON object: {err}"))?;
2558            if !params.is_object() {
2559                return Err("INVALID_JSON: params must be a JSON object".to_string());
2560            }
2561            if paginate {
2562                (
2563                    paginate_rpc(client, &method, params, page_size)?,
2564                    JsonStyle::Pretty,
2565                )
2566            } else {
2567                (call_json(client, &method, Some(params))?, JsonStyle::Pretty)
2568            }
2569        }
2570        Command::Push {
2571            json_file,
2572            pdf,
2573            collection,
2574            on_duplicate,
2575            dry_run,
2576            ..
2577        } => return run_push_command(json_file, pdf, collection, on_duplicate, dry_run, client),
2578        Command::System { command } => run_system_command(command, client)?,
2579        Command::Search(args) => {
2580            if let Some(mgmt) = args.management {
2581                run_search_management_command(mgmt, client)?
2582            } else {
2583                run_search(args, client)?
2584            }
2585        }
2586        Command::Items { command } => run_items_command(command, client)?,
2587        Command::Collections { command } => run_collections_command(command, client)?,
2588        Command::Notes { command } => run_notes_command(command, client)?,
2589        Command::Settings { command } => run_settings_command(command, client)?,
2590        Command::Tags { command } => run_tags_command(command, client)?,
2591        Command::Annotations { command } => run_annotations_command(command, client)?,
2592        Command::Ocr { command } => {
2593            return run_ocr_command(command, client);
2594        }
2595        Command::Rag { command } => {
2596            return run_rag_command(command, client);
2597        }
2598        Command::Export(_) => unreachable!("export commands return raw output above"),
2599    };
2600
2601    format_json(&value, style)
2602}
2603
2604fn run_rag_command(command: RagCommand, client: &mut impl RpcCaller) -> Result<String, String> {
2605    match command {
2606        RagCommand::Providers => format_json(
2607            &serde_json::json!({
2608                "providers": [
2609                    embedding_provider_spec("volcengine")?,
2610                    embedding_provider_spec("alibaba")?,
2611                    embedding_provider_spec("custom")?,
2612                ],
2613            }),
2614            JsonStyle::Pretty,
2615        ),
2616        RagCommand::Embed {
2617            provider,
2618            input,
2619            endpoint,
2620            model,
2621            input_type,
2622            api_key_env,
2623        } => {
2624            let value = run_embedding_provider_json_command(
2625                provider,
2626                input,
2627                endpoint,
2628                model,
2629                input_type,
2630                api_key_env,
2631            )?;
2632            format_json(&value, JsonStyle::PythonCompact)
2633        }
2634        RagCommand::Status { collection, .. } => {
2635            let value = rag_status_value(client, &collection)?;
2636            format_json(&value, JsonStyle::PythonCompact)
2637        }
2638        RagCommand::Search {
2639            query,
2640            collection,
2641            keys,
2642            zotero,
2643            top_spans_per_item,
2644            include_fulltext_spans,
2645            top_k,
2646            output,
2647            ..
2648        } => run_rag_search_command(
2649            client,
2650            RagSearchOptions {
2651                query,
2652                collection,
2653                keys,
2654                zotero,
2655                top_spans_per_item,
2656                include_fulltext_spans,
2657                top_k,
2658                output,
2659            },
2660        ),
2661    }
2662}
2663
2664fn run_embedding_provider_json_command(
2665    provider: String,
2666    input: String,
2667    endpoint: Option<String>,
2668    model: Option<String>,
2669    input_type: Option<String>,
2670    api_key_env: Option<String>,
2671) -> Result<Value, String> {
2672    let mut input: EmbeddingRequestInput = read_json_input(&input)?;
2673    if endpoint.is_some() {
2674        input.url = endpoint;
2675    }
2676    if model.is_some() {
2677        input.model = model;
2678    }
2679    if input_type.is_some() {
2680        input.input_type = input_type;
2681    }
2682    let mut transport = provider_http_transport(api_key_env.as_deref())?;
2683    let vectors = execute_embedding_provider_request(&provider, &input, &mut transport)?;
2684
2685    Ok(serde_json::json!({
2686        "provider": provider,
2687        "vectors": vectors,
2688    }))
2689}
2690
2691fn provider_http_transport(api_key_env: Option<&str>) -> Result<UreqProviderHttpTransport, String> {
2692    provider_http_transport_with_auth(api_key_env, "bearer")
2693}
2694
2695fn provider_http_transport_with_auth(
2696    api_key_env: Option<&str>,
2697    auth_scheme: &str,
2698) -> Result<UreqProviderHttpTransport, String> {
2699    let Some(env_name) = api_key_env else {
2700        return Ok(UreqProviderHttpTransport::new());
2701    };
2702    let token = env::var(env_name)
2703        .map_err(|_| format!("missing provider credential env var {env_name}"))?;
2704    if token.trim().is_empty() {
2705        return Err(format!("provider credential env var {env_name} is empty"));
2706    }
2707    let token = token.trim();
2708    match auth_scheme {
2709        "token" if token.starts_with("token ") => {
2710            Ok(UreqProviderHttpTransport::with_api_key(token.to_string()))
2711        }
2712        "token" => Ok(UreqProviderHttpTransport::with_api_key(format!(
2713            "token {token}"
2714        ))),
2715        "bearer" if token.starts_with("Bearer ") => {
2716            Ok(UreqProviderHttpTransport::with_api_key(token.to_string()))
2717        }
2718        "bearer" => Ok(UreqProviderHttpTransport::with_bearer_token(token)),
2719        "none" => Ok(UreqProviderHttpTransport::new()),
2720        other => Err(format!("unsupported provider auth scheme {other}")),
2721    }
2722}
2723
2724fn read_json_input<T: serde::de::DeserializeOwned>(path: &str) -> Result<T, String> {
2725    let payload = if path == "-" {
2726        let mut input = String::new();
2727        io::stdin()
2728            .read_to_string(&mut input)
2729            .map_err(|err| format!("read stdin: {err}"))?;
2730        input
2731    } else {
2732        fs::read_to_string(path).map_err(|err| format!("read {path}: {err}"))?
2733    };
2734    serde_json::from_str::<T>(&payload)
2735        .map_err(|err| format!("INVALID_JSON: Could not parse JSON: {err}"))
2736}
2737
2738fn fetch_embedding_settings(
2739    client: &mut impl RpcCaller,
2740) -> Result<(String, String, String, String), String> {
2741    let settings = client.call("settings.getAll", None)?;
2742    let provider = settings
2743        .get("embedding.provider")
2744        .and_then(Value::as_str)
2745        .unwrap_or("ollama")
2746        .to_string();
2747    let model = settings
2748        .get("embedding.model")
2749        .and_then(Value::as_str)
2750        .unwrap_or("")
2751        .to_string();
2752    let api_url = settings
2753        .get("embedding.apiUrl")
2754        .and_then(Value::as_str)
2755        .unwrap_or("")
2756        .to_string();
2757    let raw = client.call("settings.getRaw", Some(serde_json::json!({"key": "embedding.apiKey"})))?;
2758    let api_key = raw
2759        .get("embedding.apiKey")
2760        .and_then(Value::as_str)
2761        .unwrap_or("")
2762        .to_string();
2763    Ok((provider, model, api_url, api_key))
2764}
2765
2766fn fetch_retrieval_mode(client: &mut impl RpcCaller) -> String {
2767    client
2768        .call(
2769            "settings.get",
2770            Some(serde_json::json!({"key": "rag.retrievalMode"})),
2771        )
2772        .ok()
2773        .and_then(|v| {
2774            v.get("rag.retrievalMode")
2775                .and_then(Value::as_str)
2776                .map(String::from)
2777        })
2778        .unwrap_or_else(|| "hybrid".to_string())
2779}
2780
2781fn resolve_sidecar_paths(
2782    client: &mut impl RpcCaller,
2783    collection: Option<&str>,
2784    keys: &[String],
2785) -> Result<Vec<(String, String, PathBuf)>, String> {
2786    let items = if !keys.is_empty() {
2787        let mut items = Vec::new();
2788        for key in keys {
2789            let item = client.call("items.get", Some(serde_json::json!({"key": key})))?;
2790            items.push(item);
2791        }
2792        items
2793    } else if let Some(col) = collection {
2794        let col_key = resolve_collection(client, col)?;
2795        let response = client.call(
2796            "collections.getItems",
2797            Some(serde_json::json!({"key": col_key})),
2798        )?;
2799        collection_items(&response)
2800    } else {
2801        return Err("INVALID_ARGS: --collection or --key required".into());
2802    };
2803
2804    let mut results = Vec::new();
2805    for item in &items {
2806        let item_key = item.get("key").and_then(Value::as_str).unwrap_or_default();
2807        let attachments = client.call(
2808            "attachments.list",
2809            Some(serde_json::json!({"parentKey": item_key})),
2810        )?;
2811        let att_list = attachments
2812            .get("items")
2813            .and_then(Value::as_array)
2814            .or_else(|| attachments.as_array())
2815            .cloned()
2816            .unwrap_or_default();
2817        for att in &att_list {
2818            let content_type = att
2819                .get("contentType")
2820                .and_then(Value::as_str)
2821                .unwrap_or("");
2822            if content_type != "application/pdf" {
2823                continue;
2824            }
2825            let att_key = att.get("key").and_then(Value::as_str).unwrap_or_default();
2826            let path = att.get("path").and_then(Value::as_str).unwrap_or_default();
2827            if path.is_empty() {
2828                continue;
2829            }
2830            let local_path = local_path_from_zotero_path(path);
2831            let pdf_path = PathBuf::from(&local_path);
2832            if let Some(parent) = pdf_path.parent() {
2833                let sidecar_root = parent.join(".zotron");
2834                if sidecar_root.exists() {
2835                    results.push((item_key.to_string(), att_key.to_string(), sidecar_root));
2836                }
2837            }
2838        }
2839    }
2840    Ok(results)
2841}
2842
2843fn load_sidecar_chunks(sidecar_root: &Path) -> Vec<StructureChunk> {
2844    let chunks_path = sidecar_root.join("chunks").join("chunks.v1.jsonl");
2845    let Ok(content) = fs::read_to_string(&chunks_path) else {
2846        return Vec::new();
2847    };
2848    content
2849        .lines()
2850        .filter(|line| !line.trim().is_empty())
2851        .filter_map(|line| serde_json::from_str::<StructureChunk>(line).ok())
2852        .collect()
2853}
2854
/// Derives the JSONL file name used for vectors of a provider/model pair.
///
/// Both parts are trimmed, lower-cased and have `/` replaced by `-`. When both
/// end up empty the name is `vectors.jsonl`; otherwise it is
/// `<provider>--<model>.jsonl`.
fn embedding_vector_filename(provider: &str, model: &str) -> String {
    let slug = |raw: &str| raw.trim().to_lowercase().replace('/', "-");
    let (provider_slug, model_slug) = (slug(provider), slug(model));
    match (provider_slug.is_empty(), model_slug.is_empty()) {
        (true, true) => "vectors.jsonl".to_string(),
        _ => format!("{provider_slug}--{model_slug}.jsonl"),
    }
}
2863
2864fn load_sidecar_vectors(sidecar_root: &Path, provider: &str, model: &str) -> Vec<EmbeddingVector> {
2865    let embeddings_dir = sidecar_root.join("embeddings");
2866    let target = embedding_vector_filename(provider, model);
2867    let target_path = embeddings_dir.join(&target);
2868    if let Ok(content) = fs::read_to_string(&target_path) {
2869        let vecs: Vec<EmbeddingVector> = content
2870            .lines()
2871            .filter(|line| !line.trim().is_empty())
2872            .filter_map(|line| serde_json::from_str(line).ok())
2873            .collect();
2874        if !vecs.is_empty() {
2875            return vecs;
2876        }
2877    }
2878    // Fallback: try legacy vectors.v1.jsonl / vectors.jsonl with provider match
2879    for legacy in &["vectors.v1.jsonl", "vectors.jsonl"] {
2880        let path = embeddings_dir.join(legacy);
2881        if let Ok(content) = fs::read_to_string(&path) {
2882            let vecs: Vec<EmbeddingVector> = content
2883                .lines()
2884                .filter(|line| !line.trim().is_empty())
2885                .filter_map(|line| serde_json::from_str::<EmbeddingVector>(line).ok())
2886                .filter(|v| v.source_provider == provider || provider.is_empty())
2887                .collect();
2888            if !vecs.is_empty() {
2889                return vecs;
2890            }
2891        }
2892    }
2893    Vec::new()
2894}
2895
2896fn embed_query_text(
2897    query: &str,
2898    provider: &str,
2899    model: &str,
2900    api_url: &str,
2901    api_key: &str,
2902) -> Result<Vec<f64>, String> {
2903    let input = EmbeddingRequestInput {
2904        item_key: "query".to_string(),
2905        chunks: vec![EmbeddingChunkInput {
2906            chunk_key: "q0".to_string(),
2907            text: query.to_string(),
2908        }],
2909        model: if model.is_empty() {
2910            None
2911        } else {
2912            Some(model.to_string())
2913        },
2914        url: if api_url.is_empty() {
2915            None
2916        } else {
2917            Some(api_url.to_string())
2918        },
2919        input_type: Some("query".to_string()),
2920    };
2921    let request = build_embedding_provider_request(provider, &input)?;
2922    let url = request
2923        .url
2924        .as_deref()
2925        .ok_or("no embedding URL configured")?;
2926    let mut http = ureq::post(url).set("Content-Type", "application/json");
2927    if let Some(auth) = request.auth_header {
2928        if !api_key.is_empty() {
2929            http = http.set(auth, &format!("Bearer {api_key}"));
2930        }
2931    }
2932    let resp = http
2933        .send_json(&request.body)
2934        .map_err(|e| format!("embedding request failed: {e}"))?;
2935    let payload: Value = resp
2936        .into_json()
2937        .map_err(|e| format!("embedding response parse: {e}"))?;
2938    let vectors =
2939        parse_embedding_provider_response(provider, &payload, "query", &input.chunks)?;
2940    vectors
2941        .into_iter()
2942        .next()
2943        .map(|v| v.vector)
2944        .ok_or_else(|| "no embedding vector returned".to_string())
2945}
2946
2947fn run_rag_search_xpi_fallback(
2948    client: &mut impl RpcCaller,
2949    options: &RagSearchOptions,
2950) -> Result<String, String> {
2951    let mut params = serde_json::json!({
2952        "query": options.query,
2953        "limit": options.top_k,
2954        "top_spans_per_item": options.top_spans_per_item,
2955        "include_fulltext_spans": options.include_fulltext_spans,
2956    });
2957    if let Some(map) = params.as_object_mut() {
2958        if let Some(col) = &options.collection {
2959            map.insert("collection".into(), Value::String(col.clone()));
2960        }
2961        if !options.keys.is_empty() {
2962            map.insert(
2963                "keys".into(),
2964                Value::Array(options.keys.iter().map(|k| Value::String(k.clone())).collect()),
2965            );
2966        }
2967    }
2968    let payload = client.call("rag.searchHits", Some(params))?;
2969    let hits = payload
2970        .get("hits")
2971        .and_then(Value::as_array)
2972        .cloned()
2973        .unwrap_or_default();
2974    if options.output == "jsonl" {
2975        let mut out = String::new();
2976        for hit in &hits {
2977            out.push_str(&serde_json::to_string(hit).map_err(|e| e.to_string())?);
2978            out.push('\n');
2979        }
2980        Ok(out)
2981    } else {
2982        let total = hits.len() as u64;
2983        format_json(
2984            &normalize_list_envelope(
2985                serde_json::json!({"items": hits, "total": total}),
2986                "items",
2987                Some(options.top_k),
2988                0,
2989            ),
2990            JsonStyle::Pretty,
2991        )
2992    }
2993}
2994
2995fn run_rag_search_command(
2996    client: &mut impl RpcCaller,
2997    options: RagSearchOptions,
2998) -> Result<String, String> {
2999    // When --zotero is explicitly passed, use XPI fallback directly (backward compat).
3000    if options.zotero {
3001        if options.collection.is_none() && options.keys.is_empty() {
3002            return Err(
3003                "INVALID_ARGS: --collection or --key is required".to_string(),
3004            );
3005        }
3006        return run_rag_search_xpi_fallback(client, &options);
3007    }
3008
3009    // Hybrid path: require scope.
3010    if options.collection.is_none() && options.keys.is_empty() {
3011        return Err("INVALID_ARGS: --collection or --key required".to_string());
3012    }
3013
3014    // Step 1: resolve sidecar paths from collection/keys.
3015    let sidecars = resolve_sidecar_paths(
3016        client,
3017        options.collection.as_deref(),
3018        &options.keys,
3019    );
3020
3021    // If sidecar resolution fails or returns empty, fall back to XPI.
3022    // But propagate COLLECTION_NOT_FOUND errors directly instead of masking them.
3023    let sidecars = match sidecars {
3024        Ok(ref s) if !s.is_empty() => s,
3025        Err(ref e) if e.contains("COLLECTION_NOT_FOUND") => return Err(e.clone()),
3026        _ => return run_rag_search_xpi_fallback(client, &options),
3027    };
3028
3029    // Step 2: load all chunks and vectors from sidecars.
3030    let (emb_provider, emb_model, emb_url, emb_key) = fetch_embedding_settings(client)?;
3031    let mut all_chunks: Vec<StructureChunk> = Vec::new();
3032    let mut all_vectors: Vec<EmbeddingVector> = Vec::new();
3033    for (_item_key, _att_key, sidecar_root) in sidecars {
3034        all_chunks.extend(load_sidecar_chunks(sidecar_root));
3035        all_vectors.extend(load_sidecar_vectors(sidecar_root, &emb_provider, &emb_model));
3036    }
3037
3038    if all_chunks.is_empty() {
3039        return run_rag_search_xpi_fallback(client, &options);
3040    }
3041
3042    // Step 3: determine retrieval mode.
3043    let mode = fetch_retrieval_mode(client);
3044
3045    // Step 4: BM25 scoring (unless mode is "dense").
3046    let bm25_ranked = if mode != "dense" {
3047        bm25_score_chunks(&all_chunks, &options.query, 1.2, 0.75)
3048    } else {
3049        Vec::new()
3050    };
3051
3052    // Step 5: dense vector scoring (unless mode is "lexical" or no vectors).
3053    let dense_ranked = if mode != "lexical" && !all_vectors.is_empty() {
3054        match embed_query_text(&options.query, &emb_provider, &emb_model, &emb_url, &emb_key) {
3055            Ok(query_vec) => {
3056                let vec_map: std::collections::HashMap<&str, &[f64]> = all_vectors
3057                    .iter()
3058                    .map(|v| (v.chunk_key.as_str(), v.vector.as_slice()))
3059                    .collect();
3060                let mut scores: Vec<(usize, f64)> = all_chunks
3061                    .iter()
3062                    .enumerate()
3063                    .filter_map(|(i, chunk)| {
3064                        vec_map.get(chunk.chunk_key.as_str()).map(|stored| {
3065                            (i, cosine_similarity(&query_vec, stored))
3066                        })
3067                    })
3068                    .filter(|(_, s)| *s > 0.0)
3069                    .collect();
3070                scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
3071                scores
3072            }
3073            Err(_) => Vec::new(),
3074        }
3075    } else {
3076        Vec::new()
3077    };
3078
3079    // Step 6: merge results.
3080    let limit = options.top_k as usize;
3081    let ranked = if !bm25_ranked.is_empty() && !dense_ranked.is_empty() {
3082        rrf_merge(&bm25_ranked, &dense_ranked, 60.0, limit)
3083    } else if !bm25_ranked.is_empty() {
3084        bm25_ranked.into_iter().take(limit).collect()
3085    } else {
3086        dense_ranked.into_iter().take(limit).collect()
3087    };
3088
3089    // Step 7: apply per-item span limit.
3090    let mut per_item_count: std::collections::HashMap<&str, u64> =
3091        std::collections::HashMap::new();
3092    let mut selected: Vec<(usize, f64)> = Vec::new();
3093    for (idx, score) in &ranked {
3094        let item_key = all_chunks[*idx].item_key.as_str();
3095        let count = per_item_count.entry(item_key).or_insert(0);
3096        if *count < options.top_spans_per_item {
3097            *count += 1;
3098            selected.push((*idx, *score));
3099        }
3100    }
3101
3102    // Step 8: enrich hits with item metadata.
3103    let mut meta_cache: std::collections::HashMap<String, Value> =
3104        std::collections::HashMap::new();
3105    let mut hits: Vec<Value> = Vec::new();
3106    for (idx, score) in &selected {
3107        let chunk = &all_chunks[*idx];
3108        let meta = if let Some(cached) = meta_cache.get(&chunk.item_key) {
3109            cached.clone()
3110        } else {
3111            let fetched = client
3112                .call(
3113                    "items.get",
3114                    Some(serde_json::json!({"key": chunk.item_key})),
3115                )
3116                .unwrap_or(Value::Null);
3117            meta_cache.insert(chunk.item_key.clone(), fetched.clone());
3118            fetched
3119        };
3120        let title = meta
3121            .get("title")
3122            .and_then(Value::as_str)
3123            .unwrap_or("")
3124            .to_string();
3125        let authors = meta
3126            .get("creators")
3127            .and_then(Value::as_array)
3128            .map(|creators| {
3129                creators
3130                    .iter()
3131                    .filter_map(|c| {
3132                        let last = c.get("lastName").and_then(Value::as_str).unwrap_or("");
3133                        let first = c.get("firstName").and_then(Value::as_str).unwrap_or("");
3134                        if last.is_empty() && first.is_empty() {
3135                            None
3136                        } else {
3137                            Some(format!("{last}{first}"))
3138                        }
3139                    })
3140                    .collect::<Vec<_>>()
3141                    .join(", ")
3142            })
3143            .unwrap_or_default();
3144        let year = meta.get("date").and_then(Value::as_str).unwrap_or("");
3145        let mut hit = serde_json::json!({
3146            "item_key": chunk.item_key,
3147            "chunk_key": chunk.chunk_key,
3148            "title": title,
3149            "authors": authors,
3150            "year": year,
3151            "text": chunk.text,
3152            "page_range": chunk.page_range,
3153            "section_path": chunk.section_path,
3154            "score": score,
3155        });
3156        if options.include_fulltext_spans {
3157            hit.as_object_mut().unwrap().insert(
3158                "attachment_key".to_string(),
3159                Value::String(chunk.attachment_key.clone()),
3160            );
3161        }
3162        hits.push(hit);
3163    }
3164
3165    // Step 9: format output.
3166    if options.output == "jsonl" {
3167        let mut out = String::new();
3168        for hit in &hits {
3169            out.push_str(&serde_json::to_string(hit).map_err(|e| e.to_string())?);
3170            out.push('\n');
3171        }
3172        Ok(out)
3173    } else {
3174        let total = hits.len() as u64;
3175        format_json(
3176            &normalize_list_envelope(
3177                serde_json::json!({"items": hits, "total": total}),
3178                "items",
3179                Some(options.top_k),
3180                0,
3181            ),
3182            JsonStyle::Pretty,
3183        )
3184    }
3185}
3186
3187fn rag_status_value(client: &mut impl RpcCaller, collection: &str) -> Result<Value, String> {
3188    let raw_store_path = rag_store_path(collection);
3189    if raw_store_path.exists() {
3190        return rag_status_from_store(collection, &raw_store_path);
3191    }
3192
3193    let mut store_candidates = Vec::new();
3194    let collection_match = find_collection_in_tree(client, collection)?;
3195    if let Some(collection_node) = collection_match.as_ref() {
3196        if let Some(name) = collection_node.get("name").and_then(Value::as_str) {
3197            store_candidates.push(rag_store_path(name));
3198        }
3199        if let Some(key) = collection_node.get("key").and_then(Value::as_str) {
3200            store_candidates.push(rag_store_path(key));
3201        }
3202    }
3203    for store_path in unique_paths(store_candidates) {
3204        if store_path.exists() {
3205            return rag_status_from_store(collection, &store_path);
3206        }
3207    }
3208
3209    rag_status_from_zotero_sidecars(client, collection, collection_match)
3210}
3211
/// Removes duplicate paths while keeping the first occurrence of each, in
/// the original order.
fn unique_paths(paths: Vec<PathBuf>) -> Vec<PathBuf> {
    paths
        .into_iter()
        .fold(Vec::<PathBuf>::new(), |mut kept, candidate| {
            if !kept.contains(&candidate) {
                kept.push(candidate);
            }
            kept
        })
}
3221
3222fn rag_status_from_store(collection: &str, store_path: &Path) -> Result<Value, String> {
3223    let raw = fs::read_to_string(store_path)
3224        .map_err(|err| format!("read RAG store {}: {err}", store_path.display()))?;
3225    let store: Value = serde_json::from_str(&raw)
3226        .map_err(|err| format!("parse RAG store {}: {err}", store_path.display()))?;
3227    let chunks = store
3228        .get("chunks")
3229        .and_then(Value::as_array)
3230        .cloned()
3231        .unwrap_or_default();
3232    let mut item_keys = Vec::<Value>::new();
3233    for chunk in &chunks {
3234        let Some(item_key) = chunk.get("item_key") else {
3235            continue;
3236        };
3237        if !item_keys.iter().any(|seen| seen == item_key) {
3238            item_keys.push(item_key.clone());
3239        }
3240    }
3241    Ok(serde_json::json!({
3242        "status": "indexed",
3243        "collection": store.get("collection").and_then(Value::as_str).unwrap_or(collection),
3244        "collection_key": store.get("collection_key").cloned().unwrap_or(Value::Null),
3245        "model": store.get("model").cloned().unwrap_or(Value::String("unknown".to_string())),
3246        "total_chunks": chunks.len(),
3247        "total_items": item_keys.len(),
3248        "store_path": store_path.to_string_lossy(),
3249    }))
3250}
3251
3252fn rag_status_from_zotero_sidecars(
3253    client: &mut impl RpcCaller,
3254    collection: &str,
3255    collection_match: Option<Value>,
3256) -> Result<Value, String> {
3257    let collection_key = collection_match
3258        .as_ref()
3259        .and_then(|node| node.get("key").cloned())
3260        .ok_or_else(|| format!("COLLECTION_NOT_FOUND: Collection not found: {collection:?}"))?;
3261    let raw = paginate_rpc(
3262        client,
3263        "collections.getItems",
3264        serde_json::json!({"key": collection_key}),
3265        500,
3266    )?;
3267    let items = raw
3268        .get("items")
3269        .and_then(Value::as_array)
3270        .or_else(|| raw.as_array())
3271        .ok_or_else(|| "collections.getItems returned non-array/non-items result".to_string())?
3272        .clone();
3273
3274    let mut indexed_items = 0usize;
3275    let mut total_chunks = 0usize;
3276    for item in &items {
3277        let item_key = item.get("key").cloned().unwrap_or(Value::Null);
3278        let chunk_count = sidecar_chunk_count_for_item(client, &item_key)?;
3279        if chunk_count > 0 {
3280            indexed_items += 1;
3281            total_chunks += chunk_count;
3282        }
3283    }
3284
3285    if indexed_items == 0 {
3286        return Ok(serde_json::json!({
3287            "status": "not indexed",
3288            "collection": collection,
3289            "total_items": items.len(),
3290            "indexed_items": 0,
3291        }));
3292    }
3293
3294    Ok(serde_json::json!({
3295        "status": "indexed",
3296        "collection": collection,
3297        "total_chunks": total_chunks,
3298        "total_items": indexed_items,
3299        "collection_items": items.len(),
3300        "source": "zotero-sidecar",
3301    }))
3302}
3303
3304fn sidecar_chunk_count_for_item(
3305    client: &mut impl RpcCaller,
3306    item_key: &Value,
3307) -> Result<usize, String> {
3308    let attachments = client.call(
3309        "attachments.list",
3310        Some(serde_json::json!({"parentKey": item_key.clone()})),
3311    )?;
3312    let Some(attachments) = attachments.as_array() else {
3313        return Ok(0);
3314    };
3315
3316    let mut count = 0usize;
3317    for attachment in attachments {
3318        let Some(path) = attachment.get("path").and_then(Value::as_str) else {
3319            continue;
3320        };
3321        let local = local_path_from_zotero_path(path);
3322        let Some(dir) = Path::new(&local).parent() else {
3323            continue;
3324        };
3325        let Ok(bytes) = read_machine_artifact_sidecar(dir, MachineArtifactKind::Chunks) else {
3326            continue;
3327        };
3328        let text = String::from_utf8_lossy(&bytes);
3329        count += text.lines().filter(|line| !line.trim().is_empty()).count();
3330    }
3331    Ok(count)
3332}
3333
3334fn rag_store_path(collection: &str) -> PathBuf {
3335    rag_store_root().join(format!("{collection}.json"))
3336}
3337
3338fn rag_store_root() -> PathBuf {
3339    let xdg_data_home = env::var_os("XDG_DATA_HOME")
3340        .filter(|path| !path.is_empty())
3341        .map(PathBuf::from);
3342    let appdata = env::var_os("APPDATA")
3343        .filter(|path| !path.is_empty())
3344        .map(PathBuf::from);
3345    let userprofile = env::var_os("USERPROFILE")
3346        .filter(|path| !path.is_empty())
3347        .map(PathBuf::from);
3348    let home = env::var_os("HOME")
3349        .filter(|path| !path.is_empty())
3350        .map(PathBuf::from);
3351
3352    rag_store_root_for_platform(
3353        ArtifactStorePlatform::current(),
3354        xdg_data_home.as_deref(),
3355        appdata.as_deref(),
3356        userprofile.as_deref(),
3357        home.as_deref(),
3358    )
3359}
3360
3361fn rag_store_root_for_platform(
3362    platform: ArtifactStorePlatform,
3363    xdg_data_home: Option<&Path>,
3364    appdata: Option<&Path>,
3365    userprofile: Option<&Path>,
3366    home: Option<&Path>,
3367) -> PathBuf {
3368    match platform {
3369        ArtifactStorePlatform::Windows => {
3370            if let Some(path) = appdata {
3371                return path.join("Zotron").join("rag");
3372            }
3373            if let Some(path) = userprofile {
3374                return path
3375                    .join("AppData")
3376                    .join("Roaming")
3377                    .join("Zotron")
3378                    .join("rag");
3379            }
3380            if let Some(path) = home {
3381                return path
3382                    .join("AppData")
3383                    .join("Roaming")
3384                    .join("Zotron")
3385                    .join("rag");
3386            }
3387            PathBuf::from(".zotron").join("rag")
3388        }
3389        ArtifactStorePlatform::Macos => {
3390            if let Some(path) = home {
3391                return path
3392                    .join("Library")
3393                    .join("Application Support")
3394                    .join("Zotron")
3395                    .join("rag");
3396            }
3397            if let Some(path) = xdg_data_home {
3398                return path.join("zotron").join("rag");
3399            }
3400            PathBuf::from(".zotron").join("rag")
3401        }
3402        ArtifactStorePlatform::Linux | ArtifactStorePlatform::Other => xdg_data_home
3403            .map(|path| path.join("zotron").join("rag"))
3404            .or_else(|| {
3405                home.map(|path| path.join(".local").join("share").join("zotron").join("rag"))
3406            })
3407            .unwrap_or_else(|| PathBuf::from(".zotron").join("rag")),
3408    }
3409}
3410
3411fn run_push_command(
3412    json_file: String,
3413    pdf: Option<String>,
3414    collection: Option<String>,
3415    on_duplicate: String,
3416    dry_run: bool,
3417    client: &mut impl RpcCaller,
3418) -> Result<String, String> {
3419    if !matches!(on_duplicate.as_str(), "skip" | "update" | "create") {
3420        return Err(format!(
3421            "INVALID_ARGS: --on-duplicate must be skip|update|create, got {on_duplicate:?}"
3422        ));
3423    }
3424
3425    let payload = if json_file == "-" {
3426        let mut input = String::new();
3427        io::stdin()
3428            .read_to_string(&mut input)
3429            .map_err(|err| format!("read stdin: {err}"))?;
3430        input
3431    } else {
3432        fs::read_to_string(&json_file).map_err(|err| format!("read {json_file}: {err}"))?
3433    };
3434    let item_json = serde_json::from_str::<Value>(&payload)
3435        .map_err(|err| format!("INVALID_JSON: Could not parse JSON: {err}"))?;
3436
3437    // Validate required fields
3438    match item_json.get("itemType").and_then(Value::as_str) {
3439        Some(s) if !s.is_empty() => {}
3440        _ => return Err("INVALID_ARGS: input JSON must include a non-empty \"itemType\" field".to_string()),
3441    }
3442
3443    if dry_run {
3444        let collection_key = collection
3445            .as_deref()
3446            .map(|name| resolve_collection(client, name))
3447            .transpose()?;
3448        return format_json(
3449            &serde_json::json!({
3450                "ok": true,
3451                "dryRun": true,
3452                "wouldPush": {
3453                    "title": item_json.get("title").cloned().unwrap_or(Value::Null),
3454                    "itemType": item_json.get("itemType").cloned().unwrap_or(Value::Null),
3455                    "collectionKey": collection_key,
3456                    "pdfPath": pdf,
3457                    "onDuplicate": on_duplicate,
3458                }
3459            }),
3460            JsonStyle::PythonCompact,
3461        );
3462    }
3463
3464    let result = push_item(
3465        client,
3466        &item_json,
3467        pdf.as_deref(),
3468        collection.as_deref(),
3469        &on_duplicate,
3470    )?;
3471    format_json(&result, JsonStyle::PythonCompact)
3472}
3473
3474fn push_item(
3475    client: &mut impl RpcCaller,
3476    item_json: &Value,
3477    pdf_path: Option<&str>,
3478    collection: Option<&str>,
3479    on_duplicate: &str,
3480) -> Result<Value, String> {
3481    let pdf_size = if let Some(path) = pdf_path {
3482        validate_pdf_magic(path)?
3483    } else {
3484        0
3485    };
3486
3487    let collection_key = match collection {
3488        Some(name) => resolve_collection(client, name)?,
3489        None => resolve_current_collection(client)?,
3490    };
3491
3492    let dup_id = find_duplicate(client, item_json)?;
3493    if let Some(dup_id) = dup_id.as_deref().filter(|_| on_duplicate == "skip") {
3494        if !is_library_root(&collection_key) {
3495            client.call(
3496                "collections.addItems",
3497                Some(serde_json::json!({"key": collection_key, "keys": [dup_id]})),
3498            )?;
3499        }
3500        let mut pdf_attached = false;
3501        if let Some(path) = pdf_path {
3502            if !item_has_pdf_attachment(client, dup_id)? {
3503                attach_pdf(client, dup_id, path)?;
3504                pdf_attached = true;
3505            }
3506        }
3507        return Ok(push_result(
3508            "skipped_duplicate",
3509            Some(dup_id.to_string()),
3510            pdf_attached,
3511            if pdf_attached { pdf_size } else { 0 },
3512            Value::Null,
3513        ));
3514    }
3515
3516    let xpi_payload = to_xpi_payload(item_json, Some(&collection_key));
3517    let (item_key, status) =
3518        if let Some(dup_id) = dup_id.as_deref().filter(|_| on_duplicate == "update") {
3519            let mut params = serde_json::Map::new();
3520            params.insert("key".to_string(), Value::String(dup_id.to_string()));
3521            params.insert(
3522                "fields".to_string(),
3523                xpi_payload
3524                    .get("fields")
3525                    .cloned()
3526                    .unwrap_or_else(|| serde_json::json!({})),
3527            );
3528            if let Some(creators) = xpi_payload.get("creators") {
3529                params.insert("creators".to_string(), creators.clone());
3530            }
3531            if let Some(tags) = xpi_payload.get("tags") {
3532                params.insert("tags".to_string(), tags.clone());
3533            }
3534            client.call("items.update", Some(Value::Object(params)))?;
3535            (dup_id.to_string(), "updated")
3536        } else {
3537            let created = client.call("items.create", Some(xpi_payload))?;
3538            let key = created
3539                .get("key")
3540                .and_then(Value::as_str)
3541                .ok_or_else(|| format!("items.create returned unexpected shape: {created:?}"))?;
3542            (key.to_string(), "created")
3543        };
3544
3545    let mut pdf_attached = false;
3546    if let Some(path) = pdf_path {
3547        if status != "updated" || !item_has_pdf_attachment(client, &item_key)? {
3548            attach_pdf(client, &item_key, path)?;
3549            pdf_attached = true;
3550        }
3551    }
3552
3553    if status == "updated" && !is_library_root(&collection_key) {
3554        client.call(
3555            "collections.addItems",
3556            Some(serde_json::json!({"key": collection_key, "keys": [item_key]})),
3557        )?;
3558    }
3559
3560    Ok(push_result(
3561        status,
3562        Some(item_key),
3563        pdf_attached,
3564        if pdf_attached { pdf_size } else { 0 },
3565        Value::Null,
3566    ))
3567}
3568
/// Checks that `path` is a readable file starting with the `%PDF-` magic bytes
/// and returns its size in bytes.
///
/// Only the first five bytes are read; the size comes from the file metadata,
/// so large PDFs are not loaded into memory just to be validated.
///
/// # Errors
///
/// Returns an `INVALID_PDF: cannot read ...` message when the file cannot be
/// opened, read, or inspected, and an `INVALID_PDF: ... does not start with
/// %PDF- magic bytes` message when the header does not match (including files
/// shorter than the magic).
fn validate_pdf_magic(path: &str) -> Result<u64, String> {
    const PDF_MAGIC: &[u8] = b"%PDF-";

    let unreadable = |e: io::Error| format!("INVALID_PDF: cannot read {path}: {e}");

    let file = fs::File::open(path).map_err(&unreadable)?;

    // `take` bounds the read, so a short file yields a short header rather than an error.
    let mut header = Vec::with_capacity(PDF_MAGIC.len());
    (&file)
        .take(PDF_MAGIC.len() as u64)
        .read_to_end(&mut header)
        .map_err(&unreadable)?;
    if header.as_slice() != PDF_MAGIC {
        return Err(format!(
            "INVALID_PDF: {path} does not start with %PDF- magic bytes"
        ));
    }

    let size = file.metadata().map_err(&unreadable)?.len();
    Ok(size)
}
3579
3580fn resolve_current_collection(client: &mut impl RpcCaller) -> Result<Value, String> {
3581    let selected = client.call("system.currentCollection", None)?;
3582    Ok(selected
3583        .get("key")
3584        .cloned()
3585        .unwrap_or_else(|| Value::Number(0.into())))
3586}
3587
3588fn find_duplicate(
3589    client: &mut impl RpcCaller,
3590    item_json: &Value,
3591) -> Result<Option<String>, String> {
3592    if let Some(doi) = item_json
3593        .get("DOI")
3594        .and_then(Value::as_str)
3595        .filter(|doi| !doi.is_empty())
3596    {
3597        let hits = client.call("search.byIdentifier", Some(serde_json::json!({"doi": doi})))?;
3598        if let Some(key) = first_hit_key(&hits) {
3599            return Ok(Some(key));
3600        }
3601    }
3602
3603    if let Some(title) = item_json
3604        .get("title")
3605        .and_then(Value::as_str)
3606        .filter(|title| title.len() >= 10)
3607    {
3608        let hits = client.call(
3609            "search.quick",
3610            Some(serde_json::json!({"query": title, "limit": 20})),
3611        )?;
3612        if let Some(items) = response_items(&hits) {
3613            for item in items {
3614                if item.get("title").and_then(Value::as_str) == Some(title) {
3615                    if let Some(key) = item.get("key").and_then(Value::as_str) {
3616                        return Ok(Some(key.to_string()));
3617                    }
3618                }
3619            }
3620        }
3621    }
3622
3623    Ok(None)
3624}
3625
3626fn first_hit_key(response: &Value) -> Option<String> {
3627    response_items(response)?
3628        .first()?
3629        .get("key")?
3630        .as_str()
3631        .map(ToString::to_string)
3632}
3633
3634fn response_items(response: &Value) -> Option<&Vec<Value>> {
3635    response
3636        .get("items")
3637        .and_then(Value::as_array)
3638        .or_else(|| response.as_array())
3639}
3640
3641fn to_xpi_payload(item_json: &Value, collection_key: Option<&Value>) -> Value {
3642    const NON_FIELD_KEYS: &[&str] = &[
3643        "itemType",
3644        "creators",
3645        "tags",
3646        "collections",
3647        "attachments",
3648        "relations",
3649        "notes",
3650        "id",
3651        "key",
3652        "version",
3653    ];
3654
3655    let mut fields = serde_json::Map::new();
3656    if let Some(item) = item_json.as_object() {
3657        for (key, value) in item {
3658            if !NON_FIELD_KEYS.contains(&key.as_str()) && !value.is_null() && value != "" {
3659                fields.insert(key.clone(), value.clone());
3660            }
3661        }
3662    }
3663
3664    let mut payload = serde_json::Map::new();
3665    payload.insert(
3666        "itemType".to_string(),
3667        item_json
3668            .get("itemType")
3669            .cloned()
3670            .unwrap_or_else(|| Value::String("journalArticle".to_string())),
3671    );
3672    payload.insert("fields".to_string(), Value::Object(fields));
3673
3674    if let Some(creators) = item_json.get("creators").and_then(Value::as_array) {
3675        if !creators.is_empty() {
3676            payload.insert(
3677                "creators".to_string(),
3678                Value::Array(
3679                    creators
3680                        .iter()
3681                        .map(|creator| {
3682                            let mut c = serde_json::json!({
3683                                "firstName": creator.get("firstName").and_then(Value::as_str).unwrap_or(""),
3684                                "lastName": creator.get("lastName").and_then(Value::as_str).unwrap_or(""),
3685                                "creatorType": creator.get("creatorType").and_then(Value::as_str).unwrap_or("author"),
3686                            });
3687                            if let Some(fm) = creator.get("fieldMode").and_then(Value::as_u64) {
3688                                c["fieldMode"] = Value::from(fm);
3689                            }
3690                            c
3691                        })
3692                        .collect(),
3693                ),
3694            );
3695        }
3696    }
3697
3698    if let Some(tags) = item_json.get("tags").and_then(Value::as_array) {
3699        if !tags.is_empty() {
3700            payload.insert(
3701                "tags".to_string(),
3702                Value::Array(
3703                    tags.iter()
3704                        .map(|tag| tag.get("tag").cloned().unwrap_or_else(|| tag.clone()))
3705                        .collect(),
3706                ),
3707            );
3708        }
3709    }
3710
3711    if let Some(collection_key) = collection_key.filter(|key| !is_library_root(key)) {
3712        payload.insert(
3713            "collections".to_string(),
3714            Value::Array(vec![collection_key.clone()]),
3715        );
3716    }
3717
3718    Value::Object(payload)
3719}
3720
3721fn item_has_pdf_attachment(client: &mut impl RpcCaller, item_key: &str) -> Result<bool, String> {
3722    let attachments = client.call(
3723        "attachments.list",
3724        Some(serde_json::json!({"parentKey": item_key})),
3725    )?;
3726    Ok(has_pdf_attachment(&attachments))
3727}
3728
3729fn attach_pdf(client: &mut impl RpcCaller, item_key: &str, path: &str) -> Result<(), String> {
3730    client.call(
3731        "attachments.add",
3732        Some(serde_json::json!({
3733            "parentKey": item_key,
3734            "path": zotero_path(path),
3735            "title": "Full Text PDF",
3736        })),
3737    )?;
3738    Ok(())
3739}
3740
3741fn zotero_path(path: &str) -> String {
3742    let path = Path::new(path)
3743        .canonicalize()
3744        .unwrap_or_else(|_| Path::new(path).to_path_buf())
3745        .to_string_lossy()
3746        .into_owned();
3747    if is_wsl() {
3748        return ProcessCommand::new("wslpath")
3749            .arg("-w")
3750            .arg(&path)
3751            .output()
3752            .ok()
3753            .filter(|output| output.status.success())
3754            .and_then(|output| String::from_utf8(output.stdout).ok())
3755            .map(|converted| converted.trim().to_string())
3756            .filter(|converted| !converted.is_empty())
3757            .unwrap_or(path);
3758    }
3759    path
3760}
3761
/// Reports whether the process appears to run under WSL.
///
/// True when the `WSL_DISTRO_NAME` environment variable is set, or when
/// `/proc/sys/kernel/osrelease` is readable and contains "microsoft"
/// (compared case-insensitively).
fn is_wsl() -> bool {
    env::var_os("WSL_DISTRO_NAME").is_some()
        || fs::read_to_string("/proc/sys/kernel/osrelease")
            .is_ok_and(|release| release.to_ascii_lowercase().contains("microsoft"))
}
3770
3771fn is_library_root(value: &Value) -> bool {
3772    value.as_i64() == Some(0) || value.as_u64() == Some(0)
3773}
3774
3775fn push_result(
3776    status: &str,
3777    zotero_item_key: Option<String>,
3778    pdf_attached: bool,
3779    pdf_size_bytes: u64,
3780    error: Value,
3781) -> Value {
3782    serde_json::json!({
3783        "status": status,
3784        "zotero_item_key": zotero_item_key,
3785        "pdf_attached": pdf_attached,
3786        "pdf_size_bytes": pdf_size_bytes,
3787        "error": error,
3788    })
3789}
3790
3791fn run_search(
3792    args: SearchArgs,
3793    client: &mut impl RpcCaller,
3794) -> Result<(Value, JsonStyle), String> {
3795    let SearchArgs {
3796        query, fulltext, author, after, before, journal, tag,
3797        doi, isbn, issn, collection, limit, offset, ..
3798    } = args;
3799
3800    let has_identifier = doi.is_some() || isbn.is_some() || issn.is_some();
3801    if has_identifier {
3802        let mut params = serde_json::Map::new();
3803        if let Some(doi) = doi { params.insert("doi".into(), Value::String(doi)); }
3804        if let Some(isbn) = isbn { params.insert("isbn".into(), Value::String(isbn)); }
3805        if let Some(issn) = issn { params.insert("issn".into(), Value::String(issn)); }
3806        let value = client.call("search.byIdentifier", Some(Value::Object(params)))?;
3807        return Ok((normalize_list_envelope(value, "items", None, 0), JsonStyle::Pretty));
3808    }
3809
3810    if fulltext {
3811        let query = query.ok_or("INVALID_ARGS: --fulltext requires a search query")?;
3812        let mut params = serde_json::json!({"query": query, "limit": limit});
3813        if let (Some(col), Some(map)) = (collection, params.as_object_mut()) {
3814            map.insert("collection".into(), resolve_collection(client, &col)?);
3815        }
3816        let value = client.call("search.fulltext", Some(params))?;
3817        return Ok((normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty));
3818    }
3819
3820    let has_filters = author.is_some() || after.is_some() || before.is_some()
3821        || journal.is_some() || tag.is_some();
3822    if has_filters {
3823        let mut conditions: Vec<Value> = Vec::new();
3824        if let Some(query) = &query {
3825            conditions.push(serde_json::json!({
3826                "field": "quicksearch-titleCreatorYear",
3827                "operator": "contains",
3828                "value": query,
3829            }));
3830        }
3831        if let Some(author) = author {
3832            conditions.push(serde_json::json!({
3833                "field": "creator", "operator": "contains", "value": author,
3834            }));
3835        }
3836        if let Some(after) = after {
3837            conditions.push(serde_json::json!({
3838                "field": "date", "operator": "isAfter", "value": after,
3839            }));
3840        }
3841        if let Some(before) = before {
3842            conditions.push(serde_json::json!({
3843                "field": "date", "operator": "isBefore", "value": before,
3844            }));
3845        }
3846        if let Some(journal) = journal {
3847            conditions.push(serde_json::json!({
3848                "field": "publicationTitle", "operator": "contains", "value": journal,
3849            }));
3850        }
3851        if let Some(tag) = tag {
3852            conditions.push(serde_json::json!({
3853                "field": "tag", "operator": "is", "value": tag,
3854            }));
3855        }
3856        let value = client.call(
3857            "search.advanced",
3858            Some(serde_json::json!({
3859                "conditions": conditions,
3860                "operator": "and",
3861                "limit": limit,
3862                "offset": offset,
3863            })),
3864        )?;
3865        return Ok((normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty));
3866    }
3867
3868    let query = query.ok_or(
3869        "INVALID_ARGS: provide a search query, or use --doi/--isbn/--issn for identifier lookup"
3870    )?;
3871    let value = if let Some(col) = collection {
3872        let key = resolve_collection(client, &col)?;
3873        let response = client.call(
3874            "collections.getItems",
3875            Some(serde_json::json!({"key": key})),
3876        )?;
3877        collection_quick_search_response(&response, &query, limit)
3878    } else {
3879        filter_search_artifacts(client.call(
3880            "search.quick",
3881            Some(serde_json::json!({"query": query, "limit": limit})),
3882        )?)
3883    };
3884    Ok((normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty))
3885}
3886
3887fn run_search_management_command(
3888    command: SearchManagementCommand,
3889    client: &mut impl RpcCaller,
3890) -> Result<(Value, JsonStyle), String> {
3891    match command {
3892        SearchManagementCommand::SavedSearches { .. } => Ok((
3893            normalize_list_envelope(client.call("search.savedSearches", None)?, "items", None, 0),
3894            JsonStyle::Pretty,
3895        )),
3896        SearchManagementCommand::CreateSaved {
3897            name, condition, dry_run, ..
3898        } => {
3899            let conditions = condition
3900                .iter()
3901                .map(|raw| parse_search_condition(raw))
3902                .collect::<Result<Vec<_>, _>>()?;
3903            let params = serde_json::json!({"name": name, "conditions": conditions});
3904            if dry_run {
3905                Ok((dry_run_value("search.createSavedSearch", params), JsonStyle::PythonCompact))
3906            } else {
3907                Ok((client.call("search.createSavedSearch", Some(params))?, JsonStyle::PythonCompact))
3908            }
3909        }
3910        SearchManagementCommand::DeleteSaved {
3911            search_key, dry_run, ..
3912        } => {
3913            let params = serde_json::json!({"key": search_key});
3914            if dry_run {
3915                Ok((dry_run_value("search.deleteSavedSearch", params), JsonStyle::PythonCompact))
3916            } else {
3917                Ok((client.call("search.deleteSavedSearch", Some(params))?, JsonStyle::PythonCompact))
3918            }
3919        }
3920    }
3921}
3922
3923fn filter_search_artifacts(mut value: Value) -> Value {
3924    let Some(items) = value.get_mut("items").and_then(Value::as_array_mut) else {
3925        return value;
3926    };
3927    items.retain(|item| match item.get("title").and_then(Value::as_str) {
3928        Some(title) => !is_zotron_evidence_artifact(title),
3929        None => true,
3930    });
3931    let total_items = items.len() as u64;
3932    if let Some(total) = value.get_mut("total") {
3933        *total = Value::from(total_items);
3934    }
3935    value
3936}
3937
3938fn collection_quick_search_response(response: &Value, query: &str, limit: u64) -> Value {
3939    let mut matched = collection_items(response)
3940        .into_iter()
3941        .filter(|item| !item_is_evidence_artifact(item))
3942        .filter(|item| quick_item_matches(item, query))
3943        .collect::<Vec<_>>();
3944    let total = matched.len() as u64;
3945    let limit = usize::try_from(limit).unwrap_or(usize::MAX);
3946    if matched.len() > limit {
3947        matched.truncate(limit);
3948    }
3949    serde_json::json!({"items": matched, "total": total})
3950}
3951
3952fn item_is_evidence_artifact(item: &Value) -> bool {
3953    item.get("title")
3954        .and_then(Value::as_str)
3955        .is_some_and(is_zotron_evidence_artifact)
3956}
3957
3958fn quick_item_matches(item: &Value, query: &str) -> bool {
3959    let terms = query
3960        .split_whitespace()
3961        .map(|term| term.to_lowercase())
3962        .filter(|term| !term.is_empty())
3963        .collect::<Vec<_>>();
3964    if terms.is_empty() {
3965        return true;
3966    }
3967    let mut haystack = String::new();
3968    append_search_text(item, &mut haystack);
3969    let haystack = haystack.to_lowercase();
3970    terms.iter().all(|term| haystack.contains(term))
3971}
3972
3973fn append_search_text(value: &Value, out: &mut String) {
3974    match value {
3975        Value::String(text) => {
3976            out.push(' ');
3977            out.push_str(text);
3978        }
3979        Value::Number(number) => {
3980            out.push(' ');
3981            out.push_str(&number.to_string());
3982        }
3983        Value::Bool(value) => {
3984            out.push(' ');
3985            out.push_str(if *value { "true" } else { "false" });
3986        }
3987        Value::Array(items) => {
3988            for item in items {
3989                append_search_text(item, out);
3990            }
3991        }
3992        Value::Object(map) => {
3993            for item in map.values() {
3994                append_search_text(item, out);
3995            }
3996        }
3997        Value::Null => {}
3998    }
3999}
4000
4001fn parse_search_condition(raw: &str) -> Result<Value, String> {
4002    let mut parts = raw.split_whitespace();
4003    let field = parts.next();
4004    let operator = parts.next();
4005    let value = parts.collect::<Vec<_>>().join(" ");
4006    match (field, operator, value.is_empty()) {
4007        (Some(field), Some(operator), false) => Ok(serde_json::json!({
4008            "field": field,
4009            "operator": operator,
4010            "value": value,
4011        })),
4012        _ => Err(format!(
4013            "INVALID_ARGS: --condition must be 'field operator value', got: {raw:?}"
4014        )),
4015    }
4016}
4017
4018fn normalize_list_envelope(value: Value, list_key: &str, limit: Option<u64>, offset: u64) -> Value {
4019    if let Value::Array(arr) = value {
4020        let total = arr.len() as u64;
4021        let mut obj = serde_json::Map::new();
4022        obj.insert(list_key.to_string(), Value::Array(arr));
4023        obj.insert("total".to_string(), Value::from(total));
4024        if let Some(limit) = limit {
4025            obj.insert("limit".to_string(), Value::from(limit));
4026        }
4027        obj.insert("offset".to_string(), Value::from(offset));
4028        obj.insert("hasMore".to_string(), Value::Bool(false));
4029        return Value::Object(obj);
4030    }
4031
4032    let mut obj = match value {
4033        Value::Object(obj) if obj.contains_key(list_key) => obj,
4034        other => return other,
4035    };
4036
4037    let items_len = obj
4038        .get(list_key)
4039        .and_then(Value::as_array)
4040        .map_or(0, |a| a.len()) as u64;
4041    let total = obj
4042        .get("total")
4043        .and_then(Value::as_u64)
4044        .unwrap_or(items_len);
4045
4046    obj.insert("total".to_string(), Value::from(total));
4047    if let Some(limit) = limit {
4048        obj.insert("limit".to_string(), Value::from(limit));
4049    }
4050    obj.insert("offset".to_string(), Value::from(offset));
4051    obj.insert(
4052        "hasMore".to_string(),
4053        Value::Bool(offset + items_len < total),
4054    );
4055
4056    Value::Object(obj)
4057}
4058
/// Maximum number of rows `paginate_rpc` accumulates; once reached, the result
/// is truncated to this length and returned.
const RPC_PAGINATION_SAFETY_CAP: usize = 10_000;
/// Object keys that `extract_page` probes, in this order, for the array that
/// holds a page of rows.
const RPC_PAGE_LIST_KEYS: [&str; 4] = ["items", "tags", "results", "data"];
4061
4062fn paginate_rpc(
4063    client: &mut impl RpcCaller,
4064    method: &str,
4065    params: Value,
4066    page_size: usize,
4067) -> Result<Value, String> {
4068    let base = params
4069        .as_object()
4070        .ok_or_else(|| "params must be a JSON object".to_string())?;
4071    let mut out = Vec::new();
4072    let mut prev_page: Option<Vec<Value>> = None;
4073    let mut offset = 0usize;
4074
4075    loop {
4076        let mut page_params = base.clone();
4077        page_params.insert("offset".to_string(), Value::Number(offset.into()));
4078        page_params.insert("limit".to_string(), Value::Number(page_size.into()));
4079        let response = client.call(method, Some(Value::Object(page_params)))?;
4080
4081        let page = match extract_page(&response) {
4082            Some(page) => page,
4083            None if out.is_empty() => return Ok(response),
4084            None if response.is_object() => {
4085                return Err(format!(
4086                    "paginate: {method:?} returned a non-paginated dict after {} accumulated rows; aborting",
4087                    out.len()
4088                ));
4089            }
4090            None => {
4091                return Err(format!(
4092                    "paginate: {method:?} returned non-list/non-dict shape after {} accumulated rows; aborting",
4093                    out.len()
4094                ));
4095            }
4096        };
4097
4098        if prev_page.as_ref() == Some(&page) {
4099            return Err(format!(
4100                "paginate: {method:?} returned identical pages — method likely ignores offset; aborting after {} rows",
4101                out.len()
4102            ));
4103        }
4104
4105        let page_len = page.len();
4106        out.extend(page.clone());
4107        if page_len < page_size {
4108            return Ok(Value::Array(out));
4109        }
4110        if out.len() >= RPC_PAGINATION_SAFETY_CAP {
4111            out.truncate(RPC_PAGINATION_SAFETY_CAP);
4112            return Ok(Value::Array(out));
4113        }
4114        prev_page = Some(page);
4115        offset += page_size;
4116    }
4117}
4118
4119fn extract_page(response: &Value) -> Option<Vec<Value>> {
4120    if let Some(page) = response.as_array() {
4121        return Some(page.clone());
4122    }
4123    let object = response.as_object()?;
4124    for key in RPC_PAGE_LIST_KEYS {
4125        if let Some(page) = object.get(key).and_then(Value::as_array) {
4126            return Some(page.clone());
4127        }
4128    }
4129    None
4130}
4131
4132fn run_find_pdfs_command(
4133    client: &mut impl RpcCaller,
4134    collection: String,
4135    limit: usize,
4136) -> Result<(Value, JsonStyle), String> {
4137    let collection_key = resolve_collection(client, &collection)?;
4138    let response = client.call(
4139        "collections.getItems",
4140        Some(serde_json::json!({"key": collection_key})),
4141    )?;
4142    let items = collection_items(&response);
4143
4144    let mut missing = Vec::new();
4145    for item in &items {
4146        let Some(item_key) = item.get("key").and_then(Value::as_str) else {
4147            continue;
4148        };
4149        let attachments = client.call(
4150            "attachments.list",
4151            Some(serde_json::json!({"parentKey": item_key})),
4152        )?;
4153        if !has_pdf_attachment(&attachments) {
4154            missing.push(item.clone());
4155        }
4156        if limit > 0 && missing.len() >= limit {
4157            break;
4158        }
4159    }
4160
4161    let mut results = Vec::new();
4162    for item in &missing {
4163        let item_key = item
4164            .get("key")
4165            .and_then(Value::as_str)
4166            .ok_or_else(|| "missing item lacks key".to_string())?;
4167        let response = client.call(
4168            "attachments.findPDF",
4169            Some(serde_json::json!({"parentKey": item_key})),
4170        )?;
4171        let attachment = response.get("attachment").filter(|value| !value.is_null());
4172        results.push(serde_json::json!({
4173            "item_key": item_key,
4174            "title": item.get("title").cloned().unwrap_or(Value::Null),
4175            "found": attachment.is_some(),
4176            "attachment_key": attachment
4177                .and_then(|attachment| attachment.get("key"))
4178                .cloned()
4179                .unwrap_or(Value::Null),
4180        }));
4181    }
4182
4183    Ok((
4184        serde_json::json!({
4185            "scanned": items.len(),
4186            "attempted": missing.len(),
4187            "results": results,
4188        }),
4189        JsonStyle::Pretty,
4190    ))
4191}
4192
4193fn collection_items(response: &Value) -> Vec<Value> {
4194    if let Some(items) = response.get("items").and_then(Value::as_array) {
4195        return items.clone();
4196    }
4197    response.as_array().cloned().unwrap_or_default()
4198}
4199
4200fn has_pdf_attachment(attachments: &Value) -> bool {
4201    attachments
4202        .as_array()
4203        .is_some_and(|attachments| attachments.iter().any(is_pdf_attachment))
4204}
4205
4206fn is_pdf_attachment(attachment: &Value) -> bool {
4207    let content_type = attachment
4208        .get("contentType")
4209        .and_then(Value::as_str)
4210        .unwrap_or_default()
4211        .to_lowercase();
4212    let path = attachment
4213        .get("path")
4214        .and_then(Value::as_str)
4215        .unwrap_or_default()
4216        .to_lowercase();
4217    matches!(
4218        content_type.as_str(),
4219        "application/pdf" | "application/x-pdf"
4220    ) || path.ends_with(".pdf")
4221}
4222
4223fn call_json(
4224    client: &mut impl RpcCaller,
4225    method: &str,
4226    params: Option<Value>,
4227) -> Result<Value, String> {
4228    client.call(method, params)
4229}
4230
4231fn run_system_command(
4232    command: SystemCommand,
4233    client: &mut impl RpcCaller,
4234) -> Result<(Value, JsonStyle), String> {
4235    let value = match command {
4236        SystemCommand::Version { .. } => client.call("system.version", None)?,
4237        SystemCommand::Libraries { .. } => client.call("system.libraries", None)?,
4238        SystemCommand::LibraryStats { library, .. } => {
4239            let params = library.map(|id| serde_json::json!({"id": id}));
4240            client.call("system.libraryStats", params)?
4241        }
4242        SystemCommand::Schema { item_type, .. } => {
4243            if let Some(item_type) = item_type {
4244                let fields = client.call("system.itemFields", Some(serde_json::json!({"itemType": item_type})))?;
4245                let creators = client.call("system.creatorTypes", Some(serde_json::json!({"itemType": item_type})))?;
4246                let field_names: Vec<Value> = fields.as_array().unwrap_or(&vec![])
4247                    .iter()
4248                    .filter_map(|f| f.get("field").cloned())
4249                    .collect();
4250                let creator_names: Vec<Value> = creators.as_array().unwrap_or(&vec![])
4251                    .iter()
4252                    .filter_map(|c| c.get("creatorType").cloned())
4253                    .collect();
4254                serde_json::json!({
4255                    "itemType": item_type,
4256                    "fields": field_names,
4257                    "creatorTypes": creator_names,
4258                })
4259            } else {
4260                let types = client.call("system.itemTypes", None)?;
4261                let type_names: Vec<Value> = types.as_array().unwrap_or(&vec![])
4262                    .iter()
4263                    .filter_map(|t| t.get("itemType").cloned())
4264                    .collect();
4265                Value::Array(type_names)
4266            }
4267        }
4268        SystemCommand::CurrentCollection { .. } => client.call("system.currentCollection", None)?,
4269        SystemCommand::Methods { method, .. } => {
4270            if let Some(method) = method {
4271                client.call("system.describe", Some(serde_json::json!({"method": method})))?
4272            } else {
4273                client.call("system.listMethods", None)?
4274            }
4275        }
4276    };
4277    Ok((value, JsonStyle::Pretty))
4278}
4279
4280fn run_items_command(
4281    command: ItemsCommand,
4282    client: &mut impl RpcCaller,
4283) -> Result<(Value, JsonStyle), String> {
4284    let (value, style) = match command {
4285        ItemsCommand::Add {
4286            doi,
4287            isbn,
4288            from_url,
4289            file,
4290            item_type,
4291            fields,
4292            collection,
4293            dry_run,
4294            ..
4295        } => {
4296            if let Some(doi) = doi {
4297                run_add_identifier_command(client, "items.addByDOI", "doi", doi, collection, dry_run)?
4298            } else if let Some(isbn) = isbn {
4299                run_add_identifier_command(client, "items.addByISBN", "isbn", isbn, collection, dry_run)?
4300            } else if let Some(from_url) = from_url {
4301                run_add_identifier_command(client, "items.addByURL", "url", from_url, collection, dry_run)?
4302            } else if let Some(file) = file {
4303                let mut params = serde_json::json!({"path": zotero_path(&file)});
4304                maybe_insert_collection(client, &mut params, collection)?;
4305                run_mutation_command(client, "items.addFromFile", params, dry_run)?
4306            } else if let Some(item_type) = item_type {
4307                let parsed_fields = parse_field_options(&fields)?;
4308                let mut params = serde_json::json!({"itemType": item_type});
4309                if !parsed_fields.is_empty() {
4310                    if let Some(map) = params.as_object_mut() {
4311                        map.insert("fields".to_string(), Value::Object(parsed_fields));
4312                    }
4313                }
4314                run_mutation_command(client, "items.create", params, dry_run)?
4315            } else {
4316                return Err("INVALID_ARGS: provide one of --doi, --isbn, --from-url, --file, or --type".into());
4317            }
4318        }
4319        ItemsCommand::Update {
4320            key,
4321            fields,
4322            dry_run,
4323            ..
4324        } => {
4325            let parsed_fields = parse_field_options(&fields)?;
4326            let mut params = serde_json::json!({"key": key});
4327            if !parsed_fields.is_empty() {
4328                if let Some(map) = params.as_object_mut() {
4329                    map.insert("fields".to_string(), Value::Object(parsed_fields));
4330                }
4331            }
4332            run_mutation_command(client, "items.update", params, dry_run)?
4333        }
4334        ItemsCommand::Delete { key, dry_run, .. } => run_mutation_command(
4335            client,
4336            "items.delete",
4337            serde_json::json!({"key": key}),
4338            dry_run,
4339        )?,
4340        ItemsCommand::Trash {
4341            items, dry_run, ..
4342        } => {
4343            if items.len() == 1 {
4344                run_mutation_command(
4345                    client,
4346                    "items.trash",
4347                    serde_json::json!({"key": items[0]}),
4348                    dry_run,
4349                )?
4350            } else {
4351                run_mutation_command(
4352                    client,
4353                    "items.batchTrash",
4354                    serde_json::json!({"keys": items}),
4355                    dry_run,
4356                )?
4357            }
4358        }
4359        ItemsCommand::Restore { item, dry_run, .. } => run_mutation_command(
4360            client,
4361            "items.restore",
4362            serde_json::json!({"key": item}),
4363            dry_run,
4364        )?,
4365        ItemsCommand::MergeDuplicates { keys, dry_run, .. } => {
4366            if keys.len() < 2 {
4367                return Err("INVALID_ARGS: need at least 2 keys to merge".to_string());
4368            }
4369            run_mutation_command(
4370                client,
4371                "items.mergeDuplicates",
4372                serde_json::json!({"keys": keys}),
4373                dry_run,
4374            )?
4375        }
4376        ItemsCommand::AddRelated {
4377            key,
4378            target,
4379            dry_run,
4380            ..
4381        } => run_mutation_command(
4382            client,
4383            "items.addRelated",
4384            serde_json::json!({"key": key, "targetKey": target}),
4385            dry_run,
4386        )?,
4387        ItemsCommand::RemoveRelated {
4388            key,
4389            target,
4390            dry_run,
4391            ..
4392        } => run_mutation_command(
4393            client,
4394            "items.removeRelated",
4395            serde_json::json!({"key": key, "targetKey": target}),
4396            dry_run,
4397        )?,
4398        ItemsCommand::Get { item, .. } => (
4399            client.call("items.get", Some(serde_json::json!({"key": item})))?,
4400            JsonStyle::Pretty,
4401        ),
4402        ItemsCommand::List {
4403            limit,
4404            offset,
4405            sort,
4406            direction,
4407            trash,
4408            ..
4409        } => {
4410            if trash {
4411                let value = client.call(
4412                    "items.getTrash",
4413                    Some(serde_json::json!({"limit": limit, "offset": offset})),
4414                )?;
4415                (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
4416            } else {
4417                let mut params = serde_json::json!({
4418                    "limit": limit,
4419                    "offset": offset,
4420                    "direction": direction,
4421                });
4422                if let (Some(sort), Some(map)) = (sort, params.as_object_mut()) {
4423                    map.insert("sort".to_string(), Value::String(sort));
4424                }
4425                let value = client.call("items.list", Some(params))?;
4426                (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
4427            }
4428        }
4429        ItemsCommand::FindDuplicates { .. } => (
4430            client.call("items.findDuplicates", None)?,
4431            JsonStyle::Pretty,
4432        ),
4433        ItemsCommand::Recent {
4434            limit,
4435            offset,
4436            recent_type,
4437            ..
4438        } => {
4439            if recent_type != "added" && recent_type != "modified" {
4440                return Err(format!(
4441                    "--type must be added or modified, got {recent_type:?}"
4442                ));
4443            }
4444            let value = client.call(
4445                "items.getRecent",
4446                Some(
4447                    serde_json::json!({"limit": limit, "offset": offset, "type": recent_type}),
4448                ),
4449            )?;
4450            (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
4451        }
4452        ItemsCommand::Fulltext { key, .. } => (
4453            client.call("items.getFullText", Some(serde_json::json!({"key": key})))?,
4454            JsonStyle::Pretty,
4455        ),
4456        ItemsCommand::Related { key, .. } => (
4457            normalize_list_envelope(
4458                client.call("items.getRelated", Some(serde_json::json!({"key": key})))?,
4459                "items",
4460                None,
4461                0,
4462            ),
4463            JsonStyle::Pretty,
4464        ),
4465        ItemsCommand::CitationKey { key, .. } => (
4466            client.call("items.citationKey", Some(serde_json::json!({"key": key})))?,
4467            JsonStyle::Pretty,
4468        ),
4469        ItemsCommand::Path { key, .. } => (
4470            localize_attachment_path_response(
4471                client.call("attachments.getPath", Some(serde_json::json!({"key": key})))?,
4472            ),
4473            JsonStyle::Pretty,
4474        ),
4475        ItemsCommand::Attachments { key, offset, .. } => {
4476            let value = client.call(
4477                "attachments.list",
4478                Some(serde_json::json!({"parentKey": key})),
4479            )?;
4480            let total = value
4481                .get("items")
4482                .and_then(Value::as_array)
4483                .map_or(0, |a| a.len()) as u64;
4484            (normalize_list_envelope(value, "items", Some(total), offset), JsonStyle::Pretty)
4485        }
4486        ItemsCommand::FindPdfs { collection, limit, .. } => {
4487            run_find_pdfs_command(client, collection, limit)?
4488        }
4489    };
4490    Ok((value, style))
4491}
4492
4493fn run_add_identifier_command(
4494    client: &mut impl RpcCaller,
4495    method: &str,
4496    param_name: &str,
4497    param_value: String,
4498    collection: Option<String>,
4499    dry_run: bool,
4500) -> Result<(Value, JsonStyle), String> {
4501    let mut params = Value::Object(serde_json::Map::from_iter([(
4502        param_name.to_string(),
4503        Value::String(param_value),
4504    )]));
4505    maybe_insert_collection(client, &mut params, collection)?;
4506    run_mutation_command(client, method, params, dry_run)
4507}
4508
4509fn run_mutation_command(
4510    client: &mut impl RpcCaller,
4511    method: &str,
4512    params: Value,
4513    dry_run: bool,
4514) -> Result<(Value, JsonStyle), String> {
4515    let value = if dry_run {
4516        serde_json::json!({
4517            "ok": true,
4518            "dryRun": true,
4519            "wouldCall": method,
4520            "wouldCallParams": params,
4521        })
4522    } else {
4523        client.call(method, Some(params))?
4524    };
4525    Ok((value, JsonStyle::PythonCompact))
4526}
4527
4528fn parse_field_options(fields: &[String]) -> Result<serde_json::Map<String, Value>, String> {
4529    let mut parsed = serde_json::Map::new();
4530    for field in fields {
4531        let (key, value) = field
4532            .split_once('=')
4533            .ok_or_else(|| format!("INVALID_ARGS: --field must be key=value, got: {field:?}"))?;
4534        parsed.insert(key.to_string(), Value::String(value.to_string()));
4535    }
4536    Ok(parsed)
4537}
4538
4539fn maybe_insert_collection(
4540    client: &mut impl RpcCaller,
4541    params: &mut Value,
4542    collection: Option<String>,
4543) -> Result<(), String> {
4544    let Some(collection) = collection else {
4545        return Ok(());
4546    };
4547    let collection = resolve_collection(client, &collection)?;
4548    let include = match &collection {
4549        Value::Null => false,
4550        Value::Number(number) => number.as_i64() != Some(0),
4551        _ => true,
4552    };
4553    if include {
4554        params
4555            .as_object_mut()
4556            .expect("mutation params are always objects")
4557            .insert("collection".to_string(), collection);
4558    }
4559    Ok(())
4560}
4561
4562fn run_settings_command(
4563    command: SettingsCommand,
4564    client: &mut impl RpcCaller,
4565) -> Result<(Value, JsonStyle), String> {
4566    let (value, style) = match command {
4567        SettingsCommand::Get { key, .. } => (
4568            client.call("settings.get", Some(serde_json::json!({"key": key})))?,
4569            JsonStyle::Pretty,
4570        ),
4571        SettingsCommand::List { .. } => (client.call("settings.getAll", None)?, JsonStyle::Pretty),
4572        SettingsCommand::Set {
4573            pairs,
4574            file,
4575            dry_run,
4576            ..
4577        } => {
4578            if let Some(file) = file {
4579                // --file mode: read JSON and call settings.setAll
4580                let raw = fs::read_to_string(&file)
4581                    .map_err(|err| format!("INVALID_JSON: Could not read JSON: {err}"))?;
4582                let settings: Value = serde_json::from_str(&raw)
4583                    .map_err(|err| format!("INVALID_JSON: Could not parse JSON: {err}"))?;
4584                if dry_run {
4585                    (
4586                        dry_run_value("settings.setAll", settings),
4587                        JsonStyle::PythonCompact,
4588                    )
4589                } else {
4590                    (
4591                        client.call("settings.setAll", Some(settings))?,
4592                        JsonStyle::PythonCompact,
4593                    )
4594                }
4595            } else if pairs.len() == 2 {
4596                // Single key=value: settings.set
4597                let key = &pairs[0];
4598                let value = &pairs[1];
4599                let parsed_value = serde_json::from_str::<Value>(value)
4600                    .unwrap_or(Value::String(value.clone()));
4601                let params = serde_json::json!({"key": key, "value": parsed_value});
4602                if dry_run {
4603                    (
4604                        dry_run_value("settings.set", params),
4605                        JsonStyle::PythonCompact,
4606                    )
4607                } else {
4608                    (
4609                        client.call("settings.set", Some(params))?,
4610                        JsonStyle::PythonCompact,
4611                    )
4612                }
4613            } else if pairs.len() > 2 && pairs.len() % 2 == 0 {
4614                // Multiple pairs: build a map and call settings.setAll
4615                let mut map = serde_json::Map::new();
4616                for chunk in pairs.chunks(2) {
4617                    let parsed = serde_json::from_str::<Value>(&chunk[1])
4618                        .unwrap_or(Value::String(chunk[1].clone()));
4619                    map.insert(chunk[0].clone(), parsed);
4620                }
4621                let settings = Value::Object(map);
4622                if dry_run {
4623                    (
4624                        dry_run_value("settings.setAll", settings),
4625                        JsonStyle::PythonCompact,
4626                    )
4627                } else {
4628                    (
4629                        client.call("settings.setAll", Some(settings))?,
4630                        JsonStyle::PythonCompact,
4631                    )
4632                }
4633            } else {
4634                return Err(
4635                    "INVALID_ARGS: provide key value pairs (even number of args) or --file".into(),
4636                );
4637            }
4638        }
4639    };
4640    Ok((value, style))
4641}
4642
4643fn run_tags_command(
4644    command: TagsCommand,
4645    client: &mut impl RpcCaller,
4646) -> Result<(Value, JsonStyle), String> {
4647    let (value, style) = match command {
4648        TagsCommand::List { limit, .. } => {
4649            let value = client.call("tags.list", Some(serde_json::json!({"limit": limit})))?;
4650            (normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty)
4651        }
4652        TagsCommand::Rename {
4653            old, new, dry_run, ..
4654        } => run_tag_mutation(
4655            client,
4656            "tags.rename",
4657            serde_json::json!({"oldName": old, "newName": new}),
4658            dry_run,
4659        )?,
4660        TagsCommand::Delete { tag, dry_run, .. } => run_tag_mutation(
4661            client,
4662            "tags.delete",
4663            serde_json::json!({"tag": tag}),
4664            dry_run,
4665        )?,
4666        TagsCommand::Add {
4667            keys, tags, dry_run, ..
4668        } => {
4669            if keys.len() == 1 {
4670                run_tag_mutation(
4671                    client,
4672                    "tags.add",
4673                    serde_json::json!({"key": keys[0], "tags": tags}),
4674                    dry_run,
4675                )?
4676            } else {
4677                run_tag_mutation(
4678                    client,
4679                    "tags.batchUpdate",
4680                    serde_json::json!({"keys": keys, "add": tags}),
4681                    dry_run,
4682                )?
4683            }
4684        }
4685        TagsCommand::Remove {
4686            keys, tags, dry_run, ..
4687        } => {
4688            if keys.len() == 1 {
4689                run_tag_mutation(
4690                    client,
4691                    "tags.remove",
4692                    serde_json::json!({"key": keys[0], "tags": tags}),
4693                    dry_run,
4694                )?
4695            } else {
4696                run_tag_mutation(
4697                    client,
4698                    "tags.batchUpdate",
4699                    serde_json::json!({"keys": keys, "remove": tags}),
4700                    dry_run,
4701                )?
4702            }
4703        }
4704    };
4705    Ok((value, style))
4706}
4707
4708fn run_tag_mutation(
4709    client: &mut impl RpcCaller,
4710    method: &str,
4711    params: Value,
4712    dry_run: bool,
4713) -> Result<(Value, JsonStyle), String> {
4714    if dry_run {
4715        Ok((dry_run_value(method, params), JsonStyle::PythonCompact))
4716    } else {
4717        Ok((client.call(method, Some(params))?, JsonStyle::PythonCompact))
4718    }
4719}
4720
4721fn dry_run_value(method: &str, params: Value) -> Value {
4722    serde_json::json!({
4723        "ok": true,
4724        "dryRun": true,
4725        "wouldCall": method,
4726        "wouldCallParams": params,
4727    })
4728}
4729
4730fn run_annotations_command(
4731    command: AnnotationsCommand,
4732    client: &mut impl RpcCaller,
4733) -> Result<(Value, JsonStyle), String> {
4734    let (value, style) = match command {
4735        AnnotationsCommand::List { parent, attachment, .. } => {
4736            let mut params = serde_json::json!({"parentKey": parent});
4737            if let Some(att) = attachment {
4738                params["attachmentKey"] = Value::String(att);
4739            }
4740            let value = client.call("annotations.list", Some(params))?;
4741            let total = value
4742                .get("items")
4743                .and_then(Value::as_array)
4744                .map_or(0, |a| a.len()) as u64;
4745            (normalize_list_envelope(value, "items", Some(total), 0), JsonStyle::Pretty)
4746        }
4747        AnnotationsCommand::Create {
4748            parent,
4749            attachment,
4750            annotation_type,
4751            position,
4752            quote,
4753            page,
4754            sort_index,
4755            text,
4756            comment,
4757            color,
4758            dry_run,
4759            ..
4760        } => {
4761            let annotation_type = annotation_type.unwrap_or_else(|| "highlight".to_string());
4762            if !matches!(
4763                annotation_type.as_str(),
4764                "highlight" | "note" | "underline" | "image" | "ink"
4765            ) {
4766                return Err(format!(
4767                    "INVALID_ARGS: --type must be highlight|note|underline|image|ink, got {annotation_type:?}"
4768                ));
4769            }
4770            let mut params = serde_json::Map::new();
4771            params.insert("parentKey".to_string(), Value::String(parent));
4772            if let Some(att) = attachment {
4773                params.insert("attachmentKey".to_string(), Value::String(att));
4774            }
4775            params.insert("type".to_string(), Value::String(annotation_type.clone()));
4776            params.insert("color".to_string(), Value::String(color));
4777
4778            if let Some(ref quote_text) = quote {
4779                if !matches!(annotation_type.as_str(), "highlight" | "underline") {
4780                    return Err(format!(
4781                        "INVALID_ARGS: --quote is only valid for highlight|underline, got {annotation_type:?}"
4782                    ));
4783                }
4784                params.insert("quote".to_string(), Value::String(quote_text.clone()));
4785                if let Some(page_idx) = page {
4786                    params.insert(
4787                        "pageIndex".to_string(),
4788                        Value::Number(page_idx.into()),
4789                    );
4790                }
4791                // When --quote is given, --position is optional
4792                if let Some(raw) = position {
4793                    let pos = serde_json::from_str::<Value>(&raw)
4794                        .map_err(|err| format!("INVALID_JSON: Could not parse --position: {err}"))?;
4795                    validate_annotation_position(annotation_type.as_str(), &pos)?;
4796                    params.insert("position".to_string(), pos);
4797                }
4798            } else {
4799                let position = position
4800                    .ok_or_else(|| "INVALID_ARGS: --position JSON is required (or use --quote)".to_string())
4801                    .and_then(|raw| {
4802                        serde_json::from_str::<Value>(&raw)
4803                            .map_err(|err| format!("INVALID_JSON: Could not parse --position: {err}"))
4804                    })?;
4805                validate_annotation_position(annotation_type.as_str(), &position)?;
4806                params.insert("position".to_string(), position);
4807            }
4808
4809            if let Some(sort_index) = sort_index {
4810                params.insert(
4811                    "sortIndex".to_string(),
4812                    parse_annotation_sort_index(sort_index)?,
4813                );
4814            }
4815            if let Some(text) = text {
4816                params.insert("text".to_string(), Value::String(text));
4817            }
4818            if let Some(comment) = comment {
4819                params.insert("comment".to_string(), Value::String(comment));
4820            }
4821            run_mutating_command(client, "annotations.create", Value::Object(params), dry_run)?
4822        }
4823        AnnotationsCommand::Delete {
4824            annotation_key,
4825            dry_run,
4826            ..
4827        } => run_mutating_command(
4828            client,
4829            "annotations.delete",
4830            serde_json::json!({"key": annotation_key}),
4831            dry_run,
4832        )?,
4833    };
4834    Ok((value, style))
4835}
4836
4837fn validate_annotation_position(annotation_type: &str, position: &Value) -> Result<(), String> {
4838    position
4839        .get("pageIndex")
4840        .and_then(Value::as_i64)
4841        .filter(|value| *value >= 0)
4842        .ok_or_else(|| {
4843            "INVALID_ARGS: --position must include a non-negative integer pageIndex".to_string()
4844        })?;
4845
4846    if annotation_type == "ink" {
4847        let has_paths = position
4848            .get("paths")
4849            .and_then(Value::as_array)
4850            .is_some_and(|paths| !paths.is_empty());
4851        if !has_paths {
4852            return Err("INVALID_ARGS: ink --position must include non-empty paths".to_string());
4853        }
4854        return Ok(());
4855    }
4856
4857    let valid_rects = position
4858        .get("rects")
4859        .and_then(Value::as_array)
4860        .is_some_and(|rects| !rects.is_empty() && rects.iter().all(is_annotation_rect));
4861    if !valid_rects {
4862        return Err(
4863            "INVALID_ARGS: --position must include non-empty rects of [x1, y1, x2, y2]".to_string(),
4864        );
4865    }
4866    Ok(())
4867}
4868
4869fn is_annotation_rect(value: &Value) -> bool {
4870    value.as_array().is_some_and(|coords| {
4871        coords.len() == 4
4872            && coords
4873                .iter()
4874                .all(|coord| coord.as_f64().is_some_and(f64::is_finite))
4875    })
4876}
4877
4878fn parse_annotation_sort_index(raw: String) -> Result<Value, String> {
4879    let parsed = serde_json::from_str::<Value>(&raw).unwrap_or_else(|_| Value::String(raw));
4880    let valid = match &parsed {
4881        Value::Number(number) => number.as_f64().is_some_and(f64::is_finite),
4882        Value::String(value) => {
4883            is_zotero_pdf_sort_index(value.trim())
4884                || (!value.trim().is_empty()
4885                    && value.trim().parse::<f64>().is_ok_and(f64::is_finite))
4886        }
4887        _ => false,
4888    };
4889    if valid {
4890        Ok(parsed)
4891    } else {
4892        Err(format!(
4893            "INVALID_ARGS: --sort-index must be a finite number or numeric string, got {parsed}"
4894        ))
4895    }
4896}
4897
/// Returns `true` when `value` consists of exactly three `|`-separated groups
/// of ASCII digits with lengths 5, 6 and 5 (for example `00001|000123|00045`).
fn is_zotero_pdf_sort_index(value: &str) -> bool {
    // A segment qualifies when it has the expected byte length and is digits only.
    let digits_of_len =
        |segment: &str, len: usize| segment.len() == len && segment.bytes().all(|b| b.is_ascii_digit());

    let segments: Vec<&str> = value.split('|').collect();
    match segments.as_slice() {
        [page, offset, y] => {
            digits_of_len(page, 5) && digits_of_len(offset, 6) && digits_of_len(y, 5)
        }
        _ => false,
    }
}
4911
4912
4913fn localize_attachment_path_response(mut value: Value) -> Value {
4914    if let Some(path) = value.get("path").and_then(Value::as_str) {
4915        let local = local_path_from_zotero_path(path);
4916        if let Some(map) = value.as_object_mut() {
4917            map.insert("path".to_string(), Value::String(local));
4918        }
4919    }
4920    value
4921}
4922
4923fn run_notes_command(
4924    command: NotesCommand,
4925    client: &mut impl RpcCaller,
4926) -> Result<(Value, JsonStyle), String> {
4927    let (value, style) = match command {
4928        NotesCommand::List {
4929            parent,
4930            limit,
4931            offset,
4932            ..
4933        } => {
4934            let value = client.call(
4935                "notes.list",
4936                Some(serde_json::json!({"parentKey": parent})),
4937            )?;
4938            (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
4939        }
4940        NotesCommand::Get { note_key, .. } => {
4941            let value = client.call("notes.get", Some(serde_json::json!({"key": note_key})))?;
4942            (value, JsonStyle::Pretty)
4943        }
4944        NotesCommand::Create {
4945            parent,
4946            content,
4947            tags,
4948            dry_run,
4949            ..
4950        } => {
4951            let mut params = serde_json::Map::new();
4952            params.insert("parentKey".to_string(), Value::String(parent));
4953            params.insert("content".to_string(), Value::String(content));
4954            if !tags.is_empty() {
4955                params.insert(
4956                    "tags".to_string(),
4957                    Value::Array(tags.into_iter().map(Value::String).collect()),
4958                );
4959            }
4960            run_mutating_command(client, "notes.create", Value::Object(params), dry_run)?
4961        }
4962        NotesCommand::Update {
4963            note_key,
4964            content,
4965            dry_run,
4966            ..
4967        } => run_mutating_command(
4968            client,
4969            "notes.update",
4970            serde_json::json!({"key": note_key, "content": content}),
4971            dry_run,
4972        )?,
4973        NotesCommand::Delete {
4974            note_key, dry_run, ..
4975        } => {
4976            // Python CLI intentionally routes note deletion through items.delete.
4977            run_mutating_command(
4978                client,
4979                "items.delete",
4980                serde_json::json!({"key": note_key}),
4981                dry_run,
4982            )?
4983        }
4984        NotesCommand::Search { query, limit, .. } => {
4985            let value = client.call(
4986                "notes.search",
4987                Some(serde_json::json!({"query": query, "limit": limit})),
4988            )?;
4989            (normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty)
4990        }
4991    };
4992    Ok((value, style))
4993}
4994
4995fn run_mutating_command(
4996    client: &mut impl RpcCaller,
4997    method: &str,
4998    params: Value,
4999    dry_run: bool,
5000) -> Result<(Value, JsonStyle), String> {
5001    if dry_run {
5002        Ok((
5003            serde_json::json!({
5004                "ok": true,
5005                "dryRun": true,
5006                "wouldCall": method,
5007                "wouldCallParams": params,
5008            }),
5009            JsonStyle::PythonCompact,
5010        ))
5011    } else {
5012        client
5013            .call(method, Some(params))
5014            .map(|value| (value, JsonStyle::PythonCompact))
5015    }
5016}
5017
5018fn run_collections_command(
5019    command: CollectionsCommand,
5020    client: &mut impl RpcCaller,
5021) -> Result<(Value, JsonStyle), String> {
5022    let value = match command {
5023        CollectionsCommand::List { .. } => normalize_list_envelope(
5024            client.call("collections.list", None)?,
5025            "items",
5026            None,
5027            0,
5028        ),
5029        CollectionsCommand::Tree { .. } => client.call("collections.tree", None)?,
5030        CollectionsCommand::Get { name_or_id, .. } => {
5031            let key = resolve_collection(client, &name_or_id)?;
5032            client.call("collections.get", Some(serde_json::json!({"key": key})))?
5033        }
5034        CollectionsCommand::GetItems {
5035            name_or_id,
5036            limit,
5037            offset,
5038            ..
5039        } => {
5040            let key = resolve_collection(client, &name_or_id)?;
5041            let mut params = serde_json::json!({"key": key});
5042            if let Some(map) = params.as_object_mut() {
5043                if let Some(limit) = limit {
5044                    map.insert("limit".to_string(), Value::Number(limit.into()));
5045                }
5046                if offset > 0 {
5047                    map.insert("offset".to_string(), Value::Number(offset.into()));
5048                }
5049            }
5050            normalize_list_envelope(
5051                client.call("collections.getItems", Some(params))?,
5052                "items",
5053                limit,
5054                offset,
5055            )
5056        }
5057        CollectionsCommand::Stats { name_or_id, .. } => {
5058            let key = resolve_collection(client, &name_or_id)?;
5059            client.call("collections.stats", Some(serde_json::json!({"key": key})))?
5060        }
5061        CollectionsCommand::Rename {
5062            old_name,
5063            new_name,
5064            dry_run,
5065            ..
5066        } => {
5067            let key = resolve_mutable_collection(client, &old_name, "rename")?;
5068            let params = serde_json::json!({"key": key, "name": new_name});
5069            if dry_run {
5070                return Ok((
5071                    dry_run_value("collections.rename", params),
5072                    JsonStyle::PythonCompact,
5073                ));
5074            }
5075            return Ok((
5076                client.call("collections.rename", Some(params))?,
5077                JsonStyle::PythonCompact,
5078            ));
5079        }
5080        CollectionsCommand::Create {
5081            name,
5082            parent,
5083            dry_run,
5084            ..
5085        } => {
5086            let mut params = serde_json::json!({"name": name});
5087            if let Some(parent) = parent {
5088                let parent_key = resolve_mutable_collection(client, &parent, "use as parent")?;
5089                if let Some(map) = params.as_object_mut() {
5090                    map.insert("parentKey".to_string(), parent_key);
5091                }
5092            }
5093            if dry_run {
5094                return Ok((
5095                    dry_run_value("collections.create", params),
5096                    JsonStyle::PythonCompact,
5097                ));
5098            }
5099            return Ok((
5100                client.call("collections.create", Some(params))?,
5101                JsonStyle::PythonCompact,
5102            ));
5103        }
5104        CollectionsCommand::Delete {
5105            name_or_id,
5106            dry_run,
5107            ..
5108        } => {
5109            let key = resolve_mutable_collection(client, &name_or_id, "delete")?;
5110            let params = serde_json::json!({"key": key});
5111            if dry_run {
5112                return Ok((
5113                    dry_run_value("collections.delete", params),
5114                    JsonStyle::PythonCompact,
5115                ));
5116            }
5117            return Ok((
5118                client.call("collections.delete", Some(params))?,
5119                JsonStyle::PythonCompact,
5120            ));
5121        }
5122        CollectionsCommand::AddItems {
5123            collection,
5124            item_keys,
5125            dry_run,
5126            ..
5127        } => {
5128            let key = resolve_mutable_collection(client, &collection, "add to")?;
5129            let params = serde_json::json!({"key": key, "keys": item_keys});
5130            if dry_run {
5131                return Ok((
5132                    dry_run_value("collections.addItems", params),
5133                    JsonStyle::PythonCompact,
5134                ));
5135            }
5136            return Ok((
5137                client.call("collections.addItems", Some(params))?,
5138                JsonStyle::PythonCompact,
5139            ));
5140        }
5141        CollectionsCommand::RemoveItems {
5142            collection,
5143            item_keys,
5144            dry_run,
5145            ..
5146        } => {
5147            let key = resolve_mutable_collection(client, &collection, "operate on")?;
5148            let params = serde_json::json!({"key": key, "keys": item_keys});
5149            if dry_run {
5150                return Ok((
5151                    dry_run_value("collections.removeItems", params),
5152                    JsonStyle::PythonCompact,
5153                ));
5154            }
5155            return Ok((
5156                client.call("collections.removeItems", Some(params))?,
5157                JsonStyle::PythonCompact,
5158            ));
5159        }
5160    };
5161    Ok((value, JsonStyle::Pretty))
5162}
5163
5164fn resolve_export_keys(
5165    client: &mut impl RpcCaller,
5166    mut keys: Vec<String>,
5167    collection: Option<String>,
5168) -> Result<Vec<String>, String> {
5169    if let Some(name) = collection {
5170        let col_key = resolve_collection(client, &name)?;
5171        let response = client.call(
5172            "collections.getItems",
5173            Some(serde_json::json!({"key": col_key})),
5174        )?;
5175        let items = collection_items(&response);
5176        for item in items {
5177            if let Some(key) = item.get("key").and_then(Value::as_str) {
5178                if !keys.contains(&key.to_string()) {
5179                    keys.push(key.to_string());
5180                }
5181            }
5182        }
5183    }
5184    if keys.is_empty() {
5185        return Err("No item keys provided. Pass positional keys and/or --collection.".to_string());
5186    }
5187    Ok(keys)
5188}
5189
5190fn run_export(args: ExportArgs, client: &mut impl RpcCaller) -> Result<String, String> {
5191    let keys = resolve_export_keys(client, args.keys, args.collection)?;
5192    match args.format.as_str() {
5193        "bibtex" => run_export_content_command(client, "export.bibtex", keys),
5194        "ris" => run_export_content_command(client, "export.ris", keys),
5195        "csl-json" => {
5196            let response =
5197                client.call("export.cslJson", Some(serde_json::json!({"keys": keys})))?;
5198            if let Some(content) = response.get("content") {
5199                format_json(content, JsonStyle::Pretty)
5200            } else {
5201                format_json(&response, JsonStyle::PythonCompact)
5202            }
5203        }
5204        "bibliography" => {
5205            let response = client.call(
5206                "export.bibliography",
5207                Some(serde_json::json!({"keys": keys, "style": args.style})),
5208            )?;
5209            if let Some(object) = response.as_object() {
5210                let field = if args.html { "html" } else { "text" };
5211                if object.contains_key("html") || object.contains_key("text") {
5212                    return raw_value_output(
5213                        object.get(field).unwrap_or(&Value::String(String::new())),
5214                    );
5215                }
5216            }
5217            format_json(&response, JsonStyle::PythonCompact)
5218        }
5219        other => Err(format!(
5220            "INVALID_ARGS: unknown format {other:?}, expected bibtex/ris/csl-json/bibliography"
5221        )),
5222    }
5223}
5224
5225fn run_export_content_command(
5226    client: &mut impl RpcCaller,
5227    method: &str,
5228    keys: Vec<String>,
5229) -> Result<String, String> {
5230    let response = client.call(method, Some(serde_json::json!({"keys": keys})))?;
5231    if let Some(content) = response.get("content") {
5232        raw_value_output(content)
5233    } else {
5234        format_json(&response, JsonStyle::PythonCompact)
5235    }
5236}
5237
5238fn raw_value_output(value: &Value) -> Result<String, String> {
5239    let mut out = match value {
5240        Value::Null => String::new(),
5241        Value::String(content) => content.clone(),
5242        other => to_python_repr(other),
5243    };
5244    out.push('\n');
5245    Ok(out)
5246}
5247
5248fn to_python_repr(value: &Value) -> String {
5249    match value {
5250        Value::Null => "None".to_string(),
5251        Value::Bool(value) => {
5252            if *value {
5253                "True".to_string()
5254            } else {
5255                "False".to_string()
5256            }
5257        }
5258        Value::Number(value) => value.to_string(),
5259        Value::String(value) => format!("'{}'", value.replace('\\', "\\\\").replace('\'', "\\'")),
5260        Value::Array(values) => {
5261            let inner = values
5262                .iter()
5263                .map(to_python_repr)
5264                .collect::<Vec<_>>()
5265                .join(", ");
5266            format!("[{inner}]")
5267        }
5268        Value::Object(entries) => {
5269            let inner = entries
5270                .iter()
5271                .map(|(key, value)| {
5272                    format!("'{}': {}", key.replace('\'', "\\'"), to_python_repr(value))
5273                })
5274                .collect::<Vec<_>>()
5275                .join(", ");
5276            format!("{{{inner}}}")
5277        }
5278    }
5279}
5280
5281fn resolve_collection(client: &mut impl RpcCaller, name_or_id: &str) -> Result<Value, String> {
5282    let trimmed = name_or_id.trim();
5283    if let Ok(id) = trimmed.parse::<i64>() {
5284        return Ok(Value::Number(id.into()));
5285    }
5286
5287    let collections = client.call("collections.list", None)?;
5288    let items = collections
5289        .get("items")
5290        .and_then(Value::as_array)
5291        .or_else(|| collections.as_array())
5292        .ok_or_else(|| "collections.list returned non-array result".to_string())?;
5293
5294    if let Some(collection) = items
5295        .iter()
5296        .find(|collection| collection.get("key").and_then(Value::as_str) == Some(trimmed))
5297    {
5298        return collection_key(collection);
5299    }
5300
5301    let exact = items
5302        .iter()
5303        .filter(|collection| collection.get("name").and_then(Value::as_str) == Some(trimmed))
5304        .collect::<Vec<_>>();
5305    if exact.len() == 1 {
5306        return collection_key(exact[0]);
5307    }
5308
5309    let needle = normalize_collection_name(trimmed);
5310    let fuzzy = items
5311        .iter()
5312        .filter(|collection| {
5313            collection
5314                .get("name")
5315                .and_then(Value::as_str)
5316                .map(normalize_collection_name)
5317                .is_some_and(|name| name.contains(&needle))
5318        })
5319        .collect::<Vec<_>>();
5320
5321    match fuzzy.len() {
5322        1 => collection_key(fuzzy[0]),
5323        0 => Err(format!(
5324            "COLLECTION_NOT_FOUND: No collection named {trimmed:?}"
5325        )),
5326        _ => Err(format!(
5327            "COLLECTION_AMBIGUOUS: Multiple collections match {trimmed:?}"
5328        )),
5329    }
5330}
5331
5332fn collection_key(collection: &Value) -> Result<Value, String> {
5333    collection
5334        .get("key")
5335        .cloned()
5336        .ok_or_else(|| "collection result is missing key".to_string())
5337}
5338
5339fn resolve_mutable_collection(
5340    client: &mut impl RpcCaller,
5341    name_or_id: &str,
5342    operation: &str,
5343) -> Result<Value, String> {
5344    let key = resolve_collection(client, name_or_id)?;
5345    if key.as_i64() == Some(0) {
5346        return Err(format!(
5347            "COLLECTION_NOT_FOUND: {name_or_id:?} resolved to library root (cannot {operation})"
5348        ));
5349    }
5350    Ok(key)
5351}
5352
/// Normalizes a collection name for case- and spacing-insensitive comparison.
///
/// Leading and trailing whitespace is removed, every internal whitespace run
/// is collapsed to a single space, and the result is lower-cased.
fn normalize_collection_name(name: &str) -> String {
    let mut collapsed = String::with_capacity(name.len());
    for word in name.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    // Lower-case the joined text in one pass, as the whole-string conversion
    // is what callers compare against.
    collapsed.to_lowercase()
}
5359
5360fn format_json(value: &Value, style: JsonStyle) -> Result<String, String> {
5361    let mut out = match style {
5362        JsonStyle::PythonCompact => to_python_compact_json(value),
5363        JsonStyle::Pretty => serde_json::to_string_pretty(value).map_err(|err| err.to_string())?,
5364    };
5365    out.push('\n');
5366    Ok(out)
5367}
5368
5369fn to_python_compact_json(value: &Value) -> String {
5370    match value {
5371        Value::Null => "null".to_string(),
5372        Value::Bool(value) => value.to_string(),
5373        Value::Number(value) => value.to_string(),
5374        Value::String(value) => {
5375            serde_json::to_string(value).expect("string serialization cannot fail")
5376        }
5377        Value::Array(values) => {
5378            let inner = values
5379                .iter()
5380                .map(to_python_compact_json)
5381                .collect::<Vec<_>>()
5382                .join(", ");
5383            format!("[{inner}]")
5384        }
5385        Value::Object(entries) => {
5386            let inner = entries
5387                .iter()
5388                .map(|(key, value)| {
5389                    let key = serde_json::to_string(key).expect("string serialization cannot fail");
5390                    format!("{key}: {}", to_python_compact_json(value))
5391                })
5392                .collect::<Vec<_>>()
5393                .join(", ");
5394            format!("{{{inner}}}")
5395        }
5396    }
5397}