Skip to main content

zotron_cli/
lib.rs

1//! Minimal typed CLI surface for the Rust migration scaffold.
2
3use std::{
4    env, fs,
5    io::{self, Read},
6    path::{Path, PathBuf},
7    process::Command as ProcessCommand,
8    thread,
9    time::{Duration, Instant, SystemTime, UNIX_EPOCH},
10};
11
12use clap::{error::ErrorKind, Parser, Subcommand};
13use serde_json::Value;
14use zotron_rpc::{StdProviderCommandRunner, UreqProviderHttpTransport, ZoteroRpc};
15use zotron_types::{
16    bm25_score_chunks, build_embedding_provider_request, build_ocr_provider_request,
17    builtin_ocr_provider_specs, cosine_similarity, execute_embedding_provider_request,
18    is_zotron_evidence_artifact, machine_artifact_exists_for_item,
19    machine_artifact_exists_in_sidecar, machine_artifact_store_root,
20    ocr_provider_spec as raw_ocr_provider_spec, parse_embedding_provider_response,
21    parse_ocr_provider_response, read_machine_artifact_sidecar, rrf_merge,
22    write_machine_artifact_sidecar, ArtifactStorePlatform, EmbeddingChunkInput,
23    EmbeddingRequestInput, EmbeddingVector, MachineArtifactKind, OcrRequestInput,
24    ProviderCommandRunner, ProviderHttpInvocation, ProviderHttpTransport, StructureChunk,
25    DEFAULT_RPC_URL,
26};
27
/// Minimal abstraction over an RPC client used by the CLI.
///
/// This file implements it for [`ZoteroRpc`]; any other type that can answer
/// `call` may be substituted wherever the CLI is generic over `RpcCaller`.
pub trait RpcCaller {
    /// Invokes `method` with optional JSON `params`.
    ///
    /// Returns the JSON result on success, or the failure rendered as a
    /// `String` on error.
    fn call(&mut self, method: &str, params: Option<Value>) -> Result<Value, String>;
}
31
/// Serializable, CLI-facing description of an OCR provider.
///
/// Values are produced from a `zotron_types::OcrProviderSpec` by
/// `cli_ocr_provider_spec`; field names are the serialized keys.
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
pub struct CliOcrProviderSpec {
    /// Filled from the underlying spec's `provider_key`.
    pub id: &'static str,
    /// Also filled from `provider_key`, so it carries the same value as `id`.
    pub provider: &'static str,
    /// String form (`as_str`) of the underlying spec's request style.
    pub request_style: &'static str,
    /// Copied unchanged from the underlying spec's `auth`.
    pub auth: &'static str,
    /// Copied unchanged from the underlying spec's `auth_header`.
    pub auth_header: &'static str,
    /// Copied unchanged from the underlying spec.
    /// NOTE(review): presumably "provider accepts a PDF without rasterizing" — confirm.
    pub supports_pdf_direct: bool,
    /// Copied unchanged from the underlying spec's `key_field`.
    pub key_field: &'static str,
}
42
/// Serializable, CLI-facing description of an embedding provider.
///
/// Values are produced by `embedding_provider_spec`; field names are the
/// serialized keys.
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
pub struct CliEmbeddingProviderSpec {
    /// Copied from the underlying spec's `id`.
    pub id: &'static str,
    /// Filled from the underlying spec's `provider_key`.
    pub provider: &'static str,
    /// `"dashscope"` when the provider key is `"alibaba"`, otherwise the string
    /// form of the underlying spec's request style.
    pub request_style: &'static str,
    /// The underlying spec's default URL, or an empty string when it has none.
    pub default_url: String,
    /// Copied from the underlying spec's `default_model`.
    pub default_model: &'static str,
    /// Copied from the underlying spec's `auth`.
    pub auth: &'static str,
    /// Copied from the underlying spec's `key_field`.
    pub key_field: &'static str,
}
53
54pub fn ocr_provider_specs() -> Vec<CliOcrProviderSpec> {
55    builtin_ocr_provider_specs()
56        .into_iter()
57        .map(cli_ocr_provider_spec)
58        .collect()
59}
60
61pub fn ocr_provider_spec(provider: &str) -> Result<CliOcrProviderSpec, String> {
62    zotron_types::ocr_provider_spec(provider).map(cli_ocr_provider_spec)
63}
64
65pub fn embedding_provider_spec(provider: &str) -> Result<CliEmbeddingProviderSpec, String> {
66    let spec = zotron_types::embedding_provider_spec(provider)?;
67    Ok(CliEmbeddingProviderSpec {
68        id: spec.id,
69        provider: spec.provider_key,
70        request_style: if spec.provider_key == "alibaba" {
71            "dashscope"
72        } else {
73            spec.request_style.as_str()
74        },
75        default_url: spec.default_url.unwrap_or("").to_string(),
76        default_model: spec.default_model,
77        auth: spec.auth,
78        key_field: spec.key_field,
79    })
80}
81
82pub fn chunks_from_blocks(blocks: &[Value], max_chars: usize) -> Result<Vec<Value>, String> {
83    let typed = blocks
84        .iter()
85        .map(json_block_to_pdf_block)
86        .collect::<Result<Vec<_>, _>>()?;
87    let chunks = zotron_types::chunks_from_blocks(&typed, max_chars);
88    chunks
89        .into_iter()
90        .map(|chunk| chunk_to_cli_value(&chunk, &typed))
91        .collect()
92}
93
94fn cli_ocr_provider_spec(spec: zotron_types::OcrProviderSpec) -> CliOcrProviderSpec {
95    CliOcrProviderSpec {
96        id: spec.provider_key,
97        provider: spec.provider_key,
98        request_style: spec.request_style.as_str(),
99        auth: spec.auth,
100        auth_header: spec.auth_header,
101        supports_pdf_direct: spec.supports_pdf_direct,
102        key_field: spec.key_field,
103    }
104}
105
106fn json_block_to_pdf_block(value: &Value) -> Result<zotron_types::PdfEvidenceBlock, String> {
107    let block_key = value
108        .get("block_key")
109        .and_then(Value::as_str)
110        .ok_or_else(|| "block missing block_key".to_string())?
111        .to_string();
112    let item_key = value
113        .get("item_key")
114        .and_then(Value::as_str)
115        .ok_or_else(|| "block missing item_key".to_string())?
116        .to_string();
117    let attachment_key = value
118        .get("attachment_key")
119        .and_then(Value::as_str)
120        .ok_or_else(|| "block missing attachment_key".to_string())?
121        .to_string();
122    let page_idx = value
123        .get("page_idx")
124        .or_else(|| value.get("page"))
125        .and_then(Value::as_u64)
126        .unwrap_or(1);
127    let block_type = value
128        .get("type")
129        .or_else(|| value.get("block_type"))
130        .and_then(Value::as_str)
131        .unwrap_or("paragraph")
132        .to_string();
133    let section_path = value
134        .get("section_path")
135        .and_then(Value::as_array)
136        .map(|items| {
137            items
138                .iter()
139                .filter_map(Value::as_str)
140                .map(ToString::to_string)
141                .collect::<Vec<_>>()
142        })
143        .unwrap_or_default();
144    let text = value
145        .get("text")
146        .and_then(Value::as_str)
147        .unwrap_or("")
148        .to_string();
149    let bbox = value.get("bbox").and_then(value_bbox4);
150
151    Ok(zotron_types::PdfEvidenceBlock {
152        block_key,
153        item_key,
154        attachment_key,
155        page_idx,
156        block_type,
157        bbox,
158        section_path,
159        text,
160    })
161}
162
163fn chunk_to_cli_value(
164    chunk: &zotron_types::StructureChunk,
165    blocks: &[zotron_types::PdfEvidenceBlock],
166) -> Result<Value, String> {
167    let refs = chunk
168        .block_keys
169        .iter()
170        .filter_map(|key| blocks.iter().find(|block| &block.block_key == key))
171        .map(|block| {
172            serde_json::json!({
173                "block_key": block.block_key,
174                "page_idx": block.page_idx,
175                "bbox": block.bbox.map(|bbox| bbox.iter().map(|n| {
176                    if n.fract() == 0.0 {
177                        Value::from(*n as i64)
178                    } else {
179                        Value::from(*n)
180                    }
181                }).collect::<Vec<_>>()),
182            })
183        })
184        .collect::<Vec<_>>();
185    Ok(serde_json::json!({
186        "chunk_key": chunk.chunk_key,
187        "item_key": chunk.item_key,
188        "attachment_key": chunk.attachment_key,
189        "block_keys": chunk.block_keys,
190        "section_path": chunk.section_path,
191        "text": chunk.text,
192        "page_start": chunk.page_start,
193        "page_end": chunk.page_end,
194        "evidence_refs": refs,
195    }))
196}
197
198fn value_bbox4(value: &Value) -> Option<[f64; 4]> {
199    let arr = value.as_array()?;
200    if arr.len() != 4 {
201        return None;
202    }
203    Some([
204        arr[0].as_f64()?,
205        arr[1].as_f64()?,
206        arr[2].as_f64()?,
207        arr[3].as_f64()?,
208    ])
209}
210
211impl RpcCaller for ZoteroRpc {
212    fn call(&mut self, method: &str, params: Option<Value>) -> Result<Value, String> {
213        self.call(method, params).map_err(|err| err.to_string())
214    }
215}
216
// Root argument parser for the `zotron` binary; its only field is the
// top-level subcommand. Maintainer notes use `//` on purpose: clap's derive
// turns `///` doc comments into help text, which would change CLI output.
#[derive(Debug, Parser)]
#[command(name = "zotron", about = "Rust client + CLI for the Zotron XPI")]
struct Cli {
    #[command(subcommand)]
    command: Command,
}
223
// OCR subcommands, mounted through `Command::Ocr`.
// The `///` lines below are clap help text; maintainer notes use `//` so the
// generated help stays unchanged.
#[derive(Debug, Subcommand)]
enum OcrCommand {
    /// Print supported OCR provider contracts.
    Providers,
    /// Execute an OCR provider request from JSON and emit normalized blocks.
    #[command(name = "run")]
    Run {
        #[arg(long)]
        provider: String,
        // `input` and `file` are both optional and no clap attribute relates
        // them; any either/or rule has to be enforced by the handler.
        /// Path to an OcrRequestInput JSON file, or "-" to read stdin.
        #[arg(long)]
        input: Option<String>,
        /// Local PDF/image file to encode into an OcrRequestInput.
        #[arg(long)]
        file: Option<String>,
        /// Zotero item key used when --file builds the OCR request.
        #[arg(long = "item-key")]
        item_key: Option<String>,
        /// Zotero attachment key used when --file builds the OCR request.
        #[arg(long = "attachment-key")]
        attachment_key: Option<String>,
        /// MIME type used when --file builds the OCR request.
        #[arg(long = "mime-type")]
        mime_type: Option<String>,
        /// Override the provider endpoint, required for service-hosted PaddleOCR-VL.
        #[arg(long)]
        endpoint: Option<String>,
        /// Environment variable containing the provider bearer token.
        #[arg(long = "api-key-env")]
        api_key_env: Option<String>,
    },
    /// Show OCR statistics for a collection.
    Status {
        #[arg(long)]
        collection: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Parse a Zotero PDF through MinerU and write hidden sidecar OCR/RAG artifacts.
    #[command(name = "process")]
    Process {
        #[arg(long, default_value = "mineru")]
        provider: String,
        /// Parent Zotero item key.
        #[arg(long)]
        parent: String,
        /// Zotero PDF attachment key (auto-resolved from --parent when omitted).
        #[arg(long)]
        attachment: Option<String>,
        // Three alternative result sources follow (`source_url`, `result_dir`,
        // `result_zip`); all are optional at the clap level.
        /// Public URL for MinerU cloud parsing. Use --result-dir/--result-zip for offline ingestion.
        #[arg(long = "source-url")]
        source_url: Option<String>,
        /// Already-extracted MinerU result directory, used by tests/offline replay.
        #[arg(long = "result-dir")]
        result_dir: Option<String>,
        /// Already-downloaded MinerU result zip, used by tests/offline replay.
        #[arg(long = "result-zip")]
        result_zip: Option<String>,
        /// Override MinerU submit endpoint.
        #[arg(long = "provider-endpoint")]
        provider_endpoint: Option<String>,
        /// Environment variable containing the MinerU bearer token.
        #[arg(long = "api-key-env", default_value = "ZOTRON_MINERU_API_KEY")]
        api_key_env: String,
        // Polling defaults: every 5 seconds, giving up after 900 seconds.
        #[arg(long = "poll-interval-seconds", default_value_t = 5)]
        poll_interval_seconds: u64,
        #[arg(long = "timeout-seconds", default_value_t = 900)]
        timeout_seconds: u64,
        // NOTE(review): presumably forwarded as the `max_chars` chunking limit — confirm.
        #[arg(long = "chunk-chars", default_value_t = 1200)]
        chunk_chars: usize,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
298
// Top-level subcommands of the CLI (the `command` field of `Cli`).
// Variants either carry their arguments inline, wrap an `Args` struct
// (`Search`, `Export`), or delegate to a nested subcommand enum.
// The `///` lines are clap help text; maintainer notes use `//`.
#[derive(Debug, Subcommand)]
enum Command {
    /// Check that Zotero is running with the Zotron XPI enabled.
    Ping {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Generic RPC escape hatch.
    Rpc {
        method: String,
        // Second positional argument; defaults to an empty JSON object.
        #[arg(default_value = "{}")]
        params_json: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        paginate: bool,
        #[arg(long, default_value_t = 100)]
        page_size: usize,
    },
    /// Push prepared Zotero JSON (from file or stdin) to Zotero.
    Push {
        /// Path to a JSON file, or "-" to read from stdin.
        json_file: String,
        /// Optional PDF attachment path.
        #[arg(long)]
        pdf: Option<String>,
        /// Collection name (fuzzy) or key.
        #[arg(long)]
        collection: Option<String>,
        // Free-form string at the clap level; the accepted values listed in
        // the help text are not enforced by a `value_parser` here.
        /// Duplicate handling: skip | update | create.
        #[arg(long = "on-duplicate", default_value = "skip")]
        on_duplicate: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        /// Parse input + resolve collection only; do not push to Zotero.
        #[arg(long = "dry-run")]
        dry_run: bool,
    },
    /// System and plugin introspection commands.
    System {
        #[command(subcommand)]
        command: SystemCommand,
    },
    /// Search items by text, tag, identifier, or structured conditions.
    Search(SearchArgs),
    /// Inspect and manage Zotero items.
    Items {
        #[command(subcommand)]
        command: ItemsCommand,
    },
    /// Inspect Zotero collections.
    Collections {
        #[command(subcommand)]
        command: CollectionsCommand,
    },
    /// Inspect Zotero notes.
    Notes {
        #[command(subcommand)]
        command: NotesCommand,
    },
    /// Inspect Zotero attachments.
    Attachments {
        #[command(subcommand)]
        command: AttachmentsCommand,
    },
    /// Inspect Zotero preferences.
    Settings {
        #[command(subcommand)]
        command: SettingsCommand,
    },
    /// Inspect and manage Zotero tags.
    Tags {
        #[command(subcommand)]
        command: TagsCommand,
    },
    /// Export items as BibTeX, RIS, CSL-JSON, or formatted bibliography.
    Export(ExportArgs),
    /// List, create, and delete PDF annotations.
    Annotations {
        #[command(subcommand)]
        command: AnnotationsCommand,
    },
    /// OCR PDFs and manage raw/block/chunk evidence artifacts.
    Ocr {
        #[command(subcommand)]
        command: OcrCommand,
    },
    /// Build and search retrieval artifacts.
    Rag {
        #[command(subcommand)]
        command: RagCommand,
    },
    /// Batch fill missing PDFs in a collection via Zotero's resolver chain.
    #[command(name = "find-pdfs")]
    FindPdfs {
        #[arg(long)]
        collection: String,
        // NOTE(review): a default of 0 presumably means "no limit" — confirm in the handler.
        #[arg(long, default_value_t = 0)]
        limit: usize,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
402
/// Plain bundle of retrieval-search options.
///
/// The fields have the same names and types as those of `RagCommand::Search`,
/// except that the RPC `url` is not included.
struct RagSearchOptions {
    /// Search query text.
    query: String,
    /// Optional collection restriction.
    collection: Option<String>,
    /// Item keys to restrict retrieval to (may be empty).
    keys: Vec<String>,
    /// Mirrors the `--zotero` flag.
    /// NOTE(review): exact effect is decided by the search handler — confirm.
    zotero: bool,
    /// Mirrors `--top-spans-per-item`.
    top_spans_per_item: u64,
    /// Mirrors `--include-fulltext-spans`.
    include_fulltext_spans: bool,
    /// Mirrors `--limit` (alias `--top-k`).
    top_k: u64,
    /// Output format name; the CLI restricts it to `json` or `jsonl`.
    output: String,
}
413
// Retrieval subcommands, mounted through `Command::Rag`.
// The `///` lines are clap help text; maintainer notes use `//`.
#[derive(Debug, Subcommand)]
enum RagCommand {
    /// Print supported embedding provider contracts.
    #[command(name = "providers")]
    Providers,
    /// Execute an embedding provider request from JSON and emit vectors.
    #[command(name = "embed")]
    Embed {
        #[arg(long)]
        provider: String,
        /// Path to an EmbeddingRequestInput JSON file, or "-" to read stdin.
        #[arg(long)]
        input: String,
        /// Override the embedding endpoint.
        #[arg(long)]
        endpoint: Option<String>,
        /// Override the embedding model.
        #[arg(long)]
        model: Option<String>,
        /// Override provider input type, for example document or query.
        #[arg(long = "input-type")]
        input_type: Option<String>,
        /// Environment variable containing the provider bearer token.
        #[arg(long = "api-key-env")]
        api_key_env: Option<String>,
    },
    /// Show index status for a collection.
    Status {
        #[arg(long)]
        collection: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Emit academic-zh retrieval hits with item_key/title/text provenance.
    #[command(name = "search")]
    Search {
        query: String,
        #[arg(long)]
        collection: Option<String>,
        // Repeatable: spelled `--key`, with `--keys` accepted as an alias.
        /// Limit retrieval to one or more Zotero item keys.
        #[arg(long = "key", alias = "keys")]
        keys: Vec<String>,
        #[arg(long)]
        zotero: bool,
        #[arg(long = "top-spans-per-item", default_value_t = 3)]
        top_spans_per_item: u64,
        #[arg(long = "include-fulltext-spans")]
        include_fulltext_spans: bool,
        // The field is named `top_k`, but the primary flag is `--limit`
        // (`--top-k` is an alias).
        #[arg(long = "limit", alias = "top-k", default_value_t = 50)]
        top_k: u64,
        // Only `json` and `jsonl` are accepted; clap rejects anything else.
        #[arg(long, default_value = "json", value_parser = ["json", "jsonl"])]
        output: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
470
// Introspection subcommands, mounted through `Command::System`.
// Every variant takes `--url`, defaulting to `DEFAULT_RPC_URL`.
// The `///` lines are clap help text; maintainer notes use `//`.
#[derive(Debug, Subcommand)]
enum SystemCommand {
    /// Show XPI version and exposed method metadata.
    Version {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// List all libraries (user + groups).
    Libraries {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Get statistics for the current (or specified) library.
    #[command(name = "library-stats")]
    LibraryStats {
        // Numeric library identifier; omitted means the current library,
        // per the help text above.
        #[arg(long)]
        library: Option<i64>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Show item type schema. Without --type, lists all types. With --type, shows fields and creator types.
    Schema {
        // Exposed as `--type`; the field is renamed because `type` is a Rust keyword.
        #[arg(long = "type")]
        item_type: Option<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Get the currently selected Zotero collection (or null).
    #[command(name = "current-collection")]
    CurrentCollection {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// List all RPC methods exposed by the XPI.
    #[command(name = "list-methods")]
    ListMethods {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Describe one or all RPC methods (schema / signatures).
    Describe {
        // Optional positional; omitted means "all methods" per the help text.
        method: Option<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
517
// Arguments of the top-level `Command::Search` variant.
// Combines an optional positional query, filter flags, paging, and an
// optional nested management subcommand (`SearchManagementCommand`).
// The `///` lines are clap help text; maintainer notes use `//`.
#[derive(Debug, clap::Args)]
struct SearchArgs {
    /// Search query (title/creator/year by default; PDF content with --fulltext).
    query: Option<String>,
    /// Search inside PDF full-text content instead of metadata.
    #[arg(long)]
    fulltext: bool,
    /// Filter by author/creator name (contains match).
    #[arg(long)]
    author: Option<String>,
    /// Filter by date after (YYYY or YYYY-MM-DD).
    #[arg(long)]
    after: Option<String>,
    /// Filter by date before (YYYY or YYYY-MM-DD).
    #[arg(long)]
    before: Option<String>,
    /// Filter by journal/publication title (contains match).
    #[arg(long)]
    journal: Option<String>,
    /// Filter by tag (exact match).
    #[arg(long)]
    tag: Option<String>,
    /// Find by DOI.
    #[arg(long)]
    doi: Option<String>,
    /// Find by ISBN.
    #[arg(long)]
    isbn: Option<String>,
    /// Find by ISSN.
    #[arg(long)]
    issn: Option<String>,
    /// Limit results to a collection name or key.
    #[arg(long)]
    collection: Option<String>,
    #[arg(long, default_value_t = 50)]
    limit: u64,
    #[arg(long, default_value_t = 0)]
    offset: u64,
    // This `url` belongs to the plain search; each management subcommand
    // declares its own `url` as well.
    #[arg(long, default_value = DEFAULT_RPC_URL)]
    url: String,
    // Optional: when absent, the invocation is an ordinary search.
    #[command(subcommand)]
    management: Option<SearchManagementCommand>,
}
561
// Saved-search management subcommands, reachable through the optional
// `management` field of `SearchArgs`.
// The `///` lines are clap help text; maintainer notes use `//`.
#[derive(Debug, Subcommand)]
enum SearchManagementCommand {
    /// List all saved searches in the library.
    #[command(name = "saved-searches")]
    SavedSearches {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Create a saved search with one or more conditions.
    #[command(name = "create-saved")]
    CreateSaved {
        name: String,
        // Repeatable `--condition`; clap requires at least one occurrence.
        // The condition syntax is parsed by the handler, not here.
        #[arg(long = "condition", required = true)]
        condition: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Delete a saved search by key.
    #[command(name = "delete-saved")]
    DeleteSaved {
        search_key: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
591
// Item subcommands, mounted through `Command::Items`.
// Mutating variants carry a `dry_run` flag; every variant takes `--url`,
// defaulting to `DEFAULT_RPC_URL`.
// The `///` lines are clap help text; maintainer notes use `//`.
#[derive(Debug, Subcommand)]
enum ItemsCommand {
    /// Add an item by DOI, ISBN, URL, or local file.
    Add {
        // The four sources below are all optional and unrelated at the clap
        // level; choosing exactly one is left to the handler.
        #[arg(long)]
        doi: Option<String>,
        #[arg(long)]
        isbn: Option<String>,
        /// Web page URL to add from.
        #[arg(long = "from-url")]
        from_url: Option<String>,
        /// Local file path to add from.
        #[arg(long)]
        file: Option<String>,
        #[arg(long)]
        collection: Option<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Create a new item of the given type.
    Create {
        #[arg(long = "type")]
        item_type: String,
        // Repeatable `--field`; the entry format is parsed by the handler.
        #[arg(long = "field")]
        fields: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Update fields on an existing item.
    Update {
        key: String,
        #[arg(long = "field")]
        fields: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Permanently delete an item.
    Delete {
        key: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Move one or more items to trash.
    Trash {
        // Variadic positional list of items.
        items: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Restore a trashed item.
    Restore {
        item: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Merge a group of duplicate items.
    #[command(name = "merge-duplicates")]
    MergeDuplicates {
        // Variadic positional list of item keys.
        keys: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Add a related-item link between two items.
    #[command(name = "add-related")]
    AddRelated {
        key: String,
        #[arg(long)]
        target: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Remove a related-item link between two items.
    #[command(name = "remove-related")]
    RemoveRelated {
        key: String,
        #[arg(long)]
        target: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Print the full serialization of an item by key.
    Get {
        item: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// List items in the library with optional sorting and pagination.
    List {
        #[arg(long, default_value_t = 50)]
        limit: u64,
        #[arg(long, default_value_t = 0)]
        offset: u64,
        #[arg(long)]
        sort: Option<String>,
        // Free-form string at the clap level; no `value_parser` restricts it.
        #[arg(long, default_value = "asc")]
        direction: String,
        /// List trashed items instead of regular items.
        #[arg(long)]
        trash: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Run Zotero's duplicate scan and print groups.
    #[command(name = "find-duplicates")]
    FindDuplicates {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// List recently added or modified items.
    Recent {
        #[arg(long, default_value_t = 20)]
        limit: u64,
        #[arg(long, default_value_t = 0)]
        offset: u64,
        // Exposed as `--type`; defaults to "added".
        // NOTE(review): the other accepted value is presumably "modified" — confirm.
        #[arg(long = "type", default_value = "added")]
        recent_type: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Retrieve the full-text content of an item's attachment.
    Fulltext {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// List items related to the given item.
    Related {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Get the citation key for an item.
    #[command(name = "citation-key")]
    CitationKey {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
748
// Preference subcommands, mounted through `Command::Settings`.
// The `///` lines are clap help text; maintainer notes use `//`.
#[derive(Debug, Subcommand)]
enum SettingsCommand {
    /// Get a single Zotero preference value.
    Get {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// List all Zotero preferences as a key->value dict.
    // `get-all` is a visible alias, so it is listed in the help output too.
    #[command(visible_alias = "get-all")]
    List {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Set one or more Zotero preferences (key value pairs), or bulk-set from a JSON file.
    Set {
        // Clap only collects the positional strings; pairing them up (and
        // rejecting an odd count) is left to the handler.
        /// key value key value ... (pairs of positional args)
        pairs: Vec<String>,
        /// Bulk-set from a JSON file.
        #[arg(long)]
        file: Option<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
776
// Tag subcommands, mounted through `Command::Tags`.
// The `///` lines are clap help text; maintainer notes use `//`.
#[derive(Debug, Subcommand)]
enum TagsCommand {
    /// List all tags in the library (flat).
    List {
        #[arg(long, default_value_t = 200)]
        limit: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Rename a tag across all items.
    Rename {
        // Two positionals, in this order: old name, then new name.
        old: String,
        new: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Delete a tag library-wide.
    Delete {
        tag: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Add tags to one or more items.
    Add {
        // Item keys are positional; tags come from repeatable `--tag`,
        // of which clap requires at least one.
        keys: Vec<String>,
        #[arg(long = "tag", required = true)]
        tags: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Remove tags from one or more items.
    Remove {
        // Same shape as `Add`: positional keys plus required `--tag`.
        keys: Vec<String>,
        #[arg(long = "tag", required = true)]
        tags: Vec<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
824
// Arguments of the top-level `Command::Export` variant.
// The `///` lines are clap help text; maintainer notes use `//`.
#[derive(Debug, clap::Args)]
struct ExportArgs {
    /// Item keys to export.
    keys: Vec<String>,
    // Free-form string at the clap level; the formats named in the help text
    // are not enforced by a `value_parser` here.
    /// Output format: bibtex, ris, csl-json, bibliography.
    #[arg(long, default_value = "bibtex")]
    format: String,
    /// Export all items from this collection (name or key).
    #[arg(long)]
    collection: Option<String>,
    /// Citation style URL (only for bibliography format).
    #[arg(long, default_value = "http://www.zotero.org/styles/apa")]
    style: String,
    /// Output HTML instead of plain text (only for bibliography format).
    #[arg(long)]
    html: bool,
    #[arg(long, default_value = DEFAULT_RPC_URL)]
    url: String,
}
844
// Annotation subcommands, mounted through `Command::Annotations`.
// The `///` lines are clap help text; maintainer notes use `//`.
#[derive(Debug, Subcommand)]
enum AnnotationsCommand {
    /// List annotations on a PDF attachment.
    List {
        parent: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Create a new annotation on a PDF attachment.
    Create {
        parent: String,
        // Exposed as `--type`; optional at the clap level.
        #[arg(long = "type")]
        annotation_type: Option<String>,
        // `position` and `quote` are both optional here; the "one of them is
        // needed" rule described in the help text is not expressed in clap.
        /// JSON annotation position, for example '{"pageIndex":0,"rects":[[10,20,30,40]]}'.
        /// Not required when --quote is given.
        #[arg(long)]
        position: Option<String>,
        /// Text to locate in the PDF and highlight. Resolves to rects automatically.
        /// Locates text headlessly (no PDF viewer required).
        /// Use with --dry-run for locate-only mode.
        #[arg(long)]
        quote: Option<String>,
        /// Restrict quote search to a specific page (0-indexed).
        #[arg(long)]
        page: Option<u32>,
        /// Zotero annotation sort index.
        #[arg(long = "sort-index")]
        sort_index: Option<String>,
        #[arg(long)]
        text: Option<String>,
        #[arg(long)]
        comment: Option<String>,
        // Default highlight colour as a hex string.
        #[arg(long, default_value = "#ffd400")]
        color: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Delete an annotation by key.
    Delete {
        annotation_key: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
893
// Attachment subcommands, mounted through `Command::Attachments`.
// The `///` lines are clap help text; maintainer notes use `//`.
#[derive(Debug, Subcommand)]
enum AttachmentsCommand {
    /// List attachments belonging to a parent item.
    List {
        #[arg(long)]
        parent: String,
        #[arg(long, default_value_t = 50)]
        limit: u64,
        #[arg(long, default_value_t = 0)]
        offset: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Get a single attachment by key.
    Get {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Get full-text content of an attachment.
    Fulltext {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Get the local filesystem path of an attachment.
    Path {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Attach a local file or remote URL to an item.
    Add {
        #[arg(long)]
        parent: String,
        // `path` and `from_url` are both optional and unrelated at the clap
        // level; requiring exactly one is left to the handler.
        /// Local file path to attach.
        #[arg(long)]
        path: Option<String>,
        /// Remote URL to attach.
        #[arg(long = "from-url")]
        from_url: Option<String>,
        #[arg(long)]
        title: Option<String>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Delete an attachment.
    Delete {
        key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
    /// Trigger Zotero's Find Available PDF for a parent item.
    #[command(name = "find-pdf")]
    FindPdf {
        #[arg(long)]
        parent: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
959
// Subcommands carried by `Command::Notes`.
//
// Every variant has a `--url` option that defaults to `DEFAULT_RPC_URL`;
// `command_url` reads it to choose the RPC endpoint. Fields without an
// `#[arg(long)]` attribute are positional arguments.
//
// NOTE: clap's derive turns the `///` lines below into user-facing help text,
// so they are part of the CLI output rather than plain documentation.
#[derive(Debug, Subcommand)]
enum NotesCommand {
    /// List notes attached to a parent item.
    List {
        // Required `--parent` value identifying the parent item.
        #[arg(long)]
        parent: String,
        // Paging window: `--limit` defaults to 50 and `--offset` to 0.
        #[arg(long, default_value_t = 50)]
        limit: u64,
        #[arg(long, default_value_t = 0)]
        offset: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Get a single note by key.
    Get {
        // Positional note key.
        note_key: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Create a note attached to a parent item.
    Create {
        #[arg(long)]
        parent: String,
        // Required `--content` value.
        #[arg(long)]
        content: String,
        // Repeatable `--tag` option; each occurrence appends one entry.
        #[arg(long = "tag")]
        tags: Vec<String>,
        // Boolean `--dry-run` switch.
        // NOTE(review): presumably previews the change without applying it — confirm in the handler.
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Update the content of an existing note.
    Update {
        // Positional note key.
        note_key: String,
        #[arg(long)]
        content: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Delete a note by key.
    Delete {
        // Positional note key.
        note_key: String,
        #[arg(long)]
        dry_run: bool,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Search notes by text content.
    Search {
        // Positional search text.
        query: String,
        // `--limit` defaults to 50; this variant declares no `--offset`.
        #[arg(long, default_value_t = 50)]
        limit: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
}
1019
// Subcommands carried by `Command::Collections`.
//
// Every variant has a `--url` option that defaults to `DEFAULT_RPC_URL`;
// `command_url` reads it to choose the RPC endpoint. Fields without an
// `#[arg(long)]` attribute are positional arguments, taken in declaration order.
//
// NOTE: clap's derive turns the `///` lines below into user-facing help text,
// so they are part of the CLI output rather than plain documentation.
#[derive(Debug, Subcommand)]
enum CollectionsCommand {
    /// List all collections in the user library (flat).
    List {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Print the collection hierarchy as a tree.
    Tree {
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Get a single collection's metadata.
    Get {
        // Positional collection name or identifier.
        name_or_id: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// List all items in a collection.
    // Exposed as `get-items`, with `items` as a visible alias.
    #[command(name = "get-items", visible_alias = "items")]
    GetItems {
        name_or_id: String,
        // Unlike the other list commands, `--limit` has no default here.
        #[arg(long)]
        limit: Option<u64>,
        #[arg(long, default_value_t = 0)]
        offset: u64,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Show item/attachment/note/subcollection counts for a collection.
    Stats {
        name_or_id: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
    },
    /// Rename a collection.
    Rename {
        // Two positionals, in this order: current name, then new name.
        old_name: String,
        new_name: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        // Boolean `--dry-run` switch.
        // NOTE(review): presumably previews the change without applying it — confirm in the handler.
        #[arg(long)]
        dry_run: bool,
    },
    /// Create a collection, optionally nested under a parent.
    Create {
        name: String,
        // Optional `--parent` collection.
        #[arg(long)]
        parent: Option<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
    /// Delete a collection.
    Delete {
        name_or_id: String,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
    /// Add existing items to a collection.
    #[command(name = "add-items")]
    AddItems {
        // First positional is the collection; remaining positionals are item keys.
        collection: String,
        item_keys: Vec<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
    /// Remove items from a collection.
    #[command(name = "remove-items")]
    RemoveItems {
        // First positional is the collection; remaining positionals are item keys.
        collection: String,
        item_keys: Vec<String>,
        #[arg(long, default_value = DEFAULT_RPC_URL)]
        url: String,
        #[arg(long)]
        dry_run: bool,
    },
}
1103
/// Output layout selector passed to `format_json` alongside the value to render.
///
/// Within this file, `run_ocr_command` uses `Pretty` for the provider listing and
/// `PythonCompact` for every other OCR result.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum JsonStyle {
    /// Matches Python's top-level `ping` command (`json.dumps` defaults).
    PythonCompact,
    /// Matches namespace commands that route through Python `emit(..., indent=2)`.
    Pretty,
}
1111
/// Result of a successful `parse_cli` call.
enum ParseOutcome<T> {
    /// Arguments parsed into a runnable command of type `T`.
    Command(T),
    /// clap produced help or version text; the string is returned to the caller
    /// as regular output instead of being treated as an error.
    Display(String),
}
1116
1117fn parse_cli<T>(
1118    args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1119) -> Result<ParseOutcome<T>, String>
1120where
1121    T: Parser,
1122{
1123    match T::try_parse_from(args) {
1124        Ok(cli) => Ok(ParseOutcome::Command(cli)),
1125        Err(err)
1126            if matches!(
1127                err.kind(),
1128                ErrorKind::DisplayHelp | ErrorKind::DisplayVersion
1129            ) =>
1130        {
1131            Ok(ParseOutcome::Display(err.to_string()))
1132        }
1133        Err(err) => Err(err.to_string()),
1134    }
1135}
1136
1137pub fn format_error_json(message: &str) -> String {
1138    let message = message.trim_end();
1139    let (code, message) = split_error_code(message).unwrap_or(("RUNTIME_ERROR", message));
1140    serde_json::json!({"error": {"code": code, "message": message}}).to_string()
1141}
1142
/// Splits a `CODE: text` message into its code and text parts.
///
/// The code is everything before the first `:` and must be non-empty and made
/// only of ASCII uppercase letters, ASCII digits, or `_`. Leading whitespace of
/// the text after the colon is removed. Returns `None` when there is no colon
/// or the prefix does not have that shape.
fn split_error_code(message: &str) -> Option<(&str, &str)> {
    let colon = message.find(':')?;
    let prefix = &message[..colon];
    if prefix.is_empty() {
        return None;
    }
    // Any byte of a non-ASCII character is >= 0x80, so a byte-wise check
    // accepts exactly the same prefixes as a per-character ASCII check.
    let shaped_like_code = prefix
        .bytes()
        .all(|byte| matches!(byte, b'A'..=b'Z' | b'0'..=b'9' | b'_'));
    if !shaped_like_code {
        return None;
    }
    Some((prefix, message[colon + 1..].trim_start()))
}
1155
1156pub fn run(
1157    args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1158) -> Result<String, String> {
1159    // Emit plugin hint for Claude Code discovery
1160    if std::env::var("CLAUDECODE").as_deref() == Ok("1") {
1161        eprintln!(r#"<claude-code-hint v="1" type="plugin" value="zotron@dianzuan/zotron" />"#);
1162    }
1163
1164    let cli = match parse_cli::<Cli>(args)? {
1165        ParseOutcome::Command(cli) => cli,
1166        ParseOutcome::Display(output) => return Ok(output),
1167    };
1168    let url = command_url(&cli.command);
1169    let mut client = ZoteroRpc::new(url);
1170    run_command(cli.command, &mut client)
1171}
1172
1173pub fn run_with_client(
1174    args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1175    client: &mut impl RpcCaller,
1176) -> Result<String, String> {
1177    let cli = match parse_cli::<Cli>(args)? {
1178        ParseOutcome::Command(cli) => cli,
1179        ParseOutcome::Display(output) => return Ok(output),
1180    };
1181    run_command(cli.command, client)
1182}
1183
1184fn rag_command_url(command: &RagCommand) -> String {
1185    match command {
1186        RagCommand::Providers => DEFAULT_RPC_URL.to_string(),
1187        RagCommand::Embed { .. } => DEFAULT_RPC_URL.to_string(),
1188        RagCommand::Status { url, .. } => url.clone(),
1189        RagCommand::Search { url, .. } => url.clone(),
1190    }
1191}
1192
1193fn command_url(command: &Command) -> String {
1194    match command {
1195        Command::Ping { url }
1196        | Command::Rpc { url, .. }
1197        | Command::Push { url, .. }
1198        | Command::FindPdfs { url, .. } => url.clone(),
1199        Command::Ocr { command } => match command {
1200            OcrCommand::Providers => DEFAULT_RPC_URL.to_string(),
1201            OcrCommand::Run { .. } => DEFAULT_RPC_URL.to_string(),
1202            OcrCommand::Status { url, .. } => url.clone(),
1203            OcrCommand::Process { url, .. } => url.clone(),
1204        },
1205        Command::Rag { command } => rag_command_url(command),
1206        Command::System { command } => match command {
1207            SystemCommand::Version { url }
1208            | SystemCommand::Libraries { url }
1209            | SystemCommand::LibraryStats { url, .. }
1210            | SystemCommand::Schema { url, .. }
1211            | SystemCommand::CurrentCollection { url }
1212            | SystemCommand::ListMethods { url }
1213            | SystemCommand::Describe { url, .. } => url.clone(),
1214        },
1215        Command::Search(ref args) => match &args.management {
1216            Some(SearchManagementCommand::SavedSearches { url })
1217            | Some(SearchManagementCommand::CreateSaved { url, .. })
1218            | Some(SearchManagementCommand::DeleteSaved { url, .. }) => url.clone(),
1219            None => args.url.clone(),
1220        },
1221        Command::Items { command } => match command {
1222            ItemsCommand::Add { url, .. }
1223            | ItemsCommand::Create { url, .. }
1224            | ItemsCommand::Update { url, .. }
1225            | ItemsCommand::Delete { url, .. }
1226            | ItemsCommand::Trash { url, .. }
1227            | ItemsCommand::Restore { url, .. }
1228            | ItemsCommand::MergeDuplicates { url, .. }
1229            | ItemsCommand::AddRelated { url, .. }
1230            | ItemsCommand::RemoveRelated { url, .. }
1231            | ItemsCommand::Get { url, .. }
1232            | ItemsCommand::List { url, .. }
1233            | ItemsCommand::FindDuplicates { url }
1234            | ItemsCommand::Recent { url, .. }
1235            | ItemsCommand::Fulltext { url, .. }
1236            | ItemsCommand::Related { url, .. }
1237            | ItemsCommand::CitationKey { url, .. } => url.clone(),
1238        },
1239        Command::Collections { command } => match command {
1240            CollectionsCommand::List { url }
1241            | CollectionsCommand::Tree { url }
1242            | CollectionsCommand::Get { url, .. }
1243            | CollectionsCommand::GetItems { url, .. }
1244            | CollectionsCommand::Stats { url, .. }
1245            | CollectionsCommand::Rename { url, .. }
1246            | CollectionsCommand::Create { url, .. }
1247            | CollectionsCommand::Delete { url, .. }
1248            | CollectionsCommand::AddItems { url, .. }
1249            | CollectionsCommand::RemoveItems { url, .. } => url.clone(),
1250        },
1251        Command::Notes { command } => match command {
1252            NotesCommand::List { url, .. }
1253            | NotesCommand::Get { url, .. }
1254            | NotesCommand::Create { url, .. }
1255            | NotesCommand::Update { url, .. }
1256            | NotesCommand::Delete { url, .. }
1257            | NotesCommand::Search { url, .. } => url.clone(),
1258        },
1259        Command::Attachments { command } => match command {
1260            AttachmentsCommand::List { url, .. }
1261            | AttachmentsCommand::Get { url, .. }
1262            | AttachmentsCommand::Fulltext { url, .. }
1263            | AttachmentsCommand::Path { url, .. }
1264            | AttachmentsCommand::Add { url, .. }
1265            | AttachmentsCommand::Delete { url, .. }
1266            | AttachmentsCommand::FindPdf { url, .. } => url.clone(),
1267        },
1268        Command::Settings { command } => match command {
1269            SettingsCommand::Get { url, .. }
1270            | SettingsCommand::List { url }
1271            | SettingsCommand::Set { url, .. } => url.clone(),
1272        },
1273        Command::Tags { command } => match command {
1274            TagsCommand::List { url, .. }
1275            | TagsCommand::Rename { url, .. }
1276            | TagsCommand::Delete { url, .. }
1277            | TagsCommand::Add { url, .. }
1278            | TagsCommand::Remove { url, .. } => url.clone(),
1279        },
1280        Command::Export(ref args) => args.url.clone(),
1281        Command::Annotations { command } => match command {
1282            AnnotationsCommand::List { url, .. }
1283            | AnnotationsCommand::Create { url, .. }
1284            | AnnotationsCommand::Delete { url, .. } => url.clone(),
1285        },
1286    }
1287}
1288
1289fn run_ocr_command(command: OcrCommand, client: &mut impl RpcCaller) -> Result<String, String> {
1290    if let OcrCommand::Providers = &command {
1291        return format_json(
1292            &serde_json::json!({ "providers": ocr_provider_specs() }),
1293            JsonStyle::Pretty,
1294        );
1295    }
1296    let value = match command {
1297        OcrCommand::Providers => unreachable!(),
1298        OcrCommand::Run {
1299            provider,
1300            input,
1301            file,
1302            item_key,
1303            attachment_key,
1304            mime_type,
1305            endpoint,
1306            api_key_env,
1307        } => run_ocr_run_command(OcrRunOptions {
1308            provider,
1309            input,
1310            file,
1311            item_key,
1312            attachment_key,
1313            mime_type,
1314            endpoint,
1315            api_key_env,
1316        })?,
1317        OcrCommand::Status { collection, .. } => run_ocr_status_command(client, collection)?,
1318        OcrCommand::Process {
1319            provider,
1320            parent,
1321            attachment,
1322            source_url,
1323            result_dir,
1324            result_zip,
1325            provider_endpoint,
1326            api_key_env,
1327            poll_interval_seconds,
1328            timeout_seconds,
1329            chunk_chars,
1330            ..
1331        } => run_ocr_process_command(
1332            client,
1333            OcrProcessOptions {
1334                provider,
1335                parent,
1336                attachment,
1337                source_url,
1338                result_dir,
1339                result_zip,
1340                provider_endpoint,
1341                api_key_env,
1342                poll_interval_seconds,
1343                timeout_seconds,
1344                chunk_chars,
1345            },
1346        )?,
1347    };
1348    format_json(&value, JsonStyle::PythonCompact)
1349}
1350
/// Arguments of `ocr process`, gathered from `OcrCommand::Process` and handed to
/// `run_ocr_process_command`.
struct OcrProcessOptions {
    /// Provider identifier, resolved through `raw_ocr_provider_spec`.
    provider: String,
    /// Parent item key; reported as `item_key` in the command output and used to
    /// look up the first PDF attachment when `attachment` is not given.
    parent: String,
    /// Attachment key. When `None`, `run_ocr_process_command` resolves the first
    /// PDF attachment of `parent` and stores it back here.
    attachment: Option<String>,
    /// Remote document URL for the MinerU path; rejected when combined with
    /// `result_dir` or `result_zip`.
    source_url: Option<String>,
    /// Directory with an existing MinerU result; mutually exclusive with `result_zip`.
    result_dir: Option<String>,
    /// Zip archive with an existing MinerU result; mutually exclusive with `result_dir`.
    result_zip: Option<String>,
    /// Provider endpoint override. In the synchronous path it takes precedence over
    /// the `ocr.apiUrl` setting.
    provider_endpoint: Option<String>,
    /// Name of the environment variable holding the API key. In the synchronous path
    /// an empty name, or an unset/empty variable, falls back to the `ocr.apiKey` setting.
    api_key_env: String,
    /// Forwarded to `poll_mineru_task` together with `timeout_seconds`.
    poll_interval_seconds: u64,
    /// Forwarded to `poll_mineru_task` together with `poll_interval_seconds`.
    timeout_seconds: u64,
    /// Chunk size passed to `chunks_from_blocks` / `persist_mineru_result_sidecars`.
    chunk_chars: usize,
}
1364
/// Arguments of `ocr run`, gathered from `OcrCommand::Run` and handed to
/// `run_ocr_run_command`.
struct OcrRunOptions {
    /// Provider identifier used to build the provider request.
    provider: String,
    /// JSON input source passed to `read_json_input`; mutually exclusive with `file`.
    input: Option<String>,
    /// File passed to `ocr_input_from_file`; mutually exclusive with `input`.
    file: Option<String>,
    /// Forwarded to `ocr_input_from_file`; only consulted when `file` is used.
    item_key: Option<String>,
    /// Forwarded to `ocr_input_from_file`; only consulted when `file` is used.
    attachment_key: Option<String>,
    /// Forwarded to `ocr_input_from_file`; only consulted when `file` is used.
    mime_type: Option<String>,
    /// Overrides the URL of the built HTTP request when present.
    endpoint: Option<String>,
    /// Optional environment variable name handed to `provider_http_transport_with_auth`.
    api_key_env: Option<String>,
}
1375
1376fn run_ocr_run_command(options: OcrRunOptions) -> Result<Value, String> {
1377    let input: OcrRequestInput = match (options.input, options.file) {
1378        (Some(input), None) => read_json_input(&input)?,
1379        (None, Some(file)) => ocr_input_from_file(
1380            file,
1381            options.item_key,
1382            options.attachment_key,
1383            options.mime_type,
1384        )?,
1385        (Some(_), Some(_)) => {
1386            return Err("INVALID_ARGS: use either --input or --file, not both".to_string())
1387        }
1388        (None, None) => return Err("INVALID_ARGS: provide --input JSON or --file".to_string()),
1389    };
1390    let request = build_ocr_provider_request(&options.provider, &input)?;
1391    let payload = if request.command.is_empty() {
1392        let method = request
1393            .method
1394            .ok_or_else(|| format!("OCR provider {} missing HTTP method", request.provider))?;
1395        let auth_scheme = raw_ocr_provider_spec(&options.provider)?.auth;
1396        let mut transport =
1397            provider_http_transport_with_auth(options.api_key_env.as_deref(), auth_scheme)?;
1398        transport.post_json(&ProviderHttpInvocation {
1399            provider: request.provider.to_string(),
1400            style: request.style.to_string(),
1401            method: method.to_string(),
1402            url: options
1403                .endpoint
1404                .or_else(|| request.url.map(ToString::to_string)),
1405            auth_header_name: request.auth_header.map(ToString::to_string),
1406            auth_header_value: None,
1407            body: request.body,
1408        })?
1409    } else {
1410        let mut command_runner = StdProviderCommandRunner;
1411        command_runner.run_json(&request.command)?
1412    };
1413    let blocks = match parse_ocr_provider_response(
1414        request.provider,
1415        &payload,
1416        &input.item_key,
1417        &input.attachment_key,
1418    ) {
1419        Ok(blocks) => blocks,
1420        Err(err) => {
1421            if let Some(task) = ocr_async_task_result(request.provider, &payload) {
1422                return Ok(task);
1423            }
1424            return Err(err);
1425        }
1426    };
1427
1428    Ok(serde_json::json!({
1429        "provider": request.provider,
1430        "blocks": blocks,
1431    }))
1432}
1433
1434fn run_ocr_process_command(
1435    client: &mut impl RpcCaller,
1436    mut options: OcrProcessOptions,
1437) -> Result<Value, String> {
1438    let spec = raw_ocr_provider_spec(&options.provider)?;
1439
1440    let attachment = match options.attachment.take() {
1441        Some(key) => key,
1442        None => resolve_first_pdf_attachment_key(client, &options.parent)?,
1443    };
1444    options.attachment = Some(attachment.clone());
1445
1446    let attachment_path = resolve_attachment_path(client, &attachment)?;
1447    let storage_dir = attachment_path
1448        .parent()
1449        .ok_or_else(|| {
1450            format!(
1451                "ATTACHMENT_PATH_INVALID: attachment path has no parent directory: {}",
1452                attachment_path.display()
1453            )
1454        })?
1455        .to_path_buf();
1456
1457    match spec.provider_key {
1458        "mineru" | "mineru-cli" => {
1459            if options.result_dir.is_some() && options.result_zip.is_some() {
1460                return Err("INVALID_ARGS: use either --result-dir or --result-zip, not both".to_string());
1461            }
1462            if options.source_url.is_some()
1463                && (options.result_dir.is_some() || options.result_zip.is_some())
1464            {
1465                return Err(
1466                    "INVALID_ARGS: --source-url cannot be combined with --result-dir/--result-zip"
1467                        .to_string(),
1468                );
1469            }
1470            let file_name = attachment_path
1471                .file_name()
1472                .and_then(|name| name.to_str())
1473                .unwrap_or("document.pdf")
1474                .to_string();
1475            let source = load_mineru_result_source(&options, &attachment_path, &file_name)?;
1476            let artifacts = persist_mineru_result_sidecars(
1477                &storage_dir, &options.parent, &attachment,
1478                &options.provider, &source, options.chunk_chars,
1479            )?;
1480            Ok(serde_json::json!({
1481                "provider": spec.provider_key,
1482                "status": "indexed",
1483                "item_key": options.parent,
1484                "attachment_key": attachment,
1485                "attachment_path": attachment_path,
1486                "storage_dir": storage_dir,
1487                "task_id": source.task_id,
1488                "state": source.state,
1489                "blocks": artifacts.block_count,
1490                "chunks": artifacts.chunk_count,
1491                "artifacts": artifacts.artifacts,
1492            }))
1493        }
1494        _ => {
1495            run_ocr_process_sync(
1496                client, &options, spec.provider_key,
1497                &attachment, &attachment_path, &storage_dir,
1498            )
1499        }
1500    }
1501}
1502
1503fn run_ocr_process_sync(
1504    client: &mut impl RpcCaller,
1505    options: &OcrProcessOptions,
1506    provider: &str,
1507    attachment_key: &str,
1508    attachment_path: &Path,
1509    storage_dir: &Path,
1510) -> Result<Value, String> {
1511    let api_url = if let Some(endpoint) = &options.provider_endpoint {
1512        endpoint.clone()
1513    } else {
1514        let settings = client.call("settings.getAll", None)?;
1515        settings.get("ocr.apiUrl")
1516            .and_then(Value::as_str)
1517            .unwrap_or("")
1518            .to_string()
1519    };
1520    if api_url.is_empty() {
1521        return Err(format!("MISSING_CONFIG: ocr.apiUrl not configured for provider {provider}"));
1522    }
1523
1524    let api_key = {
1525        let from_env = if !options.api_key_env.is_empty() {
1526            env::var(&options.api_key_env).ok().filter(|v| !v.is_empty())
1527        } else {
1528            None
1529        };
1530        from_env.unwrap_or_else(|| {
1531            client.call("settings.getRaw", Some(serde_json::json!({"key": "ocr.apiKey"})))
1532                .ok()
1533                .and_then(|raw| raw.get("ocr.apiKey").and_then(Value::as_str).map(String::from))
1534                .unwrap_or_default()
1535        })
1536    };
1537
1538    let pdf_bytes = fs::read(attachment_path)
1539        .map_err(|e| format!("READ_PDF_FAILED: {}: {e}", attachment_path.display()))?;
1540
1541    const MAX_PDF_SIZE: usize = 100 * 1024 * 1024; // 100 MB
1542    if pdf_bytes.len() > MAX_PDF_SIZE {
1543        return Err(format!(
1544            "PDF_TOO_LARGE: {} is {} MB, max {} MB",
1545            attachment_path.display(),
1546            pdf_bytes.len() / (1024 * 1024),
1547            MAX_PDF_SIZE / (1024 * 1024),
1548        ));
1549    }
1550
1551    let base64_pdf = format!("data:application/pdf;base64,{}", base64_encode(&pdf_bytes));
1552
1553    let input = OcrRequestInput {
1554        content_base64: base64_pdf,
1555        file_name: attachment_path
1556            .file_name()
1557            .and_then(|n| n.to_str())
1558            .unwrap_or("document.pdf")
1559            .to_string(),
1560        mime_type: "application/pdf".to_string(),
1561        item_key: options.parent.clone(),
1562        attachment_key: attachment_key.to_string(),
1563        source_url: None,
1564        local_path: Some(attachment_path.to_string_lossy().to_string()),
1565        output_dir: None,
1566    };
1567    let request = build_ocr_provider_request(provider, &input)?;
1568
1569    let payload = if request.command.is_empty() {
1570        let method = request
1571            .method
1572            .ok_or_else(|| format!("OCR provider {provider} missing HTTP method"))?;
1573        let spec = raw_ocr_provider_spec(provider)?;
1574
1575        let mut transport = if !api_key.is_empty() {
1576            match spec.auth {
1577                "bearer" => UreqProviderHttpTransport::with_bearer_token(&api_key),
1578                "token" => UreqProviderHttpTransport::with_api_key(format!("token {api_key}")),
1579                _ => UreqProviderHttpTransport::new(),
1580            }
1581        } else {
1582            UreqProviderHttpTransport::new()
1583        };
1584
1585        transport.post_json(&ProviderHttpInvocation {
1586            provider: request.provider.to_string(),
1587            style: request.style.to_string(),
1588            method: method.to_string(),
1589            url: Some(api_url),
1590            auth_header_name: request.auth_header.map(ToString::to_string),
1591            auth_header_value: None,
1592            body: request.body,
1593        })?
1594    } else {
1595        let mut runner = StdProviderCommandRunner;
1596        runner.run_json(&request.command)?
1597    };
1598
1599    let blocks = parse_ocr_provider_response(provider, &payload, &options.parent, attachment_key)?;
1600    let chunks = zotron_types::chunks_from_blocks(&blocks, options.chunk_chars);
1601
1602    let artifacts = vec![
1603        write_sidecar_json(
1604            storage_dir, &options.parent, attachment_key,
1605            MachineArtifactKind::OcrRaw, &payload,
1606        )?,
1607        write_sidecar_jsonl(
1608            storage_dir, &options.parent, attachment_key,
1609            MachineArtifactKind::Blocks, &blocks,
1610        )?,
1611        write_sidecar_jsonl(
1612            storage_dir, &options.parent, attachment_key,
1613            MachineArtifactKind::Chunks, &chunks,
1614        )?,
1615    ];
1616
1617    let embedding_count = embed_sidecar_chunks(client, storage_dir, &options.parent, attachment_key, &chunks);
1618
1619    Ok(serde_json::json!({
1620        "provider": provider,
1621        "status": "indexed",
1622        "item_key": options.parent,
1623        "attachment_key": attachment_key,
1624        "embeddings": embedding_count,
1625        "attachment_path": attachment_path,
1626        "storage_dir": storage_dir,
1627        "blocks": blocks.len(),
1628        "chunks": chunks.len(),
1629        "artifacts": artifacts,
1630    }))
1631}
1632
1633fn embed_sidecar_chunks(
1634    client: &mut impl RpcCaller,
1635    storage_dir: &Path,
1636    item_key: &str,
1637    _attachment_key: &str,
1638    chunks: &[zotron_types::StructureChunk],
1639) -> usize {
1640    let Ok((provider, model, api_url, api_key)) = fetch_embedding_settings(client) else {
1641        return 0;
1642    };
1643    if provider.is_empty() || (api_key.is_empty() && provider != "ollama") {
1644        return 0;
1645    }
1646    let emb_chunks: Vec<EmbeddingChunkInput> = chunks
1647        .iter()
1648        .map(|c| EmbeddingChunkInput {
1649            chunk_key: c.chunk_key.clone(),
1650            text: c.text.clone(),
1651        })
1652        .collect();
1653    if emb_chunks.is_empty() {
1654        return 0;
1655    }
1656    // Batch in groups of 20 to avoid API limits
1657    let batch_size = 20;
1658    let mut all_vectors: Vec<EmbeddingVector> = Vec::new();
1659    for batch in emb_chunks.chunks(batch_size) {
1660        let input = EmbeddingRequestInput {
1661            item_key: item_key.to_string(),
1662            chunks: batch.to_vec(),
1663            model: if model.is_empty() { None } else { Some(model.clone()) },
1664            url: if api_url.is_empty() { None } else { Some(api_url.clone()) },
1665            input_type: Some("document".to_string()),
1666        };
1667        let Ok(request) = build_embedding_provider_request(&provider, &input) else {
1668            break;
1669        };
1670        let Some(url) = request.url.as_deref() else { break };
1671        let mut http = ureq::post(url).set("Content-Type", "application/json");
1672        if let Some(auth) = request.auth_header {
1673            if !api_key.is_empty() {
1674                http = http.set(auth, &format!("Bearer {api_key}"));
1675            }
1676        }
1677        let Ok(resp) = http.send_json(&request.body) else { break };
1678        let Ok(payload): Result<Value, _> = resp.into_json() else { break };
1679        let Ok(vectors) = parse_embedding_provider_response(&provider, &payload, item_key, batch)
1680        else {
1681            break;
1682        };
1683        all_vectors.extend(vectors);
1684    }
1685    let count = all_vectors.len();
1686    if count > 0 {
1687        let filename = embedding_vector_filename(&provider, &model);
1688        let vectors_dir = storage_dir.join(".zotron").join("embeddings");
1689        fs::create_dir_all(&vectors_dir).map_err(|e| {
1690            eprintln!("warning: cannot create embeddings dir {}: {e}", vectors_dir.display());
1691            e
1692        }).ok();
1693        let vectors_path = vectors_dir.join(&filename);
1694        let mut out = String::new();
1695        for v in &all_vectors {
1696            if let Ok(line) = serde_json::to_string(v) {
1697                out.push_str(&line);
1698                out.push('\n');
1699            }
1700        }
1701        if let Err(e) = fs::write(&vectors_path, &out) {
1702            eprintln!("warning: failed to persist embeddings to {}: {e}", vectors_path.display());
1703        }
1704    }
1705    count
1706}
1707
/// MinerU result bundle returned by `load_mineru_result_source` and passed to
/// `persist_mineru_result_sidecars`.
///
/// Instances are built by `mineru_result_source_from_dir`, which is not shown in
/// this part of the file; field notes marked NOTE(review) are assumptions.
struct MineruResultSource {
    /// Reported as `task_id` in the `ocr process` JSON summary.
    task_id: Option<String>,
    /// Reported as `state` in the `ocr process` JSON summary.
    state: String,
    // NOTE(review): presumably the directory the MinerU result was read from — confirm.
    result_dir: PathBuf,
    // NOTE(review): presumably the original result archive, when one was available — confirm.
    raw_zip_bytes: Option<Vec<u8>>,
    // NOTE(review): presumably the final task polling response for remote tasks — confirm.
    task_status: Option<Value>,
    // NOTE(review): presumably the raw provider payload persisted as the OCR sidecar — confirm.
    payload: Value,
    // NOTE(review): presumably the location of MinerU's content-list file inside `result_dir` — confirm.
    content_list_file: Option<PathBuf>,
    // NOTE(review): presumably the Markdown rendering shipped with the result — confirm.
    markdown: Option<String>,
}
1718
/// Summary returned by `persist_mineru_result_sidecars`; each field is copied into
/// the `ocr process` JSON output by `run_ocr_process_command`.
struct PersistedOcrArtifacts {
    /// Reported as `blocks`.
    block_count: usize,
    /// Reported as `chunks`.
    chunk_count: usize,
    /// Reported as `artifacts`.
    artifacts: Vec<Value>,
}
1724
1725fn resolve_attachment_path(
1726    client: &mut impl RpcCaller,
1727    attachment_key: &str,
1728) -> Result<PathBuf, String> {
1729    let payload = client.call(
1730        "attachments.getPath",
1731        Some(serde_json::json!({"key": attachment_key})),
1732    )?;
1733    let raw_path = payload
1734        .get("path")
1735        .and_then(Value::as_str)
1736        .filter(|path| !path.trim().is_empty())
1737        .ok_or_else(|| {
1738            format!("ATTACHMENT_PATH_NOT_FOUND: attachment {attachment_key} has no local PDF path")
1739        })?;
1740    Ok(PathBuf::from(local_path_from_zotero_path(raw_path)))
1741}
1742
1743/// Resolve the first PDF attachment key for a parent item via `attachments.list`.
1744fn resolve_first_pdf_attachment_key(
1745    client: &mut impl RpcCaller,
1746    parent_key: &str,
1747) -> Result<String, String> {
1748    let response = client.call(
1749        "attachments.list",
1750        Some(serde_json::json!({"parentKey": parent_key})),
1751    )?;
1752    // The XPI returns {items: [...], total: N}.
1753    let attachments = response
1754        .get("items")
1755        .and_then(Value::as_array)
1756        .or_else(|| response.as_array())
1757        .ok_or_else(|| {
1758            format!("NO_PDF_ATTACHMENT: no attachments found for item {parent_key}")
1759        })?;
1760    for attachment in attachments {
1761        if is_pdf_attachment(attachment) {
1762            if let Some(key) = attachment.get("key").and_then(Value::as_str) {
1763                return Ok(key.to_string());
1764            }
1765        }
1766    }
1767    Err(format!(
1768        "NO_PDF_ATTACHMENT: no PDF attachment found for item {parent_key}"
1769    ))
1770}
1771
1772fn load_mineru_result_source(
1773    options: &OcrProcessOptions,
1774    attachment_path: &Path,
1775    file_name: &str,
1776) -> Result<MineruResultSource, String> {
1777    if let Some(result_dir) = options.result_dir.as_deref() {
1778        return mineru_result_source_from_dir(PathBuf::from(result_dir), None, None, None);
1779    }
1780    if let Some(result_zip) = options.result_zip.as_deref() {
1781        let zip_path = PathBuf::from(result_zip);
1782        let zip_bytes = fs::read(&zip_path)
1783            .map_err(|err| format!("read MinerU result zip {}: {err}", zip_path.display()))?;
1784        let result_dir = extract_zip_bytes_to_temp("zotron-mineru-result", &zip_bytes)?;
1785        return mineru_result_source_from_dir(result_dir, Some(zip_bytes), None, None);
1786    }
1787
1788    let Some(source_url) = options
1789        .source_url
1790        .as_deref()
1791        .filter(|value| !value.trim().is_empty())
1792    else {
1793        return submit_mineru_local_file(options, attachment_path, file_name);
1794    };
1795    let input = OcrRequestInput {
1796        item_key: options.parent.clone(),
1797        attachment_key: options.attachment.clone().expect("attachment resolved"),
1798        file_name: file_name.to_string(),
1799        mime_type: "application/pdf".to_string(),
1800        content_base64: format!("url:{source_url}"),
1801        source_url: Some(source_url.to_string()),
1802        local_path: None,
1803        output_dir: None,
1804    };
1805    let task = submit_mineru_task(
1806        &options.provider,
1807        &input,
1808        options.provider_endpoint.clone(),
1809        &options.api_key_env,
1810    )?;
1811    let task_id = task
1812        .get("data")
1813        .and_then(|data| data.get("task_id"))
1814        .and_then(Value::as_str)
1815        .ok_or_else(|| "MinerU submit response missing data.task_id".to_string())?
1816        .to_string();
1817    let auth_header = provider_auth_header_value(&options.api_key_env, "bearer")?;
1818    let status = poll_mineru_task(
1819        options.provider_endpoint.as_deref(),
1820        &task_id,
1821        &auth_header,
1822        options.poll_interval_seconds,
1823        options.timeout_seconds,
1824    )?;
1825    let zip_url = status
1826        .pointer("/data/full_zip_url")
1827        .or_else(|| status.pointer("/data/result/full_zip_url"))
1828        .and_then(Value::as_str)
1829        .ok_or_else(|| "MinerU completed task missing data.full_zip_url".to_string())?;
1830    let zip_bytes = download_bytes(zip_url)?;
1831    let result_dir = extract_zip_bytes_to_temp("zotron-mineru-result", &zip_bytes)?;
1832    mineru_result_source_from_dir(result_dir, Some(zip_bytes), Some(status), Some(task_id))
1833}
1834
1835fn submit_mineru_local_file(
1836    options: &OcrProcessOptions,
1837    attachment_path: &Path,
1838    file_name: &str,
1839) -> Result<MineruResultSource, String> {
1840    let auth_header = provider_auth_header_value(&options.api_key_env, "bearer")?;
1841    let upload_request = create_mineru_file_upload(
1842        options.provider_endpoint.as_deref(),
1843        file_name,
1844        options.attachment.as_deref().expect("attachment resolved"),
1845        &auth_header,
1846    )?;
1847    let upload_url = upload_request
1848        .pointer("/data/file_urls/0")
1849        .or_else(|| upload_request.pointer("/data/fileUrls/0"))
1850        .and_then(Value::as_str)
1851        .ok_or_else(|| "MinerU upload URL response missing data.file_urls[0]".to_string())?;
1852    let batch_id = upload_request
1853        .pointer("/data/batch_id")
1854        .or_else(|| upload_request.pointer("/data/batchId"))
1855        .and_then(Value::as_str)
1856        .ok_or_else(|| "MinerU upload URL response missing data.batch_id".to_string())?
1857        .to_string();
1858    let bytes = fs::read(attachment_path)
1859        .map_err(|err| format!("read attachment PDF {}: {err}", attachment_path.display()))?;
1860    put_bytes(upload_url, &bytes)?;
1861    let status = poll_mineru_batch(
1862        options.provider_endpoint.as_deref(),
1863        &batch_id,
1864        &auth_header,
1865        options.poll_interval_seconds,
1866        options.timeout_seconds,
1867    )?;
1868    let zip_url = mineru_batch_zip_url(&status)
1869        .ok_or_else(|| "MinerU completed batch missing full_zip_url".to_string())?;
1870    let zip_bytes = download_bytes(&zip_url)?;
1871    let result_dir = extract_zip_bytes_to_temp("zotron-mineru-result", &zip_bytes)?;
1872    mineru_result_source_from_dir(result_dir, Some(zip_bytes), Some(status), Some(batch_id))
1873}
1874
1875fn create_mineru_file_upload(
1876    endpoint: Option<&str>,
1877    file_name: &str,
1878    data_id: &str,
1879    auth_header: &str,
1880) -> Result<Value, String> {
1881    let url = mineru_file_urls_url(endpoint);
1882    let body = serde_json::json!({
1883        "files": [{"name": file_name, "data_id": data_id}],
1884        "model_version": "vlm",
1885        "is_ocr": false,
1886        "enable_formula": true,
1887        "enable_table": true,
1888        "language": "ch",
1889        "page_ranges": "1-200",
1890    });
1891    ureq::post(&url)
1892        .set("Authorization", auth_header)
1893        .send_json(body)
1894        .map_err(|err| format!("POST {url} failed: {err}"))?
1895        .into_json::<Value>()
1896        .map_err(|err| format!("POST {url} returned invalid JSON: {err}"))
1897}
1898
1899fn put_bytes(url: &str, bytes: &[u8]) -> Result<(), String> {
1900    ureq::put(url)
1901        .send_bytes(bytes)
1902        .map_err(|err| format!("PUT {url} failed: {err}"))?;
1903    Ok(())
1904}
1905
1906fn submit_mineru_task(
1907    provider: &str,
1908    input: &OcrRequestInput,
1909    endpoint: Option<String>,
1910    api_key_env: &str,
1911) -> Result<Value, String> {
1912    let request = build_ocr_provider_request(provider, input)?;
1913    let method = request
1914        .method
1915        .ok_or_else(|| "MinerU provider missing HTTP method".to_string())?;
1916    let mut transport = provider_http_transport_with_auth(Some(api_key_env), "bearer")?;
1917    transport.post_json(&ProviderHttpInvocation {
1918        provider: request.provider.to_string(),
1919        style: request.style.to_string(),
1920        method: method.to_string(),
1921        url: endpoint.or_else(|| request.url.map(ToString::to_string)),
1922        auth_header_name: request.auth_header.map(ToString::to_string),
1923        auth_header_value: None,
1924        body: request.body,
1925    })
1926}
1927
1928fn poll_mineru_task(
1929    endpoint: Option<&str>,
1930    task_id: &str,
1931    auth_header: &str,
1932    poll_interval_seconds: u64,
1933    timeout_seconds: u64,
1934) -> Result<Value, String> {
1935    let url = mineru_task_status_url(endpoint, task_id);
1936    let started = Instant::now();
1937    loop {
1938        let status = get_json_with_auth(&url, auth_header)?;
1939        let state = status
1940            .pointer("/data/state")
1941            .or_else(|| status.pointer("/data/status"))
1942            .and_then(Value::as_str)
1943            .unwrap_or("unknown");
1944        match state {
1945            "done" | "finished" | "success" => return Ok(status),
1946            "failed" | "error" => return Err(format!("MinerU task {task_id} failed: {status}")),
1947            _ => {
1948                if started.elapsed() >= Duration::from_secs(timeout_seconds) {
1949                    return Err(format!(
1950                        "MinerU task {task_id} timed out after {timeout_seconds}s with state {state}"
1951                    ));
1952                }
1953                thread::sleep(Duration::from_secs(poll_interval_seconds.max(1)));
1954            }
1955        }
1956    }
1957}
1958
/// Builds the status URL for a single MinerU extraction task.
///
/// `endpoint` may be the API root, the `/extract` prefix, or the full
/// `/extract/task` collection URL (with or without trailing slashes); all
/// three resolve to `{api root}/extract/task/{task_id}`. When `endpoint` is
/// `None` the public MinerU v4 endpoint is used.
fn mineru_task_status_url(endpoint: Option<&str>, task_id: &str) -> String {
    let base = endpoint
        .unwrap_or("https://mineru.net/api/v4/extract/task")
        .trim_end_matches('/');
    // Normalise to the API root first so an endpoint ending in `/extract`
    // does not produce `/extract/extract/task/...`.
    let api_root = base
        .strip_suffix("/extract/task")
        .or_else(|| base.strip_suffix("/extract"))
        .unwrap_or(base);
    format!("{api_root}/extract/task/{task_id}")
}
1969
1970fn mineru_file_urls_url(endpoint: Option<&str>) -> String {
1971    let base = mineru_api_base(endpoint);
1972    format!("{base}/file-urls/batch")
1973}
1974
1975fn mineru_batch_status_url(endpoint: Option<&str>, batch_id: &str) -> String {
1976    let base = mineru_api_base(endpoint);
1977    format!("{base}/extract-results/batch/{batch_id}")
1978}
1979
/// Reduces a configured endpoint to the MinerU API base URL.
///
/// Trailing slashes are removed, then a trailing `/extract/task` or, failing
/// that, a trailing `/extract` is stripped. Without an endpoint the public
/// MinerU v4 task URL is used as the starting point.
fn mineru_api_base(endpoint: Option<&str>) -> String {
    let trimmed = endpoint
        .unwrap_or("https://mineru.net/api/v4/extract/task")
        .trim_end_matches('/');
    // Longest suffix first so `/extract/task` is not reduced to `/extract`'s remainder.
    ["/extract/task", "/extract"]
        .iter()
        .find_map(|suffix| trimmed.strip_suffix(*suffix))
        .unwrap_or(trimmed)
        .to_string()
}
1992
1993fn poll_mineru_batch(
1994    endpoint: Option<&str>,
1995    batch_id: &str,
1996    auth_header: &str,
1997    poll_interval_seconds: u64,
1998    timeout_seconds: u64,
1999) -> Result<Value, String> {
2000    let url = mineru_batch_status_url(endpoint, batch_id);
2001    let started = Instant::now();
2002    loop {
2003        let status = get_json_with_auth(&url, auth_header)?;
2004        let state = mineru_batch_state(&status).unwrap_or("unknown");
2005        match state {
2006            "done" | "finished" | "success" => return Ok(status),
2007            "failed" | "error" => return Err(format!("MinerU batch {batch_id} failed: {status}")),
2008            _ => {
2009                if started.elapsed() >= Duration::from_secs(timeout_seconds) {
2010                    return Err(format!(
2011                        "MinerU batch {batch_id} timed out after {timeout_seconds}s with state {state}"
2012                    ));
2013                }
2014                thread::sleep(Duration::from_secs(poll_interval_seconds.max(1)));
2015            }
2016        }
2017    }
2018}
2019
2020fn mineru_batch_state(status: &Value) -> Option<&str> {
2021    status
2022        .pointer("/data/extract_result/0/state")
2023        .or_else(|| status.pointer("/data/extractResult/0/state"))
2024        .or_else(|| status.pointer("/data/state"))
2025        .and_then(Value::as_str)
2026}
2027
2028fn mineru_batch_zip_url(status: &Value) -> Option<String> {
2029    status
2030        .pointer("/data/extract_result/0/full_zip_url")
2031        .or_else(|| status.pointer("/data/extractResult/0/full_zip_url"))
2032        .or_else(|| status.pointer("/data/full_zip_url"))
2033        .and_then(Value::as_str)
2034        .map(ToString::to_string)
2035}
2036
/// Builds the `Authorization` header value from a credential stored in the
/// environment variable `api_key_env`.
///
/// The credential is trimmed. For the `bearer` and `token` schemes the
/// matching prefix (`Bearer ` / `token `) is added unless the credential
/// already starts with it; that check ignores ASCII case because HTTP
/// authentication scheme names are case-insensitive, so `bearer abc` is kept
/// as is rather than becoming `Bearer bearer abc`. Any other scheme returns
/// the trimmed credential unchanged.
///
/// # Errors
/// Returns a message when the variable is unset (or not valid Unicode) or
/// blank.
fn provider_auth_header_value(api_key_env: &str, auth_scheme: &str) -> Result<String, String> {
    let token = env::var(api_key_env)
        .map_err(|_| format!("missing provider credential env var {api_key_env}"))?;
    let token = token.trim();
    if token.is_empty() {
        return Err(format!(
            "provider credential env var {api_key_env} is empty"
        ));
    }
    let scheme_prefix = match auth_scheme {
        "bearer" => "Bearer ",
        "token" => "token ",
        _ => return Ok(token.to_string()),
    };
    // `get` returns `None` for short tokens or a non-char-boundary cut, both
    // of which mean "no prefix present".
    let already_prefixed = token
        .get(..scheme_prefix.len())
        .is_some_and(|head| head.eq_ignore_ascii_case(scheme_prefix));
    Ok(if already_prefixed {
        token.to_string()
    } else {
        format!("{scheme_prefix}{token}")
    })
}
2054
2055fn get_json_with_auth(url: &str, auth_header: &str) -> Result<Value, String> {
2056    ureq::get(url)
2057        .set("Authorization", auth_header)
2058        .call()
2059        .map_err(|err| format!("GET {url} failed: {err}"))?
2060        .into_json::<Value>()
2061        .map_err(|err| format!("GET {url} returned invalid JSON: {err}"))
2062}
2063
2064fn download_bytes(url: &str) -> Result<Vec<u8>, String> {
2065    let response = ureq::get(url)
2066        .call()
2067        .map_err(|err| format!("download {url} failed: {err}"))?;
2068    let mut bytes = Vec::new();
2069    response
2070        .into_reader()
2071        .read_to_end(&mut bytes)
2072        .map_err(|err| format!("read download {url}: {err}"))?;
2073    Ok(bytes)
2074}
2075
2076fn extract_zip_bytes_to_temp(prefix: &str, zip_bytes: &[u8]) -> Result<PathBuf, String> {
2077    let dir = unique_temp_path(prefix);
2078    fs::create_dir_all(&dir).map_err(|err| format!("create temp dir {}: {err}", dir.display()))?;
2079    let zip_path = dir.with_extension("zip");
2080    fs::write(&zip_path, zip_bytes)
2081        .map_err(|err| format!("write temp zip {}: {err}", zip_path.display()))?;
2082    let output = ProcessCommand::new("unzip")
2083        .arg("-q")
2084        .arg("-o")
2085        .arg(&zip_path)
2086        .arg("-d")
2087        .arg(&dir)
2088        .output()
2089        .map_err(|err| format!("run unzip: {err}"))?;
2090    if !output.status.success() {
2091        return Err(format!(
2092            "unzip {} failed: {}",
2093            zip_path.display(),
2094            String::from_utf8_lossy(&output.stderr).trim()
2095        ));
2096    }
2097    Ok(dir)
2098}
2099
/// Builds a path of the form `<temp dir>/<prefix>-<pid>-<nanos>`.
///
/// `<nanos>` is the number of nanoseconds since the Unix epoch, or `0` when
/// the system clock reports a time before the epoch. The path is not created.
fn unique_temp_path(prefix: &str) -> PathBuf {
    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_nanos(),
        Err(_) => 0,
    };
    let mut path = env::temp_dir();
    path.push(format!("{prefix}-{}-{nanos}", std::process::id()));
    path
}
2107
2108fn mineru_result_source_from_dir(
2109    result_dir: PathBuf,
2110    raw_zip_bytes: Option<Vec<u8>>,
2111    task_status: Option<Value>,
2112    task_id: Option<String>,
2113) -> Result<MineruResultSource, String> {
2114    let (payload, content_list_file) = mineru_payload_from_result_dir(&result_dir)?;
2115    let markdown = find_first_file_by_name(&result_dir, "full.md")
2116        .map(|path| {
2117            fs::read_to_string(&path)
2118                .map_err(|err| format!("read native markdown {}: {err}", path.display()))
2119        })
2120        .transpose()?;
2121    Ok(MineruResultSource {
2122        task_id,
2123        state: "done".to_string(),
2124        result_dir,
2125        raw_zip_bytes,
2126        task_status,
2127        payload,
2128        content_list_file,
2129        markdown,
2130    })
2131}
2132
2133fn mineru_payload_from_result_dir(result_dir: &Path) -> Result<(Value, Option<PathBuf>), String> {
2134    let v2 = find_first_file_with_suffix(result_dir, "_content_list_v2.json");
2135    if let Some(path) = v2 {
2136        let value = read_json_file(&path)?;
2137        return Ok((serde_json::json!({"content_list_v2": value}), Some(path)));
2138    }
2139    let content_list = find_first_file_with_suffix(result_dir, "_content_list.json");
2140    if let Some(path) = content_list {
2141        let value = read_json_file(&path)?;
2142        return Ok((serde_json::json!({"content_list": value}), Some(path)));
2143    }
2144    let layout = find_first_file_by_name(result_dir, "layout.json");
2145    if let Some(path) = layout {
2146        return Ok((read_json_file(&path)?, Some(path)));
2147    }
2148    let markdown = find_first_file_by_name(result_dir, "full.md");
2149    if let Some(path) = markdown {
2150        let text = fs::read_to_string(&path)
2151            .map_err(|err| format!("read native markdown {}: {err}", path.display()))?;
2152        return Ok((serde_json::json!({"result": text}), Some(path)));
2153    }
2154    Err(format!(
2155        "MinerU result directory {} missing content_list_v2/content_list/layout/full.md",
2156        result_dir.display()
2157    ))
2158}
2159
2160fn read_json_file(path: &Path) -> Result<Value, String> {
2161    let raw = fs::read_to_string(path).map_err(|err| format!("read {}: {err}", path.display()))?;
2162    serde_json::from_str(&raw).map_err(|err| format!("parse JSON {}: {err}", path.display()))
2163}
2164
2165fn persist_mineru_result_sidecars(
2166    storage_dir: &Path,
2167    item_key: &str,
2168    attachment_key: &str,
2169    provider: &str,
2170    source: &MineruResultSource,
2171    chunk_chars: usize,
2172) -> Result<PersistedOcrArtifacts, String> {
2173    let blocks = parse_ocr_provider_response(provider, &source.payload, item_key, attachment_key)?;
2174    let chunks = zotron_types::chunks_from_blocks(&blocks, chunk_chars);
2175    let assets = copy_mineru_assets(&source.result_dir, storage_dir)?;
2176    let raw_bundle = serde_json::json!({
2177        "provider": provider,
2178        "item_key": item_key,
2179        "attachment_key": attachment_key,
2180        "task_id": source.task_id,
2181        "state": source.state,
2182        "task_status": source.task_status,
2183        "content_list_file": source.content_list_file,
2184        "payload": source.payload,
2185    });
2186
2187    let mut artifacts = Vec::new();
2188    artifacts.push(write_sidecar_json(
2189        storage_dir,
2190        item_key,
2191        attachment_key,
2192        MachineArtifactKind::OcrRaw,
2193        &raw_bundle,
2194    )?);
2195    artifacts.push(write_sidecar_jsonl(
2196        storage_dir,
2197        item_key,
2198        attachment_key,
2199        MachineArtifactKind::Blocks,
2200        &blocks,
2201    )?);
2202    artifacts.push(write_sidecar_jsonl(
2203        storage_dir,
2204        item_key,
2205        attachment_key,
2206        MachineArtifactKind::Chunks,
2207        &chunks,
2208    )?);
2209    if let Some(markdown) = source.markdown.as_deref() {
2210        artifacts.push(write_sidecar_bytes(
2211            storage_dir,
2212            item_key,
2213            attachment_key,
2214            MachineArtifactKind::OcrNativeMarkdown,
2215            markdown.as_bytes(),
2216        )?);
2217    }
2218    artifacts.push(write_sidecar_json(
2219        storage_dir,
2220        item_key,
2221        attachment_key,
2222        MachineArtifactKind::OcrNativeAssets,
2223        &assets,
2224    )?);
2225    if let Some(bytes) = source.raw_zip_bytes.as_deref() {
2226        artifacts.push(write_extra_sidecar_bytes(
2227            storage_dir,
2228            ".zotron/ocr/latest.raw.zip",
2229            bytes,
2230        )?);
2231    }
2232
2233    Ok(PersistedOcrArtifacts {
2234        block_count: blocks.len(),
2235        chunk_count: chunks.len(),
2236        artifacts,
2237    })
2238}
2239
2240fn write_sidecar_json(
2241    storage_dir: &Path,
2242    item_key: &str,
2243    attachment_key: &str,
2244    kind: MachineArtifactKind,
2245    value: &Value,
2246) -> Result<Value, String> {
2247    let bytes = serde_json::to_vec_pretty(value).map_err(|err| err.to_string())?;
2248    write_sidecar_bytes(storage_dir, item_key, attachment_key, kind, &bytes)
2249}
2250
2251fn write_sidecar_jsonl<T: serde::Serialize>(
2252    storage_dir: &Path,
2253    item_key: &str,
2254    attachment_key: &str,
2255    kind: MachineArtifactKind,
2256    values: &[T],
2257) -> Result<Value, String> {
2258    let mut out = String::new();
2259    for value in values {
2260        out.push_str(&serde_json::to_string(value).map_err(|err| err.to_string())?);
2261        out.push('\n');
2262    }
2263    write_sidecar_bytes(storage_dir, item_key, attachment_key, kind, out.as_bytes())
2264}
2265
2266fn write_sidecar_bytes(
2267    storage_dir: &Path,
2268    item_key: &str,
2269    attachment_key: &str,
2270    kind: MachineArtifactKind,
2271    bytes: &[u8],
2272) -> Result<Value, String> {
2273    let record = write_machine_artifact_sidecar(storage_dir, item_key, attachment_key, kind, bytes)
2274        .map_err(|err| format!("write sidecar {:?}: {err}", kind))?;
2275    Ok(serde_json::json!({
2276        "kind": kind,
2277        "relative_path": record.relative_path,
2278        "absolute_path": record.absolute_path,
2279    }))
2280}
2281
2282fn write_extra_sidecar_bytes(
2283    storage_dir: &Path,
2284    relative_path: &str,
2285    bytes: &[u8],
2286) -> Result<Value, String> {
2287    let absolute_path = storage_dir.join(relative_path);
2288    if let Some(parent) = absolute_path.parent() {
2289        fs::create_dir_all(parent).map_err(|err| format!("create {}: {err}", parent.display()))?;
2290    }
2291    fs::write(&absolute_path, bytes)
2292        .map_err(|err| format!("write sidecar {}: {err}", absolute_path.display()))?;
2293    Ok(serde_json::json!({
2294        "kind": "ocr_raw_zip",
2295        "relative_path": relative_path,
2296        "absolute_path": absolute_path,
2297    }))
2298}
2299
2300fn copy_mineru_assets(result_dir: &Path, storage_dir: &Path) -> Result<Value, String> {
2301    let mut images = Vec::new();
2302    for file in collect_files(result_dir)? {
2303        if !is_image_file(&file) {
2304            continue;
2305        }
2306        let relative = file.strip_prefix(result_dir).unwrap_or(&file).to_path_buf();
2307        let destination = storage_dir.join(".zotron").join("ocr").join(&relative);
2308        if let Some(parent) = destination.parent() {
2309            fs::create_dir_all(parent)
2310                .map_err(|err| format!("create {}: {err}", parent.display()))?;
2311        }
2312        fs::copy(&file, &destination).map_err(|err| {
2313            format!(
2314                "copy MinerU asset {} to {}: {err}",
2315                file.display(),
2316                destination.display()
2317            )
2318        })?;
2319        images.push(serde_json::json!({
2320            "source_relative": relative,
2321            "sidecar_relative": PathBuf::from(".zotron").join("ocr").join(&relative),
2322            "absolute_path": destination,
2323        }));
2324    }
2325    Ok(serde_json::json!({
2326        "provider": "mineru",
2327        "images": images,
2328    }))
2329}
2330
/// Reports whether `path` has one of the recognised image extensions
/// (`png`, `jpg`, `jpeg`, `webp`, `gif`), compared without regard to ASCII
/// case. Paths with no extension, or a non-UTF-8 one, are not images.
fn is_image_file(path: &Path) -> bool {
    const IMAGE_EXTENSIONS: [&str; 5] = ["png", "jpg", "jpeg", "webp", "gif"];
    match path.extension().and_then(|ext| ext.to_str()) {
        Some(extension) => IMAGE_EXTENSIONS
            .iter()
            .any(|known| extension.eq_ignore_ascii_case(known)),
        None => false,
    }
}
2341
2342fn find_first_file_with_suffix(root: &Path, suffix: &str) -> Option<PathBuf> {
2343    collect_files(root).ok()?.into_iter().find(|path| {
2344        path.file_name()
2345            .and_then(|name| name.to_str())
2346            .is_some_and(|name| name.ends_with(suffix))
2347    })
2348}
2349
2350fn find_first_file_by_name(root: &Path, name: &str) -> Option<PathBuf> {
2351    collect_files(root).ok()?.into_iter().find(|path| {
2352        path.file_name()
2353            .and_then(|file_name| file_name.to_str())
2354            .is_some_and(|file_name| file_name == name)
2355    })
2356}
2357
2358fn collect_files(root: &Path) -> Result<Vec<PathBuf>, String> {
2359    let mut files = Vec::new();
2360    collect_files_into(root, &mut files)?;
2361    files.sort();
2362    Ok(files)
2363}
2364
/// Appends every regular file below `root` to `files`, descending into
/// sub-directories. Entries that are neither files nor directories are
/// skipped.
///
/// # Errors
/// Returns a message when a directory cannot be read or an entry's type
/// cannot be determined.
fn collect_files_into(root: &Path, files: &mut Vec<PathBuf>) -> Result<(), String> {
    let entries =
        fs::read_dir(root).map_err(|err| format!("read dir {}: {err}", root.display()))?;
    for entry in entries {
        let entry = entry.map_err(|err| format!("read dir entry {}: {err}", root.display()))?;
        let path = entry.path();
        match entry.file_type() {
            Err(err) => return Err(format!("stat {}: {err}", path.display())),
            Ok(kind) if kind.is_dir() => collect_files_into(&path, files)?,
            Ok(kind) if kind.is_file() => files.push(path),
            Ok(_) => {}
        }
    }
    Ok(())
}
2380
2381fn ocr_async_task_result(provider: &str, payload: &Value) -> Option<Value> {
2382    let data = payload.get("data")?;
2383    let task_id = data.get("task_id").and_then(Value::as_str)?;
2384    Some(serde_json::json!({
2385        "provider": provider,
2386        "status": "submitted",
2387        "task_id": task_id,
2388        "state": data.get("state").and_then(Value::as_str).unwrap_or("submitted"),
2389        "result_url": data.get("full_zip_url").or_else(|| data.get("markdown_url")).cloned(),
2390        "raw": payload,
2391    }))
2392}
2393
2394fn ocr_input_from_file(
2395    file: String,
2396    item_key: Option<String>,
2397    attachment_key: Option<String>,
2398    mime_type: Option<String>,
2399) -> Result<OcrRequestInput, String> {
2400    let item_key = item_key
2401        .ok_or_else(|| "INVALID_ARGS: --item-key is required when using --file".to_string())?;
2402    let attachment_key = attachment_key.ok_or_else(|| {
2403        "INVALID_ARGS: --attachment-key is required when using --file".to_string()
2404    })?;
2405    let path = PathBuf::from(&file);
2406    let bytes = fs::read(&path).map_err(|err| format!("read {file}: {err}"))?;
2407    let file_name = path
2408        .file_name()
2409        .and_then(|name| name.to_str())
2410        .unwrap_or("document.pdf")
2411        .to_string();
2412    let mime_type = mime_type.unwrap_or_else(|| guess_mime_type(&path).to_string());
2413    Ok(OcrRequestInput {
2414        item_key,
2415        attachment_key,
2416        file_name,
2417        mime_type,
2418        content_base64: base64_encode(&bytes),
2419        source_url: None,
2420        local_path: Some(file),
2421        output_dir: None,
2422    })
2423}
2424
/// Guesses a MIME type from the file extension, ignoring ASCII case.
///
/// `png`, `jpg`/`jpeg` and `webp` map to their image types; everything else,
/// including a missing extension, is treated as `application/pdf`.
fn guess_mime_type(path: &Path) -> &'static str {
    const KNOWN_TYPES: [(&str, &str); 4] = [
        ("png", "image/png"),
        ("jpg", "image/jpeg"),
        ("jpeg", "image/jpeg"),
        ("webp", "image/webp"),
    ];
    let extension = path
        .extension()
        .and_then(|ext| ext.to_str())
        .unwrap_or_default();
    for (known, mime) in KNOWN_TYPES {
        if extension.eq_ignore_ascii_case(known) {
            return mime;
        }
    }
    "application/pdf"
}
2439
/// Encodes `bytes` with the standard base64 alphabet (`A-Z a-z 0-9 + /`),
/// padding the final group with `=` to a multiple of four characters.
fn base64_encode(bytes: &[u8]) -> String {
    const ALPHABET: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    // Maps the low six bits of `bits` to its alphabet character.
    let sextet = |bits: u32| ALPHABET[(bits & 0x3f) as usize] as char;

    let mut encoded = String::with_capacity(bytes.len().div_ceil(3) * 4);
    for group in bytes.chunks(3) {
        // Pack up to three bytes into the top 24 bits; missing bytes stay zero.
        let packed = group
            .iter()
            .enumerate()
            .fold(0u32, |acc, (position, &byte)| {
                acc | (u32::from(byte) << (16 - 8 * position))
            });
        encoded.push(sextet(packed >> 18));
        encoded.push(sextet(packed >> 12));
        encoded.push(if group.len() > 1 { sextet(packed >> 6) } else { '=' });
        encoded.push(if group.len() > 2 { sextet(packed) } else { '=' });
    }
    encoded
}
2462
2463fn run_ocr_status_command(
2464    client: &mut impl RpcCaller,
2465    collection: String,
2466) -> Result<Value, String> {
2467    let collection_key = find_collection_in_tree(client, &collection)?
2468        .and_then(|node| node.get("key").cloned())
2469        .ok_or_else(|| format!("COLLECTION_NOT_FOUND: Collection not found: {collection:?}"))?;
2470    let raw = paginate_rpc(
2471        client,
2472        "collections.getItems",
2473        serde_json::json!({"key": collection_key}),
2474        500,
2475    )?;
2476    let items = raw
2477        .get("items")
2478        .and_then(Value::as_array)
2479        .or_else(|| raw.as_array())
2480        .ok_or_else(|| "collections.getItems returned non-array/non-items result".to_string())?
2481        .clone();
2482
2483    let mut has_ocr = 0usize;
2484    for item in &items {
2485        let item_key = item.get("key").cloned().unwrap_or(Value::Null);
2486        if has_ocr_artifact(client, &item_key)? || has_ocr_note(client, &item_key)? {
2487            has_ocr += 1;
2488        }
2489    }
2490
2491    Ok(serde_json::json!({
2492        "collection": collection,
2493        "total": items.len(),
2494        "has_ocr": has_ocr,
2495        "missing_ocr": items.len() - has_ocr,
2496    }))
2497}
2498
2499fn has_ocr_artifact(client: &mut impl RpcCaller, item_key: &Value) -> Result<bool, String> {
2500    if let Some(item_key) = item_key.as_str() {
2501        // Legacy external store lookup for artifacts produced before the
2502        // per-attachment hidden sidecar became the default.
2503        if machine_artifact_exists_for_item(
2504            machine_artifact_store_root(),
2505            item_key,
2506            MachineArtifactKind::Chunks,
2507        ) {
2508            return Ok(true);
2509        }
2510    }
2511
2512    let attachments = client.call(
2513        "attachments.list",
2514        Some(serde_json::json!({"parentKey": item_key.clone()})),
2515    )?;
2516    Ok(attachments.as_array().is_some_and(|attachments| {
2517        attachments.iter().any(|attachment| {
2518            let has_sidecar_chunks = attachment
2519                .get("path")
2520                .and_then(Value::as_str)
2521                .map(local_path_from_zotero_path)
2522                .as_deref()
2523                .map(Path::new)
2524                .and_then(Path::parent)
2525                .is_some_and(|dir| {
2526                    machine_artifact_exists_in_sidecar(dir, MachineArtifactKind::Chunks)
2527                });
2528            if has_sidecar_chunks {
2529                return true;
2530            }
2531
2532            // Read-only fallback for old Zotero-visible artifact attachments.
2533            attachment
2534                .get("title")
2535                .and_then(Value::as_str)
2536                .is_some_and(|title| title.ends_with("zotron-chunks.jsonl"))
2537        })
2538    }))
2539}
2540
2541fn local_path_from_zotero_path(path: &str) -> String {
2542    if is_wsl() && path.as_bytes().get(1) == Some(&b':') {
2543        return ProcessCommand::new("wslpath")
2544            .arg("-u")
2545            .arg(path)
2546            .output()
2547            .ok()
2548            .filter(|output| output.status.success())
2549            .and_then(|output| String::from_utf8(output.stdout).ok())
2550            .map(|converted| converted.trim().to_string())
2551            .filter(|converted| !converted.is_empty())
2552            .unwrap_or_else(|| path.to_string());
2553    }
2554    path.to_string()
2555}
2556
2557fn has_ocr_note(client: &mut impl RpcCaller, item_key: &Value) -> Result<bool, String> {
2558    let notes = client.call(
2559        "notes.list",
2560        Some(serde_json::json!({"parentKey": item_key.clone()})),
2561    )?;
2562    Ok(notes.as_array().is_some_and(|notes| {
2563        notes.iter().any(|note| {
2564            note.get("tags")
2565                .and_then(Value::as_array)
2566                .is_some_and(|tags| tags.iter().any(tag_is_ocr))
2567        })
2568    }))
2569}
2570
2571fn tag_is_ocr(tag: &Value) -> bool {
2572    tag.as_str() == Some("ocr")
2573        || tag
2574            .get("tag")
2575            .and_then(Value::as_str)
2576            .is_some_and(|tag| tag == "ocr")
2577}
2578
2579fn find_collection_in_tree(
2580    client: &mut impl RpcCaller,
2581    collection: &str,
2582) -> Result<Option<Value>, String> {
2583    let tree = client.call("collections.tree", None)?;
2584    let nodes = tree
2585        .as_array()
2586        .ok_or_else(|| "collections.tree returned non-array result".to_string())?;
2587    Ok(search_collection_tree(nodes, collection).cloned())
2588}
2589
2590fn search_collection_tree<'a>(nodes: &'a [Value], collection: &str) -> Option<&'a Value> {
2591    for node in nodes {
2592        if node.get("key").and_then(Value::as_str) == Some(collection)
2593            || node.get("name").and_then(Value::as_str) == Some(collection)
2594        {
2595            return Some(node);
2596        }
2597        if let Some(children) = node.get("children").and_then(Value::as_array) {
2598            if let Some(found) = search_collection_tree(children, collection) {
2599                return Some(found);
2600            }
2601        }
2602    }
2603    None
2604}
2605
2606fn run_command(command: Command, client: &mut impl RpcCaller) -> Result<String, String> {
2607    if let Command::Export(args) = command {
2608        return run_export(args, client);
2609    }
2610
2611    let (value, style) = match command {
2612        Command::Ping { .. } => (
2613            call_json(client, "system.ping", None)?,
2614            JsonStyle::PythonCompact,
2615        ),
2616        Command::Rpc {
2617            method,
2618            params_json,
2619            paginate,
2620            page_size,
2621            ..
2622        } => {
2623            let params = serde_json::from_str::<Value>(&params_json)
2624                .map_err(|err| format!("INVALID_JSON: params must be a JSON object: {err}"))?;
2625            if !params.is_object() {
2626                return Err("INVALID_JSON: params must be a JSON object".to_string());
2627            }
2628            if paginate {
2629                (
2630                    paginate_rpc(client, &method, params, page_size)?,
2631                    JsonStyle::Pretty,
2632                )
2633            } else {
2634                (call_json(client, &method, Some(params))?, JsonStyle::Pretty)
2635            }
2636        }
2637        Command::Push {
2638            json_file,
2639            pdf,
2640            collection,
2641            on_duplicate,
2642            dry_run,
2643            ..
2644        } => return run_push_command(json_file, pdf, collection, on_duplicate, dry_run, client),
2645        Command::System { command } => run_system_command(command, client)?,
2646        Command::Search(args) => {
2647            if let Some(mgmt) = args.management {
2648                run_search_management_command(mgmt, client)?
2649            } else {
2650                run_search(args, client)?
2651            }
2652        }
2653        Command::Items { command } => run_items_command(command, client)?,
2654        Command::Collections { command } => run_collections_command(command, client)?,
2655        Command::Notes { command } => run_notes_command(command, client)?,
2656        Command::Attachments { command } => run_attachments_command(command, client)?,
2657        Command::Settings { command } => run_settings_command(command, client)?,
2658        Command::Tags { command } => run_tags_command(command, client)?,
2659        Command::Annotations { command } => run_annotations_command(command, client)?,
2660        Command::Ocr { command } => {
2661            return run_ocr_command(command, client);
2662        }
2663        Command::Rag { command } => {
2664            return run_rag_command(command, client);
2665        }
2666        Command::Export(_) => unreachable!("export commands return raw output above"),
2667        Command::FindPdfs {
2668            collection, limit, ..
2669        } => run_find_pdfs_command(client, collection, limit)?,
2670    };
2671
2672    format_json(&value, style)
2673}
2674
2675fn run_rag_command(command: RagCommand, client: &mut impl RpcCaller) -> Result<String, String> {
2676    match command {
2677        RagCommand::Providers => format_json(
2678            &serde_json::json!({
2679                "providers": [
2680                    embedding_provider_spec("volcengine")?,
2681                    embedding_provider_spec("alibaba")?,
2682                    embedding_provider_spec("custom")?,
2683                ],
2684            }),
2685            JsonStyle::Pretty,
2686        ),
2687        RagCommand::Embed {
2688            provider,
2689            input,
2690            endpoint,
2691            model,
2692            input_type,
2693            api_key_env,
2694        } => {
2695            let value = run_embedding_provider_json_command(
2696                provider,
2697                input,
2698                endpoint,
2699                model,
2700                input_type,
2701                api_key_env,
2702            )?;
2703            format_json(&value, JsonStyle::PythonCompact)
2704        }
2705        RagCommand::Status { collection, .. } => {
2706            let value = rag_status_value(client, &collection)?;
2707            format_json(&value, JsonStyle::PythonCompact)
2708        }
2709        RagCommand::Search {
2710            query,
2711            collection,
2712            keys,
2713            zotero,
2714            top_spans_per_item,
2715            include_fulltext_spans,
2716            top_k,
2717            output,
2718            ..
2719        } => run_rag_search_command(
2720            client,
2721            RagSearchOptions {
2722                query,
2723                collection,
2724                keys,
2725                zotero,
2726                top_spans_per_item,
2727                include_fulltext_spans,
2728                top_k,
2729                output,
2730            },
2731        ),
2732    }
2733}
2734
2735fn run_embedding_provider_json_command(
2736    provider: String,
2737    input: String,
2738    endpoint: Option<String>,
2739    model: Option<String>,
2740    input_type: Option<String>,
2741    api_key_env: Option<String>,
2742) -> Result<Value, String> {
2743    let mut input: EmbeddingRequestInput = read_json_input(&input)?;
2744    if endpoint.is_some() {
2745        input.url = endpoint;
2746    }
2747    if model.is_some() {
2748        input.model = model;
2749    }
2750    if input_type.is_some() {
2751        input.input_type = input_type;
2752    }
2753    let mut transport = provider_http_transport(api_key_env.as_deref())?;
2754    let vectors = execute_embedding_provider_request(&provider, &input, &mut transport)?;
2755
2756    Ok(serde_json::json!({
2757        "provider": provider,
2758        "vectors": vectors,
2759    }))
2760}
2761
2762fn provider_http_transport(api_key_env: Option<&str>) -> Result<UreqProviderHttpTransport, String> {
2763    provider_http_transport_with_auth(api_key_env, "bearer")
2764}
2765
2766fn provider_http_transport_with_auth(
2767    api_key_env: Option<&str>,
2768    auth_scheme: &str,
2769) -> Result<UreqProviderHttpTransport, String> {
2770    let Some(env_name) = api_key_env else {
2771        return Ok(UreqProviderHttpTransport::new());
2772    };
2773    let token = env::var(env_name)
2774        .map_err(|_| format!("missing provider credential env var {env_name}"))?;
2775    if token.trim().is_empty() {
2776        return Err(format!("provider credential env var {env_name} is empty"));
2777    }
2778    let token = token.trim();
2779    match auth_scheme {
2780        "token" if token.starts_with("token ") => {
2781            Ok(UreqProviderHttpTransport::with_api_key(token.to_string()))
2782        }
2783        "token" => Ok(UreqProviderHttpTransport::with_api_key(format!(
2784            "token {token}"
2785        ))),
2786        "bearer" if token.starts_with("Bearer ") => {
2787            Ok(UreqProviderHttpTransport::with_api_key(token.to_string()))
2788        }
2789        "bearer" => Ok(UreqProviderHttpTransport::with_bearer_token(token)),
2790        "none" => Ok(UreqProviderHttpTransport::new()),
2791        other => Err(format!("unsupported provider auth scheme {other}")),
2792    }
2793}
2794
2795fn read_json_input<T: serde::de::DeserializeOwned>(path: &str) -> Result<T, String> {
2796    let payload = if path == "-" {
2797        let mut input = String::new();
2798        io::stdin()
2799            .read_to_string(&mut input)
2800            .map_err(|err| format!("read stdin: {err}"))?;
2801        input
2802    } else {
2803        fs::read_to_string(path).map_err(|err| format!("read {path}: {err}"))?
2804    };
2805    serde_json::from_str::<T>(&payload)
2806        .map_err(|err| format!("INVALID_JSON: Could not parse JSON: {err}"))
2807}
2808
2809fn fetch_embedding_settings(
2810    client: &mut impl RpcCaller,
2811) -> Result<(String, String, String, String), String> {
2812    let settings = client.call("settings.getAll", None)?;
2813    let provider = settings
2814        .get("embedding.provider")
2815        .and_then(Value::as_str)
2816        .unwrap_or("ollama")
2817        .to_string();
2818    let model = settings
2819        .get("embedding.model")
2820        .and_then(Value::as_str)
2821        .unwrap_or("")
2822        .to_string();
2823    let api_url = settings
2824        .get("embedding.apiUrl")
2825        .and_then(Value::as_str)
2826        .unwrap_or("")
2827        .to_string();
2828    let raw = client.call("settings.getRaw", Some(serde_json::json!({"key": "embedding.apiKey"})))?;
2829    let api_key = raw
2830        .get("embedding.apiKey")
2831        .and_then(Value::as_str)
2832        .unwrap_or("")
2833        .to_string();
2834    Ok((provider, model, api_url, api_key))
2835}
2836
2837fn fetch_retrieval_mode(client: &mut impl RpcCaller) -> String {
2838    client
2839        .call(
2840            "settings.get",
2841            Some(serde_json::json!({"key": "rag.retrievalMode"})),
2842        )
2843        .ok()
2844        .and_then(|v| {
2845            v.get("rag.retrievalMode")
2846                .and_then(Value::as_str)
2847                .map(String::from)
2848        })
2849        .unwrap_or_else(|| "hybrid".to_string())
2850}
2851
2852fn resolve_sidecar_paths(
2853    client: &mut impl RpcCaller,
2854    collection: Option<&str>,
2855    keys: &[String],
2856) -> Result<Vec<(String, String, PathBuf)>, String> {
2857    let items = if !keys.is_empty() {
2858        let mut items = Vec::new();
2859        for key in keys {
2860            let item = client.call("items.get", Some(serde_json::json!({"key": key})))?;
2861            items.push(item);
2862        }
2863        items
2864    } else if let Some(col) = collection {
2865        let col_key = resolve_collection(client, col)?;
2866        let response = client.call(
2867            "collections.getItems",
2868            Some(serde_json::json!({"key": col_key})),
2869        )?;
2870        collection_items(&response)
2871    } else {
2872        return Err("INVALID_ARGS: --collection or --key required".into());
2873    };
2874
2875    let mut results = Vec::new();
2876    for item in &items {
2877        let item_key = item.get("key").and_then(Value::as_str).unwrap_or_default();
2878        let attachments = client.call(
2879            "attachments.list",
2880            Some(serde_json::json!({"parentKey": item_key})),
2881        )?;
2882        let att_list = attachments
2883            .get("items")
2884            .and_then(Value::as_array)
2885            .or_else(|| attachments.as_array())
2886            .cloned()
2887            .unwrap_or_default();
2888        for att in &att_list {
2889            let content_type = att
2890                .get("contentType")
2891                .and_then(Value::as_str)
2892                .unwrap_or("");
2893            if content_type != "application/pdf" {
2894                continue;
2895            }
2896            let att_key = att.get("key").and_then(Value::as_str).unwrap_or_default();
2897            let path = att.get("path").and_then(Value::as_str).unwrap_or_default();
2898            if path.is_empty() {
2899                continue;
2900            }
2901            let local_path = local_path_from_zotero_path(path);
2902            let pdf_path = PathBuf::from(&local_path);
2903            if let Some(parent) = pdf_path.parent() {
2904                let sidecar_root = parent.join(".zotron");
2905                if sidecar_root.exists() {
2906                    results.push((item_key.to_string(), att_key.to_string(), sidecar_root));
2907                }
2908            }
2909        }
2910    }
2911    Ok(results)
2912}
2913
2914fn load_sidecar_chunks(sidecar_root: &Path) -> Vec<StructureChunk> {
2915    let chunks_path = sidecar_root.join("chunks").join("chunks.v1.jsonl");
2916    let Ok(content) = fs::read_to_string(&chunks_path) else {
2917        return Vec::new();
2918    };
2919    content
2920        .lines()
2921        .filter(|line| !line.trim().is_empty())
2922        .filter_map(|line| serde_json::from_str::<StructureChunk>(line).ok())
2923        .collect()
2924}
2925
/// Derives the JSONL file name used for vectors produced by `provider` and `model`.
///
/// Both parts are trimmed, lower-cased and have `/` replaced by `-`. When both end up
/// empty the generic `vectors.jsonl` is returned; otherwise the name is
/// `<provider>--<model>.jsonl` (either side may be empty on its own).
fn embedding_vector_filename(provider: &str, model: &str) -> String {
    let normalize = |raw: &str| raw.trim().to_lowercase().replace('/', "-");
    let provider_part = normalize(provider);
    let model_part = normalize(model);

    if provider_part.is_empty() && model_part.is_empty() {
        "vectors.jsonl".to_string()
    } else {
        format!("{provider_part}--{model_part}.jsonl")
    }
}
2934
2935fn load_sidecar_vectors(sidecar_root: &Path, provider: &str, model: &str) -> Vec<EmbeddingVector> {
2936    let embeddings_dir = sidecar_root.join("embeddings");
2937    let target = embedding_vector_filename(provider, model);
2938    let target_path = embeddings_dir.join(&target);
2939    if let Ok(content) = fs::read_to_string(&target_path) {
2940        let vecs: Vec<EmbeddingVector> = content
2941            .lines()
2942            .filter(|line| !line.trim().is_empty())
2943            .filter_map(|line| serde_json::from_str(line).ok())
2944            .collect();
2945        if !vecs.is_empty() {
2946            return vecs;
2947        }
2948    }
2949    // Fallback: try legacy vectors.v1.jsonl / vectors.jsonl with provider match
2950    for legacy in &["vectors.v1.jsonl", "vectors.jsonl"] {
2951        let path = embeddings_dir.join(legacy);
2952        if let Ok(content) = fs::read_to_string(&path) {
2953            let vecs: Vec<EmbeddingVector> = content
2954                .lines()
2955                .filter(|line| !line.trim().is_empty())
2956                .filter_map(|line| serde_json::from_str::<EmbeddingVector>(line).ok())
2957                .filter(|v| v.source_provider == provider || provider.is_empty())
2958                .collect();
2959            if !vecs.is_empty() {
2960                return vecs;
2961            }
2962        }
2963    }
2964    Vec::new()
2965}
2966
2967fn embed_query_text(
2968    query: &str,
2969    provider: &str,
2970    model: &str,
2971    api_url: &str,
2972    api_key: &str,
2973) -> Result<Vec<f64>, String> {
2974    let input = EmbeddingRequestInput {
2975        item_key: "query".to_string(),
2976        chunks: vec![EmbeddingChunkInput {
2977            chunk_key: "q0".to_string(),
2978            text: query.to_string(),
2979        }],
2980        model: if model.is_empty() {
2981            None
2982        } else {
2983            Some(model.to_string())
2984        },
2985        url: if api_url.is_empty() {
2986            None
2987        } else {
2988            Some(api_url.to_string())
2989        },
2990        input_type: Some("query".to_string()),
2991    };
2992    let request = build_embedding_provider_request(provider, &input)?;
2993    let url = request
2994        .url
2995        .as_deref()
2996        .ok_or("no embedding URL configured")?;
2997    let mut http = ureq::post(url).set("Content-Type", "application/json");
2998    if let Some(auth) = request.auth_header {
2999        if !api_key.is_empty() {
3000            http = http.set(auth, &format!("Bearer {api_key}"));
3001        }
3002    }
3003    let resp = http
3004        .send_json(&request.body)
3005        .map_err(|e| format!("embedding request failed: {e}"))?;
3006    let payload: Value = resp
3007        .into_json()
3008        .map_err(|e| format!("embedding response parse: {e}"))?;
3009    let vectors =
3010        parse_embedding_provider_response(provider, &payload, "query", &input.chunks)?;
3011    vectors
3012        .into_iter()
3013        .next()
3014        .map(|v| v.vector)
3015        .ok_or_else(|| "no embedding vector returned".to_string())
3016}
3017
3018fn run_rag_search_xpi_fallback(
3019    client: &mut impl RpcCaller,
3020    options: &RagSearchOptions,
3021) -> Result<String, String> {
3022    let mut params = serde_json::json!({
3023        "query": options.query,
3024        "limit": options.top_k,
3025        "top_spans_per_item": options.top_spans_per_item,
3026        "include_fulltext_spans": options.include_fulltext_spans,
3027    });
3028    if let Some(map) = params.as_object_mut() {
3029        if let Some(col) = &options.collection {
3030            map.insert("collection".into(), Value::String(col.clone()));
3031        }
3032        if !options.keys.is_empty() {
3033            map.insert(
3034                "keys".into(),
3035                Value::Array(options.keys.iter().map(|k| Value::String(k.clone())).collect()),
3036            );
3037        }
3038    }
3039    let payload = client.call("rag.searchHits", Some(params))?;
3040    let hits = payload
3041        .get("hits")
3042        .and_then(Value::as_array)
3043        .cloned()
3044        .unwrap_or_default();
3045    if options.output == "jsonl" {
3046        let mut out = String::new();
3047        for hit in &hits {
3048            out.push_str(&serde_json::to_string(hit).map_err(|e| e.to_string())?);
3049            out.push('\n');
3050        }
3051        Ok(out)
3052    } else {
3053        let total = hits.len() as u64;
3054        format_json(
3055            &normalize_list_envelope(
3056                serde_json::json!({"items": hits, "total": total}),
3057                "items",
3058                Some(options.top_k),
3059                0,
3060            ),
3061            JsonStyle::Pretty,
3062        )
3063    }
3064}
3065
3066fn run_rag_search_command(
3067    client: &mut impl RpcCaller,
3068    options: RagSearchOptions,
3069) -> Result<String, String> {
3070    // When --zotero is explicitly passed, use XPI fallback directly (backward compat).
3071    if options.zotero {
3072        if options.collection.is_none() && options.keys.is_empty() {
3073            return Err(
3074                "INVALID_ARGS: --collection or --key is required".to_string(),
3075            );
3076        }
3077        return run_rag_search_xpi_fallback(client, &options);
3078    }
3079
3080    // Hybrid path: require scope.
3081    if options.collection.is_none() && options.keys.is_empty() {
3082        return Err("INVALID_ARGS: --collection or --key required".to_string());
3083    }
3084
3085    // Step 1: resolve sidecar paths from collection/keys.
3086    let sidecars = resolve_sidecar_paths(
3087        client,
3088        options.collection.as_deref(),
3089        &options.keys,
3090    );
3091
3092    // If sidecar resolution fails or returns empty, fall back to XPI.
3093    // But propagate COLLECTION_NOT_FOUND errors directly instead of masking them.
3094    let sidecars = match sidecars {
3095        Ok(ref s) if !s.is_empty() => s,
3096        Err(ref e) if e.contains("COLLECTION_NOT_FOUND") => return Err(e.clone()),
3097        _ => return run_rag_search_xpi_fallback(client, &options),
3098    };
3099
3100    // Step 2: load all chunks and vectors from sidecars.
3101    let (emb_provider, emb_model, emb_url, emb_key) = fetch_embedding_settings(client)?;
3102    let mut all_chunks: Vec<StructureChunk> = Vec::new();
3103    let mut all_vectors: Vec<EmbeddingVector> = Vec::new();
3104    for (_item_key, _att_key, sidecar_root) in sidecars {
3105        all_chunks.extend(load_sidecar_chunks(sidecar_root));
3106        all_vectors.extend(load_sidecar_vectors(sidecar_root, &emb_provider, &emb_model));
3107    }
3108
3109    if all_chunks.is_empty() {
3110        return run_rag_search_xpi_fallback(client, &options);
3111    }
3112
3113    // Step 3: determine retrieval mode.
3114    let mode = fetch_retrieval_mode(client);
3115
3116    // Step 4: BM25 scoring (unless mode is "dense").
3117    let bm25_ranked = if mode != "dense" {
3118        bm25_score_chunks(&all_chunks, &options.query, 1.2, 0.75)
3119    } else {
3120        Vec::new()
3121    };
3122
3123    // Step 5: dense vector scoring (unless mode is "lexical" or no vectors).
3124    let dense_ranked = if mode != "lexical" && !all_vectors.is_empty() {
3125        match embed_query_text(&options.query, &emb_provider, &emb_model, &emb_url, &emb_key) {
3126            Ok(query_vec) => {
3127                let vec_map: std::collections::HashMap<&str, &[f64]> = all_vectors
3128                    .iter()
3129                    .map(|v| (v.chunk_key.as_str(), v.vector.as_slice()))
3130                    .collect();
3131                let mut scores: Vec<(usize, f64)> = all_chunks
3132                    .iter()
3133                    .enumerate()
3134                    .filter_map(|(i, chunk)| {
3135                        vec_map.get(chunk.chunk_key.as_str()).map(|stored| {
3136                            (i, cosine_similarity(&query_vec, stored))
3137                        })
3138                    })
3139                    .filter(|(_, s)| *s > 0.0)
3140                    .collect();
3141                scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
3142                scores
3143            }
3144            Err(_) => Vec::new(),
3145        }
3146    } else {
3147        Vec::new()
3148    };
3149
3150    // Step 6: merge results.
3151    let limit = options.top_k as usize;
3152    let ranked = if !bm25_ranked.is_empty() && !dense_ranked.is_empty() {
3153        rrf_merge(&bm25_ranked, &dense_ranked, 60.0, limit)
3154    } else if !bm25_ranked.is_empty() {
3155        bm25_ranked.into_iter().take(limit).collect()
3156    } else {
3157        dense_ranked.into_iter().take(limit).collect()
3158    };
3159
3160    // Step 7: apply per-item span limit.
3161    let mut per_item_count: std::collections::HashMap<&str, u64> =
3162        std::collections::HashMap::new();
3163    let mut selected: Vec<(usize, f64)> = Vec::new();
3164    for (idx, score) in &ranked {
3165        let item_key = all_chunks[*idx].item_key.as_str();
3166        let count = per_item_count.entry(item_key).or_insert(0);
3167        if *count < options.top_spans_per_item {
3168            *count += 1;
3169            selected.push((*idx, *score));
3170        }
3171    }
3172
3173    // Step 8: enrich hits with item metadata.
3174    let mut meta_cache: std::collections::HashMap<String, Value> =
3175        std::collections::HashMap::new();
3176    let mut hits: Vec<Value> = Vec::new();
3177    for (idx, score) in &selected {
3178        let chunk = &all_chunks[*idx];
3179        let meta = if let Some(cached) = meta_cache.get(&chunk.item_key) {
3180            cached.clone()
3181        } else {
3182            let fetched = client
3183                .call(
3184                    "items.get",
3185                    Some(serde_json::json!({"key": chunk.item_key})),
3186                )
3187                .unwrap_or(Value::Null);
3188            meta_cache.insert(chunk.item_key.clone(), fetched.clone());
3189            fetched
3190        };
3191        let title = meta
3192            .get("title")
3193            .and_then(Value::as_str)
3194            .unwrap_or("")
3195            .to_string();
3196        let authors = meta
3197            .get("creators")
3198            .and_then(Value::as_array)
3199            .map(|creators| {
3200                creators
3201                    .iter()
3202                    .filter_map(|c| {
3203                        let last = c.get("lastName").and_then(Value::as_str).unwrap_or("");
3204                        let first = c.get("firstName").and_then(Value::as_str).unwrap_or("");
3205                        if last.is_empty() && first.is_empty() {
3206                            None
3207                        } else {
3208                            Some(format!("{last}{first}"))
3209                        }
3210                    })
3211                    .collect::<Vec<_>>()
3212                    .join(", ")
3213            })
3214            .unwrap_or_default();
3215        let year = meta.get("date").and_then(Value::as_str).unwrap_or("");
3216        let mut hit = serde_json::json!({
3217            "item_key": chunk.item_key,
3218            "chunk_key": chunk.chunk_key,
3219            "title": title,
3220            "authors": authors,
3221            "year": year,
3222            "text": chunk.text,
3223            "page_range": chunk.page_range,
3224            "section_path": chunk.section_path,
3225            "score": score,
3226        });
3227        if options.include_fulltext_spans {
3228            hit.as_object_mut().unwrap().insert(
3229                "attachment_key".to_string(),
3230                Value::String(chunk.attachment_key.clone()),
3231            );
3232        }
3233        hits.push(hit);
3234    }
3235
3236    // Step 9: format output.
3237    if options.output == "jsonl" {
3238        let mut out = String::new();
3239        for hit in &hits {
3240            out.push_str(&serde_json::to_string(hit).map_err(|e| e.to_string())?);
3241            out.push('\n');
3242        }
3243        Ok(out)
3244    } else {
3245        let total = hits.len() as u64;
3246        format_json(
3247            &normalize_list_envelope(
3248                serde_json::json!({"items": hits, "total": total}),
3249                "items",
3250                Some(options.top_k),
3251                0,
3252            ),
3253            JsonStyle::Pretty,
3254        )
3255    }
3256}
3257
3258fn rag_status_value(client: &mut impl RpcCaller, collection: &str) -> Result<Value, String> {
3259    let raw_store_path = rag_store_path(collection);
3260    if raw_store_path.exists() {
3261        return rag_status_from_store(collection, &raw_store_path);
3262    }
3263
3264    let mut store_candidates = Vec::new();
3265    let collection_match = find_collection_in_tree(client, collection)?;
3266    if let Some(collection_node) = collection_match.as_ref() {
3267        if let Some(name) = collection_node.get("name").and_then(Value::as_str) {
3268            store_candidates.push(rag_store_path(name));
3269        }
3270        if let Some(key) = collection_node.get("key").and_then(Value::as_str) {
3271            store_candidates.push(rag_store_path(key));
3272        }
3273    }
3274    for store_path in unique_paths(store_candidates) {
3275        if store_path.exists() {
3276            return rag_status_from_store(collection, &store_path);
3277        }
3278    }
3279
3280    rag_status_from_zotero_sidecars(client, collection, collection_match)
3281}
3282
/// Removes duplicate paths while keeping the first occurrence of each, in input order.
fn unique_paths(paths: Vec<PathBuf>) -> Vec<PathBuf> {
    paths.into_iter().fold(Vec::new(), |mut kept, candidate| {
        if !kept.contains(&candidate) {
            kept.push(candidate);
        }
        kept
    })
}
3292
3293fn rag_status_from_store(collection: &str, store_path: &Path) -> Result<Value, String> {
3294    let raw = fs::read_to_string(store_path)
3295        .map_err(|err| format!("read RAG store {}: {err}", store_path.display()))?;
3296    let store: Value = serde_json::from_str(&raw)
3297        .map_err(|err| format!("parse RAG store {}: {err}", store_path.display()))?;
3298    let chunks = store
3299        .get("chunks")
3300        .and_then(Value::as_array)
3301        .cloned()
3302        .unwrap_or_default();
3303    let mut item_keys = Vec::<Value>::new();
3304    for chunk in &chunks {
3305        let Some(item_key) = chunk.get("item_key") else {
3306            continue;
3307        };
3308        if !item_keys.iter().any(|seen| seen == item_key) {
3309            item_keys.push(item_key.clone());
3310        }
3311    }
3312    Ok(serde_json::json!({
3313        "status": "indexed",
3314        "collection": store.get("collection").and_then(Value::as_str).unwrap_or(collection),
3315        "collection_key": store.get("collection_key").cloned().unwrap_or(Value::Null),
3316        "model": store.get("model").cloned().unwrap_or(Value::String("unknown".to_string())),
3317        "total_chunks": chunks.len(),
3318        "total_items": item_keys.len(),
3319        "store_path": store_path.to_string_lossy(),
3320    }))
3321}
3322
3323fn rag_status_from_zotero_sidecars(
3324    client: &mut impl RpcCaller,
3325    collection: &str,
3326    collection_match: Option<Value>,
3327) -> Result<Value, String> {
3328    let collection_key = collection_match
3329        .as_ref()
3330        .and_then(|node| node.get("key").cloned())
3331        .ok_or_else(|| format!("COLLECTION_NOT_FOUND: Collection not found: {collection:?}"))?;
3332    let raw = paginate_rpc(
3333        client,
3334        "collections.getItems",
3335        serde_json::json!({"key": collection_key}),
3336        500,
3337    )?;
3338    let items = raw
3339        .get("items")
3340        .and_then(Value::as_array)
3341        .or_else(|| raw.as_array())
3342        .ok_or_else(|| "collections.getItems returned non-array/non-items result".to_string())?
3343        .clone();
3344
3345    let mut indexed_items = 0usize;
3346    let mut total_chunks = 0usize;
3347    for item in &items {
3348        let item_key = item.get("key").cloned().unwrap_or(Value::Null);
3349        let chunk_count = sidecar_chunk_count_for_item(client, &item_key)?;
3350        if chunk_count > 0 {
3351            indexed_items += 1;
3352            total_chunks += chunk_count;
3353        }
3354    }
3355
3356    if indexed_items == 0 {
3357        return Ok(serde_json::json!({
3358            "status": "not indexed",
3359            "collection": collection,
3360            "total_items": items.len(),
3361            "indexed_items": 0,
3362        }));
3363    }
3364
3365    Ok(serde_json::json!({
3366        "status": "indexed",
3367        "collection": collection,
3368        "total_chunks": total_chunks,
3369        "total_items": indexed_items,
3370        "collection_items": items.len(),
3371        "source": "zotero-sidecar",
3372    }))
3373}
3374
3375fn sidecar_chunk_count_for_item(
3376    client: &mut impl RpcCaller,
3377    item_key: &Value,
3378) -> Result<usize, String> {
3379    let attachments = client.call(
3380        "attachments.list",
3381        Some(serde_json::json!({"parentKey": item_key.clone()})),
3382    )?;
3383    let Some(attachments) = attachments.as_array() else {
3384        return Ok(0);
3385    };
3386
3387    let mut count = 0usize;
3388    for attachment in attachments {
3389        let Some(path) = attachment.get("path").and_then(Value::as_str) else {
3390            continue;
3391        };
3392        let local = local_path_from_zotero_path(path);
3393        let Some(dir) = Path::new(&local).parent() else {
3394            continue;
3395        };
3396        let Ok(bytes) = read_machine_artifact_sidecar(dir, MachineArtifactKind::Chunks) else {
3397            continue;
3398        };
3399        let text = String::from_utf8_lossy(&bytes);
3400        count += text.lines().filter(|line| !line.trim().is_empty()).count();
3401    }
3402    Ok(count)
3403}
3404
3405fn rag_store_path(collection: &str) -> PathBuf {
3406    rag_store_root().join(format!("{collection}.json"))
3407}
3408
3409fn rag_store_root() -> PathBuf {
3410    let xdg_data_home = env::var_os("XDG_DATA_HOME")
3411        .filter(|path| !path.is_empty())
3412        .map(PathBuf::from);
3413    let appdata = env::var_os("APPDATA")
3414        .filter(|path| !path.is_empty())
3415        .map(PathBuf::from);
3416    let userprofile = env::var_os("USERPROFILE")
3417        .filter(|path| !path.is_empty())
3418        .map(PathBuf::from);
3419    let home = env::var_os("HOME")
3420        .filter(|path| !path.is_empty())
3421        .map(PathBuf::from);
3422
3423    rag_store_root_for_platform(
3424        ArtifactStorePlatform::current(),
3425        xdg_data_home.as_deref(),
3426        appdata.as_deref(),
3427        userprofile.as_deref(),
3428        home.as_deref(),
3429    )
3430}
3431
3432fn rag_store_root_for_platform(
3433    platform: ArtifactStorePlatform,
3434    xdg_data_home: Option<&Path>,
3435    appdata: Option<&Path>,
3436    userprofile: Option<&Path>,
3437    home: Option<&Path>,
3438) -> PathBuf {
3439    match platform {
3440        ArtifactStorePlatform::Windows => {
3441            if let Some(path) = appdata {
3442                return path.join("Zotron").join("rag");
3443            }
3444            if let Some(path) = userprofile {
3445                return path
3446                    .join("AppData")
3447                    .join("Roaming")
3448                    .join("Zotron")
3449                    .join("rag");
3450            }
3451            if let Some(path) = home {
3452                return path
3453                    .join("AppData")
3454                    .join("Roaming")
3455                    .join("Zotron")
3456                    .join("rag");
3457            }
3458            PathBuf::from(".zotron").join("rag")
3459        }
3460        ArtifactStorePlatform::Macos => {
3461            if let Some(path) = home {
3462                return path
3463                    .join("Library")
3464                    .join("Application Support")
3465                    .join("Zotron")
3466                    .join("rag");
3467            }
3468            if let Some(path) = xdg_data_home {
3469                return path.join("zotron").join("rag");
3470            }
3471            PathBuf::from(".zotron").join("rag")
3472        }
3473        ArtifactStorePlatform::Linux | ArtifactStorePlatform::Other => xdg_data_home
3474            .map(|path| path.join("zotron").join("rag"))
3475            .or_else(|| {
3476                home.map(|path| path.join(".local").join("share").join("zotron").join("rag"))
3477            })
3478            .unwrap_or_else(|| PathBuf::from(".zotron").join("rag")),
3479    }
3480}
3481
3482fn run_push_command(
3483    json_file: String,
3484    pdf: Option<String>,
3485    collection: Option<String>,
3486    on_duplicate: String,
3487    dry_run: bool,
3488    client: &mut impl RpcCaller,
3489) -> Result<String, String> {
3490    if !matches!(on_duplicate.as_str(), "skip" | "update" | "create") {
3491        return Err(format!(
3492            "INVALID_ARGS: --on-duplicate must be skip|update|create, got {on_duplicate:?}"
3493        ));
3494    }
3495
3496    let payload = if json_file == "-" {
3497        let mut input = String::new();
3498        io::stdin()
3499            .read_to_string(&mut input)
3500            .map_err(|err| format!("read stdin: {err}"))?;
3501        input
3502    } else {
3503        fs::read_to_string(&json_file).map_err(|err| format!("read {json_file}: {err}"))?
3504    };
3505    let item_json = serde_json::from_str::<Value>(&payload)
3506        .map_err(|err| format!("INVALID_JSON: Could not parse JSON: {err}"))?;
3507
3508    // Validate required fields
3509    match item_json.get("itemType").and_then(Value::as_str) {
3510        Some(s) if !s.is_empty() => {}
3511        _ => return Err("INVALID_ARGS: input JSON must include a non-empty \"itemType\" field".to_string()),
3512    }
3513
3514    if dry_run {
3515        let collection_key = collection
3516            .as_deref()
3517            .map(|name| resolve_collection(client, name))
3518            .transpose()?;
3519        return format_json(
3520            &serde_json::json!({
3521                "ok": true,
3522                "dryRun": true,
3523                "wouldPush": {
3524                    "title": item_json.get("title").cloned().unwrap_or(Value::Null),
3525                    "itemType": item_json.get("itemType").cloned().unwrap_or(Value::Null),
3526                    "collectionKey": collection_key,
3527                    "pdfPath": pdf,
3528                    "onDuplicate": on_duplicate,
3529                }
3530            }),
3531            JsonStyle::PythonCompact,
3532        );
3533    }
3534
3535    let result = push_item(
3536        client,
3537        &item_json,
3538        pdf.as_deref(),
3539        collection.as_deref(),
3540        &on_duplicate,
3541    )?;
3542    format_json(&result, JsonStyle::PythonCompact)
3543}
3544
3545fn push_item(
3546    client: &mut impl RpcCaller,
3547    item_json: &Value,
3548    pdf_path: Option<&str>,
3549    collection: Option<&str>,
3550    on_duplicate: &str,
3551) -> Result<Value, String> {
3552    let pdf_size = if let Some(path) = pdf_path {
3553        validate_pdf_magic(path)?
3554    } else {
3555        0
3556    };
3557
3558    let collection_key = match collection {
3559        Some(name) => resolve_collection(client, name)?,
3560        None => resolve_current_collection(client)?,
3561    };
3562
3563    let dup_id = find_duplicate(client, item_json)?;
3564    if let Some(dup_id) = dup_id.as_deref().filter(|_| on_duplicate == "skip") {
3565        if !is_library_root(&collection_key) {
3566            client.call(
3567                "collections.addItems",
3568                Some(serde_json::json!({"key": collection_key, "keys": [dup_id]})),
3569            )?;
3570        }
3571        let mut pdf_attached = false;
3572        if let Some(path) = pdf_path {
3573            if !item_has_pdf_attachment(client, dup_id)? {
3574                attach_pdf(client, dup_id, path)?;
3575                pdf_attached = true;
3576            }
3577        }
3578        return Ok(push_result(
3579            "skipped_duplicate",
3580            Some(dup_id.to_string()),
3581            pdf_attached,
3582            if pdf_attached { pdf_size } else { 0 },
3583            Value::Null,
3584        ));
3585    }
3586
3587    let xpi_payload = to_xpi_payload(item_json, Some(&collection_key));
3588    let (item_key, status) =
3589        if let Some(dup_id) = dup_id.as_deref().filter(|_| on_duplicate == "update") {
3590            let mut params = serde_json::Map::new();
3591            params.insert("key".to_string(), Value::String(dup_id.to_string()));
3592            params.insert(
3593                "fields".to_string(),
3594                xpi_payload
3595                    .get("fields")
3596                    .cloned()
3597                    .unwrap_or_else(|| serde_json::json!({})),
3598            );
3599            if let Some(creators) = xpi_payload.get("creators") {
3600                params.insert("creators".to_string(), creators.clone());
3601            }
3602            if let Some(tags) = xpi_payload.get("tags") {
3603                params.insert("tags".to_string(), tags.clone());
3604            }
3605            client.call("items.update", Some(Value::Object(params)))?;
3606            (dup_id.to_string(), "updated")
3607        } else {
3608            let created = client.call("items.create", Some(xpi_payload))?;
3609            let key = created
3610                .get("key")
3611                .and_then(Value::as_str)
3612                .ok_or_else(|| format!("items.create returned unexpected shape: {created:?}"))?;
3613            (key.to_string(), "created")
3614        };
3615
3616    let mut pdf_attached = false;
3617    if let Some(path) = pdf_path {
3618        if status != "updated" || !item_has_pdf_attachment(client, &item_key)? {
3619            attach_pdf(client, &item_key, path)?;
3620            pdf_attached = true;
3621        }
3622    }
3623
3624    if status == "updated" && !is_library_root(&collection_key) {
3625        client.call(
3626            "collections.addItems",
3627            Some(serde_json::json!({"key": collection_key, "keys": [item_key]})),
3628        )?;
3629    }
3630
3631    Ok(push_result(
3632        status,
3633        Some(item_key),
3634        pdf_attached,
3635        if pdf_attached { pdf_size } else { 0 },
3636        Value::Null,
3637    ))
3638}
3639
/// Checks that the file at `path` starts with the `%PDF-` magic bytes.
///
/// Only the first few bytes are read; the size is taken from the open file's
/// metadata, so large PDFs are not loaded into memory just to be validated.
///
/// # Errors
///
/// Returns `INVALID_PDF: cannot read …` when the file cannot be opened, read or
/// inspected, and `INVALID_PDF: … does not start with %PDF- magic bytes` when
/// the header does not match (including files shorter than the magic).
///
/// Returns the file size in bytes on success.
fn validate_pdf_magic(path: &str) -> Result<u64, String> {
    const PDF_MAGIC: &[u8] = b"%PDF-";

    let cannot_read = |err: io::Error| format!("INVALID_PDF: cannot read {path}: {err}");

    let mut file = fs::File::open(path).map_err(cannot_read)?;

    // `take` + `read_to_end` tolerates files shorter than the magic without
    // turning a short read into an I/O error.
    let mut header = Vec::with_capacity(PDF_MAGIC.len());
    file.by_ref()
        .take(PDF_MAGIC.len() as u64)
        .read_to_end(&mut header)
        .map_err(cannot_read)?;

    if header != PDF_MAGIC {
        return Err(format!(
            "INVALID_PDF: {path} does not start with %PDF- magic bytes"
        ));
    }

    let size = file.metadata().map_err(cannot_read)?.len();
    Ok(size)
}
3650
3651fn resolve_current_collection(client: &mut impl RpcCaller) -> Result<Value, String> {
3652    let selected = client.call("system.currentCollection", None)?;
3653    Ok(selected
3654        .get("key")
3655        .cloned()
3656        .unwrap_or_else(|| Value::Number(0.into())))
3657}
3658
3659fn find_duplicate(
3660    client: &mut impl RpcCaller,
3661    item_json: &Value,
3662) -> Result<Option<String>, String> {
3663    if let Some(doi) = item_json
3664        .get("DOI")
3665        .and_then(Value::as_str)
3666        .filter(|doi| !doi.is_empty())
3667    {
3668        let hits = client.call("search.byIdentifier", Some(serde_json::json!({"doi": doi})))?;
3669        if let Some(key) = first_hit_key(&hits) {
3670            return Ok(Some(key));
3671        }
3672    }
3673
3674    if let Some(title) = item_json
3675        .get("title")
3676        .and_then(Value::as_str)
3677        .filter(|title| title.len() >= 10)
3678    {
3679        let hits = client.call(
3680            "search.quick",
3681            Some(serde_json::json!({"query": title, "limit": 20})),
3682        )?;
3683        if let Some(items) = response_items(&hits) {
3684            for item in items {
3685                if item.get("title").and_then(Value::as_str) == Some(title) {
3686                    if let Some(key) = item.get("key").and_then(Value::as_str) {
3687                        return Ok(Some(key.to_string()));
3688                    }
3689                }
3690            }
3691        }
3692    }
3693
3694    Ok(None)
3695}
3696
3697fn first_hit_key(response: &Value) -> Option<String> {
3698    response_items(response)?
3699        .first()?
3700        .get("key")?
3701        .as_str()
3702        .map(ToString::to_string)
3703}
3704
3705fn response_items(response: &Value) -> Option<&Vec<Value>> {
3706    response
3707        .get("items")
3708        .and_then(Value::as_array)
3709        .or_else(|| response.as_array())
3710}
3711
3712fn to_xpi_payload(item_json: &Value, collection_key: Option<&Value>) -> Value {
3713    const NON_FIELD_KEYS: &[&str] = &[
3714        "itemType",
3715        "creators",
3716        "tags",
3717        "collections",
3718        "attachments",
3719        "relations",
3720        "notes",
3721        "id",
3722        "key",
3723        "version",
3724    ];
3725
3726    let mut fields = serde_json::Map::new();
3727    if let Some(item) = item_json.as_object() {
3728        for (key, value) in item {
3729            if !NON_FIELD_KEYS.contains(&key.as_str()) && !value.is_null() && value != "" {
3730                fields.insert(key.clone(), value.clone());
3731            }
3732        }
3733    }
3734
3735    let mut payload = serde_json::Map::new();
3736    payload.insert(
3737        "itemType".to_string(),
3738        item_json
3739            .get("itemType")
3740            .cloned()
3741            .unwrap_or_else(|| Value::String("journalArticle".to_string())),
3742    );
3743    payload.insert("fields".to_string(), Value::Object(fields));
3744
3745    if let Some(creators) = item_json.get("creators").and_then(Value::as_array) {
3746        if !creators.is_empty() {
3747            payload.insert(
3748                "creators".to_string(),
3749                Value::Array(
3750                    creators
3751                        .iter()
3752                        .map(|creator| {
3753                            serde_json::json!({
3754                                "firstName": creator.get("firstName").and_then(Value::as_str).unwrap_or(""),
3755                                "lastName": creator.get("lastName").and_then(Value::as_str).unwrap_or(""),
3756                                "creatorType": creator.get("creatorType").and_then(Value::as_str).unwrap_or("author"),
3757                            })
3758                        })
3759                        .collect(),
3760                ),
3761            );
3762        }
3763    }
3764
3765    if let Some(tags) = item_json.get("tags").and_then(Value::as_array) {
3766        if !tags.is_empty() {
3767            payload.insert(
3768                "tags".to_string(),
3769                Value::Array(
3770                    tags.iter()
3771                        .map(|tag| tag.get("tag").cloned().unwrap_or_else(|| tag.clone()))
3772                        .collect(),
3773                ),
3774            );
3775        }
3776    }
3777
3778    if let Some(collection_key) = collection_key.filter(|key| !is_library_root(key)) {
3779        payload.insert(
3780            "collections".to_string(),
3781            Value::Array(vec![collection_key.clone()]),
3782        );
3783    }
3784
3785    Value::Object(payload)
3786}
3787
3788fn item_has_pdf_attachment(client: &mut impl RpcCaller, item_key: &str) -> Result<bool, String> {
3789    let attachments = client.call(
3790        "attachments.list",
3791        Some(serde_json::json!({"parentKey": item_key})),
3792    )?;
3793    Ok(has_pdf_attachment(&attachments))
3794}
3795
3796fn attach_pdf(client: &mut impl RpcCaller, item_key: &str, path: &str) -> Result<(), String> {
3797    client.call(
3798        "attachments.add",
3799        Some(serde_json::json!({
3800            "parentKey": item_key,
3801            "path": zotero_path(path),
3802            "title": "Full Text PDF",
3803        })),
3804    )?;
3805    Ok(())
3806}
3807
3808fn zotero_path(path: &str) -> String {
3809    let path = Path::new(path)
3810        .canonicalize()
3811        .unwrap_or_else(|_| Path::new(path).to_path_buf())
3812        .to_string_lossy()
3813        .into_owned();
3814    if is_wsl() {
3815        return ProcessCommand::new("wslpath")
3816            .arg("-w")
3817            .arg(&path)
3818            .output()
3819            .ok()
3820            .filter(|output| output.status.success())
3821            .and_then(|output| String::from_utf8(output.stdout).ok())
3822            .map(|converted| converted.trim().to_string())
3823            .filter(|converted| !converted.is_empty())
3824            .unwrap_or(path);
3825    }
3826    path
3827}
3828
/// Reports whether the process appears to run under WSL.
///
/// True when `WSL_DISTRO_NAME` is set, or when `/proc/sys/kernel/osrelease`
/// is readable and contains "microsoft" (case-insensitively).
fn is_wsl() -> bool {
    env::var_os("WSL_DISTRO_NAME").is_some()
        || fs::read_to_string("/proc/sys/kernel/osrelease")
            .is_ok_and(|release| release.to_ascii_lowercase().contains("microsoft"))
}
3837
3838fn is_library_root(value: &Value) -> bool {
3839    value.as_i64() == Some(0) || value.as_u64() == Some(0)
3840}
3841
3842fn push_result(
3843    status: &str,
3844    zotero_item_key: Option<String>,
3845    pdf_attached: bool,
3846    pdf_size_bytes: u64,
3847    error: Value,
3848) -> Value {
3849    serde_json::json!({
3850        "status": status,
3851        "zotero_item_key": zotero_item_key,
3852        "pdf_attached": pdf_attached,
3853        "pdf_size_bytes": pdf_size_bytes,
3854        "error": error,
3855    })
3856}
3857
3858fn run_search(
3859    args: SearchArgs,
3860    client: &mut impl RpcCaller,
3861) -> Result<(Value, JsonStyle), String> {
3862    let SearchArgs {
3863        query, fulltext, author, after, before, journal, tag,
3864        doi, isbn, issn, collection, limit, offset, ..
3865    } = args;
3866
3867    let has_identifier = doi.is_some() || isbn.is_some() || issn.is_some();
3868    if has_identifier {
3869        let mut params = serde_json::Map::new();
3870        if let Some(doi) = doi { params.insert("doi".into(), Value::String(doi)); }
3871        if let Some(isbn) = isbn { params.insert("isbn".into(), Value::String(isbn)); }
3872        if let Some(issn) = issn { params.insert("issn".into(), Value::String(issn)); }
3873        let value = client.call("search.byIdentifier", Some(Value::Object(params)))?;
3874        return Ok((normalize_list_envelope(value, "items", None, 0), JsonStyle::Pretty));
3875    }
3876
3877    if fulltext {
3878        let query = query.ok_or("INVALID_ARGS: --fulltext requires a search query")?;
3879        let mut params = serde_json::json!({"query": query, "limit": limit});
3880        if let (Some(col), Some(map)) = (collection, params.as_object_mut()) {
3881            map.insert("collection".into(), resolve_collection(client, &col)?);
3882        }
3883        let value = client.call("search.fulltext", Some(params))?;
3884        return Ok((normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty));
3885    }
3886
3887    let has_filters = author.is_some() || after.is_some() || before.is_some()
3888        || journal.is_some() || tag.is_some();
3889    if has_filters {
3890        let mut conditions: Vec<Value> = Vec::new();
3891        if let Some(query) = &query {
3892            conditions.push(serde_json::json!({
3893                "field": "quicksearch-titleCreatorYear",
3894                "operator": "contains",
3895                "value": query,
3896            }));
3897        }
3898        if let Some(author) = author {
3899            conditions.push(serde_json::json!({
3900                "field": "creator", "operator": "contains", "value": author,
3901            }));
3902        }
3903        if let Some(after) = after {
3904            conditions.push(serde_json::json!({
3905                "field": "date", "operator": "isAfter", "value": after,
3906            }));
3907        }
3908        if let Some(before) = before {
3909            conditions.push(serde_json::json!({
3910                "field": "date", "operator": "isBefore", "value": before,
3911            }));
3912        }
3913        if let Some(journal) = journal {
3914            conditions.push(serde_json::json!({
3915                "field": "publicationTitle", "operator": "contains", "value": journal,
3916            }));
3917        }
3918        if let Some(tag) = tag {
3919            conditions.push(serde_json::json!({
3920                "field": "tag", "operator": "is", "value": tag,
3921            }));
3922        }
3923        let value = client.call(
3924            "search.advanced",
3925            Some(serde_json::json!({
3926                "conditions": conditions,
3927                "operator": "and",
3928                "limit": limit,
3929                "offset": offset,
3930            })),
3931        )?;
3932        return Ok((normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty));
3933    }
3934
3935    let query = query.ok_or(
3936        "INVALID_ARGS: provide a search query, or use --doi/--isbn/--issn for identifier lookup"
3937    )?;
3938    let value = if let Some(col) = collection {
3939        let key = resolve_collection(client, &col)?;
3940        let response = client.call(
3941            "collections.getItems",
3942            Some(serde_json::json!({"key": key})),
3943        )?;
3944        collection_quick_search_response(&response, &query, limit)
3945    } else {
3946        filter_search_artifacts(client.call(
3947            "search.quick",
3948            Some(serde_json::json!({"query": query, "limit": limit})),
3949        )?)
3950    };
3951    Ok((normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty))
3952}
3953
3954fn run_search_management_command(
3955    command: SearchManagementCommand,
3956    client: &mut impl RpcCaller,
3957) -> Result<(Value, JsonStyle), String> {
3958    match command {
3959        SearchManagementCommand::SavedSearches { .. } => Ok((
3960            normalize_list_envelope(client.call("search.savedSearches", None)?, "items", None, 0),
3961            JsonStyle::Pretty,
3962        )),
3963        SearchManagementCommand::CreateSaved {
3964            name, condition, dry_run, ..
3965        } => {
3966            let conditions = condition
3967                .iter()
3968                .map(|raw| parse_search_condition(raw))
3969                .collect::<Result<Vec<_>, _>>()?;
3970            let params = serde_json::json!({"name": name, "conditions": conditions});
3971            if dry_run {
3972                Ok((dry_run_value("search.createSavedSearch", params), JsonStyle::PythonCompact))
3973            } else {
3974                Ok((client.call("search.createSavedSearch", Some(params))?, JsonStyle::PythonCompact))
3975            }
3976        }
3977        SearchManagementCommand::DeleteSaved {
3978            search_key, dry_run, ..
3979        } => {
3980            let params = serde_json::json!({"key": search_key});
3981            if dry_run {
3982                Ok((dry_run_value("search.deleteSavedSearch", params), JsonStyle::PythonCompact))
3983            } else {
3984                Ok((client.call("search.deleteSavedSearch", Some(params))?, JsonStyle::PythonCompact))
3985            }
3986        }
3987    }
3988}
3989
3990fn filter_search_artifacts(mut value: Value) -> Value {
3991    let Some(items) = value.get_mut("items").and_then(Value::as_array_mut) else {
3992        return value;
3993    };
3994    items.retain(|item| match item.get("title").and_then(Value::as_str) {
3995        Some(title) => !is_zotron_evidence_artifact(title),
3996        None => true,
3997    });
3998    let total_items = items.len() as u64;
3999    if let Some(total) = value.get_mut("total") {
4000        *total = Value::from(total_items);
4001    }
4002    value
4003}
4004
4005fn collection_quick_search_response(response: &Value, query: &str, limit: u64) -> Value {
4006    let mut matched = collection_items(response)
4007        .into_iter()
4008        .filter(|item| !item_is_evidence_artifact(item))
4009        .filter(|item| quick_item_matches(item, query))
4010        .collect::<Vec<_>>();
4011    let total = matched.len() as u64;
4012    let limit = usize::try_from(limit).unwrap_or(usize::MAX);
4013    if matched.len() > limit {
4014        matched.truncate(limit);
4015    }
4016    serde_json::json!({"items": matched, "total": total})
4017}
4018
4019fn item_is_evidence_artifact(item: &Value) -> bool {
4020    item.get("title")
4021        .and_then(Value::as_str)
4022        .is_some_and(is_zotron_evidence_artifact)
4023}
4024
4025fn quick_item_matches(item: &Value, query: &str) -> bool {
4026    let terms = query
4027        .split_whitespace()
4028        .map(|term| term.to_lowercase())
4029        .filter(|term| !term.is_empty())
4030        .collect::<Vec<_>>();
4031    if terms.is_empty() {
4032        return true;
4033    }
4034    let mut haystack = String::new();
4035    append_search_text(item, &mut haystack);
4036    let haystack = haystack.to_lowercase();
4037    terms.iter().all(|term| haystack.contains(term))
4038}
4039
4040fn append_search_text(value: &Value, out: &mut String) {
4041    match value {
4042        Value::String(text) => {
4043            out.push(' ');
4044            out.push_str(text);
4045        }
4046        Value::Number(number) => {
4047            out.push(' ');
4048            out.push_str(&number.to_string());
4049        }
4050        Value::Bool(value) => {
4051            out.push(' ');
4052            out.push_str(if *value { "true" } else { "false" });
4053        }
4054        Value::Array(items) => {
4055            for item in items {
4056                append_search_text(item, out);
4057            }
4058        }
4059        Value::Object(map) => {
4060            for item in map.values() {
4061                append_search_text(item, out);
4062            }
4063        }
4064        Value::Null => {}
4065    }
4066}
4067
4068fn parse_search_condition(raw: &str) -> Result<Value, String> {
4069    let mut parts = raw.split_whitespace();
4070    let field = parts.next();
4071    let operator = parts.next();
4072    let value = parts.collect::<Vec<_>>().join(" ");
4073    match (field, operator, value.is_empty()) {
4074        (Some(field), Some(operator), false) => Ok(serde_json::json!({
4075            "field": field,
4076            "operator": operator,
4077            "value": value,
4078        })),
4079        _ => Err(format!(
4080            "INVALID_ARGS: --condition must be 'field operator value', got: {raw:?}"
4081        )),
4082    }
4083}
4084
4085fn normalize_list_envelope(value: Value, list_key: &str, limit: Option<u64>, offset: u64) -> Value {
4086    if let Value::Array(arr) = value {
4087        let total = arr.len() as u64;
4088        let mut obj = serde_json::Map::new();
4089        obj.insert(list_key.to_string(), Value::Array(arr));
4090        obj.insert("total".to_string(), Value::from(total));
4091        if let Some(limit) = limit {
4092            obj.insert("limit".to_string(), Value::from(limit));
4093        }
4094        obj.insert("offset".to_string(), Value::from(offset));
4095        obj.insert("hasMore".to_string(), Value::Bool(false));
4096        return Value::Object(obj);
4097    }
4098
4099    let mut obj = match value {
4100        Value::Object(obj) if obj.contains_key(list_key) => obj,
4101        other => return other,
4102    };
4103
4104    let items_len = obj
4105        .get(list_key)
4106        .and_then(Value::as_array)
4107        .map_or(0, |a| a.len()) as u64;
4108    let total = obj
4109        .get("total")
4110        .and_then(Value::as_u64)
4111        .unwrap_or(items_len);
4112
4113    obj.insert("total".to_string(), Value::from(total));
4114    if let Some(limit) = limit {
4115        obj.insert("limit".to_string(), Value::from(limit));
4116    }
4117    obj.insert("offset".to_string(), Value::from(offset));
4118    obj.insert(
4119        "hasMore".to_string(),
4120        Value::Bool(offset + items_len < total),
4121    );
4122
4123    Value::Object(obj)
4124}
4125
/// Upper bound on the number of rows `paginate_rpc` accumulates before it
/// stops fetching and returns what it has.
const RPC_PAGINATION_SAFETY_CAP: usize = 10_000;

/// Object keys probed, in this order, by `extract_page` to find the page array
/// inside an RPC response.
const RPC_PAGE_LIST_KEYS: [&str; 4] = [
    "items",
    "tags",
    "results",
    "data",
];
4128
4129fn paginate_rpc(
4130    client: &mut impl RpcCaller,
4131    method: &str,
4132    params: Value,
4133    page_size: usize,
4134) -> Result<Value, String> {
4135    let base = params
4136        .as_object()
4137        .ok_or_else(|| "params must be a JSON object".to_string())?;
4138    let mut out = Vec::new();
4139    let mut prev_page: Option<Vec<Value>> = None;
4140    let mut offset = 0usize;
4141
4142    loop {
4143        let mut page_params = base.clone();
4144        page_params.insert("offset".to_string(), Value::Number(offset.into()));
4145        page_params.insert("limit".to_string(), Value::Number(page_size.into()));
4146        let response = client.call(method, Some(Value::Object(page_params)))?;
4147
4148        let page = match extract_page(&response) {
4149            Some(page) => page,
4150            None if out.is_empty() => return Ok(response),
4151            None if response.is_object() => {
4152                return Err(format!(
4153                    "paginate: {method:?} returned a non-paginated dict after {} accumulated rows; aborting",
4154                    out.len()
4155                ));
4156            }
4157            None => {
4158                return Err(format!(
4159                    "paginate: {method:?} returned non-list/non-dict shape after {} accumulated rows; aborting",
4160                    out.len()
4161                ));
4162            }
4163        };
4164
4165        if prev_page.as_ref() == Some(&page) {
4166            return Err(format!(
4167                "paginate: {method:?} returned identical pages — method likely ignores offset; aborting after {} rows",
4168                out.len()
4169            ));
4170        }
4171
4172        let page_len = page.len();
4173        out.extend(page.clone());
4174        if page_len < page_size {
4175            return Ok(Value::Array(out));
4176        }
4177        if out.len() >= RPC_PAGINATION_SAFETY_CAP {
4178            out.truncate(RPC_PAGINATION_SAFETY_CAP);
4179            return Ok(Value::Array(out));
4180        }
4181        prev_page = Some(page);
4182        offset += page_size;
4183    }
4184}
4185
4186fn extract_page(response: &Value) -> Option<Vec<Value>> {
4187    if let Some(page) = response.as_array() {
4188        return Some(page.clone());
4189    }
4190    let object = response.as_object()?;
4191    for key in RPC_PAGE_LIST_KEYS {
4192        if let Some(page) = object.get(key).and_then(Value::as_array) {
4193            return Some(page.clone());
4194        }
4195    }
4196    None
4197}
4198
4199fn run_find_pdfs_command(
4200    client: &mut impl RpcCaller,
4201    collection: String,
4202    limit: usize,
4203) -> Result<(Value, JsonStyle), String> {
4204    let collection_key = resolve_collection(client, &collection)?;
4205    let response = client.call(
4206        "collections.getItems",
4207        Some(serde_json::json!({"key": collection_key})),
4208    )?;
4209    let items = collection_items(&response);
4210
4211    let mut missing = Vec::new();
4212    for item in &items {
4213        let Some(item_key) = item.get("key").and_then(Value::as_str) else {
4214            continue;
4215        };
4216        let attachments = client.call(
4217            "attachments.list",
4218            Some(serde_json::json!({"parentKey": item_key})),
4219        )?;
4220        if !has_pdf_attachment(&attachments) {
4221            missing.push(item.clone());
4222        }
4223        if limit > 0 && missing.len() >= limit {
4224            break;
4225        }
4226    }
4227
4228    let mut results = Vec::new();
4229    for item in &missing {
4230        let item_key = item
4231            .get("key")
4232            .and_then(Value::as_str)
4233            .ok_or_else(|| "missing item lacks key".to_string())?;
4234        let response = client.call(
4235            "attachments.findPDF",
4236            Some(serde_json::json!({"parentKey": item_key})),
4237        )?;
4238        let attachment = response.get("attachment").filter(|value| !value.is_null());
4239        results.push(serde_json::json!({
4240            "item_key": item_key,
4241            "title": item.get("title").cloned().unwrap_or(Value::Null),
4242            "found": attachment.is_some(),
4243            "attachment_key": attachment
4244                .and_then(|attachment| attachment.get("key"))
4245                .cloned()
4246                .unwrap_or(Value::Null),
4247        }));
4248    }
4249
4250    Ok((
4251        serde_json::json!({
4252            "scanned": items.len(),
4253            "attempted": missing.len(),
4254            "results": results,
4255        }),
4256        JsonStyle::Pretty,
4257    ))
4258}
4259
4260fn collection_items(response: &Value) -> Vec<Value> {
4261    if let Some(items) = response.get("items").and_then(Value::as_array) {
4262        return items.clone();
4263    }
4264    response.as_array().cloned().unwrap_or_default()
4265}
4266
4267fn has_pdf_attachment(attachments: &Value) -> bool {
4268    attachments
4269        .as_array()
4270        .is_some_and(|attachments| attachments.iter().any(is_pdf_attachment))
4271}
4272
4273fn is_pdf_attachment(attachment: &Value) -> bool {
4274    let content_type = attachment
4275        .get("contentType")
4276        .and_then(Value::as_str)
4277        .unwrap_or_default()
4278        .to_lowercase();
4279    let path = attachment
4280        .get("path")
4281        .and_then(Value::as_str)
4282        .unwrap_or_default()
4283        .to_lowercase();
4284    matches!(
4285        content_type.as_str(),
4286        "application/pdf" | "application/x-pdf"
4287    ) || path.ends_with(".pdf")
4288}
4289
4290fn call_json(
4291    client: &mut impl RpcCaller,
4292    method: &str,
4293    params: Option<Value>,
4294) -> Result<Value, String> {
4295    client.call(method, params)
4296}
4297
4298fn run_system_command(
4299    command: SystemCommand,
4300    client: &mut impl RpcCaller,
4301) -> Result<(Value, JsonStyle), String> {
4302    let value = match command {
4303        SystemCommand::Version { .. } => client.call("system.version", None)?,
4304        SystemCommand::Libraries { .. } => client.call("system.libraries", None)?,
4305        SystemCommand::LibraryStats { library, .. } => {
4306            let params = library.map(|id| serde_json::json!({"id": id}));
4307            client.call("system.libraryStats", params)?
4308        }
4309        SystemCommand::Schema { item_type, .. } => {
4310            if let Some(item_type) = item_type {
4311                let fields = client.call("system.itemFields", Some(serde_json::json!({"itemType": item_type})))?;
4312                let creators = client.call("system.creatorTypes", Some(serde_json::json!({"itemType": item_type})))?;
4313                let field_names: Vec<Value> = fields.as_array().unwrap_or(&vec![])
4314                    .iter()
4315                    .filter_map(|f| f.get("field").cloned())
4316                    .collect();
4317                let creator_names: Vec<Value> = creators.as_array().unwrap_or(&vec![])
4318                    .iter()
4319                    .filter_map(|c| c.get("creatorType").cloned())
4320                    .collect();
4321                serde_json::json!({
4322                    "itemType": item_type,
4323                    "fields": field_names,
4324                    "creatorTypes": creator_names,
4325                })
4326            } else {
4327                let types = client.call("system.itemTypes", None)?;
4328                let type_names: Vec<Value> = types.as_array().unwrap_or(&vec![])
4329                    .iter()
4330                    .filter_map(|t| t.get("itemType").cloned())
4331                    .collect();
4332                Value::Array(type_names)
4333            }
4334        }
4335        SystemCommand::CurrentCollection { .. } => client.call("system.currentCollection", None)?,
4336        SystemCommand::ListMethods { .. } => client.call("system.listMethods", None)?,
4337        SystemCommand::Describe { method, .. } => {
4338            let params = method.map(|method| serde_json::json!({"method": method}));
4339            client.call("system.describe", params)?
4340        }
4341    };
4342    Ok((value, JsonStyle::Pretty))
4343}
4344
4345fn run_items_command(
4346    command: ItemsCommand,
4347    client: &mut impl RpcCaller,
4348) -> Result<(Value, JsonStyle), String> {
4349    let (value, style) = match command {
4350        ItemsCommand::Add {
4351            doi,
4352            isbn,
4353            from_url,
4354            file,
4355            collection,
4356            dry_run,
4357            ..
4358        } => {
4359            if let Some(doi) = doi {
4360                run_add_identifier_command(client, "items.addByDOI", "doi", doi, collection, dry_run)?
4361            } else if let Some(isbn) = isbn {
4362                run_add_identifier_command(client, "items.addByISBN", "isbn", isbn, collection, dry_run)?
4363            } else if let Some(from_url) = from_url {
4364                run_add_identifier_command(client, "items.addByURL", "url", from_url, collection, dry_run)?
4365            } else if let Some(file) = file {
4366                let mut params = serde_json::json!({"path": zotero_path(&file)});
4367                maybe_insert_collection(client, &mut params, collection)?;
4368                run_mutation_command(client, "items.addFromFile", params, dry_run)?
4369            } else {
4370                return Err("INVALID_ARGS: provide one of --doi, --isbn, --from-url, or --file".into());
4371            }
4372        }
4373        ItemsCommand::Create {
4374            item_type,
4375            fields,
4376            dry_run,
4377            ..
4378        } => {
4379            let parsed_fields = parse_field_options(&fields)?;
4380            let mut params = serde_json::json!({"itemType": item_type});
4381            if !parsed_fields.is_empty() {
4382                if let Some(map) = params.as_object_mut() {
4383                    map.insert("fields".to_string(), Value::Object(parsed_fields));
4384                }
4385            }
4386            run_mutation_command(client, "items.create", params, dry_run)?
4387        }
4388        ItemsCommand::Update {
4389            key,
4390            fields,
4391            dry_run,
4392            ..
4393        } => {
4394            let parsed_fields = parse_field_options(&fields)?;
4395            let mut params = serde_json::json!({"key": key});
4396            if !parsed_fields.is_empty() {
4397                if let Some(map) = params.as_object_mut() {
4398                    map.insert("fields".to_string(), Value::Object(parsed_fields));
4399                }
4400            }
4401            run_mutation_command(client, "items.update", params, dry_run)?
4402        }
4403        ItemsCommand::Delete { key, dry_run, .. } => run_mutation_command(
4404            client,
4405            "items.delete",
4406            serde_json::json!({"key": key}),
4407            dry_run,
4408        )?,
4409        ItemsCommand::Trash {
4410            items, dry_run, ..
4411        } => {
4412            if items.len() == 1 {
4413                run_mutation_command(
4414                    client,
4415                    "items.trash",
4416                    serde_json::json!({"key": items[0]}),
4417                    dry_run,
4418                )?
4419            } else {
4420                run_mutation_command(
4421                    client,
4422                    "items.batchTrash",
4423                    serde_json::json!({"keys": items}),
4424                    dry_run,
4425                )?
4426            }
4427        }
4428        ItemsCommand::Restore { item, dry_run, .. } => run_mutation_command(
4429            client,
4430            "items.restore",
4431            serde_json::json!({"key": item}),
4432            dry_run,
4433        )?,
4434        ItemsCommand::MergeDuplicates { keys, dry_run, .. } => {
4435            if keys.len() < 2 {
4436                return Err("INVALID_ARGS: need at least 2 keys to merge".to_string());
4437            }
4438            run_mutation_command(
4439                client,
4440                "items.mergeDuplicates",
4441                serde_json::json!({"keys": keys}),
4442                dry_run,
4443            )?
4444        }
4445        ItemsCommand::AddRelated {
4446            key,
4447            target,
4448            dry_run,
4449            ..
4450        } => run_mutation_command(
4451            client,
4452            "items.addRelated",
4453            serde_json::json!({"key": key, "targetKey": target}),
4454            dry_run,
4455        )?,
4456        ItemsCommand::RemoveRelated {
4457            key,
4458            target,
4459            dry_run,
4460            ..
4461        } => run_mutation_command(
4462            client,
4463            "items.removeRelated",
4464            serde_json::json!({"key": key, "targetKey": target}),
4465            dry_run,
4466        )?,
4467        ItemsCommand::Get { item, .. } => (
4468            client.call("items.get", Some(serde_json::json!({"key": item})))?,
4469            JsonStyle::Pretty,
4470        ),
4471        ItemsCommand::List {
4472            limit,
4473            offset,
4474            sort,
4475            direction,
4476            trash,
4477            ..
4478        } => {
4479            if trash {
4480                let value = client.call(
4481                    "items.getTrash",
4482                    Some(serde_json::json!({"limit": limit, "offset": offset})),
4483                )?;
4484                (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
4485            } else {
4486                let mut params = serde_json::json!({
4487                    "limit": limit,
4488                    "offset": offset,
4489                    "direction": direction,
4490                });
4491                if let (Some(sort), Some(map)) = (sort, params.as_object_mut()) {
4492                    map.insert("sort".to_string(), Value::String(sort));
4493                }
4494                let value = client.call("items.list", Some(params))?;
4495                (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
4496            }
4497        }
4498        ItemsCommand::FindDuplicates { .. } => (
4499            client.call("items.findDuplicates", None)?,
4500            JsonStyle::Pretty,
4501        ),
4502        ItemsCommand::Recent {
4503            limit,
4504            offset,
4505            recent_type,
4506            ..
4507        } => {
4508            if recent_type != "added" && recent_type != "modified" {
4509                return Err(format!(
4510                    "--type must be added or modified, got {recent_type:?}"
4511                ));
4512            }
4513            let value = client.call(
4514                "items.getRecent",
4515                Some(
4516                    serde_json::json!({"limit": limit, "offset": offset, "type": recent_type}),
4517                ),
4518            )?;
4519            (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
4520        }
4521        ItemsCommand::Fulltext { key, .. } => (
4522            client.call("items.getFullText", Some(serde_json::json!({"key": key})))?,
4523            JsonStyle::Pretty,
4524        ),
4525        ItemsCommand::Related { key, .. } => (
4526            normalize_list_envelope(
4527                client.call("items.getRelated", Some(serde_json::json!({"key": key})))?,
4528                "items",
4529                None,
4530                0,
4531            ),
4532            JsonStyle::Pretty,
4533        ),
4534        ItemsCommand::CitationKey { key, .. } => (
4535            client.call("items.citationKey", Some(serde_json::json!({"key": key})))?,
4536            JsonStyle::Pretty,
4537        ),
4538    };
4539    Ok((value, style))
4540}
4541
4542fn run_add_identifier_command(
4543    client: &mut impl RpcCaller,
4544    method: &str,
4545    param_name: &str,
4546    param_value: String,
4547    collection: Option<String>,
4548    dry_run: bool,
4549) -> Result<(Value, JsonStyle), String> {
4550    let mut params = Value::Object(serde_json::Map::from_iter([(
4551        param_name.to_string(),
4552        Value::String(param_value),
4553    )]));
4554    maybe_insert_collection(client, &mut params, collection)?;
4555    run_mutation_command(client, method, params, dry_run)
4556}
4557
4558fn run_mutation_command(
4559    client: &mut impl RpcCaller,
4560    method: &str,
4561    params: Value,
4562    dry_run: bool,
4563) -> Result<(Value, JsonStyle), String> {
4564    let value = if dry_run {
4565        serde_json::json!({
4566            "ok": true,
4567            "dryRun": true,
4568            "wouldCall": method,
4569            "wouldCallParams": params,
4570        })
4571    } else {
4572        client.call(method, Some(params))?
4573    };
4574    Ok((value, JsonStyle::PythonCompact))
4575}
4576
4577fn parse_field_options(fields: &[String]) -> Result<serde_json::Map<String, Value>, String> {
4578    let mut parsed = serde_json::Map::new();
4579    for field in fields {
4580        let (key, value) = field
4581            .split_once('=')
4582            .ok_or_else(|| format!("INVALID_ARGS: --field must be key=value, got: {field:?}"))?;
4583        parsed.insert(key.to_string(), Value::String(value.to_string()));
4584    }
4585    Ok(parsed)
4586}
4587
4588fn maybe_insert_collection(
4589    client: &mut impl RpcCaller,
4590    params: &mut Value,
4591    collection: Option<String>,
4592) -> Result<(), String> {
4593    let Some(collection) = collection else {
4594        return Ok(());
4595    };
4596    let collection = resolve_collection(client, &collection)?;
4597    let include = match &collection {
4598        Value::Null => false,
4599        Value::Number(number) => number.as_i64() != Some(0),
4600        _ => true,
4601    };
4602    if include {
4603        params
4604            .as_object_mut()
4605            .expect("mutation params are always objects")
4606            .insert("collection".to_string(), collection);
4607    }
4608    Ok(())
4609}
4610
4611fn run_settings_command(
4612    command: SettingsCommand,
4613    client: &mut impl RpcCaller,
4614) -> Result<(Value, JsonStyle), String> {
4615    let (value, style) = match command {
4616        SettingsCommand::Get { key, .. } => (
4617            client.call("settings.get", Some(serde_json::json!({"key": key})))?,
4618            JsonStyle::Pretty,
4619        ),
4620        SettingsCommand::List { .. } => (client.call("settings.getAll", None)?, JsonStyle::Pretty),
4621        SettingsCommand::Set {
4622            pairs,
4623            file,
4624            dry_run,
4625            ..
4626        } => {
4627            if let Some(file) = file {
4628                // --file mode: read JSON and call settings.setAll
4629                let raw = fs::read_to_string(&file)
4630                    .map_err(|err| format!("INVALID_JSON: Could not read JSON: {err}"))?;
4631                let settings: Value = serde_json::from_str(&raw)
4632                    .map_err(|err| format!("INVALID_JSON: Could not parse JSON: {err}"))?;
4633                if dry_run {
4634                    (
4635                        dry_run_value("settings.setAll", settings),
4636                        JsonStyle::PythonCompact,
4637                    )
4638                } else {
4639                    (
4640                        client.call("settings.setAll", Some(settings))?,
4641                        JsonStyle::PythonCompact,
4642                    )
4643                }
4644            } else if pairs.len() == 2 {
4645                // Single key=value: settings.set
4646                let key = &pairs[0];
4647                let value = &pairs[1];
4648                let parsed_value = serde_json::from_str::<Value>(value)
4649                    .unwrap_or(Value::String(value.clone()));
4650                let params = serde_json::json!({"key": key, "value": parsed_value});
4651                if dry_run {
4652                    (
4653                        dry_run_value("settings.set", params),
4654                        JsonStyle::PythonCompact,
4655                    )
4656                } else {
4657                    (
4658                        client.call("settings.set", Some(params))?,
4659                        JsonStyle::PythonCompact,
4660                    )
4661                }
4662            } else if pairs.len() > 2 && pairs.len() % 2 == 0 {
4663                // Multiple pairs: build a map and call settings.setAll
4664                let mut map = serde_json::Map::new();
4665                for chunk in pairs.chunks(2) {
4666                    let parsed = serde_json::from_str::<Value>(&chunk[1])
4667                        .unwrap_or(Value::String(chunk[1].clone()));
4668                    map.insert(chunk[0].clone(), parsed);
4669                }
4670                let settings = Value::Object(map);
4671                if dry_run {
4672                    (
4673                        dry_run_value("settings.setAll", settings),
4674                        JsonStyle::PythonCompact,
4675                    )
4676                } else {
4677                    (
4678                        client.call("settings.setAll", Some(settings))?,
4679                        JsonStyle::PythonCompact,
4680                    )
4681                }
4682            } else {
4683                return Err(
4684                    "INVALID_ARGS: provide key value pairs (even number of args) or --file".into(),
4685                );
4686            }
4687        }
4688    };
4689    Ok((value, style))
4690}
4691
4692fn run_tags_command(
4693    command: TagsCommand,
4694    client: &mut impl RpcCaller,
4695) -> Result<(Value, JsonStyle), String> {
4696    let (value, style) = match command {
4697        TagsCommand::List { limit, .. } => {
4698            let value = client.call("tags.list", Some(serde_json::json!({"limit": limit})))?;
4699            (normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty)
4700        }
4701        TagsCommand::Rename {
4702            old, new, dry_run, ..
4703        } => run_tag_mutation(
4704            client,
4705            "tags.rename",
4706            serde_json::json!({"oldName": old, "newName": new}),
4707            dry_run,
4708        )?,
4709        TagsCommand::Delete { tag, dry_run, .. } => run_tag_mutation(
4710            client,
4711            "tags.delete",
4712            serde_json::json!({"tag": tag}),
4713            dry_run,
4714        )?,
4715        TagsCommand::Add {
4716            keys, tags, dry_run, ..
4717        } => {
4718            if keys.len() == 1 {
4719                run_tag_mutation(
4720                    client,
4721                    "tags.add",
4722                    serde_json::json!({"key": keys[0], "tags": tags}),
4723                    dry_run,
4724                )?
4725            } else {
4726                run_tag_mutation(
4727                    client,
4728                    "tags.batchUpdate",
4729                    serde_json::json!({"keys": keys, "add": tags}),
4730                    dry_run,
4731                )?
4732            }
4733        }
4734        TagsCommand::Remove {
4735            keys, tags, dry_run, ..
4736        } => {
4737            if keys.len() == 1 {
4738                run_tag_mutation(
4739                    client,
4740                    "tags.remove",
4741                    serde_json::json!({"key": keys[0], "tags": tags}),
4742                    dry_run,
4743                )?
4744            } else {
4745                run_tag_mutation(
4746                    client,
4747                    "tags.batchUpdate",
4748                    serde_json::json!({"keys": keys, "remove": tags}),
4749                    dry_run,
4750                )?
4751            }
4752        }
4753    };
4754    Ok((value, style))
4755}
4756
4757fn run_tag_mutation(
4758    client: &mut impl RpcCaller,
4759    method: &str,
4760    params: Value,
4761    dry_run: bool,
4762) -> Result<(Value, JsonStyle), String> {
4763    if dry_run {
4764        Ok((dry_run_value(method, params), JsonStyle::PythonCompact))
4765    } else {
4766        Ok((client.call(method, Some(params))?, JsonStyle::PythonCompact))
4767    }
4768}
4769
4770fn dry_run_value(method: &str, params: Value) -> Value {
4771    serde_json::json!({
4772        "ok": true,
4773        "dryRun": true,
4774        "wouldCall": method,
4775        "wouldCallParams": params,
4776    })
4777}
4778
4779fn run_annotations_command(
4780    command: AnnotationsCommand,
4781    client: &mut impl RpcCaller,
4782) -> Result<(Value, JsonStyle), String> {
4783    let (value, style) = match command {
4784        AnnotationsCommand::List { parent, .. } => {
4785            let value = client.call(
4786                "annotations.list",
4787                Some(serde_json::json!({"parentKey": parent})),
4788            )?;
4789            let total = value
4790                .get("items")
4791                .and_then(Value::as_array)
4792                .map_or(0, |a| a.len()) as u64;
4793            (normalize_list_envelope(value, "items", Some(total), 0), JsonStyle::Pretty)
4794        }
4795        AnnotationsCommand::Create {
4796            parent,
4797            annotation_type,
4798            position,
4799            quote,
4800            page,
4801            sort_index,
4802            text,
4803            comment,
4804            color,
4805            dry_run,
4806            ..
4807        } => {
4808            let annotation_type = annotation_type.unwrap_or_else(|| "highlight".to_string());
4809            if !matches!(
4810                annotation_type.as_str(),
4811                "highlight" | "note" | "underline" | "image" | "ink"
4812            ) {
4813                return Err(format!(
4814                    "INVALID_ARGS: --type must be highlight|note|underline|image|ink, got {annotation_type:?}"
4815                ));
4816            }
4817            let mut params = serde_json::Map::new();
4818            params.insert("parentKey".to_string(), Value::String(parent));
4819            params.insert("type".to_string(), Value::String(annotation_type.clone()));
4820            params.insert("color".to_string(), Value::String(color));
4821
4822            if let Some(ref quote_text) = quote {
4823                if !matches!(annotation_type.as_str(), "highlight" | "underline") {
4824                    return Err(format!(
4825                        "INVALID_ARGS: --quote is only valid for highlight|underline, got {annotation_type:?}"
4826                    ));
4827                }
4828                params.insert("quote".to_string(), Value::String(quote_text.clone()));
4829                if let Some(page_idx) = page {
4830                    params.insert(
4831                        "pageIndex".to_string(),
4832                        Value::Number(page_idx.into()),
4833                    );
4834                }
4835                // When --quote is given, --position is optional
4836                if let Some(raw) = position {
4837                    let pos = serde_json::from_str::<Value>(&raw)
4838                        .map_err(|err| format!("INVALID_JSON: Could not parse --position: {err}"))?;
4839                    validate_annotation_position(annotation_type.as_str(), &pos)?;
4840                    params.insert("position".to_string(), pos);
4841                }
4842            } else {
4843                let position = position
4844                    .ok_or_else(|| "INVALID_ARGS: --position JSON is required (or use --quote)".to_string())
4845                    .and_then(|raw| {
4846                        serde_json::from_str::<Value>(&raw)
4847                            .map_err(|err| format!("INVALID_JSON: Could not parse --position: {err}"))
4848                    })?;
4849                validate_annotation_position(annotation_type.as_str(), &position)?;
4850                params.insert("position".to_string(), position);
4851            }
4852
4853            if let Some(sort_index) = sort_index {
4854                params.insert(
4855                    "sortIndex".to_string(),
4856                    parse_annotation_sort_index(sort_index)?,
4857                );
4858            }
4859            if let Some(text) = text {
4860                params.insert("text".to_string(), Value::String(text));
4861            }
4862            if let Some(comment) = comment {
4863                params.insert("comment".to_string(), Value::String(comment));
4864            }
4865            run_mutating_command(client, "annotations.create", Value::Object(params), dry_run)?
4866        }
4867        AnnotationsCommand::Delete {
4868            annotation_key,
4869            dry_run,
4870            ..
4871        } => run_mutating_command(
4872            client,
4873            "annotations.delete",
4874            serde_json::json!({"key": annotation_key}),
4875            dry_run,
4876        )?,
4877    };
4878    Ok((value, style))
4879}
4880
4881fn validate_annotation_position(annotation_type: &str, position: &Value) -> Result<(), String> {
4882    position
4883        .get("pageIndex")
4884        .and_then(Value::as_i64)
4885        .filter(|value| *value >= 0)
4886        .ok_or_else(|| {
4887            "INVALID_ARGS: --position must include a non-negative integer pageIndex".to_string()
4888        })?;
4889
4890    if annotation_type == "ink" {
4891        let has_paths = position
4892            .get("paths")
4893            .and_then(Value::as_array)
4894            .is_some_and(|paths| !paths.is_empty());
4895        if !has_paths {
4896            return Err("INVALID_ARGS: ink --position must include non-empty paths".to_string());
4897        }
4898        return Ok(());
4899    }
4900
4901    let valid_rects = position
4902        .get("rects")
4903        .and_then(Value::as_array)
4904        .is_some_and(|rects| !rects.is_empty() && rects.iter().all(is_annotation_rect));
4905    if !valid_rects {
4906        return Err(
4907            "INVALID_ARGS: --position must include non-empty rects of [x1, y1, x2, y2]".to_string(),
4908        );
4909    }
4910    Ok(())
4911}
4912
4913fn is_annotation_rect(value: &Value) -> bool {
4914    value.as_array().is_some_and(|coords| {
4915        coords.len() == 4
4916            && coords
4917                .iter()
4918                .all(|coord| coord.as_f64().is_some_and(f64::is_finite))
4919    })
4920}
4921
4922fn parse_annotation_sort_index(raw: String) -> Result<Value, String> {
4923    let parsed = serde_json::from_str::<Value>(&raw).unwrap_or_else(|_| Value::String(raw));
4924    let valid = match &parsed {
4925        Value::Number(number) => number.as_f64().is_some_and(f64::is_finite),
4926        Value::String(value) => {
4927            is_zotero_pdf_sort_index(value.trim())
4928                || (!value.trim().is_empty()
4929                    && value.trim().parse::<f64>().is_ok_and(f64::is_finite))
4930        }
4931        _ => false,
4932    };
4933    if valid {
4934        Ok(parsed)
4935    } else {
4936        Err(format!(
4937            "INVALID_ARGS: --sort-index must be a finite number or numeric string, got {parsed}"
4938        ))
4939    }
4940}
4941
/// Returns `true` when `value` has the shape `NNNNN|NNNNNN|NNNNN`.
///
/// That is exactly three `|`-separated segments of 5, 6 and 5 ASCII digits. No trimming
/// is performed here.
fn is_zotero_pdf_sort_index(value: &str) -> bool {
    // Required byte width of each segment, in order.
    const SEGMENT_WIDTHS: [usize; 3] = [5, 6, 5];

    let mut segments = value.split('|');
    let segments_ok = SEGMENT_WIDTHS.iter().all(|&width| {
        segments.next().is_some_and(|segment| {
            segment.len() == width && segment.bytes().all(|byte| byte.is_ascii_digit())
        })
    });
    // A fourth segment (even an empty one after a trailing `|`) disqualifies the value.
    segments_ok && segments.next().is_none()
}
4955
4956fn run_attachments_command(
4957    command: AttachmentsCommand,
4958    client: &mut impl RpcCaller,
4959) -> Result<(Value, JsonStyle), String> {
4960    let value = match command {
4961        AttachmentsCommand::List {
4962            parent,
4963            limit,
4964            offset,
4965            ..
4966        } => normalize_list_envelope(
4967            client.call(
4968                "attachments.list",
4969                Some(serde_json::json!({"parentKey": parent})),
4970            )?,
4971            "items",
4972            Some(limit),
4973            offset,
4974        ),
4975        AttachmentsCommand::Get { key, .. } => {
4976            client.call("attachments.get", Some(serde_json::json!({"key": key})))?
4977        }
4978        AttachmentsCommand::Fulltext { key, .. } => client.call(
4979            "attachments.getFulltext",
4980            Some(serde_json::json!({"key": key})),
4981        )?,
4982        AttachmentsCommand::Path { key, .. } => localize_attachment_path_response(
4983            client.call("attachments.getPath", Some(serde_json::json!({"key": key})))?,
4984        ),
4985        AttachmentsCommand::Add {
4986            parent,
4987            path,
4988            from_url,
4989            title,
4990            dry_run,
4991            ..
4992        } => {
4993            match (path, from_url) {
4994                (Some(p), None) => {
4995                    let mut params = serde_json::json!({"parentKey": parent, "path": zotero_path(&p)});
4996                    insert_optional_string(&mut params, "title", title);
4997                    if dry_run {
4998                        return Ok((
4999                            dry_run_value("attachments.add", params),
5000                            JsonStyle::PythonCompact,
5001                        ));
5002                    }
5003                    return Ok((
5004                        client.call("attachments.add", Some(params))?,
5005                        JsonStyle::PythonCompact,
5006                    ));
5007                }
5008                (None, Some(u)) => {
5009                    let mut params = serde_json::json!({"parentKey": parent, "url": u});
5010                    insert_optional_string(&mut params, "title", title);
5011                    if dry_run {
5012                        return Ok((
5013                            dry_run_value("attachments.addByURL", params),
5014                            JsonStyle::PythonCompact,
5015                        ));
5016                    }
5017                    return Ok((
5018                        client.call("attachments.addByURL", Some(params))?,
5019                        JsonStyle::PythonCompact,
5020                    ));
5021                }
5022                (Some(_), Some(_)) => {
5023                    return Err("INVALID_ARGS: --path and --from-url are mutually exclusive".to_string());
5024                }
5025                (None, None) => {
5026                    return Err("INVALID_ARGS: either --path or --from-url is required".to_string());
5027                }
5028            }
5029        }
5030        AttachmentsCommand::Delete { key, dry_run, .. } => {
5031            let params = serde_json::json!({"key": key});
5032            if dry_run {
5033                return Ok((
5034                    dry_run_value("attachments.delete", params),
5035                    JsonStyle::PythonCompact,
5036                ));
5037            }
5038            return Ok((
5039                client.call("attachments.delete", Some(params))?,
5040                JsonStyle::PythonCompact,
5041            ));
5042        }
5043        AttachmentsCommand::FindPdf { parent, .. } => client.call(
5044            "attachments.findPDF",
5045            Some(serde_json::json!({"parentKey": parent})),
5046        )?,
5047    };
5048    Ok((value, JsonStyle::Pretty))
5049}
5050
5051fn localize_attachment_path_response(mut value: Value) -> Value {
5052    if let Some(path) = value.get("path").and_then(Value::as_str) {
5053        let local = local_path_from_zotero_path(path);
5054        if let Some(map) = value.as_object_mut() {
5055            map.insert("path".to_string(), Value::String(local));
5056        }
5057    }
5058    value
5059}
5060
5061fn run_notes_command(
5062    command: NotesCommand,
5063    client: &mut impl RpcCaller,
5064) -> Result<(Value, JsonStyle), String> {
5065    let (value, style) = match command {
5066        NotesCommand::List {
5067            parent,
5068            limit,
5069            offset,
5070            ..
5071        } => {
5072            let value = client.call(
5073                "notes.list",
5074                Some(serde_json::json!({"parentKey": parent})),
5075            )?;
5076            (normalize_list_envelope(value, "items", Some(limit), offset), JsonStyle::Pretty)
5077        }
5078        NotesCommand::Get { note_key, .. } => {
5079            let value = client.call("notes.get", Some(serde_json::json!({"key": note_key})))?;
5080            (value, JsonStyle::Pretty)
5081        }
5082        NotesCommand::Create {
5083            parent,
5084            content,
5085            tags,
5086            dry_run,
5087            ..
5088        } => {
5089            let mut params = serde_json::Map::new();
5090            params.insert("parentKey".to_string(), Value::String(parent));
5091            params.insert("content".to_string(), Value::String(content));
5092            if !tags.is_empty() {
5093                params.insert(
5094                    "tags".to_string(),
5095                    Value::Array(tags.into_iter().map(Value::String).collect()),
5096                );
5097            }
5098            run_mutating_command(client, "notes.create", Value::Object(params), dry_run)?
5099        }
5100        NotesCommand::Update {
5101            note_key,
5102            content,
5103            dry_run,
5104            ..
5105        } => run_mutating_command(
5106            client,
5107            "notes.update",
5108            serde_json::json!({"key": note_key, "content": content}),
5109            dry_run,
5110        )?,
5111        NotesCommand::Delete {
5112            note_key, dry_run, ..
5113        } => {
5114            // Python CLI intentionally routes note deletion through items.delete.
5115            run_mutating_command(
5116                client,
5117                "items.delete",
5118                serde_json::json!({"key": note_key}),
5119                dry_run,
5120            )?
5121        }
5122        NotesCommand::Search { query, limit, .. } => {
5123            let value = client.call(
5124                "notes.search",
5125                Some(serde_json::json!({"query": query, "limit": limit})),
5126            )?;
5127            (normalize_list_envelope(value, "items", Some(limit), 0), JsonStyle::Pretty)
5128        }
5129    };
5130    Ok((value, style))
5131}
5132
5133fn run_mutating_command(
5134    client: &mut impl RpcCaller,
5135    method: &str,
5136    params: Value,
5137    dry_run: bool,
5138) -> Result<(Value, JsonStyle), String> {
5139    if dry_run {
5140        Ok((
5141            serde_json::json!({
5142                "ok": true,
5143                "dryRun": true,
5144                "wouldCall": method,
5145                "wouldCallParams": params,
5146            }),
5147            JsonStyle::PythonCompact,
5148        ))
5149    } else {
5150        client
5151            .call(method, Some(params))
5152            .map(|value| (value, JsonStyle::PythonCompact))
5153    }
5154}
5155
5156fn run_collections_command(
5157    command: CollectionsCommand,
5158    client: &mut impl RpcCaller,
5159) -> Result<(Value, JsonStyle), String> {
5160    let value = match command {
5161        CollectionsCommand::List { .. } => normalize_list_envelope(
5162            client.call("collections.list", None)?,
5163            "items",
5164            None,
5165            0,
5166        ),
5167        CollectionsCommand::Tree { .. } => client.call("collections.tree", None)?,
5168        CollectionsCommand::Get { name_or_id, .. } => {
5169            let key = resolve_collection(client, &name_or_id)?;
5170            client.call("collections.get", Some(serde_json::json!({"key": key})))?
5171        }
5172        CollectionsCommand::GetItems {
5173            name_or_id,
5174            limit,
5175            offset,
5176            ..
5177        } => {
5178            let key = resolve_collection(client, &name_or_id)?;
5179            let mut params = serde_json::json!({"key": key});
5180            if let Some(map) = params.as_object_mut() {
5181                if let Some(limit) = limit {
5182                    map.insert("limit".to_string(), Value::Number(limit.into()));
5183                }
5184                if offset > 0 {
5185                    map.insert("offset".to_string(), Value::Number(offset.into()));
5186                }
5187            }
5188            normalize_list_envelope(
5189                client.call("collections.getItems", Some(params))?,
5190                "items",
5191                limit,
5192                offset,
5193            )
5194        }
5195        CollectionsCommand::Stats { name_or_id, .. } => {
5196            let key = resolve_collection(client, &name_or_id)?;
5197            client.call("collections.stats", Some(serde_json::json!({"key": key})))?
5198        }
5199        CollectionsCommand::Rename {
5200            old_name,
5201            new_name,
5202            dry_run,
5203            ..
5204        } => {
5205            let key = resolve_mutable_collection(client, &old_name, "rename")?;
5206            let params = serde_json::json!({"key": key, "name": new_name});
5207            if dry_run {
5208                return Ok((
5209                    dry_run_value("collections.rename", params),
5210                    JsonStyle::PythonCompact,
5211                ));
5212            }
5213            return Ok((
5214                client.call("collections.rename", Some(params))?,
5215                JsonStyle::PythonCompact,
5216            ));
5217        }
5218        CollectionsCommand::Create {
5219            name,
5220            parent,
5221            dry_run,
5222            ..
5223        } => {
5224            let mut params = serde_json::json!({"name": name});
5225            if let Some(parent) = parent {
5226                let parent_key = resolve_mutable_collection(client, &parent, "use as parent")?;
5227                if let Some(map) = params.as_object_mut() {
5228                    map.insert("parentKey".to_string(), parent_key);
5229                }
5230            }
5231            if dry_run {
5232                return Ok((
5233                    dry_run_value("collections.create", params),
5234                    JsonStyle::PythonCompact,
5235                ));
5236            }
5237            return Ok((
5238                client.call("collections.create", Some(params))?,
5239                JsonStyle::PythonCompact,
5240            ));
5241        }
5242        CollectionsCommand::Delete {
5243            name_or_id,
5244            dry_run,
5245            ..
5246        } => {
5247            let key = resolve_mutable_collection(client, &name_or_id, "delete")?;
5248            let params = serde_json::json!({"key": key});
5249            if dry_run {
5250                return Ok((
5251                    dry_run_value("collections.delete", params),
5252                    JsonStyle::PythonCompact,
5253                ));
5254            }
5255            return Ok((
5256                client.call("collections.delete", Some(params))?,
5257                JsonStyle::PythonCompact,
5258            ));
5259        }
5260        CollectionsCommand::AddItems {
5261            collection,
5262            item_keys,
5263            dry_run,
5264            ..
5265        } => {
5266            let key = resolve_mutable_collection(client, &collection, "add to")?;
5267            let params = serde_json::json!({"key": key, "keys": item_keys});
5268            if dry_run {
5269                return Ok((
5270                    dry_run_value("collections.addItems", params),
5271                    JsonStyle::PythonCompact,
5272                ));
5273            }
5274            return Ok((
5275                client.call("collections.addItems", Some(params))?,
5276                JsonStyle::PythonCompact,
5277            ));
5278        }
5279        CollectionsCommand::RemoveItems {
5280            collection,
5281            item_keys,
5282            dry_run,
5283            ..
5284        } => {
5285            let key = resolve_mutable_collection(client, &collection, "operate on")?;
5286            let params = serde_json::json!({"key": key, "keys": item_keys});
5287            if dry_run {
5288                return Ok((
5289                    dry_run_value("collections.removeItems", params),
5290                    JsonStyle::PythonCompact,
5291                ));
5292            }
5293            return Ok((
5294                client.call("collections.removeItems", Some(params))?,
5295                JsonStyle::PythonCompact,
5296            ));
5297        }
5298    };
5299    Ok((value, JsonStyle::Pretty))
5300}
5301
5302fn resolve_export_keys(
5303    client: &mut impl RpcCaller,
5304    mut keys: Vec<String>,
5305    collection: Option<String>,
5306) -> Result<Vec<String>, String> {
5307    if let Some(name) = collection {
5308        let col_key = resolve_collection(client, &name)?;
5309        let response = client.call(
5310            "collections.getItems",
5311            Some(serde_json::json!({"key": col_key})),
5312        )?;
5313        let items = collection_items(&response);
5314        for item in items {
5315            if let Some(key) = item.get("key").and_then(Value::as_str) {
5316                if !keys.contains(&key.to_string()) {
5317                    keys.push(key.to_string());
5318                }
5319            }
5320        }
5321    }
5322    if keys.is_empty() {
5323        return Err("No item keys provided. Pass positional keys and/or --collection.".to_string());
5324    }
5325    Ok(keys)
5326}
5327
5328fn run_export(args: ExportArgs, client: &mut impl RpcCaller) -> Result<String, String> {
5329    let keys = resolve_export_keys(client, args.keys, args.collection)?;
5330    match args.format.as_str() {
5331        "bibtex" => run_export_content_command(client, "export.bibtex", keys),
5332        "ris" => run_export_content_command(client, "export.ris", keys),
5333        "csl-json" => {
5334            let response =
5335                client.call("export.cslJson", Some(serde_json::json!({"keys": keys})))?;
5336            if let Some(content) = response.get("content") {
5337                format_json(content, JsonStyle::Pretty)
5338            } else {
5339                format_json(&response, JsonStyle::PythonCompact)
5340            }
5341        }
5342        "bibliography" => {
5343            let response = client.call(
5344                "export.bibliography",
5345                Some(serde_json::json!({"keys": keys, "style": args.style})),
5346            )?;
5347            if let Some(object) = response.as_object() {
5348                let field = if args.html { "html" } else { "text" };
5349                if object.contains_key("html") || object.contains_key("text") {
5350                    return raw_value_output(
5351                        object.get(field).unwrap_or(&Value::String(String::new())),
5352                    );
5353                }
5354            }
5355            format_json(&response, JsonStyle::PythonCompact)
5356        }
5357        other => Err(format!(
5358            "INVALID_ARGS: unknown format {other:?}, expected bibtex/ris/csl-json/bibliography"
5359        )),
5360    }
5361}
5362
5363fn run_export_content_command(
5364    client: &mut impl RpcCaller,
5365    method: &str,
5366    keys: Vec<String>,
5367) -> Result<String, String> {
5368    let response = client.call(method, Some(serde_json::json!({"keys": keys})))?;
5369    if let Some(content) = response.get("content") {
5370        raw_value_output(content)
5371    } else {
5372        format_json(&response, JsonStyle::PythonCompact)
5373    }
5374}
5375
5376fn raw_value_output(value: &Value) -> Result<String, String> {
5377    let mut out = match value {
5378        Value::Null => String::new(),
5379        Value::String(content) => content.clone(),
5380        other => to_python_repr(other),
5381    };
5382    out.push('\n');
5383    Ok(out)
5384}
5385
5386fn to_python_repr(value: &Value) -> String {
5387    match value {
5388        Value::Null => "None".to_string(),
5389        Value::Bool(value) => {
5390            if *value {
5391                "True".to_string()
5392            } else {
5393                "False".to_string()
5394            }
5395        }
5396        Value::Number(value) => value.to_string(),
5397        Value::String(value) => format!("'{}'", value.replace('\\', "\\\\").replace('\'', "\\'")),
5398        Value::Array(values) => {
5399            let inner = values
5400                .iter()
5401                .map(to_python_repr)
5402                .collect::<Vec<_>>()
5403                .join(", ");
5404            format!("[{inner}]")
5405        }
5406        Value::Object(entries) => {
5407            let inner = entries
5408                .iter()
5409                .map(|(key, value)| {
5410                    format!("'{}': {}", key.replace('\'', "\\'"), to_python_repr(value))
5411                })
5412                .collect::<Vec<_>>()
5413                .join(", ");
5414            format!("{{{inner}}}")
5415        }
5416    }
5417}
5418
5419fn resolve_collection(client: &mut impl RpcCaller, name_or_id: &str) -> Result<Value, String> {
5420    let trimmed = name_or_id.trim();
5421    if let Ok(id) = trimmed.parse::<i64>() {
5422        return Ok(Value::Number(id.into()));
5423    }
5424
5425    let collections = client.call("collections.list", None)?;
5426    let items = collections
5427        .get("items")
5428        .and_then(Value::as_array)
5429        .or_else(|| collections.as_array())
5430        .ok_or_else(|| "collections.list returned non-array result".to_string())?;
5431
5432    if let Some(collection) = items
5433        .iter()
5434        .find(|collection| collection.get("key").and_then(Value::as_str) == Some(trimmed))
5435    {
5436        return collection_key(collection);
5437    }
5438
5439    let exact = items
5440        .iter()
5441        .filter(|collection| collection.get("name").and_then(Value::as_str) == Some(trimmed))
5442        .collect::<Vec<_>>();
5443    if exact.len() == 1 {
5444        return collection_key(exact[0]);
5445    }
5446
5447    let needle = normalize_collection_name(trimmed);
5448    let fuzzy = items
5449        .iter()
5450        .filter(|collection| {
5451            collection
5452                .get("name")
5453                .and_then(Value::as_str)
5454                .map(normalize_collection_name)
5455                .is_some_and(|name| name.contains(&needle))
5456        })
5457        .collect::<Vec<_>>();
5458
5459    match fuzzy.len() {
5460        1 => collection_key(fuzzy[0]),
5461        0 => Err(format!(
5462            "COLLECTION_NOT_FOUND: No collection named {trimmed:?}"
5463        )),
5464        _ => Err(format!(
5465            "COLLECTION_AMBIGUOUS: Multiple collections match {trimmed:?}"
5466        )),
5467    }
5468}
5469
5470fn collection_key(collection: &Value) -> Result<Value, String> {
5471    collection
5472        .get("key")
5473        .cloned()
5474        .ok_or_else(|| "collection result is missing key".to_string())
5475}
5476
5477fn resolve_mutable_collection(
5478    client: &mut impl RpcCaller,
5479    name_or_id: &str,
5480    operation: &str,
5481) -> Result<Value, String> {
5482    let key = resolve_collection(client, name_or_id)?;
5483    if key.as_i64() == Some(0) {
5484        return Err(format!(
5485            "COLLECTION_NOT_FOUND: {name_or_id:?} resolved to library root (cannot {operation})"
5486        ));
5487    }
5488    Ok(key)
5489}
5490
5491fn insert_optional_string(value: &mut Value, key: &str, maybe: Option<String>) {
5492    if let (Some(map), Some(content)) = (value.as_object_mut(), maybe) {
5493        map.insert(key.to_string(), Value::String(content));
5494    }
5495}
5496
/// Normalizes a collection name for loose comparison.
///
/// Leading and trailing whitespace is dropped, every internal whitespace run is
/// collapsed to a single space, and the result is lowercased.
fn normalize_collection_name(name: &str) -> String {
    let mut collapsed = String::with_capacity(name.len());
    for word in name.split_whitespace() {
        // Words are never empty, so an empty buffer means this is the first word.
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed.to_lowercase()
}
5503
5504fn format_json(value: &Value, style: JsonStyle) -> Result<String, String> {
5505    let mut out = match style {
5506        JsonStyle::PythonCompact => to_python_compact_json(value),
5507        JsonStyle::Pretty => serde_json::to_string_pretty(value).map_err(|err| err.to_string())?,
5508    };
5509    out.push('\n');
5510    Ok(out)
5511}
5512
5513fn to_python_compact_json(value: &Value) -> String {
5514    match value {
5515        Value::Null => "null".to_string(),
5516        Value::Bool(value) => value.to_string(),
5517        Value::Number(value) => value.to_string(),
5518        Value::String(value) => {
5519            serde_json::to_string(value).expect("string serialization cannot fail")
5520        }
5521        Value::Array(values) => {
5522            let inner = values
5523                .iter()
5524                .map(to_python_compact_json)
5525                .collect::<Vec<_>>()
5526                .join(", ");
5527            format!("[{inner}]")
5528        }
5529        Value::Object(entries) => {
5530            let inner = entries
5531                .iter()
5532                .map(|(key, value)| {
5533                    let key = serde_json::to_string(key).expect("string serialization cannot fail");
5534                    format!("{key}: {}", to_python_compact_json(value))
5535                })
5536                .collect::<Vec<_>>()
5537                .join(", ");
5538            format!("{{{inner}}}")
5539        }
5540    }
5541}