Skip to main content

zotron_cli/
lib.rs

1//! Minimal typed CLI surface for the Rust migration scaffold.
2
3use std::ffi::OsString;
4
5use clap::{error::ErrorKind, Parser, Subcommand};
6use serde_json::Value;
7use zotron_rpc::ZoteroRpc;
8use zotron_types::{builtin_ocr_provider_specs, DEFAULT_RPC_URL};
9
10mod commands;
11mod ocr;
12mod output;
13mod rag;
14mod rpc;
15mod sources;
16
17use crate::commands::*;
18use crate::ocr::*;
19use crate::output::*;
20pub use crate::output::{classify_error, format_error_json};
21use crate::rag::*;
22pub use crate::rag::{fetch_rerank_settings, RerankSettings};
23use crate::rpc::*;
24pub use crate::rpc::RpcCaller;
25use crate::sources::{run_external_command, run_sources_list, run_sources_sync};
26
/// Serializable, CLI-facing description of one OCR provider contract.
///
/// Instances are built by `cli_ocr_provider_spec` from a
/// `zotron_types::OcrProviderSpec`; all string fields are `&'static str`.
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
pub struct CliOcrProviderSpec {
    /// Provider identifier (populated from the upstream `provider_key`).
    pub id: &'static str,
    /// Provider key (populated from the same upstream `provider_key` as `id`).
    pub provider: &'static str,
    /// String form of the upstream request style (`request_style.as_str()`).
    pub request_style: &'static str,
    /// Authentication scheme label, copied from the upstream spec.
    pub auth: &'static str,
    /// Authentication header label, copied from the upstream spec.
    pub auth_header: &'static str,
    /// Copied from the upstream spec; presumably whether a PDF can be sent
    /// to the provider directly — confirm against `zotron_types`.
    pub supports_pdf_direct: bool,
    /// Key-field name, copied from the upstream spec.
    pub key_field: &'static str,
}
37
/// Serializable, CLI-facing description of one embedding provider contract.
///
/// Instances are built by `embedding_provider_spec` from the spec returned by
/// `zotron_types::embedding_provider_spec`.
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
pub struct CliEmbeddingProviderSpec {
    /// Provider identifier (upstream `id`).
    pub id: &'static str,
    /// Provider key (upstream `provider_key`).
    pub provider: &'static str,
    /// Request style label; `"dashscope"` when the provider key is
    /// `"alibaba"`, otherwise the upstream `request_style.as_str()`.
    pub request_style: &'static str,
    /// Default endpoint URL; the empty string when upstream provides none.
    pub default_url: String,
    /// Default model name, copied from the upstream spec.
    pub default_model: &'static str,
    /// Authentication scheme label, copied from the upstream spec.
    pub auth: &'static str,
    /// Key-field name, copied from the upstream spec.
    pub key_field: &'static str,
}
48
49pub fn ocr_provider_specs() -> Vec<CliOcrProviderSpec> {
50    builtin_ocr_provider_specs()
51        .into_iter()
52        .map(cli_ocr_provider_spec)
53        .collect()
54}
55
56pub fn ocr_provider_spec(provider: &str) -> Result<CliOcrProviderSpec, String> {
57    zotron_types::ocr_provider_spec(provider).map(cli_ocr_provider_spec)
58}
59
60pub fn embedding_provider_spec(provider: &str) -> Result<CliEmbeddingProviderSpec, String> {
61    let spec = zotron_types::embedding_provider_spec(provider)?;
62    Ok(CliEmbeddingProviderSpec {
63        id: spec.id,
64        provider: spec.provider_key,
65        request_style: if spec.provider_key == "alibaba" {
66            "dashscope"
67        } else {
68            spec.request_style.as_str()
69        },
70        default_url: spec.default_url.unwrap_or("").to_string(),
71        default_model: spec.default_model,
72        auth: spec.auth,
73        key_field: spec.key_field,
74    })
75}
76
77pub fn chunks_from_blocks(blocks: &[Value], max_chars: usize) -> Result<Vec<Value>, String> {
78    let typed = blocks
79        .iter()
80        .map(json_block_to_pdf_block)
81        .collect::<Result<Vec<_>, _>>()?;
82    let chunks = zotron_types::chunks_from_blocks(&typed, max_chars);
83    chunks
84        .into_iter()
85        .map(|chunk| chunk_to_cli_value(&chunk, &typed))
86        .collect()
87}
88
89fn cli_ocr_provider_spec(spec: zotron_types::OcrProviderSpec) -> CliOcrProviderSpec {
90    CliOcrProviderSpec {
91        id: spec.provider_key,
92        provider: spec.provider_key,
93        request_style: spec.request_style.as_str(),
94        auth: spec.auth,
95        auth_header: spec.auth_header,
96        supports_pdf_direct: spec.supports_pdf_direct,
97        key_field: spec.key_field,
98    }
99}
100
101fn json_block_to_pdf_block(value: &Value) -> Result<zotron_types::PdfEvidenceBlock, String> {
102    let block_key = value
103        .get("block_key")
104        .and_then(Value::as_str)
105        .ok_or_else(|| "block missing block_key".to_string())?
106        .to_string();
107    let item_key = value
108        .get("item_key")
109        .and_then(Value::as_str)
110        .ok_or_else(|| "block missing item_key".to_string())?
111        .to_string();
112    let attachment_key = value
113        .get("attachment_key")
114        .and_then(Value::as_str)
115        .ok_or_else(|| "block missing attachment_key".to_string())?
116        .to_string();
117    let page_idx = value
118        .get("page_idx")
119        .or_else(|| value.get("page"))
120        .and_then(Value::as_u64)
121        .unwrap_or(1);
122    let block_type = value
123        .get("type")
124        .or_else(|| value.get("block_type"))
125        .and_then(Value::as_str)
126        .unwrap_or("paragraph")
127        .to_string();
128    let section_path = value
129        .get("section_path")
130        .and_then(Value::as_array)
131        .map(|items| {
132            items
133                .iter()
134                .filter_map(Value::as_str)
135                .map(ToString::to_string)
136                .collect::<Vec<_>>()
137        })
138        .unwrap_or_default();
139    let text = value
140        .get("text")
141        .and_then(Value::as_str)
142        .unwrap_or("")
143        .to_string();
144    let bbox = value.get("bbox").and_then(value_bbox4);
145
146    Ok(zotron_types::PdfEvidenceBlock {
147        block_key,
148        item_key,
149        attachment_key,
150        page_idx,
151        block_type,
152        bbox,
153        section_path,
154        text,
155    })
156}
157
158fn chunk_to_cli_value(
159    chunk: &zotron_types::StructureChunk,
160    blocks: &[zotron_types::PdfEvidenceBlock],
161) -> Result<Value, String> {
162    let refs = chunk
163        .block_keys
164        .iter()
165        .filter_map(|key| blocks.iter().find(|block| &block.block_key == key))
166        .map(|block| {
167            serde_json::json!({
168                "blockKey": block.block_key,
169                "pageIdx": block.page_idx,
170                "bbox": block.bbox.map(|bbox| bbox.iter().map(|n| {
171                    if n.fract() == 0.0 {
172                        Value::from(*n as i64)
173                    } else {
174                        Value::from(*n)
175                    }
176                }).collect::<Vec<_>>()),
177            })
178        })
179        .collect::<Vec<_>>();
180    Ok(serde_json::json!({
181        "chunkKey": chunk.chunk_key,
182        "itemKey": chunk.item_key,
183        "attachmentKey": chunk.attachment_key,
184        "blockKeys": chunk.block_keys,
185        "sectionPath": chunk.section_path,
186        "text": chunk.text,
187        "pageStart": chunk.page_start,
188        "pageEnd": chunk.page_end,
189        "evidenceRefs": refs,
190    }))
191}
192
193fn value_bbox4(value: &Value) -> Option<[f64; 4]> {
194    let arr = value.as_array()?;
195    if arr.len() != 4 {
196        return None;
197    }
198    Some([
199        arr[0].as_f64()?,
200        arr[1].as_f64()?,
201        arr[2].as_f64()?,
202        arr[3].as_f64()?,
203    ])
204}
205
// Root argument parser for the `zotron` binary (clap derive).
// NOTE: `///` comments on clap-derived fields are rendered as `--help` text,
// so they are user-facing strings; maintainer notes here use `//` instead.
#[derive(Debug, Parser)]
#[command(name = "zotron", about = "Rust client + CLI for the Zotron XPI")]
struct Cli {
    /// Zotero JSON-RPC endpoint. Applies to every subcommand.
    // `global = true` lets `--url` be given after any subcommand as well;
    // `run` passes this value to `ZoteroRpc::new`.
    #[arg(long, default_value = DEFAULT_RPC_URL, global = true)]
    url: String,
    // The selected top-level subcommand.
    #[command(subcommand)]
    command: Command,
}
215
// Subcommands of `zotron ocr` (clap derive).
// NOTE: `///` comments below are rendered by clap as help text; do not edit
// them as if they were ordinary documentation.
#[derive(Debug, Subcommand)]
pub(crate) enum OcrCommand {
    /// Print supported OCR provider contracts.
    Providers,
    /// Execute an OCR provider request from JSON and emit normalized blocks.
    #[command(name = "run")]
    Run {
        // The only required option of `run`; everything else is optional.
        #[arg(long)]
        provider: String,
        /// Path to an OcrRequestInput JSON file, or "-" to read stdin.
        #[arg(long)]
        input: Option<String>,
        /// Local PDF/image file to encode into an OcrRequestInput.
        #[arg(long)]
        file: Option<String>,
        /// Zotero item key used when --file builds the OCR request.
        #[arg(long = "item-key")]
        item_key: Option<String>,
        /// Zotero attachment key used when --file builds the OCR request.
        #[arg(long = "attachment-key")]
        attachment_key: Option<String>,
        /// MIME type used when --file builds the OCR request.
        #[arg(long = "mime-type")]
        mime_type: Option<String>,
        /// Override the provider endpoint, required for service-hosted PaddleOCR-VL.
        #[arg(long)]
        endpoint: Option<String>,
        /// Environment variable containing the provider bearer token.
        #[arg(long = "api-key-env")]
        api_key_env: Option<String>,
    },
    /// Show OCR statistics for a collection.
    Status {
        #[arg(long)]
        collection: String,
    },
    /// Re-chunk and re-embed existing OCR results without re-running OCR.
    Reindex {
        // Both selectors are optional at the parser level; how a missing
        // selector is handled is decided by the command handler (not shown here).
        #[arg(long)]
        collection: Option<String>,
        #[arg(long)]
        key: Option<String>,
        #[arg(long, help = "Only reindex items with stale schema version")]
        stale_only: bool,
        #[arg(long = "chunk-chars", default_value_t = 1200)]
        chunk_chars: usize,
        #[arg(long, help = "Re-parse blocks from the saved raw OCR response (back-fills parser improvements like heading detection) instead of re-chunking existing blocks. No OCR API call.")]
        reparse: bool,
    },
    /// Parse a Zotero PDF and write hidden sidecar OCR/RAG artifacts. Provider read from Zotero settings unless --provider is given.
    #[command(name = "process")]
    Process {
        /// Override OCR provider (default: read from Zotero settings ocr.provider).
        #[arg(long)]
        provider: Option<String>,
        /// Parent Zotero item key. Required unless --collection is given.
        // clap only enforces mutual exclusion with --collection here; the
        // "required unless" rule is not expressed in these attributes.
        #[arg(long, conflicts_with = "collection")]
        parent: Option<String>,
        /// Collection name (fuzzy) or key: OCR every item in the collection.
        #[arg(long)]
        collection: Option<String>,
        /// Zotero PDF attachment key (auto-resolved from --parent when omitted; ignored with --collection).
        // Despite "ignored" in the help text, clap rejects --attachment
        // together with --collection because of `conflicts_with`.
        #[arg(long, conflicts_with = "collection")]
        attachment: Option<String>,
        /// Public URL for MinerU cloud parsing. Use --result-dir/--result-zip for offline ingestion.
        #[arg(long = "source-url")]
        source_url: Option<String>,
        /// Already-extracted MinerU result directory, used by tests/offline replay.
        #[arg(long = "result-dir")]
        result_dir: Option<String>,
        /// Already-downloaded MinerU result zip, used by tests/offline replay.
        #[arg(long = "result-zip")]
        result_zip: Option<String>,
        /// Override provider endpoint (default: read from Zotero settings ocr.apiUrl).
        #[arg(long = "provider-endpoint")]
        provider_endpoint: Option<String>,
        /// Environment variable containing the provider bearer token (fallback: Zotero settings ocr.apiKey).
        #[arg(long = "api-key-env")]
        api_key_env: Option<String>,
        // Polling/timeout defaults: 5 and 900 (seconds, per the flag names).
        #[arg(long = "poll-interval-seconds", default_value_t = 5)]
        poll_interval_seconds: u64,
        #[arg(long = "timeout-seconds", default_value_t = 900)]
        timeout_seconds: u64,
        // Same default as `reindex --chunk-chars`.
        #[arg(long = "chunk-chars", default_value_t = 1200)]
        chunk_chars: usize,
    },
}
303
// Top-level subcommands of `zotron` (clap derive).
// NOTE: `///` comments below are rendered by clap as help text.
// `allow_external_subcommands` together with the `External` variant means an
// unrecognized subcommand name is captured instead of rejected by the parser.
#[derive(Debug, Subcommand)]
#[command(allow_external_subcommands = true)]
pub(crate) enum Command {
    /// Check that Zotero is running with the Zotron XPI enabled.
    Ping,
    /// Generic RPC escape hatch.
    Rpc {
        // Positional: RPC method name.
        method: String,
        // Positional: parameters as a JSON string; defaults to an empty object.
        #[arg(default_value = "{}")]
        params_json: String,
        #[arg(long)]
        paginate: bool,
        #[arg(long, default_value_t = 100)]
        page_size: usize,
    },
    /// Push prepared Zotero JSON (from file or stdin) to Zotero.
    Push {
        /// Path to a JSON file, or "-" to read from stdin.
        json_file: String,
        /// Optional PDF attachment path.
        #[arg(long)]
        pdf: Option<String>,
        /// Collection name (fuzzy) or key.
        #[arg(long)]
        collection: Option<String>,
        /// Duplicate handling: skip | update | create.
        // Stored as a free-form string: no `value_parser` restricts it to the
        // three values listed in the help text.
        #[arg(long = "on-duplicate", default_value = "skip")]
        on_duplicate: String,
        /// Parse input + resolve collection only; do not push to Zotero.
        #[arg(long = "dry-run")]
        dry_run: bool,
    },
    /// System and plugin introspection commands.
    System {
        #[command(subcommand)]
        command: SystemCommand,
    },
    /// Search items by text, tag, identifier, or structured conditions.
    Search(SearchArgs),
    /// Inspect and manage Zotero items.
    Items {
        #[command(subcommand)]
        command: ItemsCommand,
    },
    /// Inspect Zotero collections.
    Collections {
        #[command(subcommand)]
        command: CollectionsCommand,
    },
    /// Inspect Zotero notes.
    Notes {
        #[command(subcommand)]
        command: NotesCommand,
    },
    /// Inspect Zotero preferences.
    Settings {
        #[command(subcommand)]
        command: SettingsCommand,
    },
    /// Inspect and manage Zotero tags.
    Tags {
        #[command(subcommand)]
        command: TagsCommand,
    },
    /// Export items as BibTeX, RIS, CSL-JSON, or formatted bibliography.
    Export(ExportArgs),
    /// List, create, and delete PDF annotations.
    Annotations {
        #[command(subcommand)]
        command: AnnotationsCommand,
    },
    /// OCR PDFs and manage raw/block/chunk evidence artifacts.
    Ocr {
        #[command(subcommand)]
        command: OcrCommand,
    },
    /// Build and search retrieval artifacts.
    Rag {
        #[command(subcommand)]
        command: RagCommand,
    },
    /// Discover and manage source plugins (`zotron-*` on PATH).
    Sources {
        // Optional: `zotron sources` with no nested subcommand parses as `None`.
        #[command(subcommand)]
        command: Option<SourcesCommand>,
    },
    /// Transparent proxy: forward `zotron <name> [args]` to `zotron-<name>`.
    // Holds the raw external subcommand name followed by its arguments.
    #[command(external_subcommand)]
    External(Vec<OsString>),
}
394
// Subcommands of `zotron sources` (clap derive).
// NOTE: `///` comments below are rendered by clap as help text.
#[derive(Debug, Subcommand)]
pub(crate) enum SourcesCommand {
    /// List all discovered source plugins on PATH (the default action).
    List,
    /// Symlink plugin skills into the Claude Code plugin's `plugin/skills/`.
    Sync {
        /// Path to the repo's `plugin/skills/` directory (auto-discovered when omitted).
        // "Omitted" is represented by the empty-string default rather than
        // by an `Option`.
        #[arg(long, default_value = "")]
        skills_dir: String,
    },
}
406
/// Plain options bundle for a RAG search.
///
/// Field names and types mirror the arguments of `RagCommand::Search`;
/// presumably it is built from that variant by the command handler — confirm
/// at the construction site.
pub(crate) struct RagSearchOptions {
    /// Search query text.
    pub(crate) query: String,
    /// Optional collection filter.
    pub(crate) collection: Option<String>,
    /// Item keys to restrict the search to (may be empty).
    pub(crate) keys: Vec<String>,
    /// Mirrors the `--zotero` flag; its effect is defined by the search code.
    pub(crate) zotero: bool,
    /// Mirrors `--top-spans-per-item`.
    pub(crate) top_spans_per_item: u64,
    /// Mirrors `--include-fulltext-spans`.
    pub(crate) include_fulltext_spans: bool,
    /// Mirrors `--limit` / `--top-k`.
    pub(crate) top_k: u64,
    /// Output format name (the CLI accepts `json` or `jsonl`).
    pub(crate) output: String,
}
417
// Subcommands of `zotron rag` (clap derive).
// NOTE: `///` comments below are rendered by clap as help text.
#[derive(Debug, Subcommand)]
pub(crate) enum RagCommand {
    /// Print supported embedding provider contracts.
    #[command(name = "providers")]
    Providers,
    /// Execute an embedding provider request from JSON and emit vectors.
    #[command(name = "embed")]
    Embed {
        #[arg(long)]
        provider: String,
        /// Path to an EmbeddingRequestInput JSON file, or "-" to read stdin.
        // Required here, unlike `ocr run --input`, which is optional.
        #[arg(long)]
        input: String,
        /// Override the embedding endpoint.
        #[arg(long)]
        endpoint: Option<String>,
        /// Override the embedding model.
        #[arg(long)]
        model: Option<String>,
        /// Override provider input type, for example document or query.
        #[arg(long = "input-type")]
        input_type: Option<String>,
        /// Environment variable containing the provider bearer token.
        #[arg(long = "api-key-env")]
        api_key_env: Option<String>,
    },
    /// Show index status for a collection.
    Status {
        #[arg(long)]
        collection: String,
    },
    /// Emit academic-zh retrieval hits with itemKey/title/text provenance.
    #[command(name = "search")]
    Search {
        // Positional query text.
        query: String,
        #[arg(long)]
        collection: Option<String>,
        /// Limit retrieval to one or more Zotero item keys.
        // Accepted as `--key` or `--keys`; may be repeated.
        #[arg(long = "key", alias = "keys")]
        keys: Vec<String>,
        #[arg(long)]
        zotero: bool,
        #[arg(long = "top-spans-per-item", default_value_t = 3)]
        top_spans_per_item: u64,
        #[arg(long = "include-fulltext-spans")]
        include_fulltext_spans: bool,
        // Exposed as `--limit`, with `--top-k` as an alias.
        #[arg(long = "limit", alias = "top-k", default_value_t = 50)]
        top_k: u64,
        // Restricted by the parser to exactly `json` or `jsonl`.
        #[arg(long, default_value = "json", value_parser = ["json", "jsonl"])]
        output: String,
    },
}
470
// Subcommands of `zotron system` (clap derive).
// NOTE: `///` comments below are rendered by clap as help text.
#[derive(Debug, Subcommand)]
pub(crate) enum SystemCommand {
    /// Show XPI version and exposed method metadata.
    Version,
    /// List all libraries (user + groups).
    Libraries,
    /// Get statistics for the current (or specified) library.
    #[command(name = "library-stats")]
    LibraryStats {
        // Numeric library id; `None` selects the "current" library per the help text.
        #[arg(long)]
        library: Option<i64>,
    },
    /// Show item type schema. Without --type, lists all types. With --type, shows fields and creator types.
    Schema {
        // Exposed as `--type`; the field is named `item_type` because `type`
        // is a Rust keyword.
        #[arg(long = "type")]
        item_type: Option<String>,
    },
    /// Get the currently selected Zotero collection (or null).
    #[command(name = "current-collection")]
    CurrentCollection,
    /// List RPC methods, or describe a specific method.
    Methods {
        /// Method name to describe. Omit to list all methods.
        method: Option<String>,
    },
}
497
// Arguments of `zotron search` (clap derive). The command takes an optional
// positional query plus filters, and also hosts an optional nested
// subcommand for saved-search management.
// NOTE: `///` comments below are rendered by clap as help text.
#[derive(Debug, clap::Args)]
pub(crate) struct SearchArgs {
    /// Search query (title/creator/year by default; PDF content with --fulltext).
    pub(crate) query: Option<String>,
    /// Search inside PDF full-text content instead of metadata.
    #[arg(long)]
    pub(crate) fulltext: bool,
    /// Filter by author/creator name (contains match).
    #[arg(long)]
    pub(crate) author: Option<String>,
    /// Filter by date after (YYYY or YYYY-MM-DD).
    #[arg(long)]
    pub(crate) after: Option<String>,
    /// Filter by date before (YYYY or YYYY-MM-DD).
    #[arg(long)]
    pub(crate) before: Option<String>,
    /// Filter by journal/publication title (contains match).
    #[arg(long)]
    pub(crate) journal: Option<String>,
    /// Filter by tag (exact match).
    #[arg(long)]
    pub(crate) tag: Option<String>,
    /// Find by DOI.
    #[arg(long)]
    pub(crate) doi: Option<String>,
    /// Find by ISBN.
    #[arg(long)]
    pub(crate) isbn: Option<String>,
    /// Find by ISSN.
    #[arg(long)]
    pub(crate) issn: Option<String>,
    /// Limit results to a collection name or key.
    #[arg(long)]
    pub(crate) collection: Option<String>,
    // Pagination: defaults are 50 results starting at offset 0.
    #[arg(long, default_value_t = 50)]
    pub(crate) limit: u64,
    #[arg(long, default_value_t = 0)]
    pub(crate) offset: u64,
    // Optional nested subcommand (saved-search management); `None` when the
    // invocation is a plain search.
    #[command(subcommand)]
    pub(crate) management: Option<SearchManagementCommand>,
}
539
// Nested subcommands of `zotron search` for saved searches (clap derive).
// NOTE: `///` comments below are rendered by clap as help text.
#[derive(Debug, Subcommand)]
pub(crate) enum SearchManagementCommand {
    /// List all saved searches in the library.
    #[command(name = "saved-searches")]
    SavedSearches,
    /// Create a saved search with one or more conditions.
    #[command(name = "create-saved")]
    CreateSaved {
        // Positional: name of the saved search.
        name: String,
        // `--condition` may be repeated and at least one is required; the
        // condition syntax is parsed by the handler, not here.
        #[arg(long = "condition", required = true)]
        condition: Vec<String>,
        #[arg(long)]
        dry_run: bool,
    },
    /// Delete a saved search by key.
    #[command(name = "delete-saved")]
    DeleteSaved {
        search_key: String,
        #[arg(long)]
        dry_run: bool,
    },
}
562
// Subcommands of `zotron items` (clap derive).
// NOTE: `///` comments below are rendered by clap as help text.
// Most mutating variants carry a `dry_run` flag; it has no help text here, and
// its exact effect is defined by the handlers (not shown in this file section).
#[derive(Debug, Subcommand)]
pub(crate) enum ItemsCommand {
    /// Add an item by DOI, ISBN, URL, local file, or manual entry (--type + --field).
    // The parser does not enforce that exactly one source is given; all of
    // the source options are independently optional.
    Add {
        #[arg(long)]
        doi: Option<String>,
        #[arg(long)]
        isbn: Option<String>,
        /// Web page URL to add from.
        #[arg(long = "from-url")]
        from_url: Option<String>,
        /// Local file path to add from.
        #[arg(long)]
        file: Option<String>,
        /// Item type for manual creation (e.g. journalArticle).
        #[arg(long = "type")]
        item_type: Option<String>,
        /// Field values for manual creation (e.g. title="My Paper").
        // Repeatable `--field`; each entry is kept as a raw string.
        #[arg(long = "field")]
        fields: Vec<String>,
        #[arg(long)]
        collection: Option<String>,
        #[arg(long)]
        dry_run: bool,
    },
    /// Update fields on an existing item.
    Update {
        key: String,
        #[arg(long = "field")]
        fields: Vec<String>,
        #[arg(long)]
        dry_run: bool,
    },
    /// Permanently delete an item.
    Delete {
        key: String,
        #[arg(long)]
        dry_run: bool,
    },
    /// Move one or more items to trash.
    Trash {
        // Positional list; clap accepts zero values for a bare `Vec<String>`,
        // so "one or more" is not enforced by the parser.
        items: Vec<String>,
        #[arg(long)]
        dry_run: bool,
    },
    /// Restore a trashed item.
    Restore {
        item: String,
        #[arg(long)]
        dry_run: bool,
    },
    /// Merge a group of duplicate items.
    #[command(name = "merge-duplicates")]
    MergeDuplicates {
        keys: Vec<String>,
        #[arg(long)]
        dry_run: bool,
    },
    /// Add a related-item link between two items.
    #[command(name = "add-related")]
    AddRelated {
        key: String,
        #[arg(long)]
        target: String,
        #[arg(long)]
        dry_run: bool,
    },
    /// Remove a related-item link between two items.
    #[command(name = "remove-related")]
    RemoveRelated {
        key: String,
        #[arg(long)]
        target: String,
        #[arg(long)]
        dry_run: bool,
    },
    /// Print the full serialization of an item by key.
    Get {
        item: String,
    },
    /// List items in the library with optional sorting and pagination.
    List {
        #[arg(long, default_value_t = 50)]
        limit: u64,
        #[arg(long, default_value_t = 0)]
        offset: u64,
        #[arg(long)]
        sort: Option<String>,
        // Free-form string defaulting to "asc"; no `value_parser` restricts it.
        #[arg(long, default_value = "asc")]
        direction: String,
        /// List trashed items instead of regular items.
        #[arg(long)]
        trash: bool,
    },
    /// Run Zotero's duplicate scan and print groups.
    #[command(name = "find-duplicates")]
    FindDuplicates,
    /// List recently added or modified items.
    Recent {
        #[arg(long, default_value_t = 20)]
        limit: u64,
        #[arg(long, default_value_t = 0)]
        offset: u64,
        // Exposed as `--type`, defaulting to "added".
        #[arg(long = "type", default_value = "added")]
        recent_type: String,
    },
    /// Retrieve the full-text content of an item's attachment. Prefers the
    /// clean OCR sidecar text, falling back to Zotero's built-in extraction.
    Fulltext {
        key: String,
        /// Force OCR-only: read the OCR sidecar and error if the item has no OCR data (no Zotero fallback).
        #[arg(long)]
        ocr: bool,
    },
    /// List items related to the given item.
    Related {
        key: String,
    },
    /// Get the citation key for an item.
    #[command(name = "citation-key")]
    CitationKey {
        key: String,
    },
    /// Get the local filesystem path of an item's PDF attachment.
    Path {
        key: String,
    },
    /// List attachments belonging to an item.
    Attachments {
        key: String,
        #[arg(long, default_value_t = 0)]
        offset: u64,
    },
    /// Batch find missing PDFs in a collection via Zotero's resolver chain.
    #[command(name = "find-pdfs")]
    FindPdfs {
        #[arg(long)]
        collection: String,
        // NOTE(review): default is 0; presumably 0 means "no limit" — confirm
        // in the handler.
        #[arg(long, default_value_t = 0)]
        limit: usize,
    },
}
705
// Subcommands of `zotron settings` (clap derive).
// NOTE: `///` comments below are rendered by clap as help text.
#[derive(Debug, Subcommand)]
pub(crate) enum SettingsCommand {
    /// Get a single Zotero preference value.
    Get {
        key: String,
    },
    /// List all Zotero preferences as a key->value dict.
    // Also reachable (and shown in help) as `get-all`.
    #[command(visible_alias = "get-all")]
    List,
    /// Set one or more Zotero preferences (key value pairs), or bulk-set from a JSON file.
    Set {
        /// key value key value ... (pairs of positional args)
        // Collected as a flat list; pairing (and any odd-length check) is left
        // to the handler.
        pairs: Vec<String>,
        /// Bulk-set from a JSON file.
        #[arg(long)]
        file: Option<String>,
        #[arg(long)]
        dry_run: bool,
    },
}
726
// Subcommands of `zotron tags` (clap derive).
// NOTE: `///` comments below are rendered by clap as help text.
#[derive(Debug, Subcommand)]
pub(crate) enum TagsCommand {
    /// List all tags in the library (flat).
    List {
        #[arg(long, default_value_t = 200)]
        limit: u64,
    },
    /// Rename a tag across all items.
    Rename {
        // Two positionals, in order: existing tag name, then replacement.
        old: String,
        new: String,
        #[arg(long)]
        dry_run: bool,
    },
    /// Delete a tag library-wide.
    Delete {
        tag: String,
        #[arg(long)]
        dry_run: bool,
    },
    /// Add tags to one or more items.
    Add {
        // Positional item keys; tags come from repeated `--tag` (at least one
        // is required by the parser).
        keys: Vec<String>,
        #[arg(long = "tag", required = true)]
        tags: Vec<String>,
        #[arg(long)]
        dry_run: bool,
    },
    /// Remove tags from one or more items.
    Remove {
        // Same argument shape as `Add`.
        keys: Vec<String>,
        #[arg(long = "tag", required = true)]
        tags: Vec<String>,
        #[arg(long)]
        dry_run: bool,
    },
}
764
// Arguments of `zotron export` (clap derive).
// NOTE: `///` comments below are rendered by clap as help text.
#[derive(Debug, clap::Args)]
pub(crate) struct ExportArgs {
    /// Item keys to export.
    pub(crate) keys: Vec<String>,
    /// Output format: bibtex, ris, csl-json, bibliography.
    // Free-form string: no `value_parser` restricts it to the listed formats.
    #[arg(long, default_value = "bibtex")]
    pub(crate) format: String,
    /// Export all items from this collection (name or key).
    #[arg(long)]
    pub(crate) collection: Option<String>,
    /// Citation style URL (only for bibliography format).
    #[arg(long, default_value = "http://www.zotero.org/styles/apa")]
    pub(crate) style: String,
    /// Output HTML instead of plain text (only for bibliography format).
    #[arg(long)]
    pub(crate) html: bool,
}
782
// Subcommands of `zotron annotations` (clap derive).
// NOTE: `///` comments below are rendered by clap as help text.
#[derive(Debug, Subcommand)]
pub(crate) enum AnnotationsCommand {
    /// List annotations on a PDF. Accepts an item key (auto-resolves to PDF) or attachment key.
    List {
        /// Item key or attachment key
        parent: String,
        /// Use a specific attachment when the item has multiple PDFs
        #[arg(long)]
        attachment: Option<String>,
        /// Include N characters of surrounding text for each annotation
        #[arg(long)]
        context: Option<u32>,
    },
    /// Create a new annotation on a PDF. Accepts an item key (auto-resolves to PDF) or attachment key.
    Create {
        /// Item key or attachment key
        parent: String,
        /// Use a specific attachment when the item has multiple PDFs
        #[arg(long)]
        attachment: Option<String>,
        // Exposed as `--type`; optional, with no default at the parser level.
        #[arg(long = "type")]
        annotation_type: Option<String>,
        /// JSON annotation position, for example '{"pageIndex":0,"rects":[[10,20,30,40]]}'.
        /// Not required when --quote is given.
        // The position/quote relationship is not enforced by clap attributes
        // here; both are plain optional arguments.
        #[arg(long)]
        position: Option<String>,
        /// Text to locate in the PDF and highlight. Resolves to rects automatically.
        /// Locates text headlessly (no PDF viewer required).
        #[arg(long)]
        quote: Option<String>,
        /// Restrict quote search to a specific page (0-indexed).
        #[arg(long)]
        page: Option<u32>,
        /// Zotero annotation sort index.
        #[arg(long = "sort-index")]
        sort_index: Option<String>,
        #[arg(long)]
        text: Option<String>,
        #[arg(long)]
        comment: Option<String>,
        // Default annotation color.
        #[arg(long, default_value = "#ffd400")]
        color: String,
        #[arg(long)]
        dry_run: bool,
    },
    /// Batch-create annotations from a JSON array on stdin or --file.
    /// Each entry: {"quote": "...", "color": "#hex", "comment": "...", "type": "highlight"}
    // No `#[command(name = ...)]` override: clap derives the subcommand name
    // from the variant (kebab-case, i.e. `create-batch`).
    CreateBatch {
        /// Item key or attachment key
        parent: String,
        /// Use a specific attachment when the item has multiple PDFs
        #[arg(long)]
        attachment: Option<String>,
        /// Read annotations from a JSON file instead of stdin
        #[arg(long)]
        file: Option<String>,
        #[arg(long)]
        dry_run: bool,
    },
    /// Locate a text quote in a PDF without creating an annotation.
    /// Returns page index and rects if found.
    Locate {
        /// Item key or attachment key
        parent: String,
        /// Use a specific attachment when the item has multiple PDFs
        #[arg(long)]
        attachment: Option<String>,
        /// Text to locate in the PDF
        // Required here, unlike `create --quote`.
        #[arg(long)]
        quote: String,
        /// Restrict search to a specific page (0-indexed)
        #[arg(long)]
        page: Option<u32>,
    },
    /// Delete an annotation by key.
    Delete {
        annotation_key: String,
        #[arg(long)]
        dry_run: bool,
    },
}
864
// Subcommands of `zotron notes` (clap derive).
// NOTE: `///` comments below are rendered by clap as help text.
#[derive(Debug, Subcommand)]
pub(crate) enum NotesCommand {
    /// List notes attached to a parent item.
    List {
        // Required `--parent` option (not a positional).
        #[arg(long)]
        parent: String,
        #[arg(long, default_value_t = 50)]
        limit: u64,
        #[arg(long, default_value_t = 0)]
        offset: u64,
    },
    /// Get a single note by key.
    Get {
        note_key: String,
    },
    /// Create a note attached to a parent item.
    Create {
        #[arg(long)]
        parent: String,
        #[arg(long)]
        content: String,
        // Repeatable `--tag`; may be empty.
        #[arg(long = "tag")]
        tags: Vec<String>,
        #[arg(long)]
        dry_run: bool,
    },
    /// Update the content of an existing note.
    Update {
        note_key: String,
        #[arg(long)]
        content: String,
        #[arg(long)]
        dry_run: bool,
    },
    /// Delete a note by key.
    Delete {
        note_key: String,
        #[arg(long)]
        dry_run: bool,
    },
    /// Search notes by text content.
    Search {
        query: String,
        #[arg(long, default_value_t = 50)]
        limit: u64,
    },
}
912
// Subcommands of `zotron collections` (clap derive).
// NOTE: `///` comments below are rendered by clap as help text.
#[derive(Debug, Subcommand)]
pub(crate) enum CollectionsCommand {
    /// List all collections in the user library (flat).
    List,
    /// Print the collection hierarchy as a tree.
    Tree,
    /// Get a single collection's metadata.
    Get {
        name_or_id: String,
    },
    /// List all items in a collection.
    // Named `get-items`, also reachable (and shown in help) as `items`.
    #[command(name = "get-items", visible_alias = "items")]
    GetItems {
        name_or_id: String,
        // No default: `None` when `--limit` is not given.
        #[arg(long)]
        limit: Option<u64>,
        #[arg(long, default_value_t = 0)]
        offset: u64,
    },
    /// Show item/attachment/note/subcollection counts for a collection.
    Stats {
        name_or_id: String,
    },
    /// Rename a collection.
    Rename {
        // Two positionals, in order: current name, then new name.
        old_name: String,
        new_name: String,
        #[arg(long)]
        dry_run: bool,
    },
    /// Create a collection, optionally nested under a parent.
    Create {
        name: String,
        #[arg(long)]
        parent: Option<String>,
        #[arg(long)]
        dry_run: bool,
    },
    /// Delete a collection.
    Delete {
        name_or_id: String,
        #[arg(long)]
        dry_run: bool,
    },
    /// Add existing items to a collection.
    #[command(name = "add-items")]
    AddItems {
        // Positionals: the collection first, then any number of item keys.
        collection: String,
        item_keys: Vec<String>,
        #[arg(long)]
        dry_run: bool,
    },
    /// Remove items from a collection.
    #[command(name = "remove-items")]
    RemoveItems {
        // Same argument shape as `AddItems`.
        collection: String,
        item_keys: Vec<String>,
        #[arg(long)]
        dry_run: bool,
    },
}
974
975enum ParseOutcome<T> {
976    Command(T),
977    Display(String),
978}
979
980fn parse_cli<T>(
981    args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
982) -> Result<ParseOutcome<T>, String>
983where
984    T: Parser,
985{
986    match T::try_parse_from(args) {
987        Ok(cli) => Ok(ParseOutcome::Command(cli)),
988        Err(err)
989            if matches!(
990                err.kind(),
991                ErrorKind::DisplayHelp | ErrorKind::DisplayVersion
992            ) => {
993            Ok(ParseOutcome::Display(err.to_string()))
994        }
995        Err(err) => Err(err.to_string()),
996    }
997}
998
999pub fn run(
1000    args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1001) -> Result<String, String> {
1002    let cli = match parse_cli::<Cli>(args)? {
1003        ParseOutcome::Command(cli) => cli,
1004        ParseOutcome::Display(output) => return Ok(output),
1005    };
1006    let mut client = ZoteroRpc::new(cli.url);
1007    run_command(cli.command, &mut client)
1008}
1009
1010pub fn run_with_client(
1011    args: impl IntoIterator<Item = impl Into<std::ffi::OsString> + Clone>,
1012    client: &mut impl RpcCaller,
1013) -> Result<String, String> {
1014    let cli = match parse_cli::<Cli>(args)? {
1015        ParseOutcome::Command(cli) => cli,
1016        ParseOutcome::Display(output) => return Ok(output),
1017    };
1018    run_command(cli.command, client)
1019}
1020
1021fn run_command(command: Command, client: &mut impl RpcCaller) -> Result<String, String> {
1022    if let Command::Export(args) = command {
1023        return run_export(args, client);
1024    }
1025
1026    let value = match command {
1027        Command::Ping => call_json(client, "system.ping", None)?,
1028        Command::Rpc {
1029            method,
1030            params_json,
1031            paginate,
1032            page_size,
1033            ..
1034        } => {
1035            let params = serde_json::from_str::<Value>(&params_json)
1036                .map_err(|err| format!("INVALID_JSON: params must be a JSON object: {err}"))?;
1037            if !params.is_object() {
1038                return Err("INVALID_JSON: params must be a JSON object".to_string());
1039            }
1040            if paginate {
1041                paginate_rpc(client, &method, params, page_size)?
1042            } else {
1043                call_json(client, &method, Some(params))?
1044            }
1045        }
1046        Command::Push {
1047            json_file,
1048            pdf,
1049            collection,
1050            on_duplicate,
1051            dry_run,
1052            ..
1053        } => return run_push_command(json_file, pdf, collection, on_duplicate, dry_run, client),
1054        Command::System { command } => run_system_command(command, client)?,
1055        Command::Search(args) => {
1056            if let Some(mgmt) = args.management {
1057                run_search_management_command(mgmt, client)?
1058            } else {
1059                run_search(args, client)?
1060            }
1061        }
1062        Command::Items { command } => run_items_command(command, client)?,
1063        Command::Collections { command } => run_collections_command(command, client)?,
1064        Command::Notes { command } => run_notes_command(command, client)?,
1065        Command::Settings { command } => run_settings_command(command, client)?,
1066        Command::Tags { command } => run_tags_command(command, client)?,
1067        Command::Annotations { command } => run_annotations_command(command, client)?,
1068        Command::Ocr { command } => {
1069            return run_ocr_command(command, client);
1070        }
1071        Command::Rag { command } => {
1072            return run_rag_command(command, client);
1073        }
1074        Command::Sources { command } => {
1075            return match command.unwrap_or(SourcesCommand::List) {
1076                SourcesCommand::List => run_sources_list(),
1077                SourcesCommand::Sync { skills_dir } => run_sources_sync(&skills_dir),
1078            };
1079        }
1080        Command::External(args) => return run_external_command(args),
1081        Command::Export(_) => unreachable!("export commands return raw output above"),
1082    };
1083
1084    format_json(&value)
1085}