1use anyhow::{Context, Result, anyhow, bail};
18use rlx_cli::{parse_standard_device, req};
19use std::path::PathBuf;
20
21use crate::OcrRunner;
22
23pub fn run(args: &[String]) -> Result<()> {
24 let mut detection: Option<PathBuf> = None;
25 let mut recognition: Option<PathBuf> = None;
26 let mut model_dir: Option<PathBuf> = None;
27 let mut image: Option<PathBuf> = None;
28 let mut device = "cpu".to_string();
29 let mut dry = false;
30 let mut i = 0;
31 while i < args.len() {
32 match args[i].as_str() {
33 "--detection-model" => detection = Some(req(args, &mut i)?.into()),
34 "--recognition-model" => recognition = Some(req(args, &mut i)?.into()),
35 "--model-dir" => model_dir = Some(req(args, &mut i)?.into()),
36 "--image" => image = Some(req(args, &mut i)?.into()),
37 "--device" => device = req(args, &mut i)?,
38 "--dry" => {
39 dry = true;
40 i += 1;
41 }
42 "--help" | "-h" => {
43 eprintln!(
44 "rlx-ocr — ocrs U-Net + CRNN/GRU (native RLX, safetensors weights)\n\
45 Flags:\n\
46 --model-dir DIR directory with ocr-*-full.safetensors\n\
47 --detection-model PATH detection .safetensors\n\
48 --recognition-model PATH recognition .safetensors\n\
49 --image PATH input JPEG/PNG\n\
50 [--device cpu|metal|cuda|…] [--dry]"
51 );
52 return Ok(());
53 }
54 other => bail!("unknown flag: {other}"),
55 }
56 }
57
58 let device = parse_standard_device("ocr", &device)?;
59
60 let mut builder = OcrRunner::builder().device(device);
61 builder = match (detection, recognition, model_dir) {
62 (Some(d), Some(r), _) => builder.detection_model(d).recognition_model(r),
63 (_, _, Some(dir)) => builder.model_dir(dir),
64 _ => {
65 return Err(anyhow!(
66 "provide --model-dir DIR or both --detection-model and --recognition-model"
67 ));
68 }
69 };
70
71 eprintln!("[rlx-ocr] device={device:?}");
72
73 let runner = builder.build()?;
74 eprintln!(
75 "[rlx-ocr] engine ready — det_threshold={}",
76 runner.engine().detection_threshold()
77 );
78
79 if dry {
80 eprintln!("[rlx-ocr] --dry set; skipping inference");
81 return Ok(());
82 }
83
84 let image = image.ok_or_else(|| anyhow!("--image is required unless --dry"))?;
85 let t0 = std::time::Instant::now();
86 let out = runner.predict_path(&image).context("OCR inference")?;
87 eprintln!(
88 "[rlx-ocr] extracted {} lines, {} words in {:?}",
89 out.lines.len(),
90 out.words.len(),
91 t0.elapsed()
92 );
93 if !out.text.is_empty() {
94 println!("{}", out.text);
95 }
96 Ok(())
97}