Skip to main content

rlx_ocr/
cli.rs

1// RLX — versatile ML compiler + runtime.
2// Copyright (C) 2026 Eugene Hauptmann, Nataliya Kosmyna.
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, version 3.
7//
8// This program is distributed in the hope that it will be useful,
9// but WITHOUT ANY WARRANTY; without even the implied warranty of
10// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11// GNU General Public License for more details.
12//
13// You should have received a copy of the GNU General Public License
14// along with this program. If not, see <https://www.gnu.org/licenses/>.
15
16// RLX CLI
17use anyhow::{Context, Result, anyhow, bail};
18use rlx_cli::{parse_standard_device, req};
19use std::path::PathBuf;
20
21use crate::OcrRunner;
22
23pub fn run(args: &[String]) -> Result<()> {
24    let mut detection: Option<PathBuf> = None;
25    let mut recognition: Option<PathBuf> = None;
26    let mut model_dir: Option<PathBuf> = None;
27    let mut image: Option<PathBuf> = None;
28    let mut device = "cpu".to_string();
29    let mut dry = false;
30    let mut i = 0;
31    while i < args.len() {
32        match args[i].as_str() {
33            "--detection-model" => detection = Some(req(args, &mut i)?.into()),
34            "--recognition-model" => recognition = Some(req(args, &mut i)?.into()),
35            "--model-dir" => model_dir = Some(req(args, &mut i)?.into()),
36            "--image" => image = Some(req(args, &mut i)?.into()),
37            "--device" => device = req(args, &mut i)?,
38            "--dry" => {
39                dry = true;
40                i += 1;
41            }
42            "--help" | "-h" => {
43                eprintln!(
44                    "rlx-ocr — ocrs U-Net + CRNN/GRU (native RLX, safetensors weights)\n\
45                     Flags:\n\
46                       --model-dir DIR          directory with ocr-*-full.safetensors\n\
47                       --detection-model PATH   detection .safetensors\n\
48                       --recognition-model PATH recognition .safetensors\n\
49                       --image PATH             input JPEG/PNG\n\
50                       [--device cpu|metal|cuda|…] [--dry]"
51                );
52                return Ok(());
53            }
54            other => bail!("unknown flag: {other}"),
55        }
56    }
57
58    let device = parse_standard_device("ocr", &device)?;
59
60    let mut builder = OcrRunner::builder().device(device);
61    builder = match (detection, recognition, model_dir) {
62        (Some(d), Some(r), _) => builder.detection_model(d).recognition_model(r),
63        (_, _, Some(dir)) => builder.model_dir(dir),
64        _ => {
65            return Err(anyhow!(
66                "provide --model-dir DIR or both --detection-model and --recognition-model"
67            ));
68        }
69    };
70
71    eprintln!("[rlx-ocr] device={device:?}");
72
73    let runner = builder.build()?;
74    eprintln!(
75        "[rlx-ocr] engine ready — det_threshold={}",
76        runner.engine().detection_threshold()
77    );
78
79    if dry {
80        eprintln!("[rlx-ocr] --dry set; skipping inference");
81        return Ok(());
82    }
83
84    let image = image.ok_or_else(|| anyhow!("--image is required unless --dry"))?;
85    let t0 = std::time::Instant::now();
86    let out = runner.predict_path(&image).context("OCR inference")?;
87    eprintln!(
88        "[rlx-ocr] extracted {} lines, {} words in {:?}",
89        out.lines.len(),
90        out.words.len(),
91        t0.elapsed()
92    );
93    if !out.text.is_empty() {
94        println!("{}", out.text);
95    }
96    Ok(())
97}