image_anonymizer/ocr/
detection.rs

1use anyhow::{Context, Result};
2use base64::{Engine as _, engine::general_purpose};
3use reqwest::blocking::Client;
4use serde::{Deserialize, Serialize};
5use std::env;
6use std::path::Path;
7use tracing::{debug, error};
8
/// Top-level response body returned by the Vision API `images:annotate` call.
///
/// Contains one [`Response`] per request sent in the batch; this crate always
/// sends exactly one request, so `responses` is expected to hold one element.
#[derive(Debug, Deserialize)]
pub struct TextDetectionResponse {
    // One entry per image submitted in the annotate batch.
    pub responses: Vec<Response>,
}
13
14#[derive(Debug, Deserialize)]
15pub struct Response {
16    #[serde(default)]
17    #[serde(rename = "textAnnotations")]
18    pub text_annotations: Vec<TextAnnotation>,
19}
20
21#[derive(Debug, Deserialize, Clone)]
22pub struct TextAnnotation {
23    pub description: String,
24    #[serde(default)]
25    #[serde(rename = "boundingPoly")]
26    pub bounding_poly: Option<BoundingPoly>,
27}
28
/// Polygon outlining a detected text region, given as a list of corner points.
#[derive(Debug, Deserialize, Clone, Default)]
pub struct BoundingPoly {
    // Corner vertices in image pixel coordinates (typically 4 for a quad).
    pub vertices: Vec<Vertex>,
}
33
/// A single point of a bounding polygon, in image pixel coordinates.
///
/// The Vision API omits a coordinate from the JSON when its value is 0,
/// so both fields fall back to `i32::default()` (0) via `#[serde(default)]`.
#[derive(Debug, Deserialize, Clone)]
pub struct Vertex {
    #[serde(default)]
    pub x: i32,
    #[serde(default)]
    pub y: i32,
}
41
/// Top-level request body for the Vision API `images:annotate` endpoint.
#[derive(Debug, Serialize)]
struct TextDetectionRequest {
    // Batch of per-image requests; this crate always sends exactly one.
    requests: Vec<Request>,
}
46
/// A single image annotation request: the image payload plus the
/// detection features to run on it.
#[derive(Debug, Serialize)]
struct Request {
    image: Image,
    features: Vec<Feature>,
}
52
/// Image payload, transmitted as base64-encoded bytes in the `content` field.
#[derive(Debug, Serialize)]
struct Image {
    // Base64 (standard alphabet) encoding of the raw image file bytes.
    content: String,
}
57
58#[derive(Debug, Serialize)]
59struct Feature {
60    #[serde(rename = "type")]
61    feature_type: String,
62    max_results: i32,
63}
64
65/// Detect text in an image using the Google Cloud Vision API
66///
67/// # Arguments
68///
69/// * `image_path` - The path to the image file
70///
71/// # Returns
72///
73/// * `Result<Vec<TextAnnotation>>` - The detected text annotations
74///
75/// # Errors
76///
77/// * `anyhow::Error` - If the image processing fails
78///
79pub fn detect_text_with_api(image_path: &Path) -> Result<Vec<TextAnnotation>> {
80    let api_key = env::var("GCP_API_KEY").context("GCP_API_KEY environment variable not set")?;
81    debug!("image_path: {}", image_path.display());
82
83    let image_data = std::fs::read(image_path).context("Failed to read image file")?;
84    let base64_image = general_purpose::STANDARD.encode(&image_data);
85
86    let request = TextDetectionRequest {
87        requests: vec![Request {
88            image: Image {
89                content: base64_image,
90            },
91            features: vec![Feature {
92                feature_type: "TEXT_DETECTION".to_string(),
93                max_results: 100,
94            }],
95        }],
96    };
97
98    let client = Client::new();
99    let response = client
100        .post(&format!(
101            "https://vision.googleapis.com/v1/images:annotate?key={}",
102            api_key
103        ))
104        .json(&request)
105        .send()
106        .context("Failed to send request to Google Cloud Vision API")?;
107
108    let response_text = response.text().context("Failed to get response text")?;
109
110    if response_text.len() > 1000 {
111        debug!(
112            "Response text (first 1000 chars): {}",
113            &response_text[..1000]
114        );
115        debug!("Response text length: {}", response_text.len());
116    } else {
117        debug!("Response text: {}", &response_text);
118    }
119
120    let response_body: TextDetectionResponse = serde_json::from_str(&response_text)
121        .context("Failed to parse Google Cloud Vision API response")?;
122
123    if response_body.responses.is_empty() {
124        error!("No responses from Google Cloud Vision API");
125        anyhow::bail!("No responses from Google Cloud Vision API");
126    }
127
128    let annotations = response_body.responses[0].text_annotations.clone();
129    debug!("Detected {} text annotations", annotations.len());
130
131    Ok(annotations)
132}