// image_anonymizer/ocr/detection.rs
use anyhow::{Context, Result};
use base64::{Engine as _, engine::general_purpose};
use reqwest::blocking::Client;
use serde::{Deserialize, Serialize};
use std::env;
use std::path::Path;
use tracing::{debug, error};

/// Top-level envelope deserialized from the Vision API `images:annotate` reply.
#[derive(Debug, Deserialize)]
pub struct TextDetectionResponse {
    /// One entry per image submitted in the request (this module sends one).
    pub responses: Vec<Response>,
}
13
/// Per-image result inside [`TextDetectionResponse`].
#[derive(Debug, Deserialize)]
pub struct Response {
    // `default` keeps deserialization working when the API returns a response
    // with no `textAnnotations` key (e.g. no text found in the image).
    #[serde(default)]
    #[serde(rename = "textAnnotations")]
    pub text_annotations: Vec<TextAnnotation>,
}
20
/// A single piece of detected text plus (optionally) where it sits in the image.
#[derive(Debug, Deserialize, Clone)]
pub struct TextAnnotation {
    /// The recognized text content.
    pub description: String,
    // Optional + default so a missing `boundingPoly` deserializes as `None`
    // instead of failing the whole response.
    #[serde(default)]
    #[serde(rename = "boundingPoly")]
    pub bounding_poly: Option<BoundingPoly>,
}
28
/// Polygon outlining a text annotation, given as a list of corner vertices.
#[derive(Debug, Deserialize, Clone, Default)]
pub struct BoundingPoly {
    pub vertices: Vec<Vertex>,
}
33
/// A single polygon corner in image pixel coordinates.
#[derive(Debug, Deserialize, Clone)]
pub struct Vertex {
    // `default` (0) guards against the API omitting a coordinate —
    // presumably when its value is 0; TODO confirm against API behavior.
    #[serde(default)]
    pub x: i32,
    #[serde(default)]
    pub y: i32,
}
41
/// Top-level request body for the Vision API `images:annotate` endpoint.
#[derive(Debug, Serialize)]
struct TextDetectionRequest {
    requests: Vec<Request>,
}
46
/// One annotate request: an image payload plus the features to run on it.
#[derive(Debug, Serialize)]
struct Request {
    image: Image,
    features: Vec<Feature>,
}
52
/// Image payload; `content` is the base64-encoded image bytes.
#[derive(Debug, Serialize)]
struct Image {
    content: String,
}
57
58#[derive(Debug, Serialize)]
59struct Feature {
60 #[serde(rename = "type")]
61 feature_type: String,
62 max_results: i32,
63}
64
65pub fn detect_text_with_api(image_path: &Path) -> Result<Vec<TextAnnotation>> {
80 let api_key = env::var("GCP_API_KEY").context("GCP_API_KEY environment variable not set")?;
81 debug!("image_path: {}", image_path.display());
82
83 let image_data = std::fs::read(image_path).context("Failed to read image file")?;
84 let base64_image = general_purpose::STANDARD.encode(&image_data);
85
86 let request = TextDetectionRequest {
87 requests: vec![Request {
88 image: Image {
89 content: base64_image,
90 },
91 features: vec![Feature {
92 feature_type: "TEXT_DETECTION".to_string(),
93 max_results: 100,
94 }],
95 }],
96 };
97
98 let client = Client::new();
99 let response = client
100 .post(&format!(
101 "https://vision.googleapis.com/v1/images:annotate?key={}",
102 api_key
103 ))
104 .json(&request)
105 .send()
106 .context("Failed to send request to Google Cloud Vision API")?;
107
108 let response_text = response.text().context("Failed to get response text")?;
109
110 if response_text.len() > 1000 {
111 debug!(
112 "Response text (first 1000 chars): {}",
113 &response_text[..1000]
114 );
115 debug!("Response text length: {}", response_text.len());
116 } else {
117 debug!("Response text: {}", &response_text);
118 }
119
120 let response_body: TextDetectionResponse = serde_json::from_str(&response_text)
121 .context("Failed to parse Google Cloud Vision API response")?;
122
123 if response_body.responses.is_empty() {
124 error!("No responses from Google Cloud Vision API");
125 anyhow::bail!("No responses from Google Cloud Vision API");
126 }
127
128 let annotations = response_body.responses[0].text_annotations.clone();
129 debug!("Detected {} text annotations", annotations.len());
130
131 Ok(annotations)
132}