1use crate::c14n;
24use crate::error::EthosError;
25use crate::model::{Document, Page};
26use crate::SCHEMA_VERSION;
27use serde::{Deserialize, Serialize};
28use serde_json::json;
29
// `artifact_type` a request must carry to be accepted by
// `resolve_crop_element_descriptor`.
const CROP_ELEMENT_REQUEST_ARTIFACT_TYPE: &str = "ethos.crop_element_request.v1";
// `artifact_type` stamped on every descriptor built by
// `resolve_crop_element_descriptor`.
const CROP_DESCRIPTOR_ARTIFACT_TYPE: &str = "ethos.crop_descriptor.v1";
// `version` member of the identity object hashed by `crop_element_request_ref`.
const CROP_ELEMENT_REQUEST_REF_VERSION: &str = "ethos.crop_element_request_ref.v1";
// `version` member of the identity object hashed by `crop_element_crop_ref`.
const LOGICAL_CROP_REF_VERSION: &str = "ethos.logical_crop_ref.v1";
34
/// Request to resolve a crop descriptor for one element of a document.
///
/// Deserialization rejects unknown fields (`deny_unknown_fields`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CropElementRequest {
    /// Artifact discriminator; `resolve_crop_element_descriptor` only accepts
    /// `ethos.crop_element_request.v1`.
    pub artifact_type: String,
    /// Schema version; the resolver requires it to equal the crate's
    /// `SCHEMA_VERSION`.
    pub schema_version: String,
    /// Reference derived from the request identity; the resolver requires it
    /// to equal the value `crop_element_request_ref` computes for this request.
    pub request_ref: String,
    /// Fingerprint of the targeted document; the resolver requires it to be
    /// non-empty and equal to the document's own fingerprint.
    pub document_fingerprint: String,
    /// Id of the element to look up in the document payload.
    pub element_id: String,
    /// Requested rendering mode.
    pub rendering: CropElementRendering,
    /// Source PDF fingerprint. The resolver requires it for `Rendered`
    /// requests and rejects it for `DescriptorOnly` ones. Left out of the
    /// serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_pdf_fingerprint: Option<String>,
}
55
/// Rendering mode of a crop element request, also recorded as the
/// `rendering_status` of the resolved descriptor.
///
/// Variants serialize in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CropElementRendering {
    /// Descriptor without a bound source PDF; requests in this mode must not
    /// carry a `source_pdf_fingerprint`.
    DescriptorOnly,
    /// Descriptor bound to a source PDF; requests in this mode must carry a
    /// `source_pdf_fingerprint` equal to the document's source fingerprint.
    Rendered,
}
65
66impl CropElementRendering {
67 fn as_contract_str(self) -> &'static str {
68 match self {
69 CropElementRendering::DescriptorOnly => "descriptor_only",
70 CropElementRendering::Rendered => "rendered",
71 }
72 }
73}
74
/// Descriptor of the crop for one resolved document element, as produced by
/// `resolve_crop_element_descriptor`.
// NOTE(review): unlike `CropElementRequest`, this type does not set
// `deny_unknown_fields` — confirm that accepting unknown fields is intended.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CropElementDescriptor {
    /// Artifact discriminator; the resolver always writes
    /// `ethos.crop_descriptor.v1`.
    pub artifact_type: String,
    /// Schema version; the resolver writes the crate's `SCHEMA_VERSION`.
    pub schema_version: String,
    /// Logical crop reference computed by `crop_element_crop_ref` from the
    /// document fingerprint, the check id and the element's page.
    pub crop_ref: String,
    /// Fingerprint of the document the element was resolved in.
    pub document_fingerprint: String,
    /// Page id recorded on the resolved element.
    pub page: String,
    /// Element bounding box as `[x0, y0, x1, y1]`.
    pub bbox: [i64; 4],
    /// Check ids bound to this crop; the resolver emits exactly one.
    pub check_ids: Vec<String>,
    /// Rendering mode copied from the request.
    pub rendering_status: CropElementRendering,
    /// Source PDF fingerprint copied from the request; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_pdf_fingerprint: Option<String>,
    // The `rendered_*` fields below are always `None` when the descriptor
    // comes out of the resolver in this module; presumably a later rendering
    // step fills them in — TODO confirm against the producer.
    /// Reference to the rendered crop, when one exists; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub rendered_ref: Option<String>,
    /// Format of the rendered crop, when one exists; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub rendered_format: Option<String>,
    /// SHA-256 of the rendered crop, when one exists; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub rendered_sha256: Option<String>,
    /// Rendered width in pixels, when known; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub rendered_width_px: Option<u32>,
    /// Rendered height in pixels, when known; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub rendered_height_px: Option<u32>,
    /// Digest from `c14n::sha256_hex_bytes` over the element text bytes;
    /// `None` (and omitted) when the element has no text.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text_sha256: Option<String>,
}
116
/// Failure reported while resolving a crop element descriptor.
///
/// Carries a single static diagnostic string; the same text is returned by
/// [`CropElementError::diagnostic`] and written by the `Display` impl.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CropElementError {
    diagnostic: &'static str,
}

impl CropElementError {
    /// Wraps a static diagnostic message in an error value.
    fn new(diagnostic: &'static str) -> Self {
        Self { diagnostic }
    }

    /// Returns the diagnostic message describing why resolution failed.
    pub fn diagnostic(&self) -> &'static str {
        self.diagnostic
    }
}

impl std::fmt::Display for CropElementError {
    /// Writes the diagnostic verbatim; width and alignment flags are ignored.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        formatter.write_str(self.diagnostic())
    }
}

impl std::error::Error for CropElementError {}
141
142pub fn crop_element_request_ref(request: &CropElementRequest) -> Result<String, EthosError> {
144 let mut identity = json!({
145 "document_fingerprint": request.document_fingerprint,
146 "element_id": request.element_id,
147 "rendering": request.rendering.as_contract_str(),
148 "version": CROP_ELEMENT_REQUEST_REF_VERSION,
149 });
150 if let Some(source_pdf_fingerprint) = &request.source_pdf_fingerprint {
151 identity.as_object_mut().expect("json object").insert(
152 "source_pdf_fingerprint".to_string(),
153 json!(source_pdf_fingerprint),
154 );
155 }
156 let digest = c14n::sha256_hex(&identity)
157 .map_err(|err| EthosError::internal(format!("crop_element request_ref error: {err}")))?;
158 Ok(format!("request-{digest}"))
159}
160
161pub fn crop_element_crop_ref(
163 document_fingerprint: &str,
164 check_id: &str,
165 page: &str,
166) -> Result<String, EthosError> {
167 let identity = json!({
168 "check_id": check_id,
169 "document_fingerprint": document_fingerprint,
170 "page": page,
171 "version": LOGICAL_CROP_REF_VERSION,
172 });
173 let digest = c14n::sha256_hex(&identity)
174 .map_err(|err| EthosError::internal(format!("crop_element crop_ref error: {err}")))?;
175 Ok(format!("crop-{digest}.json"))
176}
177
178pub fn resolve_crop_element_descriptor(
180 document: &Document,
181 request: &CropElementRequest,
182 check_id: &str,
183) -> Result<CropElementDescriptor, CropElementError> {
184 if request.artifact_type != CROP_ELEMENT_REQUEST_ARTIFACT_TYPE {
185 return Err(CropElementError::new(
186 "request artifact_type is not ethos.crop_element_request.v1",
187 ));
188 }
189 if request.schema_version != SCHEMA_VERSION {
190 return Err(CropElementError::new(
191 "request schema_version is not supported",
192 ));
193 }
194 if request.request_ref.is_empty() {
195 return Err(CropElementError::new("request_ref is missing"));
196 }
197 if request.document_fingerprint.is_empty() || document.fingerprint.is_empty() {
198 return Err(CropElementError::new("document_fingerprint is missing"));
199 }
200 let expected_request_ref = crop_element_request_ref(request).map_err(|_| {
201 CropElementError::new("request_ref does not match crop element request identity tuple")
202 })?;
203 if request.request_ref != expected_request_ref {
204 return Err(CropElementError::new(
205 "request_ref does not match crop element request identity tuple",
206 ));
207 }
208 if request.document_fingerprint != document.fingerprint {
209 return Err(CropElementError::new(
210 "request document_fingerprint does not match document fingerprint",
211 ));
212 }
213 if !is_check_id(check_id) {
214 return Err(CropElementError::new(
215 "descriptor must bind exactly one logical check id",
216 ));
217 }
218 match request.rendering {
219 CropElementRendering::DescriptorOnly => {
220 if request.source_pdf_fingerprint.is_some() {
221 return Err(CropElementError::new(
222 "descriptor_only crop_element request must not include source_pdf_fingerprint",
223 ));
224 }
225 }
226 CropElementRendering::Rendered => {
227 let Some(source_pdf_fingerprint) = request.source_pdf_fingerprint.as_deref() else {
228 return Err(CropElementError::new(
229 "rendered crop_element request requires source_pdf_fingerprint",
230 ));
231 };
232 if source_pdf_fingerprint != document.source.fingerprint {
233 return Err(CropElementError::new(
234 "request source_pdf_fingerprint does not match document source fingerprint",
235 ));
236 }
237 }
238 }
239
240 let element = document
241 .payload
242 .elements
243 .iter()
244 .find(|element| element.id == request.element_id)
245 .ok_or_else(|| CropElementError::new("request element_id does not resolve in document"))?;
246 let page = document
247 .payload
248 .pages
249 .iter()
250 .find(|page| page.id == element.page)
251 .ok_or_else(|| CropElementError::new("resolved element is missing page"))?;
252 validate_resolved_bbox(element.bbox, page)?;
253
254 let text_sha256 = element
255 .text
256 .as_deref()
257 .map(|text| c14n::sha256_hex_bytes(text.as_bytes()));
258 let crop_ref =
259 crop_element_crop_ref(&document.fingerprint, check_id, &element.page).map_err(|_| {
260 CropElementError::new("descriptor crop_ref does not match logical identity tuple")
261 })?;
262
263 Ok(CropElementDescriptor {
264 artifact_type: CROP_DESCRIPTOR_ARTIFACT_TYPE.to_string(),
265 schema_version: SCHEMA_VERSION.to_string(),
266 crop_ref,
267 document_fingerprint: document.fingerprint.clone(),
268 page: element.page.clone(),
269 bbox: element.bbox.to_array(),
270 check_ids: vec![check_id.to_string()],
271 rendering_status: request.rendering,
272 source_pdf_fingerprint: request.source_pdf_fingerprint.clone(),
273 rendered_ref: None,
274 rendered_format: None,
275 rendered_sha256: None,
276 rendered_width_px: None,
277 rendered_height_px: None,
278 text_sha256,
279 })
280}
281
282fn validate_resolved_bbox(bbox: crate::geom::QRect, page: &Page) -> Result<(), CropElementError> {
283 let [x0, y0, x1, y1] = bbox.to_array();
284 if x0 >= x1 || y0 >= y1 {
285 return Err(CropElementError::new(
286 "resolved element bbox has non-positive area",
287 ));
288 }
289 if x0 < 0 || y0 < 0 || x1 > page.width || y1 > page.height {
290 return Err(CropElementError::new(
291 "resolved element bbox exceeds page bounds",
292 ));
293 }
294 Ok(())
295}
296
/// Returns `true` when `value` is exactly `v` followed by four ASCII digits
/// (for example `v0001`).
fn is_check_id(value: &str) -> bool {
    match value.as_bytes() {
        // Every byte of a multi-byte UTF-8 character is >= 0x80, so such
        // characters can never pass the ASCII digit test.
        [b'v', digits @ ..] if digits.len() == 4 => {
            digits.iter().all(|byte| byte.is_ascii_digit())
        }
        _ => false,
    }
}
303
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::Document;
    use serde_json::Value;

    /// Check id used by every test that does not exercise check-id validation.
    const CHECK_ID: &str = "v0001";

    /// Loads the committed example document.
    fn fixture_document() -> Document {
        let raw = include_str!("../../../schemas/examples/document.example.json");
        serde_json::from_str(raw).unwrap()
    }

    /// Loads the committed example crop element request.
    fn fixture_request() -> CropElementRequest {
        let raw = include_str!("../../../schemas/examples/crop-element-request.example.json");
        serde_json::from_str(raw).unwrap()
    }

    /// Loads the committed example descriptor as untyped JSON.
    fn expected_descriptor_value() -> Value {
        let raw = include_str!("../../../schemas/examples/crop-descriptor.example.json");
        serde_json::from_str(raw).unwrap()
    }

    /// Recomputes `request_ref` after a test has changed identity fields, so
    /// the request gets past the reference check and reaches later checks.
    fn resigned(mut request: CropElementRequest) -> CropElementRequest {
        request.request_ref = crop_element_request_ref(&request).unwrap();
        request
    }

    /// Resolves the descriptor, expecting a failure, and returns its diagnostic.
    fn failure_diagnostic(
        document: &Document,
        request: &CropElementRequest,
        check_id: &str,
        why: &str,
    ) -> &'static str {
        resolve_crop_element_descriptor(document, request, check_id)
            .expect_err(why)
            .diagnostic()
    }

    /// Returns the fixture document with the requested element's bbox replaced.
    fn document_with_element_bbox(bbox: crate::geom::QRect) -> Document {
        let mut document = fixture_document();
        let target_id = fixture_request().element_id;
        let element = document
            .payload
            .elements
            .iter_mut()
            .find(|element| element.id == target_id)
            .expect("fixture element exists");
        element.bbox = bbox;
        document
    }

    #[test]
    fn crop_element_descriptor_matches_committed_example() {
        let document = fixture_document();
        let request = fixture_request();

        let descriptor = resolve_crop_element_descriptor(&document, &request, CHECK_ID).unwrap();

        assert_eq!(
            serde_json::to_value(descriptor).unwrap(),
            expected_descriptor_value()
        );
    }

    #[test]
    fn crop_element_request_ref_matches_committed_example() {
        let request = fixture_request();
        let recomputed = crop_element_request_ref(&request).unwrap();

        assert_eq!(recomputed, request.request_ref);
    }

    #[test]
    fn crop_element_request_rejects_unknown_fields() {
        let mut request = serde_json::to_value(fixture_request()).unwrap();
        assert!(request.is_object());
        // Indexing an object by a missing key inserts that key.
        request["unexpected"] = Value::Bool(true);

        let error = serde_json::from_value::<CropElementRequest>(request).unwrap_err();
        assert!(error.to_string().contains("unknown field `unexpected`"));
    }

    #[test]
    fn crop_element_crop_ref_matches_committed_descriptor() {
        let document = fixture_document();
        let descriptor = expected_descriptor_value();

        assert_eq!(
            crop_element_crop_ref(&document.fingerprint, "v0001", "p0001").unwrap(),
            descriptor["crop_ref"]
        );
    }

    #[test]
    fn stale_request_ref_fails_closed() {
        let mut request = fixture_request();
        request.request_ref = format!("request-{}", "0".repeat(64));

        let diagnostic = failure_diagnostic(
            &fixture_document(),
            &request,
            CHECK_ID,
            "stale request_ref must fail",
        );

        assert_eq!(
            diagnostic,
            "request_ref does not match crop element request identity tuple"
        );
    }

    #[test]
    fn document_fingerprint_mismatch_fails_closed() {
        let mut request = fixture_request();
        request.document_fingerprint = format!("sha256:{}", "0".repeat(64));
        let request = resigned(request);

        let diagnostic = failure_diagnostic(
            &fixture_document(),
            &request,
            CHECK_ID,
            "document fingerprint mismatch must fail",
        );

        assert_eq!(
            diagnostic,
            "request document_fingerprint does not match document fingerprint"
        );
    }

    #[test]
    fn missing_document_fingerprint_fails_closed() {
        let mut document = fixture_document();
        document.fingerprint.clear();
        let mut request = fixture_request();
        request.document_fingerprint.clear();
        let request = resigned(request);

        let diagnostic = failure_diagnostic(
            &document,
            &request,
            CHECK_ID,
            "missing document fingerprint must fail",
        );

        assert_eq!(diagnostic, "document_fingerprint is missing");
    }

    #[test]
    fn unresolved_element_fails_closed() {
        let mut request = fixture_request();
        request.element_id = "e999999".to_string();
        let request = resigned(request);

        let diagnostic = failure_diagnostic(
            &fixture_document(),
            &request,
            CHECK_ID,
            "unknown element must fail",
        );

        assert_eq!(
            diagnostic,
            "request element_id does not resolve in document"
        );
    }

    #[test]
    fn missing_element_page_fails_closed() {
        let mut document = fixture_document();
        document.payload.pages.clear();

        let diagnostic = failure_diagnostic(
            &document,
            &fixture_request(),
            CHECK_ID,
            "missing page must fail",
        );

        assert_eq!(diagnostic, "resolved element is missing page");
    }

    #[test]
    fn zero_area_element_bbox_fails_closed() {
        let document =
            document_with_element_bbox(crate::geom::QRect::new(10, 20, 10, 30).unwrap());

        let diagnostic = failure_diagnostic(
            &document,
            &fixture_request(),
            CHECK_ID,
            "zero-area bbox must fail",
        );

        assert_eq!(diagnostic, "resolved element bbox has non-positive area");
    }

    #[test]
    fn negative_element_bbox_fails_closed() {
        let document =
            document_with_element_bbox(crate::geom::QRect::new(-1, 0, 10, 10).unwrap());

        let diagnostic = failure_diagnostic(
            &document,
            &fixture_request(),
            CHECK_ID,
            "negative bbox coordinate must fail",
        );

        assert_eq!(diagnostic, "resolved element bbox exceeds page bounds");
    }

    #[test]
    fn page_overflow_element_bbox_fails_closed() {
        let page = fixture_document()
            .payload
            .pages
            .iter()
            .find(|page| page.id == "p0001")
            .expect("fixture page exists")
            .clone();
        let document = document_with_element_bbox(
            crate::geom::QRect::new(0, 0, page.width + 1, 10).unwrap(),
        );

        let diagnostic = failure_diagnostic(
            &document,
            &fixture_request(),
            CHECK_ID,
            "bbox beyond page width must fail",
        );

        assert_eq!(diagnostic, "resolved element bbox exceeds page bounds");
    }

    #[test]
    fn malformed_check_id_fails_closed() {
        let diagnostic = failure_diagnostic(
            &fixture_document(),
            &fixture_request(),
            "v1",
            "malformed check id must fail",
        );

        assert_eq!(
            diagnostic,
            "descriptor must bind exactly one logical check id"
        );
    }

    #[test]
    fn rendered_descriptor_binds_source_fingerprint() {
        let mut request = fixture_request();
        request.rendering = CropElementRendering::Rendered;
        request.source_pdf_fingerprint = Some(fixture_document().source.fingerprint);
        let request = resigned(request);

        let descriptor =
            resolve_crop_element_descriptor(&fixture_document(), &request, CHECK_ID).unwrap();

        assert_eq!(descriptor.rendering_status, CropElementRendering::Rendered);
        assert_eq!(
            descriptor.source_pdf_fingerprint,
            Some(fixture_document().source.fingerprint)
        );
    }

    #[test]
    fn rendered_request_requires_source_fingerprint() {
        let mut request = fixture_request();
        request.rendering = CropElementRendering::Rendered;
        let request = resigned(request);

        let diagnostic = failure_diagnostic(
            &fixture_document(),
            &request,
            CHECK_ID,
            "rendered request without source fingerprint must fail",
        );

        assert_eq!(
            diagnostic,
            "rendered crop_element request requires source_pdf_fingerprint"
        );
    }

    #[test]
    fn rendered_request_rejects_source_fingerprint_mismatch() {
        let mut request = fixture_request();
        request.rendering = CropElementRendering::Rendered;
        request.source_pdf_fingerprint = Some(format!("sha256:{}", "0".repeat(64)));
        let request = resigned(request);

        let diagnostic = failure_diagnostic(
            &fixture_document(),
            &request,
            CHECK_ID,
            "rendered request with mismatched source fingerprint must fail",
        );

        assert_eq!(
            diagnostic,
            "request source_pdf_fingerprint does not match document source fingerprint"
        );
    }
}