use crate::c14n;
use crate::error::EthosError;
use crate::model::{Document, Page};
use crate::SCHEMA_VERSION;
use serde::{Deserialize, Serialize};
use serde_json::json;
/// `artifact_type` value a request must carry to be accepted by
/// `resolve_crop_element_descriptor`.
const CROP_ELEMENT_REQUEST_ARTIFACT_TYPE: &str = "ethos.crop_element_request.v1";
/// `artifact_type` value stamped on descriptors built by
/// `resolve_crop_element_descriptor`.
const CROP_DESCRIPTOR_ARTIFACT_TYPE: &str = "ethos.crop_descriptor.v1";
/// Version tag included in the identity tuple hashed by `crop_element_request_ref`.
const CROP_ELEMENT_REQUEST_REF_VERSION: &str = "ethos.crop_element_request_ref.v1";
/// Version tag included in the identity tuple hashed by `crop_element_crop_ref`.
const LOGICAL_CROP_REF_VERSION: &str = "ethos.logical_crop_ref.v1";
/// Request to resolve one document element into a crop descriptor.
///
/// Deserialization rejects unknown fields (`deny_unknown_fields`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CropElementRequest {
/// Must equal `ethos.crop_element_request.v1` for the request to be resolved.
pub artifact_type: String,
/// Must equal the crate-level `SCHEMA_VERSION` for the request to be resolved.
pub schema_version: String,
/// Identity of the request; must equal the value `crop_element_request_ref`
/// computes from the other identity fields.
pub request_ref: String,
/// Fingerprint of the targeted document; must be non-empty and equal to the
/// fingerprint of the document the request is resolved against.
pub document_fingerprint: String,
/// Id of the element to resolve, looked up among the document's payload elements.
pub element_id: String,
/// Rendering mode; decides whether `source_pdf_fingerprint` is required or forbidden.
pub rendering: CropElementRendering,
/// Source PDF fingerprint. Required (and checked against the document source
/// fingerprint) for `Rendered` requests, rejected for `DescriptorOnly` requests.
/// Omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub source_pdf_fingerprint: Option<String>,
}
/// Rendering mode of a crop element request; variants (de)serialize in snake_case.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CropElementRendering {
/// Serialized as `descriptor_only`. Requests in this mode must not carry a
/// `source_pdf_fingerprint`.
DescriptorOnly,
/// Serialized as `rendered`. Requests in this mode must carry a
/// `source_pdf_fingerprint` matching the document source fingerprint.
Rendered,
}
impl CropElementRendering {
fn as_contract_str(self) -> &'static str {
match self {
CropElementRendering::DescriptorOnly => "descriptor_only",
CropElementRendering::Rendered => "rendered",
}
}
}
/// Descriptor of a logical crop for one document element.
///
/// All `Option` fields are omitted from serialized output when `None`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CropElementDescriptor {
/// Set to `ethos.crop_descriptor.v1` by `resolve_crop_element_descriptor`.
pub artifact_type: String,
/// Set to the crate-level `SCHEMA_VERSION` by `resolve_crop_element_descriptor`.
pub schema_version: String,
/// Logical crop identity as computed by `crop_element_crop_ref`
/// (`crop-<digest>.json`).
pub crop_ref: String,
/// Fingerprint of the document the element was resolved in.
pub document_fingerprint: String,
/// Id of the page that contains the resolved element.
pub page: String,
/// Element bounding box as `[x0, y0, x1, y1]`.
pub bbox: [i64; 4],
/// Check ids bound to this crop; `resolve_crop_element_descriptor` always
/// emits exactly one.
pub check_ids: Vec<String>,
/// Rendering mode copied from the originating request.
pub rendering_status: CropElementRendering,
/// Source PDF fingerprint copied from the originating request, if any.
#[serde(skip_serializing_if = "Option::is_none")]
pub source_pdf_fingerprint: Option<String>,
// NOTE(review): the `rendered_*` fields are always `None` when the descriptor
// comes from `resolve_crop_element_descriptor`; presumably a later rendering
// step fills them in — confirm against the producer of rendered crops.
#[serde(skip_serializing_if = "Option::is_none")]
pub rendered_ref: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub rendered_format: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub rendered_sha256: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub rendered_width_px: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub rendered_height_px: Option<u32>,
/// Digest of the element text bytes (via `c14n::sha256_hex_bytes`); `None`
/// when the element has no text.
#[serde(skip_serializing_if = "Option::is_none")]
pub text_sha256: Option<String>,
}
/// Failure raised while resolving a crop element request.
///
/// Carries a single static diagnostic string; two errors compare equal when
/// their diagnostics are equal.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CropElementError {
    diagnostic: &'static str,
}

impl CropElementError {
    /// Wraps a static diagnostic message.
    fn new(diagnostic: &'static str) -> Self {
        Self { diagnostic }
    }

    /// Returns the diagnostic message describing why resolution failed.
    pub fn diagnostic(&self) -> &'static str {
        self.diagnostic
    }
}

impl std::error::Error for CropElementError {}

impl core::fmt::Display for CropElementError {
    /// Writes the diagnostic verbatim; formatter width/padding is not applied.
    fn fmt(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        formatter.write_str(self.diagnostic())
    }
}
/// Computes the `request_ref` identity for `request`.
///
/// The identity tuple consists of the document fingerprint, element id,
/// rendering contract string and the request-ref version tag, plus the source
/// PDF fingerprint when the request carries one. The tuple is digested with
/// `c14n::sha256_hex` and returned as `request-<digest>`. The request's own
/// `request_ref` field is not part of the tuple.
///
/// # Errors
///
/// Returns an internal `EthosError` when the digest computation fails.
pub fn crop_element_request_ref(request: &CropElementRequest) -> Result<String, EthosError> {
    let rendering = request.rendering.as_contract_str();
    // The optional fingerprint is listed last so it enters the object after the
    // mandatory keys, exactly as a post-hoc insert would.
    let identity = match &request.source_pdf_fingerprint {
        None => json!({
            "document_fingerprint": request.document_fingerprint,
            "element_id": request.element_id,
            "rendering": rendering,
            "version": CROP_ELEMENT_REQUEST_REF_VERSION,
        }),
        Some(source_pdf_fingerprint) => json!({
            "document_fingerprint": request.document_fingerprint,
            "element_id": request.element_id,
            "rendering": rendering,
            "version": CROP_ELEMENT_REQUEST_REF_VERSION,
            "source_pdf_fingerprint": source_pdf_fingerprint,
        }),
    };
    match c14n::sha256_hex(&identity) {
        Ok(digest) => Ok(format!("request-{digest}")),
        Err(err) => Err(EthosError::internal(format!(
            "crop_element request_ref error: {err}"
        ))),
    }
}
pub fn crop_element_crop_ref(
document_fingerprint: &str,
check_id: &str,
page: &str,
) -> Result<String, EthosError> {
let identity = json!({
"check_id": check_id,
"document_fingerprint": document_fingerprint,
"page": page,
"version": LOGICAL_CROP_REF_VERSION,
});
let digest = c14n::sha256_hex(&identity)
.map_err(|err| EthosError::internal(format!("crop_element crop_ref error: {err}")))?;
Ok(format!("crop-{digest}.json"))
}
/// Resolves `request` against `document` into a descriptor bound to `check_id`.
///
/// Validation runs in a fixed order and stops at the first failure:
/// request header and identity, `check_id` shape, rendering/source binding,
/// element lookup, page lookup, then bounding-box validation. The returned
/// descriptor never carries rendered output (`rendered_*` fields are `None`).
///
/// # Errors
///
/// Returns a `CropElementError` whose diagnostic names the first failed check.
pub fn resolve_crop_element_descriptor(
    document: &Document,
    request: &CropElementRequest,
    check_id: &str,
) -> Result<CropElementDescriptor, CropElementError> {
    check_request_identity(document, request)?;
    if !is_check_id(check_id) {
        return Err(CropElementError::new(
            "descriptor must bind exactly one logical check id",
        ));
    }
    check_rendering_binding(document, request)?;

    let Some(element) = document
        .payload
        .elements
        .iter()
        .find(|candidate| candidate.id == request.element_id)
    else {
        return Err(CropElementError::new(
            "request element_id does not resolve in document",
        ));
    };
    let Some(page) = document
        .payload
        .pages
        .iter()
        .find(|candidate| candidate.id == element.page)
    else {
        return Err(CropElementError::new("resolved element is missing page"));
    };
    validate_resolved_bbox(element.bbox, page)?;

    // Only elements that carry text get a text digest.
    let text_sha256 = element
        .text
        .as_deref()
        .map(|content| c14n::sha256_hex_bytes(content.as_bytes()));
    let crop_ref = match crop_element_crop_ref(&document.fingerprint, check_id, &element.page) {
        Ok(crop_ref) => crop_ref,
        Err(_) => {
            return Err(CropElementError::new(
                "descriptor crop_ref does not match logical identity tuple",
            ));
        }
    };

    Ok(CropElementDescriptor {
        artifact_type: CROP_DESCRIPTOR_ARTIFACT_TYPE.to_string(),
        schema_version: SCHEMA_VERSION.to_string(),
        crop_ref,
        document_fingerprint: document.fingerprint.clone(),
        page: element.page.clone(),
        bbox: element.bbox.to_array(),
        check_ids: vec![check_id.to_string()],
        rendering_status: request.rendering,
        source_pdf_fingerprint: request.source_pdf_fingerprint.clone(),
        rendered_ref: None,
        rendered_format: None,
        rendered_sha256: None,
        rendered_width_px: None,
        rendered_height_px: None,
        text_sha256,
    })
}

/// Checks the request header, its self-declared `request_ref`, and that it
/// targets `document`.
fn check_request_identity(
    document: &Document,
    request: &CropElementRequest,
) -> Result<(), CropElementError> {
    const REQUEST_REF_MISMATCH: &str =
        "request_ref does not match crop element request identity tuple";

    if request.artifact_type != CROP_ELEMENT_REQUEST_ARTIFACT_TYPE {
        return Err(CropElementError::new(
            "request artifact_type is not ethos.crop_element_request.v1",
        ));
    }
    if request.schema_version != SCHEMA_VERSION {
        return Err(CropElementError::new(
            "request schema_version is not supported",
        ));
    }
    if request.request_ref.is_empty() {
        return Err(CropElementError::new("request_ref is missing"));
    }
    if request.document_fingerprint.is_empty() || document.fingerprint.is_empty() {
        return Err(CropElementError::new("document_fingerprint is missing"));
    }
    // A failure to recompute the ref is reported the same way as a mismatch.
    match crop_element_request_ref(request) {
        Ok(expected) if request.request_ref == expected => {}
        _ => return Err(CropElementError::new(REQUEST_REF_MISMATCH)),
    }
    if request.document_fingerprint != document.fingerprint {
        return Err(CropElementError::new(
            "request document_fingerprint does not match document fingerprint",
        ));
    }
    Ok(())
}

/// Checks that the presence and value of `source_pdf_fingerprint` agree with
/// the requested rendering mode.
fn check_rendering_binding(
    document: &Document,
    request: &CropElementRequest,
) -> Result<(), CropElementError> {
    match (
        request.rendering,
        request.source_pdf_fingerprint.as_deref(),
    ) {
        (CropElementRendering::DescriptorOnly, None) => Ok(()),
        (CropElementRendering::DescriptorOnly, Some(_)) => Err(CropElementError::new(
            "descriptor_only crop_element request must not include source_pdf_fingerprint",
        )),
        (CropElementRendering::Rendered, None) => Err(CropElementError::new(
            "rendered crop_element request requires source_pdf_fingerprint",
        )),
        (CropElementRendering::Rendered, Some(source_pdf_fingerprint)) => {
            if source_pdf_fingerprint != document.source.fingerprint {
                Err(CropElementError::new(
                    "request source_pdf_fingerprint does not match document source fingerprint",
                ))
            } else {
                Ok(())
            }
        }
    }
}
/// Checks that `bbox` has positive area and lies within `page`.
///
/// The area check runs first, so a degenerate box is reported as such even if
/// it also falls outside the page.
fn validate_resolved_bbox(bbox: crate::geom::QRect, page: &Page) -> Result<(), CropElementError> {
    let [left, top, right, bottom] = bbox.to_array();

    let has_positive_area = left < right && top < bottom;
    if !has_positive_area {
        return Err(CropElementError::new(
            "resolved element bbox has non-positive area",
        ));
    }

    let within_page = left >= 0 && top >= 0 && right <= page.width && bottom <= page.height;
    if !within_page {
        return Err(CropElementError::new(
            "resolved element bbox exceeds page bounds",
        ));
    }

    Ok(())
}
/// Returns `true` when `value` is a lowercase `v` followed by exactly four
/// ASCII digits (for example `v0001`).
fn is_check_id(value: &str) -> bool {
    match value.strip_prefix('v') {
        // Any byte of a multi-byte character fails the ASCII-digit test, so a
        // byte-level scan accepts exactly the same strings as a char-level one.
        Some(digits) => digits.len() == 4 && digits.bytes().all(|byte| byte.is_ascii_digit()),
        None => false,
    }
}
/// Unit tests driven by the committed schema example fixtures.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::Document;
    use serde_json::Value;

    /// Parses the committed example document.
    fn fixture_document() -> Document {
        serde_json::from_str(include_str!(
            "../../../schemas/examples/document.example.json"
        ))
        .unwrap()
    }

    /// Parses the committed example crop element request.
    fn fixture_request() -> CropElementRequest {
        serde_json::from_str(include_str!(
            "../../../schemas/examples/crop-element-request.example.json"
        ))
        .unwrap()
    }

    /// Parses the committed example crop descriptor as raw JSON.
    fn expected_descriptor_value() -> Value {
        serde_json::from_str(include_str!(
            "../../../schemas/examples/crop-descriptor.example.json"
        ))
        .unwrap()
    }

    /// Returns the fixture document with the bbox of the element targeted by
    /// the fixture request replaced by `bbox`.
    fn document_with_element_bbox(bbox: crate::geom::QRect) -> Document {
        let mut document = fixture_document();
        // Parse the request fixture once, not once per candidate element.
        let element_id = fixture_request().element_id;
        let element = document
            .payload
            .elements
            .iter_mut()
            .find(|element| element.id == element_id)
            .expect("fixture element exists");
        element.bbox = bbox;
        document
    }

    #[test]
    fn crop_element_descriptor_matches_committed_example() {
        let descriptor =
            resolve_crop_element_descriptor(&fixture_document(), &fixture_request(), "v0001")
                .unwrap();
        assert_eq!(
            serde_json::to_value(descriptor).unwrap(),
            expected_descriptor_value()
        );
    }

    #[test]
    fn crop_element_request_ref_matches_committed_example() {
        let request = fixture_request();
        assert_eq!(
            crop_element_request_ref(&request).unwrap(),
            request.request_ref
        );
    }

    #[test]
    fn crop_element_request_rejects_unknown_fields() {
        let mut request = serde_json::to_value(fixture_request()).unwrap();
        request
            .as_object_mut()
            .unwrap()
            .insert("unexpected".to_string(), Value::Bool(true));
        let error = serde_json::from_value::<CropElementRequest>(request).unwrap_err();
        assert!(error.to_string().contains("unknown field `unexpected`"));
    }

    #[test]
    fn crop_element_crop_ref_matches_committed_descriptor() {
        let document = fixture_document();
        let descriptor = expected_descriptor_value();
        assert_eq!(
            crop_element_crop_ref(&document.fingerprint, "v0001", "p0001").unwrap(),
            descriptor["crop_ref"]
        );
    }

    #[test]
    fn stale_request_ref_fails_closed() {
        let mut request = fixture_request();
        request.request_ref = format!("request-{}", "0".repeat(64));
        let err = resolve_crop_element_descriptor(&fixture_document(), &request, "v0001")
            .expect_err("stale request_ref must fail");
        assert_eq!(
            err.diagnostic(),
            "request_ref does not match crop element request identity tuple"
        );
    }

    #[test]
    fn document_fingerprint_mismatch_fails_closed() {
        let mut request = fixture_request();
        request.document_fingerprint = format!("sha256:{}", "0".repeat(64));
        request.request_ref = crop_element_request_ref(&request).unwrap();
        let err = resolve_crop_element_descriptor(&fixture_document(), &request, "v0001")
            .expect_err("document fingerprint mismatch must fail");
        assert_eq!(
            err.diagnostic(),
            "request document_fingerprint does not match document fingerprint"
        );
    }

    #[test]
    fn missing_document_fingerprint_fails_closed() {
        let mut document = fixture_document();
        document.fingerprint.clear();
        let mut request = fixture_request();
        request.document_fingerprint.clear();
        request.request_ref = crop_element_request_ref(&request).unwrap();
        let err = resolve_crop_element_descriptor(&document, &request, "v0001")
            .expect_err("missing document fingerprint must fail");
        assert_eq!(err.diagnostic(), "document_fingerprint is missing");
    }

    #[test]
    fn unresolved_element_fails_closed() {
        let mut request = fixture_request();
        request.element_id = "e999999".to_string();
        request.request_ref = crop_element_request_ref(&request).unwrap();
        let err = resolve_crop_element_descriptor(&fixture_document(), &request, "v0001")
            .expect_err("unknown element must fail");
        assert_eq!(
            err.diagnostic(),
            "request element_id does not resolve in document"
        );
    }

    #[test]
    fn missing_element_page_fails_closed() {
        let mut document = fixture_document();
        document.payload.pages.clear();
        let err = resolve_crop_element_descriptor(&document, &fixture_request(), "v0001")
            .expect_err("missing page must fail");
        assert_eq!(err.diagnostic(), "resolved element is missing page");
    }

    #[test]
    fn zero_area_element_bbox_fails_closed() {
        let document =
            document_with_element_bbox(crate::geom::QRect::new(10, 20, 10, 30).unwrap());
        let err = resolve_crop_element_descriptor(&document, &fixture_request(), "v0001")
            .expect_err("zero-area bbox must fail");
        assert_eq!(
            err.diagnostic(),
            "resolved element bbox has non-positive area"
        );
    }

    #[test]
    fn negative_element_bbox_fails_closed() {
        let document =
            document_with_element_bbox(crate::geom::QRect::new(-1, 0, 10, 10).unwrap());
        let err = resolve_crop_element_descriptor(&document, &fixture_request(), "v0001")
            .expect_err("negative bbox coordinate must fail");
        assert_eq!(
            err.diagnostic(),
            "resolved element bbox exceeds page bounds"
        );
    }

    #[test]
    fn page_overflow_element_bbox_fails_closed() {
        let page_width = fixture_document()
            .payload
            .pages
            .iter()
            .find(|page| page.id == "p0001")
            .expect("fixture page exists")
            .width;
        let document = document_with_element_bbox(
            crate::geom::QRect::new(0, 0, page_width + 1, 10).unwrap(),
        );
        let err = resolve_crop_element_descriptor(&document, &fixture_request(), "v0001")
            .expect_err("bbox beyond page width must fail");
        assert_eq!(
            err.diagnostic(),
            "resolved element bbox exceeds page bounds"
        );
    }

    #[test]
    fn malformed_check_id_fails_closed() {
        let err = resolve_crop_element_descriptor(&fixture_document(), &fixture_request(), "v1")
            .expect_err("malformed check id must fail");
        assert_eq!(
            err.diagnostic(),
            "descriptor must bind exactly one logical check id"
        );
    }

    #[test]
    fn rendered_descriptor_binds_source_fingerprint() {
        // One parsed document serves as both the resolution target and the
        // source of the expected fingerprint.
        let document = fixture_document();
        let mut request = fixture_request();
        request.rendering = CropElementRendering::Rendered;
        request.source_pdf_fingerprint = Some(document.source.fingerprint.clone());
        request.request_ref = crop_element_request_ref(&request).unwrap();
        let descriptor = resolve_crop_element_descriptor(&document, &request, "v0001").unwrap();
        assert_eq!(descriptor.rendering_status, CropElementRendering::Rendered);
        assert_eq!(
            descriptor.source_pdf_fingerprint,
            Some(document.source.fingerprint)
        );
    }

    #[test]
    fn rendered_request_requires_source_fingerprint() {
        let mut request = fixture_request();
        request.rendering = CropElementRendering::Rendered;
        request.request_ref = crop_element_request_ref(&request).unwrap();
        let err = resolve_crop_element_descriptor(&fixture_document(), &request, "v0001")
            .expect_err("rendered request without source fingerprint must fail");
        assert_eq!(
            err.diagnostic(),
            "rendered crop_element request requires source_pdf_fingerprint"
        );
    }

    #[test]
    fn rendered_request_rejects_source_fingerprint_mismatch() {
        let mut request = fixture_request();
        request.rendering = CropElementRendering::Rendered;
        request.source_pdf_fingerprint = Some(format!("sha256:{}", "0".repeat(64)));
        request.request_ref = crop_element_request_ref(&request).unwrap();
        let err = resolve_crop_element_descriptor(&fixture_document(), &request, "v0001")
            .expect_err("rendered request with mismatched source fingerprint must fail");
        assert_eq!(
            err.diagnostic(),
            "request source_pdf_fingerprint does not match document source fingerprint"
        );
    }
}