Skip to main content

ethos_core/
crop_element.rs

1/*
2 * Copyright 2026 The Ethos maintainers
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//! Internal source-only `crop_element` resolver for Milestone D contract work.
18//!
19//! This module validates the committed request and descriptor identity rules over
20//! native Ethos document JSON. It intentionally does not add a CLI command,
21//! binding surface, rendered backend, or sandbox boundary.
22
23use crate::c14n;
24use crate::error::EthosError;
25use crate::model::{Document, Page};
26use crate::SCHEMA_VERSION;
27use serde::{Deserialize, Serialize};
28use serde_json::json;
29
/// `artifact_type` value a request must carry for the resolver to accept it.
const CROP_ELEMENT_REQUEST_ARTIFACT_TYPE: &str = "ethos.crop_element_request.v1";
/// `artifact_type` value written into every descriptor the resolver returns.
const CROP_DESCRIPTOR_ARTIFACT_TYPE: &str = "ethos.crop_descriptor.v1";
/// `version` member of the identity tuple that is hashed into a `request_ref`.
const CROP_ELEMENT_REQUEST_REF_VERSION: &str = "ethos.crop_element_request_ref.v1";
/// `version` member of the identity tuple that is hashed into a `crop_ref`.
const LOGICAL_CROP_REF_VERSION: &str = "ethos.logical_crop_ref.v1";
34
/// Source-only `crop_element` request envelope.
///
/// Deserialization rejects any key that is not one of the fields below
/// (`deny_unknown_fields`). The fields are plain strings at this level; the
/// contract checks are applied by `resolve_crop_element_descriptor`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CropElementRequest {
    /// Request artifact type, currently `ethos.crop_element_request.v1`.
    pub artifact_type: String,
    /// Contract schema version; the resolver requires it to equal the crate's
    /// `SCHEMA_VERSION`.
    pub schema_version: String,
    /// Stable request identity. The resolver recomputes it with
    /// `crop_element_request_ref` and rejects the request on any difference.
    pub request_ref: String,
    /// Expected canonical document fingerprint. Must be non-empty and equal to
    /// the fingerprint of the document the request is resolved against.
    pub document_fingerprint: String,
    /// Element id in the canonical Ethos document graph.
    pub element_id: String,
    /// Requested rendering mode.
    pub rendering: CropElementRendering,
    /// Fingerprint of caller-provided source PDF bytes for rendered requests.
    ///
    /// The resolver requires it for `rendered` requests and rejects it on
    /// `descriptor_only` requests. The key is omitted from serialized output
    /// when the value is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_pdf_fingerprint: Option<String>,
}
55
/// Supported `crop_element` rendering modes.
///
/// Variants are (de)serialized in snake case, i.e. as the strings
/// `descriptor_only` and `rendered`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CropElementRendering {
    /// Emit only a JSON crop descriptor.
    DescriptorOnly,
    /// Emit a crop descriptor plus rendered crop artifact.
    Rendered,
}
65
66impl CropElementRendering {
67    fn as_contract_str(self) -> &'static str {
68        match self {
69            CropElementRendering::DescriptorOnly => "descriptor_only",
70            CropElementRendering::Rendered => "rendered",
71        }
72    }
73}
74
/// Deterministic JSON descriptor for one resolved crop element.
///
/// Every `Option` field is omitted from serialized output when it is `None`.
/// The resolver in this module fills `source_pdf_fingerprint` and
/// `text_sha256` when available and always leaves the `rendered_*` fields
/// unset.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct CropElementDescriptor {
    /// Descriptor artifact type, currently `ethos.crop_descriptor.v1`.
    pub artifact_type: String,
    /// Contract schema version.
    pub schema_version: String,
    /// Stable descriptor filename, as produced by `crop_element_crop_ref`
    /// (`crop-<digest>.json`).
    pub crop_ref: String,
    /// Canonical document fingerprint.
    pub document_fingerprint: String,
    /// Resolved page id.
    pub page: String,
    /// Resolved element bounding box in contract array form.
    pub bbox: [i64; 4],
    /// Logical verification check ids bound to this descriptor. The resolver in
    /// this module emits exactly one id.
    pub check_ids: Vec<String>,
    /// Descriptor rendering status; the resolver copies the request's rendering
    /// mode here.
    pub rendering_status: CropElementRendering,
    /// Fingerprint of caller-provided source PDF bytes for rendered descriptors.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_pdf_fingerprint: Option<String>,
    /// Stable rendered crop filename.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub rendered_ref: Option<String>,
    /// Rendered crop format.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub rendered_format: Option<String>,
    /// SHA-256 of rendered crop bytes.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub rendered_sha256: Option<String>,
    /// Rendered crop width in pixels.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub rendered_width_px: Option<u32>,
    /// Rendered crop height in pixels.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub rendered_height_px: Option<u32>,
    /// SHA-256 of resolved element text when textual evidence is present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text_sha256: Option<String>,
}
116
/// Diagnostic returned when the `crop_element` resolver refuses a request.
///
/// The error wraps a single `'static` message, so a given failure always
/// yields the same text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CropElementError {
    diagnostic: &'static str,
}

impl CropElementError {
    // Private constructor: callers outside this module only observe errors.
    fn new(diagnostic: &'static str) -> Self {
        Self { diagnostic }
    }

    /// The fixed diagnostic message describing why resolution failed.
    pub fn diagnostic(&self) -> &'static str {
        self.diagnostic
    }
}

impl std::fmt::Display for CropElementError {
    /// Writes the diagnostic verbatim; width and precision flags are not applied.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        formatter.write_str(self.diagnostic)
    }
}

impl std::error::Error for CropElementError {}
141
142/// Compute the source-only `crop_element` request identity.
143pub fn crop_element_request_ref(request: &CropElementRequest) -> Result<String, EthosError> {
144    let mut identity = json!({
145        "document_fingerprint": request.document_fingerprint,
146        "element_id": request.element_id,
147        "rendering": request.rendering.as_contract_str(),
148        "version": CROP_ELEMENT_REQUEST_REF_VERSION,
149    });
150    if let Some(source_pdf_fingerprint) = &request.source_pdf_fingerprint {
151        identity.as_object_mut().expect("json object").insert(
152            "source_pdf_fingerprint".to_string(),
153            json!(source_pdf_fingerprint),
154        );
155    }
156    let digest = c14n::sha256_hex(&identity)
157        .map_err(|err| EthosError::internal(format!("crop_element request_ref error: {err}")))?;
158    Ok(format!("request-{digest}"))
159}
160
161/// Compute the logical crop descriptor filename for a bound verification check.
162pub fn crop_element_crop_ref(
163    document_fingerprint: &str,
164    check_id: &str,
165    page: &str,
166) -> Result<String, EthosError> {
167    let identity = json!({
168        "check_id": check_id,
169        "document_fingerprint": document_fingerprint,
170        "page": page,
171        "version": LOGICAL_CROP_REF_VERSION,
172    });
173    let digest = c14n::sha256_hex(&identity)
174        .map_err(|err| EthosError::internal(format!("crop_element crop_ref error: {err}")))?;
175    Ok(format!("crop-{digest}.json"))
176}
177
178/// Resolve one descriptor-only crop descriptor from a native Ethos document and request.
179pub fn resolve_crop_element_descriptor(
180    document: &Document,
181    request: &CropElementRequest,
182    check_id: &str,
183) -> Result<CropElementDescriptor, CropElementError> {
184    if request.artifact_type != CROP_ELEMENT_REQUEST_ARTIFACT_TYPE {
185        return Err(CropElementError::new(
186            "request artifact_type is not ethos.crop_element_request.v1",
187        ));
188    }
189    if request.schema_version != SCHEMA_VERSION {
190        return Err(CropElementError::new(
191            "request schema_version is not supported",
192        ));
193    }
194    if request.request_ref.is_empty() {
195        return Err(CropElementError::new("request_ref is missing"));
196    }
197    if request.document_fingerprint.is_empty() || document.fingerprint.is_empty() {
198        return Err(CropElementError::new("document_fingerprint is missing"));
199    }
200    let expected_request_ref = crop_element_request_ref(request).map_err(|_| {
201        CropElementError::new("request_ref does not match crop element request identity tuple")
202    })?;
203    if request.request_ref != expected_request_ref {
204        return Err(CropElementError::new(
205            "request_ref does not match crop element request identity tuple",
206        ));
207    }
208    if request.document_fingerprint != document.fingerprint {
209        return Err(CropElementError::new(
210            "request document_fingerprint does not match document fingerprint",
211        ));
212    }
213    if !is_check_id(check_id) {
214        return Err(CropElementError::new(
215            "descriptor must bind exactly one logical check id",
216        ));
217    }
218    match request.rendering {
219        CropElementRendering::DescriptorOnly => {
220            if request.source_pdf_fingerprint.is_some() {
221                return Err(CropElementError::new(
222                    "descriptor_only crop_element request must not include source_pdf_fingerprint",
223                ));
224            }
225        }
226        CropElementRendering::Rendered => {
227            let Some(source_pdf_fingerprint) = request.source_pdf_fingerprint.as_deref() else {
228                return Err(CropElementError::new(
229                    "rendered crop_element request requires source_pdf_fingerprint",
230                ));
231            };
232            if source_pdf_fingerprint != document.source.fingerprint {
233                return Err(CropElementError::new(
234                    "request source_pdf_fingerprint does not match document source fingerprint",
235                ));
236            }
237        }
238    }
239
240    let element = document
241        .payload
242        .elements
243        .iter()
244        .find(|element| element.id == request.element_id)
245        .ok_or_else(|| CropElementError::new("request element_id does not resolve in document"))?;
246    let page = document
247        .payload
248        .pages
249        .iter()
250        .find(|page| page.id == element.page)
251        .ok_or_else(|| CropElementError::new("resolved element is missing page"))?;
252    validate_resolved_bbox(element.bbox, page)?;
253
254    let text_sha256 = element
255        .text
256        .as_deref()
257        .map(|text| c14n::sha256_hex_bytes(text.as_bytes()));
258    let crop_ref =
259        crop_element_crop_ref(&document.fingerprint, check_id, &element.page).map_err(|_| {
260            CropElementError::new("descriptor crop_ref does not match logical identity tuple")
261        })?;
262
263    Ok(CropElementDescriptor {
264        artifact_type: CROP_DESCRIPTOR_ARTIFACT_TYPE.to_string(),
265        schema_version: SCHEMA_VERSION.to_string(),
266        crop_ref,
267        document_fingerprint: document.fingerprint.clone(),
268        page: element.page.clone(),
269        bbox: element.bbox.to_array(),
270        check_ids: vec![check_id.to_string()],
271        rendering_status: request.rendering,
272        source_pdf_fingerprint: request.source_pdf_fingerprint.clone(),
273        rendered_ref: None,
274        rendered_format: None,
275        rendered_sha256: None,
276        rendered_width_px: None,
277        rendered_height_px: None,
278        text_sha256,
279    })
280}
281
282fn validate_resolved_bbox(bbox: crate::geom::QRect, page: &Page) -> Result<(), CropElementError> {
283    let [x0, y0, x1, y1] = bbox.to_array();
284    if x0 >= x1 || y0 >= y1 {
285        return Err(CropElementError::new(
286            "resolved element bbox has non-positive area",
287        ));
288    }
289    if x0 < 0 || y0 < 0 || x1 > page.width || y1 > page.height {
290        return Err(CropElementError::new(
291            "resolved element bbox exceeds page bounds",
292        ));
293    }
294    Ok(())
295}
296
/// Report whether `value` is a logical check id: the letter `v` followed by
/// exactly four ASCII digits (five bytes in total).
fn is_check_id(value: &str) -> bool {
    // Working on bytes is safe here: any non-ASCII character contributes bytes
    // >= 0x80, none of which is an ASCII digit.
    matches!(
        value.as_bytes(),
        [b'v', digits @ ..] if digits.len() == 4 && digits.iter().all(u8::is_ascii_digit)
    )
}
303
#[cfg(test)]
mod tests {
    //! Resolver tests driven by the committed schema examples. Each negative
    //! test mutates one aspect of the fixtures and asserts the exact
    //! diagnostic the resolver reports.

    use super::*;
    use crate::model::Document;
    use serde_json::Value;

    /// Committed example document.
    fn fixture_document() -> Document {
        serde_json::from_str(include_str!(
            "../../../schemas/examples/document.example.json"
        ))
        .unwrap()
    }

    /// Committed example request targeting an element of `fixture_document`.
    fn fixture_request() -> CropElementRequest {
        serde_json::from_str(include_str!(
            "../../../schemas/examples/crop-element-request.example.json"
        ))
        .unwrap()
    }

    /// Committed example descriptor, kept as raw JSON for structural comparison.
    fn expected_descriptor_value() -> Value {
        serde_json::from_str(include_str!(
            "../../../schemas/examples/crop-descriptor.example.json"
        ))
        .unwrap()
    }

    /// Overwrite the bbox of the element targeted by the fixture request.
    ///
    /// The request fixture is parsed once up front instead of once per element
    /// visited by the search closure.
    fn set_fixture_element_bbox(document: &mut Document, bbox: crate::geom::QRect) {
        let element_id = fixture_request().element_id;
        let element = document
            .payload
            .elements
            .iter_mut()
            .find(|element| element.id == element_id)
            .expect("fixture element exists");
        element.bbox = bbox;
    }

    #[test]
    fn crop_element_descriptor_matches_committed_example() {
        let descriptor =
            resolve_crop_element_descriptor(&fixture_document(), &fixture_request(), "v0001")
                .unwrap();

        assert_eq!(
            serde_json::to_value(descriptor).unwrap(),
            expected_descriptor_value()
        );
    }

    #[test]
    fn crop_element_request_ref_matches_committed_example() {
        let request = fixture_request();

        assert_eq!(
            crop_element_request_ref(&request).unwrap(),
            request.request_ref
        );
    }

    #[test]
    fn crop_element_request_rejects_unknown_fields() {
        let mut request = serde_json::to_value(fixture_request()).unwrap();
        request
            .as_object_mut()
            .unwrap()
            .insert("unexpected".to_string(), Value::Bool(true));

        let error = serde_json::from_value::<CropElementRequest>(request).unwrap_err();
        assert!(error.to_string().contains("unknown field `unexpected`"));
    }

    #[test]
    fn crop_element_crop_ref_matches_committed_descriptor() {
        let document = fixture_document();
        let descriptor = expected_descriptor_value();

        assert_eq!(
            crop_element_crop_ref(&document.fingerprint, "v0001", "p0001").unwrap(),
            descriptor["crop_ref"]
        );
    }

    #[test]
    fn stale_request_ref_fails_closed() {
        let mut request = fixture_request();
        request.request_ref = format!("request-{}", "0".repeat(64));

        let err = resolve_crop_element_descriptor(&fixture_document(), &request, "v0001")
            .expect_err("stale request_ref must fail");

        assert_eq!(
            err.diagnostic(),
            "request_ref does not match crop element request identity tuple"
        );
    }

    #[test]
    fn document_fingerprint_mismatch_fails_closed() {
        let mut request = fixture_request();
        request.document_fingerprint = format!("sha256:{}", "0".repeat(64));
        // Re-derive the ref so the failure is attributed to the fingerprint.
        request.request_ref = crop_element_request_ref(&request).unwrap();

        let err = resolve_crop_element_descriptor(&fixture_document(), &request, "v0001")
            .expect_err("document fingerprint mismatch must fail");

        assert_eq!(
            err.diagnostic(),
            "request document_fingerprint does not match document fingerprint"
        );
    }

    #[test]
    fn missing_document_fingerprint_fails_closed() {
        let mut document = fixture_document();
        document.fingerprint.clear();
        let mut request = fixture_request();
        request.document_fingerprint.clear();
        request.request_ref = crop_element_request_ref(&request).unwrap();

        let err = resolve_crop_element_descriptor(&document, &request, "v0001")
            .expect_err("missing document fingerprint must fail");

        assert_eq!(err.diagnostic(), "document_fingerprint is missing");
    }

    #[test]
    fn unresolved_element_fails_closed() {
        let mut request = fixture_request();
        request.element_id = "e999999".to_string();
        request.request_ref = crop_element_request_ref(&request).unwrap();

        let err = resolve_crop_element_descriptor(&fixture_document(), &request, "v0001")
            .expect_err("unknown element must fail");

        assert_eq!(
            err.diagnostic(),
            "request element_id does not resolve in document"
        );
    }

    #[test]
    fn missing_element_page_fails_closed() {
        let mut document = fixture_document();
        document.payload.pages.clear();

        let err = resolve_crop_element_descriptor(&document, &fixture_request(), "v0001")
            .expect_err("missing page must fail");

        assert_eq!(err.diagnostic(), "resolved element is missing page");
    }

    #[test]
    fn zero_area_element_bbox_fails_closed() {
        let mut document = fixture_document();
        set_fixture_element_bbox(
            &mut document,
            crate::geom::QRect::new(10, 20, 10, 30).unwrap(),
        );

        let err = resolve_crop_element_descriptor(&document, &fixture_request(), "v0001")
            .expect_err("zero-area bbox must fail");

        assert_eq!(
            err.diagnostic(),
            "resolved element bbox has non-positive area"
        );
    }

    #[test]
    fn negative_element_bbox_fails_closed() {
        let mut document = fixture_document();
        set_fixture_element_bbox(
            &mut document,
            crate::geom::QRect::new(-1, 0, 10, 10).unwrap(),
        );

        let err = resolve_crop_element_descriptor(&document, &fixture_request(), "v0001")
            .expect_err("negative bbox coordinate must fail");

        assert_eq!(
            err.diagnostic(),
            "resolved element bbox exceeds page bounds"
        );
    }

    #[test]
    fn page_overflow_element_bbox_fails_closed() {
        let mut document = fixture_document();
        // Only the width is needed, so read it instead of cloning the page.
        let page_width = document
            .payload
            .pages
            .iter()
            .find(|page| page.id == "p0001")
            .expect("fixture page exists")
            .width;
        set_fixture_element_bbox(
            &mut document,
            crate::geom::QRect::new(0, 0, page_width + 1, 10).unwrap(),
        );

        let err = resolve_crop_element_descriptor(&document, &fixture_request(), "v0001")
            .expect_err("bbox beyond page width must fail");

        assert_eq!(
            err.diagnostic(),
            "resolved element bbox exceeds page bounds"
        );
    }

    #[test]
    fn malformed_check_id_fails_closed() {
        let err = resolve_crop_element_descriptor(&fixture_document(), &fixture_request(), "v1")
            .expect_err("malformed check id must fail");

        assert_eq!(
            err.diagnostic(),
            "descriptor must bind exactly one logical check id"
        );
    }

    #[test]
    fn rendered_descriptor_binds_source_fingerprint() {
        // Parse the document fixture once and reuse it for setup and assertions.
        let document = fixture_document();
        let mut request = fixture_request();
        request.rendering = CropElementRendering::Rendered;
        request.source_pdf_fingerprint = Some(document.source.fingerprint.clone());
        request.request_ref = crop_element_request_ref(&request).unwrap();

        let descriptor = resolve_crop_element_descriptor(&document, &request, "v0001").unwrap();

        assert_eq!(descriptor.rendering_status, CropElementRendering::Rendered);
        assert_eq!(
            descriptor.source_pdf_fingerprint.as_deref(),
            Some(document.source.fingerprint.as_str())
        );
    }

    #[test]
    fn rendered_request_requires_source_fingerprint() {
        let mut request = fixture_request();
        request.rendering = CropElementRendering::Rendered;
        request.request_ref = crop_element_request_ref(&request).unwrap();

        let err = resolve_crop_element_descriptor(&fixture_document(), &request, "v0001")
            .expect_err("rendered request without source fingerprint must fail");

        assert_eq!(
            err.diagnostic(),
            "rendered crop_element request requires source_pdf_fingerprint"
        );
    }

    #[test]
    fn rendered_request_rejects_source_fingerprint_mismatch() {
        let mut request = fixture_request();
        request.rendering = CropElementRendering::Rendered;
        request.source_pdf_fingerprint = Some(format!("sha256:{}", "0".repeat(64)));
        request.request_ref = crop_element_request_ref(&request).unwrap();

        let err = resolve_crop_element_descriptor(&fixture_document(), &request, "v0001")
            .expect_err("rendered request with mismatched source fingerprint must fail");

        assert_eq!(
            err.diagnostic(),
            "request source_pdf_fingerprint does not match document source fingerprint"
        );
    }
}