Skip to main content

dongler_core/
image.rs

1use crate::engine::ExtractionEngine;
2use crate::error::{DonglerError, Result};
3use crate::ir::{
4    Asset, BBox, Block, Confidence, Document, FigureBlock, ImageObject, Metadata, Page,
5    SourceAnchor, Warning, SCHEMA_VERSION,
6};
7use crate::source::Source;
8
/// Extraction-method label recorded on the source anchor this engine emits.
const EXTRACTION_METHOD: &str = "image_native";
10
/// Stateless extraction engine for raster image sources.
///
/// The type carries no data; it exists to host the `ExtractionEngine` implementation.
#[derive(Debug, Default, Clone, Copy)]
pub struct ImageEngine;
13
/// Dimensions decoded from an image header by one of the `parse_*` functions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ImageInfo {
    /// Width as stored in the header.
    width: u32,
    /// Height as stored in the header.
    height: u32,
}
19
20impl ExtractionEngine for ImageEngine {
21    fn name(&self) -> &'static str {
22        "image-native"
23    }
24
25    fn extract(&self, source: &Source) -> Result<Document> {
26        let bytes = source.bytes.as_deref().unwrap_or(source.content.as_bytes());
27        let info = image_info(bytes)
28            .ok_or_else(|| DonglerError::image("unsupported or malformed image header"))?;
29        let bbox = BBox {
30            x: 0.0,
31            y: 0.0,
32            width: info.width as f32,
33            height: info.height as f32,
34        };
35        let image = ImageObject {
36            id: "image-1".to_owned(),
37            object_id: None,
38            bbox: Some(bbox),
39            width: Some(info.width),
40            height: Some(info.height),
41        };
42        let asset = Asset {
43            id: image.id.clone(),
44            kind: "image".to_owned(),
45            object_id: None,
46            bbox: Some(bbox),
47            width: Some(info.width),
48            height: Some(info.height),
49        };
50        let figure = Block::Figure(FigureBlock {
51            alt_text: source
52                .path
53                .as_deref()
54                .and_then(|path| std::path::Path::new(path).file_name())
55                .and_then(|name| name.to_str())
56                .map(str::to_owned),
57            caption: None,
58            bbox: Some(bbox),
59            image_ref: Some(image.id.clone()),
60            source_anchors: vec![SourceAnchor {
61                page_number: 1,
62                pdf_object_ids: Vec::new(),
63                bbox: Some(bbox),
64                extraction_method: EXTRACTION_METHOD.to_owned(),
65            }],
66            confidence: Some(Confidence {
67                score: 0.9,
68                calibrated: false,
69            }), ..Default::default()
70        });
71
72        Ok(Document {
73            schema_version: SCHEMA_VERSION.to_owned(),
74            metadata: Metadata {
75                format: source.format.clone(),
76                engine: self.name().to_owned(),
77                source: source.path.clone(),
78                title: None,
79                character_count: 0,
80                word_count: 0,
81                block_count: 1,
82                file_size_bytes: Some(bytes.len() as u64),
83                pdf_version: None,
84                encrypted: false,
85            },
86            pages: vec![Page {
87                number: 1,
88                width: Some(info.width as f32),
89                height: Some(info.height as f32),
90                rotation: None,
91                bbox: Some(bbox),
92                blocks: vec![figure],
93                images: vec![image],
94                assets: vec![asset.clone()],
95                warnings: Vec::new(), ..Default::default()
96            }],
97            assets: vec![asset],
98            warnings: Vec::<Warning>::new(),
99        })
100    }
101}
102
103fn image_info(bytes: &[u8]) -> Option<ImageInfo> {
104    parse_png(bytes)
105        .or_else(|| parse_jpeg(bytes))
106        .or_else(|| parse_gif(bytes))
107        .or_else(|| parse_bmp(bytes))
108        .or_else(|| parse_tiff(bytes))
109        .or_else(|| parse_webp(bytes))
110}
111
112fn parse_png(bytes: &[u8]) -> Option<ImageInfo> {
113    if bytes.len() < 24 || !bytes.starts_with(b"\x89PNG\r\n\x1a\n") || &bytes[12..16] != b"IHDR" {
114        return None;
115    }
116    Some(ImageInfo {
117        width: u32::from_be_bytes(bytes[16..20].try_into().ok()?),
118        height: u32::from_be_bytes(bytes[20..24].try_into().ok()?),
119    })
120}
121
122fn parse_jpeg(bytes: &[u8]) -> Option<ImageInfo> {
123    if bytes.len() < 4 || !bytes.starts_with(&[0xff, 0xd8]) {
124        return None;
125    }
126
127    let mut pos = 2;
128    while pos + 4 <= bytes.len() {
129        while pos < bytes.len() && bytes[pos] == 0xff {
130            pos += 1;
131        }
132        if pos >= bytes.len() {
133            return None;
134        }
135
136        let marker = bytes[pos];
137        pos += 1;
138        if marker == 0xd9 || marker == 0xda {
139            return None;
140        }
141        if pos + 2 > bytes.len() {
142            return None;
143        }
144        let segment_len = u16::from_be_bytes(bytes[pos..pos + 2].try_into().ok()?) as usize;
145        if segment_len < 2 || pos + segment_len > bytes.len() {
146            return None;
147        }
148        let data_start = pos + 2;
149        if is_jpeg_sof(marker) && data_start + 5 <= bytes.len() {
150            return Some(ImageInfo {
151                height: u16::from_be_bytes(bytes[data_start + 1..data_start + 3].try_into().ok()?)
152                    as u32,
153                width: u16::from_be_bytes(bytes[data_start + 3..data_start + 5].try_into().ok()?)
154                    as u32,
155            });
156        }
157        pos += segment_len;
158    }
159
160    None
161}
162
/// Reports whether `marker` is a JPEG start-of-frame code.
///
/// Accepts `0xc0..=0xcf` except `0xc4`, `0xc8` and `0xcc`, which share the numeric range
/// but are not frame headers.
fn is_jpeg_sof(marker: u8) -> bool {
    matches!(marker, 0xc0..=0xcf) && !matches!(marker, 0xc4 | 0xc8 | 0xcc)
}
169
170fn parse_gif(bytes: &[u8]) -> Option<ImageInfo> {
171    if bytes.len() < 10 || !(bytes.starts_with(b"GIF87a") || bytes.starts_with(b"GIF89a")) {
172        return None;
173    }
174    Some(ImageInfo {
175        width: u16::from_le_bytes(bytes[6..8].try_into().ok()?) as u32,
176        height: u16::from_le_bytes(bytes[8..10].try_into().ok()?) as u32,
177    })
178}
179
180fn parse_bmp(bytes: &[u8]) -> Option<ImageInfo> {
181    if bytes.len() < 26 || !bytes.starts_with(b"BM") {
182        return None;
183    }
184    Some(ImageInfo {
185        width: i32::from_le_bytes(bytes[18..22].try_into().ok()?).unsigned_abs(),
186        height: i32::from_le_bytes(bytes[22..26].try_into().ok()?).unsigned_abs(),
187    })
188}
189
190fn parse_tiff(bytes: &[u8]) -> Option<ImageInfo> {
191    if bytes.len() < 8 {
192        return None;
193    }
194    let endian = TiffEndian::from_header(bytes)?;
195    if endian.read_u16(&bytes[2..4])? != 42 {
196        return None;
197    }
198    let ifd_offset = endian.read_u32(&bytes[4..8])? as usize;
199    if ifd_offset + 2 > bytes.len() {
200        return None;
201    }
202
203    let entry_count = endian.read_u16(&bytes[ifd_offset..ifd_offset + 2])? as usize;
204    let mut width = None;
205    let mut height = None;
206    let mut entry_pos = ifd_offset + 2;
207    for _ in 0..entry_count {
208        if entry_pos + 12 > bytes.len() {
209            return None;
210        }
211        let tag = endian.read_u16(&bytes[entry_pos..entry_pos + 2])?;
212        let field_type = endian.read_u16(&bytes[entry_pos + 2..entry_pos + 4])?;
213        let count = endian.read_u32(&bytes[entry_pos + 4..entry_pos + 8])?;
214        let value = tiff_inline_value(
215            endian,
216            field_type,
217            count,
218            &bytes[entry_pos + 8..entry_pos + 12],
219        )?;
220        match tag {
221            256 => width = Some(value),
222            257 => height = Some(value),
223            _ => {}
224        }
225        entry_pos += 12;
226    }
227
228    Some(ImageInfo {
229        width: width?,
230        height: height?,
231    })
232}
233
234fn tiff_inline_value(endian: TiffEndian, field_type: u16, count: u32, bytes: &[u8]) -> Option<u32> {
235    if count != 1 {
236        return None;
237    }
238    match field_type {
239        3 => endian.read_u16(&bytes[..2]).map(u32::from),
240        4 => endian.read_u32(bytes),
241        _ => None,
242    }
243}
244
245fn parse_webp(bytes: &[u8]) -> Option<ImageInfo> {
246    if bytes.len() < 30 || !bytes.starts_with(b"RIFF") || &bytes[8..12] != b"WEBP" {
247        return None;
248    }
249    if &bytes[12..16] != b"VP8X" {
250        return None;
251    }
252
253    Some(ImageInfo {
254        width: 1 + read_u24_le(&bytes[24..27])?,
255        height: 1 + read_u24_le(&bytes[27..30])?,
256    })
257}
258
/// Byte order of a TIFF file, selected by the first two header bytes.
#[derive(Debug, Clone, Copy)]
enum TiffEndian {
    /// Header `II`: multi-byte values are little-endian.
    Little,
    /// Header `MM`: multi-byte values are big-endian.
    Big,
}
264
265impl TiffEndian {
266    fn from_header(bytes: &[u8]) -> Option<Self> {
267        match bytes.get(..2)? {
268            b"II" => Some(Self::Little),
269            b"MM" => Some(Self::Big),
270            _ => None,
271        }
272    }
273
274    fn read_u16(self, bytes: &[u8]) -> Option<u16> {
275        let bytes = bytes.get(..2)?;
276        match self {
277            Self::Little => Some(u16::from_le_bytes(bytes.try_into().ok()?)),
278            Self::Big => Some(u16::from_be_bytes(bytes.try_into().ok()?)),
279        }
280    }
281
282    fn read_u32(self, bytes: &[u8]) -> Option<u32> {
283        let bytes = bytes.get(..4)?;
284        match self {
285            Self::Little => Some(u32::from_le_bytes(bytes.try_into().ok()?)),
286            Self::Big => Some(u32::from_be_bytes(bytes.try_into().ok()?)),
287        }
288    }
289}
290
/// Decodes a little-endian 24-bit unsigned integer from the first three bytes of `bytes`.
///
/// Extra bytes are ignored; returns `None` when fewer than three bytes are available.
fn read_u24_le(bytes: &[u8]) -> Option<u32> {
    match bytes {
        [low, mid, high, ..] => {
            Some(u32::from(*low) | (u32::from(*mid) << 8) | (u32::from(*high) << 16))
        }
        _ => None,
    }
}