Skip to main content

exiftool_rs/formats/
postscript.rs

1//! PostScript/EPS/AI file format reader.
2//!
3//! Parses DSC (Document Structuring Convention) comments for metadata.
4//! Mirrors ExifTool's PostScript.pm.
5
6use crate::error::{Error, Result};
7use crate::metadata::XmpReader;
8use crate::tag::{Tag, TagGroup, TagId};
9use crate::value::Value;
10
/// Converts a hexadecimal text dump into raw bytes.
///
/// Every character that is not an ASCII hex digit (whitespace, line breaks,
/// punctuation, ...) is skipped. The remaining digits are combined pairwise,
/// high nibble first; a trailing unpaired digit is dropped.
fn decode_hex(s: &str) -> Vec<u8> {
    // `to_digit(16)` yields values in 0..16, so narrowing to `u8` is lossless.
    let nibbles: Vec<u8> = s
        .chars()
        .filter_map(|ch| ch.to_digit(16))
        .map(|digit| digit as u8)
        .collect();

    nibbles
        .chunks_exact(2)
        .map(|pair| (pair[0] << 4) | pair[1])
        .collect()
}
18
19pub fn read_postscript(data: &[u8]) -> Result<Vec<Tag>> {
20    let mut tags = Vec::new();
21    let mut offset = 0;
22
23    // DOS EPS binary header: C5 D0 D3 C6
24    if data.len() >= 30 && data.starts_with(&[0xC5, 0xD0, 0xD3, 0xC6]) {
25        let ps_offset = u32::from_le_bytes([data[4], data[5], data[6], data[7]]) as usize;
26        let ps_length = u32::from_le_bytes([data[8], data[9], data[10], data[11]]) as usize;
27
28        if ps_offset + ps_length <= data.len() {
29            offset = ps_offset;
30        }
31        tags.push(mk("EPSFormat", "EPS Format", Value::String("DOS Binary".into())));
32    }
33
34    // Check for PS magic
35    if offset + 4 > data.len() || (!data[offset..].starts_with(b"%!PS") && !data[offset..].starts_with(b"%!Ad")) {
36        return Err(Error::InvalidData("not a PostScript file".into()));
37    }
38
39    // Parse DSC comments line by line (handle \r, \n, and \r\n)
40    let text = String::from_utf8_lossy(&data[offset..data.len().min(offset + 65536)]);
41    let text = text.replace('\r', "\n");
42
43    for line in text.lines() {
44        // Stop at the first embedded document or end of comments section
45        if line.starts_with("%%EndComments") || line.starts_with("%%BeginDocument")
46            || line.starts_with("%%BeginProlog") || line.starts_with("%%BeginSetup")
47        {
48            break;
49        }
50
51        if !line.starts_with("%%") && !line.starts_with("%!") {
52            // Stop at first non-comment non-DSC line (after header section)
53            if !line.starts_with('%') && !line.is_empty() {
54                break;
55            }
56            continue;
57        }
58
59        let line = line.trim();
60
61        if let Some(rest) = line.strip_prefix("%%Title:") {
62            tags.push(mk("Title", "Title", Value::String(rest.trim().trim_matches('(').trim_matches(')').to_string())));
63        } else if let Some(rest) = line.strip_prefix("%%Creator:") {
64            tags.push(mk("Creator", "Creator", Value::String(rest.trim().trim_matches('(').trim_matches(')').to_string())));
65        } else if let Some(rest) = line.strip_prefix("%%CreationDate:") {
66            tags.push(mk("CreateDate", "Create Date", Value::String(rest.trim().trim_matches('(').trim_matches(')').to_string())));
67        } else if let Some(rest) = line.strip_prefix("%%For:") {
68            tags.push(mk("Author", "Author", Value::String(rest.trim().trim_matches('(').trim_matches(')').to_string())));
69        } else if let Some(rest) = line.strip_prefix("%%BoundingBox:") {
70            tags.push(mk("BoundingBox", "Bounding Box", Value::String(rest.trim().to_string())));
71        } else if let Some(_rest) = line.strip_prefix("%%HiResBoundingBox:") {
72            // Perl doesn't emit HiResBoundingBox
73        } else if let Some(rest) = line.strip_prefix("%%Pages:") {
74            tags.push(mk("Pages", "Pages", Value::String(rest.trim().to_string())));
75        } else if let Some(rest) = line.strip_prefix("%%LanguageLevel:") {
76            tags.push(mk("LanguageLevel", "Language Level", Value::String(rest.trim().to_string())));
77        } else if let Some(rest) = line.strip_prefix("%%DocumentData:") {
78            tags.push(mk("DocumentData", "Document Data", Value::String(rest.trim().to_string())));
79        } else if line.starts_with("%!PS-Adobe-") {
80            // Perl stores version internally but doesn't emit PSVersion/EPSVersion directly
81        }
82    }
83
84    // Look for embedded XMP
85    if let Some(xmp_start) = find_bytes(&data[offset..], b"<?xpacket begin") {
86        let xmp_data = &data[offset + xmp_start..];
87        if let Some(xmp_end) = find_bytes(xmp_data, b"<?xpacket end") {
88            let end = xmp_end + 20; // Include the end tag
89            if let Ok(xmp_tags) = XmpReader::read(&xmp_data[..end.min(xmp_data.len())]) {
90                tags.extend(xmp_tags);
91            }
92        }
93    }
94
95    // Look for %BeginPhotoshop blocks (Photoshop IRB data encoded as hex)
96    let full_text = String::from_utf8_lossy(&data[offset..]);
97    let full_text = full_text.replace('\r', "\n");
98    parse_photoshop_blocks(&full_text, &mut tags);
99
100    // Parse %ImageData: for image dimensions
101    parse_image_data_comment(&full_text, &mut tags);
102
103    Ok(tags)
104}
105
106/// Parse %BeginPhotoshop ... %EndPhotoshop blocks
107fn parse_photoshop_blocks(text: &str, tags: &mut Vec<Tag>) {
108    let mut search: &str = text;
109    while let Some(start) = search.find("%BeginPhotoshop:") {
110        let block = &search[start..];
111        let end = block.find("%EndPhotoshop").unwrap_or(block.len());
112        let block = &block[..end];
113
114        // Collect hex data from continuation lines
115        let mut hex_str = String::new();
116        let mut first = true;
117        for line in block.lines() {
118            if first { first = false; continue; } // skip header line
119            let line = line.trim();
120            if line.starts_with("% ") {
121                let hex_part = &line[2..];
122                hex_str.push_str(hex_part);
123            }
124        }
125
126        if !hex_str.is_empty() {
127            let irb_data = decode_hex(&hex_str);
128            parse_photoshop_irb(&irb_data, tags);
129        }
130
131        let advance = start + end + 13; // skip past %EndPhotoshop
132        if advance >= search.len() { break; }
133        search = &search[advance..];
134    }
135}
136
137/// Parse Photoshop Image Resource Blocks (8BIM format)
138fn parse_photoshop_irb(data: &[u8], tags: &mut Vec<Tag>) {
139    let mut pos = 0;
140    while pos + 12 <= data.len() {
141        if &data[pos..pos + 4] != b"8BIM" {
142            break;
143        }
144        let res_type = u16::from_be_bytes([data[pos + 4], data[pos + 5]]);
145
146        // Pascal string at pos+6: 1 byte length + string data, padded to even
147        let name_len = data[pos + 6] as usize;
148        let name_total = 1 + name_len;
149        let name_total = if name_total % 2 != 0 { name_total + 1 } else { name_total };
150        let data_start = pos + 6 + name_total;
151        if data_start + 4 > data.len() {
152            break;
153        }
154        let data_size = u32::from_be_bytes([
155            data[data_start], data[data_start + 1],
156            data[data_start + 2], data[data_start + 3],
157        ]) as usize;
158        let data_end = data_start + 4 + data_size;
159        if data_end > data.len() {
160            break;
161        }
162        let block_data = &data[data_start + 4..data_end];
163
164        match res_type {
165            0x0404 => {
166                // IPTC-NAA: compute CurrentIPTCDigest as MD5 of the data
167                let digest = crate::md5::md5_hex(block_data);
168                tags.push(mk("CurrentIPTCDigest", "Current IPTC Digest", Value::String(digest)));
169                if let Ok(iptc_tags) = crate::metadata::IptcReader::read(block_data) {
170                    tags.extend(iptc_tags);
171                }
172            }
173            0x0425 => {
174                // IPTCDigest (stored as raw 16-byte MD5)
175                if block_data.len() >= 16 {
176                    let digest = block_data[..16].iter().map(|b| format!("{:02x}", b)).collect::<String>();
177                    tags.push(mk("IPTCDigest", "IPTC Digest", Value::String(digest)));
178                }
179            }
180            _ => {}
181        }
182
183        pos = data_end;
184        if pos % 2 != 0 {
185            pos += 1;
186        }
187    }
188}
189
190/// Parse %ImageData: comment for image dimensions
191fn parse_image_data_comment(text: &str, tags: &mut Vec<Tag>) {
192    for line in text.lines() {
193        if let Some(rest) = line.strip_prefix("%ImageData:") {
194            let parts: Vec<&str> = rest.trim().split_whitespace().collect();
195            if parts.len() >= 2 {
196                if let Ok(w) = parts[0].parse::<u32>() {
197                    tags.push(mk("ImageWidth", "Image Width", Value::U32(w)));
198                }
199                if let Ok(h) = parts[1].parse::<u32>() {
200                    tags.push(mk("ImageHeight", "Image Height", Value::U32(h)));
201                }
202                // Build the ImageData string
203                let img_data_str = rest.trim().to_string();
204                tags.push(mk("ImageData", "Image Data", Value::String(img_data_str)));
205            }
206            break;
207        }
208    }
209}
210
/// Returns the byte index of the first occurrence of `needle` in `haystack`.
///
/// Returns `None` when `needle` does not occur (including when it is longer
/// than `haystack`). An empty `needle` matches at index 0.
fn find_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        // `slice::windows(0)` panics, so the empty needle is handled up front.
        return Some(0);
    }
    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}
214
215fn mk(name: &str, description: &str, value: Value) -> Tag {
216    let pv = value.to_display_string();
217    Tag {
218        id: TagId::Text(name.to_string()),
219        name: name.to_string(),
220        description: description.to_string(),
221        group: TagGroup {
222            family0: "PostScript".into(),
223            family1: "PostScript".into(),
224            family2: "Document".into(),
225        },
226        raw_value: value,
227        print_value: pv,
228        priority: 0,
229    }
230}