Skip to main content

exiftool_rs/formats/
postscript.rs

1//! PostScript/EPS/AI file format reader.
2//!
3//! Parses DSC (Document Structuring Convention) comments for metadata.
4//! Mirrors ExifTool's PostScript.pm.
5
6use crate::error::{Error, Result};
7use crate::metadata::XmpReader;
8use crate::tag::{Tag, TagGroup, TagId};
9use crate::value::Value;
10
/// Convert a run of hexadecimal text into raw bytes.
///
/// Every character that is not an ASCII hex digit (spaces, line breaks, ...) is
/// skipped. The remaining digits are combined pairwise, high nibble first; a
/// dangling final digit without a partner is dropped.
fn decode_hex(s: &str) -> Vec<u8> {
    let nibbles: Vec<u8> = s
        .chars()
        .filter_map(|ch| ch.to_digit(16))
        .map(|digit| digit as u8) // to_digit(16) yields 0..=15, so this never truncates
        .collect();

    nibbles
        .chunks_exact(2)
        .map(|pair| (pair[0] << 4) | pair[1])
        .collect()
}
18
19pub fn read_postscript(data: &[u8]) -> Result<Vec<Tag>> {
20    let mut tags = Vec::new();
21    let mut offset = 0;
22
23    // DOS EPS binary header: C5 D0 D3 C6
24    if data.len() >= 30 && data.starts_with(&[0xC5, 0xD0, 0xD3, 0xC6]) {
25        let ps_offset = u32::from_le_bytes([data[4], data[5], data[6], data[7]]) as usize;
26        let ps_length = u32::from_le_bytes([data[8], data[9], data[10], data[11]]) as usize;
27
28        if ps_offset + ps_length <= data.len() {
29            offset = ps_offset;
30        }
31        tags.push(mk(
32            "EPSFormat",
33            "EPS Format",
34            Value::String("DOS Binary".into()),
35        ));
36    }
37
38    // Check for PS magic
39    if offset + 4 > data.len()
40        || (!data[offset..].starts_with(b"%!PS") && !data[offset..].starts_with(b"%!Ad"))
41    {
42        return Err(Error::InvalidData("not a PostScript file".into()));
43    }
44
45    // Parse DSC comments line by line (handle \r, \n, and \r\n)
46    let text =
47        crate::encoding::decode_utf8_or_latin1(&data[offset..data.len().min(offset + 65536)]);
48    let text = text.replace('\r', "\n");
49
50    for line in text.lines() {
51        // Stop at the first embedded document or end of comments section
52        if line.starts_with("%%EndComments")
53            || line.starts_with("%%BeginDocument")
54            || line.starts_with("%%BeginProlog")
55            || line.starts_with("%%BeginSetup")
56        {
57            break;
58        }
59
60        if !line.starts_with("%%") && !line.starts_with("%!") {
61            // Stop at first non-comment non-DSC line (after header section)
62            if !line.starts_with('%') && !line.is_empty() {
63                break;
64            }
65            continue;
66        }
67
68        let line = line.trim();
69
70        if let Some(rest) = line.strip_prefix("%%Title:") {
71            tags.push(mk(
72                "Title",
73                "Title",
74                Value::String(rest.trim().trim_matches('(').trim_matches(')').to_string()),
75            ));
76        } else if let Some(rest) = line.strip_prefix("%%Creator:") {
77            tags.push(mk(
78                "Creator",
79                "Creator",
80                Value::String(rest.trim().trim_matches('(').trim_matches(')').to_string()),
81            ));
82        } else if let Some(rest) = line.strip_prefix("%%CreationDate:") {
83            tags.push(mk(
84                "CreateDate",
85                "Create Date",
86                Value::String(rest.trim().trim_matches('(').trim_matches(')').to_string()),
87            ));
88        } else if let Some(rest) = line.strip_prefix("%%For:") {
89            tags.push(mk(
90                "Author",
91                "Author",
92                Value::String(rest.trim().trim_matches('(').trim_matches(')').to_string()),
93            ));
94        } else if let Some(rest) = line.strip_prefix("%%BoundingBox:") {
95            tags.push(mk(
96                "BoundingBox",
97                "Bounding Box",
98                Value::String(rest.trim().to_string()),
99            ));
100        } else if let Some(_rest) = line.strip_prefix("%%HiResBoundingBox:") {
101            // Perl doesn't emit HiResBoundingBox
102        } else if let Some(rest) = line.strip_prefix("%%Pages:") {
103            tags.push(mk("Pages", "Pages", Value::String(rest.trim().to_string())));
104        } else if let Some(rest) = line.strip_prefix("%%LanguageLevel:") {
105            tags.push(mk(
106                "LanguageLevel",
107                "Language Level",
108                Value::String(rest.trim().to_string()),
109            ));
110        } else if let Some(rest) = line.strip_prefix("%%DocumentData:") {
111            tags.push(mk(
112                "DocumentData",
113                "Document Data",
114                Value::String(rest.trim().to_string()),
115            ));
116        } else if line.starts_with("%!PS-Adobe-") {
117            // Perl stores version internally but doesn't emit PSVersion/EPSVersion directly
118        }
119    }
120
121    // Look for embedded XMP
122    if let Some(xmp_start) = find_bytes(&data[offset..], b"<?xpacket begin") {
123        let xmp_data = &data[offset + xmp_start..];
124        if let Some(xmp_end) = find_bytes(xmp_data, b"<?xpacket end") {
125            let end = xmp_end + 20; // Include the end tag
126            if let Ok(xmp_tags) = XmpReader::read(&xmp_data[..end.min(xmp_data.len())]) {
127                tags.extend(xmp_tags);
128            }
129        }
130    }
131
132    // Look for %BeginPhotoshop blocks (Photoshop IRB data encoded as hex)
133    let full_text = crate::encoding::decode_utf8_or_latin1(&data[offset..]);
134    let full_text = full_text.replace('\r', "\n");
135    parse_photoshop_blocks(&full_text, &mut tags);
136
137    // Parse %ImageData: for image dimensions
138    parse_image_data_comment(&full_text, &mut tags);
139
140    Ok(tags)
141}
142
143/// Parse %BeginPhotoshop ... %EndPhotoshop blocks
144fn parse_photoshop_blocks(text: &str, tags: &mut Vec<Tag>) {
145    let mut search: &str = text;
146    while let Some(start) = search.find("%BeginPhotoshop:") {
147        let block = &search[start..];
148        let end = block.find("%EndPhotoshop").unwrap_or(block.len());
149        let block = &block[..end];
150
151        // Collect hex data from continuation lines
152        let mut hex_str = String::new();
153        let mut first = true;
154        for line in block.lines() {
155            if first {
156                first = false;
157                continue;
158            } // skip header line
159            let line = line.trim();
160            if let Some(hex_part) = line.strip_prefix("% ") {
161                hex_str.push_str(hex_part);
162            }
163        }
164
165        if !hex_str.is_empty() {
166            let irb_data = decode_hex(&hex_str);
167            parse_photoshop_irb(&irb_data, tags);
168        }
169
170        let advance = start + end + 13; // skip past %EndPhotoshop
171        if advance >= search.len() {
172            break;
173        }
174        search = &search[advance..];
175    }
176}
177
178/// Parse Photoshop Image Resource Blocks (8BIM format)
179fn parse_photoshop_irb(data: &[u8], tags: &mut Vec<Tag>) {
180    let mut pos = 0;
181    while pos + 12 <= data.len() {
182        if &data[pos..pos + 4] != b"8BIM" {
183            break;
184        }
185        let res_type = u16::from_be_bytes([data[pos + 4], data[pos + 5]]);
186
187        // Pascal string at pos+6: 1 byte length + string data, padded to even
188        let name_len = data[pos + 6] as usize;
189        let name_total = 1 + name_len;
190        let name_total = if name_total % 2 != 0 {
191            name_total + 1
192        } else {
193            name_total
194        };
195        let data_start = pos + 6 + name_total;
196        if data_start + 4 > data.len() {
197            break;
198        }
199        let data_size = u32::from_be_bytes([
200            data[data_start],
201            data[data_start + 1],
202            data[data_start + 2],
203            data[data_start + 3],
204        ]) as usize;
205        let data_end = data_start + 4 + data_size;
206        if data_end > data.len() {
207            break;
208        }
209        let block_data = &data[data_start + 4..data_end];
210
211        match res_type {
212            0x0404 => {
213                // IPTC-NAA: compute CurrentIPTCDigest as MD5 of the data
214                let digest = crate::md5::md5_hex(block_data);
215                tags.push(mk(
216                    "CurrentIPTCDigest",
217                    "Current IPTC Digest",
218                    Value::String(digest),
219                ));
220                if let Ok(iptc_tags) = crate::metadata::IptcReader::read(block_data) {
221                    tags.extend(iptc_tags);
222                }
223            }
224            0x0425 => {
225                // IPTCDigest (stored as raw 16-byte MD5)
226                if block_data.len() >= 16 {
227                    let digest = block_data[..16]
228                        .iter()
229                        .map(|b| format!("{:02x}", b))
230                        .collect::<String>();
231                    tags.push(mk("IPTCDigest", "IPTC Digest", Value::String(digest)));
232                }
233            }
234            _ => {}
235        }
236
237        pos = data_end;
238        if pos % 2 != 0 {
239            pos += 1;
240        }
241    }
242}
243
244/// Parse %ImageData: comment for image dimensions
245fn parse_image_data_comment(text: &str, tags: &mut Vec<Tag>) {
246    for line in text.lines() {
247        if let Some(rest) = line.strip_prefix("%ImageData:") {
248            let parts: Vec<&str> = rest.split_whitespace().collect();
249            if parts.len() >= 2 {
250                if let Ok(w) = parts[0].parse::<u32>() {
251                    tags.push(mk("ImageWidth", "Image Width", Value::U32(w)));
252                }
253                if let Ok(h) = parts[1].parse::<u32>() {
254                    tags.push(mk("ImageHeight", "Image Height", Value::U32(h)));
255                }
256                // Build the ImageData string
257                let img_data_str = rest.trim().to_string();
258                tags.push(mk("ImageData", "Image Data", Value::String(img_data_str)));
259            }
260            break;
261        }
262    }
263}
264
/// Return the index of the first occurrence of `needle` inside `haystack`.
///
/// Returns `None` when `needle` does not occur (including when it is longer
/// than `haystack`). An empty `needle` matches at index 0; it is handled up
/// front because `slice::windows` panics when asked for zero-sized windows.
fn find_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        return Some(0);
    }
    haystack
        .windows(needle.len())
        .position(|window| window == needle)
}
268
269fn mk(name: &str, description: &str, value: Value) -> Tag {
270    let pv = value.to_display_string();
271    Tag {
272        id: TagId::Text(name.to_string()),
273        name: name.to_string(),
274        description: description.to_string(),
275        group: TagGroup {
276            family0: "PostScript".into(),
277            family1: "PostScript".into(),
278            family2: "Document".into(),
279        },
280        raw_value: value,
281        print_value: pv,
282        priority: 0,
283    }
284}