Skip to main content

pdf_font/postscript/
mod.rs

1/*!
2A lightweight PostScript scanner.
3
This module provides a scanner that tokenizes PostScript programs into typed objects.
5It currently only implements a very small subset of the PostScript language,
6with the main goal of being enough to parse CMAP files, but the scope _might_
7be expanded upon in the future.
8
9The supported types include integers and real numbers, name objects, strings and arrays.
Anything else, including dictionaries and procedures, is unsupported; an error
is returned if any of these is encountered.
12
13## Safety
14This crate forbids unsafe code via a crate-level attribute.
15*/
16
17mod array;
18mod error;
19mod name;
20mod number;
21mod object;
22mod reader;
23mod string;
24
25pub use array::Array;
26pub use error::{Error, Result};
27pub use name::Name;
28pub use number::Number;
29pub use object::Object;
30pub use string::String;
31
32use reader::Reader;
33
/// A PostScript scanner that parses [`Object`]s from a byte stream.
///
/// The scanner borrows its input for the lifetime `'a`; the objects it hands
/// out (see [`Scanner::parse_object`]) are parameterized by that same lifetime.
pub struct Scanner<'a> {
    // Reader over the borrowed input; every scanner method works through it.
    reader: Reader<'a>,
}
38
39impl<'a> Scanner<'a> {
40    /// Create a new scanner over the given bytes of a PostScript program.
41    pub fn new(data: &'a [u8]) -> Self {
42        Self {
43            reader: Reader::new(data),
44        }
45    }
46
47    /// Returns `true` if there are no more objects to parse.
48    pub fn at_end(&mut self) -> bool {
49        object::at_end(&mut self.reader)
50    }
51
52    /// Parse the next object.
53    pub fn parse_object(&mut self) -> Result<Object<'a>> {
54        object::read(&mut self.reader)
55    }
56
57    /// Parse the next object as a [`Number`].
58    pub fn parse_number(&mut self) -> Result<Number> {
59        match self.parse_object()? {
60            Object::Number(n) => Ok(n),
61            _ => Err(Error::SyntaxError),
62        }
63    }
64
65    /// Parse the next object as a [`Name`].
66    pub fn parse_name(&mut self) -> Result<Name<'a>> {
67        match self.parse_object()? {
68            Object::Name(n) => Ok(n),
69            _ => Err(Error::SyntaxError),
70        }
71    }
72
73    /// Parse the next object as a [`String`].
74    pub fn parse_string(&mut self) -> Result<String<'a>> {
75        match self.parse_object()? {
76            Object::String(s) => Ok(s),
77            _ => Err(Error::SyntaxError),
78        }
79    }
80
81    /// Parse the next object as an [`Array`].
82    pub fn parse_array(&mut self) -> Result<Array<'a>> {
83        match self.parse_object()? {
84            Object::Array(a) => Ok(a),
85            _ => Err(Error::SyntaxError),
86        }
87    }
88}
89
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans a small CMap-style program token by token and checks the kind and
    /// value of every object, in input order.
    ///
    /// In the expectations below, the second argument to `Name::new` is `true`
    /// for names written with a leading `/` in the input and `false` for bare
    /// names such as `begin` or `def`.
    #[test]
    fn cmap_snippet() {
        let input = br#"/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CMapName /Test-H def
1 begincodespacerange
<00> <FF>
endcodespacerange
2 beginbfchar
<03> <0041>
<04> <0042>
endbfchar
endcmap"#;

        let mut s = Scanner::new(input);

        // Header: resource lookup and dictionary setup.
        assert_eq!(s.parse_name().unwrap(), Name::new(b"CIDInit", true));
        assert_eq!(s.parse_name().unwrap(), Name::new(b"ProcSet", true));
        assert_eq!(s.parse_name().unwrap(), Name::new(b"findresource", false));
        assert_eq!(s.parse_name().unwrap(), Name::new(b"begin", false));
        assert_eq!(s.parse_number().unwrap(), Number::Integer(12));
        assert_eq!(s.parse_name().unwrap(), Name::new(b"dict", false));
        assert_eq!(s.parse_name().unwrap(), Name::new(b"begin", false));
        assert_eq!(s.parse_name().unwrap(), Name::new(b"begincmap", false));
        assert_eq!(s.parse_name().unwrap(), Name::new(b"CMapName", true));
        assert_eq!(s.parse_name().unwrap(), Name::new(b"Test-H", true));
        assert_eq!(s.parse_name().unwrap(), Name::new(b"def", false));
        // Code space range section: a count followed by one pair of hex strings.
        assert_eq!(s.parse_number().unwrap(), Number::Integer(1));
        assert_eq!(
            s.parse_name().unwrap(),
            Name::new(b"begincodespacerange", false)
        );
        assert_eq!(s.parse_string().unwrap(), String::from_hex(b"00"));
        assert_eq!(s.parse_string().unwrap(), String::from_hex(b"FF"));
        assert_eq!(
            s.parse_name().unwrap(),
            Name::new(b"endcodespacerange", false)
        );
        // bfchar section: a count followed by two pairs of hex strings.
        assert_eq!(s.parse_number().unwrap(), Number::Integer(2));
        assert_eq!(s.parse_name().unwrap(), Name::new(b"beginbfchar", false));
        assert_eq!(s.parse_string().unwrap(), String::from_hex(b"03"));
        assert_eq!(s.parse_string().unwrap(), String::from_hex(b"0041"));
        assert_eq!(s.parse_string().unwrap(), String::from_hex(b"04"));
        assert_eq!(s.parse_string().unwrap(), String::from_hex(b"0042"));
        assert_eq!(s.parse_name().unwrap(), Name::new(b"endbfchar", false));
        assert_eq!(s.parse_name().unwrap(), Name::new(b"endcmap", false));
        // Every token has been consumed, so nothing may remain.
        assert!(s.at_end());
    }

    /// Parses a bracketed array as a single object, then walks its elements
    /// through the value returned by `arr.objects()`.
    #[test]
    fn array_round_trip() {
        let input = b"[123 /abc (xyz)]";
        let mut scanner = Scanner::new(input);
        let arr = scanner.parse_array().unwrap();
        // The array is the only top-level object in the input.
        assert!(scanner.at_end());

        // The elements are read back with the same `parse_*` calls as above.
        let mut inner = arr.objects();
        assert_eq!(inner.parse_number().unwrap(), Number::Integer(123));
        assert_eq!(inner.parse_name().unwrap(), Name::new(b"abc", true));
        assert_eq!(inner.parse_string().unwrap(), String::from_literal(b"xyz"));
        assert!(inner.at_end());
    }

    /// `%` comments, both on their own line and trailing a token, must not
    /// show up as objects: only `42` and `/Name` are expected.
    #[test]
    fn comments_skipped() {
        let input = b"% comment\n42 % another\n/Name";
        let mut scanner = Scanner::new(input);

        assert_eq!(scanner.parse_number().unwrap(), Number::Integer(42));
        assert_eq!(scanner.parse_name().unwrap(), Name::new(b"Name", true));
        assert!(scanner.at_end());
    }

    /// Asking for a name when the next object is a number must yield an error.
    #[test]
    fn wrong_type_is_error() {
        let mut scanner = Scanner::new(b"42 ");
        assert!(scanner.parse_name().is_err());
    }
}