Skip to main content

hayro_postscript/
lib.rs

/*!
A lightweight PostScript scanner.

This crate provides a scanner for tokenizing PostScript programs into typed objects.
It currently implements only a very small subset of the PostScript language,
with the main goal of being sufficient to parse CMap files, but the scope _might_
be expanded in the future.

The supported types are integers and real numbers, name objects, strings and arrays.
Everything else, including dictionaries and procedures, is unsupported; an error
is returned when any such construct is encountered.

## Safety
This crate forbids unsafe code via a crate-level attribute.
*/
16
17#![no_std]
18#![forbid(unsafe_code)]
19#![allow(missing_docs)]
20
21extern crate alloc;
22
23mod array;
24mod error;
25mod name;
26mod number;
27mod object;
28mod reader;
29mod string;
30
31pub use array::Array;
32pub use error::{Error, Result};
33pub use name::Name;
34pub use number::Number;
35pub use object::Object;
36pub use string::String;
37
38use reader::Reader;
39
40/// A PostScript scanner that parses [`Object`]s from a byte stream.
41pub struct Scanner<'a> {
42    reader: Reader<'a>,
43}
44
45impl<'a> Scanner<'a> {
46    /// Create a new scanner over the given bytes of a PostScript program.
47    pub fn new(data: &'a [u8]) -> Self {
48        Self {
49            reader: Reader::new(data),
50        }
51    }
52
53    /// Returns `true` if there are no more objects to parse.
54    pub fn at_end(&mut self) -> bool {
55        object::at_end(&mut self.reader)
56    }
57
58    /// Parse the next object.
59    pub fn parse_object(&mut self) -> Result<Object<'a>> {
60        object::read(&mut self.reader)
61    }
62
63    /// Parse the next object as a [`Number`].
64    pub fn parse_number(&mut self) -> Result<Number> {
65        match self.parse_object()? {
66            Object::Number(n) => Ok(n),
67            _ => Err(Error::SyntaxError),
68        }
69    }
70
71    /// Parse the next object as a [`Name`].
72    pub fn parse_name(&mut self) -> Result<Name<'a>> {
73        match self.parse_object()? {
74            Object::Name(n) => Ok(n),
75            _ => Err(Error::SyntaxError),
76        }
77    }
78
79    /// Parse the next object as a [`String`].
80    pub fn parse_string(&mut self) -> Result<String<'a>> {
81        match self.parse_object()? {
82            Object::String(s) => Ok(s),
83            _ => Err(Error::SyntaxError),
84        }
85    }
86
87    /// Parse the next object as an [`Array`].
88    pub fn parse_array(&mut self) -> Result<Array<'a>> {
89        match self.parse_object()? {
90            Object::Array(a) => Ok(a),
91            _ => Err(Error::SyntaxError),
92        }
93    }
94}
95
#[cfg(test)]
mod tests {
    use super::*;

    // Asserts that calling `$method` on `$scanner` succeeds and yields `$expected`.
    macro_rules! assert_next {
        ($scanner:expr, $method:ident, $expected:expr) => {
            assert_eq!($scanner.$method().unwrap(), $expected)
        };
    }

    /// Scans a small CMap program and checks every object in order.
    #[test]
    fn cmap_snippet() {
        let program = br#"/CIDInit /ProcSet findresource begin
12 dict begin
begincmap
/CMapName /Test-H def
1 begincodespacerange
<00> <FF>
endcodespacerange
2 beginbfchar
<03> <0041>
<04> <0042>
endbfchar
endcmap"#;

        let mut scanner = Scanner::new(program);

        // Resource prologue.
        assert_next!(scanner, parse_name, Name::new(b"CIDInit", true));
        assert_next!(scanner, parse_name, Name::new(b"ProcSet", true));
        assert_next!(scanner, parse_name, Name::new(b"findresource", false));
        assert_next!(scanner, parse_name, Name::new(b"begin", false));
        assert_next!(scanner, parse_number, Number::Integer(12));
        assert_next!(scanner, parse_name, Name::new(b"dict", false));
        assert_next!(scanner, parse_name, Name::new(b"begin", false));
        assert_next!(scanner, parse_name, Name::new(b"begincmap", false));

        // CMap name definition.
        assert_next!(scanner, parse_name, Name::new(b"CMapName", true));
        assert_next!(scanner, parse_name, Name::new(b"Test-H", true));
        assert_next!(scanner, parse_name, Name::new(b"def", false));

        // Code space range section.
        assert_next!(scanner, parse_number, Number::Integer(1));
        assert_next!(
            scanner,
            parse_name,
            Name::new(b"begincodespacerange", false)
        );
        assert_next!(scanner, parse_string, String::from_hex(b"00"));
        assert_next!(scanner, parse_string, String::from_hex(b"FF"));
        assert_next!(
            scanner,
            parse_name,
            Name::new(b"endcodespacerange", false)
        );

        // bfchar section.
        assert_next!(scanner, parse_number, Number::Integer(2));
        assert_next!(scanner, parse_name, Name::new(b"beginbfchar", false));
        assert_next!(scanner, parse_string, String::from_hex(b"03"));
        assert_next!(scanner, parse_string, String::from_hex(b"0041"));
        assert_next!(scanner, parse_string, String::from_hex(b"04"));
        assert_next!(scanner, parse_string, String::from_hex(b"0042"));
        assert_next!(scanner, parse_name, Name::new(b"endbfchar", false));
        assert_next!(scanner, parse_name, Name::new(b"endcmap", false));

        assert!(scanner.at_end());
    }

    /// Parses an array and then scans the objects it contains.
    #[test]
    fn array_round_trip() {
        let source = b"[123 /abc (xyz)]";
        let mut outer = Scanner::new(source);
        let array = outer.parse_array().unwrap();
        assert!(outer.at_end());

        let mut elements = array.objects();
        assert_next!(elements, parse_number, Number::Integer(123));
        assert_next!(elements, parse_name, Name::new(b"abc", true));
        assert_next!(elements, parse_string, String::from_literal(b"xyz"));
        assert!(elements.at_end());
    }

    /// Comments introduced by `%` must not produce objects.
    #[test]
    fn comments_skipped() {
        let source = b"% comment\n42 % another\n/Name";
        let mut scanner = Scanner::new(source);

        assert_next!(scanner, parse_number, Number::Integer(42));
        assert_next!(scanner, parse_name, Name::new(b"Name", true));
        assert!(scanner.at_end());
    }

    /// Requesting a name when the next object is a number must fail.
    #[test]
    fn wrong_type_is_error() {
        let mut scanner = Scanner::new(b"42 ");
        let outcome = scanner.parse_name();
        assert!(outcome.is_err());
    }
}