1use nom::{
2 branch::alt,
3 bytes::complete::{tag, take_until, take_while, take_while1},
4 character::complete::{char, digit1, multispace0, multispace1, one_of},
5 combinator::{map, map_res, opt, recognize, value},
6 multi::many0,
7 sequence::{delimited, pair, preceded, tuple},
8 IResult,
9};
10
11pub fn skip_whitespace(input: &[u8]) -> IResult<&[u8], ()> {
12 value((), multispace0)(input)
13}
14
15pub fn skip_whitespace_and_comments(input: &[u8]) -> IResult<&[u8], ()> {
16 value((), many0(alt((value((), multispace1), value((), comment)))))(input)
17}
18
19pub fn comment(input: &[u8]) -> IResult<&[u8], &[u8]> {
20 preceded(
21 char('%'),
22 alt((take_until("\n"), take_until("\r"), nom::combinator::rest)),
23 )(input)
24}
25
26pub fn pdf_header(input: &[u8]) -> IResult<&[u8], (u8, u8)> {
27 let (input, _) = tag(b"%PDF-")(input)?;
28 let (input, major) = map_res(digit1, |s: &[u8]| {
29 std::str::from_utf8(s).unwrap().parse::<u8>()
30 })(input)?;
31 let (input, _) = char('.')(input)?;
32 let (input, minor) = map_res(digit1, |s: &[u8]| {
33 std::str::from_utf8(s).unwrap().parse::<u8>()
34 })(input)?;
35 Ok((input, (major, minor)))
36}
37
38pub fn pdf_eof(input: &[u8]) -> IResult<&[u8], &[u8]> {
39 tag(b"%%EOF")(input)
40}
41
/// Returns `true` when `c` is one of the six white-space bytes recognised by this parser:
/// NUL, horizontal tab, line feed, form feed, carriage return or space.
pub fn is_whitespace(c: u8) -> bool {
    const WHITESPACE: [u8; 6] = [b'\0', b'\t', b'\n', b'\x0C', b'\r', b' '];
    WHITESPACE.contains(&c)
}
45
/// Returns `true` when `c` is one of the delimiter bytes `( ) < > [ ] { } / %`.
pub fn is_delimiter(c: u8) -> bool {
    b"()<>[]{}/%".contains(&c)
}
52
53pub fn is_regular_char(c: u8) -> bool {
54 !is_whitespace(c) && !is_delimiter(c)
55}
56
57pub fn regular_chars(input: &[u8]) -> IResult<&[u8], &[u8]> {
58 take_while1(is_regular_char)(input)
59}
60
61pub fn keyword(input: &[u8]) -> IResult<&[u8], &[u8]> {
62 alt((
63 tag(b"true"),
64 tag(b"false"),
65 tag(b"null"),
66 tag(b"obj"),
67 tag(b"endobj"),
68 tag(b"stream"),
69 tag(b"endstream"),
70 tag(b"xref"),
71 tag(b"startxref"),
72 tag(b"trailer"),
73 tag(b"R"),
74 tag(b"n"),
75 tag(b"f"),
76 ))(input)
77}
78
79pub fn integer(input: &[u8]) -> IResult<&[u8], i64> {
80 map_res(recognize(pair(opt(one_of("+-")), digit1)), |s: &[u8]| {
81 std::str::from_utf8(s).unwrap().parse::<i64>()
82 })(input)
83}
84
85pub fn real(input: &[u8]) -> IResult<&[u8], f64> {
86 map_res(
87 recognize(tuple((
88 opt(one_of("+-")),
89 alt((
90 recognize(tuple((digit1, char('.'), opt(digit1)))),
91 recognize(tuple((opt(digit1), char('.'), digit1))),
92 )),
93 ))),
94 |s: &[u8]| std::str::from_utf8(s).unwrap().parse::<f64>(),
95 )(input)
96}
97
98pub fn hex_string(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
99 delimited(
100 char('<'),
101 map(
102 take_while(|c: u8| c.is_ascii_hexdigit() || is_whitespace(c)),
103 |hex: &[u8]| {
104 let hex_str: String = hex
105 .iter()
106 .filter(|&&c| !is_whitespace(c))
107 .map(|&c| c as char)
108 .collect();
109
110 let mut result = Vec::new();
111 let mut chars = hex_str.chars();
112
113 while let Some(c1) = chars.next() {
114 let c2 = chars.next().unwrap_or('0');
115 if let Ok(byte) = u8::from_str_radix(&format!("{}{}", c1, c2), 16) {
116 result.push(byte);
117 }
118 }
119
120 result
121 },
122 ),
123 char('>'),
124 )(input)
125}
126
127pub fn literal_string(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
128 delimited(
129 char('('),
130 map(
131 many0(alt((
132 preceded(char('\\'), escape_sequence),
133 map(take_while1(|c| c != b')' && c != b'\\'), |s: &[u8]| {
134 s.to_vec()
135 }),
136 ))),
137 |parts| parts.into_iter().flatten().collect(),
138 ),
139 char(')'),
140 )(input)
141}
142
143fn escape_sequence(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
144 alt((
145 value(vec![b'\n'], char('n')),
146 value(vec![b'\r'], char('r')),
147 value(vec![b'\t'], char('t')),
148 value(vec![b'\x08'], char('b')),
149 value(vec![b'\x0C'], char('f')),
150 value(vec![b'('], char('(')),
151 value(vec![b')'], char(')')),
152 value(vec![b'\\'], char('\\')),
153 map(octal_escape, |b| vec![b]),
154 ))(input)
155}
156
157fn octal_escape(input: &[u8]) -> IResult<&[u8], u8> {
158 map_res(
159 recognize(tuple((
160 one_of("01234567"),
161 opt(one_of("01234567")),
162 opt(one_of("01234567")),
163 ))),
164 |s: &[u8]| u8::from_str_radix(std::str::from_utf8(s).unwrap(), 8),
165 )(input)
166}
167
168pub fn name(input: &[u8]) -> IResult<&[u8], String> {
169 preceded(
170 char('/'),
171 map(
172 take_while(|c: u8| !is_whitespace(c) && !is_delimiter(c)),
173 |bytes: &[u8]| {
174 let mut result = String::new();
175 let mut chars = bytes.iter();
176
177 while let Some(&c) = chars.next() {
178 if c == b'#' {
179 if let (Some(&c1), Some(&c2)) = (chars.next(), chars.next()) {
180 if let Ok(byte) =
181 u8::from_str_radix(&format!("{}{}", c1 as char, c2 as char), 16)
182 {
183 result.push(byte as char);
184 continue;
185 }
186 }
187 result.push('#');
188 } else {
189 result.push(c as char);
190 }
191 }
192
193 format!("/{}", result)
194 },
195 ),
196 )(input)
197}