/// Errors reported by the scanner and the parse functions.
///
/// `start` and `end` are byte offsets into the source text being parsed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Error {
    /// Something other than the expected input was found.
    UnexpectedToken {
        /// Description of what the parser was looking for.
        expected: String,
        /// The text that was found instead.
        actual: String,
        /// Offset at which the reported span begins.
        start: usize,
        /// Offset at which the reported span ends (exclusive).
        end: usize,
    },
    /// The input ended while more was required.
    UnexpectedEof {
        /// Description of what the parser was looking for.
        expected: String,
        /// Offset at which the input ended.
        start: usize,
    },
    /// The input could be scanned but breaks a rule of the format.
    Invalid {
        /// Human-readable explanation of the violated rule.
        message: String,
        /// Offset at which the reported span begins.
        start: usize,
        /// Offset at which the reported span ends (exclusive).
        end: usize,
    },
}

impl std::fmt::Display for Error {
    /// Writes a one-line description; no trailing newline is emitted so the
    /// message composes cleanly inside `println!`, `format!` and log lines.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::UnexpectedToken {
                expected, actual, ..
            } => write!(f, "Expected {}, found {}", expected, actual),
            Error::UnexpectedEof { expected, .. } => {
                write!(f, "Expected {}, found Eof", expected)
            }
            Error::Invalid { message, .. } => f.write_str(message),
        }
    }
}

// Lets the error flow through `?` into `Box<dyn std::error::Error>` and friends.
impl std::error::Error for Error {}
33
34pub struct Parser<'a> {
35 source: &'a str,
36 boundary_length: usize,
37 pos: usize,
38}
39
40impl<'a> Parser<'a> {
41 pub fn new(source: &'a str) -> Self {
42 Self {
43 source,
44 boundary_length: 0,
45 pos: 0,
46 }
47 }
48
49 pub fn cur_byte(&self) -> Option<u8> {
50 self.source.as_bytes().get(self.pos).map(|item| *item)
51 }
52
53 pub fn eof(&self) -> bool {
54 self.pos == self.source.len()
55 }
56
57 pub fn is_line_feed(&self) -> bool {
61 self.cur_byte() == Some(b'\n')
62 }
63
64 pub fn pos(&self) -> usize {
65 self.pos
66 }
67
68 pub fn scan_boundary(&mut self) -> Result<(usize, usize), Error> {
69 let start = self.pos;
70 let mut eq_len = 1;
71 self.expect_byte(b'<')?;
72 self.expect_byte(b'=')?;
73 while self.cur_byte() == Some(b'=') {
74 eq_len += 1;
75 self.pos += 1;
76 }
77 self.expect_byte(b'>')?;
78
79 let end = self.pos;
80 if self.boundary_length == 0 {
81 self.boundary_length = eq_len;
82 } else if self.boundary_length != eq_len {
83 return Err(Error::Invalid {
84 message: "Un matched boundary length".to_string(),
85 start,
86 end,
87 });
88 }
89 Ok((start, end))
90 }
91
92 pub fn scan_contents(&mut self) -> Result<(usize, usize), Error> {
97 let start = self.pos;
98 if self.cur_byte() == Some(b'<') {
99 match self.scan_boundary() {
100 Ok(_) => {
102 self.pos = start;
103 return Ok((start, start));
104 }
105 Err(_) => {}
106 }
107 }
108 loop {
109 if self.eof() {
110 break;
111 }
112 if self.cur_byte() == Some(b'\n') {
113 self.pos += 1;
114 if self.cur_byte() == Some(b'<') {
115 let checkpoint = self.pos;
116 match self.scan_boundary() {
117 Ok(_) => {
119 self.pos = checkpoint;
120 break;
121 }
122 Err(_) => {}
123 }
124 }
125 } else {
126 self.pos += 1;
127 }
128 }
129 let end = self.pos;
130 Ok((start, end))
131 }
132
133 pub fn expect_byte(&mut self, byte: u8) -> Result<(), Error> {
134 if self.cur_byte() != Some(byte) {
135 if self.eof() {
136 return Err(Error::UnexpectedEof {
137 expected: (byte as char).to_string(),
138 start: self.pos,
140 });
141 }
142 return Err(Error::UnexpectedToken {
143 expected: (byte as char).to_string(),
144 actual: (self.cur_byte().unwrap() as char).to_string(),
146 start: self.pos,
147 end: self.pos + 1,
148 });
149 }
150 self.pos += 1;
151 Ok(())
152 }
153
154 pub fn scan_path_component(&mut self) -> Result<(usize, usize, &'a str), Error> {
155 let start = self.pos;
156 while !self.eof() && self.is_path_component(self.cur_byte().unwrap()) {
158 self.pos += 1;
159 }
160 let end = self.pos;
161 let source = &self.source[start..end];
162 if source.len() == 0 || source == "." || source == ".." {
163 if self.eof() {
164 return Err(Error::UnexpectedEof {
165 expected: "path".to_string(),
166 start: self.pos,
167 });
168 } else {
169 return Err(Error::UnexpectedToken {
170 expected: "path".to_string(),
171 actual: source.to_string(),
172 start,
173 end,
174 });
175 }
176 }
177 Ok((start, end, source))
178 }
179
180 #[inline]
185 fn is_path_component(&self, byte: u8) -> bool {
186 !matches!(byte, 0..=0x1f | 0x7f | 0x2f | 0x3a | 0x5c)
187 }
188}
189
190#[derive(Debug)]
191pub enum FileOrDirectory<'a> {
192 File(File<'a>),
193 Directory(Directory<'a>),
194}
195
196#[derive(Debug)]
197pub struct File<'a> {
198 pub start: usize,
199 pub end: usize,
200 pub body: Option<Body<'a>>,
201 pub path: Path<'a>,
202}
/// A path as written in the archive, with its location in the source.
///
/// Cheap to clone and comparable, since it only holds offsets and a borrowed
/// slice of the source text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Path<'a> {
    /// Byte offset of the first byte of the path.
    pub start: usize,
    /// Byte offset just past the last byte of the path.
    pub end: usize,
    /// The path text, borrowed from the parsed source.
    pub source: &'a str,
}
209
210#[derive(Debug)]
211pub struct Directory<'a> {
212 pub start: usize,
213 pub end: usize,
214 pub path: Path<'a>,
215}
216
217#[derive(Debug)]
218pub struct Archive<'a> {
219 pub start: usize,
220 pub end: usize,
221 pub entries: Vec<Entry<'a>>,
222 pub comment: Option<Comment<'a>>,
223}
224
225#[derive(Debug)]
226pub struct Entry<'a> {
227 pub start: usize,
228 pub end: usize,
229 pub comment: Option<Comment<'a>>,
230 pub body: FileOrDirectory<'a>,
231}
232
233impl<'a> Entry<'a> {
234 pub fn path(&self) -> String {
235 match self.body {
236 FileOrDirectory::File(ref f) => f.path.source.to_string(),
237 FileOrDirectory::Directory(ref d) => d.path.source.to_string() + "/",
238 }
239 }
240
241 pub fn content(&self) -> Option<String> {
242 match self.body {
243 FileOrDirectory::File(ref f) => f.body.as_ref().map(|item| item.source.to_string()),
244 FileOrDirectory::Directory(_) => None,
245 }
246 }
247}
248
249#[derive(Debug)]
250pub struct Comment<'a> {
251 pub start: usize,
252 pub end: usize,
253 pub source: &'a str,
254 pub boundary: Boundary,
255 pub body: Body<'a>,
256}
257
/// A run of free-form contents (a file body or a comment body) together with
/// its location in the source.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Body<'a> {
    /// Byte offset of the first byte of the contents.
    pub start: usize,
    /// Byte offset just past the last byte of the contents.
    pub end: usize,
    /// The contents, borrowed from the parsed source.
    pub source: &'a str,
}
264
/// Location of a boundary marker (`<`, one or more `=`, `>`) in the source.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Boundary {
    /// Byte offset of the opening `<`.
    pub start: usize,
    /// Byte offset just past the closing `>`.
    pub end: usize,
}
270
271pub fn parse_archive<'a>(p: &mut Parser<'a>) -> Result<Archive<'a>, Error> {
272 let start = p.pos;
273 let mut entries = vec![];
274 loop {
275 let checkpoint = p.pos;
276 match parse_entry(p) {
277 Ok(entry) => {
278 entries.push(entry);
279 }
280 Err(err) => {
281 p.pos = checkpoint;
282 match err {
283 Error::UnexpectedToken { .. } => {}
284 Error::UnexpectedEof { .. } => {}
285 Error::Invalid { .. } => {
286 return Err(err);
287 }
288 }
289 break;
290 }
291 }
292 }
293 dbg!(&p.source[p.pos..]);
294 let comment = parse_comment(p).ok();
295 dbg!(&comment);
296 if !p.eof() {
297 return Err(Error::UnexpectedToken {
298 expected: "Eof eof".to_string(),
299 actual: p.cur_byte().unwrap().to_string(),
301 start: p.pos,
302 end: p.pos + 1,
303 });
304 }
305 Ok(Archive {
306 start,
307 end: p.pos,
308 entries,
309 comment,
310 })
311 }
313
314pub fn parse_entry<'a>(p: &mut Parser<'a>) -> Result<Entry<'a>, Error> {
315 let start = p.pos;
316 let comment = parse_comment(p).ok();
317 if comment.is_none() {
318 p.pos = start;
319 }
320 parse_boundary(p)?;
321 p.expect_byte(b' ')?;
322 let path = parse_path(p)?;
323
324 match p.cur_byte() {
325 Some(b'/') => {
326 p.expect_byte(b'/')?;
327 while !p.eof() && p.is_line_feed() {
328 p.pos += 1;
329 }
330 let end = p.pos;
331 if !p.eof() && p.cur_byte() != Some(b'<') {
332 return Err(Error::Invalid {
333 message: "A directory can't have text contents.".to_string(),
334 start: p.pos,
335 end: p.pos + 1,
336 });
337 }
338 return Ok(Entry {
339 start,
340 end,
341 comment,
342 body: FileOrDirectory::Directory(Directory { start, end, path }),
343 });
344 }
345 Some(b'\n') => {
346 p.expect_byte(b'\n')?;
347 let checkpoint = p.pos;
348
349 let body = parse_body(p).ok();
350 if body.is_none() {
351 p.pos = checkpoint;
352 }
353 let end = p.pos;
354 return Ok(Entry {
355 start,
356 end,
357 comment,
358 body: FileOrDirectory::File(File {
359 start,
360 end,
361 body,
362 path,
363 }),
364 });
365 }
366 _ if p.eof() => {
367 return Err(Error::UnexpectedEof {
368 expected: "`/` or `\n`".to_string(),
369 start: p.pos,
370 })
371 }
372 _ => {
373 return Err(Error::UnexpectedToken {
374 expected: "`/` or `\n`".to_string(),
375 actual: p.cur_byte().unwrap().to_string(),
377 start,
378 end: p.pos,
379 });
380 }
381 }
382}
383
384pub fn parse_comment<'a>(p: &mut Parser<'a>) -> Result<Comment<'a>, Error> {
385 let start = p.pos();
386 let boundary = parse_boundary(p)?;
387 p.expect_byte(b'\n')?;
388 let body = parse_body(p)?;
389 let end = p.pos;
390 Ok(Comment {
391 start,
392 end,
393 source: &p.source[start..end],
394 boundary,
395 body,
396 })
397}
398
399fn parse_path<'a>(p: &mut Parser<'a>) -> Result<Path<'a>, Error> {
400 let (start, mut end, _) = p.scan_path_component()?;
401
402 while !p.eof() && p.cur_byte() == Some(b'/') {
403 let checkpoint = p.pos;
404 p.expect_byte(b'/')?;
405 match p.scan_path_component() {
406 Ok((_, e, ..)) => {
407 end = e;
408 }
409 Err(_) => {
411 p.pos = checkpoint;
412 break;
413 }
414 }
415 }
416
417 Ok(Path {
418 start,
419 end,
420 source: &p.source[start..end],
421 })
422}
423
424fn parse_body<'a>(p: &mut Parser<'a>) -> Result<Body<'a>, Error> {
425 let (start, end) = p.scan_contents()?;
426 Ok(Body {
427 start,
428 end,
429 source: &p.source[start..end],
430 })
431}
432
433pub fn parse_boundary<'a>(p: &mut Parser<'a>) -> Result<Boundary, Error> {
434 let (start, end) = p.scan_boundary()?;
435 Ok(Boundary { start, end })
436}
437
438pub fn parse(source: &str) -> Result<Archive, Error> {
439 let mut parser = Parser::new(source);
440 parse_archive(&mut parser)
441}