/// Errors produced while scanning or parsing an archive.
///
/// The `start` / `end` fields are byte offsets into the source text and
/// describe where the problem was detected.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Error {
    /// Something other than the expected input was found.
    UnexpectedToken {
        /// Description of what the parser was looking for.
        expected: String,
        /// The text that was found instead.
        actual: String,
        /// Byte offset where the offending text starts.
        start: usize,
        /// Byte offset where the offending text ends.
        end: usize,
    },
    /// The input ended while more was required.
    UnexpectedEof {
        /// Description of what the parser was looking for.
        expected: String,
        /// Byte offset at which the end of input was hit.
        start: usize,
    },
    /// The input was well-formed token-wise but violates a rule of the format.
    Invalid {
        /// Human-readable explanation of the violated rule.
        message: String,
        /// Byte offset where the offending span starts.
        start: usize,
        /// Byte offset where the offending span ends.
        end: usize,
    },
}

impl std::fmt::Display for Error {
    /// Writes a single-line message with no trailing newline, so the error
    /// composes cleanly with `println!`, `format!` and error wrappers.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::UnexpectedToken {
                expected, actual, ..
            } => write!(f, "Expected {}, found {}", expected, actual),
            Error::UnexpectedEof { expected, .. } => {
                write!(f, "Expected {}, found Eof", expected)
            }
            Error::Invalid { message, .. } => write!(f, "{}", message),
        }
    }
}

// Lets the error be used with `?` into `Box<dyn std::error::Error>` and
// similar wrappers.
impl std::error::Error for Error {}
pub struct Parser<'a> {
source: &'a str,
boundary_length: usize,
pos: usize,
}
impl<'a> Parser<'a> {
pub fn new(source: &'a str) -> Self {
Self {
source,
boundary_length: 0,
pos: 0,
}
}
pub fn cur_byte(&self) -> Option<u8> {
self.source.as_bytes().get(self.pos).map(|item| *item)
}
pub fn eof(&self) -> bool {
self.pos == self.source.len()
}
pub fn is_line_feed(&self) -> bool {
self.cur_byte() == Some(b'\n')
}
pub fn pos(&self) -> usize {
self.pos
}
pub fn scan_boundary(&mut self) -> Result<(usize, usize), Error> {
let start = self.pos;
let mut eq_len = 1;
self.expect_byte(b'<')?;
self.expect_byte(b'=')?;
while self.cur_byte() == Some(b'=') {
eq_len += 1;
self.pos += 1;
}
self.expect_byte(b'>')?;
let end = self.pos;
if self.boundary_length == 0 {
self.boundary_length = eq_len;
} else if self.boundary_length != eq_len {
return Err(Error::Invalid {
message: "Un matched boundary length".to_string(),
start,
end,
});
}
Ok((start, end))
}
pub fn scan_contents(&mut self) -> Result<(usize, usize), Error> {
let start = self.pos;
if self.cur_byte() == Some(b'<') {
match self.scan_boundary() {
Ok(_) => {
self.pos = start;
return Ok((start, start));
}
Err(_) => {}
}
}
loop {
if self.eof() {
break;
}
if self.cur_byte() == Some(b'\n') {
self.pos += 1;
if self.cur_byte() == Some(b'<') {
let checkpoint = self.pos;
match self.scan_boundary() {
Ok(_) => {
self.pos = checkpoint;
break;
}
Err(_) => {}
}
}
} else {
self.pos += 1;
}
}
let end = self.pos;
Ok((start, end))
}
pub fn expect_byte(&mut self, byte: u8) -> Result<(), Error> {
if self.cur_byte() != Some(byte) {
if self.eof() {
return Err(Error::UnexpectedEof {
expected: (byte as char).to_string(),
start: self.pos,
});
}
return Err(Error::UnexpectedToken {
expected: (byte as char).to_string(),
actual: (self.cur_byte().unwrap() as char).to_string(),
start: self.pos,
end: self.pos + 1,
});
}
self.pos += 1;
Ok(())
}
pub fn scan_path_component(&mut self) -> Result<(usize, usize, &'a str), Error> {
let start = self.pos;
while !self.eof() && self.is_path_component(self.cur_byte().unwrap()) {
self.pos += 1;
}
let end = self.pos;
let source = &self.source[start..end];
if source.len() == 0 || source == "." || source == ".." {
if self.eof() {
return Err(Error::UnexpectedEof {
expected: "path".to_string(),
start: self.pos,
});
} else {
return Err(Error::UnexpectedToken {
expected: "path".to_string(),
actual: source.to_string(),
start,
end,
});
}
}
Ok((start, end, source))
}
#[inline]
fn is_path_component(&self, byte: u8) -> bool {
!matches!(byte, 0..=0x1f | 0x7f | 0x2f | 0x3a | 0x5c)
}
}
/// Payload of an [`Entry`]: either a file or a directory.
#[derive(Debug)]
pub enum FileOrDirectory<'a> {
/// The entry describes a file.
File(File<'a>),
/// The entry describes a directory.
Directory(Directory<'a>),
}
/// A file entry: a path plus optional contents.
#[derive(Debug)]
pub struct File<'a> {
/// Byte offset in the source where the file's entry starts.
pub start: usize,
/// Byte offset in the source where the file's entry ends.
pub end: usize,
/// Contents of the file, if a body was parsed.
pub body: Option<Body<'a>>,
/// Path of the file as written in the archive.
pub path: Path<'a>,
}
/// A path as written in the archive, with its location in the source.
#[derive(Debug)]
pub struct Path<'a> {
/// Byte offset in the source where the path starts.
pub start: usize,
/// Byte offset in the source just past the last path component.
pub end: usize,
/// The path text, i.e. the source slice `start..end`.
pub source: &'a str,
}
/// A directory entry: a path with no contents.
#[derive(Debug)]
pub struct Directory<'a> {
/// Byte offset in the source where the directory's entry starts.
pub start: usize,
/// Byte offset in the source where the directory's entry ends.
pub end: usize,
/// Path of the directory as written in the archive (without the trailing `/`).
pub path: Path<'a>,
}
/// A fully parsed archive.
#[derive(Debug)]
pub struct Archive<'a> {
/// Byte offset in the source where parsing of the archive started.
pub start: usize,
/// Byte offset in the source where parsing of the archive finished.
pub end: usize,
/// File and directory entries, in source order.
pub entries: Vec<Entry<'a>>,
/// Comment found after the last entry, if any.
pub comment: Option<Comment<'a>>,
}
/// One entry of an archive: a file or a directory, optionally preceded by a
/// comment.
#[derive(Debug)]
pub struct Entry<'a> {
    /// Byte offset in the source where the entry (including any leading
    /// comment) starts.
    pub start: usize,
    /// Byte offset in the source where the entry ends.
    pub end: usize,
    /// Comment that directly precedes the entry, if any.
    pub comment: Option<Comment<'a>>,
    /// The file or directory described by the entry.
    pub body: FileOrDirectory<'a>,
}

impl<'a> Entry<'a> {
    /// Returns the entry's path as an owned string.
    ///
    /// Directory paths get a trailing `/`; file paths are returned as written.
    pub fn path(&self) -> String {
        match &self.body {
            FileOrDirectory::File(file) => file.path.source.to_owned(),
            FileOrDirectory::Directory(dir) => format!("{}/", dir.path.source),
        }
    }

    /// Returns an owned copy of the file contents.
    ///
    /// Yields `None` for directories and for files without a body.
    pub fn content(&self) -> Option<String> {
        if let FileOrDirectory::File(file) = &self.body {
            file.body.as_ref().map(|body| body.source.to_owned())
        } else {
            None
        }
    }
}
/// A comment block: a boundary on its own line followed by free-form text.
#[derive(Debug)]
pub struct Comment<'a> {
/// Byte offset in the source where the comment's boundary starts.
pub start: usize,
/// Byte offset in the source where the comment's text ends.
pub end: usize,
/// The whole comment, i.e. the source slice `start..end` (boundary included).
pub source: &'a str,
/// Location of the boundary that opens the comment.
pub boundary: Boundary,
/// The comment text that follows the boundary line.
pub body: Body<'a>,
}
/// A run of free-form contents (file contents or comment text).
#[derive(Debug, Clone)]
pub struct Body<'a> {
/// Byte offset in the source where the contents start.
pub start: usize,
/// Byte offset in the source where the contents end.
pub end: usize,
/// The contents, i.e. the source slice `start..end`.
pub source: &'a str,
}
/// Location of a boundary marker (`<`, one or more `=`, `>`) in the source.
#[derive(Debug)]
pub struct Boundary {
/// Byte offset of the opening `<`.
pub start: usize,
/// Byte offset just past the closing `>`.
pub end: usize,
}
pub fn parse_archive<'a>(p: &mut Parser<'a>) -> Result<Archive<'a>, Error> {
let start = p.pos;
let mut entries = vec![];
loop {
let checkpoint = p.pos;
match parse_entry(p) {
Ok(entry) => {
entries.push(entry);
}
Err(err) => {
p.pos = checkpoint;
match err {
Error::UnexpectedToken { .. } => {}
Error::UnexpectedEof { .. } => {}
Error::Invalid { .. } => {
return Err(err);
}
}
break;
}
}
}
dbg!(&p.source[p.pos..]);
let comment = parse_comment(p).ok();
dbg!(&comment);
if !p.eof() {
return Err(Error::UnexpectedToken {
expected: "Eof eof".to_string(),
actual: p.cur_byte().unwrap().to_string(),
start: p.pos,
end: p.pos + 1,
});
}
Ok(Archive {
start,
end: p.pos,
entries,
comment,
})
}
pub fn parse_entry<'a>(p: &mut Parser<'a>) -> Result<Entry<'a>, Error> {
let start = p.pos;
let comment = parse_comment(p).ok();
if comment.is_none() {
p.pos = start;
}
parse_boundary(p)?;
p.expect_byte(b' ')?;
let path = parse_path(p)?;
match p.cur_byte() {
Some(b'/') => {
p.expect_byte(b'/')?;
while !p.eof() && p.is_line_feed() {
p.pos += 1;
}
let end = p.pos;
if !p.eof() && p.cur_byte() != Some(b'<') {
return Err(Error::Invalid {
message: "A directory can't have text contents.".to_string(),
start: p.pos,
end: p.pos + 1,
});
}
return Ok(Entry {
start,
end,
comment,
body: FileOrDirectory::Directory(Directory { start, end, path }),
});
}
Some(b'\n') => {
p.expect_byte(b'\n')?;
let checkpoint = p.pos;
let body = parse_body(p).ok();
if body.is_none() {
p.pos = checkpoint;
}
let end = p.pos;
return Ok(Entry {
start,
end,
comment,
body: FileOrDirectory::File(File {
start,
end,
body,
path,
}),
});
}
_ if p.eof() => {
return Err(Error::UnexpectedEof {
expected: "`/` or `\n`".to_string(),
start: p.pos,
})
}
_ => {
return Err(Error::UnexpectedToken {
expected: "`/` or `\n`".to_string(),
actual: p.cur_byte().unwrap().to_string(),
start,
end: p.pos,
});
}
}
}
/// Parses a comment: a boundary directly followed by a newline, then
/// free-form text up to the next boundary or the end of input.
///
/// # Errors
///
/// Propagates the error from the boundary scan or from the newline check.
/// The cursor is not rewound on failure.
pub fn parse_comment<'a>(p: &mut Parser<'a>) -> Result<Comment<'a>, Error> {
    let begin = p.pos;
    let boundary = parse_boundary(p)?;
    p.expect_byte(b'\n')?;
    let body = parse_body(p)?;
    let finish = p.pos;
    // The comment's own text spans the boundary line and the body.
    let source = &p.source[begin..finish];
    Ok(Comment {
        start: begin,
        end: finish,
        source,
        boundary,
        body,
    })
}
/// Parses a path: one component followed by any number of `/component`
/// continuations.
///
/// A `/` that is not followed by a valid component is left unconsumed, so the
/// caller can see it.
///
/// # Errors
///
/// Propagates the error from scanning the first component.
fn parse_path<'a>(p: &mut Parser<'a>) -> Result<Path<'a>, Error> {
    let (start, first_end, _) = p.scan_path_component()?;
    let mut end = first_end;
    while p.cur_byte() == Some(b'/') {
        let before_slash = p.pos;
        // Step over the separator that was just observed.
        p.pos += 1;
        if let Ok((_, component_end, _)) = p.scan_path_component() {
            end = component_end;
        } else {
            // No component after the slash: hand the slash back and stop.
            p.pos = before_slash;
            break;
        }
    }
    let source = &p.source[start..end];
    Ok(Path { start, end, source })
}
/// Scans free-form contents from the cursor and wraps them in a [`Body`]
/// carrying both the byte range and the matching source slice.
fn parse_body<'a>(p: &mut Parser<'a>) -> Result<Body<'a>, Error> {
    p.scan_contents().map(|(start, end)| Body {
        start,
        end,
        source: &p.source[start..end],
    })
}
/// Scans a boundary at the cursor and returns its location.
///
/// # Errors
///
/// Propagates any error from `Parser::scan_boundary`.
pub fn parse_boundary<'a>(p: &mut Parser<'a>) -> Result<Boundary, Error> {
    p.scan_boundary()
        .map(|(start, end)| Boundary { start, end })
}
/// Parses `source` as a complete archive.
///
/// Convenience entry point that creates a fresh [`Parser`] over `source` and
/// runs [`parse_archive`] on it.
///
/// # Errors
///
/// Returns whatever error [`parse_archive`] reports.
pub fn parse(source: &str) -> Result<Archive, Error> {
    parse_archive(&mut Parser::new(source))
}