use crate::parse::keyword::Keyword;
use crate::types::misc::FromBytes;
use crate::util::PauseAt;
use crate::{Error, ParseErrorKind as EK, Pos, Result};
use base64ct::{Base64, Encoding};
use std::cell::{Ref, RefCell};
use std::str::FromStr;
use tor_error::internal;
/// Return true iff `c` is one of the two characters treated as in-line
/// whitespace here: an ASCII space or a horizontal tab.
pub(crate) fn is_sp(c: char) -> bool {
    matches!(c, ' ' | '\t')
}
/// Check that `s` contains only bytes that can occur in base64 text.
///
/// Accepted bytes are ASCII letters, ASCII digits, `+`, `/` and `=`.
/// This is a plausibility check only; it does not try to decode anything.
///
/// # Errors
///
/// Returns `BadObjectBase64`, positioned at `s`, if any other byte occurs.
fn b64check(s: &str) -> Result<()> {
    let plausible = |b: u8| b.is_ascii_alphanumeric() || matches!(b, b'=' | b'/' | b'+');
    if s.bytes().all(plausible) {
        Ok(())
    } else {
        Err(EK::BadObjectBase64.at_pos(Pos::at(s)))
    }
}
/// An object attached to an item, as it appears in the document text.
///
/// Every field is a slice of the document being tokenized; nothing is
/// decoded at this stage.
#[derive(Clone, Copy, Debug)]
pub(crate) struct Object<'a> {
/// Text found between `-----BEGIN ` and the closing `-----` on the
/// object's first line.
tag: &'a str,
/// Document text between the BEGIN line and the END line, still
/// base64-encoded and still containing its newlines.
data: &'a str,
/// The `-----END ...` line, without its terminating newline.
endline: &'a str,
}
/// One tokenized entry of a document: a keyword line plus an optional
/// object that follows it.
#[derive(Clone, Debug)]
pub(crate) struct Item<'a, K: Keyword> {
/// Keyword value obtained by passing `kwd_str` to `K::from_str`.
kwd: K,
/// The keyword as it appears in the document (after any leading `opt `).
kwd_str: &'a str,
/// Everything on the keyword line after the first space or tab; an
/// empty slice when the line carries no arguments.
args: &'a str,
/// Cache of `args` split on spaces and tabs.  Starts out as `None` and
/// is filled the first time an argument is looked up by index.
split_args: RefCell<Option<Vec<&'a str>>>,
/// The object that immediately followed the keyword line, if any.
object: Option<Object<'a>>,
}
/// Low-level tokenizer: walks a document string and produces one `Item`
/// (or one error) at a time.
#[derive(Debug)]
struct NetDocReaderBase<'a, K: Keyword> {
/// The complete document being tokenized.
s: &'a str,
/// Byte offset into `s` of the next unread byte.
off: usize,
/// Ties the reader to the keyword type `K` without storing a value of it.
_k: std::marker::PhantomData<K>,
}
impl<'a, K: Keyword> NetDocReaderBase<'a, K> {
    /// Create a reader positioned at the first byte of `s`.
    fn new(s: &'a str) -> Self {
        Self {
            s,
            off: 0,
            _k: std::marker::PhantomData,
        }
    }

    /// Build a `Pos` describing byte offset `pos` within the document.
    fn pos(&self, pos: usize) -> Pos {
        Pos::from_offset(self.s, pos)
    }

    /// Move the read offset forward by `n` bytes.
    ///
    /// # Errors
    ///
    /// Returns an internal error, positioned at the current offset, when
    /// fewer than `n` bytes remain.  The offset is not changed in that case.
    fn advance(&mut self, n: usize) -> Result<()> {
        if n <= self.remaining() {
            self.off += n;
            return Ok(());
        }
        Err(
            Error::from(internal!("tried to advance past end of document"))
                .at_pos(Pos::from_offset(self.s, self.off)),
        )
    }

    /// Return the number of bytes that have not been consumed yet.
    fn remaining(&self) -> usize {
        self.s.len() - self.off
    }

    /// Return true if the unread part of the document begins with `s`.
    fn starts_with(&self, s: &str) -> bool {
        let unread = &self.s[self.off..];
        unread.starts_with(s)
    }

    /// Consume and return the next line, without its trailing newline.
    ///
    /// # Errors
    ///
    /// If no newline remains, everything left is consumed and
    /// `TruncatedLine` is returned, positioned at the end of the document.
    fn line(&mut self) -> Result<&'a str> {
        let unread = &self.s[self.off..];
        match unread.find('\n') {
            Some(nl) => {
                // Step over the newline as well as the line itself.
                self.advance(nl + 1)?;
                Ok(&unread[..nl])
            }
            None => {
                // Drain the rest so that the next call sees an empty document.
                self.advance(unread.len())?;
                Err(EK::TruncatedLine.at_pos(self.pos(self.s.len())))
            }
        }
    }

    /// Consume a keyword line and return `(keyword, arguments)`.
    ///
    /// A leading `opt ` is dropped; a keyword introduced that way is not
    /// allowed to be an annotation.  The arguments are whatever follows the
    /// first space or tab, or an empty slice if there is none.
    ///
    /// # Errors
    ///
    /// `EmptyLine`, `MissingKeyword` or `BadKeyword`, each positioned at the
    /// start of the line, plus any error from reading the line itself.
    fn kwdline(&mut self) -> Result<(&'a str, &'a str)> {
        let line_start = self.off;
        let raw = self.line()?;
        if raw.is_empty() {
            return Err(EK::EmptyLine.at_pos(self.pos(line_start)));
        }
        let (body, anno_ok) = match raw.strip_prefix("opt ") {
            Some(stripped) => (stripped, false),
            None => (raw, true),
        };
        let mut pieces = body.splitn(2, |c: char| matches!(c, ' ' | '\t'));
        let kwd = pieces
            .next()
            .ok_or_else(|| EK::MissingKeyword.at_pos(self.pos(line_start)))?;
        if !keyword_ok(kwd, anno_ok) {
            return Err(EK::BadKeyword.at_pos(self.pos(line_start)));
        }
        // With no argument part, hand back a zero-length slice taken from
        // the keyword so that it still points inside the document.
        let args = pieces.next().unwrap_or(&kwd[kwd.len()..]);
        Ok((kwd, args))
    }

    /// Consume an object if one starts at the current offset.
    ///
    /// Returns `Ok(None)` without consuming anything when the unread text
    /// does not begin with `-----BEGIN `.
    ///
    /// # Errors
    ///
    /// `BadObjectBeginTag` for a malformed first line, `BadObjectBase64` for
    /// a body line that is not plausible base64, `BadObjectEndTag` for an END
    /// line lacking the closing dashes, `BadObjectMismatchedTag` when the END
    /// tag differs from the BEGIN tag, plus any error from reading a line.
    fn object(&mut self) -> Result<Option<Object<'a>>> {
        /// Prefix of the line that opens an object.
        const BEGIN_STR: &str = "-----BEGIN ";
        /// Prefix of the line that closes an object.
        const END_STR: &str = "-----END ";
        /// Suffix shared by the opening and closing lines.
        const TAG_END: &str = "-----";

        let begin_off = self.off;
        if !self.starts_with(BEGIN_STR) {
            return Ok(None);
        }
        let begin_line = self.line()?;
        // The line is known to start with BEGIN_STR, so once the suffix has
        // been removed the prefix can always be removed too.
        let tag = match begin_line
            .strip_suffix(TAG_END)
            .and_then(|l| l.strip_prefix(BEGIN_STR))
        {
            Some(t) if tag_keyword_ok(t) => t,
            _ => return Err(EK::BadObjectBeginTag.at_pos(self.pos(begin_off))),
        };

        let data_off = self.off;
        let (end_off, endline) = loop {
            let line_off = self.off;
            let candidate = self.line()?;
            if candidate.starts_with(END_STR) {
                break (line_off, candidate);
            }
            // Give up on the first implausible body line; otherwise an
            // unterminated object could swallow the rest of the document.
            b64check(candidate).map_err(|e| e.within(self.s))?;
        };
        let data = &self.s[data_off..end_off];

        let endtag = match endline.strip_suffix(TAG_END) {
            Some(head) => &head[END_STR.len()..],
            None => return Err(EK::BadObjectEndTag.at_pos(self.pos(end_off))),
        };
        if endtag != tag {
            return Err(EK::BadObjectMismatchedTag.at_pos(self.pos(end_off)));
        }
        Ok(Some(Object { tag, data, endline }))
    }

    /// Read the next item: a keyword line and, if present, its object.
    ///
    /// Returns `Ok(None)` once the whole document has been consumed.
    fn item(&mut self) -> Result<Option<Item<'a, K>>> {
        if self.remaining() == 0 {
            return Ok(None);
        }
        let (kwd_str, args) = self.kwdline()?;
        let object = self.object()?;
        Ok(Some(Item {
            kwd: K::from_str(kwd_str),
            kwd_str,
            args,
            split_args: RefCell::new(None),
            object,
        }))
    }
}
/// Return true iff `s` is a well-formed keyword.
///
/// A keyword is one or more characters drawn from `A-Z`, `a-z`, `0-9` and
/// `-`, and it may not begin with `-`.  When `anno_ok` is true, a single
/// leading `@` is additionally permitted in front of the keyword.
///
/// A lone `@` is rejected: what follows the `@` must itself be a
/// non-empty keyword, for the same reason an empty `s` is rejected.
fn keyword_ok(s: &str, anno_ok: bool) -> bool {
    /// Return true for characters permitted inside a keyword.
    fn kwd_char_ok(c: char) -> bool {
        c.is_ascii_alphanumeric() || c == '-'
    }

    // Only look past an `@` when the caller allows annotations.
    let name = match s.strip_prefix('@') {
        Some(rest) if anno_ok => rest,
        _ => s,
    };
    match name.chars().next() {
        // Nothing left to name (covers "" and a bare "@"), or a leading dash.
        None | Some('-') => false,
        Some(_) => name.chars().all(kwd_char_ok),
    }
}
/// Return true iff `s` is acceptable as an object tag: every piece
/// obtained by splitting `s` on single spaces must be a valid keyword,
/// with annotations not permitted.
fn tag_keyword_ok(s: &str) -> bool {
    for word in s.split(' ') {
        if !keyword_ok(word, false) {
            return false;
        }
    }
    true
}
/// Iterating over the reader yields each item of the document in turn, or
/// the error met while trying to read it; iteration stops once the
/// document has been fully consumed.
impl<'a, K: Keyword> Iterator for NetDocReaderBase<'a, K> {
    type Item = Result<Item<'a, K>>;

    fn next(&mut self) -> Option<Self::Item> {
        match self.item() {
            Ok(Some(item)) => Some(Ok(item)),
            Ok(None) => None,
            Err(e) => Some(Err(e)),
        }
    }
}
/// Decode base64 text that may be spread across several lines.
///
/// Every `'\n'` is removed from `s` and the remainder is handed to
/// `Base64::decode_vec`; its error, if any, is returned unchanged.
fn base64_decode_multiline(s: &str) -> std::result::Result<Vec<u8>, base64ct::Error> {
    let joined: String = s.chars().filter(|&ch| ch != '\n').collect();
    Base64::decode_vec(&joined)
}
impl<'a, K: Keyword> Item<'a, K> {
pub(crate) fn kwd(&self) -> K {
self.kwd
}
pub(crate) fn kwd_str(&self) -> &'a str {
self.kwd_str
}
pub(crate) fn has_kwd_in(&self, ks: &[K]) -> bool {
ks.contains(&self.kwd)
}
pub(crate) fn args_as_str(&self) -> &'a str {
self.args
}
fn args_as_vec(&self) -> Ref<'_, Vec<&'a str>> {
if self.split_args.borrow().is_none() {
self.split_args.replace(Some(self.args().collect()));
}
Ref::map(self.split_args.borrow(), |opt| match opt {
Some(v) => v,
None => panic!(),
})
}
pub(crate) fn args(&self) -> impl Iterator<Item = &'a str> {
self.args.split(is_sp).filter(|s| !s.is_empty())
}
pub(crate) fn arg(&self, idx: usize) -> Option<&'a str> {
self.args_as_vec().get(idx).copied()
}
pub(crate) fn required_arg(&self, idx: usize) -> Result<&'a str> {
self.arg(idx)
.ok_or_else(|| EK::MissingArgument.at_pos(Pos::at(self.args)))
}
pub(crate) fn parse_optional_arg<V: FromStr>(&self, idx: usize) -> Result<Option<V>>
where
Error: From<V::Err>,
{
match self.arg(idx) {
None => Ok(None),
Some(s) => match s.parse() {
Ok(r) => Ok(Some(r)),
Err(e) => {
let e: Error = e.into();
Err(e.or_at_pos(Pos::at(s)))
}
},
}
}
pub(crate) fn parse_arg<V: FromStr>(&self, idx: usize) -> Result<V>
where
Error: From<V::Err>,
{
match self.parse_optional_arg(idx) {
Ok(Some(v)) => Ok(v),
Ok(None) => Err(EK::MissingArgument.at_pos(self.arg_pos(idx))),
Err(e) => Err(e),
}
}
pub(crate) fn n_args(&self) -> usize {
self.args().count()
}
pub(crate) fn has_obj(&self) -> bool {
self.object.is_some()
}
pub(crate) fn obj_tag(&self) -> Option<&'a str> {
self.object.map(|o| o.tag)
}
pub(crate) fn obj_raw(&self) -> Result<Option<(&'a str, Vec<u8>)>> {
match self.object {
None => Ok(None),
Some(obj) => {
let decoded = base64_decode_multiline(obj.data)
.map_err(|_| EK::BadObjectBase64.at_pos(Pos::at(obj.data)))?;
Ok(Some((obj.tag, decoded)))
}
}
}
pub(crate) fn obj(&self, want_tag: &str) -> Result<Vec<u8>> {
match self.obj_raw()? {
None => Err(EK::MissingObject
.with_msg(self.kwd.to_str())
.at_pos(self.end_pos())),
Some((tag, decoded)) => {
if tag != want_tag {
Err(EK::WrongObject.at_pos(Pos::at(tag)))
} else {
Ok(decoded)
}
}
}
}
pub(crate) fn parse_obj<V: FromBytes>(&self, want_tag: &str) -> Result<V> {
let bytes = self.obj(want_tag)?;
#[allow(clippy::unwrap_used)]
let p = Pos::at(self.object.unwrap().data);
V::from_vec(bytes, p).map_err(|e| e.at_pos(p))
}
pub(crate) fn pos(&self) -> Pos {
Pos::at(self.kwd_str)
}
pub(crate) fn offset_in(&self, s: &str) -> Option<usize> {
crate::util::str::str_offset(s, self.kwd_str)
}
pub(crate) fn arg_pos(&self, n: usize) -> Pos {
let args = self.args_as_vec();
if n < args.len() {
Pos::at(args[n])
} else {
self.last_arg_end_pos()
}
}
fn last_arg_end_pos(&self) -> Pos {
let args = self.args_as_vec();
if args.len() >= 1 {
let last_arg = args[args.len() - 1];
Pos::at_end_of(last_arg)
} else {
Pos::at_end_of(self.kwd_str)
}
}
pub(crate) fn end_pos(&self) -> Pos {
match self.object {
Some(o) => Pos::at_end_of(o.endline),
None => self.last_arg_end_pos(),
}
}
pub(crate) fn offset_after(&self, s: &str) -> Option<usize> {
self.end_pos().offset_within(s).map(|nl_pos| nl_pos + 1)
}
}
/// Wrapper around an optional reference to an `Item`, whose parsing
/// helpers return `Ok(None)` instead of an error when the item is absent.
pub(crate) struct MaybeItem<'a, 'b, K: Keyword>(Option<&'a Item<'b, K>>);
impl<'a, 'b, K: Keyword> MaybeItem<'a, 'b, K> {
fn pos(&self) -> Pos {
match self.0 {
Some(item) => item.pos(),
None => Pos::None,
}
}
pub(crate) fn from_option(opt: Option<&'a Item<'b, K>>) -> Self {
MaybeItem(opt)
}
#[cfg(any(test, feature = "routerdesc"))]
pub(crate) fn parse_arg<V: FromStr>(&self, idx: usize) -> Result<Option<V>>
where
Error: From<V::Err>,
{
match self.0 {
Some(item) => match item.parse_arg(idx) {
Ok(v) => Ok(Some(v)),
Err(e) => Err(e.or_at_pos(self.pos())),
},
None => Ok(None),
}
}
pub(crate) fn args_as_str(&self) -> Option<&str> {
self.0.map(|item| item.args_as_str())
}
pub(crate) fn parse_args_as_str<V: FromStr>(&self) -> Result<Option<V>>
where
Error: From<V::Err>,
{
match self.0 {
Some(item) => match item.args_as_str().parse::<V>() {
Ok(v) => Ok(Some(v)),
Err(e) => {
let e: Error = e.into();
Err(e.or_at_pos(self.pos()))
}
},
None => Ok(None),
}
}
}
/// Predicates for inspecting the outcome of tokenizing one item without
/// having to unpack it first.
pub(crate) trait ItemResult<K: Keyword> {
    /// Return true for a successfully read item whose keyword is an
    /// annotation.
    fn is_ok_with_annotation(&self) -> bool;
    /// Return true for a successfully read item whose keyword is not an
    /// annotation.
    fn is_ok_with_non_annotation(&self) -> bool;
    /// Return true for a successfully read item whose keyword is `k`.
    fn is_ok_with_kwd(&self, k: K) -> bool {
        let wanted = [k];
        self.is_ok_with_kwd_in(&wanted)
    }
    /// Return true for a successfully read item whose keyword is in `ks`.
    fn is_ok_with_kwd_in(&self, ks: &[K]) -> bool;
    /// Return true for a successfully read item whose keyword is not in
    /// `ks`.
    fn is_ok_with_kwd_not_in(&self, ks: &[K]) -> bool;
    /// Return true if this is the error reported for an empty line.
    fn is_empty_line(&self) -> bool;
}
/// Every keyword predicate is false for an `Err`; `is_empty_line` is true
/// only for an `Err` whose kind is `EmptyLine`.
impl<'a, K: Keyword> ItemResult<K> for Result<Item<'a, K>> {
    fn is_ok_with_annotation(&self) -> bool {
        matches!(self, Ok(item) if item.kwd().is_annotation())
    }

    fn is_ok_with_non_annotation(&self) -> bool {
        matches!(self, Ok(item) if !item.kwd().is_annotation())
    }

    fn is_ok_with_kwd_in(&self, ks: &[K]) -> bool {
        matches!(self, Ok(item) if item.has_kwd_in(ks))
    }

    fn is_ok_with_kwd_not_in(&self, ks: &[K]) -> bool {
        matches!(self, Ok(item) if !item.has_kwd_in(ks))
    }

    fn is_empty_line(&self) -> bool {
        match self {
            Err(e) => e.parse_error_kind() == crate::err::ParseErrorKind::EmptyLine,
            Ok(_) => false,
        }
    }
}
/// Peekable tokenizer over a document string.
#[derive(Debug)]
pub(crate) struct NetDocReader<'a, K: Keyword> {
/// The document being tokenized.
s: &'a str,
/// Peekable stream of the items (or errors) read from `s`.
tokens: std::iter::Peekable<NetDocReaderBase<'a, K>>,
}
impl<'a, K: Keyword> NetDocReader<'a, K> {
    /// Create a reader over the document `s`.
    pub(crate) fn new(s: &'a str) -> Self {
        let tokens = NetDocReaderBase::new(s).peekable();
        Self { s, tokens }
    }

    /// Return the document this reader was created with.
    pub(crate) fn str(&self) -> &'a str {
        self.s
    }

    /// Return a mutable reference to the underlying peekable item stream.
    pub(crate) fn iter(
        &mut self,
    ) -> &mut std::iter::Peekable<impl Iterator<Item = Result<Item<'a, K>>>> {
        &mut self.tokens
    }

    /// Wrap the item stream in a `PauseAt` built from the predicate `f`.
    pub(crate) fn pause_at<F>(
        &mut self,
        f: F,
    ) -> PauseAt<'_, impl Iterator<Item = Result<Item<'a, K>>>, F>
    where
        F: FnMut(&Result<Item<'a, K>>) -> bool,
    {
        PauseAt::from_peekable(&mut self.tokens, f)
    }

    /// Return true if no items (and no errors) are left to read.
    ///
    /// Takes `&mut self` because it has to peek at the stream.
    #[allow(clippy::wrong_self_convention)]
    pub(crate) fn is_exhausted(&mut self) -> bool {
        self.tokens.peek().is_none()
    }

    /// Check that nothing is left to read, without consuming anything.
    ///
    /// # Errors
    ///
    /// `UnexpectedToken` at the next item if one is pending, or a clone of
    /// the pending error if the next entry is an error.
    pub(crate) fn should_be_exhausted(&mut self) -> Result<()> {
        match self.tokens.peek() {
            None => Ok(()),
            Some(Err(e)) => Err(e.clone()),
            Some(Ok(t)) => Err(EK::UnexpectedToken
                .with_msg(t.kwd().to_str())
                .at_pos(t.pos())),
        }
    }

    /// Like `should_be_exhausted`, but first discards any pending
    /// `EmptyLine` errors at the front of the stream.
    #[cfg(feature = "routerdesc")]
    pub(crate) fn should_be_exhausted_but_for_empty_lines(&mut self) -> Result<()> {
        while matches!(
            self.tokens.peek(),
            Some(Err(e)) if e.parse_error_kind() == crate::err::ParseErrorKind::EmptyLine
        ) {
            let _ignore = self.tokens.next();
        }
        self.should_be_exhausted()
    }

    /// Return the position of the next entry in the stream: the item's
    /// position, the error's position, or the end of the document when
    /// nothing is left.
    pub(crate) fn pos(&mut self) -> Pos {
        match self.tokens.peek() {
            None => Pos::at_end_of(self.s),
            Some(next) => match next {
                Ok(tok) => tok.pos(),
                Err(e) => e.pos(),
            },
        }
    }
}
/// Unit tests for the tokenizer.
#[cfg(test)]
mod test {
    #![allow(clippy::unwrap_used)]
    #![allow(clippy::cognitive_complexity)]
    use super::*;
    use crate::parse::macros::test::Fruit;
    use crate::{ParseErrorKind as EK, Pos, Result};

    /// Tokenize a well-formed document and exercise the `Item` accessors.
    #[test]
    fn read_simple() {
        use Fruit::*;
        let s = "\
@tasty very much so
opt apple 77
banana 60
cherry 6
-----BEGIN CHERRY SYNOPSIS-----
8J+NkvCfjZLwn42S8J+NkvCfjZLwn42S
-----END CHERRY SYNOPSIS-----
plum hello there
";
        let mut r: NetDocReader<'_, Fruit> = NetDocReader::new(s);
        assert_eq!(r.str(), s);
        // Nothing has been consumed yet, so the reader is not exhausted.
        assert!(r.should_be_exhausted().is_err());
        let toks: Result<Vec<_>> = r.iter().collect();
        assert!(r.should_be_exhausted().is_ok());
        let toks = toks.unwrap();
        assert_eq!(toks.len(), 5);

        // First item: the annotation with three arguments and no object.
        assert_eq!(toks[0].kwd(), ANN_TASTY);
        assert_eq!(toks[0].n_args(), 3);
        assert_eq!(toks[0].args_as_str(), "very much so");
        assert_eq!(toks[0].arg(1), Some("much"));
        {
            let a: Vec<_> = toks[0].args().collect();
            assert_eq!(a, vec!["very", "much", "so"]);
        }
        // "very" is not a number, and there is no argument at index 10.
        assert!(toks[0].parse_arg::<usize>(0).is_err());
        assert!(toks[0].parse_arg::<usize>(10).is_err());
        assert!(!toks[0].has_obj());
        assert_eq!(toks[0].obj_tag(), None);

        // Third item ("banana 60"): check the reported positions.
        assert_eq!(toks[2].pos().within(s), Pos::from_line(3, 1));
        assert_eq!(toks[2].arg_pos(0).within(s), Pos::from_line(3, 8));
        assert_eq!(toks[2].last_arg_end_pos().within(s), Pos::from_line(3, 10));
        assert_eq!(toks[2].end_pos().within(s), Pos::from_line(3, 10));

        // Fourth item ("cherry 6"): one argument and an attached object.
        assert_eq!(toks[3].kwd(), STONEFRUIT);
        assert_eq!(toks[3].kwd_str(), "cherry");
        assert_eq!(toks[3].n_args(), 1);
        assert_eq!(toks[3].required_arg(0), Ok("6"));
        assert_eq!(toks[3].parse_arg::<usize>(0), Ok(6));
        assert_eq!(toks[3].parse_optional_arg::<usize>(0), Ok(Some(6)));
        assert_eq!(toks[3].parse_optional_arg::<usize>(3), Ok(None));
        assert!(toks[3].has_obj());
        assert_eq!(toks[3].obj_tag(), Some("CHERRY SYNOPSIS"));
        assert_eq!(
            &toks[3].obj("CHERRY SYNOPSIS").unwrap()[..],
            "🍒🍒🍒🍒🍒🍒".as_bytes()
        );
        // Asking for a different tag must fail.
        assert!(toks[3].obj("PLUOT SYNOPSIS").is_err());
        // With an object attached, the item ends at the end of the END line.
        assert_eq!(toks[3].end_pos().within(s), Pos::from_line(7, 30));
    }

    /// Tokenize a document full of malformed entries and check the kind and
    /// position of every error, as well as the items that still parse.
    #[test]
    fn test_badtoks() {
        use Fruit::*;
        // The document ends with a malformed END line (line 27), an empty
        // line (line 28) and a final line with no newline (line 29).  The
        // empty line is spelled with `\n\` escapes so that it cannot be lost
        // if blank lines are ever stripped from this file.
        let s = "\
-foobar 9090
apple 3.14159
$hello
unrecognized 127.0.0.1 foo
plum
-----BEGIN WHATEVER-----
8J+NkvCfjZLwn42S8J+NkvCfjZLwn42S
-----END SOMETHING ELSE-----
orange
orange
-----BEGIN WHATEVER-----
not! base64!
-----END WHATEVER-----
guava paste
opt @annotation
orange
-----BEGIN LOBSTER
8J+NkvCfjZLwn42S8J+NkvCfjZLwn42S
-----END SOMETHING ELSE-----
orange
-----BEGIN !!!!!!-----
8J+NkvCfjZLwn42S8J+NkvCfjZLwn42S
-----END !!!!!!-----
cherry
-----BEGIN CHERRY SYNOPSIS-----
8J+NkvCfjZLwn42S8J+NkvCfjZLwn42S
-----END CHERRY SYNOPSIS\n\
\n\
truncated line";
        let mut r: NetDocReader<'_, Fruit> = NetDocReader::new(s);
        let toks: Vec<_> = r.iter().collect();

        // Line 1: a keyword may not start with '-'.
        assert!(toks[0].is_err());
        assert_eq!(
            toks[0].as_ref().err().unwrap(),
            &EK::BadKeyword.at_pos(Pos::from_line(1, 1))
        );

        // Line 2: a good item; exercise the ItemResult predicates on it.
        assert!(toks[1].is_ok());
        assert!(toks[1].is_ok_with_non_annotation());
        assert!(!toks[1].is_ok_with_annotation());
        assert!(toks[1].is_ok_with_kwd_in(&[APPLE, ORANGE]));
        assert!(toks[1].is_ok_with_kwd_not_in(&[ORANGE, UNRECOGNIZED]));
        let t = toks[1].as_ref().unwrap();
        assert_eq!(t.kwd(), APPLE);
        assert_eq!(t.arg(0), Some("3.14159"));

        // Line 3: '$' is not a keyword character; every predicate is false
        // for an error.
        assert!(toks[2].is_err());
        assert!(!toks[2].is_ok_with_non_annotation());
        assert!(!toks[2].is_ok_with_annotation());
        assert!(!toks[2].is_ok_with_kwd_in(&[APPLE, ORANGE]));
        assert!(!toks[2].is_ok_with_kwd_not_in(&[ORANGE, UNRECOGNIZED]));
        assert_eq!(
            toks[2].as_ref().err().unwrap(),
            &EK::BadKeyword.at_pos(Pos::from_line(3, 1))
        );

        // Line 4: an unknown but well-formed keyword.
        assert!(toks[3].is_ok());
        let t = toks[3].as_ref().unwrap();
        assert_eq!(t.kwd(), UNRECOGNIZED);
        assert_eq!(t.arg(1), Some("foo"));

        // Lines 5-8: the END tag differs from the BEGIN tag.
        assert!(toks[4].is_err());
        assert_eq!(
            toks[4].as_ref().err().unwrap(),
            &EK::BadObjectMismatchedTag.at_pos(Pos::from_line(8, 1))
        );

        // Line 9: a keyword with no arguments and no object.
        assert!(toks[5].is_ok());
        let t = toks[5].as_ref().unwrap();
        assert_eq!(t.kwd(), ORANGE);
        assert_eq!(t.args_as_str(), "");

        // Lines 10-12: an object body line that is not base64.
        assert!(toks[6].is_err());
        assert_eq!(
            toks[6].as_ref().err().unwrap(),
            &EK::BadObjectBase64.at_pos(Pos::from_line(12, 1))
        );

        // Line 13: the orphaned END line is read as a keyword line.
        assert!(toks[7].is_err());
        assert_eq!(
            toks[7].as_ref().err().unwrap(),
            &EK::BadKeyword.at_pos(Pos::from_line(13, 1))
        );

        // Line 14: a good item again.
        assert!(toks[8].is_ok());
        let t = toks[8].as_ref().unwrap();
        assert_eq!(t.kwd(), GUAVA);

        // Line 15: an annotation is not allowed after "opt ".
        assert!(toks[9].is_err());
        assert_eq!(
            toks[9].as_ref().err().unwrap(),
            &EK::BadKeyword.at_pos(Pos::from_line(15, 1))
        );

        // Lines 16-17: the BEGIN line lacks its closing dashes.
        assert!(toks[10].is_err());
        assert_eq!(
            toks[10].as_ref().err().unwrap(),
            &EK::BadObjectBeginTag.at_pos(Pos::from_line(17, 1))
        );

        // Lines 18-19: the leftover body and END lines are bad keyword lines.
        assert!(toks[11].is_err());
        assert_eq!(
            toks[11].as_ref().err().unwrap(),
            &EK::BadKeyword.at_pos(Pos::from_line(18, 1))
        );
        assert!(toks[12].is_err());
        assert_eq!(
            toks[12].as_ref().err().unwrap(),
            &EK::BadKeyword.at_pos(Pos::from_line(19, 1))
        );

        // Lines 20-21: the BEGIN tag is not made of keywords.
        assert!(toks[13].is_err());
        assert_eq!(
            toks[13].as_ref().err().unwrap(),
            &EK::BadObjectBeginTag.at_pos(Pos::from_line(21, 1))
        );

        // Lines 22-23: leftovers of that object, read as keyword lines.
        assert!(toks[14].is_err());
        assert_eq!(
            toks[14].as_ref().err().unwrap(),
            &EK::BadKeyword.at_pos(Pos::from_line(22, 1))
        );
        assert!(toks[15].is_err());
        assert_eq!(
            toks[15].as_ref().err().unwrap(),
            &EK::BadKeyword.at_pos(Pos::from_line(23, 1))
        );

        // Lines 24-27: the END line lacks its closing dashes.
        assert!(toks[16].is_err());
        assert_eq!(
            toks[16].as_ref().err().unwrap(),
            &EK::BadObjectEndTag.at_pos(Pos::from_line(27, 1))
        );

        // Line 28: the empty line.
        assert!(toks[17].is_err());
        assert_eq!(
            toks[17].as_ref().err().unwrap(),
            &EK::EmptyLine.at_pos(Pos::from_line(28, 1))
        );

        // Line 29: no terminating newline; reported at the end of the input.
        assert!(toks[18].is_err());
        assert_eq!(
            toks[18].as_ref().err().unwrap(),
            &EK::TruncatedLine.at_pos(Pos::from_line(29, 15))
        );
    }
}