codebiber 0.1.0

Library for mixing handwritten and autogenerated code.
Documentation
use super::*;

mod line;

/// Reasons why the section parser rejects its input.
///
/// The user-facing text for every variant is produced by the `fmt::Display`
/// implementation of this type.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Syntax_Error
{
  /// An identifier was expected. Not produced in this file; presumably
  /// raised by the `line` sub-parser (TODO confirm).
  EXPECTED_IDENTIFIER,
  /// A fixed piece of text was expected; the payload is that text and is
  /// shown quoted in the error message.
  EXPECTED_SNIPPET(&'static str),
  /// The input ended where more was expected.
  UNEXPECTED_END,
  /// A non-empty checksum whose length differs from
  /// `crc32::CHECKSUM_TEXT_LEN` (see `parse_checksum`).
  CHECKSUM_WRONG_LENGTH,
  /// A codegen begin marker was found while a codegen block was still open.
  NESTED_CODEGEN_NOT_SUPPORTED,
  /// A codegen end marker was found while no codegen block was open.
  CODEGEN_END_WITHOUT_MATCHING_BEGIN,
}

/// Splits `full_code` into its handwritten and code-generated sections.
///
/// Every line of the input is fed through a [`State_Machine`]; the sections
/// it emits borrow directly from `full_code`.
///
/// # Errors
///
/// Propagates the first error reported by the state machine, either while
/// consuming a line or when finishing the input.
pub fn parse<'a>(full_code: &'a str) -> Result<Section_List<'a>>
{
  let mut sections = Vec::with_capacity(16);
  let mut state_machine = State_Machine::default();

  // Stops at the first line that is rejected.
  full_code
    .lines()
    .try_for_each(|line| state_machine.consume_line(full_code, &mut sections, line))?;

  // Flush whatever section is still open at the end of the input.
  state_machine.end(full_code, &mut sections)?;

  Ok(sections)
}

/// Parser state carried from one line to the next while splitting a file
/// into sections.
#[derive(Clone, Copy, Default)]
pub enum State_Machine<'a>
{
  /// No section is open: the initial state, and the state right after a
  /// codegen block was closed.
  #[default]
  NOTHING,
  /// Inside handwritten code; the payload is the handwritten text collected
  /// so far.
  HANDWRITTEN(&'a str),
  /// Inside a codegen block opened by `marker` with the given `identifier`.
  /// `code` stays `None` until the first code line of the block was seen.
  CODEGEN{marker: Marker<'a>, identifier: &'a str, code: Option<&'a str>},
}

impl<'a> State_Machine<'a>
{
  /// Advances the state machine by one line.
  ///
  /// * `full_code` – the complete input; `line_span` must be a sub-slice of
  ///   it, because section text is rebuilt from pointer offsets into it.
  /// * `sections` – receives every section that is completed by this line.
  /// * `line_span` – the current line as yielded by `str::lines`.
  ///
  /// # Errors
  ///
  /// Returns a syntax error if the line itself cannot be parsed, if a codegen
  /// block is opened inside another one, if an end marker has no matching
  /// begin marker, or if the checksum of an end marker has the wrong length.
  /// The reported line is the number of lines preceding `line_span`.
  fn consume_line(&mut self, full_code: &'a str, sections: &mut Vec<Section<'a>>, line_span: &'a str) -> Result<()>
  {
    // Attaches a location to a bare `Syntax_Error`: the number of lines in
    // the text before the current line.
    let syntax_error = |e: Syntax_Error| -> Parse_Error
    {
      let line = slice_join(full_code, &full_code[..0], &line_span[..0]).lines().count();
      return Parse_Error::SYNTAX(Error_Location{path: None, line}, e);
    };

    use self::line::Line;
    use State_Machine::*;
    let line = self::line::parse(line_span).map_err(syntax_error)?;

    // From here on `slice_join` is the free function with `full_code` bound.
    let slice_join = |a, b| self::slice_join(full_code, a, b);
    
    *self = match (*self, line)
    {
    (NOTHING, Line::CODE(span)) => HANDWRITTEN(span),
    (NOTHING, Line::BEGIN_CODEGEN{marker, identifier}) => CODEGEN{marker, identifier, code: None},
    (HANDWRITTEN(so_far), Line::CODE(span)) => HANDWRITTEN(slice_join(so_far, span)),
    (HANDWRITTEN(so_far), Line::BEGIN_CODEGEN{marker, identifier}) =>
    {
      // Extend the handwritten text up to the start of the marker line.
      sections.push(Section::HANDWRITTEN(slice_join(so_far, &line_span[..0])));
      CODEGEN{marker, identifier, code: None}
    }
    (CODEGEN{marker, identifier, code: None}, Line::CODE(span)) => CODEGEN{marker, identifier, code: Some(span)},
    (CODEGEN{marker, identifier, code: Some(code)}, Line::CODE(span)) => CODEGEN{marker, identifier, code: Some(slice_join(code, span))},
    (CODEGEN{..}, Line::BEGIN_CODEGEN{..}) => Err(Syntax_Error::NESTED_CODEGEN_NOT_SUPPORTED).map_err(syntax_error)?,
    (CODEGEN{marker: begin, identifier, code}, Line::END_CODEGEN{marker: end, checksum}) =>
    {
      let checksum = parse_checksum(checksum).map_err(syntax_error)?;
      // A block without any code line yields an empty slice located at the
      // start of the end-marker line.
      let code = code.unwrap_or(&line_span[..0]);
      // Extend the generated code up to the start of the end-marker line.
      let code = slice_join(code, &line_span[..0]);
      sections.push(Section::CODEGEN{identifier, code, checksum, begin, end});
      NOTHING
    }
    (_, Line::END_CODEGEN{..}) => Err(Syntax_Error::CODEGEN_END_WITHOUT_MATCHING_BEGIN).map_err(syntax_error)?,
    };

    Ok(())
  }

  /// Finishes parsing once every line has been consumed.
  ///
  /// A handwritten section that is still open is extended to the very end of
  /// `full_code` and pushed to `sections`.
  ///
  /// # Errors
  ///
  /// Returns `Syntax_Error::UNEXPECTED_END` if the input ends while a codegen
  /// block is still open. The reported line is the total number of lines in
  /// `full_code`, following the same "lines before this position" convention
  /// as `consume_line`.
  fn end(self, full_code: &'a str, sections: &mut Vec<Section<'a>>) -> Result<()>
  {
    use State_Machine::*;
    match self
    {
    NOTHING => (),
    HANDWRITTEN(code) => sections.push(Section::HANDWRITTEN(slice_join(full_code, code, end_slice(full_code)))),
    CODEGEN{..} =>
    {
      // An unterminated block is malformed input, not a bug in the caller,
      // so report it as an error instead of panicking.
      let line = full_code.lines().count();
      Err(Parse_Error::SYNTAX(Error_Location{path: None, line}, Syntax_Error::UNEXPECTED_END))?
    }
    }

    Ok(())
  }
}

/// Returns the empty slice positioned at the very end of `slice`.
fn end_slice<'a>(slice: &'a str) -> &'a str
{
  let (_, tail) = slice.split_at(slice.len());
  tail
}

/// Returns the part of `full_slice` that starts where `a` starts and stops
/// where `b` stops.
///
/// Both `a` and `b` have to be sub-slices of `full_slice`; their positions
/// are recovered from their addresses relative to `full_slice`.
///
/// # Panics
///
/// Panics if `a` starts after the end of `b`, or if `b` ends beyond the end
/// of `full_slice`.
fn slice_join<'a>(full_slice: &'a str, a: &'a str, b: &'a str) -> &'a str
{
  let base = full_slice.as_ptr() as usize;

  // Byte offsets into `full_slice`: start of `a`, one past the end of `b`.
  let start = a.as_ptr() as usize - base;
  let stop = b.as_ptr() as usize + b.len() - base;

  assert!(start <= stop);
  assert!(stop <= full_slice.len());
  &full_slice[start .. stop]
}

/// Converts the textual checksum of an end marker into its numeric value.
///
/// An empty `checksum` means "no checksum" and yields `Ok(None)`.
///
/// # Errors
///
/// Returns `Syntax_Error::CHECKSUM_WRONG_LENGTH` if `checksum` is non-empty
/// and its length differs from `crc32::CHECKSUM_TEXT_LEN`.
///
/// # Panics
///
/// Panics if `checksum` has the right length but contains a character that
/// is not an ASCII hex digit; callers are expected to have ruled that out.
fn parse_checksum(checksum: &str) -> Result<Option<crc32::Hash>, Syntax_Error>
{
  match checksum.len()
  {
  0 => return Ok(None),
  n if n != crc32::CHECKSUM_TEXT_LEN => return Err(Syntax_Error::CHECKSUM_WRONG_LENGTH),
  _ => (),
  }

  let only_hex_digits = checksum.chars().all(|c| c.is_ascii_hexdigit());
  assert!(only_hex_digits, "The parser should have guaranteed it!");

  let value = u32::from_str_radix(checksum, 16).expect("Right number of hex chars");
  Ok(Some(value))
}

#[cfg(test)]
mod test
{
  use super::*;
  use Section::*;
  use Indentation as I;

  /// Marker of a plain `// << … >>` comment line at the given indentation.
  fn comment_marker(indentation: I) -> Marker<'static>
  {
    Marker{
      indentation,
      before_marker: "// ",
      after_marker: "",
    }
  }

  #[test]
  fn test_parse_section() -> Result
  {
    assert_eq!(parse("").unwrap(), vec![]);

    // Input without any marker is a single handwritten section.
    for input in ["xyz", "x\ny\nz", "x\ny\n"]
    {
      assert_eq!(parse(input).unwrap(), vec![HANDWRITTEN(input)]);
    }

    Ok(())
  }

  #[test]
  fn trivial()
  {
    assert_eq!(find("").unwrap(), vec![] as Section_List);

    for input in ["xyz", "xyz\nuvw"]
    {
      assert_eq!(find(input).unwrap(), vec![HANDWRITTEN(input)] as Section_List);
    }

    // An empty codegen block without a checksum.
    let expected = vec![
      CODEGEN{
        identifier: "foo",
        code: "",
        checksum: None,
        begin: comment_marker(I(0)),
        end: comment_marker(I(0)),
      },
    ] as Section_List;
    assert_eq!(find("// << codegen foo >>\n// << /codegen >>\n").unwrap(), expected);
  }

  #[test]
  fn test_multiple_sections()
  {
    let code = "x\ny\nz\n  // << codegen blub >>\n  uvw\n // << /codegen >>\nabc";
    let expected = vec![
      HANDWRITTEN("x\ny\nz\n"),
      CODEGEN{
        identifier: "blub",
        code: "  uvw\n",
        checksum: None,
        begin: comment_marker(I(2)),
        end: comment_marker(I(1)),
      },
      HANDWRITTEN("abc"),
    ] as Section_List;

    assert_eq!(find(code).unwrap(), expected);
  }

  #[test]
  fn test_checksum()
  {
    assert_eq!(parse_checksum("").unwrap(), None);
    assert_eq!(parse_checksum("01234567").unwrap(), Some(0x01234567));

    // Non-empty checksums that are too short or too long are rejected.
    for wrong_length in ["42", "0123456789abcdef"]
    {
      assert_eq!(parse_checksum(wrong_length), Err(Syntax_Error::CHECKSUM_WRONG_LENGTH));
    }

    // Formatting a hash and parsing it again round-trips.
    let checksum = crc32::hash(b"42");
    assert_eq!(parse_checksum(crc32::fmt(checksum).as_str()).unwrap(), Some(checksum));
  }
}

impl fmt::Display for Syntax_Error
{
  /// Writes the human-readable message for this syntax error.
  ///
  /// For `EXPECTED_SNIPPET` the expected text is shown in its quoted debug
  /// form.
  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result
  {
    use Syntax_Error::*;
    match self
    {
    EXPECTED_IDENTIFIER => write!(f, "Expected identifier"),
    EXPECTED_SNIPPET(s) => write!(f, "Expected {s:?}"),
    UNEXPECTED_END => write!(f, "Unexpected end"),
    CHECKSUM_WRONG_LENGTH => write!(f, "Checksum has wrong length"),
    NESTED_CODEGEN_NOT_SUPPORTED => write!(f, "Nested code generation blocks are not supported"),
    CODEGEN_END_WITHOUT_MATCHING_BEGIN => write!(f, "`<< /codegen` without matching `<< codegen`"),
    }
  }
}

use crate::indentation::Indentation;