#![no_std]
extern crate alloc;
use alloc::boxed::Box;
use alloc::vec;
use alloc::vec::Vec;
use core::error::Error;
use core::fmt;
#[cfg(test)]
extern crate std;
pub const DEFAULT_MAX_STEPS: usize = 1_000_000;
const TOK_ONCE: &[u8] = b"(once)";
const TOK_START: &[u8] = b"(start)";
const TOK_END: &[u8] = b"(end)";
const TOK_RETURN: &[u8] = b"(return)";
pub fn run(
source: impl AsRef<[u8]>,
input: impl AsRef<[u8]>,
options: RunOptions,
) -> Result<RunResult, AebError> {
let program = Program::parse(source).map_err(AebError::Parse)?;
program.run(input, options).map_err(AebError::Run)
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RunOptions {
pub max_steps: usize,
}
impl RunOptions {
#[must_use]
pub const fn new(max_steps: usize) -> Self {
Self { max_steps }
}
}
impl Default for RunOptions {
fn default() -> Self {
Self {
max_steps: DEFAULT_MAX_STEPS,
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RuleId(usize);
impl RuleId {
#[must_use]
pub const fn index(self) -> usize {
self.0
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RuleInfo<'program> {
id: RuleId,
line_number: usize,
compact_source: &'program [u8],
}
impl<'program> RuleInfo<'program> {
#[must_use]
pub const fn id(self) -> RuleId {
self.id
}
#[must_use]
pub const fn line_number(self) -> usize {
self.line_number
}
#[must_use]
pub const fn compact_source(self) -> &'program [u8] {
self.compact_source
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Anchor {
Anywhere,
Start,
End,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RuleRepeat {
Always,
Once,
}
impl RuleRepeat {
fn is_once(self) -> bool {
matches!(self, Self::Once)
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RuntimeRuleState {
Fresh,
Consumed,
}
impl RuntimeRuleState {
fn is_consumed(self) -> bool {
matches!(self, Self::Consumed)
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct CodeByte(u8);
impl CodeByte {
fn parse_compact(
byte: CompactByte,
line_number: usize,
payload_kind: PayloadKind,
) -> Result<Self, ParseError> {
debug_assert!(byte.as_u8().is_ascii());
debug_assert!(!byte.as_u8().is_ascii_whitespace());
if is_reserved_code_byte(byte.as_u8()) {
return Err(ParseError::new(
line_number,
Some(byte.source_column()),
ParseErrorKind::ReservedByteInPayload {
byte: byte.as_u8(),
payload_kind,
},
));
}
Ok(Self(byte.as_u8()))
}
fn as_u8(self) -> u8 {
self.0
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct RuntimeByte(u8);
impl RuntimeByte {
fn parse_input(byte: u8, zero_based_column: usize) -> Result<Self, InputError> {
if !byte.is_ascii() {
return Err(InputError {
column: zero_based_column + 1,
byte,
});
}
Ok(Self(byte))
}
fn from_code(byte: CodeByte) -> Self {
Self(byte.as_u8())
}
fn as_u8(self) -> u8 {
self.0
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct Payload {
bytes: Vec<CodeByte>,
}
impl Payload {
fn parse(
input: &[CompactByte],
line_number: usize,
payload_kind: PayloadKind,
) -> Result<Self, ParseError> {
let bytes = input
.iter()
.copied()
.map(|byte| CodeByte::parse_compact(byte, line_number, payload_kind))
.collect::<Result<Vec<_>, _>>()?;
Ok(Self { bytes })
}
fn len(&self) -> usize {
self.bytes.len()
}
fn is_empty(&self) -> bool {
self.bytes.is_empty()
}
fn iter(&self) -> impl Iterator<Item = CodeByte> + '_ {
self.bytes.iter().copied()
}
fn to_runtime_bytes(&self) -> Vec<RuntimeByte> {
self.iter().map(RuntimeByte::from_code).collect()
}
fn to_vec_u8(&self) -> Vec<u8> {
self.iter().map(CodeByte::as_u8).collect()
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct State {
bytes: Vec<RuntimeByte>,
}
impl State {
fn parse_input(input: &[u8]) -> Result<Self, InputError> {
let bytes = input
.iter()
.copied()
.enumerate()
.map(|(zero_based_column, byte)| RuntimeByte::parse_input(byte, zero_based_column))
.collect::<Result<Vec<_>, _>>()?;
Ok(Self { bytes })
}
fn len(&self) -> usize {
self.bytes.len()
}
fn starts_with_payload(&self, payload: &Payload) -> Option<StateMatch> {
self.matches_payload_at(0, payload)
}
fn ends_with_payload(&self, payload: &Payload) -> Option<StateMatch> {
let start = self.len().checked_sub(payload.len())?;
self.matches_payload_at(start, payload)
}
fn find_payload(&self, payload: &Payload) -> Option<StateMatch> {
if payload.is_empty() {
return StateMatch::new(self.len(), 0, 0);
}
let last_start = self.len().checked_sub(payload.len())?;
(0..=last_start).find_map(|position| self.matches_payload_at(position, payload))
}
fn matches_payload_at(&self, position: usize, payload: &Payload) -> Option<StateMatch> {
let end = position.checked_add(payload.len())?;
let window = self.bytes.get(position..end)?;
window
.iter()
.copied()
.zip(payload.iter())
.all(|(state_byte, payload_byte)| state_byte.as_u8() == payload_byte.as_u8())
.then(|| StateMatch::new_unchecked(position, payload.len(), end))
}
fn replace_at(&self, state_match: StateMatch, rhs: &Payload) -> Self {
let mut bytes = Vec::with_capacity(self.replaced_len(state_match, rhs));
self.push_prefix(&mut bytes, state_match);
bytes.extend(rhs.to_runtime_bytes());
self.push_suffix(&mut bytes, state_match);
Self { bytes }
}
fn move_start_at(&self, state_match: StateMatch, rhs: &Payload) -> Self {
let mut bytes = Vec::with_capacity(self.replaced_len(state_match, rhs));
bytes.extend(rhs.to_runtime_bytes());
self.push_prefix(&mut bytes, state_match);
self.push_suffix(&mut bytes, state_match);
Self { bytes }
}
fn move_end_at(&self, state_match: StateMatch, rhs: &Payload) -> Self {
let mut bytes = Vec::with_capacity(self.replaced_len(state_match, rhs));
self.push_prefix(&mut bytes, state_match);
self.push_suffix(&mut bytes, state_match);
bytes.extend(rhs.to_runtime_bytes());
Self { bytes }
}
fn apply_action(&self, state_match: StateMatch, action: &Action) -> RewriteEffect {
match action {
Action::Replace(rhs) => RewriteEffect::Continue(self.replace_at(state_match, rhs)),
Action::MoveStart(rhs) => RewriteEffect::Continue(self.move_start_at(state_match, rhs)),
Action::MoveEnd(rhs) => RewriteEffect::Continue(self.move_end_at(state_match, rhs)),
Action::Return(output) => RewriteEffect::Return(output.to_vec_u8()),
}
}
fn replaced_len(&self, state_match: StateMatch, rhs: &Payload) -> usize {
self.len() - state_match.lhs_len() + rhs.len()
}
fn push_prefix(&self, output: &mut Vec<RuntimeByte>, state_match: StateMatch) {
output.extend(self.bytes.iter().take(state_match.position()).copied());
}
fn push_suffix(&self, output: &mut Vec<RuntimeByte>, state_match: StateMatch) {
output.extend(self.bytes.iter().skip(state_match.end()).copied());
}
fn to_vec_u8(&self) -> Vec<u8> {
self.bytes.iter().copied().map(RuntimeByte::as_u8).collect()
}
fn into_vec_u8(self) -> Vec<u8> {
self.bytes.into_iter().map(RuntimeByte::as_u8).collect()
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct StateMatch {
position: usize,
lhs_len: usize,
end: usize,
}
impl StateMatch {
fn new(state_len: usize, position: usize, lhs_len: usize) -> Option<Self> {
let end = position.checked_add(lhs_len)?;
(position <= state_len && end <= state_len)
.then(|| Self::new_unchecked(position, lhs_len, end))
}
const fn new_unchecked(position: usize, lhs_len: usize, end: usize) -> Self {
Self {
position,
lhs_len,
end,
}
}
const fn position(self) -> usize {
self.position
}
const fn lhs_len(self) -> usize {
self.lhs_len
}
const fn end(self) -> usize {
self.end
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
enum Action {
Replace(Payload),
MoveStart(Payload),
MoveEnd(Payload),
Return(Payload),
}
#[derive(Debug, Clone, PartialEq, Eq)]
enum RewriteEffect {
Continue(State),
Return(Vec<u8>),
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct Rule {
line_number: usize,
compact_source: Vec<u8>,
repeat: RuleRepeat,
anchor: Anchor,
lhs: Payload,
action: Action,
}
impl Rule {
fn info(&self, index: usize) -> RuleInfo<'_> {
RuleInfo {
id: RuleId(index),
line_number: self.line_number,
compact_source: &self.compact_source,
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Program {
rules: Vec<Rule>,
}
impl Program {
pub fn parse(source: impl AsRef<[u8]>) -> Result<Self, ParseError> {
parse_program_impl(source.as_ref())
}
pub fn parse_bytes(source: &[u8]) -> Result<Self, ParseError> {
parse_program_impl(source)
}
pub fn parse_str(source: &str) -> Result<Self, ParseError> {
parse_program_impl(source.as_bytes())
}
#[must_use]
pub fn rule_count(&self) -> usize {
self.rules.len()
}
#[must_use]
pub fn rule(&self, id: RuleId) -> Option<RuleInfo<'_>> {
self.rules.get(id.index()).map(|rule| rule.info(id.index()))
}
pub fn rules(&self) -> impl Iterator<Item = RuleInfo<'_>> + '_ {
self.rules
.iter()
.enumerate()
.map(|(index, rule)| rule.info(index))
}
pub fn run(&self, input: impl AsRef<[u8]>, options: RunOptions) -> Result<RunResult, RunError> {
Runtime::new(self, input.as_ref())?.run(options.max_steps)
}
pub fn run_with_trace<F>(
&self,
input: impl AsRef<[u8]>,
options: RunOptions,
trace: F,
) -> Result<RunResult, RunError>
where
F: FnMut(TraceEvent),
{
Runtime::new(self, input.as_ref())?.run_with_trace(options.max_steps, trace)
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct MatchedRule<'program> {
rule_id: RuleId,
rule: &'program Rule,
state_match: StateMatch,
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct Runtime<'program> {
program: &'program Program,
state: State,
steps: usize,
rule_states: Box<[RuntimeRuleState]>,
}
impl<'program> Runtime<'program> {
fn new(program: &'program Program, input: &[u8]) -> Result<Self, InputError> {
Ok(Self {
program,
state: State::parse_input(input)?,
steps: 0,
rule_states: vec![RuntimeRuleState::Fresh; program.rules.len()].into_boxed_slice(),
})
}
fn run(self, max_steps: usize) -> Result<RunResult, RunError> {
self.run_impl(max_steps, None::<fn(TraceEvent)>)
}
fn run_with_trace<F>(self, max_steps: usize, trace: F) -> Result<RunResult, RunError>
where
F: FnMut(TraceEvent),
{
self.run_impl(max_steps, Some(trace))
}
fn run_impl<F>(mut self, max_steps: usize, mut trace: Option<F>) -> Result<RunResult, RunError>
where
F: FnMut(TraceEvent),
{
emit_trace(
&mut trace,
TraceEvent::Initial {
state: self.state.to_vec_u8(),
},
);
loop {
let Some(matched) = self.find_next_match() else {
return Ok(RunResult {
output: self.state.into_vec_u8(),
steps: self.steps,
returned: false,
});
};
if self.steps >= max_steps {
return Err(StepLimitError {
max_steps,
state: self.state.into_vec_u8(),
}
.into());
}
if let Some(result) = self.apply_rule(matched, &mut trace) {
return Ok(result);
}
}
}
fn find_next_match(&self) -> Option<MatchedRule<'program>> {
let rules: &'program [Rule] = &self.program.rules;
rules
.iter()
.zip(self.rule_states.iter())
.enumerate()
.find_map(|(rule_index, (rule, state))| {
if rule.repeat.is_once() && state.is_consumed() {
return None;
}
find_match(&self.state, rule).map(|state_match| MatchedRule {
rule_id: RuleId(rule_index),
rule,
state_match,
})
})
}
fn consume_rule_if_needed(&mut self, matched: MatchedRule<'_>) {
if !matched.rule.repeat.is_once() {
return;
}
if let Some(state) = self.rule_states.get_mut(matched.rule_id.index()) {
*state = RuntimeRuleState::Consumed;
}
}
fn apply_rule<F>(
&mut self,
matched: MatchedRule<'program>,
trace: &mut Option<F>,
) -> Option<RunResult>
where
F: FnMut(TraceEvent),
{
self.consume_rule_if_needed(matched);
let rule = matched.rule;
let rule_id = matched.rule_id;
let effect = self
.state
.apply_action(matched.state_match, &matched.rule.action);
self.steps += 1;
let (output, returned) = match effect {
RewriteEffect::Continue(next_state) => {
let output = next_state.to_vec_u8();
self.state = next_state;
(output, false)
}
RewriteEffect::Return(output) => (output, true),
};
emit_trace(
trace,
TraceEvent::Step {
step: self.steps,
rule: rule_id,
line_number: rule.line_number,
output: output.clone(),
returned,
},
);
if returned {
Some(RunResult {
output,
steps: self.steps,
returned: true,
})
} else {
None
}
}
}
fn emit_trace<F>(trace: &mut Option<F>, event: TraceEvent)
where
F: FnMut(TraceEvent),
{
if let Some(trace) = trace.as_mut() {
trace(event);
}
}
fn find_match(state: &State, rule: &Rule) -> Option<StateMatch> {
match rule.anchor {
Anchor::Anywhere => state.find_payload(&rule.lhs),
Anchor::Start => state.starts_with_payload(&rule.lhs),
Anchor::End => state.ends_with_payload(&rule.lhs),
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RunResult {
output: Vec<u8>,
steps: usize,
returned: bool,
}
impl RunResult {
#[must_use]
pub fn output(&self) -> &[u8] {
&self.output
}
#[must_use]
pub fn into_output(self) -> Vec<u8> {
self.output
}
#[must_use]
pub fn steps(&self) -> usize {
self.steps
}
#[must_use]
pub fn returned(&self) -> bool {
self.returned
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TraceEvent {
Initial { state: Vec<u8> },
Step {
step: usize,
rule: RuleId,
line_number: usize,
output: Vec<u8>,
returned: bool,
},
}
impl TraceEvent {
#[must_use]
pub fn bytes(&self) -> &[u8] {
match self {
Self::Initial { state } => state,
Self::Step { output, .. } => output,
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AebError {
Parse(ParseError),
Run(RunError),
}
impl fmt::Display for AebError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Parse(error) => error.fmt(f),
Self::Run(error) => error.fmt(f),
}
}
}
impl Error for AebError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
Self::Parse(error) => Some(error),
Self::Run(error) => Some(error),
}
}
}
impl From<ParseError> for AebError {
fn from(value: ParseError) -> Self {
Self::Parse(value)
}
}
impl From<RunError> for AebError {
fn from(value: RunError) -> Self {
Self::Run(value)
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
line: usize,
column: Option<usize>,
kind: ParseErrorKind,
}
impl ParseError {
fn new(line: usize, column: Option<usize>, kind: ParseErrorKind) -> Self {
Self { line, column, kind }
}
#[must_use]
pub const fn line(&self) -> usize {
self.line
}
#[must_use]
pub const fn column(&self) -> Option<usize> {
self.column
}
#[must_use]
pub const fn kind(&self) -> &ParseErrorKind {
&self.kind
}
}
impl fmt::Display for ParseError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "parse error at line {}", self.line)?;
if let Some(column) = self.column {
write!(f, ", column {column}")?;
}
write!(f, ": {}", self.kind)
}
}
impl Error for ParseError {}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseErrorKind {
NonAsciiInCode { byte: u8 },
MissingEquals,
MultipleEquals,
ReservedByteInPayload { byte: u8, payload_kind: PayloadKind },
UnsupportedLeftModifierOrder,
}
impl fmt::Display for ParseErrorKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::NonAsciiInCode { byte } => write!(f, "non-ASCII byte 0x{byte:02x} in code"),
Self::MissingEquals => write!(f, "missing '='"),
Self::MultipleEquals => write!(f, "multiple '=' characters are not allowed"),
Self::ReservedByteInPayload { byte, payload_kind } => write!(
f,
"reserved character '{}' cannot be represented as {payload_kind}",
printable_ascii(*byte),
),
Self::UnsupportedLeftModifierOrder => {
write!(f, "duplicated or unsupported left-side modifier order")
}
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PayloadKind {
LeftSideData,
RightSideData,
RightSideMoveStartPayload,
RightSideMoveEndPayload,
RightSideReturnPayload,
}
impl fmt::Display for PayloadKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::LeftSideData => write!(f, "left-side data"),
Self::RightSideData => write!(f, "right-side data"),
Self::RightSideMoveStartPayload => write!(f, "right-side move-to-start payload"),
Self::RightSideMoveEndPayload => write!(f, "right-side move-to-end payload"),
Self::RightSideReturnPayload => write!(f, "right-side return payload"),
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RunError {
Input(InputError),
StepLimit(StepLimitError),
}
impl fmt::Display for RunError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Input(error) => error.fmt(f),
Self::StepLimit(error) => error.fmt(f),
}
}
}
impl Error for RunError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
Self::Input(error) => Some(error),
Self::StepLimit(error) => Some(error),
}
}
}
impl From<InputError> for RunError {
fn from(value: InputError) -> Self {
Self::Input(value)
}
}
impl From<StepLimitError> for RunError {
fn from(value: StepLimitError) -> Self {
Self::StepLimit(value)
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InputError {
column: usize,
byte: u8,
}
impl InputError {
#[must_use]
pub const fn column(&self) -> usize {
self.column
}
#[must_use]
pub const fn byte(&self) -> u8 {
self.byte
}
}
impl fmt::Display for InputError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"input error: non-ASCII byte 0x{:02x} at column {}",
self.byte, self.column,
)
}
}
impl Error for InputError {}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct StepLimitError {
max_steps: usize,
state: Vec<u8>,
}
impl StepLimitError {
#[must_use]
pub const fn max_steps(&self) -> usize {
self.max_steps
}
#[must_use]
pub fn state(&self) -> &[u8] {
&self.state
}
#[must_use]
pub fn into_state(self) -> Vec<u8> {
self.state
}
}
impl fmt::Display for StepLimitError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"step limit exceeded after {} steps; state length: {} bytes",
self.max_steps,
self.state.len(),
)
}
}
impl Error for StepLimitError {}
fn is_reserved_code_byte(byte: u8) -> bool {
matches!(byte, b'=' | b'#' | b'(' | b')')
}
fn printable_ascii(byte: u8) -> char {
if byte.is_ascii() {
byte as char
} else {
'\u{fffd}'
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct CompactByte {
byte: u8,
source_column: usize,
}
impl CompactByte {
const fn new(byte: u8, source_column: usize) -> Self {
Self {
byte,
source_column,
}
}
const fn as_u8(self) -> u8 {
self.byte
}
const fn source_column(self) -> usize {
self.source_column
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct CodeLine<'source> {
line_number: usize,
bytes: &'source [u8],
}
impl<'source> CodeLine<'source> {
fn parse(raw_line: &'source [u8], line_number: usize) -> Result<Self, ParseError> {
let code_bytes = raw_line
.iter()
.position(|&byte| byte == b'#')
.and_then(|comment_start| raw_line.get(..comment_start))
.unwrap_or(raw_line);
if let Some((zero_based_column, byte)) = code_bytes
.iter()
.copied()
.enumerate()
.find(|(_, byte)| !byte.is_ascii())
{
return Err(ParseError::new(
line_number,
Some(zero_based_column + 1),
ParseErrorKind::NonAsciiInCode { byte },
));
}
Ok(Self {
line_number,
bytes: code_bytes,
})
}
fn compact(self) -> CompactCodeLine {
let bytes = self
.bytes
.iter()
.copied()
.enumerate()
.filter(|(_, byte)| !byte.is_ascii_whitespace())
.map(|(zero_based_column, byte)| CompactByte::new(byte, zero_based_column + 1))
.collect();
CompactCodeLine {
line_number: self.line_number,
bytes,
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct CompactCodeLine {
line_number: usize,
bytes: Vec<CompactByte>,
}
impl CompactCodeLine {
fn is_empty(&self) -> bool {
self.bytes.is_empty()
}
fn compact_source(&self) -> Vec<u8> {
self.bytes.iter().copied().map(CompactByte::as_u8).collect()
}
fn equals_position(&self) -> Result<usize, ParseError> {
let Some(first_equals) = self.bytes.iter().position(|byte| byte.as_u8() == b'=') else {
return Err(ParseError::new(
self.line_number,
None,
ParseErrorKind::MissingEquals,
));
};
if let Some(second_equals) = self
.bytes
.iter()
.skip(first_equals + 1)
.find(|byte| byte.as_u8() == b'=')
.copied()
{
return Err(ParseError::new(
self.line_number,
Some(second_equals.source_column()),
ParseErrorKind::MultipleEquals,
));
}
Ok(first_equals)
}
fn split_at_equals(
&self,
equals_position: usize,
) -> Result<(&[CompactByte], &[CompactByte]), ParseError> {
let (lhs, rhs_with_equals) = self.bytes.split_at(equals_position);
let Some(rhs) = rhs_with_equals.get(1..) else {
return Err(ParseError::new(
self.line_number,
None,
ParseErrorKind::MissingEquals,
));
};
Ok((lhs, rhs))
}
}
fn parse_program_impl(source: &[u8]) -> Result<Program, ParseError> {
let mut rules = Vec::new();
for (zero_based_line, raw_line) in source.split(|&byte| byte == b'\n').enumerate() {
let line_number = zero_based_line + 1;
let compact_code = CodeLine::parse(raw_line, line_number)?.compact();
if compact_code.is_empty() {
continue;
}
let equals_position = compact_code.equals_position()?;
let compact_source = compact_code.compact_source();
let (lhs_code, rhs_code) = compact_code.split_at_equals(equals_position)?;
let (repeat, anchor, lhs) = parse_lhs(lhs_code, line_number)?;
let action = parse_rhs(rhs_code, line_number)?;
rules.push(Rule {
line_number,
compact_source,
repeat,
anchor,
lhs,
action,
});
}
Ok(Program { rules })
}
fn strip_token<'code>(input: &'code [CompactByte], token: &[u8]) -> Option<&'code [CompactByte]> {
if input.len() < token.len() {
return None;
}
let starts_with_token = input
.iter()
.take(token.len())
.copied()
.map(CompactByte::as_u8)
.eq(token.iter().copied());
if starts_with_token {
input.get(token.len()..)
} else {
None
}
}
fn starts_with_token(input: &[CompactByte], token: &[u8]) -> bool {
strip_token(input, token).is_some()
}
fn parse_lhs(
mut input: &[CompactByte],
line_number: usize,
) -> Result<(RuleRepeat, Anchor, Payload), ParseError> {
let mut repeat = RuleRepeat::Always;
if let Some(rest) = strip_token(input, TOK_ONCE) {
repeat = RuleRepeat::Once;
input = rest;
}
let anchor = if let Some(rest) = strip_token(input, TOK_START) {
input = rest;
Anchor::Start
} else if let Some(rest) = strip_token(input, TOK_END) {
input = rest;
Anchor::End
} else {
Anchor::Anywhere
};
if starts_with_token(input, TOK_ONCE)
|| starts_with_token(input, TOK_START)
|| starts_with_token(input, TOK_END)
{
return Err(ParseError::new(
line_number,
input.first().copied().map(CompactByte::source_column),
ParseErrorKind::UnsupportedLeftModifierOrder,
));
}
let lhs = Payload::parse(input, line_number, PayloadKind::LeftSideData)?;
Ok((repeat, anchor, lhs))
}
fn parse_rhs(input: &[CompactByte], line_number: usize) -> Result<Action, ParseError> {
if let Some(rest) = strip_token(input, TOK_START) {
let payload = Payload::parse(rest, line_number, PayloadKind::RightSideMoveStartPayload)?;
Ok(Action::MoveStart(payload))
} else if let Some(rest) = strip_token(input, TOK_END) {
let payload = Payload::parse(rest, line_number, PayloadKind::RightSideMoveEndPayload)?;
Ok(Action::MoveEnd(payload))
} else if let Some(rest) = strip_token(input, TOK_RETURN) {
let payload = Payload::parse(rest, line_number, PayloadKind::RightSideReturnPayload)?;
Ok(Action::Return(payload))
} else {
let payload = Payload::parse(input, line_number, PayloadKind::RightSideData)?;
Ok(Action::Replace(payload))
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::string::{FromUtf8Error, String};
enum TestFailure {
Message(&'static str),
Parse(ParseError),
Run(RunError),
Aeb(AebError),
Utf8(FromUtf8Error),
}
impl core::fmt::Debug for TestFailure {
fn fmt(&self, formatter: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
Self::Message(message) => formatter.debug_tuple("Message").field(message).finish(),
Self::Parse(error) => formatter.debug_tuple("Parse").field(error).finish(),
Self::Run(error) => formatter.debug_tuple("Run").field(error).finish(),
Self::Aeb(error) => formatter.debug_tuple("Aeb").field(error).finish(),
Self::Utf8(error) => formatter.debug_tuple("Utf8").field(error).finish(),
}
}
}
impl From<ParseError> for TestFailure {
fn from(value: ParseError) -> Self {
Self::Parse(value)
}
}
impl From<RunError> for TestFailure {
fn from(value: RunError) -> Self {
Self::Run(value)
}
}
impl From<AebError> for TestFailure {
fn from(value: AebError) -> Self {
Self::Aeb(value)
}
}
impl From<FromUtf8Error> for TestFailure {
fn from(value: FromUtf8Error) -> Self {
Self::Utf8(value)
}
}
type TestResult = Result<(), TestFailure>;
fn run_source(source: &str, input: &str) -> Result<String, TestFailure> {
let program = Program::parse(source)?;
let result = program.run(input.as_bytes(), RunOptions::new(10_000))?;
Ok(String::from_utf8(result.into_output())?)
}
fn expect_parse_error(source: &str) -> Result<ParseError, TestFailure> {
match Program::parse(source) {
Ok(_) => Err(TestFailure::Message("expected parse error")),
Err(error) => Ok(error),
}
}
fn expect_run_error(result: Result<RunResult, RunError>) -> Result<RunError, TestFailure> {
match result {
Ok(_) => Err(TestFailure::Message("expected runtime error")),
Err(error) => Ok(error),
}
}
fn expect_event(events: &[TraceEvent], index: usize) -> Result<&TraceEvent, TestFailure> {
events
.get(index)
.ok_or(TestFailure::Message("expected trace event"))
}
fn expect_rule(program: &Program, rule: RuleId) -> Result<RuleInfo<'_>, TestFailure> {
program
.rule(rule)
.ok_or(TestFailure::Message("expected rule metadata"))
}
fn expect_step_limit(error: RunError) -> Result<StepLimitError, TestFailure> {
match error {
RunError::StepLimit(error) => Ok(error),
RunError::Input(_) => Err(TestFailure::Message("expected step limit error")),
}
}
fn expect_input_error(error: RunError) -> Result<InputError, TestFailure> {
match error {
RunError::Input(error) => Ok(error),
RunError::StepLimit(_) => Err(TestFailure::Message("expected input error")),
}
}
#[test]
fn public_free_run_works() -> TestResult {
let result = run("a=b", b"a", RunOptions::default())?;
assert_eq!(result.output(), b"b");
assert_eq!(result.steps(), 1);
assert!(!result.returned());
Ok(())
}
#[test]
fn parsed_program_is_reusable_and_once_state_is_per_run() -> TestResult {
let program = Program::parse("(once)a=b\na=c")?;
let first = program.run(b"aa", RunOptions::new(10_000))?;
let second = program.run(b"aa", RunOptions::new(10_000))?;
assert_eq!(first.output(), b"bc");
assert_eq!(second.output(), b"bc");
Ok(())
}
#[test]
fn trace_events_are_emitted_without_core_stderr() -> TestResult {
let program = Program::parse("a=b\nb=(return)ok")?;
let mut events = Vec::new();
let result = program.run_with_trace(b"a", RunOptions::new(10_000), |event| {
events.push(event);
})?;
assert_eq!(result.output(), b"ok");
assert!(result.returned());
assert_eq!(events.len(), 3);
let initial = expect_event(&events, 0)?;
let first_step = expect_event(&events, 1)?;
let second_step = expect_event(&events, 2)?;
assert!(matches!(initial, TraceEvent::Initial { .. }));
assert_eq!(initial.bytes(), b"a");
assert_eq!(first_step.bytes(), b"b");
assert_eq!(second_step.bytes(), b"ok");
match first_step {
TraceEvent::Step {
rule, line_number, ..
} => {
assert_eq!(rule.index(), 0);
assert_eq!(*line_number, 1);
assert_eq!(expect_rule(&program, *rule)?.compact_source(), b"a=b");
}
TraceEvent::Initial { .. } => {
return Err(TestFailure::Message("expected step event"));
}
}
Ok(())
}
#[test]
fn rule_metadata_is_exposed_without_embedding_display_strings_in_trace_events() -> TestResult {
let program = Program::parse("a = b # comment\n(start)c=(end)d")?;
let rules = program.rules().collect::<Vec<_>>();
assert_eq!(rules.len(), 2);
let first = rules
.first()
.ok_or(TestFailure::Message("expected first rule"))?;
let second = rules
.get(1)
.ok_or(TestFailure::Message("expected second rule"))?;
assert_eq!(first.id().index(), 0);
assert_eq!(first.line_number(), 1);
assert_eq!(first.compact_source(), b"a=b");
assert_eq!(second.id().index(), 1);
assert_eq!(second.line_number(), 2);
assert_eq!(second.compact_source(), b"(start)c=(end)d");
Ok(())
}
#[test]
fn normal_replacement_is_ordered_and_leftmost() -> TestResult {
let source = "aa=x\na=y";
assert_eq!(run_source(source, "aaaa")?, "xx");
Ok(())
}
#[test]
fn start_anchor_matches_only_at_start() -> TestResult {
let source = "(start)a=x";
assert_eq!(run_source(source, "aba")?, "xba");
assert_eq!(run_source(source, "ba")?, "ba");
Ok(())
}
#[test]
fn end_anchor_matches_only_at_end() -> TestResult {
let source = "(end)a=x";
assert_eq!(run_source(source, "aba")?, "abx");
assert_eq!(run_source(source, "ab")?, "ab");
Ok(())
}
#[test]
fn runtime_continues_after_anchored_replacement() -> TestResult {
let source = "(start)a=x\na=y";
assert_eq!(run_source(source, "aba")?, "xby");
let source = "(end)a=x\na=y";
assert_eq!(run_source(source, "aba")?, "ybx");
Ok(())
}
#[test]
fn move_start_works() -> TestResult {
let source = "a=(start)x";
assert_eq!(run_source(source, "ba")?, "xb");
Ok(())
}
#[test]
fn move_end_works() -> TestResult {
let source = "a=(end)x";
assert_eq!(run_source(source, "ba")?, "bx");
Ok(())
}
#[test]
fn empty_lhs_anywhere_matches_at_start() -> TestResult {
let source = "(once)=x\n(start)x=(return)ok";
let result = Program::parse(source)?.run(b"ab", RunOptions::new(2))?;
assert_eq!(result.output(), b"ok");
assert_eq!(result.steps(), 2);
assert!(result.returned());
Ok(())
}
#[test]
fn empty_lhs_start_and_end_anchors_pick_different_edges() -> TestResult {
let start_result =
Program::parse("(once)(start)=x\nxab=(return)start")?.run(b"ab", RunOptions::new(2))?;
let end_result =
Program::parse("(once)(end)=x\nabx=(return)end")?.run(b"ab", RunOptions::new(2))?;
assert_eq!(start_result.output(), b"start");
assert_eq!(end_result.output(), b"end");
Ok(())
}
#[test]
fn once_rule_is_used_at_most_once() -> TestResult {
let source = "(once)a=b\na=c";
assert_eq!(run_source(source, "aa")?, "bc");
Ok(())
}
#[test]
fn return_discards_current_state() -> TestResult {
let source = "aa=(return)ok\na=x";
assert_eq!(run_source(source, "aabb")?, "ok");
Ok(())
}
#[test]
fn empty_lhs_inserts_at_start() -> TestResult {
let source = "aaa=(return)a\n=a";
assert_eq!(run_source(source, "")?, "a");
Ok(())
}
#[test]
fn code_spaces_are_ignored_in_rules() -> TestResult {
assert_eq!(run_source("a b=bb", "abc")?, "bbc");
assert_eq!(run_source("a = b", "a")?, "b");
assert_eq!(run_source("( once ) a = ( end ) b", "ca")?, "cb");
Ok(())
}
#[test]
fn input_spaces_are_preserved_and_do_not_bridge_matches() -> TestResult {
assert_eq!(run_source("a= b", "a bc")?, "b bc");
assert_eq!(run_source("a b=bb", "a bc")?, "a bc");
assert_eq!(run_source("ab=bb", "a bc")?, "a bc");
Ok(())
}
#[test]
fn code_cannot_create_or_match_space_even_when_space_is_written_near_rules() -> TestResult {
assert_eq!(run_source("a= ", "a ")?, " ");
assert_eq!(run_source(" a = b ", "a")?, "b");
Ok(())
}
#[test]
fn hash_starts_a_comment() -> TestResult {
assert_eq!(run_source("a=b#c", "a")?, "b");
assert_eq!(run_source("#a=b", "a")?, "a");
assert_eq!(run_source("a=b#コメント内の非ASCIIは許可", "a")?, "b");
Ok(())
}
#[test]
fn comments_may_contain_non_utf8_bytes_because_the_core_parser_is_byte_oriented() -> TestResult
{
let source = b"a=b#\xff\xfe\n";
let program = Program::parse(source)?;
let result = program.run(b"a", RunOptions::new(10_000))?;
assert_eq!(result.output(), b"b");
Ok(())
}
#[test]
fn code_body_rejects_non_ascii_outside_comments() -> TestResult {
assert!(Program::parse("a=あ").is_err());
assert!(Program::parse("あ=b# comment").is_err());
assert!(Program::parse("a=b#あ").is_ok());
let error = expect_parse_error("a=あ")?;
assert_eq!(error.line(), 1);
assert_eq!(error.column(), Some(3));
assert!(matches!(
error.kind(),
ParseErrorKind::NonAsciiInCode { .. }
));
Ok(())
}
#[test]
fn second_equals_is_a_parse_error_unless_it_is_in_a_comment() -> TestResult {
let error = expect_parse_error("a=b=c")?;
assert_eq!(error.column(), Some(4));
assert!(matches!(error.kind(), ParseErrorKind::MultipleEquals));
let error = expect_parse_error("a=b =c")?;
assert_eq!(error.column(), Some(5));
assert!(matches!(error.kind(), ParseErrorKind::MultipleEquals));
assert!(Program::parse("a=b#=c").is_ok());
Ok(())
}
#[test]
fn unsupported_parentheses_are_parse_errors() {
for source in [
"a=b(",
"a=b)",
"a=b()",
"a=()",
"a=b(start)",
"a=(once)b",
"a(once)=b",
] {
assert!(
Program::parse(source).is_err(),
"source should fail: {source}"
);
}
assert!(Program::parse("(once)(start)a=(end)b").is_ok());
assert!(Program::parse("a=(return)").is_ok());
}
#[test]
fn reserved_payload_errors_report_original_source_column_after_compaction() -> TestResult {
let error = expect_parse_error("a = b (")?;
assert_eq!(error.column(), Some(7));
assert!(matches!(
error.kind(),
ParseErrorKind::ReservedByteInPayload {
payload_kind: PayloadKind::RightSideData,
..
}
));
Ok(())
}
#[test]
fn invalid_left_modifier_order_is_structured() -> TestResult {
let error = expect_parse_error("(start)(once)a=b")?;
assert!(matches!(
error.kind(),
ParseErrorKind::UnsupportedLeftModifierOrder
));
Ok(())
}
#[test]
fn reserved_input_bytes_are_preserved_but_not_editable_from_code() -> TestResult {
assert_eq!(run_source("a=b", "a=()#c")?, "b=()#c");
assert!(
Program::parse("a=b")?
.run("aあ".as_bytes(), RunOptions::default())
.is_err()
);
Ok(())
}
#[test]
fn runtime_input_error_is_structured() -> TestResult {
let error =
expect_run_error(Program::parse("a=b")?.run("aあ".as_bytes(), RunOptions::default()))?;
let error = expect_input_error(error)?;
assert_eq!(error.column(), 2);
Ok(())
}
#[test]
fn runtime_state_can_hold_reserved_bytes_that_program_payloads_cannot_construct() -> TestResult
{
let program = Program::parse("a=b")?;
assert!(Program::parse("a=(return)(").is_err());
assert!(Program::parse("a=b)").is_err());
let result = program.run(b"a=#()", RunOptions::new(10_000))?;
assert_eq!(String::from_utf8(result.into_output())?, "b=#()");
Ok(())
}
#[test]
fn one_step_program_succeeds_at_exact_step_limit() -> TestResult {
let result = Program::parse("a=b")?.run(b"a", RunOptions::new(1))?;
assert_eq!(result.output(), b"b");
assert_eq!(result.steps(), 1);
assert!(!result.returned());
Ok(())
}
#[test]
fn return_program_succeeds_at_exact_step_limit() -> TestResult {
let result = Program::parse("a=(return)b")?.run(b"a", RunOptions::new(1))?;
assert_eq!(result.output(), b"b");
assert_eq!(result.steps(), 1);
assert!(result.returned());
Ok(())
}
#[test]
fn zero_step_limit_succeeds_when_no_rule_matches() -> TestResult {
let result = Program::parse("a=b")?.run(b"x", RunOptions::new(0))?;
assert_eq!(result.output(), b"x");
assert_eq!(result.steps(), 0);
assert!(!result.returned());
Ok(())
}
#[test]
fn zero_step_limit_fails_only_when_a_rule_would_apply() -> TestResult {
let error = expect_run_error(Program::parse("a=b")?.run(b"a", RunOptions::new(0)))?;
let error = expect_step_limit(error)?;
assert_eq!(error.max_steps(), 0);
assert_eq!(error.state(), b"a");
Ok(())
}
#[test]
fn step_limit_error_keeps_state_as_bytes() -> TestResult {
let error = expect_run_error(Program::parse("=a")?.run(b"", RunOptions::new(3)))?;
let error = expect_step_limit(error)?;
assert_eq!(error.max_steps(), 3);
assert_eq!(error.state(), b"aaa");
Ok(())
}
#[test]
fn palindrome_example_returns_true_or_false() -> TestResult {
let source = "\
b=a|a|
c=a|aa|
a|-=\n--=(return)false\n(start)a|=(end)-\n(start)a=(end)|-\n=(return)true";
assert_eq!(run_source(source, "aba")?, "true");
assert_eq!(run_source(source, "ab")?, "false");
Ok(())
}
}