sashite-feen 0.1.0

Field Expression Encoding Notation (FEEN): a compact, ASCII-only, no_std, zero-allocation validator and encoder for board-game positions in abstract strategy games, built on EPIN and SIN.
Documentation
//! The hands field (field 2): `<first-hand>/<second-hand>`.
//!
//! Each hand is a run of items `[count]piece`, where `count` is optional (and at
//! least 2 when present) and `piece` is an EPIN token. A valid hands field is
//! **canonical**:
//!
//! 1. identical tokens are aggregated into a single item, and
//! 2. items are ordered by the FEEN comparator — multiplicity descending, then
//!    base letter, then case (uppercase first), then state (`-`, `+`, none),
//!    then terminal marker (absent first), then derivation marker (absent
//!    first).
//!
//! Validation is a single allocation-free pass. Because the comparator orders by
//! multiplicity first, two items sharing a token but carrying different counts
//! can sit far apart in an otherwise sorted run (e.g. `3P3Q2P`); a strict
//! ordering check alone would miss that. A fixed 624-bit stack bitset — one slot
//! per distinct EPIN token — catches such duplicates wherever they occur.

use sashite_epin::{Identifier, Side, State};

use crate::error::ParseError;
use crate::token::epin_token;

/// A single item in a player's hand: a piece together with its multiplicity.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct HandItem {
    piece: Identifier,
    count: u32,
}

impl HandItem {
    /// The piece held, as an EPIN identifier.
    #[must_use]
    pub const fn piece(self) -> Identifier {
        self.piece
    }

    /// How many copies of the piece are held (always at least 1).
    #[must_use]
    pub const fn count(self) -> u32 {
        self.count
    }
}

/// A lazy iterator over the items of one hand.
///
/// It borrows a hand's bytes (already validated) and yields each [`HandItem`] in
/// canonical order without allocating.
#[derive(Debug)]
pub struct HandIter<'a> {
    bytes: &'a [u8],
    pos: usize,
}

impl<'a> HandIter<'a> {
    /// Creates an iterator over a single hand's bytes (one side of the `/`).
    pub(crate) const fn new(bytes: &'a [u8]) -> Self {
        Self { bytes, pos: 0 }
    }
}

impl Iterator for HandIter<'_> {
    type Item = HandItem;

    fn next(&mut self) -> Option<HandItem> {
        if self.pos >= self.bytes.len() {
            return None;
        }
        let Ok((next, count, piece)) = read_item(self.bytes, self.pos) else {
            // Defensive: the bytes were validated, so this should not occur;
            // stop cleanly rather than loop.
            self.pos = self.bytes.len();
            return None;
        };
        self.pos = next;
        Some(HandItem { piece, count })
    }
}

/// Validates the hands field and returns the total number of pieces in hand.
pub(crate) fn validate(field: &[u8]) -> Result<u32, ParseError> {
    // Exactly one '/' delimiter separates the two hands.
    let mut delimiter = None;
    let mut slashes = 0usize;
    for (idx, &b) in field.iter().enumerate() {
        if b == b'/' {
            slashes += 1;
            delimiter = Some(idx);
        }
    }
    let (1, Some(at)) = (slashes, delimiter) else {
        return Err(ParseError::InvalidHandsDelimiter);
    };

    let first = validate_one_hand(&field[..at])?;
    let second = validate_one_hand(&field[at + 1..])?;
    Ok(first.saturating_add(second))
}

/// Validates a single hand and returns its total piece count.
fn validate_one_hand(bytes: &[u8]) -> Result<u32, ParseError> {
    // One bit per distinct EPIN token (624 of them; 640 bits available).
    let mut seen = [0u64; 10];
    let mut prev: Option<(u32, u8, u8, u8, u8, u8)> = None;
    let mut total: u32 = 0;

    let len = bytes.len();
    let mut i = 0;
    while i < len {
        let (next, count, id) = read_item(bytes, i)?;
        i = next;

        let (letter, case, state, terminal, derived) = token_key(id);

        // Aggregation: each token may appear at most once per hand.
        let index = usize::from(letter) * 24
            + usize::from(case) * 12
            + usize::from(state) * 4
            + usize::from(terminal) * 2
            + usize::from(derived);
        let bit = 1u64 << (index % 64);
        let word = index / 64;
        if seen[word] & bit != 0 {
            return Err(ParseError::HandNotAggregated);
        }
        seen[word] |= bit;

        // Canonical ordering: the per-item key must be strictly increasing.
        // Multiplicity sorts descending, so it is stored inverted.
        let cur = (u32::MAX - count, letter, case, state, terminal, derived);
        if let Some(p) = prev {
            if cur < p {
                return Err(ParseError::HandNotCanonical);
            }
            // `cur == p` is impossible: distinct tokens (guaranteed by the
            // bitset) differ in at least one key beyond multiplicity.
        }
        prev = Some(cur);

        total = total.saturating_add(count);
    }

    Ok(total)
}

/// Reads one hand item starting at `start`, returning `(next_index, count, id)`.
///
/// `count` is the parsed multiplicity (1 when omitted; at least 2 when written).
fn read_item(bytes: &[u8], start: usize) -> Result<(usize, u32, Identifier), ParseError> {
    let len = bytes.len();
    let mut i = start;

    let count = if i < len && bytes[i].is_ascii_digit() {
        if bytes[i] == b'0' {
            return Err(ParseError::InvalidHandCount); // leading zero / zero
        }
        let mut value: u32 = 0;
        while i < len && bytes[i].is_ascii_digit() {
            value = value
                .saturating_mul(10)
                .saturating_add(u32::from(bytes[i] - b'0'));
            i += 1;
        }
        if value < 2 {
            return Err(ParseError::InvalidHandCount); // an explicit count must be >= 2
        }
        value
    } else {
        1
    };

    match epin_token(&bytes[i..]) {
        Some((tok_len, id)) => Ok((i + tok_len, count, id)),
        None => Err(ParseError::InvalidPieceToken),
    }
}

/// Extracts the canonical sort keys (everything but multiplicity) from a token:
/// `(base letter 0..26, case, state rank, terminal, derivation)`.
///
/// The state rank reorders [`State`] to the FEEN order `-` < `+` < none, which
/// differs from PIN's own `Diminished < Normal < Enhanced`.
pub(crate) fn token_key(id: Identifier) -> (u8, u8, u8, u8, u8) {
    let letter = id.letter().as_ascii() - b'A';
    let case = match id.side() {
        Side::First => 0,
        Side::Second => 1,
    };
    let state = match id.state() {
        State::Diminished => 0,
        State::Enhanced => 1,
        State::Normal => 2,
    };
    (
        letter,
        case,
        state,
        u8::from(id.is_terminal()),
        u8::from(id.is_derived()),
    )
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::ParseError;

    fn ok(field: &str) -> u32 {
        validate(field.as_bytes()).expect("valid hands")
    }
    fn err(field: &str) -> ParseError {
        validate(field.as_bytes()).unwrap_err()
    }

    // ---------- valid ----------
    #[test]
    fn both_empty() {
        assert_eq!(ok("/"), 0);
    }
    #[test]
    fn first_only() {
        assert_eq!(ok("P/"), 1);
    }
    #[test]
    fn second_only() {
        assert_eq!(ok("/p"), 1);
    }
    #[test]
    fn multiplicity() {
        assert_eq!(ok("3P2B/"), 5); // 3P then 2B: higher count first
    }
    #[test]
    fn case_order() {
        assert_eq!(ok("BbPp/"), 4); // letter asc, uppercase before lowercase
    }
    #[test]
    fn cross_hands() {
        assert_eq!(ok("3P2B/3p2b"), 10);
    }
    #[test]
    fn shogi_like_mixed() {
        assert_eq!(ok("2P/p"), 3);
    }
    #[test]
    fn state_terminal_derivation_keys() {
        // same base letter K, ordered by state(-,+,none), terminal, derivation:
        // -K < +K < K < K^ < K^'   (all multiplicity 1)
        assert_eq!(ok("-K+KKK^K^'/"), 5);
    }

    // ---------- iterator tokenization ----------
    #[test]
    fn iterator_round_trip() {
        let encoded: Vec<(char, u32)> = HandIter::new(b"3P2Bp")
            .map(|it| (it.piece().letter().as_char(), it.count()))
            .collect();
        assert_eq!(encoded, vec![('P', 3), ('B', 2), ('P', 1)]);
    }

    // ---------- invalid: delimiter ----------
    #[test]
    fn no_delimiter() {
        assert_eq!(err("PP"), ParseError::InvalidHandsDelimiter);
    }
    #[test]
    fn two_delimiters() {
        assert_eq!(err("P/p/"), ParseError::InvalidHandsDelimiter);
    }

    // ---------- invalid: count ----------
    #[test]
    fn explicit_count_one() {
        assert_eq!(err("1P/"), ParseError::InvalidHandCount);
    }
    #[test]
    fn leading_zero_count() {
        assert_eq!(err("02P/"), ParseError::InvalidHandCount);
    }
    #[test]
    fn dangling_count() {
        assert_eq!(err("2/"), ParseError::InvalidPieceToken);
    }

    // ---------- invalid: token ----------
    #[test]
    fn bad_piece() {
        assert_eq!(err("P$/"), ParseError::InvalidPieceToken);
    }

    // ---------- invalid: aggregation ----------
    #[test]
    fn adjacent_duplicate() {
        assert_eq!(err("PP/"), ParseError::HandNotAggregated); // should be 2P
    }
    #[test]
    fn nonadjacent_duplicate_different_counts() {
        // 3P ... 2P: same token P with different counts, separated by 3Q.
        // The bitset must catch this even though the run looks sorted.
        assert_eq!(err("3P3Q2P/"), ParseError::HandNotAggregated);
    }

    // ---------- invalid: ordering ----------
    #[test]
    fn wrong_letter_order() {
        assert_eq!(err("QP/"), ParseError::HandNotCanonical); // should be PQ
    }
    #[test]
    fn wrong_multiplicity_order() {
        assert_eq!(err("2P3Q/"), ParseError::HandNotCanonical); // 3Q must precede 2P
    }
    #[test]
    fn wrong_case_order() {
        assert_eq!(err("pP/"), ParseError::HandNotCanonical); // uppercase first
    }
}