// iocaine 3.0.0
//
// The deadliest poison known to AI
// Documentation
// SPDX-FileCopyrightText: 2025 @iadd
// SPDX-FileContributor: @iadd
//
// SPDX-License-Identifier: MIT
use std::{collections::HashMap, str::CharIndices};

/// A half-open byte range (`start..end`) that identifies a slice of a string.
///
/// The range does not hold a reference to the string it describes; the caller
/// passes that string back in when resolving it with [`Substr::extract_str`].
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Hash)]
pub struct Substr {
    /// Byte offset at which the slice begins.
    pub start: usize,
    /// Byte offset one past the final byte of the slice.
    pub end: usize,
}

impl Substr {
    /// Resolves this range against `relative_to` and returns the slice it covers.
    ///
    /// # Panics
    ///
    /// Panics if the range is inverted, reaches past the end of `relative_to`,
    /// or does not fall on UTF-8 character boundaries.
    pub fn extract_str<'a>(&self, relative_to: &'a str) -> &'a str {
        let Self { start, end } = *self;
        &relative_to[start..end]
    }
}

/// Normalizes [`Substr`]s so that the same substring text is always
/// represented by the same [`Substr`].
///
/// The map is keyed by the substring text itself (borrowed for `'a`) and
/// stores the first range that was interned for that text.
#[derive(Debug, Default)]
pub struct Interner<'a>(HashMap<&'a str, Substr>);

impl<'a> Interner<'a> {
    /// Creates an empty interner; equivalent to [`Interner::default`].
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the canonical [`Substr`] for the text that `substr` selects
    /// within `str`.
    ///
    /// The first range interned for a given text becomes canonical: later
    /// calls whose range selects equal text return that first range rather
    /// than the one passed in.
    ///
    /// # Panics
    ///
    /// Panics if `substr` is not a valid range of `str` (out of bounds,
    /// inverted, or not on UTF-8 character boundaries).
    pub fn intern(&mut self, str: &'a str, substr: Substr) -> Substr {
        let text = substr.extract_str(str);
        *self.0.entry(text).or_insert(substr)
    }
}

/// Splits a string on whitespace, yielding byte ranges instead of slices.
///
/// Produces the same pieces as the iterator returned by
/// `str::split_whitespace`, but each piece is reported as a [`Substr`]
/// rather than a string slice.
pub struct WhitespaceSplitIterator<'a> {
    underlying: CharIndices<'a>,
}

impl<'a> WhitespaceSplitIterator<'a> {
    /// Starts splitting `s` from its first character.
    pub fn new(s: &'a str) -> Self {
        let underlying = s.char_indices();
        Self { underlying }
    }
}

impl Iterator for WhitespaceSplitIterator<'_> {
    type Item = Substr;

    /// Returns the byte range of the next whitespace-delimited word, or
    /// `None` once only whitespace (or nothing) is left.
    fn next(&mut self) -> Option<Self::Item> {
        let chars = &mut self.underlying;

        // Skip leading whitespace; running out of input here means no
        // further word exists.
        let (start, _) = chars.find(|&(_, ch)| !ch.is_whitespace())?;

        // Advance past the rest of the word. The whitespace character that
        // terminates it is consumed as well; if the input ends first, the
        // word extends to the iterator's final offset.
        let end = match chars.find(|&(_, ch)| ch.is_whitespace()) {
            Some((boundary, _)) => boundary,
            None => chars.offset(),
        };

        Some(Substr { start, end })
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `WhitespaceSplitIterator` yields exactly the pieces that
    /// `str::split_whitespace` yields for `s`.
    fn compare_same(s: &str) {
        let substrs = WhitespaceSplitIterator::new(s)
            .map(|ss| ss.extract_str(s))
            .collect::<Vec<_>>();
        let std_split = s.split_whitespace().collect::<Vec<_>>();

        assert_eq!(substrs, std_split);
    }

    #[test]
    fn splits_simple_whitespace() {
        compare_same("hello there world");
    }

    #[test]
    fn multiple_interior_whitespace() {
        compare_same("hello\t\t\tthere     world");
    }

    #[test]
    fn leading_whitespace() {
        compare_same("   hello there world");
    }

    #[test]
    fn trailing_whitespace() {
        // The whitespace must follow the last word so that the iterator is
        // asked for another item after consuming a trailing separator.
        compare_same("hello there world   ");
    }

    #[test]
    fn empty_and_whitespace_only() {
        compare_same("");
        compare_same(" \t\n ");
    }

    #[test]
    fn multibyte_words_and_separators() {
        // Offsets are byte-based, so non-ASCII words and Unicode whitespace
        // check that ranges stay on character boundaries.
        compare_same("h\u{e9}llo\u{2003}w\u{f6}rld \u{a0} \u{1f980}");
    }
}