pub(crate) mod automaton;
mod automaton_chip;
pub mod regex;
mod serialization;
pub mod static_specs;
mod substring;
mod substring_varlen;
pub(crate) mod varlen;
use std::{
cell::RefCell,
collections::{BTreeMap, BTreeSet},
rc::Rc,
};
use automaton::Automaton;
use midnight_proofs::{
circuit::{Chip, Layouter},
plonk::{Advice, Column, ConstraintSystem, Error, Expression, Fixed, Selector, TableColumn},
poly::Rotation,
};
use regex::Regex;
use rustc_hash::FxHashMap;
pub use static_specs::StdLibParser;
#[cfg(test)]
pub(crate) type MarkerTestVector<'a> = (&'a [u8], &'a [(usize, &'a [u8])]);
#[cfg(test)]
#[allow(dead_code)]
pub(crate) type OutputTestVector<'a> = (&'a [u8], &'a [usize]);
#[cfg(test)]
use {
crate::field::decomposition::chip::P2RDecompositionConfig,
crate::field::decomposition::pow2range::Pow2RangeChip, crate::field::native::NB_ARITH_COLS,
crate::testing_utils::FromScratch, midnight_proofs::plonk::Instance, regex::RegexInstructions,
};
use crate::{
field::{
decomposition::chip::P2RDecompositionChip, native::AssignedBit, AssignedNative, NativeChip,
NativeGadget,
},
utils::ComposableChip,
vec::vector_gadget::VectorGadget,
CircuitField,
};
const ALPHABET_MAX_SIZE: usize = 256;
const NB_AUTOMATON_COLS: usize = 3;
const NB_SUBSTRING_COLS: usize = 2;
const AUTOMATON_PARALLELISM: usize = 2;
const SUBSTRING_PARALLELISM: usize = 3;
const PARSING_MAX_LEN_BITS: u32 = 64;
pub const NB_SCANNER_ADVICE_COLS: usize = {
let automaton = NB_AUTOMATON_COLS * AUTOMATON_PARALLELISM;
let substring = SUBSTRING_PARALLELISM * NB_SUBSTRING_COLS;
if automaton > substring {
automaton
} else {
substring
}
};
pub const NB_SCANNER_FIXED_COLS: usize = 1;
type NG<F> = NativeGadget<F, P2RDecompositionChip<F>, NativeChip<F>>;
#[derive(Clone, Debug)]
pub struct NativeAutomaton<F> {
pub nb_states: usize,
pub initial_state: F,
pub final_states: BTreeSet<F>,
pub transitions: BTreeMap<F, BTreeMap<F, (F, F)>>,
}
impl<F> From<&Automaton> for NativeAutomaton<F>
where
F: CircuitField + Ord,
{
fn from(value: &Automaton) -> Self {
let mut transitions = BTreeMap::new();
for (&source, inner) in &value.transitions {
let native_inner: BTreeMap<F, (F, F)> = inner
.iter()
.map(|(&letter, &(target, output))| {
(
F::from(letter as u64),
(F::from(target as u64), F::from(output as u64)),
)
})
.collect();
transitions.insert(F::from(source as u64), native_inner);
}
let initial_state = F::from(value.initial_state as u64);
let final_states: BTreeSet<F> =
(value.final_states.iter()).map(|s| F::from(*s as u64)).collect();
let filler = F::from(ALPHABET_MAX_SIZE as u64);
transitions
.entry(initial_state)
.or_default()
.insert(filler, (initial_state, F::ZERO));
for &state in &final_states {
transitions.entry(state).or_default().insert(filler, (state, F::ZERO));
}
NativeAutomaton {
nb_states: value.nb_states,
initial_state,
final_states,
transitions,
}
}
}
impl<F> From<Automaton> for NativeAutomaton<F>
where
F: CircuitField + Ord,
{
fn from(value: Automaton) -> Self {
(&value).into()
}
}
impl<F> NativeAutomaton<F>
where
F: CircuitField + Ord,
{
fn get_transition(&self, state: &F, letter: &F) -> Option<(F, F)> {
self.transitions.get(state).and_then(|inner| inner.get(letter)).copied()
}
}
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum AutomatonParser {
Static(StdLibParser),
Dynamic(Regex),
}
impl From<&StdLibParser> for AutomatonParser {
fn from(value: &StdLibParser) -> Self {
AutomatonParser::Static(*value)
}
}
impl From<StdLibParser> for AutomatonParser {
fn from(value: StdLibParser) -> Self {
AutomatonParser::from(&value)
}
}
impl From<Regex> for AutomatonParser {
fn from(value: Regex) -> Self {
AutomatonParser::Dynamic(value)
}
}
type ParsingLibrary = FxHashMap<StdLibParser, (Regex, Automaton)>;
type AutomatonCache<F> = FxHashMap<AutomatonParser, NativeAutomaton<F>>;
type Sequence<F> = Vec<AssignedNative<F>>;
type PaddingFlags<F> = Option<Vec<AssignedBit<F>>>;
type CachedSub<F> = (
AssignedNative<F>,
Vec<(F, AssignedNative<F>)>,
Sequence<F>,
PaddingFlags<F>,
);
type SequenceCache<F> = FxHashMap<Sequence<F>, (Vec<CachedSub<F>>, usize)>;
#[derive(Clone, Debug)]
pub struct ScannerConfig {
advice_cols: [Column<Advice>; NB_SCANNER_ADVICE_COLS],
static_library: ParsingLibrary,
q_automaton: Selector,
t_source: TableColumn,
t_letter: TableColumn,
t_target: TableColumn,
t_output: TableColumn,
q_substring: Selector,
index_col: Column<Fixed>,
tag_col: Column<Fixed>,
}
#[derive(Clone, Debug)]
pub struct ScannerChip<F>
where
F: CircuitField,
{
config: ScannerConfig,
native_gadget: NG<F>,
vector_gadget: VectorGadget<F>,
automaton_cache: Rc<RefCell<AutomatonCache<F>>>,
max_state: Rc<RefCell<usize>>,
sequence_cache: Rc<RefCell<SequenceCache<F>>>,
}
impl<F> ScannerChip<F>
where
F: CircuitField,
{
pub fn specs_regex(&self, parser: &StdLibParser) -> &Regex {
let (regex, _) = self
.config
.static_library
.get(parser)
.unwrap_or_else(|| panic!("parser {:?} not found", parser));
regex
}
}
impl<F> Chip<F> for ScannerChip<F>
where
F: CircuitField,
{
type Config = ScannerConfig;
type Loaded = ();
fn config(&self) -> &Self::Config {
&self.config
}
fn loaded(&self) -> &Self::Loaded {
&()
}
}
impl<F> ComposableChip<F> for ScannerChip<F>
where
F: CircuitField + Ord,
{
type InstructionDeps = NG<F>;
type SharedResources = (
[Column<Advice>; NB_SCANNER_ADVICE_COLS],
Column<Fixed>,
FxHashMap<StdLibParser, (Regex, Automaton)>,
);
fn new(config: &ScannerConfig, deps: &Self::InstructionDeps) -> Self {
Self {
config: config.clone(),
vector_gadget: VectorGadget::new(deps),
native_gadget: deps.clone(),
automaton_cache: Rc::new(RefCell::new(FxHashMap::default())),
max_state: Rc::new(RefCell::new(1)),
sequence_cache: Rc::new(RefCell::new(FxHashMap::default())),
}
}
fn configure(
meta: &mut ConstraintSystem<F>,
shared_res: &Self::SharedResources,
) -> ScannerConfig {
#[allow(clippy::assertions_on_constants)]
{
assert!(
AUTOMATON_PARALLELISM > 0 && SUBSTRING_PARALLELISM > 0,
"at least 1 lookup required for automata and substring checks"
);
assert!(
PARSING_MAX_LEN_BITS <= F::CAPACITY - (u8::BITS + 1),
"check_subsequence batching exceeds field capacity ({} / {})",
PARSING_MAX_LEN_BITS + u8::BITS + 1,
F::CAPACITY
)
}
let (advice_cols, index_col, automata) = shared_res;
for &col in advice_cols {
meta.enable_equality(col);
}
let q_automaton = meta.complex_selector();
let t_source = meta.lookup_table_column();
let t_letter = meta.lookup_table_column();
let t_target = meta.lookup_table_column();
let t_output = meta.lookup_table_column();
for batch in 0..AUTOMATON_PARALLELISM {
meta.lookup(
format!("automaton transition check (batch {batch})"),
|meta| {
let q = meta.query_selector(q_automaton);
let base = NB_AUTOMATON_COLS * batch;
let [source, letter, output] = core::array::from_fn(|i| {
meta.query_advice(advice_cols[base + i], Rotation::cur())
});
let target = if batch + 1 < AUTOMATON_PARALLELISM {
meta.query_advice(advice_cols[base + NB_AUTOMATON_COLS], Rotation::cur())
} else {
meta.query_advice(advice_cols[0], Rotation::next())
};
vec![
(q.clone() * source, t_source),
(q.clone() * letter, t_letter),
(q.clone() * target, t_target),
(q * output, t_output),
]
},
);
}
let tag_col = meta.fixed_column();
let q_substring = meta.complex_selector();
for batch in 0..SUBSTRING_PARALLELISM {
meta.lookup_any(format!("substring lookup (batch {batch})"), |meta| {
let sel = meta.query_selector(q_substring);
let not_sel = Expression::Constant(F::ONE) - sel.clone();
let index = meta.query_fixed(*index_col, Rotation::cur());
let tag = meta.query_fixed(tag_col, Rotation::cur());
let shift = Expression::Constant(F::from(ALPHABET_MAX_SIZE as u64 + 1));
let base = NB_SUBSTRING_COLS * batch;
let table = meta.query_advice(advice_cols[base], Rotation::cur());
let query = meta.query_advice(advice_cols[base + 1], Rotation::cur());
vec![
(tag.clone(), sel.clone() * tag.clone()),
(
query.clone(),
sel * (index * shift + table) + not_sel * query,
),
]
});
}
ScannerConfig {
advice_cols: *advice_cols,
static_library: automata.clone(),
q_automaton,
t_source,
t_letter,
t_target,
t_output,
q_substring,
index_col: *index_col,
tag_col,
}
}
fn load(&self, layouter: &mut impl Layouter<F>) -> Result<(), Error> {
self.load_automata_table(layouter)?;
self.finalise_substring_checks(layouter)
}
}
#[cfg(test)]
impl Regex {
fn hard_coded_example0() -> Self {
let hellos = Regex::word("hello").separated_non_empty_list(Regex::blanks_strict());
let worlds = Regex::word("world").separated_non_empty_list(Regex::cat([
Regex::blanks(),
",".into(),
Regex::blanks(),
]));
let marks5 = Regex::word("!").repeat(5);
let trail = Regex::any_byte().minus("!".into()).list();
Regex::separated_cat(
[
hellos.terminated(Regex::one_blank()),
worlds
.delimited(Regex::blanks(), Regex::blanks())
.delimited("(".into(), ")".into()),
marks5,
trail,
],
Regex::blanks(),
)
}
fn hard_coded_example1() -> Self {
let output_regex = Regex::any_byte()
.output(&|b| {
if b == b'l' {
Some(2)
} else if !b"h\n\t ".contains(&b) {
Some(1)
} else {
None
}
})
.list();
let holy = Regex::word("holy").terminated(Regex::word("y").list());
let hell = Regex::word("hell");
let marks = Regex::word("!").non_empty_list();
let sentence = Regex::separated_cat([holy, hell, marks], Regex::blanks_strict());
sentence.and(output_regex)
}
}
#[cfg(test)]
impl<F> FromScratch<F> for ScannerChip<F>
where
F: CircuitField + Ord,
{
type Config = (P2RDecompositionConfig, ScannerConfig);
fn new_from_scratch(config: &Self::Config) -> Self {
let max_bit_len = 8;
let native_chip = NativeChip::new(&config.0.native_config, &());
let core_decomposition_chip = P2RDecompositionChip::new(&config.0, &max_bit_len);
let native_gadget = NG::<F>::new(core_decomposition_chip, native_chip);
<ScannerChip<F> as ComposableChip<F>>::new(&config.1, &native_gadget)
}
fn configure_from_scratch(
meta: &mut ConstraintSystem<F>,
advice_columns: &mut Vec<Column<Advice>>,
fixed_columns: &mut Vec<Column<Fixed>>,
instance_columns: &[Column<Instance>; 2],
) -> Self::Config {
let nb_advice_needed = std::cmp::max(NB_SCANNER_ADVICE_COLS, NB_ARITH_COLS);
let nb_fixed_needed = NB_ARITH_COLS + 4;
while advice_columns.len() < nb_advice_needed {
advice_columns.push(meta.advice_column());
}
while fixed_columns.len() < nb_fixed_needed {
fixed_columns.push(meta.fixed_column());
}
let advice_cols = &advice_columns[..nb_advice_needed];
let fixed_cols = &fixed_columns[..nb_fixed_needed];
let native_config = NativeChip::configure(
meta,
&(
advice_cols[..NB_ARITH_COLS].try_into().unwrap(),
fixed_cols[..NB_ARITH_COLS + 4].try_into().unwrap(),
*instance_columns,
),
);
let scanner_config = ScannerChip::configure(
meta,
&(
advice_cols[..NB_SCANNER_ADVICE_COLS].try_into().unwrap(),
fixed_cols[0],
FxHashMap::default(),
),
);
let pow2range_config = Pow2RangeChip::configure(meta, &advice_cols[1..=4]);
let native_gadget_config = P2RDecompositionConfig {
native_config,
pow2range_config,
};
(native_gadget_config, scanner_config)
}
fn load_from_scratch(&self, layouter: &mut impl Layouter<F>) -> Result<(), Error> {
self.native_gadget.load_from_scratch(layouter)?;
self.load(layouter)
}
}