use std::mem;
use bit_vec::BitVec;
use bit_matrix::BitMatrix;
use forest::{Forest, NullForest, Bocage};
use forest::bocage::MarkAndSweep;
use forest::node::{NodeHandle, Graph};
use grammar::InternalGrammar;
use recognizer::Recognizer;
use item::{Item, CompletedItem};
const ITEMS_PER_SET: usize = 16;
pub trait MemoryUse {
type Arg;
fn memory_use(&self) -> usize;
fn new_with_limit(arg: Self::Arg, memory_limit: usize) -> Self;
}
impl<'g, F> MemoryUse for Recognizer<'g, F>
where F: MemoryUse<Arg=&'g InternalGrammar> + Forest,
{
type Arg = &'g InternalGrammar;
fn memory_use(&self) -> usize {
self.forest.memory_use()
+ self.predicted.memory_use()
+ self.medial.memory_use()
+ self.complete.memory_use()
+ self.indices.memory_use()
}
fn new_with_limit(grammar: &'g InternalGrammar, memory_limit: usize) -> Self {
let forest_use_bytes = memory_limit * F::FOREST_BYTES_PER_RECOGNIZER_BYTE / (F::FOREST_BYTES_PER_RECOGNIZER_BYTE + 1);
let complete_use = match memory_limit {
0..=1000 => 16,
1000..=500_000 => 32,
500_000..=2_000_000 => 64,
_ => 128,
};
let recognizer_use_bytes = memory_limit
- forest_use_bytes
- complete_use * mem::size_of::<CompletedItem<F::NodeRef>>();
let bytes_per_set = mem::size_of::<usize>()
+ (grammar.num_syms() + 31) / 32 * 4
+ ITEMS_PER_SET * mem::size_of::<Item<F::NodeRef>>();
let sets_use = recognizer_use_bytes / bytes_per_set;
let mut recognizer = Recognizer {
forest: F::new_with_limit(grammar, forest_use_bytes),
grammar,
earleme: 0,
indices: Vec::with_capacity(sets_use),
current_medial_start: 0,
predicted: BitMatrix::new(sets_use, grammar.num_syms()),
medial: Vec::with_capacity(sets_use * ITEMS_PER_SET),
complete: Vec::with_capacity(complete_use),
lookahead_hint: None,
};
recognizer.indices.push(0);
recognizer.indices.push(0);
recognizer.predict(grammar.start_sym());
recognizer
}
}
impl<'g, F> Recognizer<'g, F>
where F: MemoryUse<Arg=&'g InternalGrammar> + Forest,
{
#[inline]
pub fn new_with_hint(grammar: &'g InternalGrammar, tokens: usize) -> Self {
let forest_use_bytes = tokens * 16;
let complete_use = match tokens {
0..=200 => 16,
200..=10_000 => 32,
10_000..=100_000 => 64,
_ => 128,
};
let mut recognizer = Recognizer {
forest: F::new_with_limit(grammar, forest_use_bytes),
grammar,
earleme: 0,
indices: Vec::with_capacity(tokens + 1),
current_medial_start: 0,
predicted: BitMatrix::new(tokens + 1, grammar.num_syms()),
medial: Vec::with_capacity(tokens * ITEMS_PER_SET),
complete: Vec::with_capacity(complete_use),
lookahead_hint: None,
};
recognizer.indices.push(0);
recognizer.indices.push(0);
recognizer.predict(grammar.start_sym());
recognizer
}
}
impl<'g> MemoryUse for Recognizer<'g, NullForest> {
type Arg = &'g InternalGrammar;
fn memory_use(&self) -> usize {
self.forest.memory_use()
+ self.predicted.memory_use()
+ self.medial.memory_use()
+ self.complete.memory_use()
+ self.indices.memory_use()
}
fn new_with_limit(grammar: &'g InternalGrammar, memory_limit: usize) -> Self {
let complete_use = match memory_limit {
0..=1000 => 16,
1000..=500_000 => 32,
500_000..=2_000_000 => 64,
_ => 128,
};
let recognizer_use_bytes = memory_limit
- complete_use * mem::size_of::<CompletedItem<()>>();
let bytes_per_set = mem::size_of::<usize>()
+ (grammar.num_syms() + 31) / 32 * 4
+ ITEMS_PER_SET * mem::size_of::<Item<()>>();
let sets_use = recognizer_use_bytes / bytes_per_set;
let mut recognizer = Recognizer {
forest: NullForest,
grammar,
earleme: 0,
indices: Vec::with_capacity(sets_use),
current_medial_start: 0,
predicted: BitMatrix::new(sets_use, grammar.num_syms()),
medial: Vec::with_capacity(sets_use * ITEMS_PER_SET),
complete: Vec::with_capacity(complete_use),
lookahead_hint: None,
};
recognizer.indices.push(0);
recognizer.indices.push(0);
recognizer.predict(grammar.start_sym());
recognizer
}
}
impl<T> MemoryUse for Vec<T> {
type Arg = ();
fn memory_use(&self) -> usize {
self.capacity() * mem::size_of::<T>()
}
fn new_with_limit(_arg: (), memory_limit: usize) -> Self {
let capacity = memory_limit / mem::size_of::<T>();
Self::with_capacity(capacity)
}
}
impl MemoryUse for BitMatrix {
type Arg = usize;
fn memory_use(&self) -> usize {
let (rows, columns) = self.size();
rows * ((columns + 31) / 32 * 4)
}
fn new_with_limit(num_columns: usize, memory_limit: usize) -> Self {
let row_size = (num_columns + 31) / 32 * 4;
let capacity = memory_limit / row_size;
Self::new(capacity, num_columns)
}
}
impl MemoryUse for BitVec {
type Arg = ();
fn memory_use(&self) -> usize {
(self.capacity() + 31) / 32 * 4
}
fn new_with_limit(_arg: (), memory_limit: usize) -> Self {
let capacity = memory_limit * 8;
Self::with_capacity(capacity)
}
}
impl MemoryUse for NullForest {
type Arg = ();
fn memory_use(&self) -> usize {
0
}
fn new_with_limit(_arg: (), _memory_limit: usize) -> Self {
NullForest
}
}
impl<'g> MemoryUse for Bocage<&'g InternalGrammar> {
type Arg = &'g InternalGrammar;
fn memory_use(&self) -> usize {
self.graph.vec.memory_use() + self.gc.liveness.memory_use() + self.gc.dfs.memory_use()
}
fn new_with_limit(grammar: &'g InternalGrammar, memory_limit: usize) -> Self {
let dfs_size = match memory_limit {
0..=1000 => 8,
1000..=100_000 => 32,
_ => 64,
};
let remaining_use = memory_limit - dfs_size * std::mem::size_of::<NodeHandle>();
let bytes_per_node = mem::size_of::<u16>() as f32 + 1.0 / 8.0;
let graph_size = (remaining_use as f32 / bytes_per_node) as usize;
let mut result = Bocage {
graph: Graph::with_capacity(graph_size),
gc: MarkAndSweep {
liveness: BitVec::with_capacity(graph_size),
dfs: Vec::with_capacity(dfs_size),
},
grammar,
summand_count: 0,
first_summand: NodeHandle(0),
};
result.initialize_nulling();
result
}
}