use core::fmt;
use aozora_pipeline::{BorrowedLexOutput, NodeRef, SourceNode, lex_into_arena};
use aozora_render::{html as borrowed_html, serialize as borrowed_serialize};
use aozora_spec::{Diagnostic, NormalizedOffset, PairLink, SourceOffset};
use aozora_syntax::borrowed::{Arena, ContainerPair};
/// Multiplier applied to the source length in bytes to choose the arena
/// capacity when `ParseOptions` carries no explicit capacity.
const ARENA_CAPACITY_FACTOR: usize = 4;
/// Selects which diagnostics `Document::parse` keeps on the returned tree.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[non_exhaustive]
pub enum DiagnosticPolicy {
    /// Keep every diagnostic unchanged. This is the default policy.
    #[default]
    CollectAll,
    /// Remove diagnostics whose source is `DiagnosticSource::Internal`.
    DropInternal,
}
/// Builder-style configuration that `ParseOptions::build` turns into a
/// `Document`.
#[derive(Debug, Clone, Copy, Default)]
#[must_use]
pub struct ParseOptions {
    /// Explicit arena capacity; when `None`, `build` derives one from the
    /// source length.
    arena_capacity: Option<usize>,
    /// Diagnostic policy copied into the built `Document`.
    diagnostic_policy: DiagnosticPolicy,
}
impl ParseOptions {
    /// Returns options with every setting at its default value: no explicit
    /// arena capacity and the default [`DiagnosticPolicy`].
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns these options with an explicit arena capacity, which `build`
    /// then uses instead of the source-length-derived default.
    pub fn arena_capacity(self, capacity: usize) -> Self {
        Self {
            arena_capacity: Some(capacity),
            ..self
        }
    }

    /// Returns these options with the given diagnostic policy.
    pub fn diagnostic_policy(self, policy: DiagnosticPolicy) -> Self {
        Self {
            diagnostic_policy: policy,
            ..self
        }
    }

    /// Consumes the options and creates a [`Document`] owning `source`.
    ///
    /// When no explicit capacity was configured, the arena capacity is the
    /// source length in bytes multiplied by `ARENA_CAPACITY_FACTOR`,
    /// saturating instead of overflowing.
    pub fn build(self, source: impl Into<Box<str>>) -> Document {
        let Self {
            arena_capacity,
            diagnostic_policy,
        } = self;
        let source: Box<str> = source.into();
        let capacity = match arena_capacity {
            Some(explicit) => explicit,
            None => source.len().saturating_mul(ARENA_CAPACITY_FACTOR),
        };
        let arena = Arena::with_capacity(capacity);
        Document {
            source,
            arena,
            diagnostic_policy,
        }
    }
}
/// Owns a source text together with the arena that `parse` lexes into.
pub struct Document {
    /// The source text, owned as a boxed string slice.
    source: Box<str>,
    /// Arena passed to `lex_into_arena` by `parse`.
    arena: Arena,
    /// Policy that `parse` applies to the diagnostics it returns.
    diagnostic_policy: DiagnosticPolicy,
}
impl Document {
    /// Creates a document from `source` using default [`ParseOptions`].
    #[must_use]
    pub fn new(source: impl Into<Box<str>>) -> Self {
        ParseOptions::new().build(source)
    }

    /// Returns a fresh [`ParseOptions`] builder; call `build(source)` on it
    /// to obtain a document.
    pub fn options() -> ParseOptions {
        ParseOptions::new()
    }

    /// Creates a document with an explicit arena capacity.
    ///
    /// Retained for backward compatibility; forwards to the [`ParseOptions`]
    /// builder.
    #[deprecated(
        since = "0.3.0",
        note = "use Document::options().arena_capacity(n).build(source)"
    )]
    #[must_use]
    pub fn with_arena_capacity(source: impl Into<Box<str>>, capacity_hint: usize) -> Self {
        ParseOptions::new()
            .arena_capacity(capacity_hint)
            .build(source)
    }

    /// Returns the source text owned by this document.
    #[must_use]
    pub fn source(&self) -> &str {
        &self.source
    }

    /// Returns the byte count reported by the arena's `allocated_bytes()`.
    #[must_use]
    pub fn arena_bytes(&self) -> usize {
        self.arena.allocated_bytes()
    }

    /// Returns a new document whose source is this document's source with the
    /// byte range `span.start..span.end` replaced by `replacement`.
    ///
    /// `self` is not modified. The new document keeps this document's
    /// diagnostic policy and gets a new arena whose capacity is derived from
    /// the edited source length.
    ///
    /// # Panics
    ///
    /// Panics when `span.start > span.end`, when `span.end` exceeds the source
    /// length, or when either offset does not fall on a UTF-8 character
    /// boundary of the source.
    #[must_use]
    pub fn edit(&self, span: aozora_spec::Span, replacement: &str) -> Self {
        let start = span.start as usize;
        let end = span.end as usize;
        assert!(start <= end, "edit: span start ({start}) > end ({end})");
        assert!(
            end <= self.source.len(),
            "edit: span end ({end}) past source length ({len})",
            len = self.source.len(),
        );
        // Slicing a `str` off a character boundary panics with a generic
        // message; check up front so the failure names the offending offset.
        assert!(
            self.source.is_char_boundary(start),
            "edit: span start ({start}) is not on a UTF-8 char boundary",
        );
        assert!(
            self.source.is_char_boundary(end),
            "edit: span end ({end}) is not on a UTF-8 char boundary",
        );

        let prefix = &self.source[..start];
        let suffix = &self.source[end..];
        // Reserve the exact final size so the three appends never reallocate.
        let mut new_source = String::with_capacity(
            prefix
                .len()
                .saturating_add(replacement.len())
                .saturating_add(suffix.len()),
        );
        new_source.push_str(prefix);
        new_source.push_str(replacement);
        new_source.push_str(suffix);

        ParseOptions::new()
            .diagnostic_policy(self.diagnostic_policy)
            .build(new_source.into_boxed_str())
    }

    /// Lexes the source into this document's arena and returns a tree that
    /// borrows from `self`.
    ///
    /// Under [`DiagnosticPolicy::DropInternal`], diagnostics whose source is
    /// `DiagnosticSource::Internal` are removed before the tree is returned.
    // NOTE(review): every call lexes into the same arena, so repeated calls
    // presumably accumulate allocations; confirm against the arena's contract.
    #[must_use]
    pub fn parse(&self) -> AozoraTree<'_> {
        let mut inner = lex_into_arena(&self.source, &self.arena);
        if self.diagnostic_policy == DiagnosticPolicy::DropInternal {
            inner
                .diagnostics
                .retain(|d| d.source() != aozora_spec::DiagnosticSource::Internal);
        }
        AozoraTree {
            source: &self.source,
            inner,
        }
    }
}
/// Compact `Debug` output: reports the source length and arena usage rather
/// than the source text and arena contents.
impl fmt::Debug for Document {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let source_len = self.source.len();
        let arena_bytes = self.arena.allocated_bytes();

        let mut out = f.debug_struct("Document");
        out.field("source_len", &source_len);
        out.field("arena_bytes", &arena_bytes);
        out.field("diagnostic_policy", &self.diagnostic_policy);
        out.finish()
    }
}
/// Result of `Document::parse`: the lexer output paired with the source text
/// it was produced from.
#[derive(Debug)]
pub struct AozoraTree<'a> {
    /// Source text borrowed from the owning `Document`.
    source: &'a str,
    /// Output returned by `lex_into_arena`.
    inner: BorrowedLexOutput<'a>,
}
impl<'a> AozoraTree<'a> {
    /// Returns the source text this tree was parsed from.
    #[must_use]
    pub fn source(&self) -> &'a str {
        self.source
    }

    /// Returns the diagnostics held by the lexer output.
    #[must_use]
    pub fn diagnostics(&self) -> &[Diagnostic] {
        &self.lex_output().diagnostics
    }

    /// Returns the pair links recorded in the lexer output.
    #[must_use]
    pub fn pairs(&self) -> &'a [PairLink] {
        self.lex_output().pairs
    }

    /// Gives direct access to the underlying lexer output.
    #[must_use]
    pub fn lex_output(&self) -> &BorrowedLexOutput<'a> {
        &self.inner
    }

    /// Looks up a source node for `src_off`, delegating to the lexer output.
    #[must_use]
    pub fn node_at_source(&self, src_off: SourceOffset) -> Option<&SourceNode<'a>> {
        self.lex_output().node_at_source(src_off)
    }

    /// Looks up a node for `normalized_off` in the lexer output's registry.
    #[must_use]
    pub fn node_at_normalized(&self, normalized_off: NormalizedOffset) -> Option<NodeRef<'a>> {
        let registry = &self.lex_output().registry;
        registry.node_at(normalized_off)
    }

    /// Returns the source nodes recorded in the lexer output.
    #[must_use]
    pub fn source_nodes(&self) -> &'a [SourceNode<'a>] {
        self.lex_output().source_nodes
    }

    /// Returns the container pairs recorded in the lexer output.
    #[must_use]
    pub fn container_pairs(&self) -> &'a [ContainerPair] {
        self.lex_output().container_pairs
    }

    /// Renders the tree to an HTML string.
    #[must_use]
    pub fn to_html(&self) -> String {
        let output = self.lex_output();
        borrowed_html::render_to_string(output)
    }

    /// Serializes the tree to a string.
    #[must_use]
    pub fn serialize(&self) -> String {
        let output = self.lex_output();
        borrowed_serialize::serialize(output)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `source()` returns exactly the text the document was built from.
    #[test]
    fn document_borrows_source() {
        let text = "hello";
        let doc = Document::new(text);
        assert_eq!(doc.source(), text);
    }

    /// The parsed tree reports the same source as its document.
    #[test]
    fn parse_returns_borrowed_tree_with_same_source() {
        let text = "world";
        let doc = Document::new(text);
        let tree = doc.parse();
        assert_eq!(tree.source(), text);
    }

    /// Plain text yields no diagnostics.
    #[test]
    fn diagnostics_empty_for_clean_input() {
        let doc = Document::new("plain");
        let tree = doc.parse();
        assert!(tree.diagnostics().is_empty());
    }

    /// A private-use code point in the input produces at least one diagnostic.
    #[test]
    fn diagnostics_populated_for_pua_collision() {
        let doc = Document::new("contains \u{E001} sentinel");
        let tree = doc.parse();
        assert!(!tree.diagnostics().is_empty());
    }

    /// `edit` replaces exactly the bytes covered by the span.
    #[test]
    fn edit_splices_source_at_span() {
        let doc = Document::new("hello world!");
        let word = aozora_spec::Span::new(6, 11);
        let edited = doc.edit(word, "Aozora");
        assert_eq!(edited.source(), "hello Aozora!");
    }

    /// Empty spans at offset 0 and at the source length act as pure inserts.
    #[test]
    fn edit_at_start_and_end_boundaries() {
        let doc = Document::new("middle");

        let at_start = aozora_spec::Span::new(0, 0);
        let head = doc.edit(at_start, "PRE-");
        assert_eq!(head.source(), "PRE-middle");

        let len = u32::try_from(doc.source().len()).expect("test source fits u32");
        let at_end = aozora_spec::Span::new(len, len);
        let tail = doc.edit(at_end, "-POST");
        assert_eq!(tail.source(), "middle-POST");
    }

    /// Editing a document matches splicing the text by hand and parsing the
    /// result from scratch.
    #[test]
    fn edit_equivalence_full_reparse() {
        let replacement = "《せいばい》";
        let original = Document::new("|青梅《おうめ》です。");
        let text = original.source();
        let span_start = text.find('《').expect("《 present");
        let span_end = text.find('》').expect("》 present") + '》'.len_utf8();

        let span = aozora_spec::Span::new(
            u32::try_from(span_start).expect("test span fits u32"),
            u32::try_from(span_end).expect("test span fits u32"),
        );
        let edited = original.edit(span, replacement);

        let spliced_source = [&text[..span_start], replacement, &text[span_end..]].concat();
        let from_scratch = Document::new(spliced_source);

        assert_eq!(edited.source(), from_scratch.source());
        let via_edit = edited.parse().serialize();
        let via_reparse = from_scratch.parse().serialize();
        assert_eq!(
            via_edit, via_reparse,
            "edit() must be equivalent to splice + reparse"
        );
    }

    /// A span whose start lies after its end is rejected with a panic.
    #[test]
    #[should_panic(expected = "span start")]
    fn edit_rejects_inverted_span() {
        let doc = Document::new("ok");
        let inverted = aozora_spec::Span::new(2, 1);
        drop(doc.edit(inverted, ""));
    }

    /// Serializing, reparsing and serializing again reproduces the first output.
    #[test]
    fn round_trip_through_serialize_is_a_fixed_point() {
        let text = "|青梅《おうめ》";
        let first_doc = Document::new(text);
        let first = first_doc.parse().serialize();
        let second_doc = Document::new(first.as_str());
        let second = second_doc.parse().serialize();
        assert_eq!(first, second, "round-trip must be a fixed point");
    }

    /// A single ruby annotation yields one pair anchored at its delimiters.
    #[test]
    fn pairs_records_simple_ruby() {
        let doc = Document::new("|青梅《おうめ》");
        let tree = doc.parse();
        let pairs = tree.pairs();
        assert_eq!(pairs.len(), 1);

        let link = pairs[0];
        assert_eq!(link.kind, aozora_spec::PairKind::Ruby);

        let text = tree.source();
        let open_byte = text.find('《').expect("source contains 《");
        let close_byte = text.find('》').expect("source contains 》");
        assert_eq!(link.open.start as usize, open_byte);
        assert_eq!(link.close.start as usize, close_byte);
    }

    /// Nested brackets are reported innermost first, i.e. in closing order.
    #[test]
    fn pairs_records_multiple_brackets_in_close_order() {
        let doc = Document::new("[#外[#内]終]");
        let tree = doc.parse();
        let pairs = tree.pairs();
        assert_eq!(pairs.len(), 2);

        let first = &pairs[0];
        let second = &pairs[1];
        assert!(first.open.start > second.open.start);
        assert!(first.close.start < second.close.start);
    }

    /// An opener without a closer creates no pair but does raise a diagnostic.
    #[test]
    fn pairs_excludes_unclosed_open() {
        let doc = Document::new("[#orphan");
        let tree = doc.parse();
        assert!(tree.pairs().is_empty());
        assert!(!tree.diagnostics().is_empty());
    }

    /// A closer without an opener creates no pair.
    #[test]
    fn pairs_excludes_unmatched_close() {
        let doc = Document::new("orphan]");
        let tree = doc.parse();
        assert!(tree.pairs().is_empty());
    }

    /// Looking up the offset of the ruby bar finds an inline node that starts
    /// there and extends past it.
    #[test]
    fn node_at_source_finds_inline_ruby() {
        let text = "前|青梅《おうめ》後";
        let doc = Document::new(text);
        let tree = doc.parse();

        let bar_byte = text.find('|').expect("source contains |");
        let bar_off = u32::try_from(bar_byte).expect("offset fits in u32");

        let entry = tree
            .node_at_source(SourceOffset::new(bar_off))
            .expect("ruby span at | offset");
        assert_eq!(entry.source_span.start, bar_off);
        assert!(entry.source_span.end > bar_off);
        assert!(matches!(entry.node, NodeRef::Inline(_)));
    }

    /// Offsets inside plain text have no associated source node.
    #[test]
    fn node_at_source_returns_none_for_plain_run() {
        let text = "前|青梅《おうめ》後";
        let doc = Document::new(text);
        let tree = doc.parse();
        let found = tree.node_at_source(SourceOffset::new(0));
        assert!(found.is_none());
    }

    /// Source nodes are ordered by non-decreasing span start.
    #[test]
    fn source_nodes_are_sorted_by_source_start() {
        let text = "|青梅《おうめ》街道沿いに、※[#「木+吶のつくり」、第3水準1-85-54]";
        let doc = Document::new(text);
        let tree = doc.parse();
        let nodes = tree.source_nodes();
        for (earlier, later) in nodes.iter().zip(nodes.iter().skip(1)) {
            assert!(earlier.source_span.start <= later.source_span.start);
        }
    }

    /// Default options and `Document::new` produce the same serialized output.
    #[test]
    fn parse_options_default_matches_document_new() {
        let text = "|青梅《おうめ》";
        let via_new = Document::new(text);
        let via_options = ParseOptions::new().build(text);
        let expected = via_new.parse().serialize();
        let actual = via_options.parse().serialize();
        assert_eq!(expected, actual);
    }

    /// A small explicit capacity keeps arena usage bounded for a tiny source.
    #[test]
    fn parse_options_arena_capacity_is_honoured() {
        let doc = ParseOptions::new()
            .arena_capacity(16 * 1024)
            .build("plain text");
        drop(doc.parse());
        let used = doc.arena_bytes();
        assert!(
            used <= 64 * 1024,
            "arena bytes should not balloon for a tiny source: {}",
            used
        );
    }

    /// With no internal diagnostics present, both policies report the same count.
    #[test]
    fn parse_options_drop_internal_filters_internal_diagnostics() {
        let doc_collect = Document::options()
            .diagnostic_policy(DiagnosticPolicy::CollectAll)
            .build("plain text");
        let doc_drop = Document::options()
            .diagnostic_policy(DiagnosticPolicy::DropInternal)
            .build("plain text");

        let collected = doc_collect.parse().diagnostics().len();
        let filtered = doc_drop.parse().diagnostics().len();
        assert_eq!(
            collected, filtered,
            "policy is a no-op when no Internal diagnostics exist"
        );
    }

    /// Parsing a much larger source leaves more bytes allocated in the arena.
    #[test]
    fn arena_grows_with_source_size() {
        let small = Document::new("a");
        drop(small.parse());

        let big = Document::new("|青梅《おうめ》".repeat(100));
        drop(big.parse());

        assert!(big.arena_bytes() > small.arena_bytes());
    }
}