#![cfg_attr(all(not(feature = "std"), not(test)), no_std)]
#[cfg(feature = "alloc")]
#[macro_use]
extern crate alloc;
#[cfg(feature = "std")]
extern crate std;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::fmt;
#[cfg(feature = "std")]
use std::fmt;
#[cfg(feature = "alloc")]
mod edit;
#[cfg(feature = "alloc")]
pub mod editor;
#[cfg(feature = "alloc")]
pub use edit::EditError;
#[cfg(feature = "alloc")]
mod validate;
#[cfg(feature = "alloc")]
pub use validate::{ValidationError, ValidationErrorKind, ValidationMode};
/// A classified region of the parsed input.
///
/// `start` and `end` are byte offsets into the input text; `end` is exclusive,
/// so the region covers `input[start..end]`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Span {
/// Token category of this region.
pub kind: SpanKind,
/// Byte offset of the first byte of the region.
pub start: u32,
/// Byte offset one past the last byte of the region.
pub end: u32,
}
/// Token categories reported by the lexer.
///
/// The enum is `#[repr(u8)]` with explicit discriminants, so the numeric value
/// of each variant is fixed by this declaration.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
#[repr(u8)]
pub enum SpanKind {
/// A run of spaces and/or tabs.
Whitespace = 0,
/// A line break: `\n`, `\r\n`, or a lone `\r`.
Newline = 1,
/// A comment starting with `#` or `;`, up to (not including) the line break.
Comment = 2,
/// A run of bare-key characters; also used as the fallback kind for input
/// that matches no other rule.
BareKey = 3,
/// A single-line `"..."` string, quotes included.
BasicString = 4,
/// A single-line `'...'` string, quotes included.
LiteralString = 5,
/// A `"""..."""` string, delimiters included.
MlBasicString = 6,
/// A `'''...'''` string, delimiters included.
MlLiteralString = 7,
/// A numeric token without `.`, exponent, `inf` or `nan` (including
/// `0x`/`0o`/`0b` prefixed forms).
Integer = 8,
/// A numeric token with a fraction or exponent, or `inf`/`nan`.
Float = 9,
/// The literal `true` or `false`.
Boolean = 10,
/// A date, time, or date-time token.
Datetime = 11,
/// A `[` that is not part of a line-leading `[[`.
ArrayOpen = 12,
/// A `]` that does not close an array-table header.
ArrayClose = 13,
/// A `[[` that is the first non-blank token on its line.
ArrayTableOpen = 14,
/// A `]]` seen while an `ArrayTableOpen` is still unclosed.
ArrayTableClose = 15,
/// A `{`.
InlineTableOpen = 16,
/// A `}`.
InlineTableClose = 17,
/// A `=`.
Equals = 18,
/// A `.` outside of a number or datetime token.
Dot = 19,
/// A `,`.
Comma = 20,
}
/// Receiver for the spans produced by `parse_into`.
///
/// Implement this to collect or process spans without requiring an allocator.
pub trait SpanSink {
/// Called once for every lexed span, in source order, with the span's kind
/// and its byte range (`end` exclusive).
fn emit(&mut self, kind: SpanKind, start: u32, end: u32);
}
/// Error reported when lexing stops early.
#[derive(Debug)]
pub struct ParseError {
/// Byte offset associated with the error. The string lexers report the
/// offset at which the offending string token starts.
pub pos: u32,
/// Static, human-readable description of the problem.
pub msg: &'static str,
}
#[cfg(feature = "std")]
impl std::fmt::Display for ParseError {
    /// Renders the error as `parse error at byte <pos>: <msg>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let ParseError { pos, msg } = self;
        write!(f, "parse error at byte {}: {}", pos, msg)
    }
}
#[cfg(all(feature = "alloc", not(feature = "std")))]
impl alloc::fmt::Display for ParseError {
    /// Renders the error as `parse error at byte <pos>: <msg>`.
    fn fmt(&self, f: &mut alloc::fmt::Formatter<'_>) -> alloc::fmt::Result {
        let ParseError { pos, msg } = self;
        write!(f, "parse error at byte {}: {}", pos, msg)
    }
}
// `ParseError` uses the default `Error` method implementations; this impl is
// only compiled when the `std` feature is enabled.
#[cfg(feature = "std")]
impl std::error::Error for ParseError {}
/// Skips what may follow a backslash at the end of a line: at most one `\r`,
/// then at most one `\n`, then any run of spaces and tabs.
///
/// Returns the offset of the first byte that was not skipped. When nothing
/// matches, `pos` is returned unchanged. Bytes at or beyond `len` are never
/// inspected.
fn skip_trim(bytes: &[u8], pos: usize, len: usize) -> usize {
    // Bounded look-ahead: `None` once the cursor reaches `len`.
    let peek = |at: usize| if at < len { Some(bytes[at]) } else { None };

    let mut cursor = pos;
    if peek(cursor) == Some(b'\r') {
        cursor += 1;
    }
    if peek(cursor) == Some(b'\n') {
        cursor += 1;
    }
    while matches!(peek(cursor), Some(b' ') | Some(b'\t')) {
        cursor += 1;
    }
    cursor
}
/// Lexes a basic (`"..."`) or multi-line basic (`"""..."""`) string whose
/// opening quote is at `pos`.
///
/// On success one `BasicString` or `MlBasicString` span covering
/// `start..returned offset` is emitted. When the input ends before the closing
/// delimiter, `error` is set (with `pos` pointing at `start`), nothing is
/// emitted, and the offset reached is returned.
fn lex_string<S: SpanSink>(
    bytes: &[u8],
    mut pos: usize,
    len: usize,
    sink: &mut S,
    start: usize,
    error: &mut Option<ParseError>,
) -> usize {
    // Advances past one content byte. A backslash that is not the last byte
    // also swallows what follows it: either the line-ending whitespace that
    // `skip_trim` recognises, or exactly one escaped byte.
    fn step(bytes: &[u8], at: usize, len: usize) -> usize {
        let next = at + 1;
        if bytes[at] != b'\\' || next >= len {
            return next;
        }
        let trimmed = skip_trim(bytes, next, len);
        if trimmed == next {
            next + 1
        } else {
            trimmed
        }
    }

    // Number of consecutive `"` bytes starting at `at`, bounded by `len`.
    fn quote_run(bytes: &[u8], at: usize, len: usize) -> usize {
        let mut run = 0;
        while at + run < len && bytes[at + run] == b'"' {
            run += 1;
        }
        run
    }

    let multiline = pos + 2 < len && bytes[pos + 1] == b'"' && bytes[pos + 2] == b'"';

    if !multiline {
        pos += 1;
        while pos < len && bytes[pos] != b'"' {
            pos = step(bytes, pos, len);
        }
        if pos >= len {
            *error = Some(ParseError {
                pos: start as u32,
                msg: "unterminated basic string",
            });
            return pos;
        }
        pos += 1;
        sink.emit(SpanKind::BasicString, start as u32, pos as u32);
        return pos;
    }

    pos += 3;
    // A line break directly after the opening delimiter is skipped up front.
    if pos < len && bytes[pos] == b'\n' {
        pos += 1;
    } else if pos + 1 < len && bytes[pos] == b'\r' && bytes[pos + 1] == b'\n' {
        pos += 2;
    }

    loop {
        if pos >= len {
            *error = Some(ParseError {
                pos: start as u32,
                msg: "unterminated multi-line basic string",
            });
            return pos;
        }
        match quote_run(bytes, pos, len) {
            // Fewer than three quotes: ordinary content (or an escape).
            0..=2 => pos = step(bytes, pos, len),
            // Four or five quotes: the extra leading ones are content, the
            // final three are then seen as the delimiter on the next pass.
            run @ 4..=5 => pos += run - 3,
            // Exactly three, or six and more: close on the first three.
            _ => {
                pos += 3;
                break;
            }
        }
    }
    sink.emit(SpanKind::MlBasicString, start as u32, pos as u32);
    pos
}
/// Lexes a literal (`'...'`) or multi-line literal (`'''...'''`) string whose
/// opening quote is at `pos`.
///
/// On success one `LiteralString` or `MlLiteralString` span covering
/// `start..returned offset` is emitted. On failure `error` is set (with `pos`
/// pointing at `start`), nothing is emitted, and the offset reached is
/// returned. A single-line literal fails on a raw `\n`/`\r` or on end of
/// input; a multi-line literal fails only on end of input.
fn lex_literal_string<S: SpanSink>(
    bytes: &[u8],
    mut pos: usize,
    len: usize,
    sink: &mut S,
    start: usize,
    error: &mut Option<ParseError>,
) -> usize {
    // Number of consecutive `'` bytes starting at `at`, bounded by `len`.
    fn quote_run(bytes: &[u8], at: usize, len: usize) -> usize {
        let mut run = 0;
        while at + run < len && bytes[at + run] == b'\'' {
            run += 1;
        }
        run
    }

    let multiline = pos + 2 < len && bytes[pos + 1] == b'\'' && bytes[pos + 2] == b'\'';

    if !multiline {
        pos += 1;
        loop {
            if pos >= len {
                *error = Some(ParseError {
                    pos: start as u32,
                    msg: "unterminated literal string",
                });
                return pos;
            }
            match bytes[pos] {
                b'\'' => break,
                b'\n' | b'\r' => {
                    *error = Some(ParseError {
                        pos: start as u32,
                        msg: "newline in literal string",
                    });
                    return pos;
                }
                _ => pos += 1,
            }
        }
        pos += 1;
        sink.emit(SpanKind::LiteralString, start as u32, pos as u32);
        return pos;
    }

    pos += 3;
    // A line break directly after the opening delimiter is skipped up front.
    if pos < len && bytes[pos] == b'\n' {
        pos += 1;
    } else if pos + 1 < len && bytes[pos] == b'\r' && bytes[pos + 1] == b'\n' {
        pos += 2;
    }

    loop {
        if pos >= len {
            *error = Some(ParseError {
                pos: start as u32,
                msg: "unterminated multi-line literal string",
            });
            return pos;
        }
        match quote_run(bytes, pos, len) {
            // Fewer than three quotes: ordinary content.
            0..=2 => pos += 1,
            // Four or five quotes: the extra leading ones are content, the
            // final three are then seen as the delimiter on the next pass.
            run @ 4..=5 => pos += run - 3,
            // Exactly three, or six and more: close on the first three.
            _ => {
                pos += 3;
                break;
            }
        }
    }
    sink.emit(SpanKind::MlLiteralString, start as u32, pos as u32);
    pos
}
/// Returns the offset of the first byte at or after `pos` (and before `len`)
/// that is not a bare-key character, or `len` if there is none.
fn lex_bare_key(bytes: &[u8], pos: usize, len: usize) -> usize {
    let mut cursor = pos;
    loop {
        if cursor >= len || !is_bare_key_char(bytes[cursor]) {
            return cursor;
        }
        cursor += 1;
    }
}
/// True for bytes that may begin a bare key in the fallback lexer path:
/// ASCII letters, `_` and `-` (digits are excluded).
fn is_bare_key_lead(b: u8) -> bool {
    b.is_ascii_alphabetic() || b == b'_' || b == b'-'
}
/// True for bytes that may appear inside a bare key: ASCII letters, ASCII
/// digits, `-` and `_`.
fn is_bare_key_char(b: u8) -> bool {
    b.is_ascii_alphanumeric() || b == b'-' || b == b'_'
}
/// Lexes a token that starts like a number and emits exactly one span for it.
///
/// Classification order: datetime first, then number (`Float` or `Integer` as
/// decided by `is_float`), and finally a `BareKey` fallback. The fallback
/// always consumes at least one byte so the caller makes progress.
/// Returns the offset just past the emitted span.
fn lex_number_or_datetime<S: SpanSink>(
    bytes: &[u8],
    pos: usize,
    len: usize,
    sink: &mut S,
    start: usize,
) -> usize {
    let (kind, end) = if let Some(end) = try_datetime(bytes, pos, len) {
        (SpanKind::Datetime, end)
    } else if let Some(end) = try_number(bytes, pos, len) {
        let kind = if is_float(bytes, start, end) {
            SpanKind::Float
        } else {
            SpanKind::Integer
        };
        (kind, end)
    } else {
        let key_end = lex_bare_key(bytes, pos, len);
        // A lone sign is not a bare-key run; consume it as a one-byte span.
        let end = if key_end == pos { pos + 1 } else { key_end };
        (SpanKind::BareKey, end)
    };
    sink.emit(kind, start as u32, end as u32);
    end
}
/// Tries to lex a numeric token starting at `pos`.
///
/// Accepted shapes (deliberately lenient; no semantic validation is done):
/// an optional sign followed by `inf`, `nan`, a `0x`/`0o`/`0b` prefixed run of
/// radix digits and underscores, or a decimal run with an optional `.`
/// fraction and an optional exponent.
///
/// Returns the offset just past the token, or `None` when the bytes at `pos`
/// do not start a number. `inf` and `nan` only match as whole words: if the
/// next byte could continue a bare key (`info`, `nano`, `inf-rate`), `None` is
/// returned so the caller lexes the whole word as a key instead of splitting
/// it.
fn try_number(bytes: &[u8], mut pos: usize, len: usize) -> Option<usize> {
    if pos < len && matches!(bytes[pos], b'+' | b'-') {
        pos += 1;
    }
    if pos >= len {
        return None;
    }

    let remains = &bytes[pos..];
    if remains.starts_with(b"inf") || remains.starts_with(b"nan") {
        let end = pos + 3;
        // Same character set as a bare key: letters, digits, `_` and `-`.
        let continues_word = end < len
            && (bytes[end].is_ascii_alphanumeric() || matches!(bytes[end], b'_' | b'-'));
        return if continues_word { None } else { Some(end) };
    }

    if bytes[pos] == b'0' && pos + 1 < len && matches!(bytes[pos + 1], b'x' | b'o' | b'b') {
        let prefix = bytes[pos + 1];
        pos += 2;
        // At least one real digit must follow the prefix.
        if pos >= len || !is_radix_digit(bytes[pos], prefix) {
            return None;
        }
        while pos < len && (is_radix_digit(bytes[pos], prefix) || bytes[pos] == b'_') {
            pos += 1;
        }
        return Some(pos);
    }

    // From here on at least one digit is consumed, so the result is never an
    // empty token or a bare sign.
    if !bytes[pos].is_ascii_digit() {
        return None;
    }
    while pos < len && (bytes[pos].is_ascii_digit() || bytes[pos] == b'_') {
        pos += 1;
    }
    if pos < len && bytes[pos] == b'.' {
        pos += 1;
        while pos < len && (bytes[pos].is_ascii_digit() || bytes[pos] == b'_') {
            pos += 1;
        }
    }
    if pos < len && matches!(bytes[pos], b'e' | b'E') {
        pos += 1;
        if pos < len && matches!(bytes[pos], b'+' | b'-') {
            pos += 1;
        }
        while pos < len && (bytes[pos].is_ascii_digit() || bytes[pos] == b'_') {
            pos += 1;
        }
    }
    Some(pos)
}
/// Decides whether the numeric token `bytes[start..end]` is a float.
///
/// A token with a `0x`/`0o`/`0b` prefix is never a float, even when its hex
/// digits include `e`/`E`. The prefix is looked for after an optional leading
/// sign, because the number lexer accepts a sign before the prefix. Otherwise
/// the token is a float when it ends in `inf` or `nan`, or contains `.`, `e`
/// or `E`.
fn is_float(bytes: &[u8], start: usize, end: usize) -> bool {
    let token = &bytes[start..end];
    let unsigned = match token.first() {
        Some(&b'+') | Some(&b'-') => &token[1..],
        _ => token,
    };
    if unsigned.len() >= 2 && unsigned[0] == b'0' && matches!(unsigned[1], b'x' | b'o' | b'b') {
        return false;
    }
    if token.ends_with(b"inf") || token.ends_with(b"nan") {
        return true;
    }
    token.iter().any(|&b| matches!(b, b'.' | b'e' | b'E'))
}
/// True when `b` is a valid digit for the radix selected by `prefix`
/// (`x` = hexadecimal, `o` = octal, `b` = binary). Any other prefix yields
/// `false`.
fn is_radix_digit(b: u8, prefix: u8) -> bool {
    let radix = match prefix {
        b'x' => 16,
        b'o' => 8,
        b'b' => 2,
        _ => return false,
    };
    char::from(b).is_digit(radix)
}
/// Tries to lex a date, time, or date-time token starting at `pos`.
///
/// Recognised shapes (digits are checked for position only, not for range):
/// - `HH:MM:SS` with an optional `.fraction`, or `HH:MM`;
/// - `YYYY-MM-DD`, optionally followed by `T`, `t` or a single space and a
///   time (`HH:MM:SS` or `HH:MM`), an optional `.fraction`, and an optional
///   `Z`/`z` or `+HH:MM`/`-HH:MM` offset.
///
/// Returns the offset just past the token, or `None` if no datetime starts at
/// `pos`. A date separator that is not followed by a time is not consumed, so
/// `2024-01-01 # note` yields the ten-byte date without the trailing space.
fn try_datetime(bytes: &[u8], mut pos: usize, len: usize) -> Option<usize> {
    // True when the `count` bytes starting at `at` are all ASCII digits.
    // The caller is responsible for the bounds check.
    fn digits(bytes: &[u8], at: usize, count: usize) -> bool {
        bytes[at..at + count].iter().all(u8::is_ascii_digit)
    }

    // Length of the clock at `at`: 8 for `HH:MM:SS`, 5 for `HH:MM`,
    // `None` when neither shape matches.
    fn clock_len(bytes: &[u8], at: usize, len: usize) -> Option<usize> {
        if at + 5 > len
            || bytes[at + 2] != b':'
            || !digits(bytes, at, 2)
            || !digits(bytes, at + 3, 2)
        {
            return None;
        }
        let has_seconds = at + 8 <= len && bytes[at + 5] == b':' && digits(bytes, at + 6, 2);
        Some(if has_seconds { 8 } else { 5 })
    }

    // Skips an optional `.` followed by any number of digits.
    fn skip_fraction(bytes: &[u8], mut at: usize, len: usize) -> usize {
        if at < len && bytes[at] == b'.' {
            at += 1;
            while at < len && bytes[at].is_ascii_digit() {
                at += 1;
            }
        }
        at
    }

    if pos >= len || !bytes[pos].is_ascii_digit() {
        return None;
    }

    // Stand-alone local time. A fraction is only accepted after seconds here.
    if let Some(clock) = clock_len(bytes, pos, len) {
        pos += clock;
        if clock == 8 {
            pos = skip_fraction(bytes, pos, len);
        }
        return Some(pos);
    }

    let is_date = pos + 10 <= len
        && bytes[pos + 4] == b'-'
        && bytes[pos + 7] == b'-'
        && digits(bytes, pos, 4)
        && digits(bytes, pos + 5, 2)
        && digits(bytes, pos + 8, 2);
    if !is_date {
        return None;
    }
    pos += 10;

    if pos < len && matches!(bytes[pos], b'T' | b't' | b' ') {
        let clock = match clock_len(bytes, pos + 1, len) {
            Some(n) => n,
            // Separator without a time: the token is just the date, and the
            // separator is left for the caller to lex on its own.
            None => return Some(pos),
        };
        pos = skip_fraction(bytes, pos + 1 + clock, len);
        if pos < len {
            if matches!(bytes[pos], b'Z' | b'z') {
                pos += 1;
            } else if pos + 6 <= len
                && matches!(bytes[pos], b'+' | b'-')
                && bytes[pos + 3] == b':'
            {
                pos += 6;
            }
        }
    }
    Some(pos)
}
pub fn parse_into(input: &str, sink: &mut impl SpanSink) -> Result<(), ParseError> {
let bytes = input.as_bytes();
let len = bytes.len();
let mut pos = 0usize;
let mut error = None;
let mut aot_depth: usize = 0;
fn is_line_start(bytes: &[u8], p: usize) -> bool {
if p == 0 {
return true;
}
let mut i = p;
while i > 0 {
i -= 1;
match bytes[i] {
b' ' | b'\t' => continue,
b'\n' | b'\r' => return true,
_ => return false,
}
}
true
}
while pos < len {
let start = pos;
match bytes[pos] {
b' ' | b'\t' => {
while pos < len && matches!(bytes[pos], b' ' | b'\t') {
pos += 1;
}
sink.emit(SpanKind::Whitespace, start as u32, pos as u32);
}
b'\n' => {
pos += 1;
sink.emit(SpanKind::Newline, start as u32, pos as u32);
}
b'\r' => {
pos += 1;
if pos < len && bytes[pos] == b'\n' {
pos += 1;
}
sink.emit(SpanKind::Newline, start as u32, pos as u32);
}
b'#' | b';' => {
while pos < len && bytes[pos] != b'\n' && bytes[pos] != b'\r' {
pos += 1;
}
sink.emit(SpanKind::Comment, start as u32, pos as u32);
}
b'[' => {
pos += 1;
if pos < len && bytes[pos] == b'[' && is_line_start(bytes, start) {
pos += 1;
sink.emit(SpanKind::ArrayTableOpen, start as u32, pos as u32);
aot_depth += 1;
} else if pos < len && bytes[pos] == b'[' {
pos += 1;
sink.emit(SpanKind::ArrayOpen, start as u32, (start + 1) as u32);
sink.emit(SpanKind::ArrayOpen, (start + 1) as u32, pos as u32);
} else {
sink.emit(SpanKind::ArrayOpen, start as u32, pos as u32);
}
}
b']' => {
pos += 1;
if pos < len && bytes[pos] == b']' && aot_depth > 0 {
pos += 1;
sink.emit(SpanKind::ArrayTableClose, start as u32, pos as u32);
aot_depth -= 1;
} else {
sink.emit(SpanKind::ArrayClose, start as u32, pos as u32);
}
}
b'{' => {
pos += 1;
sink.emit(SpanKind::InlineTableOpen, start as u32, pos as u32);
}
b'}' => {
pos += 1;
sink.emit(SpanKind::InlineTableClose, start as u32, pos as u32);
}
b'=' => {
pos += 1;
sink.emit(SpanKind::Equals, start as u32, pos as u32);
}
b'.' => {
pos += 1;
sink.emit(SpanKind::Dot, start as u32, pos as u32);
}
b',' => {
pos += 1;
sink.emit(SpanKind::Comma, start as u32, pos as u32);
}
b'"' => {
pos = lex_string(bytes, pos, len, sink, start, &mut error);
}
b'\'' => {
pos = lex_literal_string(bytes, pos, len, sink, start, &mut error);
}
b't' | b'f' => {
let remains = &bytes[pos..];
if remains.starts_with(b"true") {
pos += 4;
sink.emit(SpanKind::Boolean, start as u32, pos as u32);
} else if remains.starts_with(b"false") {
pos += 5;
sink.emit(SpanKind::Boolean, start as u32, pos as u32);
} else {
pos = lex_bare_key(bytes, pos, len);
sink.emit(SpanKind::BareKey, start as u32, pos as u32);
}
}
b'+' | b'-' | b'0'..=b'9' | b'i' | b'n' => {
pos = lex_number_or_datetime(bytes, pos, len, sink, start);
}
_ => {
if is_bare_key_lead(bytes[pos]) {
pos = lex_bare_key(bytes, pos, len);
sink.emit(SpanKind::BareKey, start as u32, pos as u32);
} else {
pos += 1;
sink.emit(SpanKind::BareKey, start as u32, pos as u32);
}
}
}
if error.is_some() {
break;
}
}
match error {
Some(e) => Err(e),
None => Ok(()),
}
}
#[cfg(feature = "alloc")]
use alloc::string::{String, ToString};
#[cfg(feature = "alloc")]
use alloc::vec::Vec;
/// An owned document: the source text together with the spans lexed from it.
///
/// Only available with the `alloc` feature.
#[cfg(feature = "alloc")]
pub struct FlatDoc {
/// The document text that the spans' byte offsets refer to.
pub source: String,
/// Spans in source order.
pub spans: Vec<Span>,
/// Lazily built lookup table pairing a key path with its index entry.
/// `None` until the index is first needed.
pub(crate) index: Option<Vec<(Vec<String>, edit::Entry)>>,
}
/// Lexes `input` and returns an owned [`FlatDoc`] holding a copy of the text
/// and every span that was produced. The lookup index is left unbuilt.
///
/// # Errors
///
/// Returns whatever error `parse_into` reports for `input`.
#[cfg(feature = "alloc")]
pub fn parse(input: &str) -> Result<FlatDoc, ParseError> {
    // Sink that simply records every span it is handed.
    struct Collector(Vec<Span>);

    impl SpanSink for Collector {
        fn emit(&mut self, kind: SpanKind, start: u32, end: u32) {
            self.0.push(Span { kind, start, end });
        }
    }

    // Capacity heuristic: roughly one span per six input bytes.
    let mut collector = Collector(Vec::with_capacity(input.len() / 6));
    parse_into(input, &mut collector)?;
    let Collector(spans) = collector;

    Ok(FlatDoc {
        source: input.to_string(),
        spans,
        index: None,
    })
}
#[cfg(feature = "alloc")]
impl Default for FlatDoc {
    /// Returns the same empty document as `FlatDoc::new`.
    fn default() -> Self {
        Self::new()
    }
}
/// Converts the raw source text of a string span into the value it denotes.
///
/// - Literal strings: the delimiters are removed and the content is returned
///   verbatim. Quotes that belong to the content are kept.
/// - Basic strings: the delimiters are removed and backslash escapes are
///   decoded (`\n`, `\t`, `\r`, `\\`, `\"`, `\b`, `\f`, `\uXXXX`,
///   `\UXXXXXXXX`). An unknown escape is dropped together with the byte that
///   follows the backslash; a `\u`/`\U` escape with invalid hex is dropped;
///   a code point that is not a valid `char` becomes U+FFFD.
/// - Multi-line strings additionally lose one line break that directly
///   follows the opening delimiter. In multi-line basic strings a backslash
///   followed by whitespace containing a line break removes that whole
///   whitespace run.
/// - Any other span kind: `raw` is returned unchanged.
///
/// `raw` is expected to include its delimiters. If they are missing, the text
/// is processed as-is instead of panicking.
#[cfg(feature = "alloc")]
fn decode_toml_string(raw: &str, kind: SpanKind) -> String {
    // Removes `delim` from both ends of `raw` when present on both sides.
    fn strip_delims<'a>(raw: &'a str, delim: &str) -> &'a str {
        let n = delim.len();
        if raw.len() >= 2 * n && raw.starts_with(delim) && raw.ends_with(delim) {
            &raw[n..raw.len() - n]
        } else {
            raw
        }
    }

    // Drops a single line break at the very start of a multi-line body.
    fn strip_first_newline(body: &str) -> &str {
        if body.starts_with("\r\n") {
            &body[2..]
        } else if body.starts_with('\n') {
            &body[1..]
        } else {
            body
        }
    }

    fn is_blank(c: char) -> bool {
        matches!(c, ' ' | '\t' | '\r' | '\n')
    }

    let inner = match kind {
        SpanKind::LiteralString => return strip_delims(raw, "'").to_string(),
        SpanKind::MlLiteralString => {
            return strip_first_newline(strip_delims(raw, "'''")).to_string();
        }
        SpanKind::BasicString => strip_delims(raw, "\""),
        SpanKind::MlBasicString => strip_first_newline(strip_delims(raw, "\"\"\"")),
        _ => return raw.to_string(),
    };
    let multiline = matches!(kind, SpanKind::MlBasicString);

    let mut out = String::with_capacity(inner.len());
    let mut chars = inner.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }
        match chars.next() {
            Some('n') => out.push('\n'),
            Some('t') => out.push('\t'),
            Some('r') => out.push('\r'),
            Some('\\') => out.push('\\'),
            Some('"') => out.push('"'),
            Some('b') => out.push('\x08'),
            Some('f') => out.push('\x0C'),
            Some('u') => {
                let hex: String = chars.by_ref().take(4).collect();
                if let Ok(cp) = u32::from_str_radix(&hex, 16) {
                    out.push(char::from_u32(cp).unwrap_or('\u{FFFD}'));
                }
            }
            Some('U') => {
                let hex: String = chars.by_ref().take(8).collect();
                if let Ok(cp) = u32::from_str_radix(&hex, 16) {
                    out.push(char::from_u32(cp).unwrap_or('\u{FFFD}'));
                }
            }
            Some(blank) if multiline && is_blank(blank) => {
                // Line-ending backslash: when the whitespace run after the
                // backslash contains a line break, the whole run (including
                // the next line's indentation) is removed. Without a line
                // break only the single byte after the backslash is dropped.
                let rest = chars.as_str();
                let trimmed = rest.trim_start_matches(is_blank);
                let skipped = &rest[..rest.len() - trimmed.len()];
                if blank == '\n' || skipped.contains('\n') {
                    chars = trimmed.chars();
                }
            }
            _ => {}
        }
    }
    out
}
#[cfg(feature = "alloc")]
impl FlatDoc {
    /// Builds the lookup index with `edit::build_index` the first time it is
    /// needed; does nothing when an index is already present.
    pub(crate) fn build_index_if_needed(&mut self) {
        if self.index.is_some() {
            return;
        }
        let built = edit::build_index(self);
        self.index = Some(built);
    }
}
#[cfg(feature = "alloc")]
impl FlatDoc {
    /// Creates an empty document: no source text, no spans, no index.
    pub fn new() -> Self {
        FlatDoc {
            source: String::new(),
            spans: Vec::new(),
            index: None,
        }
    }

    /// Finds the index entry whose key path equals `path`.
    ///
    /// `path` is split with `editor::split_path` and compared against each
    /// indexed path with `editor::path_eq`; the first match wins. Returns
    /// `None` when nothing matches or when the index has not been built yet.
    fn find_entry(&self, path: &str) -> Option<&edit::Entry> {
        let idx = self.index.as_ref()?;
        let (table, key) = editor::split_path(path);
        let target: Vec<&str> = table
            .iter()
            .map(|s| s.as_str())
            .chain(core::iter::once(key.as_str()))
            .collect();
        idx.iter()
            .find(|(p, _)| editor::path_eq(p, &target))
            .map(|(_, entry)| entry)
    }

    /// Returns `true` when the document has an entry at `path`.
    ///
    /// Builds the lookup index on first use, hence `&mut self`.
    pub fn has(&mut self, path: &str) -> bool {
        self.build_index_if_needed();
        self.find_entry(path).is_some()
    }

    /// Returns the raw source text of the value span recorded for `path`, or
    /// `None` when there is no such entry. The text is not decoded: string
    /// values keep their quotes and escapes.
    ///
    /// Builds the lookup index on first use, hence `&mut self`.
    pub fn get(&mut self, path: &str) -> Option<&str> {
        self.build_index_if_needed();
        let span = self.spans[self.find_entry(path)?.value_idx];
        Some(&self.source[span.start as usize..span.end as usize])
    }

    /// Like [`FlatDoc::get`], but passes the text through the string decoder:
    /// string values are returned without delimiters and with escapes
    /// resolved, every other kind of value is returned as written.
    pub fn get_decoded(&mut self, path: &str) -> Option<String> {
        self.build_index_if_needed();
        let span = self.spans[self.find_entry(path)?.value_idx];
        let raw = &self.source[span.start as usize..span.end as usize];
        Some(decode_toml_string(raw, span.kind))
    }

    /// Returns the distinct first path segments of all indexed entries,
    /// sorted in ascending order.
    pub fn keys(&mut self) -> Vec<String> {
        self.build_index_if_needed();
        let mut keys: Vec<String> = self
            .index
            .as_ref()
            .map(|idx| idx.iter().filter_map(|(p, _)| p.first().cloned()).collect())
            .unwrap_or_default();
        keys.sort();
        keys.dedup();
        keys
    }

    /// Returns `true` when `key` names a table: either some indexed path has
    /// `key` as its first segment and at least one more segment, or the value
    /// recorded for the single-segment path `key` is an inline table.
    pub fn is_table(&mut self, key: &str) -> bool {
        self.build_index_if_needed();
        let has_children = self
            .index
            .as_ref()
            .map_or(false, |idx| idx.iter().any(|(p, _)| p.len() >= 2 && p[0] == key));
        // `is_value_table` already restricts itself to the single-segment
        // entries for `key`, so one pass over the index is enough.
        has_children || self.is_value_table(key)
    }

    /// Returns `true` when a single-segment entry for `key` exists whose
    /// value span is an `InlineTableOpen`. Returns `false` when the index has
    /// not been built.
    fn is_value_table(&self, key: &str) -> bool {
        self.index.as_ref().map_or(false, |idx| {
            idx.iter()
                .filter(|(p, _)| p.len() == 1 && p[0] == key)
                .any(|(_, e)| matches!(self.spans[e.value_idx].kind, SpanKind::InlineTableOpen))
        })
    }

    /// Starts an editing session: returns a handle that borrows this document
    /// mutably and carries a fresh `editor::Editor`.
    pub fn edit(&mut self) -> editor::EditorHandle<'_> {
        editor::EditorHandle {
            doc: self,
            editor: editor::Editor::new(),
        }
    }

    /// Runs `validate::validate` on this document in the given `mode` and
    /// returns the problems it reports.
    pub fn validate(&mut self, mode: ValidationMode) -> Vec<ValidationError> {
        validate::validate(self, mode)
    }
}
#[cfg(feature = "alloc")]
impl fmt::Display for FlatDoc {
    /// Writes the document's source text verbatim; formatter width and
    /// padding options are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text: &str = self.source.as_str();
        f.write_str(text)
    }
}
// `EditError` uses the default `Error` method implementations; this impl is
// only compiled when the `std` feature is enabled.
#[cfg(feature = "std")]
impl std::error::Error for EditError {}