carta_core/lib.rs
1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![warn(missing_docs)]
3//! Shared carta core: the conversion traits, their option types, and the common error type.
4//!
5//! [`Reader`] turns input text into a [`Document`]; [`Writer`] turns a [`Document`] back into
6//! output text. Readers and writers depend only on the AST contract and this crate, so input and
7//! output formats stay independent.
8
9use std::fmt;
10use std::io;
11use std::sync::Arc;
12
13use carta_ast::{Block, Document, Inline};
14
15#[cfg(feature = "container")]
16#[cfg_attr(docsrs, doc(cfg(feature = "container")))]
17pub mod container;
18pub mod extensions;
19pub mod media;
20pub mod sections;
21#[cfg(feature = "template")]
22#[cfg_attr(docsrs, doc(cfg(feature = "template")))]
23pub mod template;
24pub mod walk;
25
26pub use extensions::{Extension, Extensions, presets};
27pub use media::{MediaBag, MediaItem};
28
29/// The error type returned across the conversion pipeline.
30#[derive(Debug, thiserror::Error)]
31pub enum Error {
32 /// JSON input or output could not be (de)serialized.
33 #[error("JSON error: {0}")]
34 Json(#[from] serde_json::Error),
35 /// An I/O operation failed.
36 #[error("I/O error: {0}")]
37 Io(#[from] io::Error),
38 /// Input handed to a text reader was not valid UTF-8.
39 #[error("input is not valid UTF-8: {0}")]
40 InvalidUtf8(#[from] std::str::Utf8Error),
41 /// A text-only API was asked for a format whose output is binary; use the byte-capable API.
42 #[error("format '{0}' converts binary data; use the byte-capable API (convert)")]
43 BinaryFormat(String),
44 /// The named format is not recognized.
45 #[error("unsupported format: {0}")]
46 UnsupportedFormat(String),
47 /// The named format is recognized but not compiled into this build.
48 #[error("format '{0}' is recognized but not enabled in this build")]
49 FormatNotEnabled(String),
50 /// A `+`/`-` toggle named an extension that is not modeled.
51 #[error("unknown extension: {0}")]
52 UnknownExtension(String),
53 /// A modeled extension does not apply to the given format.
54 #[error(
55 "The extension '{extension}' is not supported for {format}.\nUse --list-extensions={format} to list supported extensions."
56 )]
57 UnsupportedExtension {
58 /// The extension the format does not support.
59 extension: String,
60 /// The format that does not support the extension.
61 format: String,
62 },
63 /// Document metadata could not be parsed.
64 #[error("invalid document metadata: {0}")]
65 InvalidMetadata(String),
66 /// A standalone template failed to parse or render.
67 #[error("template error: {0}")]
68 Template(String),
69 /// The document holds content the target format cannot represent.
70 #[error("cannot represent this content in the target format: {0}")]
71 Unrepresentable(String),
72 /// Building or reading a container archive failed.
73 #[error("container error: {0}")]
74 Container(String),
75 /// A document filter failed to run or returned an unusable result.
76 #[error("filter error: {0}")]
77 Filter(String),
78}
79
/// Folds a template failure into [`Error::Template`], keeping only its display text.
#[cfg(feature = "template")]
impl From<template::TemplateError> for Error {
    fn from(source: template::TemplateError) -> Self {
        let message = source.to_string();
        Self::Template(message)
    }
}
86
87/// A `Result` whose error is [`Error`].
88pub type Result<T> = std::result::Result<T, Error>;
89
90/// Options controlling a [`Reader`]. Extended (not resignatured) as real options land.
91#[derive(Debug, Clone, Default)]
92#[non_exhaustive]
93pub struct ReaderOptions {
94 /// Format extensions to enable. Strict-CommonMark readers ignore this (the empty preset).
95 pub extensions: Extensions,
96 /// When set, an open paragraph is greedy: a following line that would otherwise open a block —
97 /// a blockquote, heading, list, thematic break, fenced div, or footnote definition — is folded
98 /// into the paragraph as a lazy continuation instead. Only a blank line, a fenced code block, or
99 /// an HTML block ends the paragraph. Unset, every such line interrupts the paragraph.
100 pub greedy_paragraphs: bool,
101}
102
/// The math renderer chosen for a format that offers more than one (the HTML family).
///
/// The choice governs two things: the inline markup placed inside a `span.math`, and the loader a
/// standalone document pulls in to typeset it. A MathJax (or plain) document carries the source
/// TeX wrapped in `\(…\)` / `\[…\]`; a KaTeX document carries the bare TeX, which its in-browser
/// loader reads directly from the span.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub enum MathMethod {
    /// No renderer at all (the default): the `\(…\)` / `\[…\]` markup is left for the reader to
    /// typeset, or to read as source.
    #[default]
    Plain,
    /// MathJax, loaded from the script URL held here. The `\(…\)` / `\[…\]` delimiters are kept.
    MathJax(String),
    /// KaTeX, loaded from the asset base URL held here — the directory containing `katex.min.js`
    /// and its stylesheet. The span holds bare TeX with no delimiters.
    Katex(String),
}
120
/// The way a writer provides its table of contents.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum TocStyle {
    /// The default: the contents are rendered as a nested list and stored in the `toc` template
    /// variable.
    #[default]
    List,
    /// The format builds the contents itself from a directive in its template; only a boolean
    /// `toc` flag is exposed, and no list is generated.
    Native,
}
132
/// The line-layout policy a text writer applies to paragraphs.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum WrapMode {
    /// The default: inline content is reflowed and lines are broken to stay within the fill
    /// column. A soft line break in the source counts as ordinary inter-word space and is
    /// reflowed along with everything else.
    #[default]
    Auto,
    /// Paragraphs are never broken: each becomes a single line, with soft breaks rendered as
    /// spaces. Lines run as long as their content; only an explicit hard break starts a new one.
    None,
    /// The source's own line breaks are kept: a soft break remains a line break and content is
    /// not reflowed, but lines are not wrapped to a column either.
    Preserve,
}
147
/// Settings read only by the EPUB container writer; every other writer ignores them.
///
/// The default describes an empty book: no cover, no embedded fonts, only the built-in
/// stylesheet, and chapters split at the top heading level.
#[derive(Debug, Clone, Default)]
#[non_exhaustive]
pub struct EpubOptions {
    /// The cover image, given as `(file name, bytes)`. It produces a dedicated cover page and is
    /// marked as the publication cover.
    pub cover_image: Option<(String, Vec<u8>)>,

    /// Fonts embedded verbatim, each given as `(file name, bytes)`. A stylesheet refers to them
    /// by name.
    pub fonts: Vec<(String, Vec<u8>)>,

    /// Contents of the user stylesheets, linked from every page in the order given. Supplying any
    /// replaces the built-in stylesheet entirely; leaving this empty keeps the built-in one.
    pub stylesheets: Vec<String>,

    /// A fragment of Dublin Core metadata (bare `<dc:*>` elements) that is merged into the
    /// package metadata.
    pub metadata_xml: Option<String>,

    /// The directory inside the container that holds all publication content. `None` selects the
    /// conventional `EPUB`; an empty string puts the content at the archive root.
    pub subdirectory: Option<String>,

    /// The heading level at which the book is cut into separate chapter files. `None` splits at
    /// the top level, so every level-one heading begins a new file.
    pub split_level: Option<usize>,

    /// The publication's modification timestamp, in seconds since the Unix epoch. `None` uses a
    /// fixed epoch so that output stays byte-reproducible.
    pub source_date_epoch: Option<i64>,

    /// The process locale (the `LANG` environment variable) whose language tag is used when the
    /// document names no `lang`. `None` falls back to `en-US`, which keeps the output independent
    /// of the environment.
    pub locale: Option<String>,
}
185
186/// Options controlling a [`Writer`]. Extended (not resignatured) as real options land.
187#[derive(Debug, Clone, Default)]
188#[non_exhaustive]
189pub struct WriterOptions {
190 /// Format extensions to enable.
191 pub extensions: Extensions,
192
193 /// The embedded resources the document references by name but does not carry inline. A writer
194 /// that re-embeds resource bytes — a notebook re-encoding its image outputs — reads them from
195 /// here; most writers ignore it. Shared cheaply, so cloning the options does not copy the bytes.
196 pub media: Arc<MediaBag>,
197
198 /// Options for the EPUB container writer; ignored by every other writer.
199 pub epub: EpubOptions,
200
201 /// How paragraphs are laid out: reflowed to the fill column, never wrapped, or with the source's
202 /// own line breaks preserved.
203 pub wrap: WrapMode,
204
205 /// The fill column a wrapping writer reflows to under [`WrapMode::Auto`]. `None` uses the
206 /// writer's built-in default width.
207 pub columns: Option<usize>,
208
209 /// Splice a hierarchical section number into each heading. A format that numbers headings with a
210 /// typesetting counter applies it through its template instead (see
211 /// [`Writer::numbers_sections_natively`]).
212 pub number_sections: bool,
213
214 /// Emit a table of contents in a standalone document.
215 pub toc: bool,
216
217 /// The deepest heading level the table of contents includes. `None` uses the conventional depth
218 /// of three.
219 pub toc_depth: Option<usize>,
220
221 /// How math is presented by a format offering a choice of renderers (the HTML family).
222 pub math_method: MathMethod,
223
224 /// Emit a complete document by wrapping the rendered body in the target format's template,
225 /// rather than a bare fragment.
226 #[cfg(feature = "template")]
227 #[cfg_attr(docsrs, doc(cfg(feature = "template")))]
228 pub standalone: bool,
229
230 /// Template source overriding the format's built-in default. Its presence implies standalone
231 /// output.
232 #[cfg(feature = "template")]
233 #[cfg_attr(docsrs, doc(cfg(feature = "template")))]
234 pub template: Option<String>,
235
236 /// Directory used to resolve template partials (`$name()$`).
237 #[cfg(feature = "template")]
238 #[cfg_attr(docsrs, doc(cfg(feature = "template")))]
239 pub template_dir: Option<std::path::PathBuf>,
240
241 /// A shared directory of partials (`$name()$`) consulted when a partial is not found beside the
242 /// including template — the data directory's `templates/`. `None` when no data directory applies.
243 #[cfg(feature = "template")]
244 #[cfg_attr(docsrs, doc(cfg(feature = "template")))]
245 pub template_datadir: Option<std::path::PathBuf>,
246
247 /// Extension a partial (`$name()$`) inherits from the including template: the `--template`
248 /// file's own extension, so the same partial name resolves to the same kind of file whatever
249 /// the output format. An empty string means the template file had no extension (the partial is
250 /// looked up bare). Absent for a built-in default, where the format name is used instead.
251 #[cfg(feature = "template")]
252 #[cfg_attr(docsrs, doc(cfg(feature = "template")))]
253 pub template_ext: Option<String>,
254
255 /// Raw template variables, in order; a repeated key accumulates into a list. Inserted verbatim
256 /// (unescaped) at the highest precedence when building the template context.
257 #[cfg(feature = "template")]
258 #[cfg_attr(docsrs, doc(cfg(feature = "template")))]
259 pub variables: Vec<(String, String)>,
260
261 /// Metadata layered *above* the document's own (the `-M` layer): each key replaces the reader's
262 /// value for that key when the context is built.
263 #[cfg(feature = "template")]
264 #[cfg_attr(docsrs, doc(cfg(feature = "template")))]
265 pub metadata: std::collections::BTreeMap<String, carta_ast::MetaValue>,
266
267 /// Metadata layered *below* the document's own (the metadata-file layer): supplies defaults the
268 /// reader's values and `-M` override.
269 #[cfg(feature = "template")]
270 #[cfg_attr(docsrs, doc(cfg(feature = "template")))]
271 pub metadata_defaults: std::collections::BTreeMap<String, carta_ast::MetaValue>,
272
273 /// The source name a standalone document falls back to when no `title` metadata is present: an
274 /// input file's stem, or `-` for standard input. `None` outside the command line, where there is
275 /// no source name and the fallback is empty. Consumed by the HTML family (for its `pagetitle`)
276 /// and by the container writer (for the navigation document's title).
277 #[cfg(any(feature = "template", feature = "container"))]
278 #[cfg_attr(docsrs, doc(cfg(any(feature = "template", feature = "container"))))]
279 pub source_name: Option<String>,
280}
281
282/// Parses input text in some source format into the document model.
283pub trait Reader {
284 /// Parses `input` text into a document.
285 ///
286 /// # Errors
287 /// Propagates any error from parsing the input.
288 fn read(&self, input: &str, options: &ReaderOptions) -> Result<Document>;
289
290 /// Reads `input` into a document together with the embedded resources it references. The default
291 /// carries no resources; a container format — a notebook with image outputs — overrides this to
292 /// decode those bytes into the returned [`MediaBag`], and implements [`read`](Reader::read) by
293 /// discarding the bag.
294 ///
295 /// # Errors
296 /// Propagates any error from parsing the input.
297 fn read_media(&self, input: &str, options: &ReaderOptions) -> Result<(Document, MediaBag)> {
298 Ok((self.read(input, options)?, MediaBag::new()))
299 }
300}
301
/// The set of plain-text identity variables that a writer's standalone template uses.
///
/// The document's title, authors, and date are offered as markup-free, target-escaped text for
/// places that cannot hold markup — a web document head, or a PDF document's properties. See
/// [`Writer::meta_var_style`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum MetaVarStyle {
    /// The default: the format exposes none of these variables.
    #[default]
    None,
    /// For a web document head: `pagetitle` (the title, falling back to the source name),
    /// `date-meta` (the date), and `author-meta` (the authors, one list entry each).
    Web,
    /// For a PDF document's properties: `title-meta` (the title) and `author-meta` (the authors
    /// joined into a single string with `; `).
    Pdf,
}
317
318/// Renders the document model into some target format's text.
319///
320/// The returned string carries no trailing newline; the CLI appends exactly one.
321pub trait Writer {
322 /// Renders `document` into this format's text.
323 ///
324 /// # Errors
325 /// Propagates any error from rendering the document.
326 fn write(&self, document: &Document, options: &WriterOptions) -> Result<String>;
327
328 /// Render an inline sequence in this format, for interpolating inline metadata (a `title`, an
329 /// `author`) into a template variable. Wrapping the inlines in a [`Block::Plain`] yields them
330 /// with no paragraph chrome across formats; a writer whose `Plain` diverges overrides this.
331 ///
332 /// # Errors
333 /// Propagates any error from [`Writer::write`].
334 fn render_meta_inlines(&self, inlines: &[Inline], options: &WriterOptions) -> Result<String> {
335 let document = Document {
336 blocks: vec![Block::Plain(inlines.to_vec())],
337 ..Document::default()
338 };
339 Ok(self
340 .write(&document, options)?
341 .trim_end_matches('\n')
342 .to_string())
343 }
344
345 /// Render a block sequence in this format, for interpolating block metadata (an `abstract`
346 /// authored as Markdown blocks) into a template variable.
347 ///
348 /// # Errors
349 /// Propagates any error from [`Writer::write`].
350 fn render_meta_blocks(&self, blocks: &[Block], options: &WriterOptions) -> Result<String> {
351 let document = Document {
352 blocks: blocks.to_vec(),
353 ..Document::default()
354 };
355 Ok(self
356 .write(&document, options)?
357 .trim_end_matches('\n')
358 .to_string())
359 }
360
361 /// This format's own standalone template, or `None` when standalone output is identical to the
362 /// fragment (no wrapping document exists for the format).
363 fn default_template(&self) -> Option<&'static str> {
364 None
365 }
366
367 /// A standalone document this format assembles structurally, embedding the metadata and block
368 /// list in one value rather than wrapping a text body in a template — the data form is the
369 /// canonical example. Returned in place of template rendering. `None` (the default) when the
370 /// format wraps its body with a text template instead.
371 ///
372 /// # Errors
373 /// Propagates any error from rendering the document.
374 fn standalone_document(
375 &self,
376 document: &Document,
377 options: &WriterOptions,
378 ) -> Result<Option<String>> {
379 let _ = (document, options);
380 Ok(None)
381 }
382
383 /// Which plain-text identity variables this writer's standalone template draws on — the title,
384 /// authors, and date as markup-free text. The default is [`MetaVarStyle::None`]; an HTML-family
385 /// writer returns [`MetaVarStyle::Web`] and a LaTeX-family writer [`MetaVarStyle::Pdf`].
386 fn meta_var_style(&self) -> MetaVarStyle {
387 MetaVarStyle::None
388 }
389
390 /// Whether block-shaped metadata is flattened to its inline content when built into the template
391 /// context. A writer that places title, author, and date into single-line header fields — a man
392 /// page's `.TH` line cannot carry paragraph structure — sets this so a lone-paragraph value
393 /// contributes its inline text and any other block shape contributes nothing. The default `false`
394 /// renders block metadata as blocks.
395 fn flatten_block_metadata(&self) -> bool {
396 false
397 }
398
399 /// A title presentation the template language cannot express from individual variables — an
400 /// underlined title for reStructuredText, say, whose rule length depends on the rendered title
401 /// width. Exposed to the template as the `titleblock` variable. `None` (the default) when the
402 /// format builds its title presentation from individual variables instead.
403 ///
404 /// # Errors
405 /// Propagates any error from rendering the metadata.
406 fn title_block(&self, document: &Document, options: &WriterOptions) -> Result<Option<String>> {
407 let _ = (document, options);
408 Ok(None)
409 }
410
411 /// Whether this writer lays the document out as newline-terminated lines, so a non-empty `body`
412 /// template variable ends with a newline. Writers that build their markup as one string ending
413 /// at its final glyph (HTML, LaTeX, and the like) leave the default `false`.
414 fn body_ends_with_newline(&self) -> bool {
415 false
416 }
417
418 /// How this writer supplies a table of contents. The default renders a nested list into the
419 /// `toc` variable; a format whose template assembles its own contents from a directive overrides
420 /// to [`TocStyle::Native`].
421 fn toc_style(&self) -> TocStyle {
422 TocStyle::List
423 }
424
425 /// Whether a list-style table of contents attaches a back-reference anchor — an `id` on each
426 /// entry's link — so the entries can be linked to. The default includes them; a format that
427 /// cannot represent an inline identifier (so an attributed link would degrade to raw markup)
428 /// overrides to `false`. Honored only when [`toc_style`](Writer::toc_style) is [`TocStyle::List`].
429 fn toc_link_anchors(&self) -> bool {
430 true
431 }
432
433 /// Whether this format numbers sections with its own typesetting counter rather than carrying the
434 /// number in the heading text. The default splices a `header-section-number` span into each
435 /// heading; a format with a native counter (the typesetting formats) overrides to `true` and is
436 /// driven by a `numbersections` template flag instead.
437 fn numbers_sections_natively(&self) -> bool {
438 false
439 }
440
441 /// Whether this writer carries section numbers in the heading text, so the number is spliced into
442 /// each heading before rendering (and contents entries inherit it). The default leaves headings
443 /// untouched; a format that renders the number inline (HTML) overrides to `true`. A format with a
444 /// native counter relies on [`numbers_sections_natively`](Writer::numbers_sections_natively)
445 /// instead and leaves this `false`.
446 fn numbers_sections_in_body(&self) -> bool {
447 false
448 }
449}
450
451/// Parses input bytes in some source format into the document model. The byte-shaped counterpart of
452/// [`Reader`], for formats whose wire form is not text — zip containers and the like.
453pub trait BytesReader {
454 /// Parses `input` bytes into a document.
455 ///
456 /// # Errors
457 /// Propagates any error from parsing the input.
458 fn read(&self, input: &[u8], options: &ReaderOptions) -> Result<Document>;
459
460 /// Reads `input` into a document together with the embedded resources it references. The
461 /// byte-shaped counterpart of [`Reader::read_media`]; the default carries no resources.
462 ///
463 /// # Errors
464 /// Propagates any error from parsing the input.
465 fn read_media(&self, input: &[u8], options: &ReaderOptions) -> Result<(Document, MediaBag)> {
466 Ok((self.read(input, options)?, MediaBag::new()))
467 }
468}
469
470/// Renders the document model into some target format's bytes. The byte-shaped counterpart of
471/// [`Writer`], for formats whose output is not text — zip containers and the like.
472///
473/// This trait carries no decoration hooks (templates, table of contents, metadata rendering): a
474/// container writer produces a complete document by construction. Hooks are added when a real format
475/// needs them.
476pub trait BytesWriter {
477 /// Renders `document` into this format's bytes.
478 ///
479 /// # Errors
480 /// Propagates any error from rendering the document.
481 fn write(&self, document: &Document, options: &WriterOptions) -> Result<Vec<u8>>;
482}
483
/// What a conversion produced: text when the writer is text-shaped, bytes when it is byte-shaped.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Output {
    /// The string produced by a text-shaped writer.
    Text(String),
    /// The raw bytes produced by a byte-shaped writer.
    Bytes(Vec<u8>),
}
492
493/// A resolved reader, either text-shaped ([`Reader`]) or byte-shaped ([`BytesReader`]).
494pub enum AnyReader {
495 /// A text-shaped reader; input is decoded as UTF-8 before parsing.
496 Text(Box<dyn Reader>),
497 /// A byte-shaped reader; input is parsed from raw bytes.
498 Bytes(Box<dyn BytesReader>),
499}
500
501impl fmt::Debug for AnyReader {
502 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
503 let variant = match self {
504 AnyReader::Text(_) => "Text",
505 AnyReader::Bytes(_) => "Bytes",
506 };
507 f.debug_tuple(variant).finish()
508 }
509}
510
511impl AnyReader {
512 /// Reads `input` into a document. A text reader decodes the bytes as UTF-8 first; a byte reader
513 /// takes the raw slice.
514 ///
515 /// # Errors
516 /// [`Error::InvalidUtf8`] if a text reader is handed input that is not valid UTF-8, plus any error
517 /// the underlying reader returns.
518 pub fn read(&self, input: &[u8], options: &ReaderOptions) -> Result<Document> {
519 match self {
520 AnyReader::Text(reader) => reader.read(std::str::from_utf8(input)?, options),
521 AnyReader::Bytes(reader) => reader.read(input, options),
522 }
523 }
524
525 /// Reads `input` into a document together with the embedded resources it references. A text
526 /// reader decodes the bytes as UTF-8 first; a byte reader takes the raw slice. A reader that
527 /// carries no resources returns an empty [`MediaBag`].
528 ///
529 /// # Errors
530 /// [`Error::InvalidUtf8`] if a text reader is handed input that is not valid UTF-8, plus any
531 /// error the underlying reader returns.
532 pub fn read_media(
533 &self,
534 input: &[u8],
535 options: &ReaderOptions,
536 ) -> Result<(Document, MediaBag)> {
537 match self {
538 AnyReader::Text(reader) => reader.read_media(std::str::from_utf8(input)?, options),
539 AnyReader::Bytes(reader) => reader.read_media(input, options),
540 }
541 }
542}
543
544/// A resolved writer, either text-shaped ([`Writer`]) or byte-shaped ([`BytesWriter`]).
545pub enum AnyWriter {
546 /// A text-shaped writer; rendering produces a string.
547 Text(Box<dyn Writer>),
548 /// A byte-shaped writer; rendering produces raw bytes.
549 Bytes(Box<dyn BytesWriter>),
550}
551
552impl fmt::Debug for AnyWriter {
553 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
554 let variant = match self {
555 AnyWriter::Text(_) => "Text",
556 AnyWriter::Bytes(_) => "Bytes",
557 };
558 f.debug_tuple(variant).finish()
559 }
560}
561
562impl AnyWriter {
563 /// This format's own standalone template, or `None` when standalone output is identical to the
564 /// fragment. A byte-shaped writer never has one.
565 #[must_use]
566 pub fn default_template(&self) -> Option<&'static str> {
567 match self {
568 AnyWriter::Text(writer) => writer.default_template(),
569 AnyWriter::Bytes(_) => None,
570 }
571 }
572}
573
#[cfg(test)]
mod tests {
    use super::{
        AnyReader, AnyWriter, BytesReader, BytesWriter, Error, Reader, ReaderOptions, Result,
        WriterOptions,
    };
    use carta_ast::Document;

    /// A byte writer that always emits the same three bytes, which are not valid UTF-8.
    struct FixedBytesWriter;
    impl BytesWriter for FixedBytesWriter {
        fn write(&self, _document: &Document, _options: &WriterOptions) -> Result<Vec<u8>> {
            Ok(vec![0x00, 0xff, 0x9f])
        }
    }

    /// A byte reader that asserts it was handed the exact invalid-UTF-8 input the tests pass in.
    struct RawBytesReader;
    impl BytesReader for RawBytesReader {
        fn read(&self, input: &[u8], _options: &ReaderOptions) -> Result<Document> {
            assert_eq!(input, &[0xff, 0xfe]);
            Ok(Document::default())
        }
    }

    /// A text reader that ignores its input and returns an empty document.
    struct EmptyTextReader;
    impl Reader for EmptyTextReader {
        fn read(&self, _input: &str, _options: &ReaderOptions) -> Result<Document> {
            Ok(Document::default())
        }
    }

    #[test]
    fn bytes_writer_round_trips_bytes() {
        let writer = AnyWriter::Bytes(Box::new(FixedBytesWriter));
        assert!(writer.default_template().is_none());
        let AnyWriter::Bytes(inner) = &writer else {
            panic!("expected a byte writer");
        };
        let output = inner
            .write(&Document::default(), &WriterOptions::default())
            .unwrap();
        assert_eq!(output, vec![0x00, 0xff, 0x9f]);
    }

    #[test]
    fn text_reader_rejects_invalid_utf8() {
        let reader = AnyReader::Text(Box::new(EmptyTextReader));
        let error = reader
            .read(&[0xff, 0xfe], &ReaderOptions::default())
            .unwrap_err();
        assert!(matches!(error, Error::InvalidUtf8(_)), "{error:?}");
    }

    // `read_media` decodes through its own match arm, so its UTF-8 check needs separate coverage.
    #[test]
    fn text_read_media_rejects_invalid_utf8() {
        let reader = AnyReader::Text(Box::new(EmptyTextReader));
        let error = reader
            .read_media(&[0xff, 0xfe], &ReaderOptions::default())
            .unwrap_err();
        assert!(matches!(error, Error::InvalidUtf8(_)), "{error:?}");
    }

    #[test]
    fn bytes_reader_accepts_invalid_utf8() {
        let reader = AnyReader::Bytes(Box::new(RawBytesReader));
        assert!(
            reader
                .read(&[0xff, 0xfe], &ReaderOptions::default())
                .is_ok()
        );
    }

    // The default `BytesReader::read_media` must forward the raw slice (checked by the assertion
    // inside `RawBytesReader::read`) and return an empty bag.
    #[test]
    fn bytes_read_media_passes_raw_bytes_through() {
        let reader = AnyReader::Bytes(Box::new(RawBytesReader));
        let (_, media) = reader
            .read_media(&[0xff, 0xfe], &ReaderOptions::default())
            .expect("read succeeds");
        assert!(media.is_empty());
    }

    #[test]
    fn default_read_media_carries_no_resources() {
        let reader = AnyReader::Text(Box::new(EmptyTextReader));
        let (_, media) = reader
            .read_media(b"anything", &ReaderOptions::default())
            .expect("read succeeds");
        assert!(media.is_empty());
    }

    // The hand-written `Debug` impls print only the variant name.
    #[test]
    fn debug_names_only_the_variant() {
        let text_reader = AnyReader::Text(Box::new(EmptyTextReader));
        assert_eq!(format!("{text_reader:?}"), "Text");
        let bytes_reader = AnyReader::Bytes(Box::new(RawBytesReader));
        assert_eq!(format!("{bytes_reader:?}"), "Bytes");
        let writer = AnyWriter::Bytes(Box::new(FixedBytesWriter));
        assert_eq!(format!("{writer:?}"), "Bytes");
    }
}