Skip to main content

carta_core/
lib.rs

1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![warn(missing_docs)]
3//! Shared carta core: the conversion traits, their option types, and the common error type.
4//!
5//! [`Reader`] turns input text into a [`Document`]; [`Writer`] turns a [`Document`] back into
6//! output text. Readers and writers depend only on the AST contract and this crate, so input and
7//! output formats stay independent.
8
9use std::fmt;
10use std::io;
11use std::sync::Arc;
12
13use carta_ast::{Block, Document, Inline};
14
15#[cfg(feature = "container")]
16#[cfg_attr(docsrs, doc(cfg(feature = "container")))]
17pub mod container;
18pub mod extensions;
19pub mod media;
20pub mod sections;
21#[cfg(feature = "template")]
22#[cfg_attr(docsrs, doc(cfg(feature = "template")))]
23pub mod template;
24pub mod walk;
25
26pub use extensions::{Extension, Extensions, presets};
27pub use media::{MediaBag, MediaItem};
28
29/// The error type returned across the conversion pipeline.
30#[derive(Debug, thiserror::Error)]
31pub enum Error {
32    /// JSON input or output could not be (de)serialized.
33    #[error("JSON error: {0}")]
34    Json(#[from] serde_json::Error),
35    /// An I/O operation failed.
36    #[error("I/O error: {0}")]
37    Io(#[from] io::Error),
38    /// Input handed to a text reader was not valid UTF-8.
39    #[error("input is not valid UTF-8: {0}")]
40    InvalidUtf8(#[from] std::str::Utf8Error),
41    /// A text-only API was asked for a format whose output is binary; use the byte-capable API.
42    #[error("format '{0}' converts binary data; use the byte-capable API (convert)")]
43    BinaryFormat(String),
44    /// The named format is not recognized.
45    #[error("unsupported format: {0}")]
46    UnsupportedFormat(String),
47    /// The named format is recognized but not compiled into this build.
48    #[error("format '{0}' is recognized but not enabled in this build")]
49    FormatNotEnabled(String),
50    /// A `+`/`-` toggle named an extension that is not modeled.
51    #[error("unknown extension: {0}")]
52    UnknownExtension(String),
53    /// A modeled extension does not apply to the given format.
54    #[error(
55        "The extension '{extension}' is not supported for {format}.\nUse --list-extensions={format} to list supported extensions."
56    )]
57    UnsupportedExtension {
58        /// The extension the format does not support.
59        extension: String,
60        /// The format that does not support the extension.
61        format: String,
62    },
63    /// Document metadata could not be parsed.
64    #[error("invalid document metadata: {0}")]
65    InvalidMetadata(String),
66    /// A standalone template failed to parse or render.
67    #[error("template error: {0}")]
68    Template(String),
69    /// The document holds content the target format cannot represent.
70    #[error("cannot represent this content in the target format: {0}")]
71    Unrepresentable(String),
72    /// Building or reading a container archive failed.
73    #[error("container error: {0}")]
74    Container(String),
75    /// A document filter failed to run or returned an unusable result.
76    #[error("filter error: {0}")]
77    Filter(String),
78}
79
#[cfg(feature = "template")]
impl From<template::TemplateError> for Error {
    /// Converts a template failure into [`Error::Template`], keeping only its displayed message.
    fn from(error: template::TemplateError) -> Self {
        let message = error.to_string();
        Self::Template(message)
    }
}
86
/// A `Result` whose error is [`Error`]; the return type of the conversion traits declared in this
/// file.
pub type Result<T> = std::result::Result<T, Error>;
89
90/// Options controlling a [`Reader`]. Extended (not resignatured) as real options land.
91#[derive(Debug, Clone, Default)]
92#[non_exhaustive]
93pub struct ReaderOptions {
94    /// Format extensions to enable. Strict-CommonMark readers ignore this (the empty preset).
95    pub extensions: Extensions,
96    /// When set, an open paragraph is greedy: a following line that would otherwise open a block —
97    /// a blockquote, heading, list, thematic break, fenced div, or footnote definition — is folded
98    /// into the paragraph as a lazy continuation instead. Only a blank line, a fenced code block, or
99    /// an HTML block ends the paragraph. Unset, every such line interrupts the paragraph.
100    pub greedy_paragraphs: bool,
101}
102
/// The renderer used for math by a format that offers a choice of them (the HTML family).
///
/// The choice governs two things: the inline markup placed inside a `span.math`, and the loader a
/// standalone document pulls in to typeset it. A MathJax (or plain) document carries the source
/// TeX wrapped in `\(…\)` / `\[…\]`; a KaTeX document carries the bare TeX, which its in-browser
/// loader reads straight from the span.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub enum MathMethod {
    /// No renderer at all (the default): the `\(…\)` / `\[…\]` markup stays as it is, for the
    /// reader to typeset or to read as source.
    #[default]
    Plain,
    /// MathJax, fetched from the given script URL; the `\(…\)` / `\[…\]` delimiters are kept.
    MathJax(String),
    /// KaTeX, fetched from the given asset base URL — the directory that holds `katex.min.js` and
    /// its stylesheet. The span holds bare TeX with no delimiters.
    Katex(String),
}
120
/// The way a writer provides its table of contents.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum TocStyle {
    /// The default: the contents are rendered as a nested list, which is stored in the `toc`
    /// template variable.
    #[default]
    List,
    /// A directive in the format's template makes the format build its own contents; only a
    /// boolean `toc` flag is exposed and no list is produced.
    Native,
}
132
/// The line layout a text writer applies to a paragraph.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum WrapMode {
    /// Inline content is reflowed, with lines broken so they stay within the fill column. A soft
    /// line break in the source counts as ordinary inter-word space and is reflowed like the rest.
    #[default]
    Auto,
    /// A paragraph is never broken: it becomes one line, soft breaks turn into spaces, and the
    /// line is as long as its content. Only an explicit hard break begins a new line.
    None,
    /// The line breaks of the source are kept: a soft break remains a line break and nothing is
    /// reflowed, yet lines are not wrapped to a column either.
    Preserve,
}
147
/// Settings for the EPUB container writer; no other writer reads them. The default describes an
/// empty book — no cover, no embedded fonts, only the built-in stylesheet — whose chapters are
/// split at the top heading level.
#[derive(Clone, Debug, Default)]
#[non_exhaustive]
pub struct EpubOptions {
    /// The cover image, given as `(file name, bytes)`. It yields a dedicated cover page and is
    /// marked as the cover of the publication.
    pub cover_image: Option<(String, Vec<u8>)>,

    /// Fonts embedded verbatim, each given as `(file name, bytes)`; a stylesheet refers to them
    /// by name.
    pub fonts: Vec<(String, Vec<u8>)>,

    /// The contents of user stylesheets, each linked from every page in the order given. Supplying
    /// any replaces the built-in stylesheet entirely; an empty list keeps the built-in one.
    pub stylesheets: Vec<String>,

    /// A fragment of Dublin Core metadata (bare `<dc:*>` elements) that is merged into the package
    /// metadata.
    pub metadata_xml: Option<String>,

    /// The directory inside the container that holds all publication content. `None` selects the
    /// conventional `EPUB`; an empty string puts the content at the archive root.
    pub subdirectory: Option<String>,

    /// The heading level at which the book is cut into separate chapter files. `None` cuts at the
    /// top level, so every level-one heading begins a new file.
    pub split_level: Option<usize>,

    /// The modification timestamp of the publication, in seconds since the Unix epoch. `None`
    /// selects a fixed epoch, which keeps the output byte-reproducible.
    pub source_date_epoch: Option<i64>,

    /// The process locale (the `LANG` environment variable); its language tag is used when the
    /// document names no `lang`. `None` falls back to `en-US`, so the output does not depend on
    /// the environment.
    pub locale: Option<String>,
}
185
186/// Options controlling a [`Writer`]. Extended (not resignatured) as real options land.
187#[derive(Debug, Clone, Default)]
188#[non_exhaustive]
189pub struct WriterOptions {
190    /// Format extensions to enable.
191    pub extensions: Extensions,
192
193    /// The embedded resources the document references by name but does not carry inline. A writer
194    /// that re-embeds resource bytes — a notebook re-encoding its image outputs — reads them from
195    /// here; most writers ignore it. Shared cheaply, so cloning the options does not copy the bytes.
196    pub media: Arc<MediaBag>,
197
198    /// Options for the EPUB container writer; ignored by every other writer.
199    pub epub: EpubOptions,
200
201    /// How paragraphs are laid out: reflowed to the fill column, never wrapped, or with the source's
202    /// own line breaks preserved.
203    pub wrap: WrapMode,
204
205    /// The fill column a wrapping writer reflows to under [`WrapMode::Auto`]. `None` uses the
206    /// writer's built-in default width.
207    pub columns: Option<usize>,
208
209    /// Splice a hierarchical section number into each heading. A format that numbers headings with a
210    /// typesetting counter applies it through its template instead (see
211    /// [`Writer::numbers_sections_natively`]).
212    pub number_sections: bool,
213
214    /// Emit a table of contents in a standalone document.
215    pub toc: bool,
216
217    /// The deepest heading level the table of contents includes. `None` uses the conventional depth
218    /// of three.
219    pub toc_depth: Option<usize>,
220
221    /// How math is presented by a format offering a choice of renderers (the HTML family).
222    pub math_method: MathMethod,
223
224    /// Emit a complete document by wrapping the rendered body in the target format's template,
225    /// rather than a bare fragment.
226    #[cfg(feature = "template")]
227    #[cfg_attr(docsrs, doc(cfg(feature = "template")))]
228    pub standalone: bool,
229
230    /// Template source overriding the format's built-in default. Its presence implies standalone
231    /// output.
232    #[cfg(feature = "template")]
233    #[cfg_attr(docsrs, doc(cfg(feature = "template")))]
234    pub template: Option<String>,
235
236    /// Directory used to resolve template partials (`$name()$`).
237    #[cfg(feature = "template")]
238    #[cfg_attr(docsrs, doc(cfg(feature = "template")))]
239    pub template_dir: Option<std::path::PathBuf>,
240
241    /// A shared directory of partials (`$name()$`) consulted when a partial is not found beside the
242    /// including template — the data directory's `templates/`. `None` when no data directory applies.
243    #[cfg(feature = "template")]
244    #[cfg_attr(docsrs, doc(cfg(feature = "template")))]
245    pub template_datadir: Option<std::path::PathBuf>,
246
247    /// Extension a partial (`$name()$`) inherits from the including template: the `--template`
248    /// file's own extension, so the same partial name resolves to the same kind of file whatever
249    /// the output format. An empty string means the template file had no extension (the partial is
250    /// looked up bare). Absent for a built-in default, where the format name is used instead.
251    #[cfg(feature = "template")]
252    #[cfg_attr(docsrs, doc(cfg(feature = "template")))]
253    pub template_ext: Option<String>,
254
255    /// Raw template variables, in order; a repeated key accumulates into a list. Inserted verbatim
256    /// (unescaped) at the highest precedence when building the template context.
257    #[cfg(feature = "template")]
258    #[cfg_attr(docsrs, doc(cfg(feature = "template")))]
259    pub variables: Vec<(String, String)>,
260
261    /// Metadata layered *above* the document's own (the `-M` layer): each key replaces the reader's
262    /// value for that key when the context is built.
263    #[cfg(feature = "template")]
264    #[cfg_attr(docsrs, doc(cfg(feature = "template")))]
265    pub metadata: std::collections::BTreeMap<String, carta_ast::MetaValue>,
266
267    /// Metadata layered *below* the document's own (the metadata-file layer): supplies defaults the
268    /// reader's values and `-M` override.
269    #[cfg(feature = "template")]
270    #[cfg_attr(docsrs, doc(cfg(feature = "template")))]
271    pub metadata_defaults: std::collections::BTreeMap<String, carta_ast::MetaValue>,
272
273    /// The source name a standalone document falls back to when no `title` metadata is present: an
274    /// input file's stem, or `-` for standard input. `None` outside the command line, where there is
275    /// no source name and the fallback is empty. Consumed by the HTML family (for its `pagetitle`)
276    /// and by the container writer (for the navigation document's title).
277    #[cfg(any(feature = "template", feature = "container"))]
278    #[cfg_attr(docsrs, doc(cfg(any(feature = "template", feature = "container"))))]
279    pub source_name: Option<String>,
280}
281
282/// Parses input text in some source format into the document model.
283pub trait Reader {
284    /// Parses `input` text into a document.
285    ///
286    /// # Errors
287    /// Propagates any error from parsing the input.
288    fn read(&self, input: &str, options: &ReaderOptions) -> Result<Document>;
289
290    /// Reads `input` into a document together with the embedded resources it references. The default
291    /// carries no resources; a container format — a notebook with image outputs — overrides this to
292    /// decode those bytes into the returned [`MediaBag`], and implements [`read`](Reader::read) by
293    /// discarding the bag.
294    ///
295    /// # Errors
296    /// Propagates any error from parsing the input.
297    fn read_media(&self, input: &str, options: &ReaderOptions) -> Result<(Document, MediaBag)> {
298        Ok((self.read(input, options)?, MediaBag::new()))
299    }
300}
301
/// The set of plain-text identity variables that a writer's standalone template uses. For places
/// that cannot hold markup — the head of a web document, the properties of a PDF document — the
/// title, authors, and date of the document are offered as markup-free, target-escaped text. See
/// [`Writer::meta_var_style`].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum MetaVarStyle {
    /// None of these variables are offered by the format.
    #[default]
    None,
    /// The head of a web document: `pagetitle` (the title, or the source name when there is no
    /// title), `date-meta` (the date), and `author-meta` (the authors, one list entry apiece).
    Web,
    /// The properties of a PDF document: `title-meta` (the title) and `author-meta` (the authors,
    /// joined into a single string with `; `).
    Pdf,
}
317
318/// Renders the document model into some target format's text.
319///
320/// The returned string carries no trailing newline; the CLI appends exactly one.
321pub trait Writer {
322    /// Renders `document` into this format's text.
323    ///
324    /// # Errors
325    /// Propagates any error from rendering the document.
326    fn write(&self, document: &Document, options: &WriterOptions) -> Result<String>;
327
328    /// Render an inline sequence in this format, for interpolating inline metadata (a `title`, an
329    /// `author`) into a template variable. Wrapping the inlines in a [`Block::Plain`] yields them
330    /// with no paragraph chrome across formats; a writer whose `Plain` diverges overrides this.
331    ///
332    /// # Errors
333    /// Propagates any error from [`Writer::write`].
334    fn render_meta_inlines(&self, inlines: &[Inline], options: &WriterOptions) -> Result<String> {
335        let document = Document {
336            blocks: vec![Block::Plain(inlines.to_vec())],
337            ..Document::default()
338        };
339        Ok(self
340            .write(&document, options)?
341            .trim_end_matches('\n')
342            .to_string())
343    }
344
345    /// Render a block sequence in this format, for interpolating block metadata (an `abstract`
346    /// authored as Markdown blocks) into a template variable.
347    ///
348    /// # Errors
349    /// Propagates any error from [`Writer::write`].
350    fn render_meta_blocks(&self, blocks: &[Block], options: &WriterOptions) -> Result<String> {
351        let document = Document {
352            blocks: blocks.to_vec(),
353            ..Document::default()
354        };
355        Ok(self
356            .write(&document, options)?
357            .trim_end_matches('\n')
358            .to_string())
359    }
360
361    /// This format's own standalone template, or `None` when standalone output is identical to the
362    /// fragment (no wrapping document exists for the format).
363    fn default_template(&self) -> Option<&'static str> {
364        None
365    }
366
367    /// A standalone document this format assembles structurally, embedding the metadata and block
368    /// list in one value rather than wrapping a text body in a template — the data form is the
369    /// canonical example. Returned in place of template rendering. `None` (the default) when the
370    /// format wraps its body with a text template instead.
371    ///
372    /// # Errors
373    /// Propagates any error from rendering the document.
374    fn standalone_document(
375        &self,
376        document: &Document,
377        options: &WriterOptions,
378    ) -> Result<Option<String>> {
379        let _ = (document, options);
380        Ok(None)
381    }
382
383    /// Which plain-text identity variables this writer's standalone template draws on — the title,
384    /// authors, and date as markup-free text. The default is [`MetaVarStyle::None`]; an HTML-family
385    /// writer returns [`MetaVarStyle::Web`] and a LaTeX-family writer [`MetaVarStyle::Pdf`].
386    fn meta_var_style(&self) -> MetaVarStyle {
387        MetaVarStyle::None
388    }
389
390    /// Whether block-shaped metadata is flattened to its inline content when built into the template
391    /// context. A writer that places title, author, and date into single-line header fields — a man
392    /// page's `.TH` line cannot carry paragraph structure — sets this so a lone-paragraph value
393    /// contributes its inline text and any other block shape contributes nothing. The default `false`
394    /// renders block metadata as blocks.
395    fn flatten_block_metadata(&self) -> bool {
396        false
397    }
398
399    /// A title presentation the template language cannot express from individual variables — an
400    /// underlined title for reStructuredText, say, whose rule length depends on the rendered title
401    /// width. Exposed to the template as the `titleblock` variable. `None` (the default) when the
402    /// format builds its title presentation from individual variables instead.
403    ///
404    /// # Errors
405    /// Propagates any error from rendering the metadata.
406    fn title_block(&self, document: &Document, options: &WriterOptions) -> Result<Option<String>> {
407        let _ = (document, options);
408        Ok(None)
409    }
410
411    /// Whether this writer lays the document out as newline-terminated lines, so a non-empty `body`
412    /// template variable ends with a newline. Writers that build their markup as one string ending
413    /// at its final glyph (HTML, LaTeX, and the like) leave the default `false`.
414    fn body_ends_with_newline(&self) -> bool {
415        false
416    }
417
418    /// How this writer supplies a table of contents. The default renders a nested list into the
419    /// `toc` variable; a format whose template assembles its own contents from a directive overrides
420    /// to [`TocStyle::Native`].
421    fn toc_style(&self) -> TocStyle {
422        TocStyle::List
423    }
424
425    /// Whether a list-style table of contents attaches a back-reference anchor — an `id` on each
426    /// entry's link — so the entries can be linked to. The default includes them; a format that
427    /// cannot represent an inline identifier (so an attributed link would degrade to raw markup)
428    /// overrides to `false`. Honored only when [`toc_style`](Writer::toc_style) is [`TocStyle::List`].
429    fn toc_link_anchors(&self) -> bool {
430        true
431    }
432
433    /// Whether this format numbers sections with its own typesetting counter rather than carrying the
434    /// number in the heading text. The default splices a `header-section-number` span into each
435    /// heading; a format with a native counter (the typesetting formats) overrides to `true` and is
436    /// driven by a `numbersections` template flag instead.
437    fn numbers_sections_natively(&self) -> bool {
438        false
439    }
440
441    /// Whether this writer carries section numbers in the heading text, so the number is spliced into
442    /// each heading before rendering (and contents entries inherit it). The default leaves headings
443    /// untouched; a format that renders the number inline (HTML) overrides to `true`. A format with a
444    /// native counter relies on [`numbers_sections_natively`](Writer::numbers_sections_natively)
445    /// instead and leaves this `false`.
446    fn numbers_sections_in_body(&self) -> bool {
447        false
448    }
449}
450
451/// Parses input bytes in some source format into the document model. The byte-shaped counterpart of
452/// [`Reader`], for formats whose wire form is not text — zip containers and the like.
453pub trait BytesReader {
454    /// Parses `input` bytes into a document.
455    ///
456    /// # Errors
457    /// Propagates any error from parsing the input.
458    fn read(&self, input: &[u8], options: &ReaderOptions) -> Result<Document>;
459
460    /// Reads `input` into a document together with the embedded resources it references. The
461    /// byte-shaped counterpart of [`Reader::read_media`]; the default carries no resources.
462    ///
463    /// # Errors
464    /// Propagates any error from parsing the input.
465    fn read_media(&self, input: &[u8], options: &ReaderOptions) -> Result<(Document, MediaBag)> {
466        Ok((self.read(input, options)?, MediaBag::new()))
467    }
468}
469
/// Renders the document model into some target format's bytes. The byte-shaped counterpart of
/// [`Writer`], for formats whose output is not text — zip containers and the like.
///
/// This trait carries no decoration hooks (templates, table of contents, metadata rendering): a
/// container writer produces a complete document by construction. Hooks are added when a real format
/// needs them.
pub trait BytesWriter {
    /// Renders `document` into this format's bytes, returned as one owned buffer.
    ///
    /// # Errors
    /// Propagates any error from rendering the document.
    fn write(&self, document: &Document, options: &WriterOptions) -> Result<Vec<u8>>;
}
483
/// What a conversion produces: text when the writer is text-shaped, bytes when it is byte-shaped.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Output {
    /// The text that a text-shaped writer produced.
    Text(String),
    /// The bytes that a byte-shaped writer produced.
    Bytes(Vec<u8>),
}
492
/// A resolved reader, either text-shaped ([`Reader`]) or byte-shaped ([`BytesReader`]).
///
/// Both variants accept raw bytes through [`AnyReader::read`] and [`AnyReader::read_media`], so a
/// caller does not need to know which shape it holds.
pub enum AnyReader {
    /// A text-shaped reader; input is decoded as UTF-8 before parsing.
    Text(Box<dyn Reader>),
    /// A byte-shaped reader; input is parsed from raw bytes.
    Bytes(Box<dyn BytesReader>),
}
500
501impl fmt::Debug for AnyReader {
502    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
503        let variant = match self {
504            AnyReader::Text(_) => "Text",
505            AnyReader::Bytes(_) => "Bytes",
506        };
507        f.debug_tuple(variant).finish()
508    }
509}
510
511impl AnyReader {
512    /// Reads `input` into a document. A text reader decodes the bytes as UTF-8 first; a byte reader
513    /// takes the raw slice.
514    ///
515    /// # Errors
516    /// [`Error::InvalidUtf8`] if a text reader is handed input that is not valid UTF-8, plus any error
517    /// the underlying reader returns.
518    pub fn read(&self, input: &[u8], options: &ReaderOptions) -> Result<Document> {
519        match self {
520            AnyReader::Text(reader) => reader.read(std::str::from_utf8(input)?, options),
521            AnyReader::Bytes(reader) => reader.read(input, options),
522        }
523    }
524
525    /// Reads `input` into a document together with the embedded resources it references. A text
526    /// reader decodes the bytes as UTF-8 first; a byte reader takes the raw slice. A reader that
527    /// carries no resources returns an empty [`MediaBag`].
528    ///
529    /// # Errors
530    /// [`Error::InvalidUtf8`] if a text reader is handed input that is not valid UTF-8, plus any
531    /// error the underlying reader returns.
532    pub fn read_media(
533        &self,
534        input: &[u8],
535        options: &ReaderOptions,
536    ) -> Result<(Document, MediaBag)> {
537        match self {
538            AnyReader::Text(reader) => reader.read_media(std::str::from_utf8(input)?, options),
539            AnyReader::Bytes(reader) => reader.read_media(input, options),
540        }
541    }
542}
543
/// A resolved writer, either text-shaped ([`Writer`]) or byte-shaped ([`BytesWriter`]).
///
/// The variants are public, so a caller matches on them to reach the underlying `write` method.
pub enum AnyWriter {
    /// A text-shaped writer; rendering produces a string.
    Text(Box<dyn Writer>),
    /// A byte-shaped writer; rendering produces raw bytes.
    Bytes(Box<dyn BytesWriter>),
}
551
552impl fmt::Debug for AnyWriter {
553    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
554        let variant = match self {
555            AnyWriter::Text(_) => "Text",
556            AnyWriter::Bytes(_) => "Bytes",
557        };
558        f.debug_tuple(variant).finish()
559    }
560}
561
562impl AnyWriter {
563    /// This format's own standalone template, or `None` when standalone output is identical to the
564    /// fragment. A byte-shaped writer never has one.
565    #[must_use]
566    pub fn default_template(&self) -> Option<&'static str> {
567        match self {
568            AnyWriter::Text(writer) => writer.default_template(),
569            AnyWriter::Bytes(_) => None,
570        }
571    }
572}
573
#[cfg(test)]
mod tests {
    use super::{
        AnyReader, AnyWriter, BytesReader, BytesWriter, Error, Reader, ReaderOptions, Result,
        WriterOptions,
    };
    use carta_ast::Document;

    /// Input that is not valid UTF-8, shared by the reader tests.
    const NOT_UTF8: [u8; 2] = [0xff, 0xfe];
    /// The bytes the canned writer returns; not valid UTF-8 either.
    const CANNED: [u8; 3] = [0x00, 0xff, 0x9f];

    /// A byte writer that ignores the document and always returns [`CANNED`].
    struct CannedBytesWriter;
    impl BytesWriter for CannedBytesWriter {
        fn write(&self, _document: &Document, _options: &WriterOptions) -> Result<Vec<u8>> {
            Ok(CANNED.to_vec())
        }
    }

    /// A byte reader that checks it was handed [`NOT_UTF8`] untouched.
    struct CheckingBytesReader;
    impl BytesReader for CheckingBytesReader {
        fn read(&self, input: &[u8], _options: &ReaderOptions) -> Result<Document> {
            assert_eq!(input, NOT_UTF8.as_slice());
            Ok(Document::default())
        }
    }

    /// A text reader that ignores its input and returns an empty document.
    struct BlankTextReader;
    impl Reader for BlankTextReader {
        fn read(&self, _input: &str, _options: &ReaderOptions) -> Result<Document> {
            Ok(Document::default())
        }
    }

    #[test]
    fn bytes_writer_round_trips_bytes() {
        let writer = AnyWriter::Bytes(Box::new(CannedBytesWriter));
        assert!(writer.default_template().is_none());
        let rendered = match &writer {
            AnyWriter::Bytes(inner) => inner.write(&Document::default(), &WriterOptions::default()),
            AnyWriter::Text(_) => panic!("expected a byte writer"),
        };
        assert_eq!(rendered.unwrap(), CANNED.to_vec());
    }

    #[test]
    fn text_reader_rejects_invalid_utf8() {
        let options = ReaderOptions::default();
        let reader = AnyReader::Text(Box::new(BlankTextReader));
        let error = reader.read(&NOT_UTF8, &options).unwrap_err();
        assert!(matches!(error, Error::InvalidUtf8(_)), "{error:?}");
    }

    #[test]
    fn bytes_reader_accepts_invalid_utf8() {
        let options = ReaderOptions::default();
        let reader = AnyReader::Bytes(Box::new(CheckingBytesReader));
        let outcome = reader.read(&NOT_UTF8, &options);
        assert!(outcome.is_ok());
    }

    #[test]
    fn default_read_media_carries_no_resources() {
        let options = ReaderOptions::default();
        let reader = AnyReader::Text(Box::new(BlankTextReader));
        let (_document, media) = reader
            .read_media(b"anything", &options)
            .expect("read succeeds");
        assert!(media.is_empty());
    }
}