asciidoc_parser/parser/parser.rs
1use std::collections::HashMap;
2
3use crate::{
4 Document, HasSpan,
5 document::{Attribute, InterpretedValue},
6 parser::{
7 AllowableValue, AttributeValue, HtmlSubstitutionRenderer, InlineSubstitutionRenderer,
8 ModificationContext, PathResolver,
9 },
10 warnings::{Warning, WarningType},
11};
12
13/// The [`Parser`] struct and its related structs allow a caller to configure
14/// how AsciiDoc parsing occurs and then to initiate the parsing process.
15#[derive(Clone, Debug)]
16pub struct Parser<'p> {
17 /// Attribute values at current state of parsing.
18 pub(crate) attribute_values: HashMap<String, AttributeValue>,
19
20 /// Default values for attributes if "set."
21 default_attribute_values: HashMap<String, String>,
22
23 /// Specifies how the basic raw text of a simple block will be converted to
24 /// the format which will ultimately be presented in the final output.
25 ///
26 /// Typically this is an [`HtmlSubstitutionRenderer`] but clients may
27 /// provide alternative implementations.
28 pub(crate) renderer: &'p dyn InlineSubstitutionRenderer,
29
30 /// Specifies how to generate clean and secure paths relative to the parsing
31 /// context.
32 pub path_resolver: PathResolver,
33}
34
35impl<'p> Parser<'p> {
36 /// Parse a UTF-8 string as an AsciiDoc document.
37 ///
38 /// Note that the document references the underlying source string and
39 /// necessarily has the same lifetime as the source.
40 ///
41 /// The [`Document`] data structure returned by this call and nearly all
42 /// data structures contained within it are gated by the lifetime of the
43 /// `source` text passed in to this function. For that reason all of
44 /// those data structures are given the lifetime `'src`.
45 ///
46 /// **IMPORTANT:** The AsciiDoc language documentation states that UTF-16
47 /// encoding is allowed if a byte-order-mark (BOM) is present at the
48 /// start of a file. This format is not directly supported by the
49 /// `asciidoc-parser` crate. Any UTF-16 content must be re-encoded as
50 /// UTF-8 prior to parsing.
51 ///
52 /// **IMPORTANT:** The `Parser` struct will be updated with attributes and
53 /// similar values discovered during parsing.
54 ///
55 /// # Warnings, not errors
56 ///
57 /// Any UTF-8 string is a valid AsciiDoc document, so this function does not
58 /// return an [`Option`] or [`Result`] data type. There may be any number of
59 /// character sequences that have ambiguous or potentially unintended
60 /// meanings. For that reason, a caller is advised to review the warnings
61 /// provided via the [`warnings()`] iterator.
62 ///
63 /// [`warnings()`]: Document::warnings
64 pub fn parse<'src>(&mut self, source: &'src str) -> Document<'src> {
65 Document::parse(source, self)
66 }
67
68 /// Retrieves the current interpreted value of a [document attribute].
69 ///
70 /// Each document holds a set of name-value pairs called document
71 /// attributes. These attributes provide a means of configuring the AsciiDoc
72 /// processor, declaring document metadata, and defining reusable content.
73 /// This page introduces document attributes and answers some questions
74 /// about the terminology used when referring to them.
75 ///
76 /// ## What are document attributes?
77 ///
78 /// Document attributes are effectively document-scoped variables for the
79 /// AsciiDoc language. The AsciiDoc language defines a set of built-in
80 /// attributes, and also allows the author (or extensions) to define
81 /// additional document attributes, which may replace built-in attributes
82 /// when permitted.
83 ///
84 /// Built-in attributes either provide access to read-only information about
85 /// the document and its environment or allow the author to configure
86 /// behavior of the AsciiDoc processor for a whole document or select
87 /// regions. Built-in attributes are effectively unordered. User-defined
88 /// attribute serve as a powerful text replacement tool. User-defined
89 /// attributes are stored in the order in which they are defined.
90 ///
91 /// [document attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes/
92 pub fn attribute_value<N: AsRef<str>>(&self, name: N) -> InterpretedValue {
93 self.attribute_values
94 .get(name.as_ref())
95 .map(|av| av.value.clone())
96 .map(|av| {
97 if let InterpretedValue::Set = av
98 && let Some(default) = self.default_attribute_values.get(name.as_ref())
99 {
100 InterpretedValue::Value(default.clone())
101 } else {
102 av
103 }
104 })
105 .unwrap_or(InterpretedValue::Unset)
106 }
107
108 /// Returns `true` if the parser has a [document attribute] by this name.
109 ///
110 /// [document attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes/
111 pub fn has_attribute<N: AsRef<str>>(&self, name: N) -> bool {
112 self.attribute_values.contains_key(name.as_ref())
113 }
114
115 /// Returns `true` if the parser has a [document attribute] by this name
116 /// which has been set (i.e. is present and not [unset]).
117 ///
118 /// [document attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes/
119 /// [unset]: https://docs.asciidoctor.org/asciidoc/latest/attributes/unset-attributes/
120 pub fn is_attribute_set<N: AsRef<str>>(&self, name: N) -> bool {
121 self.attribute_values
122 .get(name.as_ref())
123 .map(|a| a.value != InterpretedValue::Unset)
124 .unwrap_or(false)
125 }
126
127 /// Sets the value of an [intrinsic attribute].
128 ///
129 /// Intrinsic attributes are set automatically by the processor. These
130 /// attributes provide information about the document being processed (e.g.,
131 /// `docfile`), the security mode under which the processor is running
132 /// (e.g., `safe-mode-name`), and information about the user’s environment
133 /// (e.g., `user-home`).
134 ///
135 /// The [`modification_context`](ModificationContext) establishes whether
136 /// the value can be subsequently modified by the document header and/or in
137 /// the document body.
138 ///
139 /// Subsequent calls to this function or [`with_intrinsic_attribute_bool()`]
140 /// are always permitted. The last such call for any given attribute name
141 /// takes precendence.
142 ///
143 /// [intrinsic attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes-ref/#intrinsic-attributes
144 ///
145 /// [`with_intrinsic_attribute_bool()`]: Self::with_intrinsic_attribute_bool
146 pub fn with_intrinsic_attribute<N: AsRef<str>, V: AsRef<str>>(
147 mut self,
148 name: N,
149 value: V,
150 modification_context: ModificationContext,
151 ) -> Self {
152 let attribute_value = AttributeValue {
153 allowable_value: AllowableValue::Any,
154 modification_context,
155 value: InterpretedValue::Value(value.as_ref().to_string()),
156 };
157
158 self.attribute_values
159 .insert(name.as_ref().to_lowercase(), attribute_value);
160
161 self
162 }
163
164 /* Comment out until we're prepared to use and test this.
165 /// Sets the default value for an [intrinsic attribute].
166 ///
167 /// Default values for attributes are provided automatically by the
168 /// processor. These values provide a fallback textual value for an
169 /// attribute when it is merely "set" by the document via API, header, or
170 /// document body.
171 ///
172 /// Calling this does not imply that the value is set automatically by
173 /// default, nor does it establish any policy for where the value may be
174 /// modified. For that, please use [`with_intrinsic_attribute`].
175 ///
176 /// [intrinsic attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes-ref/#intrinsic-attributes
177 /// [`with_intrinsic_attribute`]: Self::with_intrinsic_attribute
178 pub fn with_default_attribute_value<N: AsRef<str>, V: AsRef<str>>(
179 mut self,
180 name: N,
181 value: V,
182 ) -> Self {
183 self.default_attribute_values
184 .insert(name.as_ref().to_string(), value.as_ref().to_string());
185
186 self
187 }
188 */
189
190 /// Sets the value of an [intrinsic attribute] from a boolean flag.
191 ///
192 /// A boolean `true` is interpreted as "set." A boolean `false` is
193 /// interpreted as "unset."
194 ///
195 /// Intrinsic attributes are set automatically by the processor. These
196 /// attributes provide information about the document being processed (e.g.,
197 /// `docfile`), the security mode under which the processor is running
198 /// (e.g., `safe-mode-name`), and information about the user’s environment
199 /// (e.g., `user-home`).
200 ///
201 /// The [`modification_context`](ModificationContext) establishes whether
202 /// the value can be subsequently modified by the document header and/or in
203 /// the document body.
204 ///
205 /// Subsequent calls to this function or [`with_intrinsic_attribute()`] are
206 /// always permitted. The last such call for any given attribute name takes
207 /// precendence.
208 ///
209 /// [intrinsic attribute]: https://docs.asciidoctor.org/asciidoc/latest/attributes/document-attributes-ref/#intrinsic-attributes
210 ///
211 /// [`with_intrinsic_attribute()`]: Self::with_intrinsic_attribute
212 pub fn with_intrinsic_attribute_bool<N: AsRef<str>>(
213 mut self,
214 name: N,
215 value: bool,
216 modification_context: ModificationContext,
217 ) -> Self {
218 let attribute_value = AttributeValue {
219 allowable_value: AllowableValue::Any,
220 modification_context,
221 value: if value {
222 InterpretedValue::Set
223 } else {
224 InterpretedValue::Unset
225 },
226 };
227
228 self.attribute_values
229 .insert(name.as_ref().to_lowercase(), attribute_value);
230
231 self
232 }
233
234 /// Called from [`Header::parse()`] to accept or reject an attribute value.
235 pub(crate) fn set_attribute_from_header<'src>(
236 &mut self,
237 attr: &Attribute<'src>,
238 warnings: &mut Vec<Warning<'src>>,
239 ) {
240 let attr_name = attr.name().data().to_lowercase();
241
242 let existing_attr = self.attribute_values.get(&attr_name);
243
244 // Verify that we have permission to overwrite any existing attribute value.
245 if let Some(existing_attr) = existing_attr
246 && existing_attr.modification_context == ModificationContext::ApiOnly
247 {
248 warnings.push(Warning {
249 source: attr.span(),
250 warning: WarningType::AttributeValueIsLocked(attr_name),
251 });
252 return;
253 }
254
255 let mut value = attr.value().clone();
256
257 if let InterpretedValue::Set = value
258 && let Some(default_value) = self.default_attribute_values.get(&attr_name)
259 {
260 value = InterpretedValue::Value(default_value.clone());
261 }
262
263 let attribute_value = AttributeValue {
264 allowable_value: AllowableValue::Any,
265 modification_context: ModificationContext::Anywhere,
266 value,
267 };
268
269 self.attribute_values.insert(attr_name, attribute_value);
270 }
271
272 /// Called from [`Header::parse()`] for a value that is derived from parsing
273 /// the header (except for attribute lines).
274 pub(crate) fn set_attribute_by_value_from_header<N: AsRef<str>, V: AsRef<str>>(
275 &mut self,
276 name: N,
277 value: V,
278 ) {
279 let attr_name = name.as_ref().to_lowercase();
280
281 let attribute_value = AttributeValue {
282 allowable_value: AllowableValue::Any,
283 modification_context: ModificationContext::Anywhere,
284 value: InterpretedValue::Value(value.as_ref().to_owned()),
285 };
286
287 self.attribute_values.insert(attr_name, attribute_value);
288 }
289
290 /// Called from [`Block::parse()`] to accept or reject an attribute value
291 /// from a document (body) attribute.
292 pub(crate) fn set_attribute_from_body<'src>(
293 &mut self,
294 attr: &Attribute<'src>,
295 warnings: &mut Vec<Warning<'src>>,
296 ) {
297 let attr_name = attr.name().data().to_lowercase();
298
299 // Verify that we have permission to overwrite any existing attribute value.
300 if let Some(existing_attr) = self.attribute_values.get(&attr_name)
301 && existing_attr.modification_context != ModificationContext::Anywhere
302 {
303 warnings.push(Warning {
304 source: attr.span(),
305 warning: WarningType::AttributeValueIsLocked(attr_name),
306 });
307 return;
308 }
309
310 let attribute_value = AttributeValue {
311 allowable_value: AllowableValue::Any,
312 modification_context: ModificationContext::Anywhere,
313 value: attr.value().clone(),
314 };
315
316 self.attribute_values.insert(attr_name, attribute_value);
317 }
318}
319
320const DEFAULT_RENDERER: &'static dyn InlineSubstitutionRenderer = &HtmlSubstitutionRenderer {};
321
322impl Default for Parser<'_> {
323 fn default() -> Self {
324 Self {
325 attribute_values: built_in_attrs(),
326 default_attribute_values: built_in_default_values(),
327 renderer: DEFAULT_RENDERER,
328 path_resolver: PathResolver::default(),
329 }
330 }
331}
332
333fn built_in_attrs() -> HashMap<String, AttributeValue> {
334 let mut attrs: HashMap<String, AttributeValue> = HashMap::new();
335
336 attrs.insert(
337 "empty".to_owned(),
338 AttributeValue {
339 allowable_value: AllowableValue::Any,
340 modification_context: ModificationContext::ApiOnly,
341 value: InterpretedValue::Value("".into()),
342 },
343 );
344
345 attrs.insert(
346 "sp".to_owned(),
347 AttributeValue {
348 allowable_value: AllowableValue::Any,
349 modification_context: ModificationContext::ApiOnly,
350 value: InterpretedValue::Value(" ".into()),
351 },
352 );
353
354 attrs.insert(
355 "deg".to_owned(),
356 AttributeValue {
357 allowable_value: AllowableValue::Any,
358 modification_context: ModificationContext::ApiOnly,
359 value: InterpretedValue::Value("°".into()),
360 },
361 );
362
363 attrs.insert(
364 "plus".to_owned(),
365 AttributeValue {
366 allowable_value: AllowableValue::Any,
367 modification_context: ModificationContext::ApiOnly,
368 value: InterpretedValue::Value("+".into()),
369 },
370 );
371
372 attrs.insert(
373 "toc".to_owned(),
374 AttributeValue {
375 allowable_value: AllowableValue::Any,
376 modification_context: ModificationContext::ApiOrHeader,
377 value: InterpretedValue::Unset,
378 },
379 );
380
381 attrs.insert(
382 "sectids".to_owned(),
383 AttributeValue {
384 allowable_value: AllowableValue::Empty,
385 modification_context: ModificationContext::Anywhere,
386 value: InterpretedValue::Set,
387 },
388 );
389
390 attrs.insert(
391 "example-caption".to_owned(),
392 AttributeValue {
393 allowable_value: AllowableValue::Any,
394 modification_context: ModificationContext::Anywhere,
395 value: InterpretedValue::Set,
396 },
397 );
398
399 // TO DO: Replace ./images with value of imagesdir if that is non-default.
400 attrs.insert(
401 "iconsdir".to_owned(),
402 AttributeValue {
403 allowable_value: AllowableValue::Any,
404 modification_context: ModificationContext::Anywhere,
405 value: InterpretedValue::Set,
406 },
407 );
408
409 attrs
410}
411
/// Builds the table of default texts, keyed by attribute name, that stand in
/// for an attribute which is merely "set" without an explicit value.
fn built_in_default_values() -> HashMap<String, String> {
    [
        ("example-caption", "Example"),
        ("iconsdir", "./images/icons"),
        ("sectnums", "all"),
        ("toc", "auto"),
    ]
    .into_iter()
    .map(|(name, default)| (name.to_owned(), default.to_owned()))
    .collect()
}