rustla/common.rs
1/*!
2This submodule contains useful functions and other constructs that don't
3sensibly belong to any specific entity in the program.
4
5Copyright © 2020 Santtu Söderholm
6*/
7use std::io::BufRead;
8use std::{fs, io, path, str};
9
10// =======================
11// Text handling utilities
12// =======================
13
/// Splits `string` into lines and returns each of them as an owned `String`.
///
/// Line boundaries are the ones recognized by `str::lines`, i.e. `\n` and `\r\n`.
/// The line terminators themselves are not part of the returned strings.
pub fn str_to_lines(string: &str) -> Vec<String> {
    string.lines().map(str::to_owned).collect()
}
24
/// Opens the file at `file_path` and returns an iterator over its lines.
///
/// The file is wrapped in an `io::BufReader`. Each item yielded by the
/// returned iterator is an `io::Result<String>`.
///
/// # Errors
/// Returns the `io::Error` produced by `fs::File::open` if the file cannot be opened.
pub fn read_path_lines<P>(file_path: P) -> io::Result<io::Lines<io::BufReader<fs::File>>>
where
    P: AsRef<path::Path>,
{
    fs::File::open(file_path)
        .map(io::BufReader::new)
        .map(|reader| reader.lines())
}
33
/// Normalizes a reference name as described by the reStructuredText specification.
///
/// Normalization here means two things:
///
/// 1. every run of whitespace is collapsed into a single ASCII space
///    (leading and trailing whitespace disappears entirely), and
/// 2. all letters are converted to lower case.
///
/// ### Note
/// Lower-casing works on Unicode scalar values. A grapheme that consists of several
/// scalars is lower-cased scalar by scalar, which may yield a nonsensical result.
pub fn normalize_refname(name: &str) -> String {
    // Whitespace-separated words of the name, in their original order.
    let words: Vec<&str> = name.split_whitespace().collect();

    // Lower-case the joined string as a whole, not word by word, so that
    // context-sensitive case mappings see the same context as before.
    let collapsed = words.join(" ");
    collapsed.to_lowercase()
}
48
/// A whitespace-aware function for stripping indentation from a line.
///
/// Removes up to `amount` leading whitespace characters from `line` and returns
/// the remainder. `amount` is counted in `char`s, so a tab counts as one character.
///
/// * An empty `line` is returned unchanged.
/// * A line that consists solely of whitespace and is shorter than `amount`
///   is treated as blank: an empty `String` is returned.
///
/// # Errors
/// Returns `Err(message)` if a non-whitespace character is encountered
/// before `amount` characters have been stripped.
pub fn strip_indent(line: String, amount: usize) -> Result<String, &'static str> {
    if line.is_empty() {
        return Ok(line);
    }

    let mut chars = line.chars();

    for _ in 0..amount {
        match chars.next() {
            Some(c) if c.is_whitespace() => {}
            Some(_) => {
                return Err("\nNon-whitespace character encountered before supposed indentation level reached.\n");
            }
            // The line ran out before the indentation level was reached.
            // Everything seen so far was whitespace, so nothing is left to return.
            None => break,
        }
    }

    Ok(chars.as_str().to_string())
}
71
72// ============
73// Type aliases
74// ============
75
/// The integer type used for node identifiers.
pub type NodeId = u32;

/// The integer type used for enumerators, such as list or footnote ordinals.
pub type EnumAsInt = u32;

/// The integer type used for question points.
pub type QuizPoints = u32;

/// The number type carried by the variants of the `Length` enum.
pub type LengthNum = f64;
88
89// ==========================
90// Enumerators and converters
91// ==========================
92
/// The different A+ questionnaire types.
///
/// Used, among other things, to differentiate between
/// the output formats of questionnaire hints.
#[derive(Debug)]
pub enum AplusQuestionnaireType {
    PickOne,
    PickAny,
    FreeText,
}
101
/// The line style of a section title.
///
/// A title is either only underlined, or both over- and underlined.
/// The contained `char` is the character the line is drawn with.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum SectionLineStyle {
    OverAndUnder(char),
    Under(char),
}
108
/// The delimiters of an enumerated list item label.
///
/// A label either ends with a period `.`, is enclosed in
/// matching parentheses `(` and `)`, or ends with a right parenthesis `)`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum EnumDelims {
    Period,
    Parens,
    RParen,
}
117
/// The kinds of list enumerator labels.
///
/// Labels can be Arabic numerals, letters of the lower- or upper-case
/// alphabet (`a--z` or `A--Z`), or lower- or upper-case Roman numerals
/// between `1--4999`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum EnumKind {
    Arabic,
    LowerAlpha,
    UpperAlpha,
    LowerRoman,
    UpperRoman,
    Automatic,
}
129
/// The 4 different kinds of footnote markers:
///
/// 1. manually numbered: `.. [1]`, `.. [2]`, ...
/// 2. automatically numbered: `.. [#]`
/// 3. automatically numbered with a simple reference name: `.. [#simple_ref-name]`
/// 4. automatically generated symbolic markers: `.. [*]`
#[derive(Debug, Clone, Copy)]
pub enum FootnoteKind {
    /// A manually numbered footnote.
    Manual,

    /// An automatically numbered footnote.
    AutoNumbered,

    /// An automatically numbered footnote that uses
    /// a simple reference name as its display label.
    SimpleRefName,

    /// A footnote whose label is an automatically generated string.
    AutoSymbol,
}
147
/// The 3 types of hyperlink targets: internal, external and indirect.
///
/// **Internal** targets have empty link blocks. They provide an end point that
/// allows a hyperlink to connect one place to another within a document.
/// An internal hyperlink target points to the element following the target.
///
/// **External** targets have an absolute or relative URI or an email address in
/// their link blocks. The URI may begin on the same line as the explicit markup
/// start and target name, or in an indented text block immediately following,
/// with no intervening blank lines. If the link block has multiple lines,
/// they are concatenated. Any unescaped whitespace is removed.
///
/// **Indirect** targets have a hyperlink reference in their link blocks.
/// As with hyperlink references anywhere else in a document, a phrase-reference
/// used in the link block must be enclosed in backquotes. As with external targets,
/// the link block may begin on the same line as the explicit markup start or on
/// the next line. It may also be split over multiple lines, in which case
/// the lines are joined with whitespace before being normalized.
#[derive(Debug, Clone)]
pub enum LinkTarget {
    Internal(String),
    External(String),
    Indirect(String),
}
176
/// The different types of references that a reference node might contain.
#[derive(Debug)]
pub enum Reference {
    Internal(String),
    URI(String),
    EMail(String),
}
184
/// The 3 types of interpreted inline text, such as math:
///
/// * `Default`: the role is not specified and the default role is used,
/// * `RoleThenContent`: the given role precedes the interpreted content and
/// * `ContentThenRole`: the interpreted content precedes the given role.
#[derive(Debug, Clone, Copy)]
pub enum InterpretedTextKind {
    Default,
    RoleThenContent,
    ContentThenRole,
}
195
196/// An enumeration of how lengths can be interpreted.
197/// This includes precentages of current context and absolute length
198#[derive(Debug)]
199pub enum MetricType {
200 Percentage(f64),
201 Lenght(Length),
202}
203
204/// Units of length recognized by reStructuredText.
205#[derive(Debug)]
206pub enum Length {
207
208 /// em unit, the element's font size
209 Em(LengthNum),
210
211
212 /// ex unit, x-height of the element's font size
213 Ex(LengthNum),
214
215
216 /// Millimeters
217 Mm(LengthNum),
218
219
220 /// Centimeters.
221 Cm(LengthNum),
222
223
224 /// Inches. 1in == 2.54 cm == 96 px.
225 In(LengthNum),
226
227
228 /// Pixels. 1px == 1/96 in
229 ///
230 /// ### Note!
231 /// In LaTeX, 1 px == 1/72 in.
232 Px(LengthNum),
233
234
235 /// Points. 1pt == 1/72 in
236 Pt(LengthNum),
237
238
239 /// Picas. 1 pc == 1/6 in == 12 pt
240 Pc(LengthNum),
241}
242
243impl std::fmt::Display for Length {
244 fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
245 let fmt_str = match self {
246 Self::Em(num) => format!("{}em", num),
247 Self::Ex(num) => format!("{}ex", num),
248 Self::Mm(num) => format!("{}mm", num),
249 Self::Cm(num) => format!("{}cm", num),
250 Self::In(num) => format!("{}in", num),
251 Self::Px(num) => format!("{}px", num),
252 Self::Pt(num) => format!("{}pt", num),
253 Self::Pc(num) => format!("{}pc", num),
254 };
255 write!(f, "{}", fmt_str)
256 }
257}
258
/// The alternatives for specifying the column widths of a table.
#[derive(Debug)]
pub enum TableColWidths {
    /// Explicitly given widths, one number per column.
    Columns(Vec<f64>),

    /// The widths are determined by the writer.
    Auto,
}
265
/// The different horizontal alignment options:
/// `Left`, `Center` or `Right`.
///
/// The `Display` implementation renders a variant as `align=<option>`,
/// where `<option>` is `left`, `center` or `right`.
#[derive(Debug)]
pub enum HorizontalAlignment {
    Left,
    Center,
    Right,
}

impl std::fmt::Display for HorizontalAlignment {
    /// Writes the alignment in the form `align=<option>`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let side = match *self {
            HorizontalAlignment::Left => "left",
            HorizontalAlignment::Center => "center",
            HorizontalAlignment::Right => "right",
        };
        write!(f, "align={}", side)
    }
}
285
/// The backlinking alternatives of a table of contents node.
///
/// Instructs the doctree to generate links from section headers back to
/// the table of contents entries (`Entry`), to the table of contents
/// itself (`Top`), or to generate no backlinks at all (`None`).
#[derive(Debug)]
pub enum ToCBacklinks {
    Entry,
    Top,
    None,
}
295
/// The alternatives of the (deprecated) "align" attribute
/// recognized by the HTML `<img>` tag.
///
/// The `Display` implementation renders a variant as its lower-case name.
#[derive(Debug)]
pub enum HTMLAlignment {
    Top,
    Middle,
    Bottom,
    Left,
    Center,
    Right,
}

impl std::fmt::Display for HTMLAlignment {
    /// Writes the lower-case name of the alignment.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match *self {
            HTMLAlignment::Top => "top",
            HTMLAlignment::Middle => "middle",
            HTMLAlignment::Bottom => "bottom",
            HTMLAlignment::Left => "left",
            HTMLAlignment::Center => "center",
            HTMLAlignment::Right => "right",
        })
    }
}
321
322///
323/// Enumerated the types of tree traversals that one of the `DocTree` walk methods might perform.
324pub enum TraversalType {
325
326 /// Traversal based on node ID. Causes the walker method to look for a specific node
327 /// with the given ID.
328 ID(NodeId),
329}
330
331use crate::doctree::DocTree;
332use crate::parser::state_machine::State;
333
334/// An enumeration of the different ways a (nested) parsing session might terminate.
335/// The return type of the `Parser::parse` method. Generally, finishing conditions
336/// that are not outright failures will enclose the document tree fed to the parser
337/// when it was initialized.
338pub enum ParsingResult {
339
340 /// This will be returned, if the parser finished by passing over the last line of the source.
341 /// This generally indicates that the source file was parsed successfully.
342 EOF {
343 doctree: DocTree,
344 state_stack: Vec<State>,
345 },
346
347 /// This will be returned if the parser was unable to parse any elements on some line of the source,
348 /// as patterns not matching will drain the parser state stack of states. This might be useful during
349 /// nested parsing sessions, when an empty stack right at the start of the parsing process indicates
350 /// that there were no expected nested structures on the same line.
351 EmptyStateStack {
352 doctree: DocTree,
353 state_stack: Vec<State>,
354 },
355
356 /// A simple failure type. This will be returned when there was clearly no way to recover.
357 Failure { message: String, doctree: DocTree },
358}
359
360impl ParsingResult {
361
362 /// Unwraps the contained doctree in one of the non-failure variants.
363 /// Simply panics if this is attempted for the `Failure` variant.
364 pub fn unwrap_tree(self) -> DocTree {
365 match self {
366 Self::EOF {
367 doctree,
368 state_stack,
369 } => doctree,
370 Self::EmptyStateStack {
371 doctree,
372 state_stack,
373 } => doctree,
374 Self::Failure { doctree, .. } => doctree,
375 }
376 }
377}
378
/// The 6 possible statuses of A+ exercises:
///
/// * ready: Visible exercise listed in table of contents.
/// * unlisted (default): Unlisted in table of contents, otherwise same as ready.
/// * hidden: Hidden from non course staff.
/// * enrollment: Questions for students when they enroll to a course.
/// * enrollment_ext: Same as enrollment but for external students.
/// * maintenance: Hides the exercise description and prevents submissions.
#[derive(Debug)]
pub enum AplusExerciseStatus {
    Ready,
    Unlisted,
    Hidden,
    Enrollment,
    EnrollmentExt,
    Maintenance,
}
397
/// The different tokenizers offered by the A+ Radar tokenizer.
///
/// See [the docs](https://github.com/Aalto-LeTech/radar/tree/master/tokenizer#tokenizers)
/// for more details.
#[derive(Clone, Copy, Debug)]
pub enum AplusRadarTokenizer {
    Python3,
    Scala,
    JavaScript,
    CSS,
    HTML,
    None,
}
411
/// The alternatives for clearing floating elements around an A+ active element.
///
/// "both" forces the element to a new line, whereas "left" ("right")
/// allows no floating elements on the left (right).
#[derive(Clone, Copy, Debug)]
pub enum AplusActiveElementClear {
    /// Forces the element to a new line.
    Both,

    /// Allows no floating elements on the left.
    Left,

    /// Allows no floating elements on the right.
    Right,
}
426
/// The input types of an A+ active element.
///
/// Use "file" for file inputs, "clickable" for clickable inputs and "dropdown"
/// for dropdowns. For dropdowns, the available options should be listed after
/// the type indicating "dropdown", in the format "dropdown:option1,option2,option3".
#[derive(Debug)]
pub enum AplusActiveElementInputType {
    /// Used for file inputs.
    File,

    /// Used for clickable inputs.
    Clickable,

    /// Used for dropdown menus. Comes with the options in a `String`.
    Dropdown(String),
}
443
/// The output types of an A+ active element.
///
/// The default type is text. For image (png) outputs, use "image".
#[derive(Clone, Copy, Debug)]
pub enum AplusActiveElementOutputType {
    Text,
    Image,
}
451
/// The different streams a writer can direct its output to:
/// stdout, stderr or a file.
#[derive(Debug)]
pub enum OutputStream {
    /// Directs the output to the stdout stream.
    StdOut,

    /// Directs the output to the stderr stream.
    StdErr,

    /// Directs the output to a file.
    File,
}
463
464// ===========
465// Constants
466// ===========
467
/// The symbols that the standard Docutils system uses as footnote marks
/// when a `FootnoteKind::AutoSymbol` is detected.
///
/// Each time an autosymbol footnote is detected, the next unused item of this list
/// is used as the footnote label character. If `n` is the number of times the list
/// has been iterated over and `s` is the current autosymbol, the actual label of
/// the footnote is `s^(n+1)`. For example, if a document has `12` automatically
/// symboled footnotes and a new one is constructed, its label will be
/// `‡‡ = ‡² = ‡¹⁺¹`.
pub const FOOTNOTE_SYMBOLS: [char; 10] = [
    '*', // 1. asterisk/star
    '†', // 2. dagger (U+02020)
    '‡', // 3. double dagger (U+02021)
    '§', // 4. section mark (U+000A7)
    '¶', // 5. pilcrow or paragraph mark (U+000B6)
    '#', // 6. number sign
    '♠', // 7. spade suit (U+02660)
    '♥', // 8. heart suit (U+02665)
    '♦', // 9. diamond suit (U+02666)
    '♣', // 10. club suit (U+02663)
];
491
/// The characters that can be used for underlining section titles,
/// marking the lines of literal text blocks and creating transitions.
pub const SECTION_AND_QUOTING_CHARS: [char; 32] = [
    '!', '"', '#', '$', '%', '&', '\'', '(',
    ')', '*', '+', ',', '-', '.', '/', ':',
    ';', '<', '=', '>', '?', '@', '[', '\\',
    ']', '^', '_', '`', '{', '|', '}', '~',
];