rustla/
common.rs

1/*!
2This submodule contains useful functions and other constructs that don't
3sensibly belong to any specific entity in the program.
4
5Copyright © 2020 Santtu Söderholm
6*/
7use std::io::BufRead;
8use std::{fs, io, path, str};
9
10// =======================
11// Text handling utilities
12// =======================
13
/// Splits the given `&str` at line breaks (`\n` or `\r\n`) and
/// returns the pieces as owned `String`s, in their original order.
/// The line terminators themselves are not part of the returned lines.
pub fn str_to_lines(string: &str) -> Vec<String> {
    string.lines().map(String::from).collect()
}
24
/// Opens the file at `file_path` and returns an iterator over its lines,
/// read through an `io::BufReader`.
///
/// # Errors
/// Returns the `io::Error` produced by `fs::File::open` if the file cannot be opened.
/// Every item yielded by the returned iterator is itself an `io::Result<String>`.
pub fn read_path_lines<P>(file_path: P) -> io::Result<io::Lines<io::BufReader<fs::File>>>
where
    P: AsRef<path::Path>,
{
    fs::File::open(file_path)
        .map(io::BufReader::new)
        .map(|reader| reader.lines())
}
33
/// Normalizes a reference name as described by the reStructuredText specification:
/// every run of whitespace is collapsed into one ASCII space (leading and trailing
/// whitespace disappears entirely) and all letters are converted to lower case.
///
/// ### Note
/// The lower-casing operates on Unicode scalar values. A grapheme that is made up of
/// several scalars is therefore lower-cased scalar by scalar, which might produce
/// something nonsensical.
pub fn normalize_refname(name: &str) -> String {
    // Splitting on whitespace drops all whitespace runs, including those at the ends.
    let words: Vec<&str> = name.split_whitespace().collect();

    // Re-join with exactly one space between words, then lower-case the whole string.
    let collapsed = words.join(" ");
    collapsed.to_lowercase()
}
48
/// A whitespace-aware function for stripping indentation from `String`s.
///
/// Removes up to `amount` leading characters from `line`, provided that all of them
/// are whitespace, and returns the remainder as `Ok(String)`.
///
/// * An empty `line` is returned unchanged.
/// * A `line` that consists solely of whitespace and is shorter than `amount`
///   yields an empty string instead of panicking.
///
/// # Errors
/// Returns `Err(message)` if a non-whitespace character is encountered before
/// `amount` characters have been stripped.
pub fn strip_indent(line: String, amount: usize) -> Result<String, &'static str> {
    if line.is_empty() {
        return Ok(line);
    }

    let mut chars = line.chars();

    for _ in 0..amount {
        match chars.next() {
            Some(c) if c.is_whitespace() => {}
            Some(_) => {
                return Err("\nNon-whitespace character encountered before supposed indentation level reached.\n");
            }
            // The line ran out before `amount` characters were seen. Everything consumed
            // so far was whitespace, so there is nothing left to strip.
            None => break,
        }
    }

    Ok(chars.as_str().to_string())
}
71
72// ============
73// Type aliases
74// ============
75
/// The integer type used for node identifiers.
pub type NodeId = u32;

/// The integer type used for enumerators in integer format,
/// such as list or footnote ordinals.
pub type EnumAsInt = u32;

/// The integer type used for question points.
pub type QuizPoints = u32;

/// The number type wrapped by the variants of the `Length` enum.
pub type LengthNum = f64;
88
89// ==========================
90// Enumerators and converters
91// ==========================
92
/// An enumeration of the different A+ questionnaire types.
/// Among other things, this is used to differentiate between
/// questionnaire hint output formats.
#[derive(Debug)]
pub enum AplusQuestionnaireType {
    PickOne,
    PickAny,
    FreeText,
}
101
/// The line style of a section title. A title is either only underlined,
/// or both over- and underlined, with a certain character.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum SectionLineStyle {
    /// The title has a line of the contained character both above and below it.
    OverAndUnder(char),
    /// The title has a line of the contained character only below it.
    Under(char),
}
108
/// The delimiters of an enumerated list item label. A label either ends with
/// a period `.` or a right parenthesis `)`, or is enclosed in matching
/// parentheses `(` and `)`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum EnumDelims {
    /// The label ends with a period `.`.
    Period,
    /// The label is enclosed in matching parentheses `(` and `)`.
    Parens,
    /// The label ends with a right parenthesis `)`.
    RParen,
}
117
/// The kind of a list enumerator label. Labels can be Arabic numerals,
/// lower- or upper-case alphabet `a--z` or `A--Z`,
/// or lower- or upper-case Roman numerals between `1--4999`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum EnumKind {
    /// Arabic numerals.
    Arabic,
    /// Lower-case letters `a--z`.
    LowerAlpha,
    /// Upper-case letters `A--Z`.
    UpperAlpha,
    /// Lower-case Roman numerals.
    LowerRoman,
    /// Upper-case Roman numerals.
    UpperRoman,
    // NOTE(review): not described by the documentation above; presumably an
    // automatically numbered enumerator -- confirm against the parser.
    Automatic,
}
129
/// The kind of a footnote marker. There are 4 different kinds:
///
/// 1. manually numbered: `.. [1]`, `.. [2]`, ...
/// 2. automatically numbered: `.. [#]`
/// 3. automatically numbered with a simple reference name: `.. [#simple_ref-name]`
/// 4. automatically generated symbolic markers: `.. [*]`
#[derive(Debug, Clone, Copy)]
pub enum FootnoteKind {
    /// A manually numbered footnote.
    Manual,
    /// An automatically numbered footnote.
    AutoNumbered,
    /// An automatically numbered footnote that has
    /// a simple reference name as its display label.
    SimpleRefName,
    /// A footnote whose label is an automatically generated string.
    AutoSymbol,
}
147
/// The type of a hyperlink target. A target is one of
///
/// 1. internal,
/// 2. external or
/// 3. indirect.
///
/// **Internal** hyperlink targets have empty link blocks. They provide an end point
/// that allows a hyperlink to connect one place to another within a document.
/// An internal hyperlink target points to the element following the target.
///
/// **External** hyperlink targets have an absolute or relative URI or an email address
/// in their link blocks. The URI may begin on the same line as the explicit markup start
/// and target name, or in an indented text block immediately following,
/// with no intervening blank lines. If the link block has multiple lines,
/// they are concatenated. Any unescaped whitespace is removed.
///
/// **Indirect** hyperlink targets have a hyperlink reference in their link blocks.
/// Just as with hyperlink references anywhere else in a document, a phrase-reference
/// used in the link block must be enclosed in backquotes. As with external hyperlink
/// targets, the link block may begin on the same line as the explicit markup start
/// or on the next line. It may also be split over multiple lines, in which case the lines
/// are joined with whitespace before being normalized.
#[derive(Debug, Clone)]
pub enum LinkTarget {
    /// An internal hyperlink target.
    Internal(String),
    /// An external hyperlink target.
    External(String),
    /// An indirect hyperlink target.
    Indirect(String),
}
176
/// The different types of references that a reference node might contain.
/// Every variant carries its reference as a `String`.
#[derive(Debug)]
pub enum Reference {
    Internal(String),
    URI(String),
    EMail(String),
}
184
/// The kind of an interpreted inline text, such as math. There are 3 kinds:
///
/// 1. the given role precedes the interpreted content,
/// 2. the interpreted content precedes the given role and
/// 3. the role is not specified and the default role is used.
#[derive(Debug, Clone, Copy)]
pub enum InterpretedTextKind {
    /// No role was specified, so the default role is used.
    Default,
    /// The given role precedes the interpreted content.
    RoleThenContent,
    /// The interpreted content precedes the given role.
    ContentThenRole,
}
195
196/// An enumeration of how lengths can be interpreted.
197/// This includes precentages of current context and absolute length
198#[derive(Debug)]
199pub enum MetricType {
200    Percentage(f64),
201    Lenght(Length),
202}
203
204/// Units of length recognized by reStructuredText.
205#[derive(Debug)]
206pub enum Length {
207
208    /// em unit, the element's font size
209    Em(LengthNum),
210
211
212    /// ex unit, x-height of the element's font size
213    Ex(LengthNum),
214
215
216    /// Millimeters
217    Mm(LengthNum),
218
219
220    /// Centimeters.
221    Cm(LengthNum),
222
223
224    /// Inches. 1in == 2.54 cm == 96 px.
225    In(LengthNum),
226
227
228    /// Pixels. 1px == 1/96 in
229    ///
230    /// ### Note!
231    /// In LaTeX, 1 px == 1/72 in.
232    Px(LengthNum),
233
234
235    /// Points. 1pt == 1/72 in
236    Pt(LengthNum),
237
238
239    /// Picas. 1 pc == 1/6 in == 12 pt
240    Pc(LengthNum),
241}
242
243impl std::fmt::Display for Length {
244    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
245        let fmt_str = match self {
246            Self::Em(num) => format!("{}em", num),
247            Self::Ex(num) => format!("{}ex", num),
248            Self::Mm(num) => format!("{}mm", num),
249            Self::Cm(num) => format!("{}cm", num),
250            Self::In(num) => format!("{}in", num),
251            Self::Px(num) => format!("{}px", num),
252            Self::Pt(num) => format!("{}pt", num),
253            Self::Pc(num) => format!("{}pc", num),
254        };
255        write!(f, "{}", fmt_str)
256    }
257}
258
/// An enumeration of the ways the column widths of a table can be given:
/// either as an explicit list of numbers or left to be determined automatically.
#[derive(Debug)]
pub enum TableColWidths {
    /// Explicitly given column widths.
    Columns(Vec<f64>),
    /// The widths are determined by the writer.
    Auto,
}
265
/// An enumeration of the different horizontal alignment options:
/// `Left`, `Center` or `Right`.
#[derive(Debug)]
pub enum HorizontalAlignment {
    Left,
    Center,
    Right,
}

impl std::fmt::Display for HorizontalAlignment {
    /// Writes the alignment as `align=<value>`, where the value is one of
    /// `left`, `center` or `right`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "align={}",
            match self {
                Self::Left => "left",
                Self::Center => "center",
                Self::Right => "right",
            }
        )
    }
}
285
/// An enumeration of the different backlinking alternatives for a table of contents node.
/// Instructs the doctree to generate links from section headers back to the table of
/// contents entries or to the table of contents itself, or to generate no backlinks at all.
#[derive(Debug)]
pub enum ToCBacklinks {
    /// Link section headers back to the table of contents entries.
    Entry,
    /// Link section headers back to the table of contents itself.
    Top,
    /// Generate no backlinks.
    None,
}
295
/// An enumeration of the alternatives of the (deprecated) "align" attribute
/// recognized by the HTML `<img>` tag.
#[derive(Debug)]
pub enum HTMLAlignment {
    Top,
    Middle,
    Bottom,
    Left,
    Center,
    Right,
}

impl std::fmt::Display for HTMLAlignment {
    /// Writes the lower-case name of the alignment, such as `top` or `center`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::Top => "top",
            Self::Middle => "middle",
            Self::Bottom => "bottom",
            Self::Left => "left",
            Self::Center => "center",
            Self::Right => "right",
        })
    }
}
321
322///
323/// Enumerated the types of tree traversals that one of the `DocTree` walk methods might perform.
324pub enum TraversalType {
325
326    /// Traversal based on node ID. Causes the walker method to look for a specific node
327    /// with the given ID.
328    ID(NodeId),
329}
330
331use crate::doctree::DocTree;
332use crate::parser::state_machine::State;
333
334/// An enumeration of the different ways a (nested) parsing session might terminate.
335/// The return type of the `Parser::parse` method. Generally, finishing conditions
336/// that are not outright failures will enclose the document tree fed to the parser
337/// when it was initialized.
338pub enum ParsingResult {
339
340    /// This will be returned, if the parser finished by passing over the last line of the source.
341    /// This generally indicates that the source file was parsed successfully.
342    EOF {
343        doctree: DocTree,
344        state_stack: Vec<State>,
345    },
346
347    /// This will be returned if the parser was unable to parse any elements on some line of the source,
348    /// as patterns not matching will drain the parser state stack of states. This might be useful during
349    /// nested parsing sessions, when an empty stack right at the start of the parsing process indicates
350    /// that there were no expected nested structures on the same line.
351    EmptyStateStack {
352        doctree: DocTree,
353        state_stack: Vec<State>,
354    },
355
356    /// A simple failure type. This will be returned when there was clearly no way to recover.
357    Failure { message: String, doctree: DocTree },
358}
359
360impl ParsingResult {
361
362    /// Unwraps the contained doctree in one of the non-failure variants.
363    /// Simply panics if this is attempted for the `Failure` variant.
364    pub fn unwrap_tree(self) -> DocTree {
365        match self {
366            Self::EOF {
367                doctree,
368                state_stack,
369            } => doctree,
370            Self::EmptyStateStack {
371                doctree,
372                state_stack,
373            } => doctree,
374            Self::Failure { doctree, .. } => doctree,
375        }
376    }
377}
378
/// The status of an A+ exercise. There are 6 possible statuses:
///
/// * ready: Visible exercise listed in table of contents.
/// * unlisted (default): Unlisted in table of contents, otherwise same as ready.
/// * hidden: Hidden from non course staff.
/// * enrollment: Questions for students when they enroll to a course.
/// * enrollment_ext: Same as enrollment but for external students.
/// * maintenance: Hides the exercise description and prevents submissions.
#[derive(Debug)]
pub enum AplusExerciseStatus {
    /// Visible exercise listed in table of contents.
    Ready,
    /// Unlisted in table of contents, otherwise the same as `Ready`.
    Unlisted,
    /// Hidden from non course staff.
    Hidden,
    /// Questions for students when they enroll to a course.
    Enrollment,
    /// Same as `Enrollment`, but for external students.
    EnrollmentExt,
    /// Hides the exercise description and prevents submissions.
    Maintenance,
}
397
/// An enumeration of the different tokenizers offered by the A+ Radar tokenizer.
///
/// See [the docs](https://github.com/Aalto-LeTech/radar/tree/master/tokenizer#tokenizers)
/// for more details.
#[derive(Clone, Copy, Debug)]
pub enum AplusRadarTokenizer {
    Python3,
    Scala,
    JavaScript,
    CSS,
    HTML,
    None,
}
411
/// The "clear" alternatives of an A+ active element. The variant "both" forces
/// the element to a new line, whereas "left" ("right") allows no floating elements
/// on the left (right).
#[derive(Clone, Copy, Debug)]
pub enum AplusActiveElementClear {
    /// Forces the element to a new line.
    Both,

    /// Allows no floating elements on the left.
    Left,

    /// Allows no floating elements on the right.
    Right,
}
426
/// The input type of an A+ active element. Use "file" for file inputs,
/// "clickable" for clickable inputs and "dropdown" for dropdowns.
/// For dropdowns, the available options should be listed after the type
/// indicating "dropdown", in the format `dropdown:option1,option2,option3`.
#[derive(Debug)]
pub enum AplusActiveElementInputType {
    /// Used for file inputs.
    File,

    /// Used for clickable inputs.
    Clickable,

    /// Used for dropdown menus. Comes with the options in a `String`.
    Dropdown(String),
}
443
/// The output type of an A+ active element.
/// The default type is text; for image (png) outputs use "image".
#[derive(Clone, Copy, Debug)]
pub enum AplusActiveElementOutputType {
    /// Text output. This is the default type.
    Text,
    /// Image (png) output.
    Image,
}
451
/// An enumeration of the different destinations of writer output:
/// the stdout stream, the stderr stream or a file.
pub enum OutputStream {
    /// Directs the output to the stdout stream.
    StdOut,
    /// Directs the output to the stderr stream.
    StdErr,
    /// Directs the output to a file.
    File,
}
463
464// ===========
465//  Constants
466// ===========
467
/// The symbols that the standard Docutils system uses as footnote marks
/// when a `FootnoteKind::AutoSymbol` is detected. From first to last, they are:
///
/// 1.  asterisk/star (`*`)
/// 2.  dagger (`†`|`U+02020`)
/// 3.  double dagger (`‡`|`U+02021`)
/// 4.  section mark (`§`|`U+000A7`)
/// 5.  pilcrow or paragraph mark (`¶`|`U+000B6`)
/// 6.  number sign (`#`)
/// 7.  spade suit (`♠`|`U+02660`)
/// 8.  heart suit (`♥`|`U+02665`)
/// 9.  diamond suit (`♦`|`U+02666`)
/// 10. club suit (`♣`|`U+02663`)
///
/// Each time a new autosymbol footnote is detected, the next unused item
/// of this list becomes the footnote label character. If `n` is the number of times
/// this list has been iterated over and `s` the current autosymbol, the actual label
/// of the footnote is `s^(n+1)`. For example, if a document has `12` automatically
/// symboled footnotes and a new one is constructed, its label will be `‡‡ = ‡² = ‡¹⁺¹`.
pub const FOOTNOTE_SYMBOLS: [char; 10] = [
    '*', '†', '‡', '§', '¶', // asterisk, dagger, double dagger, section mark, pilcrow
    '#', '♠', '♥', '♦', '♣', // number sign, spade, heart, diamond, club
];
491
/// The characters that can be used for underlining section titles,
/// marking the lines of literal text blocks and creating transitions.
pub const SECTION_AND_QUOTING_CHARS: [char; 32] = [
    '!', '"', '#', '$', '%', '&', '\'', '(',
    ')', '*', '+', ',', '-', '.', '/', ':',
    ';', '<', '=', '>', '?', '@', '[', '\\',
    ']', '^', '_', '`', '{', '|', '}', '~',
];