pukram2html/
lib.rs

1#![deny(missing_docs)]
2
3/*!
4A Rust library for converting Pukram-formatted text to HTML.
5
6Pukram is a lightweight markup language inspired by Markdown, designed for easy formatting of text content.
7This library is primarily targeted at developers building web servers who need to convert Pukram-formatted
8text into HTML for displaying on websites. It can also be used to convert Pukram files to HTML, which
9can then be further converted to PDF using other tools.
10*/
11
12use std::{
13    collections::HashMap,
14    io::{Result, Write},
15    marker::PhantomData,
16};
17
18use pukram_formatting::Formatting;
19
/// Splits `value` at the first occurrence of `delimiter`.
///
/// Returns the part before the delimiter and, if the delimiter was found, the part after it.
/// Without a delimiter the whole input is returned together with `None`.
fn try_split_once(value: &str, delimiter: char) -> (&str, Option<&str>) {
    match value.split_once(delimiter) {
        Some((head, tail)) => (head, Some(tail)),
        None => (value, None),
    }
}
25
/// Splits `value` at the first occurrence of `delimiter`.
///
/// When the delimiter does not occur, the whole input is the first part and the second part is empty.
fn split_once_or_empty(value: &str, delimiter: char) -> (&str, &str) {
    match value.split_once(delimiter) {
        Some(parts) => parts,
        None => (value, ""),
    }
}
29
30fn split_link(value: &str, delimiter: char) -> (&str, &str) {
31    let (name, link) = try_split_once(value, delimiter);
32    let name = name.trim();
33    (name, link.map_or(name, |link| link.trim()))
34}
35
/// Helpers for writing a formatting state as HTML tags.
trait FormattingExtensions {
    /// Writes the opening tags for this formatting state to `output`.
    fn start<W>(&self, output: &mut W) -> Result<()>
    where
        W: Write;

    /// Writes the closing tags for this formatting state to `output`.
    fn finish<W>(&self, output: &mut W) -> Result<()>
    where
        W: Write;
}
40
41impl FormattingExtensions for Formatting {
42    fn start<W: Write>(&self, output: &mut W) -> Result<()> {
43        fn w<W: Write>(output: &mut W, name: &str) -> Result<()> {
44            write!(output, "<{name}>")
45        }
46
47        if self.is_bold() {
48            w(output, "b")?;
49        }
50        if self.is_italic() {
51            w(output, "i")?;
52        }
53        if self.is_mono() {
54            w(output, "code")?;
55        }
56        match (self.is_top(), self.is_bottom()) {
57            (true, true) => w(output, "small")?,
58            (true, false) => w(output, "sup")?,
59            (false, true) => w(output, "sub")?,
60            (false, false) => (),
61        }
62        if self.is_underscore() {
63            w(output, "u")?;
64        }
65        if self.is_strikethrough() {
66            w(output, "s")?;
67        }
68
69        Ok(())
70    }
71
72    fn finish<W: Write>(&self, output: &mut W) -> Result<()> {
73        fn w<W: Write>(output: &mut W, name: &str) -> Result<()> {
74            write!(output, "</{name}>")
75        }
76
77        if self.is_strikethrough() {
78            w(output, "s")?;
79        }
80        if self.is_underscore() {
81            w(output, "u")?;
82        }
83        match (self.is_top(), self.is_bottom()) {
84            (true, true) => w(output, "small")?,
85            (true, false) => w(output, "sup")?,
86            (false, true) => w(output, "sub")?,
87            (false, false) => (),
88        }
89        if self.is_mono() {
90            w(output, "code")?;
91        }
92        if self.is_italic() {
93            w(output, "i")?;
94        }
95        if self.is_bold() {
96            w(output, "b")?;
97        }
98
99        Ok(())
100    }
101}
102
/// Struct to hold information about the converted text.
///
/// The counters only cover regular text lines; headers and special lines
/// (starting with `-`, `+` or `=`) are not counted.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct TextInfo {
    /// Number of lines in the converted text.
    pub lines: usize,
    /// Number of words in the converted text.
    pub words: usize,
    /// Number of characters in the converted text.
    pub chars: usize,
}
112
113/// Convert Pukram to HTML.
114///
115/// This function is a convenience wrapper around `convert_subheader` with a start level of 0.
116#[inline]
117pub fn convert<R: AsRef<str>>(
118    input: impl IntoIterator<Item = R>,
119    output: &mut impl Write,
120) -> TextInfo {
121    convert_subheader(input, output, 0)
122}
123
/// A list to set which features are allowed.
/// By default, all features are allowed.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct FeatureList(u8);

impl Default for FeatureList {
    fn default() -> Self {
        Self(Self::ALL)
    }
}

impl FeatureList {
    // Every bit set, so all features are allowed.
    const ALL: u8 = 0b11111111;

    const LINKS: u8 = 0b00000001;
    const IMAGES: u8 = 0b00000010;
    const AUDIO: u8 = 0b00000100;

    /// Creates an empty feature list, where nothing is allowed.
    pub fn none() -> Self {
        Self(0)
    }

    // Checks whether any bit of `flag` is set.
    #[inline]
    fn has(self, flag: u8) -> bool {
        self.0 & flag != 0
    }

    // Returns a copy with the bits of `flag` set or cleared.
    #[inline]
    fn with_flag(self, flag: u8, enable: bool) -> Self {
        Self(if enable {
            self.0 | flag
        } else {
            self.0 & !flag
        })
    }

    /// Links are allowed.
    #[inline]
    pub fn links(&self) -> bool {
        self.has(Self::LINKS)
    }

    /// Images are allowed.
    #[inline]
    pub fn images(&self) -> bool {
        self.has(Self::IMAGES)
    }

    /// Audio is allowed.
    #[inline]
    pub fn audio(&self) -> bool {
        self.has(Self::AUDIO)
    }

    /// Set if links are allowed.
    #[must_use]
    pub fn with_links(self, enable: bool) -> Self {
        self.with_flag(Self::LINKS, enable)
    }

    /// Set if images are allowed.
    #[must_use]
    pub fn with_images(self, enable: bool) -> Self {
        self.with_flag(Self::IMAGES, enable)
    }

    /// Set if audio is allowed.
    #[must_use]
    pub fn with_audio(self, enable: bool) -> Self {
        self.with_flag(Self::AUDIO, enable)
    }

    // In-place variant of `with_links`.
    #[inline]
    fn set_links(&mut self, enable: bool) {
        *self = self.with_links(enable);
    }

    // In-place variant of `with_images`.
    #[inline]
    fn set_images(&mut self, enable: bool) {
        *self = self.with_images(enable);
    }

    // In-place variant of `with_audio`.
    #[inline]
    fn set_audio(&mut self, enable: bool) {
        *self = self.with_audio(enable);
    }
}
207
208/// Configuration settings for the conversion process.
209pub struct Settings<W: Write, F: Fn(&str, &mut W, usize)> {
210    handler: F,
211    start_level: usize,
212    use_textboxes: bool,
213    allow: FeatureList,
214    handler_data: PhantomData<W>,
215}
216
217impl<W: Write> Default for Settings<W, fn(&str, &mut W, usize)> {
218    fn default() -> Self {
219        fn handler(_input: &str, _output: &mut impl Write, _level: usize) {}
220        Self {
221            handler,
222            start_level: 0,
223            use_textboxes: false,
224            allow: FeatureList::default(),
225            handler_data: PhantomData,
226        }
227    }
228}
229
230impl<W: Write, F: Fn(&str, &mut W, usize)> Settings<W, F> {
231    /// Set the starting header level for the converted HTML.
232    pub fn with_start_level(self, start_level: usize) -> Self {
233        Self {
234            start_level,
235            ..self
236        }
237    }
238
239    /// Set whether to use textboxes for certain elements.
240    pub fn with_use_textboxes(self, use_textboxes: bool) -> Self {
241        Self {
242            use_textboxes,
243            ..self
244        }
245    }
246
247    /// Set which features are allowed.
248    pub fn with_features(self, allow: FeatureList) -> Self {
249        Self { allow, ..self }
250    }
251
252    /// Set whether links are allowed.
253    pub fn with_allow_links(mut self, allow_links: bool) -> Self {
254        self.allow.set_links(allow_links);
255        self
256    }
257
258    /// Set whether images are allowed.
259    pub fn with_allow_images(mut self, allow_images: bool) -> Self {
260        self.allow.set_images(allow_images);
261        self
262    }
263
264    /// Set whether audio is allowed.
265    pub fn with_allow_audio(mut self, allow_audio: bool) -> Self {
266        self.allow.set_audio(allow_audio);
267        self
268    }
269
270    /// Set a custom handler function for processing special list elements.
271    pub fn with_handler<F2: Fn(&str, &mut W, usize)>(self, handler: F2) -> Settings<W, F2> {
272        let Self {
273            start_level,
274            use_textboxes,
275            allow,
276            handler_data,
277            ..
278        } = self;
279
280        Settings {
281            handler,
282            start_level,
283            use_textboxes,
284            allow,
285            handler_data,
286        }
287    }
288}
289
290/// Convert Pukram to HTML with subheaders.
291///
292/// This function is a convenience wrapper around `convert_extended` with default settings and a specified start level.
293#[inline]
294pub fn convert_subheader<R: AsRef<str>>(
295    input: impl IntoIterator<Item = R>,
296    output: &mut impl Write,
297    start_level: usize,
298) -> TextInfo {
299    convert_extended(
300        input,
301        output,
302        Settings::default().with_start_level(start_level),
303    )
304}
305
306/// Convert Pukram to HTML with extended settings.
307pub fn convert_extended<R: AsRef<str>, W: Write, F: Fn(&str, &mut W, usize)>(
308    input: impl IntoIterator<Item = R>,
309    mut output: &mut W,
310    settings: Settings<W, F>,
311) -> TextInfo {
312    let Settings {
313        start_level,
314        handler,
315        use_textboxes,
316        ..
317    } = settings;
318
319    let mut lines = 0;
320    let mut words = 0;
321    let mut chars = 0;
322
323    let mut ignore_next = false;
324
325    enum Action {
326        Name {
327            original: Box<str>,
328            shown: Option<Box<str>>,
329        },
330        Link {
331            name: Box<str>,
332            link: Box<str>,
333        },
334        Image {
335            name: Box<str>,
336            path: Box<str>,
337        },
338        Audio {
339            path: Box<str>,
340        },
341    }
342
343    impl Action {
344        fn handle(self, output: &mut impl Write, names: &mut HashMap<Box<str>, Box<str>>) {
345            use Action::*;
346            match self {
347                Name { original, shown } => {
348                    if let Some(shown) = shown {
349                        names.insert(original, shown);
350                    } else {
351                        names.remove(&original);
352                    }
353                }
354                Link { name, link } => {
355                    let _ = writeln!(output, "<a href=\"{link}\">{name}</a>");
356                }
357                Image { name, path } => {
358                    let _ = writeln!(
359                        output,
360                        "<img src=\"{path}\" alt=\"{name}\" style='max-height: 100%; max-width: 100%; object-fit: cover'>"
361                    );
362                }
363                Audio { path } => {
364                    let _ = writeln!(output, "<audio controls src=\"{path}\"/>");
365                }
366            }
367        }
368    }
369
370    fn handle_actions(
371        actions: &mut Vec<Action>,
372        output: &mut impl Write,
373        names: &mut HashMap<Box<str>, Box<str>>,
374    ) {
375        let actions = std::mem::take(actions);
376        let mut actions = actions.into_iter();
377        let Some(action) = actions.next() else {
378            return;
379        };
380        action.handle(output, names);
381        for action in actions {
382            let _ = writeln!(output, "<br>");
383            action.handle(output, names);
384        }
385    }
386
387    let mut actions = Vec::new();
388
389    #[derive(PartialEq, Eq)]
390    enum State {
391        Pause,
392        Block,
393        Name,
394        NameOnly,
395    }
396
397    let mut state = State::Pause;
398    let mut last_level = start_level;
399    let mut names = HashMap::new();
400
401    let finish_block = |state: &mut State,
402                        actions: &mut Vec<Action>,
403                        output: &mut W,
404                        names: &mut HashMap<Box<str>, Box<str>>| {
405        if !actions.is_empty() {
406            let _ = writeln!(output, "{}", if *state == Pause { "<p>" } else { "<br>" });
407            handle_actions(actions, output, names);
408            if *state == Pause {
409                let _ = writeln!(output, "</p>");
410            }
411        }
412
413        use State::*;
414        let _ = match *state {
415            Name | NameOnly => writeln!(output, "</fieldset></p>"),
416            Block => writeln!(output, "</p>"),
417            Pause => return,
418        };
419
420        *state = Pause;
421    };
422
423    for line in input {
424        let mut line = line.as_ref();
425
426        let mut line_chars = line.chars();
427        let mut level = 0;
428        while line_chars.next() == Some('#') {
429            level += 1;
430        }
431        if level > 0 {
432            line = &line[level..];
433        }
434        let line = line.trim();
435        if level > 0 {
436            let level = start_level + level;
437            let _ = write!(output, "<h{level}>{line}</h{level}>");
438            last_level = level;
439            continue;
440        }
441
442        if let Some(stripped) = line.strip_prefix('-') {
443            let (kind, args) = split_once_or_empty(stripped.trim_start(), ' ');
444            let (kind, default) = kind
445                .strip_suffix('!')
446                .map_or((kind, false), |kind| (kind, true));
447            let (variant, sub) = try_split_once(kind, ':');
448            match variant {
449                "Character" => {
450                    if let Some(sub) = sub
451                        && let Some((original, "name")) = sub.split_once(':')
452                    {
453                        let shown = if default {
454                            None
455                        } else {
456                            Some(args.trim().into())
457                        };
458                        actions.push(Action::Name {
459                            original: original.into(),
460                            shown,
461                        });
462                    }
463                }
464                "Link" if settings.allow.links() => {
465                    let (name, link) = split_link(args, ' ');
466
467                    actions.push(Action::Link {
468                        name: name.into(),
469                        link: link.into(),
470                    });
471                }
472                "Image" if settings.allow.images() => {
473                    let (name, path) = split_link(args, ' ');
474
475                    actions.push(Action::Image {
476                        name: name.into(),
477                        path: path.into(),
478                    });
479                }
480                "Audio" if settings.allow.audio() => {
481                    actions.push(Action::Audio { path: args.into() })
482                }
483                _ => continue,
484            }
485            continue;
486        }
487        if let Some(stripped) = line.strip_prefix('+') {
488            handler(stripped.trim_start(), output, last_level);
489            continue;
490        }
491        if line.starts_with('=') {
492            continue;
493        }
494
495        if line.is_empty() {
496            finish_block(&mut state, &mut actions, output, &mut names);
497            continue;
498        }
499
500        let mut line = line;
501
502        if state == State::Pause {
503            let _ = writeln!(output, "<p>");
504            if let Some((name, text)) = line.split_once(':') {
505                let name = name.trim_end();
506                let text = text.trim_start();
507                let name = names.get(name).map_or(name, |name| name.as_ref());
508                let name = name
509                    .replace('&', "&amp;")
510                    .replace('<', "&lt;")
511                    .replace('>', "&gt;");
512                state = if use_textboxes {
513                    let _ = writeln!(output, "<fieldset>",);
514                    let _ = writeln!(output, "<legend>{name}</legend>");
515                    if text.is_empty() {
516                        state = State::NameOnly;
517                        continue;
518                    }
519                    State::Name
520                } else {
521                    let _ = writeln!(output, "<i>{name}</i>:");
522                    State::Block
523                };
524                line = text;
525            } else {
526                state = State::Block;
527            }
528        } else if state == State::NameOnly {
529            state = State::Name;
530        } else {
531            let _ = writeln!(output, "<br>");
532        }
533
534        if !actions.is_empty() {
535            handle_actions(&mut actions, output, &mut names);
536            let _ = writeln!(output, "<br>");
537        }
538
539        let mut was_whitespace = false;
540
541        lines += 1;
542
543        let mut use_formatting = false;
544        let mut formatting = Formatting::default();
545
546        for c in line.chars() {
547            if ignore_next {
548                ignore_next = false;
549            } else {
550                if c == '\\' {
551                    ignore_next = true;
552                    continue;
553                }
554
555                let last_formatting = formatting;
556                if formatting.apply(c) {
557                    if use_formatting {
558                        let _ = last_formatting.finish(&mut output);
559                        use_formatting = false;
560                    }
561                    continue;
562                }
563
564                if !use_formatting {
565                    let _ = formatting.start(&mut output);
566                    use_formatting = true;
567                }
568            }
569
570            chars += 1;
571
572            let is_whitespace = c.is_whitespace();
573            if is_whitespace && !was_whitespace {
574                words += 1;
575            }
576            was_whitespace = is_whitespace;
577
578            let _ = match c {
579                '<' => write!(output, "&lt;"),
580                '>' => write!(output, "&gt;"),
581                '&' => write!(output, "&amp;"),
582                _ => write!(output, "{c}"),
583            };
584        }
585
586        if use_formatting {
587            let _ = formatting.finish(&mut output);
588        }
589    }
590
591    finish_block(&mut state, &mut actions, output, &mut names);
592
593    TextInfo {
594        lines,
595        words,
596        chars,
597    }
598}