gpp/
lib.rs

1//! gpp is a Generic PreProcessor written in Rust.
2//!
3//! It supports:
4//! - Simple macros, no function macros
5//! - #include
6//! - #define and #undef
7//! - #ifdef, #ifndef, #elifdef, #elifndef, #else and #endif
8//! - #exec for running commands
9//! - #in and #endin for giving input to commands
10//!
11//! #includes work differently from C, as they do not require (and do not work with) quotes or <>,
12//! so `#include file.txt` is the correct syntax. It does not support #if or #elif, and recursive
13//! macros will cause the library to get stuck.
14//!
15//! # About
16//!
//! The hash in any command may be followed by optional whitespace, so for example `# undef Macro`
//! is valid, but ` # undef Macro` is not, because the hash must be the first character of the line.
19//!
20//! ## #define and #undef
21//!
22//! #define works similar to C: `#define [name] [value]`, and #undef too: `#undef [name]`. Be
23//! careful though, because unlike C macro expansion is recursive: if you `#define A A` and then
24//! use A, gpp will run forever.
25//! If #define is not given a value, then it will default to an empty string.
26//!
27//! ## #include
28//!
29//! Includes, unlike C, do not require quotes or angle brackets, so this: `#include "file.txt"` or
30//! this: `#include <file.txt>` will not work; you must write `#include file.txt`.
31//!
32//! Also, unlike C the directory does not change when you #include; otherwise, gpp would change its
33//! current directory and wouldn't be thread safe. This means that if you `#include dir/file.txt`
34//! and in `dir/file.txt` it says `#include other_file.txt`, that would refer to `other_file.txt`,
35//! not `dir/other_file.txt`.
36//!
37//! ## Ifs
38//!
39//! The #ifdef, #ifndef, #elifdef, #elifndef, #else and #endif commands work exactly as you expect.
40//! I did not add generic #if commands to gpp, as it would make it much more complex and require a
41//! lot of parsing, and most of the time these are all you need anyway.
42//!
43//! ## #exec, #in and #endin
44//!
45//! The exec command executes the given command with `cmd /C` for Windows and `sh -c` for
46//! everything else, and captures the command's standard output. For example, `#exec echo Hi!` will
47//! output `Hi!`. It does not capture the command's standard error, and parsing stops if the
48//! command exits with a nonzero status.
49//!
50//! Due to the security risk enabling #exec causes, by default exec is disabled, however you can
51//! enable it by changing the `allow_exec` flag in your context. If the input tries to `#exec` when
52//! exec is disabled, it will cause an error.
53//!
54//! The in command is similar to exec, but all text until the endin command is passed into the
55//! program's standard input. For example,
56//! ```text
57//! #in sed 's/tree/three/g'
58//! One, two, tree.
59//! #endin
60//! ```
61//! Would output `One, two, three.`. Note that you shouldn't do this, just using `#define tree
62//! three` would be much faster and less platform-dependent. You can also place more commands in
63//! the in block, including other in blocks. For a useful example:
64//! ```text
65//! <style>
66//! #in sassc -s
67//! # include styles.scss
68//! #endin
69//! </style>
70//! ```
//! This compiles your scss file into css using Sassc and includes it in the HTML every time you
//! generate your webpage with gpp.
73//!
74//! ## Literal hashes
75//!
76//! In order to insert literal hash symbols at the start of the line, simply use two hashes.
77//! `##some text` will convert into `#some text`, while `#some text` will throw an error as `some`
78//! is not a command.
79//!
80//! # Examples
81//!
82//! ```
83//! // Create a context for preprocessing
84//! let mut context = gpp::Context::new();
85//!
86//! // Add a macro to that context manually (context.macros is a HashMap)
87//! context.macros.insert("my_macro".to_owned(), "my_value".to_owned());
88//!
89//! // Process some text using that
90//! assert_eq!(gpp::process_str("My macro is my_macro\n", &mut context).unwrap(), "My macro is my_value\n");
91//!
92//! // Process some multi-line text, changing the context
93//! assert_eq!(gpp::process_str("
94//! #define Line Row
95//! Line One
96//! Line Two
97//! The Third Line", &mut context).unwrap(), "
98//! Row One
99//! Row Two
100//! The Third Row
101//! ");
102//!
103//! // The context persists
104//! assert_eq!(context.macros.get("Line").unwrap(), "Row");
105//!
106//! // Try some more advanced commands
107//! assert_eq!(gpp::process_str("
108//! Line Four
109//! #ifdef Line
110//! #undef Line
111//! #endif
112//! Line Five", &mut context).unwrap(), "
113//! Row Four
114//! Line Five
115//! ");
116//! ```
117
118#[cfg(test)]
119mod tests;
120
121use std::collections::HashMap;
122use std::error;
123use std::fmt;
124use std::fs::File;
125use std::io::{self, BufRead, BufReader, Write};
126use std::process::{Child, Command as SystemCommand, ExitStatus, Stdio};
127use std::string::FromUtf8Error;
128
/// Mutable state threaded through every processing call.
///
/// Besides the table of defined macros, the context records how many nested conditional blocks
/// are currently being skipped. Text is only emitted while `inactive_stack` is 0; any other value
/// is the number of `#endif`s that must be seen before output resumes. `used_if` remembers
/// whether the current if group has already had a branch accepted, which the `#elifdef`,
/// `#elifndef` and `#else` handlers consult.
///
/// Macro names are not validated: by inserting into `Context::macros` directly you can register
/// names that `#define` could never produce. During substitution, however, a name is only
/// replaced where the characters directly before and after it are **not** alphanumeric or an
/// underscore.
#[derive(Debug, Default)]
pub struct Context {
    /// Every macro that is currently defined, keyed by name.
    pub macros: HashMap<String, String>,
    /// How many layers of inactive if statements enclose the current line.
    pub inactive_stack: u32,
    /// Whether a branch of the current if statement has been accepted.
    pub used_if: bool,
    /// Whether the #exec and #in commands may be used.
    pub allow_exec: bool,
    /// Child processes that #in is currently piping to, innermost last.
    pub in_stack: Vec<Child>,
}

impl Context {
    /// Build an empty context: no macros, no inactive stack, exec commands disallowed.
    pub fn new() -> Self {
        Default::default()
    }

    /// Build an empty context like [`Context::new`], but with exec commands allowed.
    pub fn new_exec() -> Self {
        Self {
            allow_exec: true,
            ..Self::new()
        }
    }

    /// Build a context whose macro table is taken from `macros`.
    pub fn from_macros(macros: impl Into<HashMap<String, String>>) -> Self {
        let mut context = Self::new();
        context.macros = macros.into();
        context
    }

    /// Build a context whose macro table is collected from `(name, value)` tuples.
    pub fn from_macros_iter(macros: impl IntoIterator<Item = (String, String)>) -> Self {
        let table: HashMap<String, String> = macros.into_iter().collect();
        Self::from_macros(table)
    }

    /// Set whether exec commands are allowed, returning the updated context.
    pub fn exec(self, allow_exec: bool) -> Self {
        Self { allow_exec, ..self }
    }
}
181
/// Everything that can go wrong while preprocessing.
///
/// # Examples
///
/// ```
/// let error = gpp::Error::TooManyParameters { command: "my_command" };
/// assert_eq!(format!("{}", error), "Too many parameters for #my_command");
/// ```
/// ```
/// let error = gpp::Error::FileError {
///     filename: "my_file".to_string(),
///     line: 10,
///     error: Box::new(gpp::Error::UnexpectedCommand {
///         command: "this_command",
///     }),
/// };
/// assert_eq!(format!("{}", error), "Error in my_file:10: Unexpected command #this_command");
/// ```
#[derive(Debug)]
pub enum Error {
    /// The line named a command that is not known.
    InvalidCommand { command_name: String },
    /// A command received more parameters than it accepts (e.g. #endif with parameters).
    TooManyParameters { command: &'static str },
    /// A command appeared where it was not expected; currently only produced for a stray #endin.
    UnexpectedCommand { command: &'static str },
    /// A child process finished with a nonzero status.
    ChildFailed { status: ExitStatus },
    /// No pipe to the child process was available.
    PipeFailed,
    /// An I/O operation failed.
    IoError(io::Error),
    /// A child's standard output was not valid UTF-8.
    FromUtf8Error(FromUtf8Error),
    /// An error that occurred while processing a named input, with its location.
    FileError {
        filename: String,
        line: usize,
        error: Box<Error>,
    },
}

impl fmt::Display for Error {
    /// Render the human-readable message for this error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::InvalidCommand { command_name } => {
                write!(f, "Invalid command '{}'", command_name)
            }
            Self::TooManyParameters { command } => {
                write!(f, "Too many parameters for #{}", command)
            }
            Self::UnexpectedCommand { command } => {
                write!(f, "Unexpected command #{}", command)
            }
            Self::ChildFailed { status } => {
                write!(f, "Child failed with exit code {}", status)
            }
            Self::PipeFailed => f.write_str("Pipe to child failed"),
            Self::IoError(inner) => write!(f, "I/O Error: {}", inner),
            Self::FromUtf8Error(inner) => write!(f, "UTF-8 Error: {}", inner),
            Self::FileError {
                filename,
                line,
                error,
            } => write!(f, "Error in {}:{}: {}", filename, line, error),
        }
    }
}

impl error::Error for Error {
    /// Expose the wrapped error for the variants that carry one.
    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
        match self {
            Self::IoError(inner) => Some(inner),
            Self::FromUtf8Error(inner) => Some(inner),
            Self::FileError { error, .. } => Some(error),
            Self::InvalidCommand { .. }
            | Self::TooManyParameters { .. }
            | Self::UnexpectedCommand { .. }
            | Self::ChildFailed { .. }
            | Self::PipeFailed => None,
        }
    }
}

impl From<io::Error> for Error {
    /// Wrap an I/O error so that `?` can be used on I/O results.
    fn from(e: io::Error) -> Self {
        Self::IoError(e)
    }
}

impl From<FromUtf8Error> for Error {
    /// Wrap a UTF-8 decoding error so that `?` can be used on `String::from_utf8`.
    fn from(e: FromUtf8Error) -> Self {
        Self::FromUtf8Error(e)
    }
}
269
/// Build (but do not start) a system command that runs `cmd` through the platform shell:
/// `cmd /C` when compiled for Windows, `/bin/sh -c` for every other target.
fn shell(cmd: &str) -> SystemCommand {
    let mut command = if cfg!(target_os = "windows") {
        let mut windows = SystemCommand::new("cmd");
        windows.arg("/C");
        windows
    } else {
        let mut posix = SystemCommand::new("/bin/sh");
        posix.arg("-c");
        posix
    };
    // The whole command line is handed to the shell as a single argument.
    command.arg(cmd);
    command
}
280
281fn process_exec(line: &str, _: &mut Context) -> Result<String, Error> {
282    let output = shell(line).output()?;
283    if !output.status.success() {
284        return Err(Error::ChildFailed {
285            status: output.status,
286        });
287    }
288    Ok(String::from_utf8(output.stdout)?)
289}
290
291fn process_in(line: &str, context: &mut Context) -> Result<String, Error> {
292    let child = shell(line)
293        .stdin(Stdio::piped())
294        .stdout(Stdio::piped())
295        .spawn()?;
296    context.in_stack.push(child);
297    Ok(String::new())
298}
299
300fn process_endin(line: &str, context: &mut Context) -> Result<String, Error> {
301    if !line.is_empty() {
302        return Err(Error::TooManyParameters { command: "endin" });
303    }
304    if context.in_stack.is_empty() {
305        return Err(Error::UnexpectedCommand { command: "endin" });
306    }
307    let child = context.in_stack.pop().unwrap();
308    let output = child.wait_with_output()?;
309    if !output.status.success() {
310        return Err(Error::ChildFailed {
311            status: output.status,
312        });
313    }
314    Ok(String::from_utf8(output.stdout)?)
315}
316
317fn process_include(line: &str, context: &mut Context) -> Result<String, Error> {
318    process_file(line, context)
319}
320
321fn process_define(line: &str, context: &mut Context) -> Result<String, Error> {
322    let mut parts = line.splitn(2, ' ');
323    let name = parts.next().unwrap();
324    let value = parts.next().unwrap_or("");
325
326    context.macros.insert(name.to_owned(), value.to_owned());
327    Ok(String::new())
328}
329
330fn process_undef(line: &str, context: &mut Context) -> Result<String, Error> {
331    context.macros.remove(line);
332    Ok(String::new())
333}
334
335fn process_ifdef(line: &str, context: &mut Context, inverted: bool) -> Result<String, Error> {
336    if context.inactive_stack > 0 {
337        context.inactive_stack += 1;
338    } else if context.macros.contains_key(line) == inverted {
339        context.inactive_stack = 1;
340        context.used_if = false;
341    } else {
342        context.used_if = true;
343    }
344    Ok(String::new())
345}
346
347fn process_elifdef(line: &str, context: &mut Context, inverted: bool) -> Result<String, Error> {
348    if context.inactive_stack == 0 {
349        context.inactive_stack = 1;
350    } else if context.inactive_stack == 1
351        && !context.used_if
352        && context.macros.contains_key(line) != inverted
353    {
354        context.inactive_stack = 0;
355    }
356    Ok(String::new())
357}
358
359fn process_else(line: &str, context: &mut Context) -> Result<String, Error> {
360    if !line.is_empty() {
361        return Err(Error::TooManyParameters { command: "else" });
362    }
363    context.inactive_stack = match context.inactive_stack {
364        0 => 1,
365        1 if !context.used_if => 0,
366        val => val,
367    };
368    Ok(String::new())
369}
370
371fn process_endif(line: &str, context: &mut Context) -> Result<String, Error> {
372    if !line.is_empty() {
373        return Err(Error::TooManyParameters { command: "endif" });
374    }
375    if context.inactive_stack != 0 {
376        context.inactive_stack -= 1;
377    }
378    Ok(String::new())
379}
380
381#[derive(Clone, Copy)]
382struct Command {
383    name: &'static str,
384    requires_exec: bool,
385    ignored_by_if: bool,
386    execute: fn(&str, &mut Context) -> Result<String, Error>,
387}
388
389const COMMANDS: &[Command] = &[
390    Command {
391        name: "exec",
392        requires_exec: true,
393        ignored_by_if: false,
394        execute: process_exec,
395    },
396    Command {
397        name: "in",
398        requires_exec: true,
399        ignored_by_if: false,
400        execute: process_in,
401    },
402    Command {
403        name: "endin",
404        requires_exec: true,
405        ignored_by_if: false,
406        execute: process_endin,
407    },
408    Command {
409        name: "include",
410        requires_exec: false,
411        ignored_by_if: false,
412        execute: process_include,
413    },
414    Command {
415        name: "define",
416        requires_exec: false,
417        ignored_by_if: false,
418        execute: process_define,
419    },
420    Command {
421        name: "undef",
422        requires_exec: false,
423        ignored_by_if: false,
424        execute: process_undef,
425    },
426    Command {
427        name: "ifdef",
428        requires_exec: false,
429        ignored_by_if: true,
430        execute: |line, context| process_ifdef(line, context, false),
431    },
432    Command {
433        name: "ifndef",
434        requires_exec: false,
435        ignored_by_if: true,
436        execute: |line, context| process_ifdef(line, context, true),
437    },
438    Command {
439        name: "elifdef",
440        requires_exec: false,
441        ignored_by_if: true,
442        execute: |line, context| process_elifdef(line, context, false),
443    },
444    Command {
445        name: "elifndef",
446        requires_exec: false,
447        ignored_by_if: true,
448        execute: |line, context| process_elifdef(line, context, true),
449    },
450    Command {
451        name: "else",
452        requires_exec: false,
453        ignored_by_if: true,
454        execute: process_else,
455    },
456    Command {
457        name: "endif",
458        requires_exec: false,
459        ignored_by_if: true,
460        execute: process_endif,
461    },
462];
463
/// Whether `c` can be part of a word: alphanumeric or an underscore.
fn is_word_char(c: char) -> bool {
    c.is_alphanumeric() || c == '_'
}

/// Finds a macro name that occurs in the line as a whole word and replaces that occurrence with
/// the macro's value, returning None when no macro can be replaced.
///
/// An occurrence counts as a whole word when the characters directly before and after it (if
/// any) are not word characters. Every occurrence of a name is considered, so a name that first
/// appears inside a longer word (`A` in `BAD A`) is still replaced where it stands alone.
/// Macros with an empty name are skipped: an empty name matches everywhere and would make the
/// caller's expansion loop run forever.
///
/// Only one occurrence is replaced per call; callers loop until None is returned.
fn replace_next_macro(line: &str, macros: &HashMap<String, String>) -> Option<String> {
    macros.iter().find_map(|(name, value)| {
        // `?` bails out for the empty name; the length is used to step past rejected matches.
        let first_char_len = name.chars().next()?.len_utf8();

        let mut search_from = 0;
        while let Some(offset) = line[search_from..].find(name.as_str()) {
            let start = search_from + offset;
            let before = &line[..start];
            let after = &line[start + name.len()..];

            let embedded_in_word = before.chars().next_back().map_or(false, is_word_char)
                || after.chars().next().map_or(false, is_word_char);
            if !embedded_in_word {
                let mut new_line =
                    String::with_capacity(before.len() + value.len() + after.len());
                new_line.push_str(before);
                new_line.push_str(value);
                new_line.push_str(after);
                return Some(new_line);
            }

            // Step one character past the rejected match; `start` is where the name begins, so
            // adding the length of its first character lands on a character boundary.
            search_from = start + first_char_len;
        }
        None
    })
}
488
489/// Process a string line of input.
490///
491/// This is the smallest processing function, and all other processing functions are wrappers
492/// around it. It only processes singular lines, and will not work on any string that contains
493/// newlines unless that newline is at the end.
494///
495/// It returns a Result<String, Error>. If an error occurs, then the Result will be that error.
496/// Otherwise, the returned string is the output. If the input did not contain a newline at the
497/// end, then this function will add it.
498///
499/// # Examples
500///
501/// ```
502/// let mut context = gpp::Context::new();
503/// context.macros.insert("Foo".to_string(), "Two".to_string());
504///
505/// assert_eq!(gpp::process_line("One Foo Three", &mut context).unwrap(), "One Two Three\n");
506/// ```
507/// ```
508/// let mut context = gpp::Context::new();
509///
510/// assert_eq!(gpp::process_line("#define Foo Bar", &mut context).unwrap(), "");
511/// assert_eq!(context.macros.get("Foo").unwrap(), "Bar");
512/// ```
513pub fn process_line(line: &str, context: &mut Context) -> Result<String, Error> {
514    let line = line
515        .strip_suffix("\r\n")
516        .or_else(|| line.strip_suffix('\n'))
517        .unwrap_or(line);
518
519    enum Line<'a> {
520        Text(&'a str),
521        Command(Command, &'a str),
522    }
523
524    let line = if let Some(rest) = line.strip_prefix('#') {
525        if rest.starts_with('#') {
526            Line::Text(rest)
527        } else {
528            let mut parts = rest.trim_start().splitn(2, ' ');
529            let command_name = parts.next().unwrap();
530            let content = parts.next().unwrap_or("").trim_start();
531
532            Line::Command(
533                COMMANDS
534                    .iter()
535                    .copied()
536                    .filter(|command| context.allow_exec || !command.requires_exec)
537                    .find(|command| command.name == command_name)
538                    .ok_or_else(|| Error::InvalidCommand {
539                        command_name: command_name.to_owned(),
540                    })?,
541                content,
542            )
543        }
544    } else {
545        Line::Text(line)
546    };
547
548    let line = match line {
549        Line::Text(_)
550        | Line::Command(
551            Command {
552                ignored_by_if: false,
553                ..
554            },
555            _,
556        ) if context.inactive_stack > 0 => String::new(),
557        Line::Text(text) => {
558            let mut line = format!("{}\n", text);
559
560            while let Some(s) = replace_next_macro(&line, &context.macros) {
561                line = s;
562            }
563
564            line
565        }
566        Line::Command(command, content) => (command.execute)(content, context)?,
567    };
568
569    Ok(if let Some(child) = context.in_stack.last_mut() {
570        let input = child.stdin.as_mut().ok_or(Error::PipeFailed)?;
571        input.write_all(line.as_bytes())?;
572        String::new()
573    } else {
574        line
575    })
576}
577
578/// Process a multi-line string of text.
579///
580/// See `process_buf` for more details.
581///
582/// # Examples
583///
584/// ```
585/// assert_eq!(gpp::process_str("#define A 1\n A 2 3 \n", &mut gpp::Context::new()).unwrap(), " 1 2 3 \n");
586/// ```
587pub fn process_str(s: &str, context: &mut Context) -> Result<String, Error> {
588    process_buf(s.as_bytes(), "<string>", context)
589}
590
591/// Process a file.
592///
593/// See `process_buf` for more details.
594pub fn process_file(filename: &str, context: &mut Context) -> Result<String, Error> {
595    let file_raw = File::open(filename)?;
596    let file = BufReader::new(file_raw);
597
598    process_buf(file, filename, context)
599}
600
601/// Process a generic BufRead.
602///
603/// This function is a wrapper around `process_line`. It splits up the input into lines (adding a
604/// newline on the end if there isn't one) and then processes each line.
605pub fn process_buf<T: BufRead>(
606    buf: T,
607    buf_name: &str,
608    context: &mut Context,
609) -> Result<String, Error> {
610    buf.lines()
611        .enumerate()
612        .map(|(num, line)| {
613            Ok({
614                process_line(&line?, context).map_err(|e| Error::FileError {
615                    filename: String::from(buf_name),
616                    line: num,
617                    error: Box::new(e),
618                })?
619            })
620        })
621        .collect()
622}