gpp/lib.rs
1//! gpp is a Generic PreProcessor written in Rust.
2//!
3//! It supports:
4//! - Simple macros, no function macros
5//! - #include
6//! - #define and #undef
7//! - #ifdef, #ifndef, #elifdef, #elifndef, #else and #endif
8//! - #exec for running commands
9//! - #in and #endin for giving input to commands
10//!
11//! #includes work differently from C, as they do not require (and do not work with) quotes or <>,
12//! so `#include file.txt` is the correct syntax. It does not support #if or #elif, and recursive
13//! macros will cause the library to get stuck.
14//!
15//! # About
16//!
//! The hash in any command may be followed by optional whitespace, so for example `# undef Macro`
//! is valid, but ` # undef Macro` is not, because the hash must be the first character of the
//! line.
19//!
20//! ## #define and #undef
21//!
//! #define works similarly to C: `#define [name] [value]`, and so does #undef: `#undef [name]`.
//! Be careful though, because unlike C, macro expansion is recursive: if you `#define A A` and
//! then use A, gpp will run forever.
//! If #define is not given a value, then it will default to an empty string.
26//!
27//! ## #include
28//!
29//! Includes, unlike C, do not require quotes or angle brackets, so this: `#include "file.txt"` or
30//! this: `#include <file.txt>` will not work; you must write `#include file.txt`.
31//!
32//! Also, unlike C the directory does not change when you #include; otherwise, gpp would change its
33//! current directory and wouldn't be thread safe. This means that if you `#include dir/file.txt`
34//! and in `dir/file.txt` it says `#include other_file.txt`, that would refer to `other_file.txt`,
35//! not `dir/other_file.txt`.
36//!
37//! ## Ifs
38//!
39//! The #ifdef, #ifndef, #elifdef, #elifndef, #else and #endif commands work exactly as you expect.
40//! I did not add generic #if commands to gpp, as it would make it much more complex and require a
41//! lot of parsing, and most of the time these are all you need anyway.
42//!
43//! ## #exec, #in and #endin
44//!
45//! The exec command executes the given command with `cmd /C` for Windows and `sh -c` for
46//! everything else, and captures the command's standard output. For example, `#exec echo Hi!` will
47//! output `Hi!`. It does not capture the command's standard error, and parsing stops if the
48//! command exits with a nonzero status.
49//!
50//! Due to the security risk enabling #exec causes, by default exec is disabled, however you can
51//! enable it by changing the `allow_exec` flag in your context. If the input tries to `#exec` when
52//! exec is disabled, it will cause an error.
53//!
54//! The in command is similar to exec, but all text until the endin command is passed into the
55//! program's standard input. For example,
56//! ```text
57//! #in sed 's/tree/three/g'
58//! One, two, tree.
59//! #endin
60//! ```
61//! Would output `One, two, three.`. Note that you shouldn't do this, just using `#define tree
62//! three` would be much faster and less platform-dependent. You can also place more commands in
63//! the in block, including other in blocks. For a useful example:
64//! ```text
65//! <style>
66//! #in sassc -s
67//! # include styles.scss
68//! #endin
69//! </style>
70//! ```
//! This compiles your scss file into css using Sassc and includes it in the HTML every time you
//! generate your webpage with gpp.
73//!
74//! ## Literal hashes
75//!
76//! In order to insert literal hash symbols at the start of the line, simply use two hashes.
77//! `##some text` will convert into `#some text`, while `#some text` will throw an error as `some`
78//! is not a command.
79//!
80//! # Examples
81//!
82//! ```
83//! // Create a context for preprocessing
84//! let mut context = gpp::Context::new();
85//!
86//! // Add a macro to that context manually (context.macros is a HashMap)
87//! context.macros.insert("my_macro".to_owned(), "my_value".to_owned());
88//!
89//! // Process some text using that
90//! assert_eq!(gpp::process_str("My macro is my_macro\n", &mut context).unwrap(), "My macro is my_value\n");
91//!
92//! // Process some multi-line text, changing the context
93//! assert_eq!(gpp::process_str("
94//! #define Line Row
95//! Line One
96//! Line Two
97//! The Third Line", &mut context).unwrap(), "
98//! Row One
99//! Row Two
100//! The Third Row
101//! ");
102//!
103//! // The context persists
104//! assert_eq!(context.macros.get("Line").unwrap(), "Row");
105//!
106//! // Try some more advanced commands
107//! assert_eq!(gpp::process_str("
108//! Line Four
109//! #ifdef Line
110//! #undef Line
111//! #endif
112//! Line Five", &mut context).unwrap(), "
113//! Row Four
114//! Line Five
115//! ");
116//! ```
117
118#[cfg(test)]
119mod tests;
120
121use std::collections::HashMap;
122use std::error;
123use std::fmt;
124use std::fs::File;
125use std::io::{self, BufRead, BufReader, Write};
126use std::process::{Child, Command as SystemCommand, ExitStatus, Stdio};
127use std::string::FromUtf8Error;
128
/// State carried from one processed line to the next.
///
/// A context holds the macros that are currently defined, the bookkeeping needed to skip over
/// conditional blocks whose condition failed, the switch that enables the command-running
/// directives, and the child processes started by `#in` that are still waiting for `#endin`.
///
/// Text is only emitted while `inactive_stack` is 0. Each conditional opened while output is
/// suppressed adds one layer, so the parser knows how many `#endif`s it has to see before it may
/// resume emitting text.
///
/// Macro names are not validated: by inserting into `macros` directly you can create names that
/// `#define` could never produce. When text is expanded, however, a name is only replaced where
/// the characters on both sides of it are **not** alphanumeric and not an underscore.
#[derive(Debug, Default)]
pub struct Context {
    /// Every macro that is currently defined, keyed by name and mapping to its replacement text.
    pub macros: HashMap<String, String>,
    /// How many nested conditional layers are currently suppressing output (0 means active).
    pub inactive_stack: u32,
    /// Whether a branch of the conditional group currently being evaluated has been taken.
    pub used_if: bool,
    /// Whether the `#exec`, `#in` and `#endin` commands may be used.
    pub allow_exec: bool,
    /// Child processes opened by `#in`, innermost last; output is piped to the last one.
    pub in_stack: Vec<Child>,
}
154
155impl Context {
156 /// Create a new empty context with no macros or inactive stack and exec commands disallowed.
157 pub fn new() -> Self {
158 Self::default()
159 }
160 /// Create a new empty context with no macros or inactive stack and exec commands allowed.
161 pub fn new_exec() -> Self {
162 Self::new().exec(true)
163 }
164 /// Create a context from a map of macros.
165 pub fn from_macros(macros: impl Into<HashMap<String, String>>) -> Self {
166 Self {
167 macros: macros.into(),
168 ..Default::default()
169 }
170 }
171 /// Create a context from an iterator over tuples.
172 pub fn from_macros_iter(macros: impl IntoIterator<Item = (String, String)>) -> Self {
173 Self::from_macros(macros.into_iter().collect::<HashMap<_, _>>())
174 }
175 /// Set whther exec commands are allowed.
176 pub fn exec(mut self, allow_exec: bool) -> Self {
177 self.allow_exec = allow_exec;
178 self
179 }
180}
181
/// Everything that can go wrong while preprocessing.
///
/// # Examples
///
/// ```
/// let error = gpp::Error::TooManyParameters { command: "my_command" };
/// assert_eq!(format!("{}", error), "Too many parameters for #my_command");
/// ```
/// ```
/// let error = gpp::Error::FileError {
///     filename: "my_file".to_string(),
///     line: 10,
///     error: Box::new(gpp::Error::UnexpectedCommand {
///         command: "this_command",
///     }),
/// };
/// assert_eq!(format!("{}", error), "Error in my_file:10: Unexpected command #this_command");
/// ```
#[derive(Debug)]
pub enum Error {
    /// The word following the hash is not a command that is currently available.
    InvalidCommand {
        /// The command name exactly as it was written in the input.
        command_name: String,
    },
    /// A command that takes no parameters was given some (for example `#endif something`).
    TooManyParameters {
        /// Name of the offending command, without the hash.
        command: &'static str,
    },
    /// A command appeared where it makes no sense; currently only produced for a stray `#endin`.
    UnexpectedCommand {
        /// Name of the offending command, without the hash.
        command: &'static str,
    },
    /// A child process started by a command finished unsuccessfully.
    ChildFailed {
        /// The status the child exited with.
        status: ExitStatus,
    },
    /// The pipe to a child's standard input was not available.
    PipeFailed,
    /// An I/O operation failed.
    IoError(io::Error),
    /// A child's standard output was not valid UTF-8.
    FromUtf8Error(FromUtf8Error),
    /// An error that happened while processing a named input, with its location attached.
    FileError {
        /// Name of the input in which the error occurred.
        filename: String,
        /// Line of that input on which the error occurred.
        line: usize,
        /// The underlying error.
        error: Box<Error>,
    },
}
223
224impl fmt::Display for Error {
225 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
226 match self {
227 Error::InvalidCommand { command_name } => {
228 write!(f, "Invalid command '{}'", command_name)
229 }
230 Error::TooManyParameters { command } => {
231 write!(f, "Too many parameters for #{}", command)
232 }
233 Error::UnexpectedCommand { command } => write!(f, "Unexpected command #{}", command),
234 Error::ChildFailed { status } => write!(f, "Child failed with exit code {}", status),
235 Error::PipeFailed => write!(f, "Pipe to child failed"),
236 Error::IoError(e) => write!(f, "I/O Error: {}", e),
237 Error::FromUtf8Error(e) => write!(f, "UTF-8 Error: {}", e),
238 Error::FileError {
239 filename,
240 line,
241 error,
242 } => write!(f, "Error in {}:{}: {}", filename, line, error),
243 }
244 }
245}
246
247impl error::Error for Error {
248 fn source(&self) -> Option<&(dyn error::Error + 'static)> {
249 match self {
250 Error::IoError(e) => Some(e),
251 Error::FromUtf8Error(e) => Some(e),
252 Error::FileError { error: e, .. } => Some(e),
253 _ => None,
254 }
255 }
256}
257
258impl From<io::Error> for Error {
259 fn from(e: io::Error) -> Self {
260 Error::IoError(e)
261 }
262}
263
264impl From<FromUtf8Error> for Error {
265 fn from(e: FromUtf8Error) -> Self {
266 Error::FromUtf8Error(e)
267 }
268}
269
/// Builds (but does not run) a command that hands `cmd` to the platform shell.
///
/// On Windows this is `cmd /C <cmd>`; everywhere else it is `/bin/sh -c <cmd>`.
fn shell(cmd: &str) -> SystemCommand {
    let program = if cfg!(target_os = "windows") {
        "cmd"
    } else {
        "/bin/sh"
    };
    let run_flag = if cfg!(target_os = "windows") {
        "/C"
    } else {
        "-c"
    };

    let mut command = SystemCommand::new(program);
    command.arg(run_flag).arg(cmd);
    command
}
280
281fn process_exec(line: &str, _: &mut Context) -> Result<String, Error> {
282 let output = shell(line).output()?;
283 if !output.status.success() {
284 return Err(Error::ChildFailed {
285 status: output.status,
286 });
287 }
288 Ok(String::from_utf8(output.stdout)?)
289}
290
291fn process_in(line: &str, context: &mut Context) -> Result<String, Error> {
292 let child = shell(line)
293 .stdin(Stdio::piped())
294 .stdout(Stdio::piped())
295 .spawn()?;
296 context.in_stack.push(child);
297 Ok(String::new())
298}
299
300fn process_endin(line: &str, context: &mut Context) -> Result<String, Error> {
301 if !line.is_empty() {
302 return Err(Error::TooManyParameters { command: "endin" });
303 }
304 if context.in_stack.is_empty() {
305 return Err(Error::UnexpectedCommand { command: "endin" });
306 }
307 let child = context.in_stack.pop().unwrap();
308 let output = child.wait_with_output()?;
309 if !output.status.success() {
310 return Err(Error::ChildFailed {
311 status: output.status,
312 });
313 }
314 Ok(String::from_utf8(output.stdout)?)
315}
316
317fn process_include(line: &str, context: &mut Context) -> Result<String, Error> {
318 process_file(line, context)
319}
320
321fn process_define(line: &str, context: &mut Context) -> Result<String, Error> {
322 let mut parts = line.splitn(2, ' ');
323 let name = parts.next().unwrap();
324 let value = parts.next().unwrap_or("");
325
326 context.macros.insert(name.to_owned(), value.to_owned());
327 Ok(String::new())
328}
329
330fn process_undef(line: &str, context: &mut Context) -> Result<String, Error> {
331 context.macros.remove(line);
332 Ok(String::new())
333}
334
335fn process_ifdef(line: &str, context: &mut Context, inverted: bool) -> Result<String, Error> {
336 if context.inactive_stack > 0 {
337 context.inactive_stack += 1;
338 } else if context.macros.contains_key(line) == inverted {
339 context.inactive_stack = 1;
340 context.used_if = false;
341 } else {
342 context.used_if = true;
343 }
344 Ok(String::new())
345}
346
347fn process_elifdef(line: &str, context: &mut Context, inverted: bool) -> Result<String, Error> {
348 if context.inactive_stack == 0 {
349 context.inactive_stack = 1;
350 } else if context.inactive_stack == 1
351 && !context.used_if
352 && context.macros.contains_key(line) != inverted
353 {
354 context.inactive_stack = 0;
355 }
356 Ok(String::new())
357}
358
359fn process_else(line: &str, context: &mut Context) -> Result<String, Error> {
360 if !line.is_empty() {
361 return Err(Error::TooManyParameters { command: "else" });
362 }
363 context.inactive_stack = match context.inactive_stack {
364 0 => 1,
365 1 if !context.used_if => 0,
366 val => val,
367 };
368 Ok(String::new())
369}
370
371fn process_endif(line: &str, context: &mut Context) -> Result<String, Error> {
372 if !line.is_empty() {
373 return Err(Error::TooManyParameters { command: "endif" });
374 }
375 if context.inactive_stack != 0 {
376 context.inactive_stack -= 1;
377 }
378 Ok(String::new())
379}
380
381#[derive(Clone, Copy)]
382struct Command {
383 name: &'static str,
384 requires_exec: bool,
385 ignored_by_if: bool,
386 execute: fn(&str, &mut Context) -> Result<String, Error>,
387}
388
389const COMMANDS: &[Command] = &[
390 Command {
391 name: "exec",
392 requires_exec: true,
393 ignored_by_if: false,
394 execute: process_exec,
395 },
396 Command {
397 name: "in",
398 requires_exec: true,
399 ignored_by_if: false,
400 execute: process_in,
401 },
402 Command {
403 name: "endin",
404 requires_exec: true,
405 ignored_by_if: false,
406 execute: process_endin,
407 },
408 Command {
409 name: "include",
410 requires_exec: false,
411 ignored_by_if: false,
412 execute: process_include,
413 },
414 Command {
415 name: "define",
416 requires_exec: false,
417 ignored_by_if: false,
418 execute: process_define,
419 },
420 Command {
421 name: "undef",
422 requires_exec: false,
423 ignored_by_if: false,
424 execute: process_undef,
425 },
426 Command {
427 name: "ifdef",
428 requires_exec: false,
429 ignored_by_if: true,
430 execute: |line, context| process_ifdef(line, context, false),
431 },
432 Command {
433 name: "ifndef",
434 requires_exec: false,
435 ignored_by_if: true,
436 execute: |line, context| process_ifdef(line, context, true),
437 },
438 Command {
439 name: "elifdef",
440 requires_exec: false,
441 ignored_by_if: true,
442 execute: |line, context| process_elifdef(line, context, false),
443 },
444 Command {
445 name: "elifndef",
446 requires_exec: false,
447 ignored_by_if: true,
448 execute: |line, context| process_elifdef(line, context, true),
449 },
450 Command {
451 name: "else",
452 requires_exec: false,
453 ignored_by_if: true,
454 execute: process_else,
455 },
456 Command {
457 name: "endif",
458 requires_exec: false,
459 ignored_by_if: true,
460 execute: process_endif,
461 },
462];
463
/// Returns whether `c` can be part of a word, i.e. is alphanumeric or an underscore.
fn is_word_char(c: char) -> bool {
    c == '_' || c.is_alphanumeric()
}

/// Finds a macro name that appears in `line` as a whole word and replaces that one occurrence
/// with the macro's value, returning `None` when no macro can be replaced.
///
/// An occurrence counts as a whole word when the characters directly before and after it (if
/// any) are not word characters. Every occurrence of a name is considered, so a name that first
/// shows up inside a longer word (`A` in `BAD A`) is still replaced where it stands alone.
///
/// Macros with an empty name are ignored: an empty name matches at every position without
/// consuming anything, so replacing it would never make progress.
fn replace_next_macro(line: &str, macros: &HashMap<String, String>) -> Option<String> {
    macros.iter().find_map(|(name, value)| {
        // `?` skips empty names; the length is used below to step past a rejected match.
        let first_char_len = name.chars().next()?.len_utf8();

        let mut search_from = 0;
        while let Some(offset) = line[search_from..].find(name.as_str()) {
            let start = search_from + offset;
            let before = &line[..start];
            let after = &line[start + name.len()..];

            let inside_word = before.chars().next_back().map_or(false, is_word_char)
                || after.chars().next().map_or(false, is_word_char);
            if !inside_word {
                let mut new_line =
                    String::with_capacity(before.len() + value.len() + after.len());
                new_line.push_str(before);
                new_line.push_str(value);
                new_line.push_str(after);
                return Some(new_line);
            }

            // Step past only the first character of the rejected match, so that overlapping
            // occurrences are examined too. Every match starts with the name's first character,
            // so this always lands on a character boundary.
            search_from = start + first_char_len;
        }
        None
    })
}
488
489/// Process a string line of input.
490///
491/// This is the smallest processing function, and all other processing functions are wrappers
492/// around it. It only processes singular lines, and will not work on any string that contains
493/// newlines unless that newline is at the end.
494///
495/// It returns a Result<String, Error>. If an error occurs, then the Result will be that error.
496/// Otherwise, the returned string is the output. If the input did not contain a newline at the
497/// end, then this function will add it.
498///
499/// # Examples
500///
501/// ```
502/// let mut context = gpp::Context::new();
503/// context.macros.insert("Foo".to_string(), "Two".to_string());
504///
505/// assert_eq!(gpp::process_line("One Foo Three", &mut context).unwrap(), "One Two Three\n");
506/// ```
507/// ```
508/// let mut context = gpp::Context::new();
509///
510/// assert_eq!(gpp::process_line("#define Foo Bar", &mut context).unwrap(), "");
511/// assert_eq!(context.macros.get("Foo").unwrap(), "Bar");
512/// ```
513pub fn process_line(line: &str, context: &mut Context) -> Result<String, Error> {
514 let line = line
515 .strip_suffix("\r\n")
516 .or_else(|| line.strip_suffix('\n'))
517 .unwrap_or(line);
518
519 enum Line<'a> {
520 Text(&'a str),
521 Command(Command, &'a str),
522 }
523
524 let line = if let Some(rest) = line.strip_prefix('#') {
525 if rest.starts_with('#') {
526 Line::Text(rest)
527 } else {
528 let mut parts = rest.trim_start().splitn(2, ' ');
529 let command_name = parts.next().unwrap();
530 let content = parts.next().unwrap_or("").trim_start();
531
532 Line::Command(
533 COMMANDS
534 .iter()
535 .copied()
536 .filter(|command| context.allow_exec || !command.requires_exec)
537 .find(|command| command.name == command_name)
538 .ok_or_else(|| Error::InvalidCommand {
539 command_name: command_name.to_owned(),
540 })?,
541 content,
542 )
543 }
544 } else {
545 Line::Text(line)
546 };
547
548 let line = match line {
549 Line::Text(_)
550 | Line::Command(
551 Command {
552 ignored_by_if: false,
553 ..
554 },
555 _,
556 ) if context.inactive_stack > 0 => String::new(),
557 Line::Text(text) => {
558 let mut line = format!("{}\n", text);
559
560 while let Some(s) = replace_next_macro(&line, &context.macros) {
561 line = s;
562 }
563
564 line
565 }
566 Line::Command(command, content) => (command.execute)(content, context)?,
567 };
568
569 Ok(if let Some(child) = context.in_stack.last_mut() {
570 let input = child.stdin.as_mut().ok_or(Error::PipeFailed)?;
571 input.write_all(line.as_bytes())?;
572 String::new()
573 } else {
574 line
575 })
576}
577
578/// Process a multi-line string of text.
579///
580/// See `process_buf` for more details.
581///
582/// # Examples
583///
584/// ```
585/// assert_eq!(gpp::process_str("#define A 1\n A 2 3 \n", &mut gpp::Context::new()).unwrap(), " 1 2 3 \n");
586/// ```
587pub fn process_str(s: &str, context: &mut Context) -> Result<String, Error> {
588 process_buf(s.as_bytes(), "<string>", context)
589}
590
591/// Process a file.
592///
593/// See `process_buf` for more details.
594pub fn process_file(filename: &str, context: &mut Context) -> Result<String, Error> {
595 let file_raw = File::open(filename)?;
596 let file = BufReader::new(file_raw);
597
598 process_buf(file, filename, context)
599}
600
601/// Process a generic BufRead.
602///
603/// This function is a wrapper around `process_line`. It splits up the input into lines (adding a
604/// newline on the end if there isn't one) and then processes each line.
605pub fn process_buf<T: BufRead>(
606 buf: T,
607 buf_name: &str,
608 context: &mut Context,
609) -> Result<String, Error> {
610 buf.lines()
611 .enumerate()
612 .map(|(num, line)| {
613 Ok({
614 process_line(&line?, context).map_err(|e| Error::FileError {
615 filename: String::from(buf_name),
616 line: num,
617 error: Box::new(e),
618 })?
619 })
620 })
621 .collect()
622}