1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
// Copyright 2018 Nicholas Young (and contributors).
// All rights reserved.
//
// Released under a 3-Clause BSD License. You should have received a
// copy with this software. Otherwise, visit https://opensource.org
// to acquire a copy.

//! A universal frontmatter parser and extractor.
//!
//! Provided with input data, Matter is able to separate frontmatter
//! from content. Common delimiters for supported formats are also
//! predefined.

use regex::{Captures, Regex};

// Delimiter patterns used by `matter`. Both share the same shape and differ
// only in the delimiter (`---` vs. `+++`):
//
//   ^[[:space:]]*      optional leading whitespace from the start of input
//   DELIM \r?\n        opening delimiter followed by an LF or CRLF line break
//   ((?s).*?(?-s))     group 1: frontmatter, matched lazily with `.` also
//                      matching newlines
//   DELIM \r?\n        closing delimiter followed by a line break
//   ((?s).*(?-s))$     group 2: everything that remains up to end of input
//
// The closing delimiter is not anchored to the start of a line: the lazy
// group 1 ends at the first delimiter that is directly followed by a line
// break. A delimiter followed by other text on the same line does not end
// the frontmatter.
lazy_static::lazy_static! {
    // Frontmatter fenced by `---` lines.
    // `unwrap` panics only if the hard-coded pattern fails to compile.
    static ref DEFAULT_EXP: Regex =
        Regex::new(r"^[[:space:]]*\-\-\-\r?\n((?s).*?(?-s))\-\-\-\r?\n((?s).*(?-s))$").unwrap();
    // Frontmatter fenced by `+++` lines.
    static ref TOML_EXP: Regex =
        Regex::new(r"^[[:space:]]*\+\+\+\r?\n((?s).*?(?-s))\+\+\+\r?\n((?s).*(?-s))$").unwrap();
}

/// Split a string (often resulting from reading in a file) into
/// frontmatter and content portions.
///
/// The `---` delimited pattern (`DEFAULT_EXP`) is tried first; if it does
/// not match, the `+++` delimited pattern (`TOML_EXP`) is tried.
///
/// # Returns
///
/// `Some((frontmatter, content))` when one of the patterns matches, with
/// leading and trailing whitespace trimmed from both parts. `None` when
/// neither pattern matches the input.
pub fn matter(input: &str) -> Option<(String, String)> {
    // `captures` already yields `None` when the pattern does not match, so a
    // separate `is_match` pre-check would only run each regex twice.
    let captures: Option<Captures> = DEFAULT_EXP
        .captures(input)
        .or_else(|| TOML_EXP.captures(input));

    // Group 1 is the frontmatter, group 2 is the remaining content.
    captures.map(|cap| (cap[1].trim().to_string(), cap[2].trim().to_string()))
}

#[cfg(test)]
mod tests {
    use super::matter;

    /// Runs `matter` on `source` and unwraps the `(frontmatter, content)`
    /// pair, panicking when the input is not recognised.
    fn split(source: &str) -> (String, String) {
        matter(source).unwrap()
    }

    /// `+++` fenced frontmatter is separated from the trailing content.
    #[test]
    fn extract_toml() {
        let source = r#"
        +++
        title = "TOML Frontmatter"
        list = [
            "Of",
            "Things",
        ]
        [[assets]]
        contentType = "audio/mpeg"
        +++

        This is some content.
        "#;

        let (front, body) = split(source);

        assert!(!front.is_empty());
        assert_eq!(body, "This is some content.");
    }

    /// A single-key `---` fenced block is returned trimmed, alongside the
    /// trimmed content.
    #[test]
    fn extract_basic_yaml() {
        let source = r#"
        ---
        title: YAML Frontmatter
        ---

        This is some content.
        "#;

        let (front, body) = split(source);

        assert_eq!(front, "title: YAML Frontmatter");
        assert_eq!(body, "This is some content.");
    }

    /// A `---` in the middle of a value must not end the frontmatter.
    #[test]
    fn extract_unquoted_yaml() {
        let source = r#"
        ---
        title: Yaml Frontmatter --- Revenge of the Unquoted Strings
        ---

        This is some content.
        "#;

        let (front, body) = split(source);

        assert_eq!(
            front,
            "title: Yaml Frontmatter --- Revenge of the Unquoted Strings"
        );
        assert_eq!(body, "This is some content.");
    }

    /// A block scalar whose text contains `---` stays in the frontmatter,
    /// with its inner indentation preserved.
    #[test]
    fn extract_multiline_yaml() {
        let source = r#"
        ---
        text: |
            Nested multiline content, which may---contain loosely-formatted text.
        ---

        This is some content.
        "#;

        let expected_front = r#"text: |
            Nested multiline content, which may---contain loosely-formatted text."#;

        let (front, body) = split(source);

        assert_eq!(front, expected_front);
        assert_eq!(body, "This is some content.");
    }

    /// Nested mappings and non-ASCII text in the frontmatter are handled.
    #[test]
    fn extract_nested_yaml() {
        let source = r#"
        ---
        availability: public
        when:
          start: 1471/3/28 MTR 4::22
          duration: 0::30
        date: 2012-02-18
        title: Rutejìmo
        ---

        Text
        "#;

        let (front, body) = split(source);

        assert!(!front.is_empty());
        assert_eq!(body, "Text");
    }
}