1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
use crate::error::Error;
use regex::Captures;
use std::path::PathBuf;

/// Representation of a paragraph in a [`Document`].
///
/// A paragraph is a titled chunk of text. Instances are created either
/// directly with [`Paragraph::new`] or in bulk with
/// [`Paragraph::from_captures`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Paragraph {
    /// Paragraph title.
    ///
    /// When built by [`Paragraph::from_captures`], this is the text of the
    /// first capture group with surrounding whitespace trimmed.
    pub title: String,
    /// Raw contents of a paragraph.
    ///
    /// When built by [`Paragraph::from_captures`], leading tab characters are
    /// stripped from every line and surrounding whitespace is trimmed.
    pub contents: String,
}

impl Paragraph {
    /// Constructs a new instance.
    pub fn new(title: String, contents: String) -> Self {
        Self { title, contents }
    }

    /// Constructs a vector of paragraphs from the given regex capture groups.
    pub fn from_captures(
        capture_group: Vec<Captures<'_>>,
        input: &str,
    ) -> Result<Vec<Self>, Error> {
        let mut paragraphs = Vec::new();
        for (i, captures) in capture_group.iter().enumerate() {
            let content_capture = captures.get(0).ok_or(Error::CaptureError)?;
            let title_capture = captures.get(1).ok_or(Error::CaptureError)?;
            paragraphs.push(Paragraph::new(
                title_capture.as_str().trim().to_string(),
                if let Some(next_capture) = capture_group.get(i + 1) {
                    let next_capture = next_capture
                        .iter()
                        .next()
                        .flatten()
                        .ok_or(Error::CaptureError)?;
                    (input[content_capture.end()..next_capture.start()]).to_string()
                } else {
                    (input[content_capture.end()..]).to_string()
                }
                .lines()
                .map(|v| v.trim_start_matches('\t'))
                .collect::<Vec<&str>>()
                .join("\n")
                .trim()
                .to_string(),
            ));
        }
        Ok(paragraphs)
    }
}

/// Representation of a parsed document which consists of paragraphs.
///
/// Bundles a list of [`Paragraph`]s with the path recorded as the document's
/// source. Instances are created with [`Document::new`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Document {
    /// Paragraphs in the document.
    pub paragraphs: Vec<Paragraph>,
    /// Source of the document.
    pub path: PathBuf,
}

impl Document {
    /// Constructs a new instance.
    pub fn new(paragraphs: Vec<Paragraph>, path: PathBuf) -> Self {
        Self { paragraphs, path }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::reader;
    use regex::RegexBuilder;

    /// Splits this crate's own `Cargo.toml` into `[section]` paragraphs and
    /// checks the titles and contents produced by `Paragraph::from_captures`.
    #[test]
    fn test_paragraph() -> Result<(), Error> {
        let manifest_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("Cargo.toml");
        let input = reader::read_to_string(manifest_path)?;

        // A section header is a bracketed alphabetic name on its own line.
        let section_header = RegexBuilder::new(r#"^(\[[a-zA-Z]+\])\n"#)
            .multi_line(true)
            .build()?;
        let captures: Vec<_> = section_header.captures_iter(&input).collect();

        let paragraphs = Paragraph::from_captures(captures, &input)?;
        assert!(paragraphs.len() >= 2);

        let first = &paragraphs[0];
        assert_eq!("[package]", first.title);
        let version_line = format!("version = \"{}\"", env!("CARGO_PKG_VERSION"));
        assert!(first.contents.contains(&version_line));

        // The dependencies section is only checked when it was captured.
        let dependencies = paragraphs.iter().find(|p| p.title == "[dependencies]");
        if let Some(paragraph) = dependencies {
            assert!(paragraph.contents.contains("regex = "));
        }

        Ok(())
    }
}