1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
// Copyright 2019 Marcus Geiger
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

/*!
This crate provides a simple, regular-expression-based parsing library
for reading the `.gitmodules` file of a Git repository.

# Usage

Add this to your `Cargo.toml`:

``` toml
[dependencies]
gitmodules = "0.1"
```

Usage is trivial:
``` rust
use std::io::BufReader;
use gitmodules::{read_gitmodules, Submodule};

fn demo() {
    let text = r#"
# this is a comment line
[submodule "foo"]
    path = "some/path"
"#
    .as_bytes();
    let text = BufReader::new(text);
    let submodules = read_gitmodules(text).unwrap();
    println!("Submodule name {}", submodules.first().unwrap().name());
}
```
*/

#[macro_use]
extern crate log;
#[macro_use]
extern crate lazy_static;

use regex::Regex;
use std::io::prelude::*;

/// Represents a Git submodule entry with its attributes.
///
/// Holds the submodule name together with its `(key, value)` attribute
/// pairs, kept in the order in which they were supplied.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Submodule {
    /// Name of the submodule.
    name: String,
    /// Attribute pairs of the submodule as `(key, value)`, in insertion order.
    entries: Vec<(String, String)>,
}

// NOTE(review): every method in this impl is `pub`, so this lint exemption
// looks unnecessary for a library crate -- confirm before removing it.
#[allow(dead_code)]
impl Submodule {
    /// Creates a submodule called `name` that owns the given `entries`.
    ///
    /// `name` is copied into an owned `String`; `entries` is stored as
    /// passed in, without validating or de-duplicating its keys.
    pub fn new(name: &str, entries: Vec<(String, String)>) -> Self {
        Submodule {
            name: name.to_string(),
            entries,
        }
    }

    /// Returns the name of the submodule. The name is the only
    /// required attribute of the submodule.
    pub fn name(&self) -> &str {
        &self.name
    }

    /// Returns the optional path of the Git submodule in the Git
    /// repository.
    ///
    /// This is a copy of the value of the first entry whose key is exactly
    /// `path`, or `None` if there is no such entry.
    pub fn path(&self) -> Option<String> {
        self.entries
            .iter()
            .find(|(key, _)| key == "path")
            .map(|(_, value)| value.clone())
    }

    /// Returns the optional entries of the Git submodule, excluding
    /// its name.
    ///
    /// The return type stays `&Vec<_>` (rather than a slice) so existing
    /// callers keep compiling unchanged.
    pub fn entries(&self) -> &Vec<(String, String)> {
        &self.entries
    }
}

lazy_static! {
    // Line patterns used when reading a `.gitmodules` file.
    // Note: be lenient about white spaces, although a proper
    // gitmodules file looks different.

    // A comment line: optional leading white space followed by `#`.
    static ref RE_COMMENT: Regex = Regex::new(r#"^\s*#.*"#).unwrap();
    // A section header such as `[submodule "name"]`; capture group 1 is the
    // text between the double quotes.
    static ref RE_MODULE: Regex = Regex::new(r#"^\[submodule\s*"([^"]+)"\s*\]"#).unwrap();
    // A `key = value` entry; capture group 1 is the key, group 2 the value.
    // The key class excludes `=` on purpose: a greedy `\S+` would run up to
    // the *last* `=` of an unspaced line such as `url=https://host/?a=b` and
    // split key and value in the wrong place.
    static ref RE_MODULE_ENTRY: Regex = Regex::new(r#"^\s*([^\s=]+)\s*=\s*(.*)\s*"#).unwrap();
}

/// Read a `.gitmodules` file and return a vector of the configured
/// Git submodules.
pub fn read_gitmodules<R>(reader: R) -> std::io::Result<Vec<Submodule>>
where
    R: BufRead,
{
    let mut submodules: Vec<Submodule> = Vec::new();

    let mut module_name: Option<String> = None;
    let mut module_entries: Vec<(String, String)> = Vec::new();

    for (n, line) in reader.lines().enumerate() {
        let line = line.unwrap();
        let line = line.trim();
        if line.is_empty() {
            continue;
        }
        trace!("Parsing line {}: '{}'", n, &line);
        if RE_COMMENT.is_match(&line) {
            continue;
        } else if let Some(capture) = RE_MODULE.captures(&line) {
            let submodule_name = capture.get(1).unwrap().as_str();
            if let Some(name) = module_name.clone() {
                let submodule = Submodule::new(&name, module_entries.clone());
                submodules.push(submodule);
            }
            module_name = Some(submodule_name.to_string());
            module_entries = Vec::new();
        } else if let Some(capture) = RE_MODULE_ENTRY.captures(&line) {
            let key = capture.get(1).unwrap().as_str();
            let val = capture.get(2).unwrap().as_str();
            module_entries.push((key.to_string(), val.to_string()));
        } else {
            error!("ERROR: invalid line {}: '{}'", n, line);
        }
    }

    if let Some(name) = module_name {
        let submodule = Submodule::new(&name, module_entries.clone());
        submodules.push(submodule);
    }

    Ok(submodules)
}

#[cfg(test)]
mod tests {
    use std::io::BufReader;

    use super::*;

    use std::sync::Once;

    // `Once::new()` replaces the deprecated `ONCE_INIT` constant.
    static INIT: Once = Once::new();

    /// Setup function that is only run once, even if called multiple times.
    fn setup() {
        INIT.call_once(|| {
            env_logger::init();
        });
    }

    /// A comment line is skipped and a single submodule with its `path`
    /// entry is returned; the value keeps its surrounding double quotes.
    #[test]
    fn gitmodules_with_comments() {
        setup();

        let text = r#"
# this is a comment line
[submodule "foo"]
	path = "some/path"
"#
        .as_bytes();
        let text = BufReader::new(text);
        let submodules = read_gitmodules(text).unwrap();

        assert_eq!(1, submodules.len());

        let module = submodules.first().unwrap();
        assert_eq!("foo", module.name());
        assert_eq!("\"some/path\"", module.path().unwrap());
    }

    /// Sloppy white space (missing, extra, or tabs) around headers and
    /// entries is tolerated, and entries are attached to the most recent
    /// submodule header.
    #[test]
    fn gitmodules_with_broken_lines() {
        setup();

        let text = r#"
# the next line is normally invalid because of the missing white space before the identifier
  [submodule"foo"]
   [submodule	"bar"]

path="bar/path"
 one = 1
  two=2
[submodule "baz"]
	path = "baz/path"
	flag = true
"#
        .as_bytes();
        let text = BufReader::new(text);
        let submodules = read_gitmodules(text).unwrap();

        assert_eq!(3, submodules.len());

        // "foo" is immediately followed by another header, so it has no entries.
        let module = submodules.first().unwrap();
        assert_eq!("foo", module.name());
        assert!(module.entries().is_empty());

        let module = submodules.get(1).unwrap();
        assert_eq!("bar", module.name());
        assert_eq!("\"bar/path\"", module.path().unwrap());
        let actual_one = module.entries().iter().find(|&(key, _)| key == "one");
        let expected_one = ("one".to_string(), "1".to_string());
        assert_eq!(Some(&expected_one), actual_one);
        let actual_two = module.entries().iter().find(|&(key, _)| key == "two");
        let expected_two = ("two".to_string(), "2".to_string());
        assert_eq!(Some(&expected_two), actual_two);

        let module = submodules.get(2).unwrap();
        assert_eq!("baz", module.name());
        assert_eq!("\"baz/path\"", module.path().unwrap());
        let actual = module.entries().iter().find(|&(key, _)| key == "flag");
        let expected = ("flag".to_string(), "true".to_string());
        assert_eq!(Some(&expected), actual);
    }
}