1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
//! Library used to implement the cargo-sync-readme binary.

use regex::RegexBuilder;
use std::fmt;
use std::fs::{File, read_dir};
use std::io::Read;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use toml::Value;
use toml::de::Error as TomlError;

/// File name of the Cargo manifest looked up by `Manifest::find_manifest`.
const MANIFEST_NAME: &str   = "Cargo.toml";
/// Opening marker written in front of the injected documentation.
const MARKER_START: &str    = "<!-- cargo-sync-readme start -->";
/// Closing marker written after the injected documentation.
const MARKER_END: &str      = "<!-- cargo-sync-readme end -->";
/// Pattern for a line made only of the single sync marker, with an optional trailing carriage
/// return. It is compiled in multi-line mode by `transform_readme`.
const MARKER_RE: &str       = "^<!-- cargo-sync-readme -->\r?$";
/// Pattern for a line made only of the opening marker, with an optional trailing carriage
/// return. It is compiled in multi-line mode by `transform_readme`.
const MARKER_START_RE: &str = "^<!-- cargo-sync-readme start -->\r?$";
/// Pattern for a line made only of the closing marker, with an optional trailing carriage
/// return. It is compiled in multi-line mode by `transform_readme`.
const MARKER_END_RE: &str   = "^<!-- cargo-sync-readme end -->\r?$";

/// Common Markdown code-block state.
///
/// This type helps track which state we are currently in when parsing code-blocks. It can either
/// be none or a code-block with either backticks (`) or tildes (~).
///
/// The state is advanced line by line by `strip_hidden_doc_tests`.
#[derive(Debug)]
enum CodeBlockState {
  /// Not inside any code-block.
  None,
  /// Inside a code-block that was opened by a line starting with three backticks.
  InWithBackticks,
  /// Inside a code-block that was opened by a line starting with three tildes.
  InWithTildes
}

/// Errors that can occur while locating and loading the Cargo manifest.
#[derive(Debug)]
pub enum FindManifestError {
  /// No manifest could be located.
  CannotFindManifest,
  /// A manifest exists at the contained path but could not be accessed.
  CannotOpenManifest(PathBuf),
  /// The manifest content is not valid TOML; the parser error is attached.
  TomlError(TomlError)
}

impl fmt::Display for FindManifestError {
  /// Write a one-sentence, human-readable description of the error.
  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    match self {
      FindManifestError::CannotFindManifest => {
        f.write_str("Cannot find manifest (Cargo.toml).")
      }

      FindManifestError::CannotOpenManifest(manifest_path) => {
        write!(f, "Cannot open manifest at path {}.", manifest_path.display())
      }

      FindManifestError::TomlError(toml_err) => {
        write!(f, "TOML error: {}.", toml_err)
      }
    }
  }
}

/// A parsed Cargo manifest together with the directory it was found in.
#[derive(Debug)]
pub struct Manifest {
  /// Parsed TOML content of the manifest.
  pub toml: Value,
  /// Directory containing the manifest file; the README and guessed entry points are resolved
  /// against it.
  pub parent_dir: PathBuf
}

impl Manifest {
  /// Build a manifest from its parsed TOML content and the path of the manifest file.
  ///
  /// `parent_dir` is the directory containing `path`. If `path` has no parent component, the
  /// empty path is used instead of panicking.
  fn new(toml: Value, path: PathBuf) -> Self {
    let parent_dir = path.parent().map(Path::to_path_buf).unwrap_or_default();

    Manifest { toml, parent_dir }
  }

  /// Get the TOML-formatted manifest by looking up the given directory; if not found, go to the
  /// parent directory and recursively retry until one is found or no parent is left.
  ///
  /// The upward search follows `Path::parent`, so a relative `dir` is only searched as far as its
  /// own components go; pass an absolute path to have every ancestor inspected.
  ///
  /// # Errors
  ///
  /// - `FindManifestError::CannotFindManifest` if a directory cannot be listed or no ancestor
  ///   contains a manifest.
  /// - `FindManifestError::CannotOpenManifest` if the manifest cannot be opened or read.
  /// - `FindManifestError::TomlError` if the manifest is not valid TOML.
  pub fn find_manifest<P>(dir: P) -> Result<Self, FindManifestError> where P: AsRef<Path> {
    let dir = dir.as_ref();

    let mut entries = read_dir(dir).map_err(|_| FindManifestError::CannotFindManifest)?;
    let found = entries.find(|entry| {
      match entry {
        Ok(entry) => entry.file_name() == MANIFEST_NAME,
        Err(_) => false
      }
    });

    match found {
      Some(Ok(entry)) => {
        let path = entry.path();
        let mut file = File::open(&path)
          .map_err(|_| FindManifestError::CannotOpenManifest(path.clone()))?;
        let mut file_str = String::new();

        // a failed or partial read must not be parsed as if it were the whole manifest
        file.read_to_string(&mut file_str)
          .map_err(|_| FindManifestError::CannotOpenManifest(path.clone()))?;

        let toml = file_str.parse().map_err(FindManifestError::TomlError)?;

        Ok(Manifest::new(toml, path))
      }

      // nothing here (the predicate above never selects an `Err` entry): try the parent
      _ => match dir.parent() {
        Some(parent) => Self::find_manifest(parent),
        None => Err(FindManifestError::CannotFindManifest)
      }
    }
  }

  /// Get the path to the file we want to take the documentation from.
  ///
  /// An explicit `path` declared in the manifest wins; it is resolved against the manifest
  /// directory, like the README and the guessed entry points. Otherwise `src/lib.rs` and
  /// `src/main.rs` are probed in the manifest directory. When both exist, `prefer_doc_from`
  /// decides, and `None` is returned if no preference was given. `None` is also returned when
  /// neither file exists.
  pub fn entry_point(&self, prefer_doc_from: Option<PreferDocFrom>) -> Option<PathBuf> {
    if let Some(ep) = self.entry_point_from_toml(prefer_doc_from) {
      // `join` keeps `ep` untouched when it is already absolute
      return Some(self.parent_dir.join(ep));
    }

    // we need to guess whether it’s a lib or a binary crate
    let lib_path = self.parent_dir.join("src/lib.rs");
    let main_path = self.parent_dir.join("src/main.rs");

    match (lib_path.is_file(), main_path.is_file()) {
      (true, true) => match prefer_doc_from {
        Some(PreferDocFrom::Binary) => Some(main_path),
        Some(PreferDocFrom::Library) => Some(lib_path),
        None => None
      }

      (true, false) => Some(lib_path),
      (false, true) => Some(main_path),
      (false, false) => None
    }
  }

  /// Extract the path to the readme file from the manifest.
  ///
  /// Uses the `readme` key of the `package` table when it is a string and falls back to
  /// `README.md`; the result is joined to the manifest directory.
  pub fn readme(&self) -> PathBuf {
    let readme = self.toml
      .get("package")
      .and_then(|p| p.get("readme"))
      .and_then(Value::as_str)
      .unwrap_or("README.md");

    self.parent_dir.join(readme)
  }

  /// Look up an explicit entry-point `path` in the top-level `lib` / `bin` manifest entries.
  ///
  /// The preferred entry (if any) is tried first, then `lib`, then `bin`; only the first entry
  /// that exists is inspected, and `None` is returned if it has no string `path`.
  ///
  /// NOTE(review): `bin` is normally declared as an array of tables (`[[bin]]`), for which the
  /// `path` lookup presumably yields nothing — confirm against `toml::Value::get`.
  fn entry_point_from_toml(&self, prefer_from: Option<PreferDocFrom>) -> Option<String> {
    let lib = self.toml.get("lib");
    let bin = self.toml.get("bin");
    let preference =
      match prefer_from {
        Some(PreferDocFrom::Binary) => bin,
        Some(PreferDocFrom::Library) => lib,
        None => None
      };

    preference.or(lib).or(bin)
      .and_then(|target| target.get("path"))
      .and_then(Value::as_str)
      .map(str::to_owned)
  }
}

/// Which crate target the documentation should come from when both a library and a binary are
/// available.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PreferDocFrom {
  /// Take the documentation from the binary target.
  Binary,
  /// Take the documentation from the library target.
  Library
}

impl FromStr for PreferDocFrom {
  type Err = String;

  /// Parse a preference: `"bin"` selects the binary and `"lib"` the library. The comparison is
  /// exact; anything else is rejected.
  fn from_str(s: &str) -> Result<Self, Self::Err> {
    if s == "bin" {
      return Ok(PreferDocFrom::Binary);
    }

    if s == "lib" {
      return Ok(PreferDocFrom::Library);
    }

    Err(String::from("not a valid preference"))
  }
}

/// Open a file and get its main inner documentation (//!), applying filters if needed.
///
/// Only the first contiguous run of lines starting with `//!` is used. The `//!` prefix is
/// removed, then the indentation shared by all non-blank lines is removed as well. Every emitted
/// line is terminated with `\r\n` if `crlf` is set and `\n` otherwise.
///
/// Unless `show_hidden_doc` is set, lines rejected by `strip_hidden_doc_tests` are dropped.
///
/// A failure while reading the opened file is ignored: whatever was read is used, possibly
/// nothing.
///
/// # Panics
///
/// Panics if the file cannot be opened.
pub fn extract_inner_doc<P>(path: P, show_hidden_doc: bool, crlf: bool) -> String where P: AsRef<Path> {
  let path = path.as_ref();
  let mut file = match File::open(path) {
    Ok(file) => file,
    Err(e) => panic!("cannot open {}: {}", path.display(), e)
  };
  let mut content = String::new();
  let mut codeblock_st = CodeBlockState::None;

  // best effort: this function has no error channel, so a read failure is not reported
  let _ = file.read_to_string(&mut content);

  let eol = if crlf { "\r\n" } else { "\n" };

  let lines: Vec<&str> = content
    .lines()
    .skip_while(|l| !l.starts_with("//!"))
    .take_while(|l| l.starts_with("//!"))
    // every line starts with the three-byte "//!" here; drop exactly one such prefix
    .map(|l| &l[3..])
    .collect();

  // find the minimal offset of all lines for which the first character is not a space;
  // whitespace-only lines do not take part
  let offset = lines
    .iter()
    .filter_map(|line| line.find(|c: char| !c.is_whitespace()))
    .min()
    .unwrap_or(0);

  let mut doc = String::new();

  for line in lines {
    // trim by the given offset to remove the introduced space by the Rust doc
    let line = strip_doc_indent(line, offset);

    if show_hidden_doc || strip_hidden_doc_tests(&mut codeblock_st, line) {
      doc.push_str(line);
      doc.push_str(eol);
    }
  }

  doc
}

// Remove up to `offset` leading bytes from `line` without ever slicing past its end or inside a
// multi-byte character. Callers pass an offset that only covers leading whitespace.
fn strip_doc_indent(line: &str, offset: usize) -> &str {
  let mut cut = std::cmp::min(offset, line.len());

  // 0 is always a character boundary, so this terminates
  while !line.is_char_boundary(cut) {
    cut -= 1;
  }

  &line[cut..]
}

/// Errors that can occur while reading or transforming a README.
#[derive(Debug, Eq, PartialEq)]
pub enum TransformError {
  /// The README at the contained path could not be read.
  CannotReadReadme(PathBuf),
  /// The synchronization markers are absent from the README or not laid out as expected.
  MissingOrIllFormatMarkers
}

impl fmt::Display for TransformError {
  /// Write a one-sentence, human-readable description of the error.
  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    match self {
      TransformError::CannotReadReadme(readme_path) => {
        write!(f, "Cannot read README at {}.", readme_path.display())
      }

      TransformError::MissingOrIllFormatMarkers => {
        f.write_str("Markers not found or ill-formed; check your file again.")
      }
    }
  }
}

/// Open and read a README file, returning its whole content.
///
/// # Errors
///
/// Returns `TransformError::CannotReadReadme` carrying `path` if the file cannot be opened or if
/// reading it fails.
pub fn read_readme<P>(path: P) -> Result<String, TransformError> where P: AsRef<Path> {
  let path = path.as_ref();
  let mut file = File::open(path).map_err(|_| TransformError::CannotReadReadme(path.to_owned()))?;
  let mut content = String::new();

  // a failed read must not be reported as a successfully read (empty or partial) README
  file.read_to_string(&mut content)
    .map_err(|_| TransformError::CannotReadReadme(path.to_owned()))?;

  Ok(content)
}

/// Transform a readme file and return its content with the documentation injected, if any.
///
/// Two layouts are recognized, each marker sitting alone on its line:
///
/// - a single sync marker (first time using the tool), which is replaced by the start marker, the
///   documentation and the end marker;
/// - a start marker followed by an end marker (already used the tool), in which case everything
///   from the start marker to the end marker is replaced.
///
/// `crlf` selects `\r\n` instead of `\n` for the line endings written around the documentation.
///
/// # Errors
///
/// Returns `TransformError::MissingOrIllFormatMarkers` if neither layout is found, which includes
/// an end marker that only appears before the start marker.
pub fn transform_readme<C, R>(
  content: C,
  doc: R,
  crlf: bool
) -> Result<String, TransformError>
where C: AsRef<str>,
      R: AsRef<str> {
  let content = content.as_ref();
  let doc = doc.as_ref();

  // all marker patterns are anchored on line boundaries, hence the multi-line mode
  let compile = |pattern: &str| {
    RegexBuilder::new(pattern)
      .multi_line(true)
      .build()
      .expect("marker patterns are valid regular expressions")
  };

  // try to look for the sync marker (first time using the tool)
  let marker_re = compile(MARKER_RE);

  if let Some(marker_match) = marker_re.find(content) {
    let first_part = &content[0 .. marker_match.start()];
    let second_part = after_marker(content, marker_match.start(), marker_match.end(), crlf);

    return Ok(reformat_with_markers(first_part, doc, second_part, crlf));
  }

  // try to look for the start and end markers (already used the tool)
  let marker_start_re = compile(MARKER_START_RE);
  let marker_end_re = compile(MARKER_END_RE);

  let start_match = match marker_start_re.find(content) {
    Some(start_match) => start_match,
    None => return Err(TransformError::MissingOrIllFormatMarkers)
  };

  // the end marker only counts when it comes after the start marker; `find_at` keeps the
  // surrounding context so the line anchors still work
  match marker_end_re.find_at(content, start_match.end()) {
    Some(end_match) => {
      let first_part = &content[0 .. start_match.start()];
      let second_part = after_marker(content, end_match.start(), end_match.end(), crlf);

      Ok(reformat_with_markers(first_part, doc, second_part, crlf))
    }

    None => Err(TransformError::MissingOrIllFormatMarkers)
  }
}

// Text of `content` that follows a marker match spanning `start .. end`.
//
// The marker patterns swallow an optional trailing '\r'. In CRLF mode that '\r' is handed back
// so the remainder still starts with a full "\r\n". Nothing is handed back when the match did
// not end with '\r', otherwise the last character of the marker itself would be duplicated.
fn after_marker(content: &str, start: usize, end: usize, crlf: bool) -> &str {
  if crlf && content[start .. end].ends_with('\r') {
    &content[end - 1 ..]
  } else {
    &content[end ..]
  }
}

// Rebuild the README: the text before the markers, the start marker, a blank line, the
// documentation, the end marker and finally the text after the markers.
//
// The `crlf` parameter selects "\r\n" instead of "\n" for the line endings inserted here.
fn reformat_with_markers(first_part: &str, doc: &str, second_part: &str, crlf: bool) -> String {
  let eol = if crlf { "\r\n" } else { "\n" };

  [first_part, MARKER_START, eol, eol, doc, eol, MARKER_END, second_part].concat()
}

/// Decide whether a documentation line must be kept when hidden doc-test lines are stripped.
///
/// Returns `true` to keep `line` and `false` to drop it, updating the code-block state `st`
/// along the way. Lines are only dropped inside a code-block, and only when they start with a
/// hash followed by a space (`# `).
fn strip_hidden_doc_tests(st: &mut CodeBlockState, line: &str) -> bool {
  // fence that closes the code-block we are currently in
  let closing_fence = match *st {
    CodeBlockState::None => {
      // outside of any code-block nothing is ever dropped; just check whether one opens here
      if line.starts_with("~~~") {
        *st = CodeBlockState::InWithTildes;
      } else if line.starts_with("```") {
        *st = CodeBlockState::InWithBackticks;
      }

      return true;
    }

    CodeBlockState::InWithTildes => "~~~",
    CodeBlockState::InWithBackticks => "```"
  };

  // inside a code-block: hidden lines are dropped and never inspected any further
  if line.starts_with("# ") {
    return false;
  }

  // only the kind of fence that opened the code-block can close it
  if line.starts_with(closing_fence) {
    *st = CodeBlockState::None;
  }

  true
}

// Unit tests for the hidden-line filter and the README transformation.
#[cfg(test)]
mod tests {
  use super::*;

  // Lines starting with "# " are only dropped inside a code-block, and a code-block is only
  // closed by the same kind of fence that opened it.
  #[test]
  fn strip_dash_starting_lines() {
    let mut st = CodeBlockState::None;

    // outside of a code-block: kept
    assert_eq!(strip_hidden_doc_tests(&mut st, "# okay"), true);
    // opens a backtick code-block
    assert_eq!(strip_hidden_doc_tests(&mut st, "```"), true);
    assert_eq!(strip_hidden_doc_tests(&mut st, "foo bar zoo"), true);
    assert_eq!(strip_hidden_doc_tests(&mut st, "# hello"), false);
    // a lone hash without a following space is kept
    assert_eq!(strip_hidden_doc_tests(&mut st, "#"), true);
    assert_eq!(strip_hidden_doc_tests(&mut st, "# "), false);
    assert_eq!(strip_hidden_doc_tests(&mut st, "# ### nope"), false);
    // tildes do not close a backtick code-block
    assert_eq!(strip_hidden_doc_tests(&mut st, "~~~"), true);
    // closes the code-block
    assert_eq!(strip_hidden_doc_tests(&mut st, "```"), true);
    assert_eq!(strip_hidden_doc_tests(&mut st, "# still okay"), true);
  }

  // The single sync marker is replaced by the start marker, the documentation and the end marker.
  #[test]
  fn simple_transform() {
    let doc = "Test! <3";
    let readme = "Foo\n<!-- cargo-sync-readme -->\nbar\nzoo";
    let output = transform_readme(readme, doc, false);

    assert_eq!(output, Ok("Foo\n<!-- cargo-sync-readme start -->\n\nTest! <3\n<!-- cargo-sync-readme end -->\nbar\nzoo".to_owned()));
  }

  // Same as above with CRLF content and CRLF output: every line ending stays "\r\n".
  #[test]
  fn windows_line_endings() {
    let doc = "Test! <3";
    let readme = "Foo\r\n<!-- cargo-sync-readme -->\r\nbar\r\nzoo";
    let output = transform_readme(readme, doc, true);

    assert_eq!(output, Ok("Foo\r\n<!-- cargo-sync-readme start -->\r\n\r\nTest! <3\r\n<!-- cargo-sync-readme end -->\r\nbar\r\nzoo".to_owned()));
  }
}