cargo_sync_readme/
lib.rs

1//! Library used to implement the cargo-sync-readme binary.
2
3// #![deny(missing_docs)]
4
5pub mod intralinks;
6
7use crate::intralinks::{FQIdentifier, IntraLinkError};
8use regex::RegexBuilder;
9use std::{
10  collections::HashSet,
11  fmt,
12  fs::{self, read_dir, File},
13  io::{self, Read},
14  path::{Path, PathBuf},
15  str::FromStr,
16};
17use toml::{de::Error as TomlError, Value};
18
/// Name of the manifest containing the project metadata.
///
/// This is the file name looked for when searching a directory for a manifest.
pub const MANIFEST_NAME: &str = "Cargo.toml";

/// Start marker in the README file.
///
/// Written right before the injected documentation.
pub const MARKER_START: &str = "<!-- cargo-sync-readme start -->";

/// End marker in the README file.
///
/// Written right after the injected documentation.
pub const MARKER_END: &str = "<!-- cargo-sync-readme end -->";

/// Regular expression to find the initial marker.
///
/// Matches a whole line made of `<!-- cargo-sync-readme -->`, optionally followed by a carriage return. The pattern
/// relies on `^` and `$` matching at line boundaries, so it has to be compiled in multi-line mode.
pub const MARKER_RE: &str = "^<!-- cargo-sync-readme -->\r?$";

/// Regular expression to find the start marker.
///
/// Same shape as [`MARKER_RE`], for the [`MARKER_START`] line; must be compiled in multi-line mode as well.
pub const MARKER_START_RE: &str = "^<!-- cargo-sync-readme start -->\r?$";

/// Regular expression to find the end marker.
///
/// Same shape as [`MARKER_RE`], for the [`MARKER_END`] line; must be compiled in multi-line mode as well.
pub const MARKER_END_RE: &str = "^<!-- cargo-sync-readme end -->\r?$";
36
/// Common Markdown code-block state.
///
/// This type helps track which state we are currently in when parsing code-blocks. It can either
/// be none or a code-block with either backticks (`) or tildes (~).
///
/// The state is threaded through line-by-line helpers, which update it whenever a line starts with a fence.
#[derive(Debug, PartialEq)]
enum CodeBlockState {
  /// Not currently in a code block.
  None,
  /// Currently in a code block started (and that will end) with three backticks.
  InWithBackticks,
  /// Currently in a code block started (and that will end) with three tildes.
  InWithTildes,
}
50
/// Possible error that might happen while looking for the project manifest.
#[derive(Debug)]
pub enum FindManifestError {
  /// No manifest exists in the directory where the search was started, nor in any of its parent directories.
  ///
  /// Also reported when one of the searched directories cannot be read.
  CannotFindManifest,
  /// Cannot open the manifest; carries the path that was tried.
  CannotOpenManifest(PathBuf),
  /// Cannot parse the manifest; carries the underlying TOML deserialization error.
  TomlError(TomlError),
}
61
62impl fmt::Display for FindManifestError {
63  fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
64    match *self {
65      FindManifestError::CannotFindManifest => f.write_str("Cannot find manifest (Cargo.toml)."),
66      FindManifestError::CannotOpenManifest(ref path) => {
67        write!(f, "Cannot open manifest at path {}.", path.display())
68      }
69      FindManifestError::TomlError(ref e) => write!(f, "TOML error: {}.", e),
70    }
71  }
72}
73
/// The project manifest.
///
/// This type is used internally to retrieve various metadata on your project. The most important information for us
/// is the `readme` field, which allows us to know which file we need to synchronize.
#[derive(Debug)]
pub struct Manifest {
  /// Deserialized manifest.
  pub toml: Value,
  /// Directory on the file system that contains the manifest file.
  ///
  /// Paths derived from the manifest (such as the readme) are resolved against this directory.
  pub parent_dir: PathBuf,
}
85
86impl Manifest {
87  fn new<P>(toml: Value, path: P) -> Self
88  where
89    P: AsRef<Path>,
90  {
91    Manifest {
92      toml,
93      parent_dir: path.as_ref().parent().unwrap().to_owned(),
94    }
95  }
96
97  /// Load a manifest from the file system.
98  pub fn load(path: impl AsRef<Path>) -> Result<Self, FindManifestError> {
99    let path = path.as_ref();
100    let mut file =
101      File::open(&path).map_err(|_| FindManifestError::CannotOpenManifest(path.to_owned()))?;
102    let mut file_str = String::new();
103    let _ = file.read_to_string(&mut file_str);
104    let toml = file_str.parse().map_err(FindManifestError::TomlError)?;
105
106    Ok(Manifest::new(toml, path))
107  }
108
109  /// Get the TOML-formatted manifest by searching the current directory; if not found, go to the
110  /// parent directory and recursively retry until one is found… eventually.
111  pub fn find_manifest(dir: impl AsRef<Path>) -> Result<Self, FindManifestError> {
112    let dir = dir.as_ref();
113
114    // check the input directory
115    if let Ok(mut dir_entry) = read_dir(dir) {
116      // if we find the manifest file, load it
117      if let Some(file_entry) = dir_entry.find(|entry| match entry {
118        Ok(entry) if entry.file_name() == MANIFEST_NAME => true,
119        _ => false,
120      }) {
121        let path = file_entry.unwrap().path();
122
123        Manifest::load(path)
124      } else {
125        // go to the parent and retry there
126        if let Some(parent) = dir.parent() {
127          Self::find_manifest(parent)
128        } else {
129          // if there’s no parent, we exhausted the file system tree; hard error
130          Err(FindManifestError::CannotFindManifest)
131        }
132      }
133    } else {
134      // the current directory cannot be read; hard error
135      Err(FindManifestError::CannotFindManifest)
136    }
137  }
138
139  /// Extract the path to the crate name from the manifest.
140  pub fn crate_name(&self) -> Option<&str> {
141    self
142      .toml
143      .get("package")
144      .and_then(|p| p.get("name"))
145      .and_then(Value::as_str)
146  }
147
148  /// Extract the path to the readme file from the manifest.
149  ///
150  /// If the readme doesn’t exist, assume `README.md`.
151  pub fn readme(&self) -> PathBuf {
152    let readme = self
153      .toml
154      .get("package")
155      .and_then(|p| p.get("readme"))
156      .and_then(Value::as_str)
157      .unwrap_or("README.md");
158
159    self.parent_dir.join(readme)
160  }
161
162  /// Get the path to the Rust file we want to take the documentation from.
163  pub fn entry_point(&self, prefer_doc_from: Option<PreferDocFrom>) -> Option<PathBuf> {
164    // check first whether the information is in the manifest; if it’s not, we can check manually on the file system
165    match self.entry_point_from_toml(prefer_doc_from) {
166      Some(ep) => Some(ep.into()),
167      None => {
168        // we need to guess whether it’s a lib or a binary crate
169        let lib_path = self.parent_dir.join("src/lib.rs");
170        let main_path = self.parent_dir.join("src/main.rs");
171
172        match (lib_path.is_file(), main_path.is_file()) {
173          (true, true) => match prefer_doc_from {
174            Some(PreferDocFrom::Binary) => Some(main_path),
175            Some(PreferDocFrom::Library) => Some(lib_path),
176            _ => None,
177          },
178
179          (true, _) => Some(lib_path),
180          (_, true) => Some(main_path),
181          _ => None,
182        }
183      }
184    }
185  }
186
187  /// Get the path to the Rust file to extract documentation from based on the manifest.
188  fn entry_point_from_toml(&self, prefer_from: Option<PreferDocFrom>) -> Option<String> {
189    let lib = self.toml.get("lib");
190    let bin = self.toml.get("bin");
191    let preference = match prefer_from {
192      Some(PreferDocFrom::Binary) => bin.clone(),
193      Some(PreferDocFrom::Library) => lib.clone(),
194      _ => None,
195    };
196
197    preference
198      .or(lib)
199      .or(bin)
200      .and_then(|v| v.get("path"))
201      .and_then(Value::as_str)
202      .map(|s| s.to_owned())
203  }
204}
205
/// Which entry-point the documentation should come from when both a binary and a library are present.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum PreferDocFrom {
  /// Take the documentation from the binary entry-point.
  ///
  /// Most of the time, this file will be `src/main.rs`.
  Binary,

  /// Take the documentation from the library entry-point.
  ///
  /// Most of the time, this file will be `src/lib.rs`.
  Library,
}

/// Parse a preference from its short textual form: `"bin"` or `"lib"`.
impl FromStr for PreferDocFrom {
  type Err = String;

  fn from_str(s: &str) -> Result<Self, Self::Err> {
    if s == "bin" {
      return Ok(PreferDocFrom::Binary);
    }

    if s == "lib" {
      return Ok(PreferDocFrom::Library);
    }

    // anything else, including different casing, is rejected
    Err(String::from("not a valid preference"))
  }
}
231
232/// Apply a series of transformation on the inner documentation of the entry point to adapt it to being embedded in a
233/// README.
234///
235/// This function will perform a bunch of transformations, such as – non-comprehensive list:
236///
237/// - Removing the `//!` annotations at the beginning of the lines.
238/// - Respecting the options of showing or hiding hidden Rust code (e.g. `# Something like this`).
239/// - Map correctly the code block languages used to respect doctest annotations.
240/// - Etc. etc.
241fn transform_inner_doc(doc: &str, show_hidden_doc: bool, crlf: bool) -> String {
242  // trim the module’s lines’ beginnings and extract the module’s documentation; this one starts at the first line
243  // starting with `//!` and ends when a line doesn’t start with `//!` — note that we do not support any kind of other
244  // annotations so far
245  let lines: Vec<String> = doc
246    .lines()
247    .skip_while(|l| !l.trim_start().starts_with("//!"))
248    .take_while(|l| l.trim_start().starts_with("//!"))
249    .map(|l| {
250      let l = l.trim_start().trim_start_matches("//!");
251      if crlf {
252        format!("{}\r\n", l)
253      } else {
254        format!("{}\n", l)
255      }
256    })
257    .collect();
258
259  // find the minimal offset of all lines for which the first character is not a space; that offset will be used later
260  // to preserve indentation in Rust code (so we don’t have to trim and lose all indentation levels)
261  let offset = lines
262    .iter()
263    .flat_map(|line| line.find(|c: char| !c.is_whitespace()))
264    .min()
265    .unwrap_or(0);
266
267  // cut the beginning of the each line and annotate code blocks
268  let mut codeblock_st = CodeBlockState::None;
269  let sanitized_annotated_lines: Vec<String> = lines
270    .into_iter()
271    .map(|line| {
272      if crlf && line == "\r\n" || line == "\n" {
273        line
274      } else {
275        line[offset..].to_owned()
276      }
277    })
278    .map(|line| annotate_code_blocks(&mut codeblock_st, line, crlf))
279    .collect();
280
281  // finally, eventually show hidden documentation if asked
282  if show_hidden_doc {
283    sanitized_annotated_lines.into_iter().collect()
284  } else {
285    sanitized_annotated_lines
286      .into_iter()
287      .filter(|l| strip_hidden_doc_tests(&mut codeblock_st, l))
288      .collect()
289  }
290}
291
/// Errors that might happen while transforming documentation.
#[derive(Debug)]
pub enum TransformError {
  /// Cannot read the Rust entry-point file containing the inner documentation.
  ///
  /// Carries the path of the entry-point and the underlying I/O error.
  CannotReadEntryPoint(PathBuf, io::Error),
  /// Cannot read the README file.
  ///
  /// Carries the path of the README and the underlying I/O error.
  CannotReadReadme(PathBuf, io::Error),
  /// Intra-link error which occurred while transforming the readme documentation.
  IntralinkError(intralinks::IntraLinkError),
  /// Initial, start and/or end markers missing and/or ill-formatted.
  MissingOrIllFormedMarkers,
}
304
305impl fmt::Display for TransformError {
306  fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
307    match *self {
308      TransformError::CannotReadEntryPoint(ref path, ref err) => {
309        write!(f, "Cannot read entry-point at {}: {}", path.display(), err)
310      }
311
312      TransformError::CannotReadReadme(ref path, ref err) => {
313        write!(f, "Cannot read README at {}: {}", path.display(), err)
314      }
315
316      TransformError::IntralinkError(ref err) => {
317        write!(f, "Failed to process intra-links: {}", err)
318      }
319
320      TransformError::MissingOrIllFormedMarkers => {
321        f.write_str("Markers not found or ill-formed; check your file again")
322      }
323    }
324  }
325}
326
327impl From<IntraLinkError> for TransformError {
328  fn from(err: IntraLinkError) -> Self {
329    TransformError::IntralinkError(err)
330  }
331}
332
333/// Open a Rust file and get its main inner documentation (//!), applying filters if needed.
334pub fn extract_inner_doc(
335  path: impl AsRef<Path>,
336  show_hidden_doc: bool,
337  crlf: bool,
338) -> Result<String, TransformError> {
339  let path = path.as_ref();
340  let doc_path = fs::read_to_string(path)
341    .map_err(|e| TransformError::CannotReadEntryPoint(path.to_owned(), e))?;
342  let transformed = transform_inner_doc(&doc_path, show_hidden_doc, crlf);
343
344  Ok(transformed)
345}
346
347/// Open and read a README file.
348pub fn read_readme(path: impl AsRef<Path>) -> Result<String, TransformError> {
349  let path = path.as_ref();
350  fs::read_to_string(path).map_err(|e| TransformError::CannotReadReadme(path.to_owned(), e))
351}
352
353/// Rewrite intralinks found in the Markdown README.
354fn transform_doc_intralinks(
355  doc: &str,
356  crate_name: &str,
357  entry_point: &Path,
358) -> Result<WithWarnings<String>, TransformError> {
359  let symbols: HashSet<FQIdentifier> = intralinks::extract_markdown_intralink_symbols(doc);
360  let mut warnings = Vec::new();
361  let symbols_type = intralinks::crate_symbols_type(&entry_point, &symbols, &mut warnings)?;
362
363  Ok(intralinks::rewrite_markdown_links(
364    doc,
365    &symbols_type,
366    crate_name,
367    warnings,
368  ))
369}
370
371/// Look for the markers in the README and inject the transformed documentation at the right place.
372fn inject_doc_in_readme(content: &str, doc: &str, crlf: bool) -> Result<String, TransformError> {
373  let content = content.as_ref();
374  let mut marker_re_builder = RegexBuilder::new(MARKER_RE);
375  marker_re_builder.multi_line(true);
376  let marker_re = marker_re_builder.build().unwrap();
377
378  if let Some(marker_match) = marker_re.find(&content) {
379    // try to look for the sync marker (first time using the tool)
380    let first_part = &content[0..marker_match.start()];
381    let second_part_off = if crlf {
382      marker_match.end() - 1
383    } else {
384      marker_match.end()
385    };
386    let second_part = &content[second_part_off..];
387
388    Ok(reformat_with_markers(first_part, doc, second_part, crlf))
389  } else {
390    // try to look for the start and end markers (already used the tool)
391    let mut marker_start_re_builder = RegexBuilder::new(MARKER_START_RE);
392    marker_start_re_builder.multi_line(true);
393    let marker_start_re = marker_start_re_builder.build().unwrap();
394    let mut marker_end_re_builder = RegexBuilder::new(MARKER_END_RE);
395    marker_end_re_builder.multi_line(true);
396    let marker_end_re = marker_end_re_builder.build().unwrap();
397
398    let marker_start = marker_start_re.find(&content);
399    let marker_end = marker_end_re.find(&content);
400
401    match (marker_start, marker_end) {
402      (Some(start_match), Some(end_match)) => {
403        let first_part = &content[0..start_match.start()];
404        let second_part_off = if crlf {
405          end_match.end() - 1
406        } else {
407          end_match.end()
408        };
409        let second_part = &content[second_part_off..];
410
411        Ok(reformat_with_markers(first_part, doc, second_part, crlf))
412      }
413
414      _ => Err(TransformError::MissingOrIllFormedMarkers),
415    }
416  }
417}
418
/// An object with warnings accumulated while creating this object.
#[derive(Debug)]
pub struct WithWarnings<A> {
  /// The object that was produced.
  pub value: A,
  /// Warnings collected while producing `value`, in the order they were handed over.
  pub warnings: Vec<String>,
}

impl<A> WithWarnings<A> {
  /// Pair a value with the warnings collected while producing it.
  pub fn new(value: A, warnings: Vec<String>) -> Self {
    Self { value, warnings }
  }
}
430
431/// Transform a readme and return its content with the documentation injected, if any.
432///
433/// Perform any required other transformations if asked by the user.
434pub fn transform_readme(
435  content: impl AsRef<str>,
436  readme: impl AsRef<str>,
437  crate_name: impl AsRef<str>,
438  entry_point: impl AsRef<Path>,
439  crlf: bool,
440) -> Result<WithWarnings<String>, TransformError> {
441  let WithWarnings {
442    value: readme,
443    warnings,
444  } = transform_doc_intralinks(readme.as_ref(), crate_name.as_ref(), entry_point.as_ref())?;
445
446  let injected_readme = inject_doc_in_readme(content.as_ref(), &readme, crlf)?;
447  let readme = WithWarnings::new(injected_readme, warnings);
448  Ok(readme)
449}
450
451// Reformat the README by inserting the documentation between the start and end markers.
452//
453// The crlf` parameter is used to insert a '\r' before '\n'.
454fn reformat_with_markers(first_part: &str, doc: &str, second_part: &str, crlf: bool) -> String {
455  if crlf {
456    format!(
457      "{}{}\r\n\r\n{}\r\n{}{}",
458      first_part, MARKER_START, doc, MARKER_END, second_part
459    )
460  } else {
461    format!(
462      "{}{}\n\n{}\n{}{}",
463      first_part, MARKER_START, doc, MARKER_END, second_part
464    )
465  }
466}
467
468/// Annotate code blocks for lines.
469///
470/// This function is expected to be called while iterating sequentially on lines, as it mutates its argument to
471/// accumulate the code block state. If it encounters a code block annotation, it will automatically accumulate the
472/// state and transform the lines to reflect the annotations.
473fn annotate_code_blocks(st: &mut CodeBlockState, line: String, crlf: bool) -> String {
474  match st {
475    CodeBlockState::None => {
476      if line.starts_with("~~~") {
477        *st = CodeBlockState::InWithTildes;
478      } else if line.starts_with("```") {
479        *st = CodeBlockState::InWithBackticks;
480      } else {
481        // not a code block annotation; skip
482        return line;
483      }
484
485      // language used in the code block; e.g. ```<lang>
486      // some “languages” are not really languages but doctest annotations, such as should_panic; in this case, we
487      // remap them to “rust”
488      if crlf && line.ends_with("\r\n") {
489        let lang = remap_code_block_lang(&line[3..line.len() - 2]);
490        format!("{}{}\r\n", &line[..3], lang)
491      // line.replace("\r\n", &format!("{}\r\n", lang))
492      } else if !crlf && line.ends_with("\n") {
493        let lang = remap_code_block_lang(&line[3..line.len() - 1]);
494        format!("{}{}\n", &line[..3], lang)
495      // line.replace("\n", &format!("{}\n", lang))
496      } else {
497        line
498      }
499    }
500
501    CodeBlockState::InWithTildes => {
502      if line.starts_with("~~~") {
503        *st = CodeBlockState::None;
504      }
505
506      line
507    }
508
509    CodeBlockState::InWithBackticks => {
510      if line.starts_with("```") {
511        *st = CodeBlockState::None;
512      }
513
514      line
515    }
516  }
517}
518
/// Map a code block language to its target code block language.
///
/// Most of the time, this function will be the identity function, but some languages are doctest attributes that
/// need to be remapped to the correct language (most of the time, it will be rust).
///
/// The info string may hold several comma-separated entries (e.g. `rust,no_run`). The first entry that is not a
/// doctest attribute is returned, trimmed; when every entry is an attribute (or the string is empty), the block is
/// Rust code and `"rust"` is returned.
fn remap_code_block_lang(lang: &str) -> &str {
  /// Tell whether an info-string entry is a doctest attribute rather than a language.
  fn is_doctest_attribute(entry: &str) -> bool {
    match entry {
      // no lang is Rust by default; the rest are doctest attributes
      "" | "ignore" | "should_panic" | "no_run" | "compile_fail" | "test_harness" => true,

      _ => {
        // target-specific ignores, e.g. `ignore-wasm32`
        if entry.starts_with("ignore-") {
          return true;
        }

        // any edition, e.g. `edition2015`, `edition2018`, `edition2021`
        if entry.starts_with("edition") {
          let year = &entry["edition".len()..];
          return !year.is_empty() && year.chars().all(|c| c.is_ascii_digit());
        }

        false
      }
    }
  }

  lang
    .split(',')
    .map(str::trim)
    .find(|entry| !is_doctest_attribute(entry))
    .unwrap_or("rust")
}
533
534/// Strip hidden documentation tests from a readme.
535fn strip_hidden_doc_tests(st: &mut CodeBlockState, line: &str) -> bool {
536  match st {
537    CodeBlockState::None => {
538      // if we’re not currently in a code-block, check if we need to open one; in all cases,
539      // we don’t want to filter that line out
540      if line.starts_with("~~~") {
541        *st = CodeBlockState::InWithTildes;
542      } else if line.starts_with("```") {
543        *st = CodeBlockState::InWithBackticks;
544      }
545
546      true
547    }
548
549    CodeBlockState::InWithTildes => {
550      // we’re in a code-block, so filter only lines starting with a dash (#) and let others
551      // go through; close the code-block if we find three tildes (~~~)
552      if line.starts_with("# ") || line.trim_end() == "#" {
553        false
554      } else {
555        if line.starts_with("~~~") {
556          *st = CodeBlockState::None;
557        }
558
559        true
560      }
561    }
562
563    CodeBlockState::InWithBackticks => {
564      // we’re in a code-block, so filter only lines starting with a dash (#) and let others
565      // go through; close the code-block if we find three backticks (```)
566      if line.starts_with("# ") || line.trim_end() == "#" {
567        false
568      } else {
569        if line.starts_with("```") {
570          *st = CodeBlockState::None;
571        }
572
573        true
574      }
575    }
576  }
577}
578
// Unit tests for the hidden-line filter, the marker injection and the code block annotation.
#[cfg(test)]
mod tests {
  use super::*;

  // Lines starting with a hash are only filtered while inside a fenced code block.
  #[test]
  fn strip_dash_starting_lines() {
    let mut st = CodeBlockState::None;

    // outside of any code block: kept
    assert_eq!(strip_hidden_doc_tests(&mut st, "# okay"), true);
    // opens a backtick block
    assert_eq!(strip_hidden_doc_tests(&mut st, "```"), true);
    assert_eq!(strip_hidden_doc_tests(&mut st, "foo bar zoo"), true);
    assert_eq!(strip_hidden_doc_tests(&mut st, "# hello"), false);
    // no space after the hash: not a hidden line
    assert_eq!(strip_hidden_doc_tests(&mut st, "#foo"), true);
    assert_eq!(strip_hidden_doc_tests(&mut st, "#"), false);
    assert_eq!(strip_hidden_doc_tests(&mut st, "# "), false);
    assert_eq!(strip_hidden_doc_tests(&mut st, "# ### nope"), false);
    // a tilde fence does not close a backtick block
    assert_eq!(strip_hidden_doc_tests(&mut st, "~~~"), true);
    // closes the backtick block
    assert_eq!(strip_hidden_doc_tests(&mut st, "```"), true);
    assert_eq!(strip_hidden_doc_tests(&mut st, "# still okay"), true);
  }

  // The initial marker is replaced by the start marker, the documentation and the end marker.
  #[test]
  fn simple_transform() {
    let doc = "Test! <3";
    let readme = "Foo\n<!-- cargo-sync-readme -->\nbar\nzoo";
    let output = inject_doc_in_readme(readme, doc, false);

    assert_eq!(
      output.ok().unwrap(),
      "Foo\n<!-- cargo-sync-readme start -->\n\nTest! <3\n<!-- cargo-sync-readme end -->\nbar\nzoo"
    );
  }

  // Same as `simple_transform`, with CRLF line endings in both the input and the output.
  #[test]
  fn windows_line_endings() {
    let doc = "Test! <3";
    let readme = "Foo\r\n<!-- cargo-sync-readme -->\r\nbar\r\nzoo";
    let output = inject_doc_in_readme(readme, doc, true);

    assert_eq!(output.ok().unwrap(), "Foo\r\n<!-- cargo-sync-readme start -->\r\n\r\nTest! <3\r\n<!-- cargo-sync-readme end -->\r\nbar\r\nzoo");
  }

  // A fence without any language is annotated as Rust.
  #[test]
  fn annotate_default_code_blocks() {
    let doc = "//!```\n//!fn add(a: u8, b: u8) -> u8 { a + b }\n//!```";
    let output = transform_inner_doc(doc, false, false);

    assert_eq!(
      output,
      "```rust\nfn add(a: u8, b: u8) -> u8 { a + b }\n```\n".to_owned()
    );
  }

  // Same as `annotate_default_code_blocks`, with CRLF line endings.
  #[test]
  fn annotate_default_code_blocks_windows() {
    let doc = "//!```\r\n//!fn add(a: u8, b: u8) -> u8 { a + b }\r\n//!```";
    let output = transform_inner_doc(doc, false, true);

    assert_eq!(
      output,
      "```rust\r\nfn add(a: u8, b: u8) -> u8 { a + b }\r\n```\r\n".to_owned()
    );
  }

  // A fence that already names a language is left untouched.
  #[test]
  fn does_not_annotate_annotated_code_blocks() {
    let doc = "//!```text\n//!echo Hello, World!\n//!```";
    let output = transform_inner_doc(doc, false, false);

    assert_eq!(output, "```text\necho Hello, World!\n```\n".to_owned());
  }

  // Same as `does_not_annotate_annotated_code_blocks`, with CRLF line endings.
  #[test]
  fn does_not_annotate_annotated_code_blocks_windows() {
    let doc = "//!```text\r\n//!echo Hello, World!\r\n//!```";
    let output = transform_inner_doc(doc, false, true);

    assert_eq!(
      output,
      "```text\r\necho Hello, World!\r\n```\r\n".to_owned()
    );
  }
}