parse_changelog/lib.rs
1// SPDX-License-Identifier: Apache-2.0 OR MIT
2
3/*!
4Simple changelog parser, written in Rust.
5
6### Usage
7
8<!-- Note: Document from sync-markdown-to-rustdoc:start through sync-markdown-to-rustdoc:end
9 is synchronized from README.md. Any changes to that range are not preserved. -->
10<!-- tidy:sync-markdown-to-rustdoc:start -->
11
12To use this crate as a library, add this to your `Cargo.toml`:
13
14```toml
15[dependencies]
16parse-changelog = { version = "0.6", default-features = false }
17```
18
19<div class="rustdoc-alert rustdoc-alert-note">
20
21> **ⓘ Note**
22>
23> We recommend disabling default features because they enable CLI-related
24> dependencies which the library part does not use.
25
26</div>
27
28<!-- omit in toc -->
29### Examples
30
31```
32let changelog = "\
33## 0.1.2 - 2020-03-01
34
35- Bug fixes.
36
37## 0.1.1 - 2020-02-01
38
39- Added `Foo`.
40- Added `Bar`.
41
42## 0.1.0 - 2020-01-01
43
44Initial release
45";
46
47// Parse changelog.
48let changelog = parse_changelog::parse(changelog).unwrap();
49
50// Get the latest release.
51assert_eq!(changelog[0].version, "0.1.2");
52assert_eq!(changelog[0].title, "0.1.2 - 2020-03-01");
53assert_eq!(changelog[0].notes, "- Bug fixes.");
54
55// Get the specified release.
56assert_eq!(changelog["0.1.0"].title, "0.1.0 - 2020-01-01");
57assert_eq!(changelog["0.1.0"].notes, "Initial release");
58assert_eq!(changelog["0.1.1"].title, "0.1.1 - 2020-02-01");
59assert_eq!(
60 changelog["0.1.1"].notes,
61 "- Added `Foo`.\n\
62 - Added `Bar`."
63);
64```
65
66<!-- omit in toc -->
67### Optional features
68
69- **`serde`** — Implements [`serde::Serialize`](https://docs.rs/serde/latest/serde/trait.Serialize.html) trait for parse-changelog types.
70
71## Supported Format
72
73By default, this crate is intended to support markdown-based changelogs
74that have the title of each release starts with the version format based on
75[Semantic Versioning][semver]. (e.g., [Keep a Changelog][keepachangelog]'s
76changelog format.)
77
78<!-- omit in toc -->
79### Headings
80
81The heading for each release must be Atx-style (1-6 `#`) or
82Setext-style (`=` or `-` in a line under text), and the heading levels
83must match with other releases.
84
85Atx-style headings:
86
87```markdown
88# 0.1.0
89```
90
91```markdown
92## 0.1.0
93```
94
95Setext-style headings:
96
97```markdown
980.1.0
99=====
100```
101
102```markdown
1030.1.0
104-----
105```
106
107<!-- omit in toc -->
108### Titles
109
110The title of each release must start with a text or a link text (text with
111`[` and `]`) that starts with a valid [version format](#versions) or
112[prefix format](#prefixes). For example:
113
114```markdown
115# [0.2.0]
116
117description...
118
119# 0.1.0
120
121description...
122```
123
124<!-- omit in toc -->
125#### Prefixes
126
127You can include characters before the version as prefix.
128
129```text
130## Version 0.1.0
131 ^^^^^^^^
132```
133
134By default only "v", "Version ", "Release ", and "" (no prefix) are
135allowed as prefixes.
136
137To customize the prefix format, use the [`Parser::prefix_format`] method (library) or `--prefix-format` option (CLI).
138
139<!-- omit in toc -->
140#### Versions
141
142```text
143## v0.1.0 -- 2020-01-01
144 ^^^^^
145```
146
147The default version format is based on [Semantic Versioning][semver].
148
149This is parsed by using the following regular expression:
150
151```text
152^(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)(-[0-9A-Za-z\.-]+)?(\+[0-9A-Za-z\.-]+)?$|^Unreleased$
153```
154
155<div class="rustdoc-alert rustdoc-alert-note">
156
157> **ⓘ Note**
158>
159> To get the 'Unreleased' section in the CLI, you need to explicitly specify 'Unreleased' as the version.
160
161</div>
162
163To customize the version format, use the [`Parser::version_format`] method (library) or `--version-format` option (CLI).
164
165<!-- omit in toc -->
166#### Suffixes
167
168You can freely include characters after the version.
169
170```text
171# 0.1.0 - 2020-01-01
172 ^^^^^^^^^^^^^
173```
174
175## Related Projects
176
177- [create-gh-release-action]: GitHub Action for creating GitHub Releases based on changelog. This action uses this crate for changelog parsing.
178
179[`Parser::prefix_format`]: https://docs.rs/parse-changelog/latest/parse_changelog/struct.Parser.html#method.prefix_format
180[`Parser::version_format`]: https://docs.rs/parse-changelog/latest/parse_changelog/struct.Parser.html#method.version_format
181[create-gh-release-action]: https://github.com/taiki-e/create-gh-release-action
182[keepachangelog]: https://keepachangelog.com
183[semver]: https://semver.org
184
185<!-- tidy:sync-markdown-to-rustdoc:end -->
186*/
187
188#![doc(test(
189 no_crate_inject,
190 attr(
191 deny(warnings, rust_2018_idioms, single_use_lifetimes),
192 allow(dead_code, unused_variables)
193 )
194))]
195#![forbid(unsafe_code)]
196#![warn(
197 // Lints that may help when writing public library.
198 missing_debug_implementations,
199 missing_docs,
200 clippy::alloc_instead_of_core,
201 clippy::exhaustive_enums,
202 clippy::exhaustive_structs,
203 clippy::impl_trait_in_params,
204 // clippy::missing_inline_in_public_items,
205 // clippy::std_instead_of_alloc,
206 clippy::std_instead_of_core,
207)]
208// docs.rs only (cfg is enabled by docs.rs, not build script)
209#![cfg_attr(docsrs, feature(doc_cfg))]
210
211#[cfg(test)]
212mod tests;
213
214#[cfg(test)]
215#[path = "gen/tests/assert_impl.rs"]
216mod assert_impl;
217#[cfg(feature = "serde")]
218#[path = "gen/serde.rs"]
219mod serde_impl;
220#[cfg(test)]
221#[path = "gen/tests/track_size.rs"]
222mod track_size;
223
224mod error;
225
226use core::mem;
227use std::{borrow::Cow, sync::OnceLock};
228
229use indexmap::IndexMap;
230use memchr::memmem;
231use regex::Regex;
232
233pub use self::error::Error;
234use self::error::Result;
235
236/// A changelog.
237///
238/// The key is a version, and the value is the release note for that version.
239///
240/// The order is the same as the order written in the original text. (e.g., if
241/// [the latest version comes first][keepachangelog], `changelog[0]` is the
242/// release note for the latest version)
243///
244/// This type is returned by [`parse`] function or [`Parser::parse`] method.
245///
246/// [keepachangelog]: https://keepachangelog.com
247pub type Changelog<'a> = IndexMap<&'a str, Release<'a>>;
248
249/// Parses release notes from the given `text`.
250///
251/// This function uses the default version and prefix format. If you want to use
252/// another format, consider using the [`Parser`] type instead.
253///
254/// See the [crate-level documentation](crate) for changelog and version
255/// format supported by default.
256///
257/// # Errors
258///
259/// Returns an error if any of the following:
260///
261/// - There are multiple release notes for one version.
262/// - No release note was found. This usually means that the changelog isn't
263/// written in the supported format.
264///
265/// If you want to handle these cases manually without making errors,
266/// consider using [`parse_iter`].
267pub fn parse(text: &str) -> Result<Changelog<'_>> {
268 Parser::new().parse(text)
269}
270
271/// Returns an iterator over all release notes in the given `text`.
272///
273/// Unlike [`parse`] function, the returned iterator doesn't error on
274/// duplicate release notes or empty changelog.
275///
276/// This function uses the default version and prefix format. If you want to use
277/// another format, consider using the [`Parser`] type instead.
278///
279/// See the [crate-level documentation](crate) for changelog and version
280/// format supported by default.
281pub fn parse_iter(text: &str) -> ParseIter<'_, 'static> {
282 ParseIter::new(text, None, None)
283}
284
285/// A release note for a version.
286#[derive(Debug, Clone, PartialEq, Eq)]
287#[non_exhaustive]
288pub struct Release<'a> {
289 /// The version of this release.
290 ///
291 /// ```text
292 /// ## Version 0.1.0 -- 2020-01-01
293 /// ^^^^^
294 /// ```
295 ///
296 /// This is the same value as the key of the [`Changelog`] type.
297 pub version: &'a str,
298 /// The title of this release.
299 ///
300 /// ```text
301 /// ## Version 0.1.0 -- 2020-01-01
302 /// ^^^^^^^^^^^^^^^^^^^^^^^^^^^
303 /// ```
304 ///
305 /// Note:
306 /// - Leading and trailing [whitespaces](char::is_whitespace) have been removed.
307 /// - This retains links in the title. Use [`title_no_link`](Self::title_no_link)
308 /// if you want to use the title with links removed.
309 pub title: &'a str,
310 /// The descriptions of this release.
311 ///
312 /// Note that leading and trailing newlines have been removed.
313 pub notes: &'a str,
314}
315
316impl<'a> Release<'a> {
317 /// Returns the title of this release with link removed.
318 #[must_use]
319 pub fn title_no_link(&self) -> Cow<'a, str> {
320 full_unlink(self.title)
321 }
322}
323
324/// A changelog parser.
325#[derive(Debug, Default)]
326pub struct Parser {
327 /// Version format. e.g., "0.1.0" in "# v0.1.0 (2020-01-01)".
328 ///
329 /// If `None`, `DEFAULT_VERSION_FORMAT` is used.
330 version_format: Option<Regex>,
331 /// Prefix format. e.g., "v" in "# v0.1.0 (2020-01-01)", "Version " in
332 /// "# Version 0.1.0 (2020-01-01)".
333 ///
334 /// If `None`, `DEFAULT_PREFIX_FORMAT` is used.
335 prefix_format: Option<Regex>,
336}
337
338impl Parser {
339 /// Creates a new changelog parser.
340 #[must_use]
341 pub fn new() -> Self {
342 Self::default()
343 }
344
345 /// Sets the version format.
346 ///
347 /// ```text
348 /// ## v0.1.0 -- 2020-01-01
349 /// ^^^^^
350 /// ```
351 ///
352 /// *Tip*: To customize the text before the version number (e.g., "v" in "# v0.1.0",
353 /// "Version " in "# Version 0.1.0", etc.), use the [`prefix_format`] method
354 /// instead of this method.
355 ///
356 /// # Default
357 ///
358 /// The default version format is based on [Semantic Versioning][semver].
359 ///
360 /// This is parsed by using the following regular expression:
361 ///
362 /// ```text
363 /// ^(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)(-[0-9A-Za-z\.-]+)?(\+[0-9A-Za-z\.-]+)?$|^Unreleased$
364 /// ```
365 ///
366 /// **Note:** To get the 'Unreleased' section in the CLI, you need to explicitly specify 'Unreleased' as the version.
367 ///
368 /// # Errors
369 ///
370 /// Returns an error if any of the following:
371 ///
372 /// - The specified format is not a valid regular expression or supported by
373 /// [regex] crate.
374 /// - The specified format is empty or contains only
375 /// [whitespace](char::is_whitespace).
376 ///
377 /// [`prefix_format`]: Self::prefix_format
378 /// [regex]: https://docs.rs/regex
379 /// [semver]: https://semver.org
380 pub fn version_format(&mut self, format: &str) -> Result<&mut Self> {
381 if format.trim_start().is_empty() {
382 return Err(Error::format("empty or whitespace version format"));
383 }
384 self.version_format = Some(Regex::new(format).map_err(Error::new)?);
385 Ok(self)
386 }
387
388 /// Sets the prefix format.
389 ///
390 /// "Prefix" means the range from the first non-whitespace character after
391 /// heading to the character before the version (including whitespace
392 /// characters). For example:
393 ///
394 /// ```text
395 /// ## Version 0.1.0 -- 2020-01-01
396 /// ^^^^^^^^
397 /// ```
398 ///
399 /// ```text
400 /// ## v0.1.0 -- 2020-01-01
401 /// ^
402 /// ```
403 ///
404 /// # Default
405 ///
406 /// By default only "v", "Version ", "Release ", and "" (no prefix) are
407 /// allowed as prefixes.
408 ///
409 /// This is parsed by using the following regular expression:
410 ///
411 /// ```text
412 /// ^(v|Version |Release )?
413 /// ```
414 ///
415 /// # Errors
416 ///
417 /// Returns an error if any of the following:
418 ///
419 /// - The specified format is not a valid regular expression or supported by
420 /// [regex] crate.
421 ///
422 /// [regex]: https://docs.rs/regex
423 pub fn prefix_format(&mut self, format: &str) -> Result<&mut Self> {
424 self.prefix_format = Some(Regex::new(format).map_err(Error::new)?);
425 Ok(self)
426 }
427
428 /// Parses release notes from the given `text`.
429 ///
430 /// See the [crate-level documentation](crate) for changelog and version
431 /// format supported by default.
432 ///
433 /// # Errors
434 ///
435 /// Returns an error if any of the following:
436 ///
437 /// - There are multiple release notes for one version.
438 /// - No release note was found. This usually means that the changelog isn't
439 /// written in the supported format, or that the specified format is wrong
440 /// if you specify your own format.
441 ///
442 /// If you want to handle these cases manually without making errors,
443 /// consider using [`parse_iter`].
444 ///
445 /// [`parse_iter`]: Self::parse_iter
446 pub fn parse<'a>(&self, text: &'a str) -> Result<Changelog<'a>> {
447 let mut map = IndexMap::new();
448 for release in self.parse_iter(text) {
449 if let Some(release) = map.insert(release.version, release) {
450 return Err(Error::parse(format!(
451 "multiple release notes for '{}'",
452 release.version
453 )));
454 }
455 }
456 if map.is_empty() {
457 return Err(Error::parse("no release note was found"));
458 }
459 Ok(map)
460 }
461
462 /// Returns an iterator over all release notes in the given `text`.
463 ///
464 /// Unlike [`parse`] method, the returned iterator doesn't error on
465 /// duplicate release notes or empty changelog.
466 ///
467 /// See the [crate-level documentation](crate) for changelog and version
468 /// format supported by default.
469 ///
470 /// [`parse`]: Self::parse
471 pub fn parse_iter<'a, 'r>(&'r self, text: &'a str) -> ParseIter<'a, 'r> {
472 ParseIter::new(text, self.version_format.as_ref(), self.prefix_format.as_ref())
473 }
474}
475
476/// An iterator over release notes.
477///
478/// This type is returned by [`parse_iter`] function or [`Parser::parse_iter`] method.
479#[allow(missing_debug_implementations)]
480#[must_use = "iterators are lazy and do nothing unless consumed"]
481pub struct ParseIter<'a, 'r> {
482 version_format: &'r Regex,
483 prefix_format: &'r Regex,
484 find_open: memmem::Finder<'static>,
485 find_close: memmem::Finder<'static>,
486 lines: Lines<'a>,
487 /// The heading level of release sections. 1-6
488 level: Option<u8>,
489}
490
491const OPEN: &[u8] = b"<!--";
492const CLOSE: &[u8] = b"-->";
493
494fn default_prefix_format() -> &'static Regex {
495 static DEFAULT_PREFIX_FORMAT: OnceLock<Regex> = OnceLock::new();
496 fn init() -> Regex {
497 Regex::new(r"^(v|Version |Release )?").unwrap()
498 }
499 DEFAULT_PREFIX_FORMAT.get_or_init(init)
500}
501fn default_version_format() -> &'static Regex {
502 static DEFAULT_VERSION_FORMAT: OnceLock<Regex> = OnceLock::new();
503 fn init() -> Regex {
504 Regex::new(r"^(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)(-[0-9A-Za-z\.-]+)?(\+[0-9A-Za-z\.-]+)?$|^Unreleased$")
505 .unwrap()
506 }
507 DEFAULT_VERSION_FORMAT.get_or_init(init)
508}
509
510impl<'a, 'r> ParseIter<'a, 'r> {
511 fn new(
512 text: &'a str,
513 version_format: Option<&'r Regex>,
514 prefix_format: Option<&'r Regex>,
515 ) -> Self {
516 Self {
517 version_format: version_format.unwrap_or_else(|| default_version_format()),
518 prefix_format: prefix_format.unwrap_or_else(|| default_prefix_format()),
519 find_open: memmem::Finder::new(OPEN),
520 find_close: memmem::Finder::new(CLOSE),
521 lines: Lines::new(text),
522 level: None,
523 }
524 }
525
526 fn end_release(
527 &self,
528 mut cur_release: Release<'a>,
529 release_note_start: usize,
530 line_start: usize,
531 ) -> Release<'a> {
532 assert!(!cur_release.version.is_empty());
533 if release_note_start < line_start {
534 // Remove trailing newlines.
535 cur_release.notes = self.lines.text[release_note_start..line_start - 1].trim_end();
536 }
537 cur_release
538 }
539
540 fn handle_comment(&self, on_comment: &mut bool, line: &'a str) {
541 let mut line = Some(line);
542 while let Some(l) = line {
543 match (self.find_open.find(l.as_bytes()), self.find_close.find(l.as_bytes())) {
544 (None, None) => {}
545 // <!-- ...
546 (Some(_), None) => *on_comment = true,
547 // ... -->
548 (None, Some(_)) => *on_comment = false,
549 (Some(open), Some(close)) => {
550 if open < close {
551 // <!-- ... -->
552 *on_comment = false;
553 line = l.get(close + CLOSE.len()..);
554 } else {
555 // --> ... <!--
556 *on_comment = true;
557 line = l.get(open + OPEN.len()..);
558 }
559 continue;
560 }
561 }
562 break;
563 }
564 }
565}
566
567impl<'a> Iterator for ParseIter<'a, '_> {
568 type Item = Release<'a>;
569
570 fn next(&mut self) -> Option<Self::Item> {
571 // If `true`, we are in a code block ("```").
572 let mut on_code_block = false;
573 // TODO: nested case?
574 // If `true`, we are in a comment (`<!--` and `-->`).
575 let mut on_comment = false;
576 let mut release_note_start = None;
577 let mut cur_release = Release { version: "", title: "", notes: "" };
578
579 while let Some((line, line_start, line_end)) = self.lines.peek() {
580 let heading =
581 if on_code_block || on_comment { None } else { heading(line, &mut self.lines) };
582 if heading.is_none() {
583 self.lines.next();
584 if trim_start(line).starts_with("```") {
585 on_code_block = !on_code_block;
586 }
587
588 if !on_code_block {
589 self.handle_comment(&mut on_comment, line);
590 }
591
592 // Non-heading lines are always considered part of the current
593 // section.
594
595 if line_end == self.lines.text.len() {
596 break;
597 }
598 continue;
599 }
600 let heading = heading.unwrap();
601 if let Some(release_level) = self.level {
602 if heading.level > release_level {
603 // Consider sections that have lower heading levels than
604 // release sections are part of the current section.
605 self.lines.next();
606 if line_end == self.lines.text.len() {
607 break;
608 }
609 continue;
610 }
611 if heading.level < release_level {
612 // Ignore sections that have higher heading levels than
613 // release sections.
614 self.lines.next();
615 if let Some(release_note_start) = release_note_start {
616 return Some(self.end_release(cur_release, release_note_start, line_start));
617 }
618 if line_end == self.lines.text.len() {
619 break;
620 }
621 continue;
622 }
623 if let Some(release_note_start) = release_note_start {
624 return Some(self.end_release(cur_release, release_note_start, line_start));
625 }
626 }
627
628 debug_assert!(release_note_start.is_none());
629 let version = extract_version_from_title(heading.text, self.prefix_format).0;
630 if !self.version_format.is_match(version) {
631 // Ignore non-release sections that have the same heading
632 // levels as release sections.
633 self.lines.next();
634 if line_end == self.lines.text.len() {
635 break;
636 }
637 continue;
638 }
639
640 cur_release.version = version;
641 cur_release.title = heading.text;
642 self.level.get_or_insert(heading.level);
643
644 self.lines.next();
645 if heading.style == HeadingStyle::Setext {
646 // Skip an underline after a Setext-style heading.
647 self.lines.next();
648 }
649 while let Some((next, ..)) = self.lines.peek() {
650 if next.trim_start().is_empty() {
651 // Skip newlines after a heading.
652 self.lines.next();
653 } else {
654 break;
655 }
656 }
657 if let Some((_, line_start, _)) = self.lines.peek() {
658 release_note_start = Some(line_start);
659 } else {
660 break;
661 }
662 }
663
664 if !cur_release.version.is_empty() {
665 if let Some(release_note_start) = release_note_start {
666 if let Some(nodes) = self.lines.text.get(release_note_start..) {
667 // Remove trailing newlines.
668 cur_release.notes = nodes.trim_end();
669 }
670 }
671 return Some(cur_release);
672 }
673
674 None
675 }
676}
677
678struct Lines<'a> {
679 text: &'a str,
680 iter: memchr::Memchr<'a>,
681 line_start: usize,
682 peeked: Option<(&'a str, usize, usize)>,
683 peeked2: Option<(&'a str, usize, usize)>,
684}
685
686impl<'a> Lines<'a> {
687 fn new(text: &'a str) -> Self {
688 Self {
689 text,
690 iter: memchr::memchr_iter(b'\n', text.as_bytes()),
691 line_start: 0,
692 peeked: None,
693 peeked2: None,
694 }
695 }
696
697 fn peek(&mut self) -> Option<(&'a str, usize, usize)> {
698 self.peeked = self.next();
699 self.peeked
700 }
701
702 fn peek2(&mut self) -> Option<(&'a str, usize, usize)> {
703 let peeked = self.next();
704 let peeked2 = self.next();
705 self.peeked = peeked;
706 self.peeked2 = peeked2;
707 self.peeked2
708 }
709}
710
711impl<'a> Iterator for Lines<'a> {
712 type Item = (&'a str, usize, usize);
713
714 fn next(&mut self) -> Option<Self::Item> {
715 if let Some(triple) = self.peeked.take() {
716 return Some(triple);
717 }
718 if let Some(triple) = self.peeked2.take() {
719 return Some(triple);
720 }
721 let (line, line_end) = match self.iter.next() {
722 Some(line_end) => (&self.text[self.line_start..line_end], line_end),
723 None => (self.text.get(self.line_start..)?, self.text.len()),
724 };
725 let line_start = mem::replace(&mut self.line_start, line_end + 1);
726 Some((line, line_start, line_end))
727 }
728}
729
/// A markdown heading recognized by `heading`.
struct Heading<'a> {
    /// The heading text with surrounding whitespace removed.
    text: &'a str,
    /// The heading level.
    level: u8,
    /// The syntax the heading was written in.
    style: HeadingStyle,
}

/// The syntax used to write a heading.
#[derive(PartialEq)]
enum HeadingStyle {
    /// Atx-style headings start the line with 1-6 `#` characters, which
    /// correspond to heading levels 1-6.
    Atx,
    /// Setext-style headings are "underlined" with equal signs `=` (for
    /// first-level headings) or dashes `-` (for second-level headings).
    Setext,
}
745
746fn heading<'a>(line: &'a str, lines: &mut Lines<'a>) -> Option<Heading<'a>> {
747 let line = trim_start(line);
748 if line.as_bytes().first() == Some(&b'#') {
749 let mut level = 1;
750 while level <= 7 && line.as_bytes().get(level) == Some(&b'#') {
751 level += 1;
752 }
753 // https://pandoc.org/try/?params=%7B%22text%22%3A%22%23%23%23%23%23%23%5Cn%3D%3D%3D%5Cn%5Cn%23%23%23%23%23%23%23%5Cn%3D%3D%3D%5Cn%5Cn%23%23%23%23%23%23+%5Cn%3D%3D%3D%5Cn%5Cn%23%23%23%23%23%23%5Ct%5Cn%3D%3D%3D%5Cn%5Cn%23%23%23%23%23%23+a%5Cn%3D%3D%3D%5Cn%5Cn%23%23%23%23%23%23%5Cta%5Cn%3D%3D%3D%5Cn%5Cn%23%23%23%23%23%23+b%5Cn%5Cn%22%2C%22to%22%3A%22html5%22%2C%22from%22%3A%22commonmark%22%2C%22standalone%22%3Afalse%2C%22embed-resources%22%3Afalse%2C%22table-of-contents%22%3Afalse%2C%22number-sections%22%3Afalse%2C%22citeproc%22%3Afalse%2C%22html-math-method%22%3A%22plain%22%2C%22wrap%22%3A%22auto%22%2C%22highlight-style%22%3Anull%2C%22files%22%3A%7B%7D%2C%22template%22%3Anull%7D
754 if level < 7 && line.as_bytes().get(level).is_none_or(|&b| matches!(b, b' ' | b'\t')) {
755 return Some(Heading {
756 text: line.get(level + 1..).map(str::trim).unwrap_or_default(),
757 #[allow(clippy::cast_possible_truncation)] // false positive: level is < 7: https://github.com/rust-lang/rust-clippy/issues/7486
758 level: level as u8,
759 style: HeadingStyle::Atx,
760 });
761 }
762 }
763 if let Some((next, ..)) = lines.peek2() {
764 let next = trim_start(next);
765 match next.as_bytes().first() {
766 Some(b'=') => {
767 if next[1..].trim_end().as_bytes().iter().all(|&b| b == b'=') {
768 return Some(Heading {
769 text: line.trim_end(),
770 level: 1,
771 style: HeadingStyle::Setext,
772 });
773 }
774 }
775 Some(b'-') => {
776 if next[1..].trim_end().as_bytes().iter().all(|&b| b == b'-') {
777 return Some(Heading {
778 text: line.trim_end(),
779 level: 2,
780 style: HeadingStyle::Setext,
781 });
782 }
783 }
784 _ => {}
785 }
786 }
787 None
788}
789
/// Strips up to three leading spaces from `s`.
///
/// A line indented by four or more spaces is returned unchanged. Only the
/// space character counts as indentation here; tabs are left alone.
fn trim_start(s: &str) -> &str {
    // Looking at the first four spaces is enough to tell the cases apart.
    let indent = s.bytes().take_while(|&b| b == b' ').take(4).count();
    if indent >= 4 {
        s
    } else {
        // Indents less than 4 are ignored.
        &s[indent..]
    }
}
801
802fn extract_version_from_title<'a>(mut text: &'a str, prefix_format: &Regex) -> (&'a str, &'a str) {
803 // Remove link from prefix
804 // [Version 1.0.0 2022-01-01]
805 // ^
806 text = text.strip_prefix('[').unwrap_or(text);
807 // Remove prefix
808 // Version 1.0.0 2022-01-01]
809 // ^^^^^^^^
810 if let Some(m) = prefix_format.find(text) {
811 text = &text[m.end()..];
812 }
813 // Remove whitespace after the version and the strings following it
814 // 1.0.0 2022-01-01]
815 // ^^^^^^^^^^^^
816 text = text.split(char::is_whitespace).next().unwrap();
817 // Remove link from version
818 // Version [1.0.0 2022-01-01]
819 // ^
820 // [Version 1.0.0] 2022-01-01
821 // ^
822 // Version [1.0.0] 2022-01-01
823 // ^ ^
824 unlink(text)
825}
826
827/// Remove a link from the given markdown text.
828///
829/// # Note
830///
831/// This is not a full "unlink" on markdown. See `full_unlink` for "full" version.
832fn unlink(mut s: &str) -> (&str, &str) {
833 // [1.0.0]
834 // ^
835 s = s.strip_prefix('[').unwrap_or(s);
836 if let Some(pos) = memchr::memchr(b']', s.as_bytes()) {
837 // 1.0.0]
838 // ^
839 if pos + 1 == s.len() {
840 return (&s[..pos], "");
841 }
842 let remaining = &s[pos + 1..];
843 // 1.0.0](link)
844 // ^^^^^^^
845 // 1.0.0][link]
846 // ^^^^^^^
847 for (open, close) in [(b'(', b')'), (b'[', b']')] {
848 if remaining.as_bytes().first() == Some(&open) {
849 if let Some(r_pos) = memchr::memchr(close, &remaining.as_bytes()[1..]) {
850 return (&s[..pos], &remaining[r_pos + 2..]);
851 }
852 }
853 }
854 return (&s[..pos], remaining);
855 }
856 (s, "")
857}
858
859/// Remove links from the given markdown text.
860fn full_unlink(s: &str) -> Cow<'_, str> {
861 let mut remaining = s;
862 if let Some(mut pos) = memchr::memchr(b'[', remaining.as_bytes()) {
863 let mut buf = String::with_capacity(remaining.len());
864 loop {
865 buf.push_str(&remaining[..pos]);
866 let (t, r) = unlink(&remaining[pos..]);
867 buf.push_str(t);
868 remaining = r;
869 match memchr::memchr(b'[', remaining.as_bytes()) {
870 Some(p) => pos = p,
871 None => break,
872 }
873 }
874 buf.push_str(remaining);
875 buf.into()
876 } else {
877 remaining.into()
878 }
879}