1use polib::catalog::Catalog;
27use pulldown_cmark::{
28 BrokenLinkCallback, CodeBlockKind, DefaultBrokenLinkCallback, Event, LinkType, Tag, TagEnd,
29};
30use pulldown_cmark_to_cmark::{calculate_code_block_token_count, cmark_resume_with_options};
31use pulldown_cmark_to_cmark::{Error as CmarkError, Options, State};
32use std::sync::OnceLock;
33use syntect::easy::ScopeRangeIterator;
34use syntect::parsing::{ParseState, Scope, ScopeStack, SyntaxSet};
35
36pub mod directives;
37pub mod gettext;
38pub mod normalize;
39pub mod preprocessors;
40pub mod renderers;
41pub mod xgettext;
42
43pub fn wrap_sources(sources: &str) -> String {
48 let options = textwrap::Options::new(76)
49 .break_words(false)
50 .word_splitter(textwrap::WordSplitter::NoHyphenation);
51 textwrap::refill(sources, options)
52}
53
54pub fn new_cmark_parser<'input, F: BrokenLinkCallback<'input>>(
57 text: &'input str,
58 broken_link_callback: Option<F>,
59) -> pulldown_cmark::Parser<'input, F> {
60 let mut options = pulldown_cmark::Options::empty();
61 options.insert(pulldown_cmark::Options::ENABLE_TABLES);
62 options.insert(pulldown_cmark::Options::ENABLE_OLD_FOOTNOTES);
63 options.insert(pulldown_cmark::Options::ENABLE_STRIKETHROUGH);
64 options.insert(pulldown_cmark::Options::ENABLE_TASKLISTS);
65 options.insert(pulldown_cmark::Options::ENABLE_HEADING_ATTRIBUTES);
66 pulldown_cmark::Parser::new_with_broken_link_callback(text, options, broken_link_callback)
67}
68
69pub fn extract_events<'a>(text: &'a str, state: Option<State<'a>>) -> Vec<(usize, Event<'a>)> {
99 fn expand_shortcut_link(tag: Tag<'_>) -> Tag<'_> {
101 match tag {
102 Tag::Link {
103 link_type: LinkType::Shortcut | LinkType::Collapsed | LinkType::Reference,
104 dest_url,
105 title,
106 id,
107 } => Tag::Link {
108 link_type: LinkType::Inline,
109 dest_url,
110 title,
111 id,
112 },
113 Tag::Image {
114 link_type: LinkType::Shortcut | LinkType::Collapsed | LinkType::Reference,
115 dest_url,
116 title,
117 id,
118 } => Tag::Image {
119 link_type: LinkType::Inline,
120 dest_url,
121 title,
122 id,
123 },
124 _ => tag,
125 }
126 }
127
128 fn convert_event_common(event: Event<'_>) -> Event<'_> {
130 match event {
131 Event::SoftBreak => Event::Text(" ".into()),
132 Event::Start(tag @ (Tag::Link { .. } | Tag::Image { .. })) => {
137 Event::Start(expand_shortcut_link(tag))
138 }
139 _ => event,
140 }
141 }
142
143 let offsets = text
146 .match_indices('\n')
147 .map(|(offset, _)| offset)
148 .collect::<Vec<_>>();
149
150 match state {
151 Some(state) if state.is_in_code_block() => text
155 .split_inclusive('\n')
156 .enumerate()
157 .map(|(idx, line)| (idx + 1, Event::Text(line.into())))
158 .collect(),
159 Some(state) if state.in_table_cell => {
163 let text = format!("|{text}|\n|-|");
164 new_cmark_parser::<'_, DefaultBrokenLinkCallback>(&text, None)
165 .filter_map(|event| {
166 if let Event::Start(Tag::Table(..) | Tag::TableHead | Tag::TableCell)
167 | Event::End(TagEnd::Table | TagEnd::TableHead | TagEnd::TableCell) = event
168 {
169 return None;
170 }
171 Some((1, convert_event_common(event).into_static()))
173 })
174 .collect()
175 }
176 _ => new_cmark_parser::<'a, DefaultBrokenLinkCallback>(text, None)
178 .into_offset_iter()
179 .map(|(event, range)| {
180 let lineno = offsets.partition_point(|&o| o < range.start) + 1;
181 (lineno, convert_event_common(event))
182 })
183 .collect(),
184 }
185}
186
187#[derive(Debug, Clone, PartialEq)]
189pub enum Group<'a> {
190 Translate {
195 events: Vec<(usize, Event<'a>)>,
196 comment: String,
198 },
199
200 Skip(Vec<(usize, Event<'a>)>),
205}
206
/// Directive state carried from one group to the next while grouping events.
#[derive(Debug, Default)]
struct GroupingContext {
    /// Set when a skip directive was seen; the next translatable group is
    /// emitted as a skipped group instead.
    skip_next_group: bool,
    /// Comment directive texts waiting to be attached to the next
    /// translatable group.
    comments: Vec<String>,
}

impl GroupingContext {
    /// Consume the context and return it with `skip_next_group` reset to
    /// `false`; `comments` is carried over unchanged.
    fn clear_skip_next_group(mut self) -> Self {
        self.skip_next_group = false;
        self
    }
}
221
222pub fn group_events<'a>(events: &'a [(usize, Event<'a>)]) -> Result<Vec<Group<'a>>, CmarkError> {
266 #[derive(Debug)]
267 enum State {
268 Translate(usize),
269 Skip(usize),
270 }
271
272 impl State {
273 fn into_groups<'a>(
275 self,
276 idx: usize,
277 events: &'a [(usize, Event<'a>)],
278 mut ctx: GroupingContext,
279 ) -> Result<(Vec<Group<'a>>, GroupingContext), CmarkError> {
280 let groups = match self {
281 State::Translate(start) => {
282 if ctx.skip_next_group {
283 (
284 vec![Group::Skip(events[start..idx].into())],
285 ctx.clear_skip_next_group(),
286 )
287 } else if is_codeblock_group(&events[start..idx]) {
288 parse_codeblock(&events[start..idx], ctx)?
289 } else {
290 (
291 vec![Group::Translate {
292 events: events[start..idx].into(),
293 comment: std::mem::take(&mut ctx.comments).join(" "),
294 }],
295 ctx,
296 )
297 }
298 }
299 State::Skip(start) => (vec![Group::Skip(events[start..idx].into())], ctx),
300 };
301 Ok(groups)
302 }
303 }
304
305 let mut groups = Vec::new();
306 let mut state = State::Skip(0);
307 let mut ctx = GroupingContext::default();
308
309 for (idx, (_, event)) in events.iter().enumerate() {
310 match event {
311 Event::Start(Tag::Paragraph | Tag::CodeBlock(..)) => {
315 let mut next_groups;
317 (next_groups, ctx) = state.into_groups(idx, events, ctx)?;
318 groups.append(&mut next_groups);
319
320 state = State::Translate(idx);
321 }
322 Event::End(TagEnd::Paragraph | TagEnd::CodeBlock) => {
323 let idx = idx + 1;
325 let mut next_groups;
326 (next_groups, ctx) = state.into_groups(idx, events, ctx)?;
327 groups.append(&mut next_groups);
328
329 state = State::Skip(idx);
330 }
331
332 Event::Start(
334 Tag::Emphasis
335 | Tag::Strong
336 | Tag::Strikethrough
337 | Tag::Link { .. }
338 | Tag::Image { .. },
339 )
340 | Event::End(
341 TagEnd::Emphasis
342 | TagEnd::Strong
343 | TagEnd::Strikethrough
344 | TagEnd::Link
345 | TagEnd::Image,
346 )
347 | Event::Text(_)
348 | Event::Code(_)
349 | Event::FootnoteReference(_)
350 | Event::SoftBreak
351 | Event::HardBreak => {
352 if let State::Skip(_) = state {
355 let mut next_groups;
356 (next_groups, ctx) = state.into_groups(idx, events, ctx)?;
357 groups.append(&mut next_groups);
358
359 state = State::Translate(idx);
360 }
361 }
362
363 Event::Html(s) | Event::InlineHtml(s) => {
364 match directives::find(s) {
365 Some(directives::Directive::Skip) => {
366 if let State::Translate(_) = state {
368 let mut next_groups;
369 (next_groups, ctx) = state.into_groups(idx, events, ctx)?;
370 groups.append(&mut next_groups);
371
372 state = State::Translate(idx);
376 }
377
378 ctx.skip_next_group = true;
379 }
380
381 Some(directives::Directive::Comment(comment)) => {
382 if let State::Translate(_) = state {
384 let mut next_groups;
385 (next_groups, ctx) = state.into_groups(idx, events, ctx)?;
386 groups.append(&mut next_groups);
387
388 state = State::Translate(idx);
392 }
393
394 ctx.comments.push(comment);
395 }
396 _ => {
397 match event {
398 Event::Html(_) => {
399 if let State::Translate(_) = state {
401 let mut next_groups;
402 (next_groups, ctx) = state.into_groups(idx, events, ctx)?;
403 groups.append(&mut next_groups);
404
405 state = State::Skip(idx);
406 }
407 }
408 Event::InlineHtml(_) =>
409 {
412 if let State::Skip(_) = state {
413 let mut next_groups;
414 (next_groups, ctx) = state.into_groups(idx, events, ctx)?;
415 groups.append(&mut next_groups);
416
417 state = State::Translate(idx);
418 }
419 }
420 _ => unreachable!(),
422 }
423 }
424 }
425 }
426
427 _ => {
430 if let State::Translate(_) = state {
431 let mut next_groups;
432 (next_groups, ctx) = state.into_groups(idx, events, ctx)?;
433 groups.append(&mut next_groups);
434
435 state = State::Skip(idx);
436 }
437 }
438 }
439 }
440
441 match state {
442 State::Translate(start) => groups.push(Group::Translate {
443 events: events[start..].into(),
444 comment: "".into(),
445 }),
446 State::Skip(start) => groups.push(Group::Skip(events[start..].into())),
447 }
448
449 Ok(groups)
450}
451
452fn is_codeblock_group(events: &[(usize, Event<'_>)]) -> bool {
454 matches!(
455 events,
456 [
457 (_, Event::Start(Tag::CodeBlock(_))),
458 ..,
459 (_, Event::End(TagEnd::CodeBlock))
460 ]
461 )
462}
463
464fn is_translate_scope(x: Scope) -> bool {
466 static SCOPE_STRING: OnceLock<Scope> = OnceLock::new();
467 static SCOPE_COMMENT: OnceLock<Scope> = OnceLock::new();
468
469 let scope_string = SCOPE_STRING.get_or_init(|| Scope::new("string").unwrap());
470 let scope_comment = SCOPE_COMMENT.get_or_init(|| Scope::new("comment").unwrap());
471 scope_string.is_prefix_of(x) || scope_comment.is_prefix_of(x)
472}
473
474fn heuristic_codeblock<'a>(
476 events: &'a [(usize, Event<'_>)],
477 mut ctx: GroupingContext,
478) -> Result<(Vec<Group<'a>>, GroupingContext), CmarkError> {
479 let is_translate = match events {
480 [(_, Event::Start(Tag::CodeBlock(_))), .., (_, Event::End(TagEnd::CodeBlock))] => {
481 let (codeblock_text, _) = reconstruct_markdown(events, None)?;
482 codeblock_text.contains('"') || codeblock_text.contains("//")
486 }
487 _ => true,
488 };
489
490 let (groups, ctx) = if is_translate {
491 (
492 vec![Group::Translate {
493 events: events.into(),
494 comment: std::mem::take(&mut ctx.comments).join(" "),
495 }],
496 ctx,
497 )
498 } else {
499 (vec![Group::Skip(events.into())], ctx)
500 };
501 Ok((groups, ctx))
502}
503
504fn admonish_codeblock<'a>(
509 events: &'a [(usize, Event<'_>)],
510 mut ctx: GroupingContext,
511) -> Result<(Vec<Group<'a>>, GroupingContext), CmarkError> {
512 let groups = vec![Group::Translate {
516 events: events.into(),
517 comment: std::mem::take(&mut ctx.comments).join(" "),
518 }];
519
520 Ok((groups, ctx))
521}
522
523fn is_admonish(events: &[(usize, Event<'_>)]) -> bool {
525 const ADMONISH_CODEBLOCK_NAME: &str = "admonish";
526
527 match events {
530 [(_, Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(info_string)))), .., (_, Event::End(TagEnd::CodeBlock))] =>
531 {
532 matches!(info_string.split_once(' '), Some((keyword, _)) if keyword == ADMONISH_CODEBLOCK_NAME)
534 }
535 _ => false,
536 }
537}
538
539fn parse_codeblock<'a>(
541 events: &'a [(usize, Event<'_>)],
542 mut ctx: GroupingContext,
543) -> Result<(Vec<Group<'a>>, GroupingContext), CmarkError> {
544 static SYNTAX_SET: OnceLock<SyntaxSet> = OnceLock::new();
546 let ss = SYNTAX_SET.get_or_init(SyntaxSet::load_defaults_newlines);
547
548 let syntax = if let (_, Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(x)))) = &events[0] {
549 ss.find_syntax_by_token(x.split(',').next().unwrap())
550 } else {
551 None
552 };
553
554 let Some(syntax) = syntax else {
555 if is_admonish(events) {
556 return admonish_codeblock(events, ctx);
557 } else {
558 return heuristic_codeblock(events, ctx);
560 }
561 };
562
563 let mut ps = ParseState::new(syntax);
564 let mut ret = vec![];
565
566 for (idx, event) in events.iter().enumerate() {
567 match event {
568 (text_line, Event::Text(text)) => {
569 let mut stack = ScopeStack::new();
570 let mut stack_failure = false;
571
572 let Ok(ops) = ps.parse_line(text, ss) else {
573 ret.push(Group::Translate {
575 events: events[idx..idx + 1].into(),
576 comment: std::mem::take(&mut ctx.comments).join(" "),
577 });
578 continue;
579 };
580
581 let mut translate_events = vec![];
582 let mut groups = vec![];
583
584 for (range, op) in ScopeRangeIterator::new(&ops, text) {
585 if stack.apply(op).is_err() {
586 stack_failure = true;
587 break;
588 }
589
590 if range.is_empty() {
591 continue;
592 }
593
594 let range_line = if range.start == 0 {
596 *text_line
597 } else {
598 text_line + text[0..range.start].lines().count() - 1
599 };
600
601 let text = &text[range];
602
603 let is_whitespace = text.trim_matches(&[' ', '\t'] as &[_]).is_empty();
608
609 let is_translate = stack.scopes.iter().any(|x| is_translate_scope(*x));
610
611 if is_translate || (is_whitespace && !translate_events.is_empty()) {
612 translate_events.push((range_line, Event::Text(text.into())));
613 } else {
614 let whitespace_events = extract_trailing_whitespaces(&mut translate_events);
615 if !translate_events.is_empty() {
616 groups.push(Group::Translate {
617 events: std::mem::take(&mut translate_events),
618 comment: std::mem::take(&mut ctx.comments).join(" "),
619 });
620 }
621 if !whitespace_events.is_empty() {
622 groups.push(Group::Skip(whitespace_events));
623 }
624 groups.push(Group::Skip(vec![(range_line, Event::Text(text.into()))]));
625 }
626 }
627
628 let whitespace_events = extract_trailing_whitespaces(&mut translate_events);
629 if !translate_events.is_empty() {
630 groups.push(Group::Translate {
631 events: std::mem::take(&mut translate_events),
632 comment: std::mem::take(&mut ctx.comments).join(" "),
633 });
634 }
635 if !whitespace_events.is_empty() {
636 groups.push(Group::Skip(whitespace_events));
637 }
638
639 if stack_failure {
640 ret.push(Group::Translate {
642 events: events[idx..idx + 1].into(),
643 comment: std::mem::take(&mut ctx.comments).join(" "),
644 });
645 } else {
646 ret.append(&mut groups);
647 }
648 }
649 _ => {
650 ret.push(Group::Skip(events[idx..idx + 1].into()));
651 }
652 }
653 }
654 Ok((ret, ctx))
655}
656
657fn extract_trailing_whitespaces<'a>(buf: &mut Vec<(usize, Event<'a>)>) -> Vec<(usize, Event<'a>)> {
659 let mut ret = vec![];
660
661 while let Some(last) = buf.last() {
662 match &last.1 {
663 Event::Text(text) if text.as_ref().trim_matches(&[' ', '\t'] as &[_]).is_empty() => {
664 let last = buf.pop().unwrap();
665 ret.push(last);
666 }
667 _ => break,
668 }
669 }
670 ret.reverse();
671 ret
672}
673
674pub fn reconstruct_markdown<'a>(
692 group: &[(usize, Event<'a>)],
693 state: Option<State<'a>>,
694) -> Result<(String, State<'a>), CmarkError> {
695 let events = group.iter().map(|(_, event)| event);
696 let code_block_token_count = calculate_code_block_token_count(events.clone()).unwrap_or(3);
697 let mut markdown = String::new();
698 let options = Options {
699 code_block_token_count,
700 list_token: '-',
701 emphasis_token: '_',
702 strong_token: "**",
703 ..Options::default()
704 };
705 let new_state = cmark_resume_with_options(
708 events.clone(),
709 String::new(),
710 state.clone(),
711 options.clone(),
712 )?;
713
714 let simplified_state = state.map(|mut state| {
720 state.newlines_before_start = 0;
721 state.padding.clear();
722 state
723 });
724 cmark_resume_with_options(events, &mut markdown, simplified_state, options)?;
725 Ok((String::from(markdown.trim_start_matches('\n')), new_state))
730}
731
/// A message extracted from a document, with its translator comment.
#[derive(Debug, PartialEq)]
pub struct ExtractedMessage {
    /// The message text.
    pub message: String,
    /// Comment attached to the message; empty if there is none.
    pub comment: String,
}

/// Build a message with the given text and an empty comment.
impl From<&str> for ExtractedMessage {
    fn from(s: &str) -> Self {
        Self {
            message: String::from(s),
            comment: String::new(),
        }
    }
}
745
746pub fn extract_messages(document: &str) -> Result<Vec<(usize, ExtractedMessage)>, CmarkError> {
794 let events = extract_events(document, None);
795 let mut messages = Vec::new();
796 let mut state = None;
797
798 for group in group_events(&events)? {
799 match group {
800 Group::Translate { events, comment } => {
801 if let Some((lineno, _)) = events.first() {
802 let (text, new_state) = reconstruct_markdown(&events, state)?;
803 if !text.trim().is_empty() {
806 messages.push((
807 *lineno,
808 ExtractedMessage {
809 message: text,
810 comment,
811 },
812 ));
813 }
814 state = Some(new_state);
815 }
816 }
817 Group::Skip(events) => {
818 let (_, new_state) = reconstruct_markdown(&events, state)?;
819 state = Some(new_state);
820 }
821 }
822 }
823
824 Ok(messages)
825}
826
827pub fn trim_paragraph<'a, 'event>(
856 new_events: &'a [(usize, Event<'event>)],
857 old_events: &'a [(usize, Event<'event>)],
858) -> &'a [(usize, Event<'event>)] {
859 use pulldown_cmark::Event::{End, Start};
860 use pulldown_cmark::Tag::Paragraph;
861 match new_events {
862 [(_, Start(Paragraph)), inner @ .., (_, End(TagEnd::Paragraph))] => match old_events {
863 [(_, Start(Paragraph)), .., (_, End(TagEnd::Paragraph))] => new_events,
864 [..] => inner,
865 },
866 [..] => new_events,
867 }
868}
869
870pub fn translate_events<'a>(
872 events: &'a [(usize, Event<'a>)],
873 catalog: &'a Catalog,
874) -> Result<Vec<(usize, Event<'a>)>, CmarkError> {
875 let mut translated_events = Vec::new();
876 let mut state = None;
877
878 for group in group_events(events)? {
879 match group {
880 Group::Translate { events, .. } => {
881 let (msgid, new_state) = reconstruct_markdown(&events, state.clone())?;
883 let translated = catalog
884 .find_message(None, &msgid, None)
885 .filter(|msg| !msg.flags().is_fuzzy() && msg.is_translated())
886 .and_then(|msg| msg.msgstr().ok());
887 match translated {
888 Some(msgstr) => {
889 translated_events.extend_from_slice(trim_paragraph(
892 &extract_events(msgstr, state),
893 &events,
894 ));
895 }
896 None => translated_events.extend_from_slice(&events),
897 }
898 state = Some(new_state);
900 }
901 Group::Skip(events) => {
902 translated_events.extend_from_slice(&events);
904 let (_, new_state) = reconstruct_markdown(&events, state)?;
906 state = Some(new_state);
907 }
908 }
909 }
910
911 Ok(translated_events)
912}
913
914#[cfg(test)]
915mod tests {
916 use super::*;
917 use pretty_assertions::assert_eq;
918 use pulldown_cmark::Alignment;
919 use pulldown_cmark::CodeBlockKind;
920 use pulldown_cmark::Event::*;
921 use pulldown_cmark::HeadingLevel::*;
922 use pulldown_cmark::Tag::*;
923
924 #[track_caller]
926 fn assert_extract_messages(document: &str, expected: &[(usize, &str)]) {
927 assert_eq!(
928 extract_messages(document)
929 .unwrap()
930 .iter()
931 .map(|(lineno, msg)| (*lineno, &msg.message[..]))
932 .collect::<Vec<_>>(),
933 expected,
934 );
935 }
936
937 #[test]
938 fn extract_events_empty() {
939 assert_eq!(extract_events("", None), vec![]);
940 }
941
942 #[test]
943 fn extract_events_paragraph() {
944 assert_eq!(
945 extract_events("foo bar", None),
946 vec![
947 (1, Start(Paragraph)),
948 (1, Text("foo bar".into())),
949 (1, End(TagEnd::Paragraph)),
950 ]
951 );
952 }
953
954 #[test]
955 fn extract_events_softbreak() {
956 assert_eq!(
957 extract_events("foo\nbar", None),
958 vec![
959 (1, Start(Paragraph)),
960 (1, Text("foo".into())),
961 (1, Text(" ".into())),
962 (2, Text("bar".into())),
963 (1, End(TagEnd::Paragraph)),
964 ]
965 );
966 }
967
968 #[test]
969 fn extract_events_heading() {
970 assert_eq!(
971 extract_events("# Foo Bar", None),
972 vec![
973 (
974 1,
975 Start(Tag::Heading {
976 level: H1,
977 id: None,
978 classes: vec![],
979 attrs: vec![]
980 })
981 ),
982 (1, Text("Foo Bar".into())),
983 (1, End(TagEnd::Heading(H1))),
984 ]
985 );
986 }
987
988 #[test]
989 fn extract_events_list_item() {
990 assert_eq!(
991 extract_events("* foo bar", None),
992 vec![
993 (1, Start(List(None))),
994 (1, Start(Item)),
995 (1, Text("foo bar".into())),
996 (1, End(TagEnd::Item)),
997 (1, End(TagEnd::List(false))),
998 ]
999 );
1000 }
1001
1002 #[test]
1003 fn extract_events_code_block() {
1004 let (_, state) =
1005 reconstruct_markdown(&[(1, Start(CodeBlock(CodeBlockKind::Indented)))], None).unwrap();
1006 assert_eq!(
1007 extract_events("foo\nbar\nbaz", Some(state)),
1008 vec![
1009 (1, Text("foo\n".into())),
1010 (2, Text("bar\n".into())),
1011 (3, Text("baz".into())),
1012 ]
1013 );
1014
1015 assert_eq!(
1017 extract_events("foo\nbar\nbaz", None),
1018 vec![
1019 (1, Start(Paragraph)),
1020 (1, Text("foo".into())),
1021 (1, Text(" ".into())),
1022 (2, Text("bar".into())),
1023 (2, Text(" ".into())),
1024 (3, Text("baz".into())),
1025 (1, End(TagEnd::Paragraph)),
1026 ]
1027 );
1028 }
1029
1030 #[test]
1031 fn extract_events_comments() {
1032 assert_eq!(
1033 extract_events("<!-- mdbook-xgettext:skip -->\nHello", None),
1034 vec![
1035 (1, Start(HtmlBlock)),
1036 (1, Html("<!-- mdbook-xgettext:skip -->\n".into())),
1037 (1, End(TagEnd::HtmlBlock)),
1038 (2, Start(Paragraph)),
1039 (2, Text("Hello".into())),
1040 (2, End(TagEnd::Paragraph)),
1041 ]
1042 );
1043 }
1044
1045 #[test]
1046 fn extract_events_html_block() {
1047 let (_, state) = reconstruct_markdown(
1048 &[
1049 (1, Start(Table(vec![Alignment::None]))),
1050 (1, Start(TableHead)),
1051 (1, Start(TableCell)),
1052 ],
1053 None,
1054 )
1055 .unwrap();
1056 assert_eq!(
1058 extract_events("<img />", Some(state)),
1059 vec![(1, InlineHtml("<img />".into()))]
1060 );
1061
1062 assert_eq!(
1064 extract_events("<img />", None),
1065 vec![
1066 (1, Start(HtmlBlock)),
1067 (1, Html("<img />".into())),
1068 (1, End(TagEnd::HtmlBlock)),
1069 ]
1070 );
1071 }
1072
1073 #[test]
1074 fn extract_messages_empty() {
1075 assert_extract_messages("", &[]);
1076 }
1077
1078 #[test]
1079 fn extract_messages_keep_empty_inline_html() {
1080 assert_extract_messages("<span></span>", &[(1, "<span></span>")]);
1082 }
1083
1084 #[test]
1085 fn extract_messages_keep_whitespace_inline_html() {
1086 assert_extract_messages("<span> </span>", &[(1, "<span> </span>")]);
1088 }
1089
1090 #[test]
1091 fn extract_messages_ignore_whitespace_only_block_html() {
1092 assert_extract_messages("<p> </p>", &[]);
1094 }
1095
1096 #[test]
1097 fn extract_messages_single_line() {
1098 assert_extract_messages("This is a paragraph.", &[(1, "This is a paragraph.")]);
1099 }
1100
1101 #[test]
1102 fn extract_messages_simple() {
1103 assert_extract_messages(
1104 "This is\n\
1105 the first\n\
1106 paragraph.🦀\n\
1107 \n\
1108 Second paragraph.",
1109 &[
1110 (1, "This is the first paragraph.🦀"),
1111 (5, "Second paragraph."),
1112 ],
1113 );
1114 }
1115
1116 #[test]
1117 fn extract_messages_leading_newlines() {
1118 assert_extract_messages(
1119 "\n\
1120 \n\
1121 \n\
1122 This is the\n\
1123 first paragraph.",
1124 &[(4, "This is the first paragraph.")],
1125 );
1126 }
1127
1128 #[test]
1129 fn extract_messages_trailing_newlines() {
1130 assert_extract_messages(
1131 "This is\n\
1132 a paragraph.\n\
1133 \n\
1134 \n",
1135 &[(1, "This is a paragraph.")],
1136 );
1137 }
1138
1139 #[test]
1140 fn extract_messages_styled_text() {
1141 assert_extract_messages(
1144 "**This** __~~message~~__ _has_ `code` *style*\n",
1145 &[(1, "**This** **~~message~~** _has_ `code` _style_")],
1146 );
1147 }
1148
1149 #[test]
1150 fn extract_messages_inline_html() {
1151 assert_extract_messages(
1153 "Hi from <span dir=\"ltr\">Rust</div>",
1154 &[(1, "Hi from <span dir=\"ltr\">Rust</div>")],
1155 );
1156 }
1157
1158 #[test]
1159 fn extract_messages_block_html() {
1160 assert_extract_messages(
1162 "<div class=\"warning\">\n\
1163 \n\
1164 Beware of the dog!\n\
1165 \n\
1166 </div>",
1167 &[(3, "Beware of the dog!")],
1168 );
1169 }
1170
1171 #[test]
1172 fn extract_messages_mixed_html() {
1173 assert_extract_messages(
1175 "<div>\n\
1176 \n\
1177 Hi from <span dir=\"ltr\">Rust</span>\n\
1178 \n\
1179 </div>",
1180 &[(3, "Hi from <span dir=\"ltr\">Rust</span>")],
1181 );
1182 }
1183
1184 #[test]
1185 fn extract_messages_inline_link() {
1186 assert_extract_messages(
1187 "See [this page](https://example.com) for more info.",
1188 &[(1, "See [this page](https://example.com) for more info.")],
1189 );
1190 }
1191
1192 #[test]
1193 fn extract_messages_reference_link() {
1194 assert_extract_messages(
1195 "See [this page][1] for more info.\n\n\
1196 [1]: https://example.com",
1197 &[(1, "See [this page](https://example.com) for more info.")],
1199 );
1200 }
1201
1202 #[test]
1203 fn extract_messages_collapsed_link() {
1204 assert_extract_messages(
1206 "Click [here][]!\n\n\
1207 [here]: http://example.net/",
1208 &[(1, "Click [here](http://example.net/)!")],
1209 );
1210 }
1211
1212 #[test]
1213 fn extract_messages_shortcut_link() {
1214 assert_extract_messages(
1215 "Click [here]!\n\n\
1216 [here]: http://example.net/",
1217 &[(1, "Click [here](http://example.net/)!")],
1218 );
1219 }
1220
1221 #[test]
1222 fn extract_messages_autolink() {
1223 assert_extract_messages(
1224 "Visit <http://example.net>!",
1225 &[(1, "Visit <http://example.net>!")],
1226 );
1227 }
1228
1229 #[test]
1230 fn extract_messages_email() {
1231 assert_extract_messages(
1232 "Contact <info@example.net>!",
1233 &[(1, "Contact <info@example.net>!")],
1234 );
1235 }
1236
1237 #[test]
1238 fn extract_messages_broken_reference_link() {
1239 assert_extract_messages("[foo][unknown]", &[(1, r"\[foo\]\[unknown\]")]);
1245 }
1246
1247 #[test]
1248 fn extract_messages_footnotes() {
1249 assert_extract_messages(
1250 "
1251The document[^1] text.
1252
1253[^1]: The footnote text.
1254",
1255 &[
1256 (2, "The document[^1] text."), (4, "The footnote text."),
1258 ],
1259 );
1260 }
1261
1262 #[test]
1263 fn extract_messages_block_quote() {
1264 assert_extract_messages(
1265 r"One of my favorite quotes is:
1266
1267> Don't believe everything you read on the Internet.
1268>
1269> I didn't say this second part, but I needed a paragraph for testing.
1270
1271--Abraham Lincoln
1272",
1273 &[
1274 (1, "One of my favorite quotes is:"),
1275 (3, "Don't believe everything you read on the Internet."),
1276 (
1277 5,
1278 "I didn't say this second part, but I needed a paragraph for testing.",
1279 ),
1280 (7, "\\--Abraham Lincoln"),
1281 ],
1282 );
1283 }
1284
1285 #[test]
1286 fn extract_messages_table() {
1287 let input = "\
1288 | Module Type | Description\n\
1289 |-------------------|-------------------------\n\
1290 | `rust_binary` | Produces a Rust binary.\n\
1291 | `rust_library` | Produces a Rust library.\n\
1292 ";
1293 assert_extract_messages(
1294 input,
1295 &[
1296 (1, "Module Type"),
1297 (1, "Description"),
1298 (3, "`rust_binary`"),
1299 (3, "Produces a Rust binary."),
1300 (4, "`rust_library`"),
1301 (4, "Produces a Rust library."),
1302 ],
1303 );
1304 }
1305
1306 #[test]
1307 fn extract_messages_code_block() {
1308 assert_extract_messages(
1309 "Preamble\n```rust\n// Example:\nfn hello() {\n some_code()\n\n todo!()\n}\n```\nPostamble",
1310 &[
1311 (1, "Preamble"),
1312 (
1313 3,
1314 "// Example:\n",
1315 ),
1316 (10, "Postamble"),
1317 ],
1318 );
1319 }
1320
1321 #[test]
1322 fn extract_messages_two_code_blocks() {
1323 assert_extract_messages(
1324 "```\n\
1325 \"First\" block\n\
1326 ```\n\
1327 ```\n\
1328 \"Second\" block\n\
1329 ```\n\
1330 ",
1331 &[
1332 (1, "```\n\"First\" block\n```"), (4, "```\n\"Second\" block\n```"),
1334 ],
1335 );
1336 }
1337
1338 #[test]
1339 fn extract_messages_quoted_code_block() {
1340 assert_extract_messages(
1341 "\
1342 > Preamble\n\
1343 > ```rust\n\
1344 > fn hello() {\n\
1345 > some_code()\n\
1346 >\n\
1347 > // FIXME: do something here!\n\
1348 > todo!()\n\
1349 > }\n\
1350 > ```\n\
1351 > Postamble",
1352 &[
1353 (1, "Preamble"),
1354 (6, "// FIXME: do something here!\n"),
1355 (10, "Postamble"),
1356 ],
1357 );
1358 }
1359
1360 #[test]
1361 fn extract_messages_code_block_with_block_comment() {
1362 assert_extract_messages(
1363 "```rust\n\
1364 /* block comment\n\
1365 * /* nested block comment\n\
1366 * */\n\
1367 * \n\
1368 * \n\
1369 * \n\
1370 * */\n\
1371 ```\n",
1372 &[(
1373 2,
1374 "/* block comment\n* /* nested block comment\n* */\n* \n* \n* \n* */",
1375 )],
1376 );
1377 }
1378
1379 #[test]
1380 fn extract_messages_code_block_with_continuous_line_comments() {
1381 assert_extract_messages(
1382 r"```rust
1383// continuous
1384// line
1385// comments
1386{
1387 // continuous
1388 // line
1389 // comments
1390 let a = 1; // single line comment
1391 let b = 1; // single line comment
1392}
1393```",
1394 &[
1395 (2, "// continuous\n// line\n// comments\n"),
1396 (6, "// continuous\n // line\n // comments\n"),
1397 (9, "// single line comment\n"),
1398 (10, "// single line comment\n"),
1399 ],
1400 );
1401 }
1402
1403 #[test]
1404 fn extract_messages_multi_language_code_blocks() {
1405 assert_extract_messages(
1406 r#"```c
1407// C
1408'C'; "C";
1409```
1410```html
1411<!-- HTML
1412HTML -->
1413```
1414```javascript
1415`JavaScript`
1416```
1417```ruby
1418# Ruby
1419```"#,
1420 &[
1421 (2, "// C\n'C'"),
1422 (3, "\"C\""),
1423 (6, "<!-- HTML\nHTML -->"),
1424 (10, "`JavaScript`"),
1425 (13, "# Ruby\n"),
1426 ],
1427 );
1428 }
1429
1430 #[test]
1431 fn extract_messages_details() {
1432 assert_extract_messages(
1434 "Preamble\n\
1435 <details>\n\
1436 Some Details\n\
1437 </details>\n\
1438 \n\
1439 Postamble",
1440 &[
1441 (1, "Preamble"), (6, "Postamble"),
1444 ],
1445 );
1446 assert_extract_messages(
1449 "Preamble\n\
1450 \n\
1451 <details>\n\
1452 \n\
1453 Some Details\n\
1454 \n\
1455 </details>\n\
1456 \n\
1457 Postamble",
1458 &[
1459 (1, "Preamble"), (5, "Some Details"),
1461 (9, "Postamble"),
1462 ],
1463 );
1464 }
1465
1466 #[test]
1467 fn extract_messages_list() {
1468 assert_extract_messages(
1469 "Some text\n * List item 1🦀\n * List item 2\n\nMore text",
1470 &[
1471 (1, "Some text"), (2, "List item 1🦀"),
1473 (3, "List item 2"),
1474 (5, "More text"),
1475 ],
1476 );
1477 }
1478
1479 #[test]
1480 fn extract_messages_multilevel_list() {
1481 assert_extract_messages(
1482 "Some text\n * List item 1\n * List item 2\n * Sublist 1\n * Sublist 2\n\nMore text",
1483 &[
1484 (1, "Some text"), (2, "List item 1"),
1486 (3, "List item 2"),
1487 (4, "Sublist 1"),
1488 (5, "Sublist 2"),
1489 (7, "More text"),
1490 ],
1491 );
1492 }
1493
1494 #[test]
1495 fn extract_messages_list_with_paragraphs() {
1496 assert_extract_messages(
1497 r"* Item 1.
1498* Item 2,
1499 two lines.
1500
1501 * Sub 1.
1502 * Sub 2.
1503",
1504 &[
1505 (1, "Item 1."),
1506 (2, "Item 2, two lines."),
1507 (5, "Sub 1."),
1508 (6, "Sub 2."),
1509 ],
1510 );
1511 }
1512
1513 #[test]
1514 fn extract_messages_headings() {
1515 assert_extract_messages(
1516 r"Some text
1517# Headline News🦀
1518
1519* A
1520* List
1521
1522## Subheading
1523",
1524 &[
1525 (1, "Some text"),
1526 (2, "Headline News🦀"),
1527 (4, "A"),
1528 (5, "List"),
1529 (7, "Subheading"),
1530 ],
1531 );
1532 }
1533
1534 #[test]
1535 fn extract_messages_code_followed_by_details() {
1536 assert_extract_messages(
1539 r"```bob
1540// BOB
1541```
1542
1543<details>
1544
1545* Blah blah
1546
1547</details>
1548",
1549 &[
1550 (1, "```bob\n// BOB\n```"), (7, "Blah blah"),
1552 ],
1553 );
1554 }
1555
1556 #[test]
1557 fn extract_messages_backslashes() {
1558 assert_extract_messages(
1565 r"
1566$$
1567\sum_{n=1}^{\infty} 2^{-n} = 1
1568$$
1569",
1570 &[(2, r"$$ \\sum\_{n=1}^{\infty} 2^{-n} = 1 $$")],
1571 );
1572 }
1573
1574 #[test]
1575
1576 fn extract_messages_skip_simple() {
1577 assert_extract_messages(
1578 r"<!-- mdbook-xgettext:skip -->
1579
1580This is a paragraph.",
1581 &[],
1582 );
1583 }
1584
1585 #[test]
1586 fn extract_messages_skip_next_paragraph_ok() {
1587 assert_extract_messages(
1588 r"<!-- mdbook-xgettext:skip -->
1589This is a paragraph.
1590
1591This should be translated.
1592",
1593 &[(4, "This should be translated.")],
1594 );
1595 }
1596
1597 #[test]
1598 fn extract_messages_skip_next_codeblock() {
1599 assert_extract_messages(
1600 r"<!-- mdbook-xgettext:skip -->
1601```
1602def f(x): return x * x
1603```
1604This should be translated.
1605",
1606 &[(5, "This should be translated.")],
1607 );
1608 }
1609
1610 #[test]
1611 fn extract_messages_skip_back_to_back() {
1612 assert_extract_messages(
1613 r"<!-- mdbook-xgettext:skip -->
1614```
1615def f(x): return x * x
1616```
1617<!-- mdbook-xgettext:skip -->
1618This should not translated.
1619
1620But *this* should!
1621",
1622 &[(8, "But _this_ should!")],
1623 );
1624 }
1625
1626 #[test]
1627 fn extract_messages_block_html_skip() {
1628 assert_extract_messages(
1630 "<!-- mdbook-xgettext:skip -->\n\
1631 This is ignored\n\
1632 \n\
1633 but this is not",
1634 &[(4, "but this is not")],
1635 );
1636 }
1637
1638 #[test]
1639 fn extract_messages_inline_html_skips() {
1640 assert_extract_messages(
1642 "
1643this should be translated <!-- mdbook-xgettext:skip --> but not this.
1644... nor this.
1645
1646But *this* should!",
1647 &[(2, "this should be translated "), (5, "But _this_ should!")],
1648 );
1649 }
1650
1651 #[test]
1652 fn extract_messages_skipping_second_item() {
1653 assert_extract_messages(
1654 "
1655* A
1656<!-- mdbook-xgettext:skip -->
1657* B
1658* C
1659",
1660 &[(2, "A"), (5, "C")],
1661 );
1662 }
1663
1664 #[test]
1665 fn extract_messages_skipping_second_paragraphed_item() {
1666 assert_extract_messages(
1667 "
1668* A
1669
1670<!-- mdbook-xgettext:skip -->
1671* B
1672
1673* C
1674",
1675 &[(2, "A"), (7, "C")],
1676 );
1677 }
1678
1679 #[test]
1680 fn extract_messages_skipping_inline_second_item() {
1681 assert_extract_messages(
1690 "
1691* A
1692* <!-- mdbook-xgettext:skip --> B
1693* C
1694",
1695 &[(2, "A")],
1696 );
1697 }
1698
1699 #[test]
1700 fn extract_messages_inline_skip_to_end_of_block() {
1701 assert_extract_messages(
1702 "foo <!-- mdbook-xgettext:skip --> **bold** bar
1703still skipped
1704
1705not-skipped",
1706 &[(1, "foo "), (4, "not-skipped")],
1707 );
1708 }
1709
1710 #[test]
1711 fn extract_messages_automatic_skipping_nontranslatable_codeblocks_simple() {
1712 assert_extract_messages(
1713 r"
1714```python
1715def g(x):
1716 this_should_be_skipped_no_strings_or_comments()
1717```
1718",
1719 &[],
1720 );
1721 }
1722
1723 #[test]
1724 fn extract_messages_automatic_skipping_nontranslatable_codeblocks() {
1725 assert_extract_messages(
1726 r#"
1727```python
1728def f(x):
1729 print("this should be translated")
1730```
1731
1732
1733```python
1734def g(x):
1735 but_this_should_not()
1736```
1737"#,
1738 &[(4, "\"this should be translated\"")],
1739 );
1740 }
1741
1742 #[test]
1743 fn extract_messages_without_language_specifier() {
1744 assert_extract_messages(
1745 r#"
1746```
1747def f(x):
1748 print("this should be translated")
1749```
1750
1751
1752```
1753def g(x):
1754 but_this_should_not()
1755```
1756"#,
1757 &[(
1758 2,
1759 "```\ndef f(x):\n print(\"this should be translated\")\n```",
1760 )],
1761 );
1762 }
1763
1764 #[test]
1765 fn extract_messages_codeblock_in_codeblock() {
1766 assert_extract_messages(
1767 r#"
1768````
1769```
1770// codeblock in codeblock
1771```
1772````
1773"#,
1774 &[(2, "````\n```\n// codeblock in codeblock\n```\n````")],
1775 );
1776 }
1777
1778 #[test]
1779 fn extract_message_comments() {
1780 assert_eq!(
1781 extract_messages(
1782 "
1783<!-- mdbook-xgettext:comment: first comment! -->
1784Hello world!
1785"
1786 )
1787 .unwrap(),
1788 vec![(
1789 3,
1790 ExtractedMessage {
1791 message: "Hello world!".into(),
1792 comment: "first comment!".into(),
1793 }
1794 )]
1795 );
1796 }
1797
1798 #[test]
1799 fn extract_message_comments_multiple_joined() {
1800 assert_eq!(
1801 extract_messages(
1802 "
1803<!-- mdbook-xgettext:comment: this is a test -->
1804<!-- mdbook-xgettext:comment: of a comment that spans. -->
1805Greetings!
1806"
1807 )
1808 .unwrap(),
1809 vec![(
1810 4,
1811 ExtractedMessage {
1812 message: "Greetings!".into(),
1813 comment: "this is a test of a comment that spans.".into(),
1814 }
1815 )]
1816 );
1817 }
1818
1819 #[test]
1820 fn extract_message_multiple_comments() {
1821 assert_eq!(
1822 extract_messages(
1823 "
1824before-no-comment
1825
1826<!-- mdbook-xgettext:comment: another -->
1827Hello again, this is some text
1828with a comment on it.
1829
1830<!-- mdbook-xgettext:comment: one more comment. -->
1831after
1832
1833after-no-comment
1834"
1835 )
1836 .unwrap(),
1837 vec![
1838 (
1839 2,
1840 ExtractedMessage {
1841 message: "before-no-comment".into(),
1842 comment: "".into(),
1843 }
1844 ),
1845 (
1846 5,
1847 ExtractedMessage {
1848 message: "Hello again, this is some text with a comment on it.".into(),
1849 comment: "another".into(),
1850 }
1851 ),
1852 (
1853 9,
1854 ExtractedMessage {
1855 message: "after".into(),
1856 comment: "one more comment.".into(),
1857 }
1858 ),
1859 (
1860 11,
1861 ExtractedMessage {
1862 message: "after-no-comment".into(),
1863 comment: "".into(),
1864 }
1865 ),
1866 ]
1867 );
1868 }
1869
1870 #[test]
1871 fn extract_message_comments_on_codeblock() {
1872 assert_eq!(
1873 extract_messages(
1874 r#"
1875<!-- mdbook-xgettext:comment: greetings! -->
1876```python
1877print("Hello world")
1878```
1879"#
1880 )
1881 .unwrap(),
1882 vec![(
1883 4,
1884 ExtractedMessage {
1885 message: "\"Hello world\"".into(),
1886 comment: "greetings!".into(),
1887 }
1888 ),]
1889 );
1890 }
1891
1892 #[test]
1893 fn extract_admonish_codeblock() {
1894 assert_extract_messages(
1895 r#"```admonish tip title="Important Tips"
1896My Message
1897```"#,
1898 &[(
1899 1,
1900 "```admonish tip title=\"Important Tips\"\nMy Message\n```",
1901 )],
1902 );
1903 }
1904
1905 #[test]
1906 fn extract_admonish_codeblock_no_title() {
1907 assert_extract_messages(
1908 r#"```admonish tip
1909My Message
1910```"#,
1911 &[(1, "```admonish tip\nMy Message\n```")],
1912 );
1913 }
1914
1915 #[test]
1916 fn extract_admonish_codeblock_no_close_codeblock() {
1917 assert_extract_messages(
1918 r#"```admonish tip
1919My Message
1920"#,
1921 &[(1, "```admonish tip\nMy Message\n```")],
1922 );
1923 }
1924
1925 #[test]
1926 fn extract_newlang_codeblock_string() {
1927 assert_extract_messages(
1928 r#"```new_lang
1929some_syntax = "My String";
1930```"#,
1931 &[(1, "```new_lang\nsome_syntax = \"My String\";\n```")],
1932 );
1933 }
1934
1935 #[test]
1936 fn extract_nolang_codeblock_string() {
1937 assert_extract_messages(
1938 r#"```
1939some_syntax = "My String";
1940```"#,
1941 &[(1, "```\nsome_syntax = \"My String\";\n```")],
1942 );
1943 }
1944
1945 #[test]
1946 fn extract_nolang_nostring_codeblock() {
1947 assert_extract_messages(
1948 r#"```
1949some_syntax = do_something();
1950```"#,
1951 &[],
1952 );
1953 }
1954}