1use crate::diag::{Diagnostic, MarkupKind};
18use crate::segment::{BufferRef, Segment};
19use crate::segment_set::SegmentSet;
20use crate::statement::{
21 Command, CommandToken, FilePos, FloatDef, GlobalDv, GlobalSpan, HeadingDef, LabelDef,
22 LocalVarDef, Span, Statement, StatementAddress, StatementIndex, SymbolDef, SymbolType, Token,
23 TokenIndex, TokenPtr, NO_STATEMENT,
24};
25use crate::typesetting::TypesettingData;
26use crate::StatementType::{self, *};
27use regex::bytes::Regex;
28use std::cmp;
29use std::collections::hash_map::Entry;
30use std::fmt::Debug;
31use std::mem;
32use std::str;
33use std::sync::{Arc, OnceLock};
34
35#[derive(Default)]
37struct Scanner<'a> {
38 buffer: &'a [u8],
40 buffer_ref: BufferRef,
42 position: FilePos,
45 diagnostics: Vec<(StatementIndex, Diagnostic)>,
47 span_pool: Vec<Span>,
49 unget: Span,
55 labels: Vec<Span>,
57 statement_start: FilePos,
59 statement_index: StatementIndex,
61 statement_math_start: usize,
63 statement_proof_start: usize,
64 invalidated: bool,
68}
69
70const MM_VALID_SPACES: u64 =
75 (1u64 << 9) | (1u64 << 10) | (1u64 << 12) | (1u64 << 13) | (1u64 << 32);
76
77const fn is_mm_space_c0(byte: u8) -> bool {
80 (MM_VALID_SPACES & (1u64 << byte)) != 0
81}
82
83const fn is_mm_space(byte: u8) -> bool {
88 byte <= 32 && is_mm_space_c0(byte)
89}
90
91#[derive(Default, Debug, Eq, PartialEq, Ord, PartialOrd, Copy, Clone)]
92pub enum HeadingLevel {
94 #[default]
96 Database,
97 MajorPart,
99 Section,
101 SubSection,
103 SubSubSection,
105 Statement,
107}
108
109#[derive(Eq, PartialEq, Copy, Clone)]
110enum CommentType {
111 Normal,
112 Typesetting,
113 Extra,
114 Heading(HeadingLevel),
115}
116
117impl<'a> Scanner<'a> {
118 fn diag(&mut self, diag: Diagnostic) {
120 self.diagnostics.push((self.statement_index, diag));
121 }
122
123 fn get_raw(&mut self) -> Span {
136 #[inline(never)]
137 #[cold]
138 fn badchar(slf: &mut Scanner<'_>, ix: usize) -> Span {
139 let ch = slf.buffer[ix];
140 slf.diag(Diagnostic::BadCharacter(ix, ch));
141 slf.get_raw()
145 }
146
147 let len = self.buffer.len();
148 let mut ix = self.position as usize;
149
150 while ix < len && self.buffer[ix] <= 32 {
151 if !is_mm_space_c0(self.buffer[ix]) {
154 self.position = (ix + 1) as FilePos;
155 return badchar(self, ix);
156 }
157 ix += 1;
158 }
159
160 let start = ix;
161 while ix < len {
162 if self.buffer[ix] <= 32 || self.buffer[ix] > 126 {
163 if self.buffer[ix] <= 32 {
164 break;
165 }
166 let badix = ix;
168 ix += 1;
170 while ix < len && !is_mm_space(self.buffer[ix]) {
171 ix += 1;
172 }
173 self.position = ix as FilePos;
174 return badchar(self, badix);
175 }
176
177 ix += 1;
178 }
179
180 self.position = ix as FilePos;
181 if start == ix {
182 Span::NULL
183 } else {
184 Span::new(start, ix)
185 }
186 }
187
188 fn get_comment(&mut self, opener: Span, mid_statement: bool) -> CommentType {
193 let mut ctype = CommentType::Normal;
194 let mut first = true;
195 loop {
196 let tok = self.get_raw();
197 if tok.is_null() {
198 break;
199 }
200 let tok_ref = tok.as_ref(self.buffer);
201 if tok_ref == b"$)" {
202 return ctype;
203 } else if tok_ref == b"$j" || tok_ref == b"$t" {
204 if !first {
205 self.diag(Diagnostic::CommentMarkerNotStart(tok))
206 } else if mid_statement {
207 self.diag(Diagnostic::MidStatementCommentMarker(tok))
208 } else {
209 ctype = if tok_ref == b"$j" {
210 CommentType::Extra
211 } else {
212 CommentType::Typesetting
213 }
214 }
215 } else if tok_ref.contains(&b'$') {
216 let tok_str = str::from_utf8(tok_ref).unwrap();
217 if tok_str.contains("$)") {
218 self.diag(Diagnostic::BadCommentEnd(tok, opener));
219 }
220 if tok_str.contains("$(") {
221 self.diag(Diagnostic::NestedComment(tok, opener));
222 }
223 } else if first && tok_ref.len() >= 4 {
224 if tok_ref[..4] == *b"####" {
225 ctype = CommentType::Heading(HeadingLevel::MajorPart);
226 } else if tok_ref[..4] == *b"#*#*" {
227 ctype = CommentType::Heading(HeadingLevel::Section);
228 } else if tok_ref[..4] == *b"=-=-" {
229 ctype = CommentType::Heading(HeadingLevel::SubSection);
230 } else if tok_ref[..4] == *b"-.-." {
231 ctype = CommentType::Heading(HeadingLevel::SubSubSection);
232 }
233 }
234 first = false;
235 }
236
237 let cspan = Span::new2(opener.start, self.buffer.len() as FilePos);
238 self.diag(Diagnostic::UnclosedComment(cspan));
239 ctype
240 }
241
242 fn get(&mut self) -> Span {
245 if !self.unget.is_null() {
246 return mem::replace(&mut self.unget, Span::NULL);
247 }
248
249 loop {
250 let tok = self.get_raw();
251 if tok.is_null() {
252 return Span::NULL;
253 }
254 let tok_ref = tok.as_ref(self.buffer);
255 if tok_ref == b"$(" {
256 self.get_comment(tok, true);
257 } else {
258 return tok;
259 }
260 }
261 }
262
263 fn out_statement(&mut self, stype: StatementType, label: Span) -> Statement {
266 Statement {
267 stype,
268 label,
269 math_start: self.statement_math_start,
270 proof_start: self.statement_proof_start,
271 proof_end: self.span_pool.len(),
272 group: NO_STATEMENT,
273 group_end: NO_STATEMENT,
274 span: Span::new2(
275 mem::replace(&mut self.statement_start, self.position),
276 self.position,
277 ),
278 }
279 }
280
281 fn get_comment_statement(&mut self) -> Option<Statement> {
283 let ftok = if self.unget.is_null() {
284 self.get_raw()
285 } else {
286 mem::replace(&mut self.unget, Span::NULL)
287 };
288 if ftok != Span::NULL {
289 let ftok_ref = ftok.as_ref(self.buffer);
290 if ftok_ref == b"$(" {
291 let ctype = self.get_comment(ftok, false);
292 let s_type = match ctype {
293 CommentType::Typesetting => TypesettingComment,
294 CommentType::Heading(level) => HeadingComment(level),
295 CommentType::Extra => AdditionalInfoComment,
296 CommentType::Normal => Comment,
297 };
298 return Some(self.out_statement(s_type, Span::new2(ftok.start, ftok.start)));
299 }
300 self.unget = ftok;
301 }
302 None
303 }
304
305 fn read_labels(&mut self) {
310 self.labels.clear();
311 loop {
312 let ltok = self.get();
313 if ltok.is_null() {
314 break;
315 }
316 let lref = ltok.as_ref(self.buffer);
317 if lref.contains(&b'$') {
318 self.unget = ltok;
319 break;
320 } else if is_valid_label(lref) {
321 self.labels.push(ltok);
322 } else {
323 self.diag(Diagnostic::BadLabel(ltok));
324 }
325 }
326 }
327
328 fn get_no_label(&mut self, kwtok: Span) -> Span {
330 for &lspan in &self.labels {
332 self.diagnostics
333 .push((self.statement_index, Diagnostic::SpuriousLabel(lspan)));
334 }
335
336 Span::new2(kwtok.start, kwtok.start)
339 }
340
341 fn get_label(&mut self) -> Span {
344 match self.labels.len() {
345 1 => self.labels[0],
346 0 => {
347 self.diag(Diagnostic::MissingLabel);
348 self.invalidated = true;
349 Span::NULL
350 }
351 _ => {
352 for &addl in self.labels.iter().skip(1) {
353 self.diagnostics.push((
354 self.statement_index,
355 Diagnostic::RepeatedLabel(addl, self.labels[0]),
356 ));
357 }
358 self.invalidated = true;
360 Span::NULL
361 }
362 }
363 }
364
365 fn get_string(&mut self, expect_proof: bool, is_proof: bool) -> bool {
374 loop {
375 let tokn = self.get();
376 if tokn.is_null() {
377 break;
378 }
379 let toknref = tokn.as_ref(self.buffer);
380 if toknref.contains(&b'$') {
381 if toknref == b"$." {
382 if expect_proof {
384 self.diag(Diagnostic::MissingProof(tokn));
385 }
386 return false;
387 } else if toknref == b"$=" && !is_proof {
388 if !expect_proof {
390 self.diag(Diagnostic::SpuriousProof(tokn));
391 }
392 return true;
393 }
394 self.unget = tokn;
396 break;
397 }
398 self.span_pool.push(tokn);
399 }
400
401 self.diag(if is_proof {
402 Diagnostic::UnclosedProof
403 } else {
404 Diagnostic::UnclosedMath
405 });
406 false
407 }
408
409 fn get_strings(&mut self, want_proof: bool) {
412 let has_proof = self.get_string(want_proof, false);
413 self.statement_proof_start = self.span_pool.len();
414
415 if self.statement_proof_start == self.statement_math_start {
416 self.invalidated = true;
417 self.diag(Diagnostic::EmptyMathString);
419 }
420
421 if has_proof {
422 self.get_string(false, true);
425 } else {
426 }
430 }
431
432 fn eat_invalid(&mut self) {
435 loop {
436 let tok = self.get();
437 if tok.is_null() {
438 break;
439 }
440 let tref = tok.as_ref(self.buffer);
441 if tref == b"$." {
442 break;
444 } else if tref == b"$=" {
445 } else if tref.contains(&b'$') {
447 self.unget = tok;
449 break;
450 }
451 }
452 }
453
454 fn get_file_include(&mut self) -> Span {
458 let mut res = Span::NULL;
459 let mut count = 0;
460 loop {
461 let tok = self.get();
462 if tok.is_null() {
463 break;
464 }
465 let tref = tok.as_ref(self.buffer);
466 if tref == b"$]" {
467 if count == 0 {
468 self.diag(Diagnostic::EmptyFilename);
469 self.invalidated = true;
470 } else if count > 1 {
471 self.diag(Diagnostic::FilenameSpaces);
472 self.invalidated = true;
473 } else if res.as_ref(self.buffer).contains(&b'$') {
474 self.diag(Diagnostic::FilenameDollar);
475 }
476 return res;
477 } else if !tref.is_empty() && tref[0] == b'$' {
478 break;
479 }
480 count += 1;
481 res = tok;
482 }
483 self.diag(Diagnostic::UnclosedInclude);
484 self.invalidated = true;
485 res
486 }
487
488 fn get_statement(&mut self) -> Statement {
490 self.statement_start = self.position;
491 self.statement_math_start = self.span_pool.len();
492 self.statement_proof_start = self.span_pool.len();
493
494 if let Some(stmt) = self.get_comment_statement() {
496 return stmt;
497 }
498
499 self.read_labels();
501
502 let kwtok = self.get();
503 let stype = if kwtok.is_null() {
504 Eof
505 } else {
506 let kwtok_ref = kwtok.as_ref(self.buffer);
507 if kwtok_ref.len() == 2 && kwtok_ref[0] == b'$' {
508 match kwtok_ref[1] {
509 b'[' => FileInclude,
510 b'a' => Axiom,
511 b'c' => Constant,
512 b'd' => Disjoint,
513 b'e' => Essential,
514 b'f' => Floating,
515 b'p' => Provable,
516 b'v' => Variable,
517 b'{' => OpenGroup,
518 b'}' => CloseGroup,
519 _ => Invalid,
520 }
521 } else {
522 Invalid
523 }
524 };
525 if stype == Invalid {
526 self.diag(Diagnostic::UnknownKeyword(kwtok));
527 }
528 self.invalidated = false;
529
530 let mut label = if stype.takes_label() {
532 self.get_label()
533 } else {
534 self.get_no_label(kwtok)
535 };
536
537 if stype.takes_math() {
540 self.get_strings(stype == Provable);
541 }
542
543 let math_len = self.statement_proof_start - self.statement_math_start;
545 match stype {
546 FileInclude => label = self.get_file_include(),
547 Disjoint => {
548 if math_len == 1 {
550 self.diag(Diagnostic::DisjointSingle);
551 self.invalidated = true;
552 }
553 }
554 Floating => {
555 if math_len != 0 && math_len != 2 {
557 self.diag(Diagnostic::BadFloating);
558 self.invalidated = true;
559 }
560 }
561 Invalid => self.eat_invalid(),
564 _ => {}
565 }
566
567 let stype = if self.invalidated { Invalid } else { stype };
568
569 self.out_statement(stype, label)
570 }
571
572 fn get_segment(&mut self) -> (Segment, bool) {
578 let mut seg = Segment {
579 statements: Vec::new(),
580 next_file: Span::NULL,
581 symbols: Vec::new(),
582 local_vars: Vec::new(),
583 global_dvs: Vec::new(),
584 labels: Vec::new(),
585 floats: Vec::new(),
586 buffer: self.buffer_ref.clone(),
587 diagnostics: Vec::new(),
588 span_pool: Vec::new(),
589 outline: Vec::new(),
590 t_commands: Vec::new(),
591 j_commands: Vec::new(),
592 };
593 let mut top_group = NO_STATEMENT;
594 let is_end;
595 let end_diag;
596
597 loop {
598 let index = seg.statements.len() as StatementIndex;
599 self.statement_index = index;
600 let mut stmt = self.get_statement();
601 stmt.group = top_group;
602 seg.statements.push(stmt);
603
604 match seg.statements[index as usize].stype {
608 OpenGroup => top_group = index,
609 CloseGroup if top_group != NO_STATEMENT => {
610 seg.statements[top_group as usize].group_end = index;
611 top_group = seg.statements[top_group as usize].group;
612 }
613 CloseGroup => self.diag(Diagnostic::UnmatchedCloseGroup),
614 Constant if top_group != NO_STATEMENT => {
615 self.diag(Diagnostic::ConstantNotTopLevel);
616 }
617 Essential if top_group == NO_STATEMENT => {
618 self.diag(Diagnostic::EssentialAtTopLevel)
619 }
620 FileInclude => {
621 seg.next_file = seg.statements[index as usize].label;
623 end_diag = Diagnostic::UnclosedBeforeInclude(index);
624 is_end = false;
625 break;
626 }
627 Eof => {
628 end_diag = Diagnostic::UnclosedBeforeEof;
629 is_end = true;
630 break;
631 }
632 _ => {}
633 }
634 }
635
636 while top_group != NO_STATEMENT {
639 seg.statements[top_group as usize].group_end = seg.statements.len() as StatementIndex;
640 self.diagnostics.push((top_group, end_diag.clone()));
641 top_group = seg.statements[top_group as usize].group;
642 }
643
644 for index in 0..seg.statements.len() {
647 if seg.statements[index].group != NO_STATEMENT
648 && seg.statements[index].stype != OpenGroup
649 {
650 seg.statements[index].group_end =
651 seg.statements[seg.statements[index].group as usize].group_end;
652 }
653 }
654
655 seg.diagnostics = mem::take(&mut self.diagnostics);
656 seg.span_pool = mem::take(&mut self.span_pool);
657 seg.span_pool.shrink_to_fit();
658 seg.statements.shrink_to_fit();
659 collect_definitions(&mut seg);
660 (seg, is_end)
661 }
662}
663
664fn collect_definitions(seg: &mut Segment) {
667 let buf: &[u8] = &seg.buffer;
668 for (index, stmt) in seg.statements.iter().enumerate() {
669 let index = index as StatementIndex;
670 if stmt.stype.takes_label() {
671 seg.labels.push(LabelDef { index });
672 }
673
674 if stmt.group_end != NO_STATEMENT {
675 if stmt.stype == Variable {
676 let math = &seg.span_pool[stmt.math_start..stmt.proof_start];
677 for sindex in 0..math.len() {
678 seg.local_vars.push(LocalVarDef {
679 index,
680 ordinal: sindex as TokenIndex,
681 });
682 }
683 }
684 if stmt.stype != AdditionalInfoComment {
686 continue;
687 }
688 }
689
690 let math = &seg.span_pool[stmt.math_start..stmt.proof_start];
691
692 match stmt.stype {
693 Constant => {
694 for (sindex, &span) in math.iter().enumerate() {
695 seg.symbols.push(SymbolDef {
696 stype: SymbolType::Constant,
697 start: index,
698 name: span.as_ref(buf).into(),
699 ordinal: sindex as TokenIndex,
700 });
701 }
702 }
703 Variable => {
704 for (sindex, &span) in math.iter().enumerate() {
705 seg.symbols.push(SymbolDef {
706 stype: SymbolType::Variable,
707 start: index,
708 name: span.as_ref(buf).into(),
709 ordinal: sindex as TokenIndex,
710 });
711 }
712 }
713 Disjoint => {
714 seg.global_dvs.push(GlobalDv {
715 start: index,
716 vars: math.iter().map(|sp| sp.as_ref(buf).into()).collect(),
717 });
718 }
719 Floating => {
720 seg.floats.push(FloatDef {
721 start: index,
722 typecode: math[0].as_ref(buf).into(),
723 label: stmt.label.as_ref(buf).into(),
724 name: math[1].as_ref(buf).into(),
725 });
726 }
727 HeadingComment(level) => {
728 seg.outline.push(HeadingDef {
729 name: get_heading_name(buf, stmt.span.start).into(),
730 index,
731 level,
732 });
733 }
734 TypesettingComment => match commands(buf, b't', stmt.span.start) {
735 Ok(commands) => {
736 for result in commands {
737 match result {
738 Ok(command) => seg.t_commands.push((index, command)),
739 Err(diag) => seg.diagnostics.push((index, diag)),
740 }
741 }
742 }
743 Err(diag) => seg.diagnostics.push((index, diag)),
744 },
745 AdditionalInfoComment => match commands(buf, b'j', stmt.span.start) {
746 Ok(commands) => {
747 for result in commands {
748 match result {
749 Ok(command) => seg.j_commands.push((index, command)),
750 Err(diag) => seg.diagnostics.push((index, diag)),
751 }
752 }
753 }
754 Err(diag) => seg.diagnostics.push((index, diag)),
755 },
756 _ => {}
757 }
758 }
759}
760
761#[must_use]
763pub fn is_valid_label(label: &[u8]) -> bool {
764 label
765 .iter()
766 .all(|&c| c == b'.' || c == b'-' || c == b'_' || c.is_ascii_alphanumeric())
767}
768
769fn get_heading_name(buffer: &[u8], pos: FilePos) -> TokenPtr<'_> {
771 let mut index = pos as usize;
772 while index < buffer.len() {
773 if buffer[index] == b' ' {
775 break;
776 }
777 while index < buffer.len() && buffer[index] != b'\n' {
779 index += 1;
780 }
781 if index < buffer.len() {
783 index += 1;
784 }
785 }
786 while index < buffer.len() && buffer[index] == b' ' {
790 index += 1;
791 }
792
793 let mut eol = index;
794 while eol < buffer.len() && buffer[eol] != b'\n' {
795 eol += 1;
796 }
797 while eol > index && (buffer[eol - 1] == b'\r' || buffer[eol - 1] == b' ') {
798 eol -= 1;
799 }
800 &buffer[index..eol]
801}
802
803fn commands(buffer: &[u8], ch: u8, pos: FilePos) -> Result<CommandIter<'_>, Diagnostic> {
805 let mut iter = CommandIter {
806 buffer,
807 index: pos as usize,
808 };
809 let _ = iter.skip_white_spaces()?;
810 iter.expect(b'$')?;
811 iter.expect(b'(')?;
812 let _ = iter.skip_white_spaces()?;
813 iter.expect(b'$')?;
814 iter.expect(ch)?;
815 Ok(iter)
816}
817
818struct CommandIter<'a> {
819 buffer: &'a [u8],
820 index: usize,
821}
822
823impl CommandIter<'_> {
824 const fn has_more(&self) -> bool {
825 self.index < self.buffer.len()
826 }
827
828 const fn next_char(&self) -> u8 {
829 self.buffer[self.index]
830 }
831
832 fn skip_white_spaces(&mut self) -> Result<Option<()>, Diagnostic> {
833 'outer_loop: while self.has_more() {
834 match self.next_char() {
835 b' ' | b'\t' | b'\n' | b'\r' => {} b'$' => return Ok(None),
838 b'/' if self.buffer.get(self.index + 1) == Some(&b'*') => {
840 for i in self.index + 2..self.buffer.len() - 1 {
841 if self.buffer[i] == b'*' && self.buffer[i + 1] == b'/' {
843 self.index = i + 2;
844 continue 'outer_loop;
845 }
846 }
847 let cspan = Span::new(self.index, self.buffer.len());
849 self.index = self.buffer.len();
850 return Err(Diagnostic::UnclosedCommandComment(cspan));
851 }
852 _ => break,
854 }
855 self.index += 1;
856 }
857 Ok(Some(()))
858 }
859
860 #[allow(clippy::if_not_else)]
861 fn expect(&mut self, c: u8) -> Result<(), Diagnostic> {
862 if !self.has_more() {
863 let cspan = Span::new(self.index, self.buffer.len());
864 Err(Diagnostic::UnclosedComment(cspan))
865 } else if self.next_char() != c {
866 let cspan = Span::new(self.index, self.buffer.len());
867 Err(Diagnostic::MalformedAdditionalInfo(cspan))
868 } else {
869 self.index += 1;
870 Ok(())
871 }
872 }
873}
874
875impl Iterator for CommandIter<'_> {
876 type Item = Result<Command, Diagnostic>;
877
878 fn next(&mut self) -> Option<Self::Item> {
879 match self.skip_white_spaces() {
880 Ok(Some(())) => {}
881 Ok(None) => return None,
882 Err(diag) => return Some(Err(diag)),
883 }
884
885 let command_start = self.index;
886 let mut command = Vec::new();
887 while self.has_more() {
888 let token = if let quote @ (b'\'' | b'\"') = self.next_char() {
889 self.index += 1;
890 let token_start = self.index;
891 loop {
893 if !self.has_more() {
894 let cspan = Span::new(token_start, self.buffer.len());
895 return Some(Err(Diagnostic::UnclosedCommandString(cspan)));
896 }
897 let ch = self.next_char();
898 self.index += 1;
899 if ch == quote {
900 if self.has_more() && self.next_char() == quote {
901 self.index += 1;
902 } else {
903 break;
904 }
905 }
906 }
907 CommandToken::String(Span::new(token_start, self.index - 1))
908 } else {
909 let token_start = self.index;
910 while self.has_more()
912 && !matches!(self.next_char(), b' ' | b'\t' | b'\n' | b'\r' | b';')
913 {
914 self.index += 1;
915 }
916 CommandToken::Keyword(Span::new(token_start, self.index))
917 };
918 command.push(token);
919
920 if let Err(diag) = self.skip_white_spaces() {
921 return Some(Err(diag));
922 }
923 if self.has_more() && self.next_char() == b';' {
924 self.index += 1;
926 return Some(Ok((Span::new(command_start, self.index), command)));
927 }
928 }
929 None
930 }
931}
932
933#[must_use]
939pub fn guess_buffer_name(buffer: &[u8]) -> &str {
940 let buffer = &buffer[0..cmp::min(500, buffer.len())];
941 let ptr = get_heading_name(buffer, 0);
942
943 if ptr.is_empty() {
945 "<no section name found>"
946 } else {
947 str::from_utf8(ptr).unwrap_or("<invalid UTF-8 in section name>")
948 }
949}
950
951#[must_use]
964pub fn parse_segments(input: &BufferRef) -> Vec<Arc<Segment>> {
965 let mut closed_spans = Vec::new();
966 let mut scanner = Scanner {
967 buffer_ref: input.clone(),
968 buffer: input,
969 ..Scanner::default()
970 };
971 assert!(input.len() < FilePos::MAX as usize);
972
973 loop {
974 let (seg, last) = scanner.get_segment();
975 closed_spans.push(Arc::new(seg));
978 if last {
979 return closed_spans;
980 }
981 }
982}
983
984#[must_use]
989pub fn dummy_segment(diag: Diagnostic) -> Arc<Segment> {
990 let mut seg = parse_segments(&Arc::new(Vec::new())).pop().unwrap();
991 Arc::get_mut(&mut seg).unwrap().diagnostics.push((0, diag));
992 seg
993}
994
995impl SegmentSet {
996 pub(crate) fn build_typesetting_data(&self) -> TypesettingData {
997 fn parse_one(
998 data: &mut TypesettingData,
999 buf: &[u8],
1000 addr: StatementAddress,
1001 span: Span,
1002 command: &[CommandToken],
1003 ) -> Result<(), Diagnostic> {
1004 const fn as_string(span: Span, tok: Option<&CommandToken>) -> Result<Span, Diagnostic> {
1005 match tok {
1006 Some(&CommandToken::String(s)) => Ok(s),
1007 None => Err(Diagnostic::CommandIncomplete(span)),
1008 Some(&CommandToken::Keyword(s)) => Err(Diagnostic::CommandExpectedString(s)),
1009 }
1010 }
1011
1012 let (cmd, mut rest) = match command.split_first() {
1013 Some((&CommandToken::Keyword(k), rest)) => (k, rest.iter()),
1014 _ => return Err(Diagnostic::BadCommand(span)),
1015 };
1016
1017 let sum = |mut rest: std::slice::Iter<'_, CommandToken>| {
1018 let mut span_out = as_string(span, rest.next())?;
1019 let mut accum: Vec<u8> = CommandToken::unescape_string(buf, span_out).into();
1020 span_out.start -= 1;
1021 loop {
1022 match rest.next() {
1023 None => {
1024 span_out.end += 1;
1025 return Ok(((addr.segment_id, span_out), accum.into()));
1026 }
1027 Some(CommandToken::Keyword(plus)) if plus.as_ref(buf) == b"+" => {}
1028 _ => return Err(Diagnostic::CommandIncomplete(span)),
1029 }
1030 let span2 = as_string(span, rest.next())?;
1031 CommandToken::append_unescaped_string(buf, span2, &mut accum);
1032 span_out.end = span2.end
1033 }
1034 };
1035
1036 let as_ = |rest: &mut std::slice::Iter<'_, CommandToken>| {
1037 let s = as_string(span, rest.next())?;
1038 match rest.next() {
1039 Some(CommandToken::Keyword(as_)) if as_.as_ref(buf) == b"as" => Ok(s),
1040 None => Err(Diagnostic::CommandIncomplete(span)),
1041 Some(tk) => Err(Diagnostic::CommandExpectedAs(tk.full_span())),
1042 }
1043 };
1044 let mut insert = |kind: MarkupKind, sp: Span, (sp2, val): (GlobalSpan, Token)| {
1045 let map = match kind {
1046 MarkupKind::Html => &mut data.html_defs,
1047 MarkupKind::AltHtml => &mut data.alt_html_defs,
1048 MarkupKind::Latex => &mut data.latex_defs,
1049 };
1050 match map.entry(CommandToken::unescape_string(buf, sp).into()) {
1051 Entry::Occupied(e) => {
1052 let (sp2, (id2, _), _) = *e.get();
1053 data.diagnostics
1054 .push((addr, Diagnostic::DuplicateMarkupDef(kind, (id2, sp2), sp)))
1055 }
1056 Entry::Vacant(e) => {
1057 e.insert((sp, sp2, val));
1058 }
1059 }
1060 };
1061 match cmd.as_ref(buf) {
1062 b"latexdef" => insert(MarkupKind::Latex, as_(&mut rest)?, sum(rest)?),
1063 b"htmldef" => insert(MarkupKind::Html, as_(&mut rest)?, sum(rest)?),
1064 b"althtmldef" => insert(MarkupKind::AltHtml, as_(&mut rest)?, sum(rest)?),
1065 b"htmlvarcolor" => data.html_var_color.push(sum(rest)?),
1066 b"htmltitle" => data.html_title = Some(sum(rest)?),
1067 b"htmlhome" => data.html_home = Some(sum(rest)?),
1068 b"exthtmltitle" => data.ext_html_title = Some(sum(rest)?),
1069 b"exthtmlhome" => data.ext_html_home = Some(sum(rest)?),
1070 b"exthtmllabel" => data.ext_html_label = Some(sum(rest)?),
1071 b"htmldir" => data.html_dir = Some(sum(rest)?),
1072 b"althtmldir" => data.alt_html_dir = Some(sum(rest)?),
1073 b"htmlbibliography" => data.html_bibliography = Some(sum(rest)?),
1074 b"exthtmlbibliography" => data.ext_html_bibliography = Some(sum(rest)?),
1075 b"htmlcss" => data.html_css = Some(sum(rest)?),
1076 b"htmlfont" => data.html_font = Some(sum(rest)?),
1077 b"htmlexturl" => data.html_ext_url = Some(sum(rest)?),
1078 _ => return Err(Diagnostic::UnknownTypesettingCommand(cmd)),
1079 }
1080 Ok(())
1081 }
1082
1083 let mut data = TypesettingData::default();
1084 for seg in self.segments(..) {
1085 for &(ix, (span, ref command)) in &seg.t_commands {
1086 let address = StatementAddress::new(seg.id, ix);
1087 if let Err(diag) = parse_one(&mut data, &seg.buffer, address, span, command) {
1088 data.diagnostics.push((address, diag))
1089 }
1090 }
1091 }
1092 data
1093 }
1094}
1095
1096#[derive(Debug, Clone, Copy)]
1098pub struct HeadingComment {
1099 pub header: Span,
1101 pub content: Span,
1103}
1104
1105impl HeadingComment {
1106 #[must_use]
1110 pub fn parse(buf: &[u8], lvl: HeadingLevel, sp: Span) -> Option<Self> {
1111 static MAJOR_PART: OnceLock<Regex> = OnceLock::new();
1112 static SECTION: OnceLock<Regex> = OnceLock::new();
1113 static SUBSECTION: OnceLock<Regex> = OnceLock::new();
1114 static SUBSUBSECTION: OnceLock<Regex> = OnceLock::new();
1115 let regex = match lvl {
1116 HeadingLevel::MajorPart => MAJOR_PART.get_or_init(|| {
1117 Regex::new(r"^[ \r\n]+#{4,}\r?\n *([^\n]*)\r?\n#{4,}\r?\n").unwrap()
1118 }),
1119 HeadingLevel::Section => SECTION.get_or_init(|| {
1120 Regex::new(r"^[ \r\n]+(?:#\*){2,}#?\r?\n *([^\n]*)\r?\n(?:#\*){2,}#?\r?\n").unwrap()
1121 }),
1122 HeadingLevel::SubSection => SUBSECTION.get_or_init(|| {
1123 Regex::new(r"^[ \r\n]+(?:=-){2,}=?\r?\n *([^\n]*)\r?\n(?:=-){2,}=?\r?\n").unwrap()
1124 }),
1125 HeadingLevel::SubSubSection => SUBSUBSECTION.get_or_init(|| {
1126 Regex::new(r"^[ \r\n]+(?:-\.){2,}-?\r?\n *([^\n]*)\r?\n(?:-\.){2,}-?\r?\n").unwrap()
1127 }),
1128 _ => unreachable!(),
1129 };
1130 let groups = regex.captures(sp.as_ref(buf))?;
1131 let m = groups.get(1)?;
1132 Some(Self {
1133 header: Span::new2(sp.start + m.start() as u32, sp.start + m.end() as u32),
1134 content: Span::new2(sp.start + groups.get(0)?.end() as u32, sp.end),
1135 })
1136 }
1137
1138 #[must_use]
1140 pub fn parse_mathbox_header(&self, buf: &[u8]) -> Option<Span> {
1141 static MATHBOX_FOR: OnceLock<Regex> = OnceLock::new();
1142 let mathbox_for = MATHBOX_FOR.get_or_init(|| Regex::new(r"^Mathbox for (.*)$").unwrap());
1143 let m = mathbox_for.captures(self.header.as_ref(buf))?.get(1)?;
1144 Some(Span::new2(
1145 self.header.start + m.start() as u32,
1146 self.header.start + m.end() as u32,
1147 ))
1148 }
1149}