ass_core/analysis/events/
text_analysis.rs1use crate::{
22 analysis::events::tags::{parse_override_block, DiagnosticKind, OverrideTag, TagDiagnostic},
23 utils::{errors::resource::check_depth_limit, CoreError},
24 Result,
25};
26
27#[cfg(feature = "plugins")]
28use crate::analysis::events::tags::parse_override_block_with_registry;
29
30#[cfg(feature = "plugins")]
31use crate::plugin::ExtensionRegistry;
32use alloc::{string::String, vec::Vec};
33
34#[derive(Debug, Clone)]
39pub struct TextAnalysis<'a> {
40 plain_text: String,
42 char_count: usize,
44 line_count: usize,
46 has_bidi_text: bool,
48 has_complex_unicode: bool,
50 override_tags: Vec<OverrideTag<'a>>,
52 parse_diagnostics: Vec<TagDiagnostic<'a>>,
54}
55
56impl<'a> TextAnalysis<'a> {
57 pub fn analyze(text: &'a str) -> Result<Self> {
85 #[cfg(feature = "plugins")]
86 return Self::analyze_with_registry(text, None);
87 #[cfg(not(feature = "plugins"))]
88 return Self::analyze_impl(text);
89 }
90
/// Analyses `text`, optionally consulting an extension registry while
/// parsing override blocks.
///
/// Passing `None` for `registry` uses the built-in tag handling only.
///
/// # Errors
///
/// Propagates any error from the underlying analysis.
#[cfg(feature = "plugins")]
pub fn analyze_with_registry(
    text: &'a str,
    registry: Option<&ExtensionRegistry>,
) -> Result<Self> {
    let analysis = Self::analyze_impl_with_registry(text, registry);
    analysis
}
115
116 #[cfg(not(feature = "plugins"))]
118 fn analyze_impl(text: &'a str) -> Result<Self> {
119 Self::analyze_impl_with_registry(text)
120 }
121
122 fn analyze_impl_with_registry(
124 text: &'a str,
125 #[cfg(feature = "plugins")] registry: Option<&ExtensionRegistry>,
126 ) -> Result<Self> {
127 const MAX_BRACE_DEPTH: usize = 100; let mut override_tags = Vec::new();
130 let mut parse_diagnostics = Vec::new();
131
132 let mut plain_text = String::new();
133 let mut position = 0;
134 let mut drawing_mode = false;
135
136 let mut chars = text.chars();
137 while let Some(ch) = chars.next() {
138 if ch == '{' {
139 let mut brace_count = 1;
140 let tag_start = position + ch.len_utf8();
141
142 for inner_ch in chars.by_ref() {
143 position += inner_ch.len_utf8();
144
145 if inner_ch == '{' {
146 brace_count += 1;
147 if check_depth_limit(brace_count, MAX_BRACE_DEPTH).is_err() {
149 return Err(CoreError::parse("Maximum brace nesting depth exceeded"));
150 }
151 } else if inner_ch == '}' {
152 brace_count -= 1;
153 if brace_count == 0 {
154 break;
155 }
156 }
157 }
158
159 if position > tag_start {
160 let tag_content = &text[tag_start..position];
161
162 #[cfg(feature = "plugins")]
163 if let Some(registry) = registry {
164 parse_override_block_with_registry(
165 tag_content,
166 tag_start,
167 &mut override_tags,
168 &mut parse_diagnostics,
169 Some(registry),
170 );
171 } else {
172 parse_override_block(
173 tag_content,
174 tag_start,
175 &mut override_tags,
176 &mut parse_diagnostics,
177 );
178 }
179
180 #[cfg(not(feature = "plugins"))]
181 parse_override_block(
182 tag_content,
183 tag_start,
184 &mut override_tags,
185 &mut parse_diagnostics,
186 );
187
188 drawing_mode = Self::update_drawing_mode(tag_content, drawing_mode);
190 } else {
191 parse_diagnostics.push(TagDiagnostic {
192 span: &text[tag_start..position.max(tag_start + 1)],
193 offset: tag_start,
194 kind: DiagnosticKind::EmptyOverride,
195 });
196 }
197 } else if ch == '\\' {
198 if let Some(next_ch) = chars.next() {
199 position += next_ch.len_utf8();
200 match next_ch {
201 'n' | 'N' => {
202 if !drawing_mode {
203 plain_text.push('\n');
204 }
205 }
206 'h' => {
207 if !drawing_mode {
208 plain_text.push('\u{00A0}');
209 }
210 }
211 _ => {
212 if !drawing_mode {
213 plain_text.push(ch);
214 plain_text.push(next_ch);
215 }
216 }
217 }
218 }
219 } else if !drawing_mode {
220 plain_text.push(ch);
221 }
222
223 position += ch.len_utf8();
224 }
225
226 let char_count = plain_text.chars().count();
227 let line_count = Self::count_lines(&plain_text);
228 let has_bidi_text = Self::detect_bidi_text(&plain_text);
229 let has_complex_unicode = Self::detect_complex_unicode(&plain_text);
230
231 Ok(Self {
232 plain_text,
233 char_count,
234 line_count,
235 has_bidi_text,
236 has_complex_unicode,
237 override_tags,
238 parse_diagnostics,
239 })
240 }
241
242 #[must_use]
244 pub fn plain_text(&self) -> &str {
245 &self.plain_text
246 }
247
248 #[must_use]
250 pub const fn char_count(&self) -> usize {
251 self.char_count
252 }
253
254 #[must_use]
256 pub const fn line_count(&self) -> usize {
257 self.line_count
258 }
259
260 #[must_use]
262 pub const fn has_bidi_text(&self) -> bool {
263 self.has_bidi_text
264 }
265
266 #[must_use]
268 pub const fn has_complex_unicode(&self) -> bool {
269 self.has_complex_unicode
270 }
271
272 #[must_use]
274 pub fn override_tags(&self) -> &[OverrideTag<'a>] {
275 &self.override_tags
276 }
277
278 #[must_use]
280 pub fn diagnostics(&self) -> &[TagDiagnostic<'a>] {
281 &self.parse_diagnostics
282 }
283
/// Computes the drawing-mode state after an override block.
///
/// Every `\p` in `tag_content` is inspected from left to right. The run of
/// ASCII digits and `-` characters that directly follows it is parsed as an
/// `i32`; on success the mode becomes `value > 0`. Occurrences whose run does
/// not parse (for example `\pos(...)`, where the run is empty) leave the mode
/// as it was. Returns `current_mode` unchanged when nothing applies.
fn update_drawing_mode(tag_content: &str, current_mode: bool) -> bool {
    tag_content
        .match_indices("\\p")
        .fold(current_mode, |mode, (start, marker)| {
            let tail = &tag_content[start + marker.len()..];
            // Length of the leading numeric run (digits and minus signs).
            let run_len = tail
                .find(|c: char| !(c.is_ascii_digit() || c == '-'))
                .unwrap_or(tail.len());

            match tail[..run_len].parse::<i32>() {
                Ok(level) => level > 0,
                Err(_) => mode,
            }
        })
}
310
/// Counts the lines of `text`; the result is never less than 1.
///
/// * No `'\n'` at all (including empty text): one line.
/// * Text made up solely of `'\n'`: one more line than there are newlines.
/// * Anything else: the number of items yielded by `str::lines`, so a
///   single trailing newline does not start an additional line.
fn count_lines(text: &str) -> usize {
    let newlines = text.matches('\n').count();
    if newlines == 0 {
        return 1;
    }

    let only_newlines = text.chars().all(|ch| ch == '\n');
    if only_newlines {
        newlines + 1
    } else {
        text.lines().count().max(1)
    }
}
332
/// Reports whether `text` contains at least one character from the checked
/// right-to-left ranges.
fn detect_bidi_text(text: &str) -> bool {
    let is_rtl = |ch: char| {
        matches!(
            ch,
            '\u{0590}'..='\u{05FF}'       // Hebrew
                | '\u{0600}'..='\u{06FF}' // Arabic
                | '\u{0750}'..='\u{077F}' // Arabic Supplement
                | '\u{08A0}'..='\u{08FF}' // Arabic Extended-A
        )
    };

    text.chars().any(is_rtl)
}
337
/// Reports whether `text` contains a character outside plain Latin-1 text.
///
/// A character qualifies when it is a C0 control (U+0000..=U+001F, which
/// includes tab and `'\n'`), DEL or a C1 control (U+007F..=U+009F), or any
/// code point above U+00FF. Zero-width joiners and other format characters
/// in the U+2000 area fall under the last rule.
fn detect_complex_unicode(text: &str) -> bool {
    text.chars().any(|ch| match u32::from(ch) {
        0x0000..=0x001F | 0x007F..=0x009F => true,
        code => code > 0x00FF,
    })
}
345}
346
/// Unit tests for `TextAnalysis`: plain-text extraction, escape handling,
/// drawing mode, line counting and the Unicode flags.
#[cfg(test)]
mod tests {
    use super::*;
    #[cfg(not(feature = "std"))]
    use alloc::string::ToString;

    #[test]
    fn text_analysis_simple_text() {
        let text = "Hello world!";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert_eq!(analysis.plain_text(), "Hello world!");
        assert_eq!(analysis.char_count(), 12);
        assert_eq!(analysis.line_count(), 1);
        assert!(!analysis.has_bidi_text());
        assert!(!analysis.has_complex_unicode());
        assert!(analysis.override_tags().is_empty());
        assert!(analysis.diagnostics().is_empty());
    }

    #[test]
    fn text_analysis_with_override_tags() {
        let text = "Hello {\\b1}bold{\\b0} world!";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert_eq!(analysis.plain_text(), "Hello bold world!");
        assert_eq!(analysis.char_count(), 17);
        assert_eq!(analysis.line_count(), 1);
        assert!(!analysis.override_tags().is_empty());
    }

    #[test]
    fn text_analysis_nested_braces() {
        let text = "Text {\\pos(100,{\\some}200)} more text";
        let analysis = TextAnalysis::analyze(text).unwrap();

        // The block sits between two spaces, so removing it leaves both of
        // them side by side.
        assert_eq!(analysis.plain_text(), "Text  more text");
        assert!(!analysis.override_tags().is_empty());
    }

    #[test]
    fn text_analysis_line_breaks() {
        let text = "First line\\NSecond line\\nThird line";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert_eq!(analysis.plain_text(), "First line\nSecond line\nThird line");
        assert_eq!(analysis.line_count(), 3);
    }

    #[test]
    fn text_analysis_hard_spaces() {
        let text = "Text\\hwith\\hhard\\hspaces";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert_eq!(
            analysis.plain_text(),
            "Text\u{00A0}with\u{00A0}hard\u{00A0}spaces"
        );
    }

    #[test]
    fn text_analysis_mixed_escapes() {
        let text = "Line 1\\NLine 2\\hspace\\nLine 3";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert_eq!(analysis.plain_text(), "Line 1\nLine 2\u{00A0}space\nLine 3");
        assert_eq!(analysis.line_count(), 3);
    }

    #[test]
    fn text_analysis_bidi_text_arabic() {
        let text = "Hello مرحبا world";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert!(analysis.has_bidi_text());
        assert!(analysis.has_complex_unicode());
    }

    #[test]
    fn text_analysis_bidi_text_hebrew() {
        let text = "Hello שלום world";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert!(analysis.has_bidi_text());
        assert!(analysis.has_complex_unicode());
    }

    #[test]
    fn text_analysis_complex_unicode_emoji() {
        let text = "Hello 🌍 world";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert!(!analysis.has_bidi_text());
        assert!(analysis.has_complex_unicode());
    }

    #[test]
    fn text_analysis_complex_unicode_control_chars() {
        let text = "Text\u{200C}with\u{200D}controls";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert!(analysis.has_complex_unicode());
    }

    #[test]
    fn text_analysis_basic_latin_only() {
        let text = "Basic ASCII text 123!@#";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert!(!analysis.has_bidi_text());
        assert!(!analysis.has_complex_unicode());
    }

    #[test]
    fn text_analysis_extended_latin() {
        // Every accented letter here is at or below U+00FF.
        let text = "Café naïve résumé";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert!(!analysis.has_bidi_text());
        assert!(!analysis.has_complex_unicode());
    }

    #[test]
    fn text_analysis_empty_override_blocks() {
        let text = "Text {} more text";
        let analysis = TextAnalysis::analyze(text).unwrap();

        // Both spaces around the removed `{}` survive.
        assert_eq!(analysis.plain_text(), "Text  more text");
        assert!(!analysis.diagnostics().is_empty());
    }

    #[test]
    fn text_analysis_unmatched_braces() {
        // Everything after an unclosed `{` is treated as block content.
        let text = "Text {\\b1 unmatched";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert_eq!(analysis.plain_text(), "Text ");
    }

    #[test]
    fn text_analysis_multiple_override_blocks() {
        let text = "{\\b1}Bold{\\b0} and {\\i1}italic{\\i0} text";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert_eq!(analysis.plain_text(), "Bold and italic text");
        assert_eq!(analysis.override_tags().len(), 4);
    }

    #[test]
    fn text_analysis_complex_tags() {
        let text = "{\\move(0,0,100,100)}{\\t(0,1000,\\fscx120)}{\\fade(255,0,0,0,800,900,1000)}Animated text";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert_eq!(analysis.plain_text(), "Animated text");
        assert!(!analysis.override_tags().is_empty());
    }

    #[test]
    fn text_analysis_drawing_commands() {
        // Text between `\p1` and `\p0` is drawing data, not plain text.
        let text = "{\\p1}m 0 0 l 100 0 100 100 0 100{\\p0}Square";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert_eq!(analysis.plain_text(), "Square");
        assert!(!analysis.override_tags().is_empty());
    }

    #[test]
    fn text_analysis_color_tags() {
        let text = "{\\c&H0000FF&}Red text{\\c} and {\\1c&H00FF00&}green text";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert_eq!(analysis.plain_text(), "Red text and green text");
        assert!(!analysis.override_tags().is_empty());
    }

    #[test]
    fn text_analysis_mixed_content() {
        let text = "Start {\\b1}bold\\N{\\i1}italic{\\i0}{\\b0}\\hnormal end";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert_eq!(
            analysis.plain_text(),
            "Start bold\nitalic\u{00A0}normal end"
        );
        assert_eq!(analysis.line_count(), 2);
        assert!(!analysis.override_tags().is_empty());
    }

    #[test]
    fn text_analysis_whitespace_only() {
        // 3 spaces + tab + newline + 2 spaces = 7 characters on 2 lines.
        let text = "   \t\n  ";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert_eq!(analysis.plain_text(), "   \t\n  ");
        assert_eq!(analysis.char_count(), 7);
        assert_eq!(analysis.line_count(), 2);
    }

    #[test]
    fn text_analysis_empty_text() {
        let text = "";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert_eq!(analysis.plain_text(), "");
        assert_eq!(analysis.char_count(), 0);
        // Empty text still counts as a single line.
        assert_eq!(analysis.line_count(), 1);
        assert!(analysis.override_tags().is_empty());
    }

    #[test]
    fn text_analysis_only_override_tags() {
        let text = "{\\b1}{\\i1}{\\u1}";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert_eq!(analysis.plain_text(), "");
        assert_eq!(analysis.char_count(), 0);
        assert!(!analysis.override_tags().is_empty());
    }

    #[test]
    fn text_analysis_escape_sequences() {
        // Escaped braces are copied through together with their backslash.
        let text = "Test`[Events]`backslash and \\{brace and \\}close";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert_eq!(
            analysis.plain_text(),
            "Test`[Events]`backslash and \\{brace and \\}close"
        );
    }

    #[test]
    fn text_analysis_karaoke_tags() {
        let text = "{\\k50}Ka{\\k30}ra{\\k70}o{\\k40}ke";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert_eq!(analysis.plain_text(), "Karaoke");
        assert!(!analysis.override_tags().is_empty());
    }

    #[test]
    fn text_analysis_position_and_rotation() {
        let text = "{\\pos(320,240)}{\\frz45}Rotated positioned text";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert_eq!(analysis.plain_text(), "Rotated positioned text");
        assert!(!analysis.override_tags().is_empty());
    }

    #[test]
    fn text_analysis_very_long_text() {
        let text = "A".repeat(1000);
        let analysis = TextAnalysis::analyze(&text).unwrap();

        assert_eq!(analysis.char_count(), 1000);
        assert_eq!(analysis.plain_text().len(), 1000);
    }

    #[test]
    fn text_analysis_line_count_edge_cases() {
        // A trailing line break after real text does not open a new line.
        let text1 = "Line 1\\nLine 2\\n";
        let analysis1 = TextAnalysis::analyze(text1).unwrap();
        assert_eq!(analysis1.line_count(), 2);

        // Consecutive breaks inside the text produce empty lines.
        let text2 = "Line 1\\n\\n\\nLine 2";
        let analysis2 = TextAnalysis::analyze(text2).unwrap();
        assert_eq!(analysis2.line_count(), 4);

        // Only line breaks: one more line than there are breaks.
        let text3 = "\\n\\N\\n";
        let analysis3 = TextAnalysis::analyze(text3).unwrap();
        assert_eq!(analysis3.line_count(), 4);
    }

    #[test]
    fn text_analysis_excessive_brace_nesting() {
        let mut text = String::new();
        for _ in 0..110 {
            text.push('{');
        }
        text.push_str("\\b1");
        for _ in 0..110 {
            text.push('}');
        }

        let result = TextAnalysis::analyze(&text);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Maximum brace nesting depth exceeded"));
    }

    #[test]
    fn text_analysis_drawing_mode_escape_sequences() {
        // Escapes inside drawing mode must not leak into the plain text.
        let text = "{\\p1}Line1\\nLine2\\hSpace\\NNewline{\\p0}Normal\\ntext";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert_eq!(analysis.plain_text(), "Normal\ntext");
        assert!(!analysis.override_tags().is_empty());
    }

    #[test]
    fn text_analysis_drawing_mode_p_value_parsing() {
        let text1 = "{\\p0}Not drawing mode";
        let analysis1 = TextAnalysis::analyze(text1).unwrap();
        assert_eq!(analysis1.plain_text(), "Not drawing mode");

        let text2 = "{\\p1}Drawing mode";
        let analysis2 = TextAnalysis::analyze(text2).unwrap();
        assert_eq!(analysis2.plain_text(), "");

        // Any positive level enables drawing mode, not just 1.
        let text3 = "{\\p5}Also drawing mode";
        let analysis3 = TextAnalysis::analyze(text3).unwrap();
        assert_eq!(analysis3.plain_text(), "");
    }

    #[test]
    fn text_analysis_line_count_only_newlines() {
        let text = "\n\n\n";
        let analysis = TextAnalysis::analyze(text).unwrap();
        assert_eq!(analysis.line_count(), 4);
    }

    #[test]
    fn text_analysis_drawing_mode_mixed_escapes() {
        let text = "{\\p1}Start\\nNew\\NLine\\hHard{\\p0}End\\nNormal";
        let analysis = TextAnalysis::analyze(text).unwrap();

        assert_eq!(analysis.plain_text(), "End\nNormal");
        assert!(!analysis.override_tags().is_empty());
    }
}