mathcat 0.7.5

MathCAT: Math Capable Assistive Technology ('Speech and braille from MathML')
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
---
# A one-character <mi> whose text is one of the number-set letters ℂℕℚℝℤ
- name: simple-number-set
  tag: mi
  # translate() deletes every listed letter, so an empty result means the single character was one of them
  # (a compact alternative to a chain of 'or' comparisons)
  match: "string-length(text())=1 and translate(text(), 'ℂℕℚℝℤ', '')=''"
  replace:
  - intent:
      name: number-sets
      children:
      - x: "text()"

# A number-set letter (ℂℕℚℝℤ) carrying a one-character superscript that is '+', '-', or an <mn>
- name: sup-number-set
  tag: msup
  match:
  # base: a single character drawn from the set letters
  - "*[1][string-length(text())=1 and translate(., 'ℂℕℚℝℤ', '')=''] and"
  # superscript: a single character that is a sign or a number
  - "*[2][string-length(text())=1 and (.='+' or .='-' or self::m:mn)]"
  replace:
  - intent:
      name: number-sets
      children:
      - x: "*[1]"
      - x: "*[2]"

# 'Re', 're', or 'ℜ' used as a function name
- name: real-part
  tag: mi
  # the immediately following sibling must be U+2061 (function apply)
  match: "(.='Re' or .='re' or .='ℜ') and following-sibling::*[1][.='\u2061']"
  replace:
  - intent:
      name: real-part
      children: []
  
# 'Im', 'im', or 'ℑ' used as a function name
- name: imaginary-part
  tag: mi
  # the immediately following sibling must be U+2061 (function apply)
  match: "(.='Im' or .='im' or .='ℑ') and following-sibling::*[1][.='\u2061']"
  replace:
  - intent:
      name: imaginary-part
      children: []
  
# Any leaf that carries a @data-number attribute: produce an 'mn' intent whose child is that attribute
- name: roman_numeral
  tag: [mi, mn, mtext]
  match: "@data-number"
  replace:
  - intent:
      name: mn
      children:
      - x: "@data-number"

# Two-child mrow whose first child is a '+' or '-' operator.
# The intent name is 'plus' or 'minus'; the property marks it as infix or prefix depending on table context.
- name: positive-or-negative
  tag: mrow
  match: "count(*)=2 and *[1][self::m:mo][.='+' or .='-']"
  variables:
  - AtLeftEdge: "EdgeNode(., 'left', 'mtd')"
  - AtLeftEdgeOfMTD: "name($AtLeftEdge) = 'mtd'"
  - ContinuedColumn: "$TableProperty='lines' or $TableProperty='system-of-equations' or $TableProperty='piecewise'"
  - InContinuedRow: "parent::m:mtd[parent::m:mtr[@data-intent-property[contains(., ':continued-row:')]]]"
  # first child of mtr...
  - FirstColumn: "$AtLeftEdge/../*[1]=$AtLeftEdge"
  replace:
  - test:
      # a continued data cell gets the infix form rather than positive/negative
      if: "$AtLeftEdgeOfMTD and $ContinuedColumn and (not($FirstColumn) or $InContinuedRow)"
      then:
      - intent:
          xpath-name: "IfThenElse(*[1][self::m:mo][.='+'], 'plus', 'minus')"
          attrs: "data-intent-property='concat(data-intent-property, \":infix:\")'"
          children:
          - x: "*[2]"
      else:
      - intent:
          xpath-name: "IfThenElse(*[1][self::m:mo][.='+'], 'plus', 'minus')"
          attrs: "data-intent-property='concat(data-intent-property, \":prefix:\")'"
          children:
          - x: "*[2]"
               

# Intervals.
# (a,b) has many interpretations; the mixed forms such as (a, b] have fewer.
# An interval denotes a set, so a set operator (or '=') immediately before or after it is a clue.
# A bare (x, y) is just as likely a point or a gcd, so parentheses alone are not enough to infer an interval.
# Note that [x,y] is also a commutator.
# Intervals are never nested, so candidates with a bracketed ancestor or descendant are rejected.
- name: interval
  tag: mrow
  match:
  - "count(*)=3 and "
  # FIX: consider adding ]...[ versions
  # match bracketing
  - "(*[1][.='(' or .='['] and *[3][.=')' or .=']']) and"
  # inside should have ','
  - "(*[2][count(*)=3 and *[2][.=',']]) and"
  # intervals are not part of set notation (e.g., { (x,y)∈L | ...}) nor are they nested
  - "not(ancestor::*[IsBracketed(., '{', '}') or IsBracketed(., '[', ']') or IsBracketed(., '(', ')')]) and "
  - "not(descendant::*[IsBracketed(., '{', '}') or IsBracketed(., '[', ']') or IsBracketed(., '(', ')')]) and "
  # FIX: if both the first and third children of *[2] are mn, then make sure first <= third
  - "("
  # (.,.) is very ambiguous -- need more clues
  - "  not(IsBracketed(., '(', ')')) or "
  # pref is set
  - "  $ClearSpeak_Paren = 'Interval' or "
  # starts with infinity
  - " *[2]/*[1][contains(., '∞')] or "
  # ends with infinity
  - " *[2]/*[3][contains(., '∞')] or "
  # context hints: '=' or a subset operator directly before or after
  - "  preceding-sibling::*[1][self::m:mo and ( .='=' or IsInDefinition(., 'SubsetOperators') )] or "
  - "  following-sibling::*[1][self::m:mo and ( .='=' or IsInDefinition(., 'SubsetOperators') )]"
  - ")"
  replace:
  # the bracket pair picks the intent name; the children are always the two endpoints
  - test:
    - if: "*[1][.='('] and *[3][.=')']"
      then:
      - intent:
          name: "open-interval"
          children:
          - x: "*[2]/*[1]"
          - x: "*[2]/*[3]"
    - else_if: "*[1][.='('] and *[3][.=']']"
      then:
      - intent:
          name: "open-closed-interval"
          children:
          - x: "*[2]/*[1]"
          - x: "*[2]/*[3]"
    - else_if: "*[1][.='['] and *[3][.=']']"
      then:
      - intent:
          name: "closed-interval"
          children:
          - x: "*[2]/*[1]"
          - x: "*[2]/*[3]"
      else:
      - intent:
          name: "closed-open-interval"
          children:
          - x: "*[2]/*[1]"
          - x: "*[2]/*[3]"

# Parenthesized mfrac whose @linethickness is zero
- name: binomial-frac
  tag: mrow
  # @linethickness might be written with a unit (e.g. "0em"); translate() drops the lowercase letters before comparing to 0
  match: "IsBracketed(., '(', ')') and *[2][self::m:mfrac][translate(@linethickness, 'abcdefghijklmnopqrstuvwxyz', '')=0]"
  replace:
  - intent:
      name: binomial
      attrs: "data-intent-property='concat(data-intent-property, \":infix:\")'"
      children:
      # numerator, then denominator, of the mfrac
      - x: "*[2]/*[1]"
      - x: "*[2]/*[2]"

# 'C' with a prescript 'n' (in either the sub or the super position) and a post-subscript 'm'.
# See https://en.wikipedia.org/wiki/Binomial_coefficient#History_and_notation
- name: binomial-mmultiscripts
  tag: mmultiscripts
  variables:
  # everything after <mprescripts/>
  - Prescripts: "m:mprescripts/following-sibling::*"
  # everything before <mprescripts/> except the base
  - Postscripts: "m:mprescripts/preceding-sibling::*[position() < last()]"
  match:
  - "*[1][self::m:mi and .='C'] and"
  # exactly one of the two prescript slots is used
  - "count($Prescripts)=2 and ($Prescripts[1][self::m:none] or $Prescripts[2][self::m:none]) and "
  # a postscript pair whose second entry is <none/>
  - "count($Postscripts)=2 and $Postscripts[2][self::m:none]"
  replace:
  - intent:
      name: binomial
      attrs: "data-intent-property='concat(data-intent-property, \":infix:\")'"
      children:
      # whichever prescript slot is not <none/>
      - x: "IfThenElse(name($Prescripts[2])='none', $Prescripts[1], $Prescripts[2])"
      - x: "$Postscripts[1]"


# C_{n,k}: 'C' subscripted by a three-child mrow with a comma in the middle
- name: binomial-msub
  tag: msub
  match:
  - "*[1][self::m:mi and .='C'] and"
  - "*[2][self::m:mrow and count(*)=3 and *[2][.=',']]"
  replace:
  - intent:
      name: binomial
      attrs: "data-intent-property='concat(data-intent-property, \":infix:\")'"
      children:
      # the entries on either side of the comma
      - x: "*[2]/*[1]"
      - x: "*[2]/*[3]"

# 'P' with a pre-sub or pre-superscript 'n' and a post-subscript 'k'
# (https://en.wikipedia.org/wiki/Permutation#k-permutations_of_n)
- name: permutation-mmultiscripts
  tag: mmultiscripts
  variables:
  # everything after <mprescripts/>
  - Prescripts: "m:mprescripts/following-sibling::*"
  # everything before <mprescripts/> except the base
  - Postscripts: "m:mprescripts/preceding-sibling::*[position() < last()]"
  match:
  - "*[1][self::m:mi and .='P'] and"
  # exactly one of the two prescript slots is used
  - "count($Prescripts)=2 and ($Prescripts[1][self::m:none] or $Prescripts[2][self::m:none]) and "
  # a postscript pair whose second entry is <none/>
  - "count($Postscripts)=2 and $Postscripts[2][self::m:none]"
  replace:
  - intent:
      name: pochhammer
      attrs: "data-intent-property='concat(data-intent-property, \":infix:\")'"
      children:
      # pick the prescript slot that is not <none/>
      - test:
          if: "$Prescripts[1][self::m:none]"
          then:
          - x: "$Prescripts[2]"
          else:
          - x: "$Prescripts[1]"
      - x: "$Postscripts[1]"

# 'P' with superscript 'n' and subscript 'k'
# (https://en.wikipedia.org/wiki/Permutation#k-permutations_of_n)
- name: permutation-msubsup
  tag: msubsup
  match:
  - "*[1][self::m:mi and .='P']"
  replace:
  - intent:
      name: pochhammer
      attrs: "data-intent-property='concat(data-intent-property, \":infix:\")'"
      children:
      # superscript first, then subscript
      - x: "*[3]"
      - x: "*[2]"

# rules on scripted vertical bars ('evaluated at')
# expr |_a : an mrow whose second child is an msub with a '|' operator as its base
- name: evaluated-at-msub
  tag: mrow
  match: "count(*)=2 and *[2][self::m:msub and *[1][self::m:mo][.='|']]"
  replace:
  - intent:
      name: "evaluate"
      attrs: "data-intent-property='concat(data-intent-property, \":infix:\")'"
      children:
      # the expression, then the subscript on the bar
      - x: "*[1]"
      - x: "*[2]/*[2]"

# expr |_a^b : an mrow whose second child is an msubsup with a '|' operator as its base
- name: evaluated-at-msubsup
  tag: mrow
  match: "count(*)=2 and *[2][self::m:msubsup and *[1][self::m:mo][.='|']]"
  replace:
  - intent:
      name: "evaluate"
      children:
      # the expression, then the subscript and the superscript on the bar
      - x: "*[1]"
      - x: "*[2]/*[2]"
      - x: "*[2]/*[3]"

# 'evaluated at' is also written with the scripts attached to a bracketed expression, e.g. [f(x)]_a^b
# [expr]_a^b : an msubsup whose base is an expression wrapped in square brackets
- name: bracketed-evaluated-at
  tag: msubsup
  match: "IsBracketed(*[1], '[', ']')"
  replace:
  - intent:
      name: "evaluate"
      children:
      # *[1] is the bracketed base; its first and last children are the '[' and ']' themselves,
      # so the expression being evaluated is the middle child (*[2]), as in the other IsBracketed rules
      - x: "*[1]/*[2]"
      - x: "*[2]"
      - x: "*[3]"


# Vertical bars that did not match an earlier rule.
# Could also be cardinality, length, ...
- name: absolute-value
  tag: mrow
  # three children, '|' on both ends, and the middle child is not an mtable
  match: "count(*) = 3 and not(*[2][self::m:mtable]) and IsBracketed(., '|', '|')"
  replace:
  - intent:
      name: "absolute-value"
      attrs: "data-intent-property='concat(data-intent-property, \":function:\")'"
      children:
      - x: "*[2]"

# Every msqrt becomes 'square-root' applied to its first child
- name: default
  tag: msqrt
  match: "."
  replace:
  - intent:
      name: "square-root"
      attrs: "data-intent-property='concat(data-intent-property, \":function:\")'"
      children:
      - x: "*[1]"


# An mroot whose index is the number 2 is just a square root
- name: sqrt
  tag: mroot
  match: "*[2][self::m:mn and .='2']"
  replace:
  - intent:
      name: "square-root"
      attrs: "data-intent-property='concat(data-intent-property, \":function:\")'"
      children:
      - x: "*[1]"

# Any other mroot: 'root' with both mroot children, in document order
- name: default
  tag: mroot
  match: "."
  replace:
  - intent:
      name: "root"
      children:
      - x: "*[1]"
      - x: "*[2]"

  
# Handle log with a base:
#   msub    (log with a subscript)            -> logarithm-with-base(base)
#   msubsup (log with subscript and exponent) -> power(logarithm-with-base(base), exponent)
- name: log-with-base
  tag: [msub, msubsup]
  match: "*[1][self::m:mi and .='log']"
  replace:
  - test:
      if: "self::m:msub"
      then:
      - intent:
          name: "logarithm-with-base"
          # property values are delimited by ':' on both sides, the same as the msubsup branch below
          attrs: "data-intent-property='concat(data-intent-property, \":prefix:\")'"
          children:
          - x: "*[2]"    # grab the base
      else:
      - intent:
          name: "power"
          attrs: "data-intent-property='concat(data-intent-property, \":infix:\")'"
          children:
          - intent:
              name: "logarithm-with-base"
              attrs: "data-intent-property='concat(data-intent-property, \":prefix:\")'"
              children:
              - x: "*[2]"  # grab the base
          - x: "*[3]"       # grab the exponent

# Large operator with a single limit: the base is listed in 'LargeOperators' or the element is tagged ':largeop:'
- name: bigop
  tag: [msub, munder]
  match: "IsInDefinition(*[1], 'LargeOperators') or contains(@data-intent-property, ':largeop:')"
  replace:
  - intent:
      # Fix: the name in the spreadsheet needs updating/fixing
      name: "large-op"
      children:
      - x: "*[1]"
      - x: "*[2]"
# lim / limsup / liminf with something underneath (or as a subscript)
- name: limit
  tag: [msub, munder]
  variables:
  # Sometimes these have (weird) spaces in them: compare the base with the listed space characters removed
  - NoSpacesBase: "translate(*[1], ' \u00A0\u2004\u2005\u2006\u2007\u2008\u2009\u200A','')"
  match: "*[1][$NoSpacesBase='lim' or $NoSpacesBase='limsup' or $NoSpacesBase='liminf']"
  replace:
  - intent:
      # 'lim' -> limit, 'limsup' -> limit-sup, otherwise (liminf) -> limit-inf
      xpath-name: "IfThenElse($NoSpacesBase='lim', 'limit', IfThenElse($NoSpacesBase='limsup', 'limit-sup', 'limit-inf'))"
      attrs: "data-intent-property='concat(data-intent-property, \":prefix:\")'"
      children:
      - x: "*[2]"

# <mn>, U+2062 (invisible times), then an <mover> holding an <mn> under U+00AF (macron)
-
  name: repeating-decimal
  tag: mrow
  match: "count(*)=3 and *[1][self::m:mn] and
          *[2][.='\u2062'] and
          *[3][self::m:mover][*[1][self::m:mn] and *[2][.='\u00AF']]"
  replace:
  - intent:
      name: "repeating-decimal"
      children:
      # the leading number, then the number under the bar
      - x: "*[1]"
      - x: "*[3]/*[1]"

# An <mi> with an accent-like mark over it
- name: modified-var
  tag: mover
  # the over-script must consist only of the listed marks:
  # breve, check, dot, double-dot, triple-dot, quadruple-dot, grave, hat, tilde, line/bar
  match: "*[1][self::m:mi] and *[2][translate(., '\u0306\u030c.\u00A8\u02D9\u20DB\u20DC`^~¯_', '')='']"
  replace:
  - intent:
      name: "modified-variable"
      attrs: "data-intent-property='concat(data-intent-property, \":silent:\")'"
      children:
      - x: "*[1]"
      - x: "*[2]"

# An <mi> with '→' or '⇀' over it; only the base is kept as a child
- name: vector
  tag: mover
  match: "*[1][self::m:mi] and *[2][.='→' or .='⇀']"
  replace:
  - intent:
      name: "vector"
      attrs: "data-intent-property='concat(data-intent-property, \":prefix:\")'"
      children:
      - x: "*[1]"


-
   # this captures the output for the mhchem's "<=>", "<<=>", and "<=>>" output (there are no Unicode arrows for them)
   # this isn't a perfect match, but should be good enough and allows merging all three (see github.com/NSoiffer/MathCAT/issues/60)
   # The rule matches an mover whose base starts with '↽' and whose over-script ends with '⇀';
   #   which of the two parts is an mrow selects the replacement text.
   # NOTE(review): YAML's "\u" escape takes exactly four hex digits, so "\u1f8d2" parses as U+1F8D followed by '2'
   #   rather than as U+1F8D2 (that would be written "\U0001f8d2"). Whatever consumes 'chemical-arrow-operator' may
   #   rely on the current strings -- confirm before changing them.
   name: chemistry-mhchem-equilibrium-arrow
   tag: mover
   match:
   -    "*[1][substring(., 1, 1)='↽'] and"
   -    "*[2][substring(., string-length(), 1)='⇀']"
   replace:
   - intent:
      name: "chemical-arrow-operator"
      children:
      - test:
          if: "*[1][self::m:mrow]"
          then_test:
              if: "*[2][self::m:mrow]"
              then: [t: "\u1f8d2"]    # this is currently unassigned and may get used by UTC at some point (<=>)
              else: [t: "\u1f8d4"]    # this is currently unassigned and may get used by UTC at some point (<<=>)
          else: [t: "\u1f8d3"]        # this is currently unassigned and may get used by UTC at some point (<=>>)

# Scripted element tagged with @data-chem-formula
- name: chemistry
  tag: [msub, msup]
  match: "@data-chem-formula"
  replace:
  - intent:
      name: "chemical-formula"
      # the children are listed one by one because "*" results in an internal error (children aren't flat)
      children:
      # the element name goes first so the rules know whether to say "sub" or "super"
      - x: "name(.)"
      - x: "*[1]"
      - x: "*[2]"


# mmultiscripts tagged with @data-chem-formula: four or six children give a nuclide, anything else a formula
- name: chemistry-prescripts
  tag: mmultiscripts
  match: "@data-chem-formula"
  replace:
  - test:
    - if: "count(*)=4 or count(*)=6"
      then:
      - intent:
          name: "chemical-nuclide"
          children:
          - x: "*"
      # FIX: what other cases are there???
      else:
      - intent:
          name: "chemical-formula"
          children:
          - x: "*"


# Leaf tagged with @data-chem-element; its text is the only child
- name: chemical-element
  tag: [mi, mtext]
  match: "@data-chem-element"
  replace:
  - intent:
      name: "chemical-element"
      children:
      - x: "text()"

# Leaf explicitly marked as a unit, by class 'MathML-unit' or by an intent property containing ':unit'
- name: unit
  tag: [mi, mtext]
  match: "@class='MathML-unit' or contains(@data-intent-property, ':unit')"
  replace:
  - intent:
      name: "unit"
      # the value ":unit:" needs to be an xpath string, hence the escaped quotes
      attrs: "data-intent-property='\":unit:\"'"
      children:
      - x: "text()"

# This is an attempt to catch units when someone has created a non-italic word that matches a unit;
# other mtext unit cases were caught above.
# A guard makes sure it is positionally a unit.
#   Currently only a simple mrow like "3 s" or "3/4 km" is checked.
#   Other checks would be an mfrac with a unit in both parts (complicated to check), or an msup with a unit in
#   the base where that msup is inside an mrow.
# The match requires, in order:
#   - not a chemical element
#   - an mtext, or text longer than one character, or mathvariant='normal'
#   - a three-child mrow parent: number (or number/number fraction), U+2062, then this node
#   - text that is a known unit, 'da' + an SI unit, or a one-character SI prefix + an SI unit
-
  name: mtext-inference-unit
  tag: [mtext, mi]
  match: "not(@data-chem-element) and
          (self::m:mtext or string-length(.) > 1 or @mathvariant='normal') and
          (parent::*[1][self::m:mrow and count(*)=3 and *[2][.='\u2062'] and
                        *[1][self::m:mn or (self::m:mfrac and *[1][self::m:mn] and *[2][self::m:mn])]]) and
          count(preceding-sibling::*) = 2 and
          (DefinitionValue(., 'Speech', 'SIUnits') != '' or
           DefinitionValue(., 'Speech', 'UnitsWithoutPrefixes') != '' or
           DefinitionValue(., 'Speech', 'EnglishUnits') != '' or
           (string-length(.) >= 3 and
            substring(., 1, 2) = 'da' and
            DefinitionValue(substring(., 3), 'Speech', 'SIUnits') != '') or
           (string-length(.) >= 2 and
            DefinitionValue(substring(., 1, 1), 'Speech', 'SIPrefixes') != '' and
            DefinitionValue(substring(., 2), 'Speech', 'SIUnits') != ''))"
  replace:
  - intent:
      name: "unit"
      # the value ":unit:" needs to be an xpath string, hence the escaped quotes
      attrs: "data-intent-property='\":unit:\"'"
      children:
      - x: "text()"

# Operator tagged with @data-chem-formula-op; its text is the only child
- name: chemical-formula-op
  tag:
  - mo
  match: "@data-chem-formula-op"
  replace:
  - intent:
      name: "chemical-formula-operator"
      children:
      - x: "text()"

# Operator tagged with @data-chem-equation-op whose text consists only of the listed arrows
- name: chemical-arrow-op
  tag:
  - mo
  # FIX: this is a duplicate of the list in chemistry.rs. Probably should pull out and add to definitions.yaml
  match: "@data-chem-equation-op and translate(., '→➔←⟶⟵⤻⇋⇌↿↾⇃⇂⥮⥯⇷⇸⤉⤈⥂⥄', '')=''"
  replace:
  - intent:
      name: "chemical-arrow-operator"
      children:
      - x: "text()"

# Any remaining operator tagged with @data-chem-equation-op
- name: chemical-equation-op
  tag:
  - mo
  match: "@data-chem-equation-op"
  replace:
  - intent:
      name: "chemical-equation-operator"
      children:
      - x: "text()"

# Square-bracketed mrow that is, or sits inside, something tagged @data-chem-equation
- name: chemistry-concentration
  tag: mrow
  match: "ancestor-or-self::*[@data-chem-equation] and IsBracketed(., '[', ']')"
  replace:
  - intent:
      name: "chemistry-concentration"
      attrs: "data-intent-property='concat(data-intent-property, \":function:\")'"
      children:
      # the content between the brackets
      - x: "*[2]"

# Parenthesized 's', 'l', 'g', or 'aq' on an mrow tagged as a chemical formula or equation
- name: chemistry-state
  tag: mrow
  match:
  - "(@data-chem-formula or @data-chem-equation) and"
  - "IsBracketed(., '(', ')') and"
  - "*[2][.='s' or .='l' or .='g' or .='aq']"
  replace:
  - intent:
      name: "chemical-state"
      children:
      # the state letters between the parentheses
      - x: "*[2]"


# This needs to be before the simple "x prime" rule
# minutes/seconds or feet/inches
# If ' or " follows a number, then it is not "prime", but is a unit
# Note the ASCII ' and " are converted to prime during canonicalization if in a superscript
# Handles single, double, primes, and also double quote, which don't have to be in an msup
# The rules are:
# 1. If the prime follows a degree sign with a number (in various forms) or letter after it,
#    then it is minutes/seconds
# 2. Else, if it follows a *number* (in various forms), then it is feet/inches
# 3. Else it is 'prime'
# any ? ( count(match)==2 &&
#           (name(match)=="mrow" || name(match)=="msup") &&
#           (MatchString($1, "mo", "'") || MatchString($1, "mo", "′") ||      // apostrophe or prime
#            MatchString($1, "mo", "″") || MatchString($1, "mo", "\"")) &&    // double quote or double prime
#           (
#               // '1': degree sign check
#               ( has_previous(match) &&
#                  ( (MatchString($1, "mo", "′") &&
#                      ( name(previous(match))=="msup" ||
#                         (name(previous(match))=="mrow" && count(previous(match))==2)) &&
#                     MatchString(previous(match)[1], "mo", "°")) ||
#                      ( has_previous(previous(match)) &&
#                         (MatchString($1, "mo", "″") || MatchString($1, "mo", "\"")) &&
#                         ( name(previous(match, 2))=="msup" ||
#                           (name(previous(match, 2))=="mrow" && count(previous(match, 2))==2)) &&
#                         MatchString(previous(match, 2)[1], "mo", "°"))
#                   )
#               ) || 
# 
#              // '2': number or numeric fraction in front
#              name($0)=="mn" ||
#               (name($0)=="mfrac" && name($0[0])=="mn" && name($0[1])=="mn")
#           )
#         )
#   => structure(
#         $0,
#        UIWord([$1[0],$1[-1]])
#                 {ruleRef="RR_unitsBase";
#                  singular=MatchString($0, "mn", "1");
#                  // if this follows a degree sign, then it is an angle notation (min/secs) regardless of the pref setting
#                  // we have to either look at the previous entry for mins or the one prior to that for secs
#                  pref= ( has_previous(match) &&
#                            ( (MatchString($1, "mo", "′") &&
#                               ( name(previous(match))=="msup" ||
#                                  (name(previous(match))=="mrow" && count(previous(match))==2)) &&
#                                MatchString(previous(match)[1], "mo", "°")) ||
#                               ( has_previous(previous(match)) &&
#                                   (MatchString($1, "mo", "″") || MatchString($1, "mo", "\"")) &&
#                                 ( name(previous(match, 2))=="msup" ||
#                                    (name(previous(match, 2))=="mrow" && count(previous(match, 2))==2)) &&
#                                  MatchString(previous(match, 2)[1], "mo", "°"))
#                             )
#                          ) ? "Angle" : "Length";
#                 }
#         );

# Pseudo-script characters are characters such as "degree sign" ('°') that are raised but in MathML should be in a superscript.
# They are not spoken as if in a superscript (e.g., "x degrees", not "x superscript degrees")
# The last child consists only of pseudo-script characters (primes, daggers, degree sign, '*', quotes)
- name: skip-super
  tag: [msup, msubsup]
  # the apostrophe is tested separately: you can't have both ' and " in an xpath string
  match: "*[last()][translate(., '′″‴⁗\"†‡°*', '')='' or .=\"'\"]"
  replace:
  - intent:
      name: "skip-super"
      attrs: "data-intent-property='concat(data-intent-property, \":silent:\")'"
      children:
      - x: "*"

# The last child is any other operator
- name: mo-super
  tag: [msup, msubsup]
  match: "*[last()][self::m:mo]"
  replace:
  - intent:
      name: "say-super"
      attrs: "data-intent-property='concat(data-intent-property, \":infix:\")'"
      children:
      - x: "*"

# rules for functions raised to a power
# these could have been written on 'mrow' but putting them on msup seems more specific
# to see if it is a function, we look right to see if the following sibling is apply-function
# f^{-1} where f is a trig function name that is being applied to something
- name: function-inverse
  tag: msup
  match:
  # exponent is an mrow with two children...
  - "*[2][self::m:mrow][count(*)=2] and"
  #   ...a '-' operator
  - "*[2]/*[1][self::m:mo][.='-'] and"
  #   ...followed by the number 1
  - "*[2]/*[2][self::m:mn][.=1] and"
  # base is a trig function name (e.g., sin, sinh)
  - "*[1][self::m:mi] and IsInDefinition(*[1], 'TrigFunctionNames') and"
  # and the msup is followed by invisible function apply
  - "following-sibling::*[1][self::m:mo][.='\u2061']"
  replace:
  - intent:
      name: "inverse-function"
      children:
      - x: "*[1]"


# Large operator with two limits: the base is listed in 'LargeOperators' or the element is tagged ':largeop:'
- name: bigop
  tag: [msubsup, munderover]
  match: "IsInDefinition(*[1], 'LargeOperators') or contains(@data-intent-property, ':largeop:')"
  replace:
  - intent:
      # Fix: the name in the spreadsheet needs updating/fixing
      name: "large-op"
      children:
      - x: "*[1]"
      - x: "*[2]"
      - x: "*[3]"

# Sets are wrapped in { } and are either empty, hold a single leaf, or contain at least one of ",∈∉|:"
# (a vertical bar is "such that")
-
  name: set
  tag: mrow
  match: "IsBracketed(., '{', '}') and
          (count(*)=2 or (count(*)=3 and IsNode(*[2], 'leaf')) or . != translate(., ',∈∉|:', ''))"
  replace:
  - intent:
      name: "set"
      children:
      # only the two braces present => the empty set, which has no children
      - test:
          if: "count(*) = 2"
          then: []
          else:
          - x: "*[2]"
# Any msub not claimed by an earlier rule: base indexed by subscript
- name: default
  tag: msub
  match: "."
  replace:
  - intent:
      name: "indexed-by"
      attrs: "data-intent-property='concat(data-intent-property, \":infix:\")'"
      children:
      - x: "*[1]"
      - x: "*[2]"

# Any msup not claimed by an earlier rule becomes a 'power'.
# A ":unit:" property is added when the base is a unit -- this simplifies things downstream and
#   catches s^2 as in m/s^2.
# This has to happen in the 'attrs' calculation: by then the children have been replaced, so they are
#   marked as units if they are units. At the point of 'match' the replacement has yet to be done, so
#   they may not have their inferred units.
- name: default
  tag: msup
  match: "."
  replace:
  - intent:
      name: "power"
      attrs: "data-intent-property='IfThenElse(*[1][contains(@data-intent-property, \":unit:\")], \":unit:\", \"\")'"
      children:
      - x: "*[1]"
      - x: "*[2]"


# Any msubsup not claimed by an earlier rule: power(indexed-by(base, subscript), superscript)
- name: default
  tag: msubsup
  match: "."
  replace:
  - intent:
      name: "power"
      children:
      - intent:
          name: "indexed-by"
          attrs: "data-intent-property='concat(data-intent-property, \":infix:\")'"
          children:
          - x: "*[1]"
          - x: "*[2]"
      - x: "*[3]"

# Every mfrac becomes a 'fraction' of its two children.
# A ":unit:" property is added to the fraction if both children are units -- this simplifies things downstream.
# This has to happen in the 'attrs' calculation: by then the children have been replaced, so they are
#   marked as units if they are units. At the point of 'match' the replacement has yet to be done, so
#   they may not have their inferred units.
- name: default
  tag: mfrac
  match: "."
  replace:
  - intent:
      name: "fraction"
      # both tests are written the same way; no debugging wrapper is left around the denominator
      attrs: "data-intent-property='IfThenElse(*[1][contains(@data-intent-property, \":unit:\")] and
                                               *[2][contains(@data-intent-property, \":unit:\")],
                                               \":unit:\",
                                               \"\")'"
      children:
      - x: "*[1]"
      - x: "*[2]"


# generic mtable: treat as multiline equations of some sort
# if a property is set, handle that first

# Any 'intent' property set on the mtable overrides the inferred property.
# The properties are tested in a fixed order (array, matrix, determinant, piecewise, system-of-equations, lines);
#   if none is present, the incoming $TableProperty is kept.
- name: mtable-property-is-set
  tag: mtable
  variables:
  - TableProperty: "IfThenElse(contains(@data-intent-property, ':array:'), 'array',
                    IfThenElse(contains(@data-intent-property, ':matrix:'), 'matrix',
                    IfThenElse(contains(@data-intent-property, ':determinant:'), 'determinant',
                    IfThenElse(contains(@data-intent-property, ':piecewise:'), 'piecewise',
                    IfThenElse(contains(@data-intent-property, ':system-of-equations:'), 'system-of-equations',
                    IfThenElse(contains(@data-intent-property, ':lines:'), 'lines',
                      $TableProperty))))))"
  # only applies when some property ended up set
  match: "$TableProperty !=''"
  replace:
  - intent:
      xpath-name: "$TableProperty"
      children:
      - x: "*"

# '{' followed by an mtable that does not carry one of the other explicit table properties
-
  name: "piecewise"
  tag: mrow
  match: "count(*)=2 and *[1][.='{'] and
          *[2][self::m:mtable and not(
                        contains(@data-intent-property, ':array:') or
                        contains(@data-intent-property, ':matrix:') or
                        contains(@data-intent-property, ':determinant:') or
                        contains(@data-intent-property, 'system-of-equations:') or
                        contains(@data-intent-property, ':lines:') )
              ]"
  replace:
  # the brace is dropped: only the table is processed, with $TableProperty set to 'piecewise'
  - with:
      variables:
      - TableProperty: "'piecewise'"
      replace:
      - x: "*[2]"

# ( ) or [ ] around an mtable that does not carry one of the other explicit table properties.
# As in the 'piecewise' and 'determinant' rules, the exclusion list names every table property except
#   this rule's own, so an explicit ':array:' is respected and an explicit ':matrix:' is still handled here.
- name: matrix
  tag: mrow
  match:
  - "count(*)=3 and (IsBracketed(., '(', ')') or IsBracketed(., '[', ']')) and
          *[2][self::m:mtable and not(
                        contains(@data-intent-property, ':array:') or
                        contains(@data-intent-property, ':piecewise:') or
                        contains(@data-intent-property, ':determinant:') or
                        contains(@data-intent-property, 'system-of-equations:') or
                        contains(@data-intent-property, ':lines:') )
              ]"
  replace:
  # the brackets are dropped: only the table is processed, with $TableProperty set to 'matrix'
  - with:
      variables:
      - TableProperty: "'matrix'"
      replace:
      - x: "*[2]"

# | | around an mtable that does not carry one of the other explicit table properties
- name: determinant
  tag: mrow
  match:
  - "count(*)=3 and IsBracketed(., '|', '|') and
          *[2][self::m:mtable and not(
                        contains(@data-intent-property, ':array:') or
                        contains(@data-intent-property, ':matrix:') or
                        contains(@data-intent-property, ':piecewise:') or
                        contains(@data-intent-property, 'system-of-equations:') or
                        contains(@data-intent-property, ':lines:') )
              ]"
  replace:
  # the bars are dropped: only the table is processed, with $TableProperty set to 'determinant'
  - with:
      variables:
      - TableProperty: "'determinant'"
      replace:
      - x: "*[2]"

# A table whose first row contains a relational operator is treated as a system of equations
- name: mtable-equations-property
  tag: mtable
  # translate() maps every listed relation onto '=' so that one contains() covers them all.
  # The replacement string needs one '=' per source character: XPath's translate() *deletes* any
  #   source character that has no counterpart in the replacement string instead of mapping it.
  match: "count(*) > 0 and *[1][contains(translate(., '=≠<>≤≥≦≧', '========'), '=')]"
  replace:
  - with:
      variables:
      - TableProperty: "'system-of-equations'"
      replace:
      - intent:
          name: "system-of-equations"
          children:
          - x: "*"

# A non-empty table with no operators other than '+' and '-' anywhere inside it is treated as plain lines
- name: mtable-lines-property
  tag: mtable
  match: "count(*) > 0 and not(descendant::*[self::m:mo and not(.='-' or .='+')])"
  replace:
  - with:
      variables:
      - TableProperty: "'lines'"
      replace:
      - intent:
          name: "lines"
          children:
          - x: "*"

# Labeled row of a piecewise / system-of-equations / lines table:
# keep the label cell and merge all the other mtd into a single mtd
-
  name: consolidate-labeled-mtd-entries
  tag: [mlabeledtr, mtr]
  # Note: the children haven't been processed yet, so 'intent' hasn't been turned into data-intent-property
  match: "count(*) > 1 and
          ($TableProperty = 'piecewise' or $TableProperty = 'system-of-equations' or $TableProperty = 'lines') and
          (self::m:mlabeledtr or *[contains(@intent, ':equation-label')])"
  replace:
  - intent:
      name: "mlabeledtr"
      children:
      # the label cell comes first
      - x: "*[contains(@intent, ':equation-label')]"
      # one mtd holding one mrow; both take their id from the first child of the first non-label cell
      - intent:
          name: "mtd"
          attrs: "id=*[not(contains(@intent, ':equation-label'))][1]/*[1]/@id"
          children:
          - intent:
              name: "mrow"
              attrs: "id=*[not(contains(@intent, ':equation-label'))][1]/*[1]/@id"
              children:
              - x: "*[not(contains(@intent, ':equation-label'))]"

# Convert an mtr that contains an equation-label cell into an mlabeledtr, with the label moved to the front
- name: mlabeledtr
  tag: mtr
  # Note: the children haven't been processed yet, so 'intent' hasn't been turned into data-intent-property
  match: "count(*) > 1 and *[contains(@intent, ':equation-label')]"
  replace:
  - intent:
      name: "mlabeledtr"
      children:
      - x: "*[contains(@intent, ':equation-label')]"
      - x: "*[not(contains(@intent, ':equation-label'))]"

# Unlabeled row of a piecewise / system-of-equations / lines table: merge all the mtd into a single mtd
-
  name: consolidate-mtd-entries
  tag: mtr
  match: "count(*) > 1 and
          ($TableProperty = 'piecewise' or $TableProperty = 'system-of-equations' or $TableProperty = 'lines')"
  replace:
  # result: mtr > mtd > mrow > (all of the original cells)
  - intent:
      name: "mtr"
      attrs: "id='@id'"
      children:
      - intent:
          name: "mtd"
          attrs: "id='*[1]/@id'"
          children:
          - intent:
              name: "mrow"
              attrs: "id='*[2]/@id'"
              children:
              - x: "*"

# Non-label cells of a piecewise / system-of-equations / lines table are unwrapped.
# Any intent properties that are on the mtd (e.g., :pause) need to be preserved, so a cell that has
#   properties becomes an mrow carrying them; a cell without properties is replaced by its children.
# Note: at this point "intent" has been processed, so @data-intent-property is what gets tested.
-
  name: mtd-to-mrow
  tag: mtd
  match: "($TableProperty = 'piecewise' or $TableProperty = 'system-of-equations' or $TableProperty = 'lines') and
          not(contains(@data-intent-property, ':equation-label'))"
  replace:
  - test:
      if: "@data-intent-property"
      then:
      - intent:
          name: "mrow"
          attrs: "id=@id data-intent-property=@data-intent-property"
          children:
          - x: "*"
      else:
      - x: "*"