# mathcat 0.7.5
#
# MathCAT: Math Capable Assistive Technology ('Speech and braille from MathML')
# Documentation
---
- include: "../../definitions.yaml"

# If an "intent" is used, the 'terse:medium:verbose' speech for the intent name is given here for a prefix||infix||postfix||function fixity
# If only one ":" is used, the first part is used for 'terse' and the second part is used for 'medium' and 'verbose'
# If no ":"s are used, the same speech is used for all forms
# If bracketing words make sense, they are separated with ";"s
# Intent mappings must specify whether they are "prefix", "infix", "postfix", or "function" with an "=" sign ("silent" is also used below)
# If there are multiple fixities (e.g., see transpose), they are separated with "||"
#   for readability, spaces can be used around any of the delimiter characters
# Note: if there are multiple fixities, the first one is used if the fixity is not given in the intent
- IntentMappings: {
    # Each value has the form "<fixity>=<speech>". The speech syntax
    # (":" separates terse/medium/verbose, ";" separates bracketing words,
    # "||" separates alternative fixities) is described in the comments
    # that precede this block.

    # --- scripts and modified variables ---
    "indexed-by": "infix= ; sub; end sub: end sub: end subscript",
    "modified-variable": "silent= ",
    "say-super": "infix=super: superscript: superscript",   # used with 'mo' for superscripts (e.g, "<")
    "skip-super": "silent=",   # used with 'mo' for superscripts (e.g, "*")

    # --- general functions and operators ---
    "absolute-value": "function= ; absolute value: the absolute value: the absolute value; end absolute value",
    "binomial": "infix=binomial; choose; end binomial",
    "dimension-product": "infix=by",
    # "closed-interval": "other=closed-interval; from,to; end closed-interval",
    "greatest-common-divisor": "function=gcd: the gcd: the greatest common divisor",
    # terse/medium/verbose forms must be separated by ":" (a "," is not a delimiter)
    "imaginary-part": "function=imaginary part: the imaginary part: the imaginary part",
    "least-common-multiple": "function=lcm:the lcm:the least common multiple",
    # "large-op": "infix=over || other=from,to",
    "limit": "prefix=limit as: the limit as: the limit as",
    "lim-sup": "prefix=lim sup as: the limit superior as: the limit superior as",
    "lim-inf": "prefix=lim inf as: the limit inferior as : the limit inferior as",
    "logarithm-with-base": "prefix=log base: the log base: the log base",
    "natural-logarithm": "function=l n: natural log: natural log",
    "minus": "infix=minus || prefix=negative",
    "plus": "infix=plus || prefix=positive",
    # "pochhammer": "infix=permutations of",  # arguments are in reverse order, so can't work here
    # NOTE(review): unlike "imaginary-part", no shorter terse form is given here -- confirm this is intended
    "real-part": "function=the real part",

    # --- linear algebra ---
    "transpose": "postfix=transpose || function=transpose",
    "norm": "function=; norm: norm: norm; end norm",
    "trace": "function=;trace : trace: the trace; end trace",
    "dimension": "function=; dimension : dimension: the dimension; end dimension",
    "homomorphism": "function= ; homomorphism : homomorphism: the homomorphism; end homomorphism",
    "kernel": "function= ; kernel : kernel: the kernel; end kernel",
    "vector": "function= ; vector || prefix=vector",
    "cross-product": "infix=cross: cross product: cross product",
    # exactly three ":"-separated forms: terse, medium, verbose
    "dot-product": "infix=dot: dot product: dot product",

    # --- vector calculus ---
    "divergence": "function= ; dihv: divergence: divergence; end divergence",
    "curl": "function= ; curl; end curl",
    "gradient": "function= ; del: gradient: gradient; end gradient",
    "laplacian": "function=lahplahsian",   # speech engines don't do a good job with "laplacian"

    # --- chemistry ---
    "chemistry-concentration": "function= ; concentration: concentration of: the concentration of; end concentration",
  }

  # Names of functions that in terse mode don't say "of" (or its equivalent in other languages)
- TerseFunctionNames: {
    # set-style entries: only the names matter, no values are given
    "divergence",
    "gradient",
    "curl"
  }

- NavigationParts: {
    # Spoken names for the parts of a formula that can be navigated to.
    # When a construct has several parts, their names are separated by ";".

    # operators with limits
    "large-op": "base; lower limit; upper limit",

    # fractions
    "mfrac": "numerator; denominator", "fraction": "numerator; denominator",

    # roots
    "msqrt": "root", "square-root": "root",
    "mroot": "root; root index", "root": "root; root index",

    # subscripts and superscripts
    "msub": "base; subscript", "sub": "base; subscript",
    "msup": "base; superscript",
    "say-super": "base; superscript", "skip-super": "base; superscript",
    "power": "base; exponent",
    "msubsup": "base; subscript; superscript",

    # under/over scripts
    "munder": "base; under limit",
    "mover": "base; upper limit",
    "munderover": "base; under limit; upper limit",

    # Words spoken when moving into or out of a part
    # (e.g., "move right 'out of' numerator, 'in' denominator").
    # Keeping them here is a hack, but it groups them with the other navigation words.
    "in": "in",
    "out": "out of",
  }

- KnownWords: {
    # MathCAT joins some runs of three or more mi's (a common authoring mistake)
    # into a single word, but it skips runs whose letters are in alphabetical order.
    # The words below are exceptions to that skip, so they do get joined.
    # Don't list words with repeated letters.
    #
    # Bard and ChatGPT were asked for alphabetical formula words and failed to produce any;
    # these are the ones found by hand, and the list is almost certainly incomplete.
    # Change the list for other languages.
    "Abel", "aery", "ails", "airy", "amps",
    "cents", "flux", "flow", "knot",
  }

- SIPrefixes: {
    # prefix symbol -> spoken prefix name

    # multiples, largest first
    "Q": "quetta", "R": "ronna", "Y": "yotta", "Z": "zetta",
    "E": "exa", "P": "peta", "T": "tera", "G": "giga",
    "M": "mega", "k": "kilo", "h": "hecto", "da": "deka",

    # submultiples, largest first
    "d": "deci", "c": "centi", "m": "milli", "µ": "micro",
    "n": "nano", "p": "pico", "f": "femto", "a": "atto",
    "z": "zepto", "y": "yocto", "r": "ronto", "q": "quecto"
  }

# this is a list of all units that accept SIPrefixes
# from www.bipm.org/documents/20126/41483022/SI-Brochure-9-EN.pdf
#   Prefixes may be used with any of the 29 SI units with special names
#   The SI prefixes can be used with several of accepted units, but not, for example, with the non-SI units of time.
- SIUnits: {
    # unit symbol -> spoken (singular) unit name, for units that accept an SI prefix

    # base units
    "A": "amp",
    "cd": "candela",
    # ASCII "K" and U+212A KELVIN SIGN. The sign is written as an escape because it looks
    # identical to "K" and Unicode normalization folds it into "K", which would
    # silently turn the pair into a duplicate key.
    "K": "kelvin", "\u212A": "kelvin",
    "g": "gram",
    "m": "metre",     # British spelling works for US also
    "mol": "mole",
    "s": "second", "sec": "second",  # "sec" not actually legal

    # derived units
    "Bq": "becquerel",
    "C": "coulomb",
    "°C": "degree celsius", "℃": "degree celsius", # should only take negative powers
    "F": "farad",
    "Gy": "gray",
    "H": "henry",
    "Hz": "hertz",
    "J": "joule",
    "kat": "katal",
    "lm": "lumen",
    "lx": "lux",
    "N": "newton",
    # U+03A9 GREEK CAPITAL LETTER OMEGA and U+2126 OHM SIGN (escaped for the same
    # reason as the kelvin sign: normalization folds it into the Greek letter)
    "Ω": "ohm", "\u2126": "ohm",
    "Pa": "pascal",
    "S": "siemens",
    "Sv": "sievert",
    "T": "tesla",
    "V": "volt",
    "W": "watt",
    "Wb": "weber",

    # accepted (plus a few variants) that take SI prefixes
    "l": "litre", "L": "litre", "ℓ": "litre",  # Should only take negative powers; British spelling works for US also ("ℓ" is U+2113)
    "t": "metric ton",           # should only take positive powers
    "Da": "dalton",
    "Np": "neper",               # should only take negative powers
    "u": "atomic mass unit",     # 'u' is correct: https://en.wikipedia.org/wiki/Dalton_(unit)
    "eV": "electronvolt",
    "rad": "radian",             # should only take negative powers
    "sr": "steradian",           # should only take negative powers

    # others that take a prefix
    "a": "annum",               # should only take positive powers
    "as": "arcsecond",          # see en.wikipedia.org/wiki/Minute_and_second_of_arc

    # technically wrong, but used in practice with SI Units
    "b": "bit",               # should only take positive powers
    "B": "byte",              # should only take positive powers
    "Bd": "baud",             # should only take positive powers
  }


- UnitsWithoutPrefixes: {
    # unit symbol -> spoken (singular) unit name, for units that are not combined with SI prefixes

    # time
    "″": "second", "\"": "second",
    "′": "minute", "'": "minute","min": "minute",
    "h": "hour", "hr": "hour", "Hr": "hour",
    "d": "day", "dy": "day",
    "w": "week", "wk": "week",
    "y": "year", "yr": "year",

    # angles
    "°": "degree", "deg": "degree", # should only take negative powers
    "arcmin": "arcminute",
    "amin": "arcminute",
    "am": "arcminute",
    "MOA": "arcminute",
    "arcsec": "arcsecond",
    "asec": "arcsecond",

    # distance
    "au": "astronomical unit", "AU": "astronomical unit",
    "ltyr": "light year", "ly": "light year",
    "pc": "parsec",
    # U+00C5 (A with ring above) and U+212B ANGSTROM SIGN. The sign is written as an
    # escape because it looks identical to U+00C5 and Unicode normalization folds it
    # into U+00C5, which would silently turn the pair into a duplicate key.
    "Å": "angstrom", "\u212B": "angstrom",
    "fm": "fermi",

    # others
    "ha": "hectare",
    # "B": "bel",    # "B" more commonly means bytes
    "dB": "decibel", # already logarithmic, so not used with SI prefixes

    "amu": "atomic mass unit",
    "atm": "atmosphere",
    "bar": "bar",
    "cal": "calorie",
    "Ci": "curie",
    "grad": "gradian",
    "M": "molar",
    "R": "roentgen",
    "rpm": "revolution per minute",
    "℧": "m-h-o",      # U+2127 INVERTED OHM SIGN
    "dyn": "dyne",
    "erg": "erg",

    # powers of 2 used with bits and bytes
    "Kib": "kibi-bit", "Mib": "mebi-bit", "Gib": "gibi-bit", "Tib": "tebi-bit", "Pib": "pebi-bit", "Eib": "exbi-bit", "Zib": "zebi-bit", "Yib": "yobi-bit",
    "KiB": "kibi-byte", "MiB": "mebi-byte", "GiB": "gibi-byte", "TiB": "tebi-byte", "PiB": "pebi-byte", "EiB": "exbi-byte", "ZiB": "zebi-byte", "YiB": "yobi-byte",
  }

  # this will only be used if the language is English, so it can be empty for other countries
- EnglishUnits: {
    # unit abbreviation -> spoken unit name; every value is the singular form

    # length
    "in": "inch",
    "ft": "foot",
    "yd": "yard",     # added: "sq yd" and "cu yd" were listed but plain "yd" was not
    "mi": "mile",
    "rd": "rod",
    "li": "link",
    "ch": "chain",

    # area
    "sq in": "square inch", "sq. in": "square inch", "sq. in.": "square inch",
    "sq ft": "square foot", "sq. ft": "square foot", "sq. ft.": "square foot",
    "sq yd": "square yard", "sq. yd": "square yard", "sq. yd.": "square yard",
    "sq mi": "square mile", "sq. mi": "square mile", "sq. mi.": "square mile",
    "ac": "acre",
    "FBM": "board foot",

    # volume
    "cu in": "cubic inch", "cu. in": "cubic inch", "cu. in.": "cubic inch",
    "cu ft": "cubic foot", "cu. ft": "cubic foot", "cu. ft.": "cubic foot",
    "cu yd": "cubic yard", "cu. yd": "cubic yard", "cu. yd.": "cubic yard",
    "bbl": "barrel", "BBL": "barrel",
    "pk": "peck",
    "bu": "bushel",
    "tsp": "teaspoon",
    "tbl": "tablespoon",

    # liquid
    "fl dr": "fluid dram",     # singular, like every other entry
    "fl oz": "fluid ounce",
    "gi": "gill",
    "cp": "cup", "cup": "cup",
    "pt": "pint",
    "qt": "quart",
    "gal": "gallon",

    # weight
    "gr": "grain",
    "dr": "dram",
    "oz": "ounce", "℥": "ounce",     # "℥" is U+2125 OUNCE SIGN
    "lb": "pound",
    "cwt": "hundredweight",
    "dwt": "pennyweight",
    "oz t": "troy ounce",
    "lb t": "troy pound",

    # energy
    "hp": "horsepower",
    "BTU": "BTU",
    "°F": "degree fahrenheit", "℉": "degree fahrenheit",

    # other
    "mph": "mile per hour",
    "mpg": "mile per gallon",
  }

- PluralForms: {
    # singular unit name -> plural unit name
    # Only irregular plurals need to be listed
    # (presumably any unit not listed gets a regular plural -- TODO confirm against the unit rules).
    # FIX: this needs to be fleshed out

    # irregular words
    "inch": "inches", "square inch": "square inches", "cubic inch": "cubic inches",
    "foot": "feet", "square foot": "square feet", "cubic foot": "cubic feet",
    "board foot": "board feet",

    # the plural is on the first word
    "degree celsius": "degrees celsius",
    "degree fahrenheit": "degrees fahrenheit",
    "revolution per minute": "revolutions per minute",
    "mile per hour": "miles per hour",
    "mile per gallon": "miles per gallon",

    # plural is the same as the singular
    "hertz": "hertz",
    "siemens": "siemens",
    "lux": "lux",
    "horsepower": "horsepower",
  }

# Lines starting with "#" are a comment
# Each definition in this file is of the form
# - name: { "...", "...", "..." }
# For numbers,
# - name: [ "...", "...", "..." ]


# ----------------  Cardinal and Ordinal Numbers  --------------------------
# The following definitions are used to convert numbers to words
# They are mainly used for ordinals, of which there are two cases:
# 1. Regular ordinals: first, second, third, ...
# 2. Ordinals used in the denominator of fractions (e.g, one half, one third)
#    When used in the denominator of fractions, a plural version might be
#    used (e.g., two halves, two thirds)
# Although a lot of languages are regular after a few entries, for generality,
# the following lists should be filled out even though they are the same
# or easily derived from others in many languages (e.g, an 's' is added for plurals).
# The larger ordinal numbers (e.g., millionth) are used when there are only
# '0's after that decimal place (e.g., 23000000).

# All definitions start 0, 10, 100, etc.

# The definitions for the "ones" should extend until a regular pattern begins
#   The minimum length is 10.

# For English, a regular pattern starts at twenty
- NumbersOnes: [
        # cardinal words for 0 through 19; the entry at position n is the word for n
        "zero", "one", "two", "three", "four",
        "five", "six", "seven", "eight", "nine",
        "ten", "eleven", "twelve", "thirteen", "fourteen",
        "fifteen", "sixteen", "seventeen", "eighteen", "nineteen"
    ]

- NumbersOrdinalOnes: [
        # ordinal words for 0 through 19; the entry at position n is the ordinal for n
        "zeroth", "first", "second", "third", "fourth",
        "fifth", "sixth", "seventh", "eighth", "ninth",
        "tenth", "eleventh", "twelfth", "thirteenth", "fourteenth",
        "fifteenth", "sixteenth", "seventeenth", "eighteenth", "nineteenth"
    ]

- NumbersOrdinalPluralOnes: [
        # plural ordinal words for 0 through 19 (e.g., "two thirds");
        # every entry is plural, including the first two
        "zeroths", "firsts", "seconds", "thirds", "fourths",
        "fifths", "sixths", "sevenths", "eighths", "ninths",
        "tenths", "elevenths", "twelfths", "thirteenths", "fourteenths",
        "fifteenths", "sixteenths", "seventeenths", "eighteenths", "nineteenths"
    ]

    # stop when regularity begins
- NumbersOrdinalFractionalOnes: [
        # singular denominator words that differ from the regular ordinals (e.g., "one half")
        "zero",
        "first",
        "half"
    ]

    # stop when regularity begins
- NumbersOrdinalFractionalPluralOnes: [
        # plural denominator words that differ from the regular plural ordinals (e.g., "two halves")
        "zeros",
        "firsts",
        "halves"
    ]


    # What to use for multiples of 10
- NumbersTens: [
        # position n is the word for n * 10; position 0 is empty
        "", "ten", "twenty", "thirty", "forty",
        "fifty", "sixty", "seventy", "eighty", "ninety"
    ]

- NumbersOrdinalTens: [
        # position n is the ordinal for n * 10; position 0 is empty
        "", "tenth", "twentieth", "thirtieth", "fortieth",
        "fiftieth", "sixtieth", "seventieth", "eightieth", "ninetieth"
    ]

- NumbersOrdinalPluralTens: [
        # position n is the plural ordinal for n * 10; position 0 is empty
        "", "tenths", "twentieths", "thirtieths", "fortieths",
        "fiftieths", "sixtieths", "seventieths", "eightieths", "ninetieths"
    ]


- NumbersHundreds: [
        # position n is the phrase for n * 100; position 0 is empty
        "", "one hundred", "two hundred", "three hundred", "four hundred",
        "five hundred", "six hundred", "seven hundred", "eight hundred", "nine hundred"
    ]

- NumbersOrdinalHundreds: [
        # position n is the ordinal phrase for n * 100; position 0 is empty
        "", "one hundredth", "two hundredth", "three hundredth", "four hundredth",
        "five hundredth", "six hundredth", "seven hundredth", "eight hundredth", "nine hundredth"
    ]

- NumbersOrdinalPluralHundreds: [
        # position n is the plural ordinal phrase for n * 100; position 0 is empty
        "", "one hundredths", "two hundredths", "three hundredths", "four hundredths",
        "five hundredths", "six hundredths", "seven hundredths", "eight hundredths", "nine hundredths"
    ]
      

    # At this point, hopefully the language is regular. If not, code needs to be written
- NumbersLarge: [
        # names for successive groups of three digits; the first entry is empty
        "", "thousand", "million", "billion",
        "trillion", "quadrillion", "quintillion", "sextillion",
        "septillion", "octillion", "nonillion"
    ]
      
- NumbersOrdinalLarge: [
        # ordinal names for successive groups of three digits; the first entry is empty
        "", "thousandth", "millionth", "billionth",
        "trillionth", "quadrillionth", "quintillionth", "sextillionth",
        "septillionth", "octillionth", "nonillionth"
    ]
      
- NumbersOrdinalPluralLarge: [
        # plural ordinal names for successive groups of three digits; the first entry is empty
        "", "thousandths", "millionths", "billionths",
        "trillionths", "quadrillionths", "quintillionths", "sextillionths",
        "septillionths", "octillionths", "nonillionths"
    ]