mumu 0.10.0 - Docs.rs

/**
 * syntax.js
 * ---------
 * Provides "window.syntax" with an advanced bracket-aware syntax highlighting for
 * Lava/JS-like code. It does:
 *   - String detection (only double-quoted strings are recognized here; feel free
 *     to adapt for single quotes or both).
 *   - Inline comment with "//"
 *   - Block comment with "/*...*\/"
 *   - Bracket stack for (), {}, []
 *   - Basic number detection
 *   - Basic 'fn' as keyword, plus a small set of builtins
 *   - Multi-line handling
 *
 * We'll generate tokens via a single-pass parse. Each token carries a .type that
 * is eventually mapped to a CSS class on the <span>..</span> wrapping its text.
 *
 * Then highlightAllCodeBlocks() is called by docs-dynamic.js after doc blocks are loaded.
 */

(function(){
  // Publish the public API on the global `window` object. The function
  // declaration referenced here is hoisted, so it can be used before its
  // definition further down in this closure.
  window.syntax = { highlightAllCodeBlocks: highlightAllCodeBlocks };

  /**
   * Identifier names that receive the "builtin" highlight. Lookups against
   * this list are exact, case-sensitive string matches. Extend as needed.
   */
  const BUILTINS = [
    'map', 'eq', 'plus', 'sput', 'slog', 'sum',
    'inc', 'keys', 'tap', 'upper', 'lower', 'multiply',
    'prop', 'assoc', 'merge', 'select', 'cat', 'comp',
    'range', 'filter', 'reduce', 'pluck', 'head', 'tail',
    'take', 'some', 'none', 'flatten', 'push', 'fall',
    'every', 'find', 'group', 'intersect', 'sort', 'union',
    'each', 'length'
  ];

  /** Reserved words that receive the "keyword" highlight. */
  const KEYWORDS = [
    'fn'
  ];

  /**
   * highlightAllCodeBlocks():
   *   Re-renders every element matching ".code-block": the element's plain
   *   text is tokenized by parseToTokens() and its markup is then replaced by
   *   the <span>-wrapped output of tokensToHtml().
   */
  function highlightAllCodeBlocks() {
    for (const codeElement of document.querySelectorAll('.code-block')) {
      // textContent yields the text without markup, so only raw source is parsed
      const sourceText = codeElement.textContent;
      codeElement.innerHTML = tokensToHtml(parseToTokens(sourceText));
    }
  }

  /**
   * parseToTokens(code): single-pass tokenizer.
   *
   * Walks the input one character at a time with a small state machine:
   *   state = NORMAL | IN_STRING | IN_LINE_COMMENT | IN_BLOCK_COMMENT
   * and collects raw tokens of the shape
   *   { type: 'string'|'comment'|'bracket'|'normal', text }
   * Bracket tokens additionally carry { bracketType, isOpen }.
   *
   * Every input character ends up in exactly one raw token, including the
   * comment delimiters and the quotes around strings, so joining the raw token
   * texts reproduces the input.
   *
   * The raw list is passed through postProcessTokens() before being returned.
   *
   * @param {string} code - source text to tokenize; null/undefined is treated
   *   as the empty string, any other value is converted with String().
   * @returns {Array<Object>} whatever postProcessTokens() returns for the raw
   *   token list.
   */
  function parseToTokens(code) {
    const source = (code == null) ? '' : String(code);
    const len = source.length;
    const tokens = [];
    // Open '(', '{', '[' characters seen so far, innermost last. Only used for
    // bookkeeping; unclosed or mismatched brackets are not reported.
    const bracketStack = [];
    let i = 0;
    let state = 'NORMAL';
    let currentText = '';
    // Inside a string: true when the previous character was a backslash that
    // has not itself been escaped, i.e. the current character is escaped.
    let escaped = false;

    // Emit the accumulated text (if any) as one token and reset the buffer.
    function pushToken(type) {
      if (currentText.length > 0) {
        tokens.push({ type, text: currentText });
        currentText = '';
      }
    }

    // Emit a single-character bracket token.
    function pushBracketToken(ch, isOpen) {
      tokens.push({ type: 'bracket', text: ch, bracketType: ch, isOpen });
    }

    while (i < len) {
      const c = source[i];
      const next = (i + 1 < len) ? source[i + 1] : '';

      if (state === 'NORMAL') {
        // Comment opener ("//" or "/*"): flush pending text and keep the
        // two-character delimiter as the start of the comment token so that
        // no source characters are dropped from the output.
        if (c === '/' && (next === '/' || next === '*')) {
          pushToken('normal');
          currentText = c + next;
          state = (next === '/') ? 'IN_LINE_COMMENT' : 'IN_BLOCK_COMMENT';
          i += 2;
          continue;
        }
        // String opener: the quote itself belongs to the string token.
        if (c === '"') {
          pushToken('normal');
          currentText = c;
          escaped = false;
          state = 'IN_STRING';
          i++;
          continue;
        }
        // Opening bracket
        if (c === '(' || c === '{' || c === '[') {
          pushToken('normal');
          bracketStack.push(c);
          pushBracketToken(c, true);
          i++;
          continue;
        }
        // Closing bracket: pop only when it matches the innermost opener; a
        // mismatched closer is still emitted as a bracket token.
        if (c === ')' || c === '}' || c === ']') {
          pushToken('normal');
          const top = (bracketStack.length > 0) ? bracketStack[bracketStack.length - 1] : null;
          if (matchesBracket(top, c)) {
            bracketStack.pop();
          }
          pushBracketToken(c, false);
          i++;
          continue;
        }

        // Anything else accumulates as normal text.
        currentText += c;
        i++;
      } else if (state === 'IN_STRING') {
        currentText += c;
        if (escaped) {
          // This character was escaped by the preceding backslash.
          escaped = false;
        } else if (c === '\\') {
          escaped = true;
        } else if (c === '"') {
          // Unescaped closing quote ends the string.
          pushToken('string');
          state = 'NORMAL';
        }
        i++;
      } else if (state === 'IN_LINE_COMMENT') {
        // A line comment runs through (and includes) the terminating newline.
        currentText += c;
        if (c === '\n') {
          pushToken('comment');
          state = 'NORMAL';
        }
        i++;
      } else if (state === 'IN_BLOCK_COMMENT') {
        if (c === '*' && next === '/') {
          // Closing delimiter: consume both characters at once.
          currentText += c + next;
          i += 2;
          pushToken('comment');
          state = 'NORMAL';
        } else {
          currentText += c;
          i++;
        }
      }
    }

    // Flush whatever is left; an unterminated string or comment keeps its type.
    if (currentText.length > 0) {
      const finalType = (state === 'IN_STRING') ? 'string'
        : (state === 'IN_LINE_COMMENT' || state === 'IN_BLOCK_COMMENT') ? 'comment'
        : 'normal';
      pushToken(finalType);
    }

    // bracketStack may still hold unclosed brackets here; they are not flagged.
    return postProcessTokens(tokens);
  }

  /**
   * matchesBracket(opening, closing): true only when `opening` and `closing`
   * form one of the pairs (), {} or []; every other combination yields false.
   */
  function matchesBracket(opening, closing) {
    switch (opening) {
      case '(':
        return closing === ')';
      case '{':
        return closing === '}';
      case '[':
        return closing === ']';
      default:
        return false;
    }
  }

  /**
   * postProcessTokens(tokens):
   *   Returns a new token list in which every 'normal' token has been replaced
   *   by the sub-tokens splitNormalToken() produces for its text. Tokens of any
   *   other type are carried over unchanged, in their original order.
   */
  function postProcessTokens(tokens) {
    const expanded = [];
    for (const token of tokens) {
      if (token.type !== 'normal') {
        expanded.push(token);
        continue;
      }
      // Refine plain text so numbers, keywords and builtins can be highlighted
      for (const piece of splitNormalToken(token.text)) {
        expanded.push(piece);
      }
    }
    return expanded;
  }

  /**
   * splitNormalToken(txt): breaks a run of plain text into finer tokens.
   *
   * The text is scanned for lexemes: identifier-like words, unsigned
   * integer/decimal numbers, or runs of other non-whitespace symbols. Each
   * lexeme becomes a 'number', 'keyword' or 'builtin' token when the matching
   * predicate accepts it, otherwise a 'normal' token. Text between lexemes
   * (whitespace) is kept as 'normal' tokens so nothing is dropped.
   * Not fully robust, but good enough for highlighting.
   */
  function splitNormalToken(txt) {
    const pieces = [];
    const lexemePattern = /([A-Za-z_]\w*|\d+(\.\d+)?|[^A-Za-z0-9_\s]+)/g;

    // First predicate that accepts the lexeme decides its type.
    const classify = (lexeme) => {
      if (isNumber(lexeme)) return 'number';
      if (isKeyword(lexeme)) return 'keyword';
      if (isBuiltin(lexeme)) return 'builtin';
      return 'normal'; // punctuation or an ordinary identifier
    };

    let cursor = 0;
    for (let hit = lexemePattern.exec(txt); hit !== null; hit = lexemePattern.exec(txt)) {
      // Text skipped since the previous lexeme
      const gap = txt.slice(cursor, hit.index);
      if (gap) {
        pieces.push({ type: 'normal', text: gap });
      }
      const lexeme = hit[0];
      pieces.push({ type: classify(lexeme), text: lexeme });
      cursor = lexemePattern.lastIndex;
    }

    // Trailing text after the last lexeme
    if (cursor < txt.length) {
      pieces.push({ type: 'normal', text: txt.slice(cursor) });
    }
    return pieces;
  }

  // isNumber(str): true when the whole string is an unsigned integer or a
  // decimal with digits on both sides of the dot (e.g. "42", "3.14").
  function isNumber(str) {
    const numericLiteral = /^\d+(\.\d+)?$/;
    return numericLiteral.test(str);
  }
  // isKeyword(str): whether str is exactly one of the KEYWORDS entries.
  function isKeyword(str) {
    return KEYWORDS.some((keyword) => keyword === str);
  }
  // isBuiltin(str): whether str is exactly one of the BUILTINS entries.
  function isBuiltin(str) {
    return BUILTINS.some((builtinName) => builtinName === str);
  }

  /**
   * tokensToHtml(tokens): renders a token list as one HTML string. Each token
   * becomes <span class="af-...">text</span>, with the text HTML-escaped and
   * the class chosen from the token's type. Unrecognized types (and 'normal')
   * use "af-normal".
   */
  function tokensToHtml(tokens) {
    const cssClassByType = new Map([
      ['string', 'af-string'],
      ['comment', 'af-comment'],
      ['bracket', 'af-bracket'],
      ['number', 'af-number'],
      ['builtin', 'af-builtin'],
      ['keyword', 'af-keyword']
    ]);

    const spans = [];
    for (const token of tokens) {
      const body = escapeHtml(token.text);
      // Fallback covers 'normal' and any type not listed above
      const cssClass = cssClassByType.get(token.type) || 'af-normal';
      spans.push(`<span class="${cssClass}">${body}</span>`);
    }
    return spans.join('');
  }

  // Simple HTML escape: rewrites &, < and > as entities so the text can be
  // placed inside element content. '&' is handled first so the ampersands
  // introduced by the later substitutions are not escaped a second time.
  function escapeHtml(str) {
    const substitutions = [
      [/&/g, '&amp;'],
      [/</g, '&lt;'],
      [/>/g, '&gt;']
    ];
    return substitutions.reduce(
      (escapedSoFar, [pattern, entity]) => escapedSoFar.replace(pattern, entity),
      str
    );
  }

})();