mq-lang 0.5.10

Core language implementation for the mq query language
Documentation
# XML Implementation in mq

# Parses the attribute list of an opening tag.
#
# `input` is the text that follows the tag name. Attributes are consumed until
# the input is empty or starts with ">" or "/>".
#
# Returns `[attributes, remaining]`: `attributes` is a dict mapping each
# attribute name to its unquoted value, `remaining` is the trimmed input left
# after the last attribute.
#
# Raises an error for an invalid attribute name, a missing "=", an unquoted
# value, or a quoted value that is never closed.
def _parse_attributes(input):
  let input = trim(input)
  | let attributes = {}
  | let result = while (!is_empty(input) && !starts_with(input, ">") && !starts_with(input, "/>")):
      # The pattern is anchored with "^" so the name has to start at the
      # current position; the name's length is used below to slice from 0.
      # ":" and "." are accepted so prefixed names such as "xmlns:ns" parse.
      let name_match = regex_match(input, "^([a-zA-Z_:][a-zA-Z0-9_:.-]*)")
      | if (is_empty(name_match)):
          error("Invalid attribute name")
      | let attr_name = first(name_match)
      | let input = input[len(attr_name):len(input)]
      | let input = trim(input)
      | if (!starts_with(input, "=")):
          error("Expected '=' after attribute name")
      | let input = input[1:len(input)]
      | let input = trim(input)
      | let quote_char = if (starts_with(input, "\"")):
              "\""
            elif (starts_with(input, "'")):
              "'"
            else:
              error("Attribute value must be quoted")
      # Drop the opening quote.
      | let input = input[1:len(input)]
      | let value_result = do
              let pos = 0
              | let input_len = len(input)
              | let scanned = while (pos < input_len && input[pos] != quote_char):
                              let pos = pos + 1
                              | pos
                            end
              # The loop yields None when its body never runs, which happens
              # for an empty value such as a="" (the closing quote is at 0).
              | let pos = if (is_none(scanned)): 0 else: scanned
              # Reaching the end of the input means no closing quote exists.
              | if (pos == input_len): error("Unclosed attribute value")
              | [input[0:pos], input[pos + 1:len(input)]]
            end
      | let attr_value = value_result[0]
      | let input = value_result[1]
      | let attributes = set(attributes, attr_name, attr_value)
      | let input = trim(input)
      | [attributes, input]
    end
  # No loop iteration means there were no attributes at all.
  | if (is_none(result)): [{}, input] else: result
end

# Returns the nesting-depth change caused by an opening tag whose name ends at
# `name_end`: 0 when the tag is self-closing ("/" directly before its ">"),
# otherwise 1.
# The scan stops at the first ">" and does not skip quoted attribute values.
def _opening_tag_depth_delta(input, name_end):
  let pos = name_end
  | let input_len = len(input)
  | let scanned = while (pos < input_len && input[pos] != ">"):
      let pos = pos + 1
      | pos
    end
  # The loop yields None when ">" is already at `name_end`.
  | let gt_pos = if (is_none(scanned)): name_end else: scanned
  | if (name_end < gt_pos && input[gt_pos - 1] == "/"): 0 else: 1
end

# Finds the position of the closing tag that matches an already-consumed
# opening `<tag_name ...>`.
#
# `input` is the text following the opening tag. Nested elements with the same
# name are tracked with a depth counter so that their closing tags are skipped.
#
# Returns the index in `input` where the matching "</tag_name>" starts. When no
# matching closing tag exists, the position where scanning stopped (the end of
# the input) is returned; 0 is returned for empty input.
def _find_closing_tag(input, tag_name):
  let closing_tag = "</" + tag_name + ">"
  | let pos = 0
  | let depth = 0
  | let open_tag_start = "<" + tag_name
  | let closing_tag_len = len(closing_tag)
  | let open_tag_start_len = len(open_tag_start)
  | let input_len = len(input)
  | let has_close_tag = false
  | let result = while (pos < input_len && !has_close_tag):
      let next_char_pos = pos + open_tag_start_len
      # Each branch yields [new depth, new position, found matching close].
      | let result = if (next_char_pos <= input_len && input[pos:next_char_pos] == open_tag_start):
              # Only count it as the same tag when the name ends here;
              # otherwise "<ab" would be mistaken for "<a".
              if (next_char_pos < input_len && (input[next_char_pos] == " " || input[next_char_pos] == "\n" || input[next_char_pos] == ">" || input[next_char_pos] == "/")):
                [depth + _opening_tag_depth_delta(input, next_char_pos), next_char_pos, false]
              else:
                [depth, pos + 1, false]
            elif (pos + closing_tag_len <= input_len && input[pos:pos + closing_tag_len] == closing_tag && depth == 0):
              [depth, pos, true]
            elif (pos + closing_tag_len <= input_len && input[pos:pos + closing_tag_len] == closing_tag):
              # Closing tag of a nested element: leave one nesting level and
              # keep scanning for the outer closing tag.
              [depth - 1, pos + closing_tag_len, false]
            else:
              [depth, pos + 1, false]
      | let depth = result[0]
      | let pos = result[1]
      | let has_close_tag = result[2]
      | result
    end
  # The loop yields None when the input is empty.
  | if (is_none(result)): 0 else: result[1]
end

# Splits the text that follows an opening tag into the element's inner content
# and the text after its closing tag.
#
# Returns `[content, remaining]`, where `content` is everything before the
# position reported by `_find_closing_tag` and `remaining` starts right after
# the "</tag_name>" that begins at that position.
def _parse_element_content(input, tag_name):
  let end_tag = "</" + tag_name + ">"
  | let split_at = _find_closing_tag(input, tag_name)
  | let after_end_tag = split_at + len(end_tag)
  | [input[0:split_at], input[after_end_tag:len(input)]]
end

# Skips a leading XML declaration ("<?xml ... ?>").
#
# Returns the trimmed input that follows the terminating "?>".
# Raises an error when the input does not start with "<?xml" or when no "?>"
# is found.
def _parse_xml_declaration(input):
  let has_prefix = starts_with(input, "<?xml")
  | if (!has_prefix): error("Invalid XML declaration")
  | let terminator_pos = index(input, "?>")
  | if (terminator_pos != -1):
      trim(input[terminator_pos + 2:len(input)])
    else:
      error("Unclosed XML declaration")
end


# Parses the top-level element of a document.
#
# Leading/trailing whitespace is trimmed and an optional XML declaration is
# skipped first. Returns `[None, input]` when what is left does not start with
# "<", otherwise the result of `_parse_xml_tag`.
def _parse_xml_element(input):
  let trimmed = trim(input)
  | let body = if (!starts_with(trimmed, "<?xml")):
      trimmed
    else:
      _parse_xml_declaration(trimmed)
  | if (starts_with(body, "<")):
      _parse_xml_tag(body)
    else:
      [None, body]
end

# Builds an element that has an explicit closing tag.
#
# `input` is the text following the opening tag's ">". The inner content is
# cut out with `_parse_element_content` and parsed with `_parse_xml_content`.
# Returns `[element, remaining]`, where `element` is a dict with the keys
# "tag", "attributes", "children" and "text".
def _create_element_with_content(tag_name, attributes, input):
  let split = _parse_element_content(input, tag_name)
  | let inner = _parse_xml_content(split[0])
  | let element = {"tag": tag_name, "attributes": attributes, "children": inner["children"], "text": inner["text"]}
  | [element, split[1]]
end

# Parses one element starting at its opening "<".
#
# Reads the tag name and its attributes, then dispatches on how the opening
# tag ends: "/>" yields a self-closing element, ">" yields an element whose
# content is parsed up to the matching closing tag.
#
# Returns `[element, remaining]`.
# Raises an error when the tag name is invalid or the opening tag is not
# terminated by ">" or "/>".
def _parse_xml_tag(input):
  # Drop the leading "<".
  let input = input[1:len(input)]
  # The pattern is anchored with "^" so the name has to start right after "<";
  # the name's length is used below to slice from position 0.
  # ":" and "." are accepted so prefixed names such as "ns:item" parse.
  | let name_match = regex_match(input, "^([a-zA-Z_:][a-zA-Z0-9_:.-]*)")
  | if (is_empty(name_match)):
      error("Invalid tag name")
  | let tag_name = first(name_match)
  | let input = input[len(tag_name):len(input)]
  | let attr_result = _parse_attributes(input)
  | let attributes = attr_result[0]
  | let input = attr_result[1]
  | if (starts_with(input, "/>")):
      _create_self_closing_element(tag_name, attributes, input[2:len(input)])
    elif (starts_with(input, ">")):
      _create_element_with_content(tag_name, attributes, input[1:len(input)])
    else:
      error("Expected '>' or '/>' after tag")
end

# Builds the result for a self-closing tag: an element with no children and no
# text, paired with the untouched `remaining` input.
def _create_self_closing_element(tag_name, attributes, remaining):
  [{"tag": tag_name, "attributes": attributes, "children": [], "text": None}, remaining]
end

# Parses the inner content of an element into child elements and text.
#
# The content is consumed piece by piece: a CDATA section contributes its raw
# content to the text, any other "<" starts a child element, and everything
# else up to the next "<" is plain text.
#
# Returns a dict `{"children": [...], "text": ...}`. "text" is the trimmed
# concatenation of all text and CDATA pieces, or None when that is empty.
def _parse_xml_content(input):
  let input = trim(input)
  | let children = []
  | let text_parts = []
  | let result = while (!is_empty(input)):
      # Each helper returns [element or None, text parts, remaining input].
      # CDATA is checked before the generic "<" case so it is collected as
      # text instead of being treated as a child element.
      let result = if (starts_with(input, "<![CDATA[")):
        _parse_cdata_content(input, text_parts)
      elif (starts_with(input, "<")):
        _parse_child_element(input, children)
      else:
        _parse_text_content(input, text_parts)
      | let children = children + if (is_empty(result[0])): [] else: result[0]
      # Only text/CDATA steps (no element returned) carry updated text parts;
      # after a child element the text collected so far must be kept.
      | let text_parts = if (is_none(result[0])): result[1] else: text_parts
      | let input = result[2]
      | [children, text_parts, input]
    end
  # The loop yields None when the content was empty.
  | let children = if (is_none(result)): [] else: result[0]
  | let text_parts = if (is_none(result)): [] else: result[1]
  | let joined = if (is_empty(text_parts)): "" else: trim(join(text_parts, ""))
  # Whitespace between child elements must not turn into an empty text value.
  | let text = if (is_empty(joined)): None else: joined
  | {"children": children, "text": text}
end

# Parses one child node at the start of `input`.
#
# Returns `[element, text_parts, remaining]`, the shape consumed by
# `_parse_xml_content`. For an element, `text_parts` is `[]`. The `children`
# parameter is accepted for call compatibility and is not read.
def _parse_child_element(input, children):
  let element_result = _parse_xml_element_recursive(input)
  # `_parse_xml_element_recursive` returns a 3-element
  # [None, text_parts, remaining] result for CDATA and a 2-element
  # [element, remaining] result otherwise. The CDATA result already has the
  # shape returned here, so it is passed through unchanged.
  | if (len(element_result) == 3):
      element_result
    else:
      [element_result[0], [], element_result[1]]
end

# Extracts a CDATA section from the start of `input`.
#
# Returns `[content, remaining]`: the raw text between "<![CDATA[" and the
# first "]]>", and the input after that terminator.
# Raises an error when the input does not start with "<![CDATA[" or when no
# "]]>" is found.
def _parse_cdata(input):
  let opener = "<![CDATA["
  | let closer = "]]>"
  | if (!starts_with(input, opener)):
      error("Invalid CDATA section")
  | let closer_pos = index(input, closer)
  | if (closer_pos == -1):
      error("Unclosed CDATA section")
  # The content starts right after the 9-character opener.
  | [input[len(opener):closer_pos], input[closer_pos + len(closer):len(input)]]
end

# Parses a CDATA section and adds its content to the collected text parts.
#
# Returns `[None, text_parts, remaining]`; the first slot is None because a
# CDATA section produces no element.
def _parse_cdata_content(input, text_parts):
  let parsed = _parse_cdata(input)
  | let collected = text_parts + parsed[0]
  | [None, collected, parsed[1]]
end

# Consumes plain text from the start of `input` up to (not including) the next
# "<", and adds it to the collected text parts.
#
# Returns `[None, text_parts, remaining]`; the first slot is None because text
# produces no element.
# Raises an error when the scan loop never runs, i.e. when `input` is empty or
# already starts with "<".
def _parse_text_content(input, text_parts):
  let cursor = 0
  | let cursor = while (cursor < len(input) && input[cursor] != "<"):
      let cursor = cursor + 1
      | cursor
    end
  # A loop that never executed yields None instead of a position.
  | if (is_none(cursor)): error("Unexpected end of input while parsing text content")
  | let collected = text_parts + input[0:cursor]
  | [None, collected, input[cursor:len(input)]]
end

# Parses one node found inside an element's content.
#
# After trimming, a CDATA section is delegated to `_parse_cdata_content`
# (with an empty list of text parts), any other "<" is delegated to
# `_parse_xml_tag`, and input that does not start with "<" yields
# `[None, input]`.
def _parse_xml_element_recursive(input):
  let candidate = trim(input)
  | if (starts_with(candidate, "<![CDATA[")):
      _parse_cdata_content(candidate, [])
    elif (starts_with(candidate, "<")):
      _parse_xml_tag(candidate)
    else:
      [None, candidate]
end

# Parses an XML string and returns the corresponding data structure.
#
# The input is converted to a string and all "<!-- ... -->" comments are
# removed before parsing. The result is the first slot of what
# `_parse_xml_element` returns: the root element dict, or None when the input
# does not start with an element.
def xml_parse(input):
  let source = to_string(input)
  | let without_comments = gsub(source, "(?s)<!--.*?-->", "")
  | let parsed = _parse_xml_element(without_comments)
  | parsed[0]
end

# Serializes a parsed element (a dict with a non-None "tag") to XML markup.
# Any other value is converted with `to_string`.
#
# Attribute values and text are written as-is, without escaping.
def _xml_stringify(data):
  # Renders attributes as ` name="value"` pairs with a leading space, or as an
  # empty string when there are none.
  def _format_attributes(attributes):
    if (is_empty(keys(attributes))):
      ""
    else:
      " " + join(map(entries(attributes), fn(pair): first(pair) + "=\"" + last(pair) + "\"";), " ")
  end

  # Renders one element: self-closing when it has neither children nor text,
  # otherwise with an explicit closing tag.
  def _format_xml_element(element):
    let name = element["tag"]
    | let kids = element["children"]
    | let body_text = element["text"]
    | let attr_markup = _format_attributes(element["attributes"])
    | if (!is_empty(kids) || !is_none(body_text)):
        _format_xml_element_with_content(name, attr_markup, kids, body_text)
      else:
        "<" + name + attr_markup + "/>"
  end

  # Renders an element with content; the text is written before the children.
  def _format_xml_element_with_content(tag, attr_str, children, text):
    let text_markup = if (is_none(text)):
      ""
    else:
      text
    | let children_markup = if (is_empty(children)):
          ""
        else:
          join(map(children, _xml_stringify), "")
    | "<" + tag + attr_str + ">" + text_markup + children_markup + "</" + tag + ">"
  end

  | if (is_dict(data) && !is_none(data["tag"])):
      _format_xml_element(data)
    else:
      to_string(data)
end

# Serializes a value to an XML string.
#
# The output is an XML 1.0 / UTF-8 declaration line, the markup produced by
# `_xml_stringify`, and a trailing newline.
def xml_stringify(data):
  let declaration = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
  | let markup = _xml_stringify(data)
  | declaration + markup + "\n"
end

# Renders parsed XML data as Markdown tables.
#
# - An element (dict with a non-None "tag") without children becomes a one-row
#   table describing the element itself.
# - An element with children becomes a table listing its children, followed by
#   one headed section per child that has a non-empty tag.
# - Any other value becomes a single-cell "Value" table.
#
# `depth` controls the number of "#" characters in the child section headings
# (`depth + 1` of them); each recursion level passes `depth + 1`.
def _xml_to_markdown_table(data, depth):
  # One-row table: tag, attributes, text and number of children of `element`.
  def _create_element_markdown_table(element):
    let columns = ["Tag", "Attributes", "Text", "Children"]
    | let header_row = "| " + join(columns, " | ") + " |"
    | let separator_row = "| " + join(map(columns, fn(_): "---";), " | ") + " |"
    | let tag_cell = element["tag"]
    | let attributes_cell = if (!is_empty(keys(element["attributes"]))):
          join(map(entries(element["attributes"]), fn(pair): first(pair) + "=" + last(pair);), ", ")
        else:
          ""
    # Newlines in the text are written as a literal "\n" to keep one row.
    | let text_cell = if (!is_none(element["text"])):
          replace(element["text"], "\n", "\\n")
        else:
          ""
    | let count_cell = to_string(len(element["children"]))
    | let data_row = "| " + tag_cell + " | " + attributes_cell + " | " + text_cell + " | " + count_cell + " |"
    | join([header_row, separator_row, data_row], "\n")
  end

  # Table with one row per child, followed by a recursive section per child.
  def _create_children_markdown_table(children, depth):
    let columns = ["Index", "Tag", "Attributes", "Text"]
    | let header_row = "| " + join(columns, " | ") + " |"
    | let separator_row = "| " + join(map(columns, fn(_): "---";), " | ") + " |"
    # NOTE(review): `range` is presumably inclusive of its upper bound, given
    # the `- 1` here - confirm against the builtin's documentation.
    | let last_index = len(children) - 1
    | let data_rows = map(range(0, last_index), fn(i):
          let node = children[i]
          | let tag_cell = node["tag"]
          | let attributes_cell = if (is_empty(keys(node["attributes"]))):
                      ""
                    elif (!is_none(node["attributes"])):
                      join(map(entries(node["attributes"]), fn(pair): first(pair) + "=" + last(pair);), ", ")
                    else: ""
          | let text_cell = if (!is_none(node["text"])):
                      replace(node["text"], "\n", "\\n")
                    else:
                      ""
          | "| " + to_string(i) + " | " + tag_cell + " | " + attributes_cell + " | " + text_cell + " |"
        end)
    | let overview = join([header_row, separator_row] + data_rows, "\n")
    | let sections = map(children, fn(node):
          if (is_empty(node["tag"])):
            ""
          else:
            "\n\n" + repeat("#", depth + 1) + " Children of " + node["tag"] + ":\n\n" + _xml_to_markdown_table(node, depth + 1)
        end)
    | overview + join(sections, "")
  end

  # Single-cell table for values that are not elements.
  def _create_simple_markdown_table(data):
    "| Value |\n| --- |\n| " + to_string(data) + " |"
  end

  | if (is_dict(data) && !is_none(data["tag"])):
      if (!is_empty(data["children"])):
        _create_children_markdown_table(data["children"], depth)
      else:
        _create_element_markdown_table(data)
    else:
      _create_simple_markdown_table(data)
end

# Converts an XML data structure to a Markdown table.
#
# Delegates to `_xml_to_markdown_table` with an initial depth of 1.
def xml_to_markdown_table(data):
  let initial_depth = 1
  | _xml_to_markdown_table(data, initial_depth)
end