hhh 1.0.1

The hhh Binary File Processor
Documentation
#!/usr/bin/env python

# hhh
# Copyright (c) 2023 by Stacy Prowell.  All rights reserved.
# https://gitlab.com/sprowell/hhh

"""
Execute tests of the code found in the hhh book.

This identifies blocks of text that contain hhh examples.  It finds lines that start with
```hhh and reads the content and passes that to hhh.  It expects the result to be found
at the end in a `// Result` block.  The leading "//" is removed.

The following is an example.

```hhh -p
// Simple example.
0004:43 44 45 0:46

// Result:
// 00000000: 46 00 00 00 43 44 45                             // F...CDE
```

If the result matches the output exactly, then the test passes.

If additional options need to be passed to hhh, then they can be included after hhh on the
first line.

```hhh -r
// Simple example.
0x4: 0x434445 0:0x46

// Result:
// 00000000: 46 00 00 00 43 44 45                             // F...CDE
```

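Bash commands can also be run.  A bash block starts with a line beginning with ```bash, and the
command is the first line in the block that starts with the designated "bash prompt" (usually
"$ ", but configurable).  Every occurrence of "hhh" in the command is replaced with
"cargo run -- --no-configuration-file" before the command is run.  The remaining lines of the
block are the expected output.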

```bash
$ hhh --no-ascii --no-offset test_files/simple.bin
a4 21 41 3b fe cf 08 7c de af 67 ea
```
"""

import sys
import os
from typing import List, Tuple, Optional
from subprocess import Popen, PIPE
import subprocess
import difflib
from debug import debug, error

# Folders that hold the documents to scan.  Each folder is walked recursively and
# every file found is scanned.
FOLDERS = ["doc"]

# Extra files to scan, relative to the root folder of the distribution.
FILES: List[str] = []

# Start marker.  A line starting with this opens an hhh test block; anything after
# the marker on that line is passed to hhh as extra command line options.
START_MARKER = "```hhh"

# Result marker.  Compared against the lower-cased line, so the match is
# case-insensitive.  Lines after it (up to the end marker) are the expected output.
RESULT_MARKER = "// result"

# End marker.  A line starting with this closes the current test block.
END_MARKER = "```"

# Bash marker.  A line starting with this opens a shell test block.
BASH_MARKER = "```bash"

# Bash prompt.  Inside a shell test block, the first line starting with this holds
# the command to run; the prompt itself is removed.
BASH_PROMPT = "$ "

def run_test(test: bytes, result: bytes, file: str, start: int,
             options: Optional[List[str]] = None, hexdump: bool = False) -> bool:
    """Execute a single test and check the result.

    Pass in the test (input) and the expected result (output).  A list of additional
    options to add to the command line (for the parser) can also be passed in.

    The output can be processed a second time to a hex dump.  If you want that, set
    hexdump to true.

    Leading and trailing whitespace is ignored when comparing the expected and observed
    output.  On a mismatch the input, the expected output, the observed output, and the
    exit status and standard error of every process that was run are printed.  The file
    name and start line are only used to label the messages.

    Return true on success, and false on failure.
    """
    if options is None:
        options = []
    base_cmd = ["cargo", "run", "--", "--no-configuration-file"]
    parse_opts = base_cmd + options
    debug(f"run_test({file}:{start}: {parse_opts})")

    # Run the stages one after the other.  The optional second stage reads the
    # standard output of the first stage on its standard input.
    stages = [subprocess.run(parse_opts, input=test, stdout=PIPE, stderr=PIPE, check=False)]
    if hexdump:
        stages.append(subprocess.run(
            base_cmd + ["--no-ascii"],
            input=stages[0].stdout, stdout=PIPE, stderr=PIPE, check=False
        ))

    expected = result.strip()
    observed = stages[-1].stdout.strip()
    if expected == observed:
        print(f"Test PASSED ({file}:{start})")
        return True

    def show(data: bytes) -> str:
        # The output is not guaranteed to be valid UTF-8.  Decode leniently so that
        # printing the diagnostics can never raise and hide the failure.
        return data.decode('utf-8', errors='replace')

    print("================================================================================")
    print(f"Test FAILED ({file}:{start})\n")
    print("--------------------------------------------------------------------------------")
    print("Input (raw):")
    print(f"{test!r}\n")
    print(f"Output Expected (raw): (length = {len(expected)})")
    print(f"{expected!r}\n")
    print(f"Output Observed (raw): (length = {len(observed)})")
    print(f"{observed!r}\n")
    print("--------------------------------------------------------------------------------")
    print("Input (decoded):")
    print(f"{show(test)}\n")
    print("Output Expected (decoded):")
    print(f"{show(expected)}\n")
    print("Output Observed (decoded):")
    print(f"{show(observed)}\n")
    print("--------------------------------------------------------------------------------")
    for number, stage in enumerate(stages, 1):
        print(f"Standard Error (process {number}, exit status {stage.returncode}):")
        print(f"{show(stage.stderr)}\n")
    return False

def run_shell_test(line: bytes, result: bytes, file: str, start: int) -> bool:
    """Execute a shell command and check the output.

    Execute the shell command and then check its standard output against the provided
    result.  Any instance of `hhh` in the command is replaced with
    `cargo run -- --no-configuration-file`.  Leading and trailing whitespace is ignored
    when comparing.

    A command that exits with a non-zero status is reported as a failed test, together
    with its exit status and standard error; it does not raise an exception.

    The file name and start line are only used to label the messages.

    Return true on success, false on failure.
    """
    line = line.replace(b"hhh", b"cargo run -- --no-configuration-file")
    debug(f"run_shell_test({file}:{start}: {line.decode('utf-8')})")
    # NOTE: the command is taken from the scanned document and is run through the
    # shell as-is, so only scan documents you trust.
    completed = subprocess.run(
        line, shell=True, text=True, stdout=PIPE, stderr=PIPE, check=False
    )
    observed = completed.stdout.encode("utf-8").strip()
    expected = result.strip()
    if completed.returncode == 0 and expected == observed:
        print(f"Test PASSED ({file}:{start})")
        return True

    print("================================================================================")
    print(f"Test FAILED ({file}:{start})\n")
    print("--------------------------------------------------------------------------------")
    print("Input (raw):")
    print(f"{line!r}\n")
    print(f"Output Expected (raw): (length = {len(expected)})")
    print(f"{expected!r}\n")
    print(f"Output Observed (raw): (length = {len(observed)})")
    print(f"{observed!r}\n")
    print("--------------------------------------------------------------------------------")
    print("Input (decoded):")
    print(f"{line.decode('utf-8')}\n")
    print("Output Expected (decoded):")
    print(f"{expected.decode('utf-8')}\n")
    print("Output Observed (decoded):")
    print(f"{observed.decode('utf-8')}\n")
    if completed.returncode != 0:
        print(f"Exit Status: {completed.returncode}")
        print("Standard Error:")
        print(f"{completed.stderr}\n")
    if expected != observed:
        # Index of the first byte that differs.  If one output is a prefix of the
        # other, this is the length of the shorter one.
        offset = next(
            (index for index, (want, got) in enumerate(zip(expected, observed)) if want != got),
            min(len(expected), len(observed))
        )
        print(f"Differ At Offset: {offset}")
    return False


def process_file(file: str) -> Tuple[int,int]:
    """Process a single file.

    This looks for test blocks in the file and then executes them and
    compares the output to the specified output.

    Two kinds of block are recognized, each closed by a line starting with the end
    marker:

    * A shell block (bash marker).  The first line starting with the bash prompt is
      the command; every following line is the expected output.  A shell block that
      contains no command is not a test and is skipped.
    * An hhh block (start marker).  Text after the marker on the opening line is split
      into options.  Lines up to the result marker are the input; from every line after
      the result marker the first three characters are dropped and the rest is the
      expected output.  If the options include -p or --parse the output is run through
      a second hex dump pass.

    A block that is still open at the end of the file is reported as an error and
    counted as a failure.  A file that cannot be decoded as UTF-8 is skipped silently,
    and a file that cannot be read is reported as an error; both yield (0, 0).  Errors
    raised while a test is being run are not caught here.

    Return the number of successes and failures.
    """
    debug(f"process_file({file})")
    # Only reading the file is guarded.  An error raised while running a test must
    # not be mistaken for an unreadable file.
    try:
        with open(file, "rt", encoding = "utf-8") as file_in:
            lines = file_in.readlines()
    except UnicodeDecodeError:
        # Not UTF-8 text (presumably a binary file in a scanned folder).
        return (0, 0)
    except IOError:
        error(f"Unexpected error: {sys.exc_info()[0]}")
        return (0, 0)

    success = failure = 0
    total = len(lines)
    lno = 0
    while lno < total:
        header = lines[lno]
        is_bash = header.startswith(BASH_MARKER)
        if not (is_bash or header.startswith(START_MARKER)):
            lno += 1
            continue

        # Collect the block body up to (not including) the end marker.
        start = lno
        test: List[str] = []
        result: List[str] = []
        in_result = False
        lno += 1
        while lno < total and not lines[lno].startswith(END_MARKER):
            current = lines[lno]
            if is_bash:
                if in_result:
                    result.append(current)
                elif current.startswith(BASH_PROMPT):
                    in_result = True
                    test = [current[len(BASH_PROMPT):].strip()]
            elif in_result:
                result.append(current[3:])
            elif current.lower().startswith(RESULT_MARKER):
                in_result = True
            else:
                test.append(current)
            lno += 1

        # Arrays are zero-based, but "file line" is one-based.
        line_number = start + 1

        if lno >= total:
            # Ran off the end of the file without finding the end marker.
            error(f"Unterminated test block ({file}:{line_number})")
            failure += 1
            break

        # Step over the end marker.
        lno += 1

        if is_bash:
            if not in_result:
                # No prompt line was found, so there is nothing to run.
                continue
            passed = run_shell_test(
                "".join(test).encode("utf-8"),
                "".join(result).encode("utf-8"),
                file,
                line_number)
        else:
            options = header[len(START_MARKER):].split()
            hexdump = "-p" in options or "--parse" in options
            passed = run_test(
                "".join(test).encode("utf-8"),
                "".join(result).encode("utf-8"),
                file,
                line_number,
                options=options,
                hexdump=hexdump)
        if passed:
            success += 1
        else:
            failure += 1

    return (success, failure)

def main() -> None:
    '''Process documentation files and execute any tests found in them.

    The folders and extra files that will be scanned are listed and the user is asked
    to confirm.  Any answer that does not start with "y" (ignoring case and leading
    whitespace) cancels the run and the function returns without running anything.

    Otherwise every file in FILES, and every file found by walking each folder in
    FOLDERS, is processed.  The totals are printed and the process exits with status 1
    if any test failed, or 0 if none did.
    '''
    if FOLDERS:
        print("This will execute tests found in the hhh documentation folders:")
        for folder in FOLDERS:
            print(f"  * {folder}")
        print()
    if FILES:
        print("The following additional files will be checked, also:")
        for filename in FILES:
            print(f"  * {filename}")
        print()
    proceed = input("Proceed? (y/N) ")
    # Accept "y", "Y", "yes", " Yes", and so on; anything else means "no".
    if not proceed.strip().lower().startswith('y'):
        return

    success = failure = 0
    for filename in FILES:
        (good, bad) = process_file(filename)
        success += good
        failure += bad
    for folder in FOLDERS:
        for root, _dirs, files in os.walk(folder):
            for basename in files:
                filename = os.path.join(root, basename)
                (good, bad) = process_file(filename)
                success += good
                failure += bad
    print(f"\nSuccessful tests: {success}\nFailed tests: {failure}")
    sys.exit(1 if failure > 0 else 0)

# Run the documentation tests only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()