# prime-formula 0.3.1

"""Implements the Prime Formula for the Periodic Table of Primes.

The algorithm is very different to previous methods of generating
primes in that the prime candidates are known ahead of time, and
there is one expensive calculation necessary to determine where
the composites fall on the number line.

The twin-prime method reduces the number of root-primes used
making it faster at finding higher twin-primes.

The triplet method reduces the number of root-primes used
making it faster at finding prime-triplets.

The isolated-primes search uses the cyclic period to find gaps of
N x 210 in the prime number table. Note that substantial compute
power is needed to find gaps > 210.

The Periodic Table of Primes does not include the non-root
primes (2, 3, 5, 7) - and so these will be handled
separately for cases where these low value primes are
expected.

Copyright: Adam Cottrell (cottrela@gmail.com)

For more details on the Periodic Table of Primes:
  see https://ssrn.com/abstract=4742238
"""
import csv
import logging
import math
import os
from argparse import ArgumentParser
from concurrent.futures import ProcessPoolExecutor
from typing import List, Set, Tuple

LOGGER = logging.getLogger(__name__)

# The first 4 primes fix the period (210). Every number in one period
# that shares no factor with it is one of the 48 roots of prime.
#
# From here, all other prime numbers can be derived
#  https://ssrn.com/abstract=4742238
NON_ROOT_PRIMES = (2, 3, 5, 7)
PRIME_PERIOD = math.prod(NON_ROOT_PRIMES)  # 210
PRIME_ROOTS = [
    x for x in range(2, PRIME_PERIOD + 2) if math.gcd(x, PRIME_PERIOD) == 1
]  # [11, 13, 17, ... 211]
NUM_ROOTS = len(PRIME_ROOTS)  # 48

# For twin primes, keep the index of every root whose successor
# in the table is exactly two larger.
TWIN_PRIME_ROOTS = [
    i
    for i, (lower, upper) in enumerate(zip(PRIME_ROOTS, PRIME_ROOTS[1:]))
    if upper - lower == 2
]  # [0, 2, 5, .. 46]

# For prime triplets, keep the index of every centre root p whose
# table neighbours form (p-2, p, p+4) or (p-4, p, p+2).
PRIME_TRIPLETS_ROOTS = [
    i
    for i in range(1, NUM_ROOTS - 1)
    if (PRIME_ROOTS[i] - PRIME_ROOTS[i - 1], PRIME_ROOTS[i + 1] - PRIME_ROOTS[i])
    in ((2, 4), (4, 2))
]  # [1, 2, 3, 8, ... 44]


def _find_inv_prime_root_vec(root_idx: int):
    """Return one row of the inverse prime roots table

    Where:
     - root_idx is the row index in the range 0..47

    With r the prime root selected by root_idx, each prime root q
    is paired with the inverse prime root (q_hat) that satisfies:
      (q * q_hat - r) % 210 == 0

    n.b. there is exactly one inverse prime root for each
    pair of prime roots.

    Returns the list of q_hat values, in the same order as the
    prime roots. Raises ValueError if no q_hat can be found.
    """
    target = PRIME_ROOTS[root_idx]
    inverses = []
    for q in PRIME_ROOTS:
        # First (and only) root whose product with q lands on the target residue
        match = next(
            (h for h in PRIME_ROOTS if (q * h - target) % PRIME_PERIOD == 0),
            None,
        )
        if match is None:
            # Code bug if this is seen
            raise ValueError(f"No root found for q={q} and r={target}")
        inverses.append(match)
    return inverses


# Inverse prime roots table, built once at import time: row i is the
# vector returned by _find_inv_prime_root_vec(i), one row per prime root.
INV_PRIME_ROOT_TABLE = [_find_inv_prime_root_vec(i) for i in range(NUM_ROOTS)]


def get_cyclic_composite_vec(root_index: int) -> List[int]:
    """Build the list of cyclic composites for a given root index

    Where:
     - root_index is in the range 0..47

    For each prime root q and its inverse root q_hat (taken from the
    inverse prime roots table row for root_index) the entry is:
      1 + (q * q_hat - r) // 210
    where r is the prime root selected by root_index.

    Returns a list with one entry per prime root (48 in total), which
    is later used as the starting positions when finding composites.

    See The Periodic Table of Primes for more info on this.
    """
    r = PRIME_ROOTS[root_index]
    return [
        1 + (q * q_hat - r) // PRIME_PERIOD
        for q, q_hat in zip(PRIME_ROOTS, INV_PRIME_ROOT_TABLE[root_index])
    ]


# Cyclic composite table, built once at import time: row i is the
# vector returned by get_cyclic_composite_vec(i), one row per prime root.
CYCLIC_COMPOSITE_TABLE = [get_cyclic_composite_vec(i) for i in range(NUM_ROOTS)]


def _find_inverse_composite_roots_in_cycle(
    composites: List[int],
    val: int,
    i: int,
    q_hat: int,
    start_cycle: int,
    end_cycle: int,
):
    """Mark the composites reached by stepping along the inverse root

    Where:
     - composites is the 0/1 flag list for the cycle range, updated in place
     - val is the 1-based cycle position to step from
     - i is the multiplier of the period added to the inverse root
     - q_hat is the inverse composite root
     - start_cycle is the lowest cycle we care about
     - end_cycle is the highest cycle we care about

    Positions val + j * (q_hat + 210 * i) are visited, and every
    position above start_cycle has its flag set to 1.

    Nothing is returned; the result is the mutation of composites.
    """
    stride = q_hat + i * PRIME_PERIOD
    # Skip ahead when start_cycle is non-zero to reduce the checking
    # done; the bound is conservative, so the guard below is still needed.
    first = max((start_cycle - val - 1) // stride, 0)
    stop = 1 + (end_cycle + 1 - val) // stride
    for multiple in range(first, stop):
        position = val + multiple * stride
        if position > start_cycle:
            composites[position - 1 - start_cycle] = 1


def _find_composite_roots_in_cycles(
    composites: List[int],
    val: int,
    q: int,
    q_hat: int,
    start_cycle: int,
    end_cycle: int,
):
    """Mark the composites generated by one (q, q_hat) root pairing

    Where:
     - composites is the 0/1 flag list for the cycle range, updated in place
     - val is the 1-based cycle position of the first composite
     - q is the composite root
     - q_hat is the inverse composite root
     - start_cycle is the lowest cycle we care about
     - end_cycle is the highest cycle we care about

    Steps val forward by q, and at each step marks that position and
    then every position reachable along the inverse root from it.

    Nothing is returned; the result is the mutation of composites.
    """
    # The steps must begin at zero so that all possible inverse
    # cyclic composites are considered
    steps = 1 + (end_cycle + 1 - val) // q
    for step in range(steps):
        position = val + step * q
        if position > start_cycle:
            composites[position - 1 - start_cycle] = 1
        _find_inverse_composite_roots_in_cycle(
            composites, position, step, q_hat, start_cycle, end_cycle
        )


def find_composites_in_cycles(
    root_idx: int, start_cycle: int, end_cycle: int
) -> List[int]:
    """Flag the composite cycles for a given root idx.

    Where:
     - root_idx is the prime root to check 0..47
     - start_cycle is the lowest cycle we care about
     - end_cycle is the highest cycle we care about

    Returns a list with one entry per cycle in start_cycle..end_cycle
    (inclusive); an entry is 1 when that cycle was marked as composite
    for this root and 0 otherwise.

    This is an expensive function as it must find all valid composite
    indexes in a range. If no sums match the cycle number then it is
    by definition a prime.
    """
    flags = [0] * (1 + end_cycle - start_cycle)
    seeds = CYCLIC_COMPOSITE_TABLE[root_idx]
    inverses = INV_PRIME_ROOT_TABLE[root_idx]
    # One pass per prime root, each with its own seed and inverse root
    for seed, q, q_hat in zip(seeds, PRIME_ROOTS, inverses):
        _find_composite_roots_in_cycles(
            flags, seed, q, q_hat, start_cycle, end_cycle
        )
    return flags


def find_non_composites_in_cycles(
    root_idx: int, start_cycle: int, end_cycle: int
) -> List[int]:
    """Convenience function to return non-composites in a range

    Where:
     - root_idx is the prime root to check 0..47
     - start_cycle is the lowest cycle we care about
     - end_cycle is the highest cycle we care about

    Returns the ordered cycle numbers that were not flagged as
    composite, i.e. the cycles where this root yields a prime. This
    compresses the sparse flag vector into a smaller footprint.
    """
    flags = find_composites_in_cycles(root_idx, start_cycle, end_cycle)
    # Number the flags from start_cycle so the cycle number comes out directly
    return [cycle for cycle, flag in enumerate(flags, start=start_cycle) if flag == 0]


def get_primes_in_range(b_start: int, b_end: int) -> List[int]:
    """Return primes in a given range

    Where:
     - b_start is the start range (inclusive)
     - b_end is the end range (inclusive)

    Each of the 48 prime roots is sieved in its own worker process,
    the surviving cycles are converted back into numbers, and the
    result is clipped to the requested range.

    Returns a list of prime numbers in counting order. An empty list
    is returned when b_end < b_start or when b_end is below the
    smallest prime.
    """
    if b_end < b_start or b_end < NON_ROOT_PRIMES[0]:
        return []
    # The largest root is 211 = 1 + 210, so cycle c reaches up to
    # 1 + 210 * (c + 1). Using b_start // 210 would therefore skip a
    # prime such as 421 when b_start is 420 or 421; take the ceiling
    # of (b_start - 211) / 210 instead. Clamping at zero also keeps
    # negative starts from producing negative cycles.
    start_cycle: int = max(
        0, (b_start - PRIME_ROOTS[-1] + PRIME_PERIOD - 1) // PRIME_PERIOD
    )
    end_cycle: int = b_end // PRIME_PERIOD
    # Sieve every root in parallel; leaving the with-block waits for all
    with ProcessPoolExecutor() as pool:
        futures = [
            pool.submit(find_non_composites_in_cycles, i, start_cycle, end_cycle)
            for i in range(NUM_ROOTS)
        ]
    # The four non-root primes are not covered by the roots of prime,
    # so they are added directly whenever they fall inside the range
    primes = [p for p in NON_ROOT_PRIMES if b_start <= p <= b_end]
    for root, future in zip(PRIME_ROOTS, futures):
        for page in future.result():
            value = root + PRIME_PERIOD * page
            if b_start <= value <= b_end:
                primes.append(value)
    primes.sort()
    return primes


def get_twin_primes_in_range(b_start: int, b_end: int) -> List[Tuple[int, int]]:
    """Return all twin primes in a given range

    Where:
     - b_start is the start range (inclusive)
     - b_end is the end range (inclusive)

    A pair is reported only when both of its members lie inside
    the range.

    Returns a list of twin primes in counting order.
    """
    if b_end < b_start:
        return []
    start_cycle: int = max(b_start, 0) // PRIME_PERIOD
    end_cycle: int = b_end // PRIME_PERIOD
    # Only the roots that take part in a twin need sieving, and each
    # of them is submitted exactly once
    needed = sorted({idx for i in TWIN_PRIME_ROOTS for idx in (i, i + 1)})
    with ProcessPoolExecutor() as pool:
        futures = {
            idx: pool.submit(
                find_non_composites_in_cycles, idx, start_cycle, end_cycle
            )
            for idx in needed
        }
    prime_indexes = {idx: set(future.result()) for idx, future in futures.items()}
    # Special case: on the first page (cycle=0) we need
    # to include twins that involve non-root primes, as
    # these will not be found from the prime roots.
    twin_primes = []
    if start_cycle == 0:
        twin_primes.append(tuple(NON_ROOT_PRIMES[1:3]))  # (3, 5)
        twin_primes.append(tuple(NON_ROOT_PRIMES[2:4]))  # (5, 7)
    for i in TWIN_PRIME_ROOTS:
        # A cycle holds a twin when both neighbouring roots are prime in it
        for page in prime_indexes[i] & prime_indexes[i + 1]:
            offset = PRIME_PERIOD * page
            twin_primes.append((PRIME_ROOTS[i] + offset, PRIME_ROOTS[i + 1] + offset))
    # Whole cycles were sieved, so clip to the requested range
    return sorted(
        pair for pair in twin_primes if b_start <= pair[0] and pair[-1] <= b_end
    )


def get_primes_triplets_in_range(
    b_start: int, b_end: int
) -> List[Tuple[int, int, int]]:
    """Return primes triplets in a given range

    Where:
     - b_start is the start range (inclusive)
     - b_end is the end range (inclusive)

    A triplet is reported only when all three of its members lie
    inside the range.

    Returns a list of prime triplets in counting order.
    """
    if b_end < b_start:
        return []
    start_cycle: int = max(b_start, 0) // PRIME_PERIOD
    end_cycle: int = b_end // PRIME_PERIOD
    # Neighbouring triplet centres share roots, so gather the distinct
    # root indexes first and sieve each of them exactly once
    needed = sorted(
        {idx for i in PRIME_TRIPLETS_ROOTS for idx in (i - 1, i, i + 1)}
    )
    with ProcessPoolExecutor() as pool:
        futures = {
            idx: pool.submit(
                find_non_composites_in_cycles, idx, start_cycle, end_cycle
            )
            for idx in needed
        }
    prime_indexes = {idx: set(future.result()) for idx, future in futures.items()}
    # Special case: on the first page (cycle=0) we need
    # to include the two triplets that overlap with the
    # non-root primes.
    prime_triplets = []
    if start_cycle == 0:
        first_root, second_root = PRIME_ROOTS[0], PRIME_ROOTS[1]
        prime_triplets.append(
            (NON_ROOT_PRIMES[2], NON_ROOT_PRIMES[3], first_root)
        )  # (5, 7, 11)
        prime_triplets.append(
            (NON_ROOT_PRIMES[3], first_root, second_root)
        )  # (7, 11, 13)
    for i in PRIME_TRIPLETS_ROOTS:
        # A cycle holds a triplet when all three roots are prime in it
        shared = prime_indexes[i - 1] & prime_indexes[i] & prime_indexes[i + 1]
        for page in shared:
            offset = PRIME_PERIOD * page
            prime_triplets.append(
                (
                    PRIME_ROOTS[i - 1] + offset,
                    PRIME_ROOTS[i] + offset,
                    PRIME_ROOTS[i + 1] + offset,
                )
            )
    # Whole cycles were sieved, so clip to the requested range
    return sorted(
        triplet
        for triplet in prime_triplets
        if b_start <= triplet[0] and triplet[-1] <= b_end
    )


def get_separated_primes_in_range(
    num_cycles: int, b_start: int, b_end: int
) -> List[Tuple[int, int]]:
    """Return primes that are separated by one or more periodic cycles

    Where:
     - num_cycles is the minimum gap in range 1..N
     - b_start is the start range
     - b_end is the end range

    For example num_cycles = 1 would return isolated cyclic primes that
    are at least 210 apart. The approach taken is to find a period cycle
    that is empty, and then return the first and last prime surrounding
    the gap. The first isolated pair can be found:
      367_876_529, 367_876_771

    If num_cycles=0, then only the pair with the largest gap will
    be returned (the earliest such pair on a tie). This is a special
    case, and yields an empty list when the range holds fewer than
    two primes.

    Returns a list of isolated prime pairs in counting order.
    """
    if num_cycles == 0:
        primes = get_primes_in_range(b_start, b_end)
        widest = None
        # Pairing the list with itself shifted by one walks consecutive
        # primes and is naturally empty for fewer than two primes
        for lower, upper in zip(primes, primes[1:]):
            if widest is None or (upper - lower) > (widest[1] - widest[0]):
                widest = (lower, upper)
        return [widest] if widest else []
    start_cycle: int = max(b_start, 0) // PRIME_PERIOD
    end_cycle: int = b_end // PRIME_PERIOD
    # Sieve every root in parallel; leaving the with-block waits for all
    with ProcessPoolExecutor() as pool:
        futures = [
            pool.submit(find_non_composites_in_cycles, i, start_cycle, end_cycle)
            for i in range(NUM_ROOTS)
        ]
    # prime_indices[i] is the set of cycles in which root i is prime
    prime_indices = [set(future.result()) for future in futures]
    # Cycles that contain at least one prime, in counting order
    occupied = sorted(set().union(*prime_indices))
    isolated_primes = []
    for before, after in zip(occupied, occupied[1:]):
        if after - before <= num_cycles:
            continue
        # A gap of num_cycles or larger was found. PRIME_ROOTS is in
        # ascending order, so the nearest prime before the gap uses the
        # largest root present in the earlier cycle and the nearest prime
        # after it uses the smallest root present in the later cycle.
        # max()/min() raise ValueError on an empty selection, which
        # would indicate a code bug.
        a = PRIME_PERIOD * before + max(
            PRIME_ROOTS[i] for i in range(NUM_ROOTS) if before in prime_indices[i]
        )
        b = PRIME_PERIOD * after + min(
            PRIME_ROOTS[i] for i in range(NUM_ROOTS) if after in prime_indices[i]
        )
        isolated_primes.append((a, b))
    return isolated_primes


def periodic_table_primes(start_cycle: int, end_cycle: int) -> List[List[int]]:
    """Create the periodic table of primes for a given cycle range

    Where:
     - start_cycle is the first cycle to show
     - end_cycle is the cycle to stop before (it is not shown)

    Returns a list of lists where:
     - Rows are the prime root that derives the prime number
     - Columns increase by cycle number in range
    Each cell holds the prime, or None when root + 210 * cycle is
    not prime.
    """
    # Search exactly the numbers the table can contain: from the
    # smallest root of the first cycle to the largest root of the
    # last cycle shown.
    b_start = PRIME_ROOTS[0] + PRIME_PERIOD * start_cycle
    b_end = PRIME_ROOTS[-1] + PRIME_PERIOD * (end_cycle - 1)
    primes = set(get_primes_in_range(b_start, b_end))
    table = []
    for root in PRIME_ROOTS:
        candidates = (
            root + PRIME_PERIOD * cycle for cycle in range(start_cycle, end_cycle)
        )
        table.append([item if item in primes else None for item in candidates])
    return table


def periodic_table_primes_as_csv(
    fname: str, start_cycle: int, end_cycle: int
) -> List[List[int]]:
    """Create a csv of the periodic table of primes

    Where:
     - fname is the file name to write
     - start_cycle is the first cycle to show
     - end_cycle is the cycle to stop before (it is not shown)

    Creates a CSV file where:
     - Rows are the prime root that derives the prime number
     - Columns increase by cycle number in range

    Returns the table that was written.
    """
    table = periodic_table_primes(start_cycle, end_cycle)
    # The csv module requires newline="" so that it controls the line
    # endings itself; without it extra blank rows appear on platforms
    # that translate "\n" on write.
    with open(fname, "w", newline="") as fd:
        writer = csv.writer(fd)
        # Add a header row to show which cycle the values are from
        writer.writerow(["Cycle/root"] + list(range(start_cycle, end_cycle)))
        # Dump each root of prime along with the primes it derives
        for root, row in zip(PRIME_ROOTS, table):
            writer.writerow([root] + row)
    return table


def get_prime_root(prime: int) -> Tuple[int, int]:
    """Return the prime's root and cycle number

    Returns a tuple containing:
      - root of the prime
      - cycle_num of the prime

    Can be used to re-construct any prime using
    the following formula:
       prime = root + (210 * cycle_num)

    The four non-root primes (2, 3, 5, 7) are returned as their own
    root on cycle 0.

    Raises ValueError when the number cannot be a prime, i.e. it is
    below the first root without being a non-root prime, or it shares
    a factor with 210.
    """
    # Special case for first 4 prime numbers, which
    # would be expected on the first cycle
    if prime in NON_ROOT_PRIMES:
        return prime, 0
    if prime < PRIME_ROOTS[0]:
        # Nothing else below the first root is prime
        raise ValueError("Number does not appear to be prime")
    # All primes => root + (210 x cycle_num)
    root = prime % PRIME_PERIOD
    if root == PRIME_ROOTS[-1] - PRIME_PERIOD:
        # The table stores residue 1 as the root 211 (the last root),
        # so e.g. 421 is root 211 on cycle 1
        root = PRIME_ROOTS[-1]
    if root not in PRIME_ROOTS:
        # If this is seen, then number is not prime (or proof invalid)
        raise ValueError("Number does not appear to be prime")
    cycle_num = (prime - root) // PRIME_PERIOD
    return (root, cycle_num)


# Large list of primes (for verification), expected to sit in the
# same directory as this module
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
BIG_PRIME_LIST = os.path.join(BASE_DIR, "all_primes.txt")  # Primes up to 1 million


class VerifyPrimes:
    """Helper to verify primes against a reference txt file"""

    def _read_prime_list(self, fname):
        """Read every integer from the lines of fname that start with a digit

        Blank lines and lines whose first character is not a digit are
        skipped. Numbers on a line are separated by spaces.
        """
        collected = []
        with open(fname) as handle:
            for raw in handle:
                text = raw.strip()
                if text and text[0].isdigit():
                    collected.extend([int(tok) for tok in text.split(" ") if tok])
        return collected

    def _get_primes_in_range(self, r_min: int, r_max: int):
        """Return the reference primes between r_min and r_max (inclusive)

        The upper bound is capped at the largest reference prime.
        Returns None when r_min lies beyond the reference data.
        """
        reference = self._read_prime_list(BIG_PRIME_LIST)
        ceiling = max(reference)
        if r_min > ceiling:
            return None
        upper = min(r_max, ceiling)
        return {p for p in reference if r_min <= p <= upper}

    def verify_primes(self, primes: Set[int]):
        """Check primes against the reference list

        The check is skipped (with a message) when there is no reference
        data to compare with. Primes above the largest reference value
        found are dropped before comparing.

        Raises ValueError when primes contains a number missing from the
        reference data, or omits a reference prime.
        """
        expected = self._get_primes_in_range(min(primes), max(primes))
        if not expected:
            print("Skipping verify as nothing to check against..")
            return
        limit = max(expected)
        if limit < max(primes):
            print(f"Cropping primes to fit test data <= {limit}")
            primes = {x for x in primes if x <= limit}
        unexpected = primes - expected
        absent = expected - primes
        if unexpected:
            raise ValueError(f"Found the following were not primes: {unexpected}")
        if absent:
            raise ValueError(f"Found the following were missing: {absent}")
        print(f"Verified {len(primes)} primes")


def _get_prime_from_parts(parts) -> Tuple[int, str]:
    if isinstance(parts, list):
        prime = 0
        name_parts = []
        for part in parts:
            if isinstance(part, tuple):
                val = int(part[0] ** part[1])
                sign = "+"
                if len(part) > 2:
                    val = -val
                    sign = "-"
                prime += val
                if name_parts:
                    name_parts.append(sign)
                name_parts.append(f"{part[0]}^{part[1]}")
            else:
                prime += part
                if name_parts:
                    name_parts.append("-" if prime < 0 else "+")
                name_parts.append(str(abs(part)))
        name = " ".join(name_parts)
    else:
        prime = parts
        name = str(prime)
    return prime, name


def test_get_prime_root():
    """Print the root and cycle index for a selection of primes

    The selection is a few small primes followed by large primes
    written as sums of powers (see _get_prime_from_parts).
    """
    cases = (
        2,
        5,
        7,
        [(2, 136279841), -1],
        [(2, 82589933), -1],
        [(2, 77232917), -1],
        [(2, 74207281), -1],
        [(2, 57885161), -1],
        [(2, 43112609), -1],
        [(2, 42643801), -1],
        [(516693, 2097152), (516693, 1048576, -1), 1],
        [(465859, 2097152), (465859, 1048576, -1), 1],
        [(2, 37156667), -1],
    )
    res = []
    for parts in cases:
        prime, name = _get_prime_from_parts(parts)
        root, index = get_prime_root(prime)
        info = {"name": name, "prime": prime, "root": root, "index": index}
        if prime > int(10**10):
            # Too large to print in full, so show the index as a power of ten
            message = f"Prime {info['name']} has root {info['root']} with an index of ~10^({math.log10(info['index']):.3f})"
        elif info["index"]:
            message = f"Prime {info['prime']} has root {info['root']} with index of {info['index']}"
        else:
            message = f"Prime {info['prime']} has no roots"
        print(message)
        res.append(info)


def test_get_primes_in_range():
    """Find the primes in a one million wide window and verify them"""
    b_start = 200_000_000
    b_end = b_start + 1_000_000
    primes = get_primes_in_range(b_start, b_end)
    VerifyPrimes().verify_primes(set(primes))
    print(f"In range {b_start:,} to {b_end:,} there are {len(primes)} primes")


def test_periodic_table_primes():
    """Write the periodic table for cycles 96..144 to out.csv and print its size"""
    start_cycle, end_cycle = 96, 144
    ptp_table = periodic_table_primes_as_csv("out.csv", start_cycle, end_cycle)
    print(
        f"In PFP({start_cycle}, {end_cycle}) there are {len(ptp_table)} rows and {len(ptp_table[0])} cols"
    )


def test_get_twin_primes_in_range():
    """Count the twin primes from 0 to 70 million"""
    b_start, b_end = 0, 70_000_000
    prime_twins = get_twin_primes_in_range(b_start, b_end)
    print(f"In range {b_start:,} to {b_end:,} there are {len(prime_twins)} twin primes")


def test_get_prime_triplets_in_range():
    """Count the prime triplets from 0 to 1 million"""
    b_start, b_end = 0, 1_000_000
    prime_triplets = get_primes_triplets_in_range(b_start, b_end)
    print(
        f"In range {b_start:,} to {b_end:,} there are {len(prime_triplets)} prime triplets"
    )


def test_get_separated_primes_in_range():
    """Print the isolated prime pairs in a fixed range and the widest gap"""
    num_cycles = 1
    b_start, b_end = 350_000_000, 795_000_000
    isolated_primes = get_separated_primes_in_range(num_cycles, b_start, b_end)
    print(isolated_primes)
    print(
        f"In range {b_start:,} to {b_end:,} there are {len(isolated_primes)} isolated primes with a minimum gap of {PRIME_PERIOD * num_cycles}"
    )
    # Track (gap width, position) of the first pair with the widest gap
    largest_gap = None
    for position, pair in enumerate(isolated_primes):
        width = pair[1] - pair[0]
        if largest_gap is None or width > largest_gap[0]:
            largest_gap = (width, position)
    print(
        f"The largest gap found was {largest_gap[0]} found on pair {isolated_primes[largest_gap[1]]}"
    )


def test_known_large_prime():
    """Search the three numbers centred on a known large prime"""
    # 416_608_695_821 is a large prime from the 101 root
    known_prime = 416_608_695_821
    b_start = known_prime - 1
    b_end = known_prime + 1
    primes = get_primes_in_range(b_start, b_end)
    print(f"In range {b_start:,} to {b_end:,} there are {primes} primes")


def main():
    """Command line entry point: count the primes between start and end

    Positional arguments (both optional):
     - start defaults to 0
     - end defaults to start

    Pass --print / -p to print the primes themselves before the
    summary line.
    """
    parser = ArgumentParser()
    parser.add_argument(
        "start", nargs="?", type=int, help="start of the range (default: 0)"
    )
    parser.add_argument(
        "end", nargs="?", type=int, help="end of the range (default: start)"
    )
    parser.add_argument(
        "--print", "-p", action="store_true", help="print the primes found"
    )
    args = parser.parse_args()

    # Compare against None so that an explicit 0 is kept as given
    if args.start is None:
        args.start = 0
    if args.end is None:
        args.end = args.start

    primes = get_primes_in_range(args.start, args.end)
    if args.print:
        print(primes)
    print(f"Found {len(primes):,} primes in range {args.start:,} to {args.end:,}")


if __name__ == "__main__":
    main()