kangaroo 0.1.0 - Docs.rs

#!/usr/bin/env python3
"""
Test correlation between elliptic curve position and Bitcoin address prefix.

The B1000 project found "vanity triangles" - clusters of private keys that
produce addresses with matching prefixes. The question is:

Can we use EC point properties to predict address prefix clustering?

If there's ANY correlation, it could help Kangaroo by:
1. Computing point properties during walks
2. Detecting "hot zones" more likely to contain the target
"""

import hashlib
from collections import defaultdict

# secp256k1 parameters
P = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F
N = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141
Gx = 0x79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798
Gy = 0x483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8

# Base58 alphabet
BASE58 = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"


def mod_inverse(a: int, m: int) -> int:
    if a < 0:
        a = a % m
    g, x, _ = extended_gcd(a, m)
    if g != 1:
        raise ValueError("No inverse")
    return x % m


def extended_gcd(a: int, b: int):
    if a == 0:
        return b, 0, 1
    gcd, x1, y1 = extended_gcd(b % a, a)
    return gcd, y1 - (b // a) * x1, x1


def point_add(p1, p2):
    if p1 is None:
        return p2
    if p2 is None:
        return p1
    x1, y1 = p1
    x2, y2 = p2
    if x1 == x2:
        if y1 != y2:
            return None
        s = (3 * x1 * x1 * mod_inverse(2 * y1, P)) % P
    else:
        s = ((y2 - y1) * mod_inverse(x2 - x1, P)) % P
    x3 = (s * s - x1 - x2) % P
    y3 = (s * (x1 - x3) - y1) % P
    return (x3, y3)


def scalar_mult(k: int, point):
    if k == 0:
        return None
    result = None
    addend = point
    while k:
        if k & 1:
            result = point_add(result, addend)
        addend = point_add(addend, addend)
        k >>= 1
    return result


G = (Gx, Gy)


def pubkey_to_address(point) -> str:
    """Convert EC point to Bitcoin address (P2PKH)."""
    if point is None:
        return ""

    x, y = point
    # Compressed public key
    prefix = b"\x02" if y % 2 == 0 else b"\x03"
    pubkey = prefix + x.to_bytes(32, "big")

    # SHA256 then RIPEMD160
    sha = hashlib.sha256(pubkey).digest()
    ripe = hashlib.new("ripemd160", sha).digest()

    # Add version byte and checksum
    versioned = b"\x00" + ripe
    checksum = hashlib.sha256(hashlib.sha256(versioned).digest()).digest()[:4]
    binary = versioned + checksum

    # Base58 encode
    num = int.from_bytes(binary, "big")
    result = ""
    while num:
        num, rem = divmod(num, 58)
        result = BASE58[rem] + result

    # Add leading 1s for leading zero bytes
    for b in binary:
        if b == 0:
            result = "1" + result
        else:
            break

    return result


def analyze_address_prefix_clustering():
    """Check if EC point properties correlate with address prefix."""
    print("=" * 70)
    print("EC POINT PROPERTIES vs ADDRESS PREFIX CORRELATION")
    print("=" * 70)

    # Generate keys around puzzle 20 solution (0xd2c55 = 863317)
    base_key = 863317
    range_size = 100000  # ±50K around the key

    print(f"\nAnalyzing {range_size} keys around puzzle 20 solution...")

    # Collect data
    data = []
    current = scalar_mult(base_key - range_size // 2, G)

    for i in range(range_size):
        k = base_key - range_size // 2 + i
        if i > 0:
            current = point_add(current, G)

        if current is None:
            continue

        x, y = current
        address = pubkey_to_address(current)
        prefix = address[:4]  # First 4 chars

        data.append(
            {
                "key": k,
                "x": x,
                "y": y,
                "prefix": prefix,
                "x_mod_1024": x % 1024,
                "y_parity": y % 2,
                "x_high_4": (x >> 252) & 0xF,
            }
        )

        if i % 20000 == 0:
            print(f"  Processed {i}/{range_size}...")

    print(f"\nTotal data points: {len(data)}")

    # Find addresses with matching prefix (like the target)
    target_address = pubkey_to_address(scalar_mult(base_key, G))
    target_prefix = target_address[:4]
    print(f"\nTarget key 0x{base_key:x} has address: {target_address}")
    print(f"Target prefix: {target_prefix}")

    # Check prefix distribution
    prefix_counts = defaultdict(int)
    for d in data:
        prefix_counts[d["prefix"][:3]] += 1

    print("\nTop 10 most common 3-char prefixes in range:")
    for prefix, count in sorted(prefix_counts.items(), key=lambda x: -x[1])[:10]:
        print(f"  {prefix}: {count} ({100 * count / len(data):.2f}%)")

    # Use most common prefix for analysis
    most_common_prefix = max(prefix_counts.keys(), key=lambda x: prefix_counts[x])
    print(f"\nUsing most common prefix: {most_common_prefix}")

    # Find all matching prefixes
    matching = [d for d in data if d["prefix"].startswith(most_common_prefix)]
    print(f"\nAddresses with same prefix: {len(matching)}")

    if len(matching) < 2:
        print("Not enough matches to analyze clustering")
        return

    # Check if matching addresses cluster by EC properties
    print("\n" + "-" * 70)
    print("EC PROPERTY DISTRIBUTION FOR MATCHING vs ALL")
    print("-" * 70)

    # X mod 1024 distribution
    all_x_mod = defaultdict(int)
    match_x_mod = defaultdict(int)
    for d in data:
        all_x_mod[d["x_mod_1024"] // 64] += 1  # Bucket into 16 groups
    for d in matching:
        match_x_mod[d["x_mod_1024"] // 64] += 1

    print("\n  X mod 1024 (bucketed to 16 groups):")
    print("  Bucket | All (%) | Match (%)")
    for b in range(16):
        all_pct = 100 * all_x_mod[b] / len(data) if data else 0
        match_pct = 100 * match_x_mod[b] / len(matching) if matching else 0
        diff = abs(match_pct - all_pct)
        marker = "***" if diff > 5 else ""
        print(f"    {b:2d}   | {all_pct:5.1f}   | {match_pct:5.1f}  {marker}")

    # Y parity
    all_even = sum(1 for d in data if d["y_parity"] == 0)
    match_even = sum(1 for d in matching if d["y_parity"] == 0)

    print("\n  Y parity:")
    print(f"    All:   {100 * all_even / len(data):.1f}% even")
    print(f"    Match: {100 * match_even / len(matching):.1f}% even")

    # Check distances between matching keys
    print("\n" + "-" * 70)
    print("DISTANCE PATTERN FOR MATCHING PREFIXES")
    print("-" * 70)

    matching_keys = sorted([d["key"] for d in matching])
    if len(matching_keys) >= 2:
        distances = [
            matching_keys[i + 1] - matching_keys[i]
            for i in range(len(matching_keys) - 1)
        ]

        print(f"\n  Number of matches: {len(matching_keys)}")
        print(f"  Key range: {matching_keys[0]} - {matching_keys[-1]}")
        print("  Distance stats:")
        print(f"    Min: {min(distances)}")
        print(f"    Max: {max(distances)}")
        print(f"    Mean: {sum(distances) / len(distances):.1f}")

        # Check for periodicity
        print("\n  First 10 distances between matching addresses:")
        for i, d in enumerate(distances[:10]):
            print(f"    {i + 1}: {d}")

    # Key finding
    print("\n" + "=" * 70)
    print("CONCLUSIONS")
    print("=" * 70)
    print("""
Analysis of vanity prefix clustering:

1. ADDRESS PREFIX DEPENDS ON:
   - SHA256(SHA256(pubkey)) → RIPEMD160 → Base58
   - This is a CRYPTOGRAPHIC HASH - designed to be random

2. EC POINT PROPERTIES:
   - X coordinate is pseudo-random across the curve
   - Y parity determines compressed key prefix (02/03)
   - Neither correlates with address prefix

3. WHY VANITY TRIANGLES EXIST:
   - Pure probability: ~1/58^4 chance for 4-char prefix match
   - In 100K keys: expect ~100K/58^4 ≈ 9 matches
   - These appear "clustered" but are random

4. IMPLICATION FOR KANGAROO:
   - No way to detect "hot zones" from EC point
   - Hash function breaks any correlation
   - Vanity structure exists in ADDRESS space, not EC space

5. THE FUNDAMENTAL BARRIER:
   - Kangaroo operates on EC points (pre-hash)
   - Vanity patterns exist in addresses (post-hash)
   - Hash function is one-way and pseudorandom
   - NO OPTIMIZATION POSSIBLE from vanity geometry
""")


def main():
    analyze_address_prefix_clustering()


if __name__ == "__main__":
    main()