kangaroo 0.1.0 - Docs.rs

#!/usr/bin/env python3
"""
Analyze if B1000 triangle structure can help Kangaroo optimization.

Key question: Is there any correlation between public key features
and the private key's position (N/D) that we could exploit?
"""

import json
import math
from collections import defaultdict
from pathlib import Path

# secp256k1 parameters
P = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F
N = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141
Gx = 0x79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798
Gy = 0x483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8


def mod_inverse(a: int, m: int) -> int:
    if a < 0:
        a = a % m
    g, x, _ = extended_gcd(a, m)
    if g != 1:
        raise ValueError("No inverse")
    return x % m


def extended_gcd(a: int, b: int):
    if a == 0:
        return b, 0, 1
    gcd, x1, y1 = extended_gcd(b % a, a)
    return gcd, y1 - (b // a) * x1, x1


def point_add(p1, p2):
    if p1 is None:
        return p2
    if p2 is None:
        return p1
    x1, y1 = p1
    x2, y2 = p2
    if x1 == x2:
        if y1 != y2:
            return None
        s = (3 * x1 * x1 * mod_inverse(2 * y1, P)) % P
    else:
        s = ((y2 - y1) * mod_inverse(x2 - x1, P)) % P
    x3 = (s * s - x1 - x2) % P
    y3 = (s * (x1 - x3) - y1) % P
    return (x3, y3)


def scalar_mult(k: int, point):
    if k == 0:
        return None
    result = None
    addend = point
    while k:
        if k & 1:
            result = point_add(result, addend)
        addend = point_add(addend, addend)
        k >>= 1
    return result


G = (Gx, Gy)


# Bitcoin puzzles with known solutions (from B1000 data)
SOLVED_PUZZLES = [
    (1, 1),
    (2, 3),
    (3, 7),
    (4, 8),
    (5, 21),
    (6, 49),
    (7, 76),
    (8, 224),
    (9, 467),
    (10, 514),
    (11, 1155),
    (12, 2683),
    (13, 5216),
    (14, 10544),
    (15, 26867),
    (16, 51510),
    (17, 95823),
    (18, 198669),
    (19, 357535),
    (20, 863317),
    (21, 1811764),
    (22, 3007503),
    (23, 5598802),
    (24, 14428676),
    (25, 33185509),
    (26, 54538862),
    (27, 111949941),
    (28, 227634408),
    (29, 400708894),
    (30, 1033162084),
    (31, 2102388551),
    (32, 3093472814),
    (33, 7137437912),
    (34, 14133072157),
    (35, 20112871792),
    (36, 42387769980),
    (37, 100251560595),
    (38, 146971536592),
    (39, 323724968937),
    (40, 1003651412950),
]


def compute_nd_ratio(puzzle_num: int, private_key: int) -> float:
    """Compute N/D ratio (position in range)."""
    range_start = 1 << (puzzle_num - 1)
    range_size = 1 << (puzzle_num - 1)  # D = 2^(bits-1) - 1, simplified
    n = private_key - range_start
    return n / range_size


def get_pubkey_features(point) -> dict:
    """Extract features from public key that might correlate with position."""
    x, y = point

    # Various bit patterns and modular features
    features = {
        "x_high_4": (x >> 252) & 0xF,
        "x_high_8": (x >> 248) & 0xFF,
        "y_parity": y % 2,
        "x_mod_17": x % 17,
        "x_mod_37": x % 37,
        "x_mod_256": x % 256,
        "y_mod_256": y % 256,
        "xor_xy_low": (x ^ y) & 0xFFFF,
        "x_bit_count": bin(x).count("1"),
        "y_bit_count": bin(y).count("1"),
        "x_trailing_zeros": (x & -x).bit_length() - 1 if x else 256,
        "y_trailing_zeros": (y & -y).bit_length() - 1 if y else 256,
    }
    return features


def analyze_correlation():
    """Check if any public key feature correlates with N/D position."""
    print("=" * 70)
    print("TRIANGLE-KANGAROO CORRELATION ANALYSIS")
    print("Checking if public key features predict position in range")
    print("=" * 70)

    data = []
    for puzzle_num, privkey in SOLVED_PUZZLES:
        if puzzle_num < 10:
            continue  # Skip very small puzzles

        point = scalar_mult(privkey, G)
        nd_ratio = compute_nd_ratio(puzzle_num, privkey)
        features = get_pubkey_features(point)

        data.append(
            {
                "puzzle": puzzle_num,
                "privkey": privkey,
                "nd_ratio": nd_ratio,
                "gc": int(nd_ratio * 4),  # Global component
                "hi16": (privkey >> (puzzle_num - 4)) & 0xF if puzzle_num >= 4 else 0,
                **features,
            }
        )

    print(f"\nAnalyzing {len(data)} puzzles (10-40 bits)")

    # Check correlation of each feature with N/D
    print("\n" + "-" * 70)
    print("CORRELATION: Feature vs N/D ratio")
    print("-" * 70)

    feature_names = [
        "x_high_4",
        "x_high_8",
        "y_parity",
        "x_mod_17",
        "x_mod_37",
        "x_mod_256",
        "y_mod_256",
        "xor_xy_low",
        "x_bit_count",
        "y_bit_count",
        "x_trailing_zeros",
        "y_trailing_zeros",
    ]

    for feature in feature_names:
        nd_values = [d["nd_ratio"] for d in data]
        feat_values = [d[feature] for d in data]

        # Compute Pearson correlation
        n = len(data)
        mean_nd = sum(nd_values) / n
        mean_feat = sum(feat_values) / n

        cov = (
            sum(
                (nd_values[i] - mean_nd) * (feat_values[i] - mean_feat)
                for i in range(n)
            )
            / n
        )
        std_nd = math.sqrt(sum((x - mean_nd) ** 2 for x in nd_values) / n)
        std_feat = math.sqrt(sum((x - mean_feat) ** 2 for x in feat_values) / n)

        if std_nd > 0 and std_feat > 0:
            corr = cov / (std_nd * std_feat)
        else:
            corr = 0

        marker = "***" if abs(corr) > 0.3 else ""
        print(f"  {feature:20s}: r = {corr:+.4f} {marker}")

    # Check if gc (global component) can be predicted
    print("\n" + "-" * 70)
    print("GC PREDICTION FROM PUBLIC KEY")
    print("-" * 70)

    gc_by_y_parity = defaultdict(list)
    gc_by_x_mod17 = defaultdict(list)

    for d in data:
        gc_by_y_parity[d["y_parity"]].append(d["gc"])
        gc_by_x_mod17[d["x_mod_17"]].append(d["gc"])

    print("\n  GC distribution by Y parity:")
    for parity in [0, 1]:
        gcs = gc_by_y_parity[parity]
        if gcs:
            avg = sum(gcs) / len(gcs)
            print(
                f"    Y {'even' if parity == 0 else 'odd '}: mean gc = {avg:.2f}, samples = {len(gcs)}"
            )

    # Check if any modular feature predicts gc
    print("\n  Can X mod 17 predict gc?")
    predictive = 0
    for mod, gcs in gc_by_x_mod17.items():
        if len(gcs) >= 2:
            if len(set(gcs)) == 1:  # All same gc
                predictive += 1
    print(
        f"    Predictive mod values: {predictive}/{len(gc_by_x_mod17)} (need 100% for useful)"
    )

    return data


def analyze_triangle_centers():
    """Check if triangle center rules help narrow search."""
    print("\n" + "=" * 70)
    print("TRIANGLE CENTER ANALYSIS")
    print("Can triangle corrections reduce Kangaroo search space?")
    print("=" * 70)

    # Load triangle rules
    rules_path = Path(
        "/home/oritwoen/Projekty/B1000/data/processed/analytics/vanity_triangle_center_rules.json"
    )
    if not rules_path.exists():
        print("Triangle rules file not found")
        return

    with open(rules_path) as f:
        rules = json.load(f)

    print(f"\nLoaded {len(rules)} triangle center rules")

    # Analyze rule coverage
    solved_rules = [r for r in rules if r["solved_count"] > 0]
    unsolved_rules = [r for r in rules if r["unsolved_count"] > 0]

    print(f"\n  Rules with solved puzzles: {len(solved_rules)}")
    print(f"  Rules with unsolved puzzles: {len(unsolved_rules)}")

    # Check delta_norm distribution
    print("\n  Delta norm ranges for solved puzzles:")
    solved_deltas = [float(r["delta_norm"]) for r in solved_rules]
    print(f"    Min: {min(solved_deltas):.4f}")
    print(f"    Max: {max(solved_deltas):.4f}")
    print(f"    Range: {max(solved_deltas) - min(solved_deltas):.4f}")

    # Key insight: can we use delta_norm to reduce search?
    print("\n" + "-" * 70)
    print("SEARCH SPACE REDUCTION ANALYSIS")
    print("-" * 70)

    # The formula: N = D × (gc/4 + hi16/64 + residual/4)
    # If we knew gc and could guess hi16, the residual gives ±0.25 of D
    # But we DON'T know gc without knowing the key!

    print("""
The fundamental problem:

1. Triangle formula: N = D × (gc/4 + hi16/64 + residual/4)

2. To use it, we need:
   - gc: requires knowing N/D (position) → requires key!
   - hi16: random (0% prediction accuracy)
   - residual: depends on bucket (cluster/gc/mod5)

3. This is CIRCULAR:
   - To predict position → need gc
   - To get gc → need position
   - To get position → need private key

4. For Kangaroo:
   - We have the public key
   - We don't have gc, hi16, or bucket classification
   - Triangle structure CAN'T help

CONCLUSION: No usable correlation found.
""")


def analyze_vanity_distance_pattern():
    """Check if distances between consecutive puzzle keys have pattern."""
    print("\n" + "=" * 70)
    print("VANITY DISTANCE PATTERN (alternative approach)")
    print("=" * 70)

    print("\nChecking if there's a pattern in key distances...")

    distances = []
    for i in range(1, len(SOLVED_PUZZLES)):
        prev_puzzle, prev_key = SOLVED_PUZZLES[i - 1]
        curr_puzzle, curr_key = SOLVED_PUZZLES[i]

        # Normalize distance to range size
        prev_range = 1 << (prev_puzzle - 1)
        curr_range = 1 << (curr_puzzle - 1)

        # Ratio of key to range midpoint
        prev_ratio = prev_key / (prev_range * 1.5)  # Relative to 1.5x range start
        curr_ratio = curr_key / (curr_range * 1.5)

        distances.append(
            {
                "puzzle": curr_puzzle,
                "ratio": curr_ratio,
                "delta_ratio": curr_ratio - prev_ratio,
            }
        )

    # Check for periodicity in ratio
    ratios = [d["ratio"] for d in distances]
    mean_ratio = sum(ratios) / len(ratios)

    print(f"\n  Mean N/D ratio: {mean_ratio:.4f}")
    print("  Expected (uniform): 0.5")
    print(f"  Deviation: {abs(mean_ratio - 0.5):.4f}")

    # Check for mod-5 periodicity (as in B1000)
    print("\n  Checking mod-5 periodicity in ratios:")
    mod5_means = defaultdict(list)
    for d in distances:
        mod5_means[d["puzzle"] % 5].append(d["ratio"])

    for mod in range(5):
        vals = mod5_means[mod]
        if vals:
            avg = sum(vals) / len(vals)
            print(f"    puzzle % 5 = {mod}: mean ratio = {avg:.4f} (n={len(vals)})")


def main():
    print("=" * 70)
    print("CAN B1000 TRIANGLE STRUCTURE HELP KANGAROO?")
    print("=" * 70)

    data = analyze_correlation()
    analyze_triangle_centers()
    analyze_vanity_distance_pattern()

    print("\n" + "=" * 70)
    print("FINAL VERDICT")
    print("=" * 70)
    print("""
After thorough analysis:

1. PUBLIC KEY → POSITION: No useful correlation found
   - Y parity: ~50/50, no gc correlation
   - X modular features: random distribution
   - Bit patterns: no predictive power

2. TRIANGLE STRUCTURE: Cannot be applied
   - Requires knowing gc (global component)
   - gc requires knowing N/D (position)
   - N/D requires knowing the private key
   - Circular dependency - unusable for prediction

3. KANGAROO OPTIMIZATION: No improvement possible
   - Standard O(√n) remains optimal
   - Triangle insights only work AFTER knowing the key
   - No way to narrow search range from public key alone

4. ALTERNATIVE APPROACHES to consider:
   - Better jump table design (pseudo-random vs powers of 2)
   - More distinguished points (memory/time tradeoff)
   - Multi-GPU parallelization
   - Batch point operations (Montgomery trick)

The B1000 triangle structure is mathematically interesting but
provides no computational advantage for ECDLP solving.
""")


if __name__ == "__main__":
    main()