kangaroo 0.1.0

Pollard's Kangaroo ECDLP solver for secp256k1 using Vulkan/Metal/DX12 compute
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
#!/usr/bin/env python3
"""
Analyze if B1000 triangle structure can help Kangaroo optimization.

Key question: Is there any correlation between public key features
and the private key's position (N/D) that we could exploit?
"""

import json
import math
from collections import defaultdict
from pathlib import Path

# secp256k1 parameters
P = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F
N = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141
Gx = 0x79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798
Gy = 0x483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8


def mod_inverse(a: int, m: int) -> int:
    if a < 0:
        a = a % m
    g, x, _ = extended_gcd(a, m)
    if g != 1:
        raise ValueError("No inverse")
    return x % m


def extended_gcd(a: int, b: int):
    if a == 0:
        return b, 0, 1
    gcd, x1, y1 = extended_gcd(b % a, a)
    return gcd, y1 - (b // a) * x1, x1


def point_add(p1, p2):
    if p1 is None:
        return p2
    if p2 is None:
        return p1
    x1, y1 = p1
    x2, y2 = p2
    if x1 == x2:
        if y1 != y2:
            return None
        s = (3 * x1 * x1 * mod_inverse(2 * y1, P)) % P
    else:
        s = ((y2 - y1) * mod_inverse(x2 - x1, P)) % P
    x3 = (s * s - x1 - x2) % P
    y3 = (s * (x1 - x3) - y1) % P
    return (x3, y3)


def scalar_mult(k: int, point):
    if k == 0:
        return None
    result = None
    addend = point
    while k:
        if k & 1:
            result = point_add(result, addend)
        addend = point_add(addend, addend)
        k >>= 1
    return result


G = (Gx, Gy)


# Bitcoin puzzles with known solutions (from B1000 data)
SOLVED_PUZZLES = [
    (1, 1),
    (2, 3),
    (3, 7),
    (4, 8),
    (5, 21),
    (6, 49),
    (7, 76),
    (8, 224),
    (9, 467),
    (10, 514),
    (11, 1155),
    (12, 2683),
    (13, 5216),
    (14, 10544),
    (15, 26867),
    (16, 51510),
    (17, 95823),
    (18, 198669),
    (19, 357535),
    (20, 863317),
    (21, 1811764),
    (22, 3007503),
    (23, 5598802),
    (24, 14428676),
    (25, 33185509),
    (26, 54538862),
    (27, 111949941),
    (28, 227634408),
    (29, 400708894),
    (30, 1033162084),
    (31, 2102388551),
    (32, 3093472814),
    (33, 7137437912),
    (34, 14133072157),
    (35, 20112871792),
    (36, 42387769980),
    (37, 100251560595),
    (38, 146971536592),
    (39, 323724968937),
    (40, 1003651412950),
]


def compute_nd_ratio(puzzle_num: int, private_key: int) -> float:
    """Compute N/D ratio (position in range)."""
    range_start = 1 << (puzzle_num - 1)
    range_size = 1 << (puzzle_num - 1)  # D = 2^(bits-1) - 1, simplified
    n = private_key - range_start
    return n / range_size


def get_pubkey_features(point) -> dict:
    """Extract features from public key that might correlate with position."""
    x, y = point

    # Various bit patterns and modular features
    features = {
        "x_high_4": (x >> 252) & 0xF,
        "x_high_8": (x >> 248) & 0xFF,
        "y_parity": y % 2,
        "x_mod_17": x % 17,
        "x_mod_37": x % 37,
        "x_mod_256": x % 256,
        "y_mod_256": y % 256,
        "xor_xy_low": (x ^ y) & 0xFFFF,
        "x_bit_count": bin(x).count("1"),
        "y_bit_count": bin(y).count("1"),
        "x_trailing_zeros": (x & -x).bit_length() - 1 if x else 256,
        "y_trailing_zeros": (y & -y).bit_length() - 1 if y else 256,
    }
    return features


def analyze_correlation():
    """Check if any public key feature correlates with N/D position."""
    print("=" * 70)
    print("TRIANGLE-KANGAROO CORRELATION ANALYSIS")
    print("Checking if public key features predict position in range")
    print("=" * 70)

    data = []
    for puzzle_num, privkey in SOLVED_PUZZLES:
        if puzzle_num < 10:
            continue  # Skip very small puzzles

        point = scalar_mult(privkey, G)
        nd_ratio = compute_nd_ratio(puzzle_num, privkey)
        features = get_pubkey_features(point)

        data.append(
            {
                "puzzle": puzzle_num,
                "privkey": privkey,
                "nd_ratio": nd_ratio,
                "gc": int(nd_ratio * 4),  # Global component
                "hi16": (privkey >> (puzzle_num - 4)) & 0xF if puzzle_num >= 4 else 0,
                **features,
            }
        )

    print(f"\nAnalyzing {len(data)} puzzles (10-40 bits)")

    # Check correlation of each feature with N/D
    print("\n" + "-" * 70)
    print("CORRELATION: Feature vs N/D ratio")
    print("-" * 70)

    feature_names = [
        "x_high_4",
        "x_high_8",
        "y_parity",
        "x_mod_17",
        "x_mod_37",
        "x_mod_256",
        "y_mod_256",
        "xor_xy_low",
        "x_bit_count",
        "y_bit_count",
        "x_trailing_zeros",
        "y_trailing_zeros",
    ]

    for feature in feature_names:
        nd_values = [d["nd_ratio"] for d in data]
        feat_values = [d[feature] for d in data]

        # Compute Pearson correlation
        n = len(data)
        mean_nd = sum(nd_values) / n
        mean_feat = sum(feat_values) / n

        cov = (
            sum(
                (nd_values[i] - mean_nd) * (feat_values[i] - mean_feat)
                for i in range(n)
            )
            / n
        )
        std_nd = math.sqrt(sum((x - mean_nd) ** 2 for x in nd_values) / n)
        std_feat = math.sqrt(sum((x - mean_feat) ** 2 for x in feat_values) / n)

        if std_nd > 0 and std_feat > 0:
            corr = cov / (std_nd * std_feat)
        else:
            corr = 0

        marker = "***" if abs(corr) > 0.3 else ""
        print(f"  {feature:20s}: r = {corr:+.4f} {marker}")

    # Check if gc (global component) can be predicted
    print("\n" + "-" * 70)
    print("GC PREDICTION FROM PUBLIC KEY")
    print("-" * 70)

    gc_by_y_parity = defaultdict(list)
    gc_by_x_mod17 = defaultdict(list)

    for d in data:
        gc_by_y_parity[d["y_parity"]].append(d["gc"])
        gc_by_x_mod17[d["x_mod_17"]].append(d["gc"])

    print("\n  GC distribution by Y parity:")
    for parity in [0, 1]:
        gcs = gc_by_y_parity[parity]
        if gcs:
            avg = sum(gcs) / len(gcs)
            print(
                f"    Y {'even' if parity == 0 else 'odd '}: mean gc = {avg:.2f}, samples = {len(gcs)}"
            )

    # Check if any modular feature predicts gc
    print("\n  Can X mod 17 predict gc?")
    predictive = 0
    for mod, gcs in gc_by_x_mod17.items():
        if len(gcs) >= 2:
            if len(set(gcs)) == 1:  # All same gc
                predictive += 1
    print(
        f"    Predictive mod values: {predictive}/{len(gc_by_x_mod17)} (need 100% for useful)"
    )

    return data


def analyze_triangle_centers():
    """Check if triangle center rules help narrow search."""
    print("\n" + "=" * 70)
    print("TRIANGLE CENTER ANALYSIS")
    print("Can triangle corrections reduce Kangaroo search space?")
    print("=" * 70)

    # Load triangle rules
    rules_path = Path(
        "/home/oritwoen/Projekty/B1000/data/processed/analytics/vanity_triangle_center_rules.json"
    )
    if not rules_path.exists():
        print("Triangle rules file not found")
        return

    with open(rules_path) as f:
        rules = json.load(f)

    print(f"\nLoaded {len(rules)} triangle center rules")

    # Analyze rule coverage
    solved_rules = [r for r in rules if r["solved_count"] > 0]
    unsolved_rules = [r for r in rules if r["unsolved_count"] > 0]

    print(f"\n  Rules with solved puzzles: {len(solved_rules)}")
    print(f"  Rules with unsolved puzzles: {len(unsolved_rules)}")

    # Check delta_norm distribution
    print("\n  Delta norm ranges for solved puzzles:")
    solved_deltas = [float(r["delta_norm"]) for r in solved_rules]
    print(f"    Min: {min(solved_deltas):.4f}")
    print(f"    Max: {max(solved_deltas):.4f}")
    print(f"    Range: {max(solved_deltas) - min(solved_deltas):.4f}")

    # Key insight: can we use delta_norm to reduce search?
    print("\n" + "-" * 70)
    print("SEARCH SPACE REDUCTION ANALYSIS")
    print("-" * 70)

    # The formula: N = D × (gc/4 + hi16/64 + residual/4)
    # If we knew gc and could guess hi16, the residual gives ±0.25 of D
    # But we DON'T know gc without knowing the key!

    print("""
The fundamental problem:

1. Triangle formula: N = D × (gc/4 + hi16/64 + residual/4)

2. To use it, we need:
   - gc: requires knowing N/D (position) → requires key!
   - hi16: random (0% prediction accuracy)
   - residual: depends on bucket (cluster/gc/mod5)

3. This is CIRCULAR:
   - To predict position → need gc
   - To get gc → need position
   - To get position → need private key

4. For Kangaroo:
   - We have the public key
   - We don't have gc, hi16, or bucket classification
   - Triangle structure CAN'T help

CONCLUSION: No usable correlation found.
""")


def analyze_vanity_distance_pattern():
    """Check if distances between consecutive puzzle keys have pattern."""
    print("\n" + "=" * 70)
    print("VANITY DISTANCE PATTERN (alternative approach)")
    print("=" * 70)

    print("\nChecking if there's a pattern in key distances...")

    distances = []
    for i in range(1, len(SOLVED_PUZZLES)):
        prev_puzzle, prev_key = SOLVED_PUZZLES[i - 1]
        curr_puzzle, curr_key = SOLVED_PUZZLES[i]

        # Normalize distance to range size
        prev_range = 1 << (prev_puzzle - 1)
        curr_range = 1 << (curr_puzzle - 1)

        # Ratio of key to range midpoint
        prev_ratio = prev_key / (prev_range * 1.5)  # Relative to 1.5x range start
        curr_ratio = curr_key / (curr_range * 1.5)

        distances.append(
            {
                "puzzle": curr_puzzle,
                "ratio": curr_ratio,
                "delta_ratio": curr_ratio - prev_ratio,
            }
        )

    # Check for periodicity in ratio
    ratios = [d["ratio"] for d in distances]
    mean_ratio = sum(ratios) / len(ratios)

    print(f"\n  Mean N/D ratio: {mean_ratio:.4f}")
    print("  Expected (uniform): 0.5")
    print(f"  Deviation: {abs(mean_ratio - 0.5):.4f}")

    # Check for mod-5 periodicity (as in B1000)
    print("\n  Checking mod-5 periodicity in ratios:")
    mod5_means = defaultdict(list)
    for d in distances:
        mod5_means[d["puzzle"] % 5].append(d["ratio"])

    for mod in range(5):
        vals = mod5_means[mod]
        if vals:
            avg = sum(vals) / len(vals)
            print(f"    puzzle % 5 = {mod}: mean ratio = {avg:.4f} (n={len(vals)})")


def main():
    print("=" * 70)
    print("CAN B1000 TRIANGLE STRUCTURE HELP KANGAROO?")
    print("=" * 70)

    data = analyze_correlation()
    analyze_triangle_centers()
    analyze_vanity_distance_pattern()

    print("\n" + "=" * 70)
    print("FINAL VERDICT")
    print("=" * 70)
    print("""
After thorough analysis:

1. PUBLIC KEY → POSITION: No useful correlation found
   - Y parity: ~50/50, no gc correlation
   - X modular features: random distribution
   - Bit patterns: no predictive power

2. TRIANGLE STRUCTURE: Cannot be applied
   - Requires knowing gc (global component)
   - gc requires knowing N/D (position)
   - N/D requires knowing the private key
   - Circular dependency - unusable for prediction

3. KANGAROO OPTIMIZATION: No improvement possible
   - Standard O(√n) remains optimal
   - Triangle insights only work AFTER knowing the key
   - No way to narrow search range from public key alone

4. ALTERNATIVE APPROACHES to consider:
   - Better jump table design (pseudo-random vs powers of 2)
   - More distinguished points (memory/time tradeoff)
   - Multi-GPU parallelization
   - Batch point operations (Montgomery trick)

The B1000 triangle structure is mathematically interesting but
provides no computational advantage for ECDLP solving.
""")


if __name__ == "__main__":
    main()