import hashlib
from collections import defaultdict
# Prime modulus: point_add reduces every slope and coordinate modulo P.
# NOTE(review): value appears to be the secp256k1 field prime - confirm against SEC 2.
P = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F
# NOTE(review): value appears to be the secp256k1 group order; nothing in the
# code visible here reads N.
N = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141
# Affine x and y coordinates that are combined into the base point G below.
Gx = 0x79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798
Gy = 0x483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8
# 58-character digit alphabet indexed by pubkey_to_address (no 0, O, I or l).
BASE58 = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"
def mod_inverse(a: int, m: int) -> int:
    """Return the multiplicative inverse of ``a`` modulo ``m``.

    A negative ``a`` is first reduced modulo ``m``; non-negative values are
    passed to ``extended_gcd`` unchanged.

    Raises:
        ValueError: if ``extended_gcd`` reports a common divisor other
            than 1, i.e. no inverse exists.
    """
    residue = a % m if a < 0 else a
    divisor, coeff, _ = extended_gcd(residue, m)
    if divisor == 1:
        # coeff may be negative; normalise it into the range of ``% m``.
        return coeff % m
    raise ValueError("No inverse")
def extended_gcd(a: int, b: int):
    """Recursive extended Euclidean algorithm.

    Returns a tuple ``(g, x, y)`` satisfying ``a * x + b * y == g``. For
    non-negative inputs ``g`` is the greatest common divisor of ``a``
    and ``b``.
    """
    # Base case: 0 * 0 + b * 1 == b.
    if a == 0:
        return b, 0, 1
    quotient, remainder = divmod(b, a)
    g, s, t = extended_gcd(remainder, a)
    # Back-substitute: remainder == b - quotient * a.
    return g, t - quotient * s, s
def point_add(p1, p2):
    """Add two affine points, with ``None`` standing for the identity.

    Args:
        p1: ``(x, y)`` tuple or ``None``.
        p2: ``(x, y)`` tuple or ``None``.

    Returns:
        The sum as an ``(x, y)`` tuple reduced modulo ``P``, or ``None`` when
        the inputs share an x coordinate but differ in y.
    """
    # Identity element: adding it returns the other operand untouched.
    if p1 is None:
        return p2
    if p2 is None:
        return p1

    (xa, ya), (xb, yb) = p1, p2

    if xa != xb:
        # Chord through two distinct points.
        slope = ((yb - ya) * mod_inverse(xb - xa, P)) % P
    elif ya == yb:
        # Tangent at a doubled point.
        slope = (3 * xa * xa * mod_inverse(2 * ya, P)) % P
    else:
        # Same x, different y: the points cancel out.
        return None

    xc = (slope * slope - xa - xb) % P
    yc = (slope * (xa - xc) - ya) % P
    return (xc, yc)
def scalar_mult(k: int, point):
    """Multiply ``point`` by the non-negative integer ``k`` (double-and-add).

    Args:
        k: Scalar multiplier; must be >= 0.
        point: ``(x, y)`` tuple, or ``None`` for the identity.

    Returns:
        The resulting point, or ``None`` when ``k`` is 0 (or when the
        additions cancel out to the identity).

    Raises:
        ValueError: if ``k`` is negative. Right-shifting a negative int
            converges to -1 rather than 0, so the loop below would never
            terminate. Callers that need a negative multiple should reduce
            the scalar modulo the group order first.
    """
    if k < 0:
        raise ValueError("scalar must be non-negative")

    result = None
    addend = point
    while k:
        if k & 1:
            result = point_add(result, addend)
        k >>= 1
        # Only double while bits remain; the doubling after the final bit
        # would be computed and then thrown away.
        if k:
            addend = point_add(addend, addend)
    return result
# Base point as an (x, y) tuple; analyze_address_prefix_clustering passes it
# to scalar_mult and point_add.
G = (Gx, Gy)
def pubkey_to_address(point) -> str:
    """Encode a point as a Base58 address string.

    The point is serialised in compressed form (parity byte 02/03 followed by
    the 32-byte big-endian x coordinate), hashed with SHA-256 then
    RIPEMD-160, prefixed with version byte 0x00, and suffixed with the first
    four bytes of a double SHA-256 checksum before Base58 encoding.

    Args:
        point: ``(x, y)`` tuple, or ``None``.

    Returns:
        The encoded address, or ``""`` when ``point`` is ``None``.
    """
    if point is None:
        return ""

    x_coord, y_coord = point
    parity_tag = b"\x03" if y_coord % 2 else b"\x02"
    compressed = parity_tag + x_coord.to_bytes(32, "big")

    sha_digest = hashlib.sha256(compressed).digest()
    payload = b"\x00" + hashlib.new("ripemd160", sha_digest).digest()
    check = hashlib.sha256(hashlib.sha256(payload).digest()).digest()[:4]
    raw = payload + check

    # Collect digits least-significant first, then reverse once at the end.
    value = int.from_bytes(raw, "big")
    digits = []
    while value:
        value, idx = divmod(value, 58)
        digits.append(BASE58[idx])

    # Each leading zero byte is represented by a literal "1".
    leading_zeros = len(raw) - len(raw.lstrip(b"\x00"))
    return "1" * leading_zeros + "".join(reversed(digits))
def analyze_address_prefix_clustering(
    base_key: int = 863317, range_size: int = 100000
) -> None:
    """Print a report comparing EC point properties with address prefixes.

    Walks ``range_size`` consecutive keys centred on ``base_key``, derives
    each key's point by repeated addition of ``G`` and its address via
    ``pubkey_to_address``, then prints:

    * the ten most common 3-character address prefixes,
    * the distribution of ``x mod 1024`` (16 buckets) and y parity for the
      most common prefix versus all keys,
    * gap statistics between keys sharing that prefix,
    * a fixed block of conclusions.

    Args:
        base_key: Key at the centre of the scanned window.
        range_size: Number of consecutive keys to scan.

    Raises:
        ValueError: if ``range_size`` is below 1, or the window would start
            at a negative key.
    """
    if range_size < 1:
        raise ValueError("range_size must be at least 1")
    start_key = base_key - range_size // 2
    if start_key < 0:
        raise ValueError("base_key - range_size // 2 must not be negative")

    print("=" * 70)
    print("EC POINT PROPERTIES vs ADDRESS PREFIX CORRELATION")
    print("=" * 70)
    print(f"\nAnalyzing {range_size} keys around puzzle 20 solution...")

    data = []
    # One full scalar multiplication for the first key; every later key is
    # reached with a single point addition.
    current = scalar_mult(start_key, G)
    for i in range(range_size):
        k = start_key + i
        if i > 0:
            current = point_add(current, G)
        if current is None:
            continue
        x, y = current
        address = pubkey_to_address(current)
        data.append(
            {
                "key": k,
                "x": x,
                "y": y,
                "prefix": address[:4],
                "x_mod_1024": x % 1024,
                "y_parity": y % 2,
                "x_high_4": (x >> 252) & 0xF,
            }
        )
        if i % 20000 == 0:
            print(f" Processed {i}/{range_size}...")

    print(f"\nTotal data points: {len(data)}")
    if not data:
        # Every percentage below divides by len(data).
        print("No data points collected")
        return

    target_address = pubkey_to_address(scalar_mult(base_key, G))
    target_prefix = target_address[:4]
    print(f"\nTarget key 0x{base_key:x} has address: {target_address}")
    print(f"Target prefix: {target_prefix}")

    prefix_counts = defaultdict(int)
    for d in data:
        prefix_counts[d["prefix"][:3]] += 1

    print("\nTop 10 most common 3-char prefixes in range:")
    ranked = sorted(prefix_counts.items(), key=lambda item: -item[1])
    for prefix, count in ranked[:10]:
        print(f" {prefix}: {count} ({100 * count / len(data):.2f}%)")

    most_common_prefix = max(prefix_counts, key=prefix_counts.get)
    print(f"\nUsing most common prefix: {most_common_prefix}")
    matching = [d for d in data if d["prefix"].startswith(most_common_prefix)]
    print(f"\nAddresses with same prefix: {len(matching)}")
    if len(matching) < 2:
        print("Not enough matches to analyze clustering")
        return

    print("\n" + "-" * 70)
    print("EC PROPERTY DISTRIBUTION FOR MATCHING vs ALL")
    print("-" * 70)

    # Bucket x mod 1024 into 16 groups of width 64, once for every key and
    # once for the keys that share the most common prefix.
    all_x_mod = defaultdict(int)
    match_x_mod = defaultdict(int)
    for d in data:
        all_x_mod[d["x_mod_1024"] // 64] += 1
    for d in matching:
        match_x_mod[d["x_mod_1024"] // 64] += 1

    print("\n X mod 1024 (bucketed to 16 groups):")
    print(" Bucket | All (%) | Match (%)")
    for b in range(16):
        all_pct = 100 * all_x_mod[b] / len(data)
        match_pct = 100 * match_x_mod[b] / len(matching)
        # Flag buckets that deviate by more than five percentage points.
        marker = "***" if abs(match_pct - all_pct) > 5 else ""
        print(f" {b:2d} | {all_pct:5.1f} | {match_pct:5.1f} {marker}")

    all_even = sum(1 for d in data if d["y_parity"] == 0)
    match_even = sum(1 for d in matching if d["y_parity"] == 0)
    print("\n Y parity:")
    print(f" All: {100 * all_even / len(data):.1f}% even")
    print(f" Match: {100 * match_even / len(matching):.1f}% even")

    print("\n" + "-" * 70)
    print("DISTANCE PATTERN FOR MATCHING PREFIXES")
    print("-" * 70)

    # At least two matches are guaranteed by the early return above, so
    # there is always at least one gap to report.
    matching_keys = sorted(d["key"] for d in matching)
    distances = [
        later - earlier
        for earlier, later in zip(matching_keys, matching_keys[1:])
    ]
    print(f"\n Number of matches: {len(matching_keys)}")
    print(f" Key range: {matching_keys[0]} - {matching_keys[-1]}")
    print(" Distance stats:")
    print(f" Min: {min(distances)}")
    print(f" Max: {max(distances)}")
    print(f" Mean: {sum(distances) / len(distances):.1f}")
    print("\n First 10 distances between matching addresses:")
    for rank, gap in enumerate(distances[:10], start=1):
        print(f" {rank}: {gap}")

    print("\n" + "=" * 70)
    print("CONCLUSIONS")
    print("=" * 70)
    # The hash-chain line is worded to match pubkey_to_address (SHA-256, then
    # RIPEMD-160, then Base58 with a checksum).
    # NOTE(review): the "≈ 9 matches" figure below does not agree with
    # 100000 / 58**4 (about 0.009); left unchanged pending author confirmation.
    print("""
Analysis of vanity prefix clustering:
1. ADDRESS PREFIX DEPENDS ON:
- SHA256(pubkey) → RIPEMD160 → Base58Check
- This is a CRYPTOGRAPHIC HASH - designed to be random
2. EC POINT PROPERTIES:
- X coordinate is pseudo-random across the curve
- Y parity determines compressed key prefix (02/03)
- Neither correlates with address prefix
3. WHY VANITY TRIANGLES EXIST:
- Pure probability: ~1/58^4 chance for 4-char prefix match
- In 100K keys: expect ~100K/58^4 ≈ 9 matches
- These appear "clustered" but are random
4. IMPLICATION FOR KANGAROO:
- No way to detect "hot zones" from EC point
- Hash function breaks any correlation
- Vanity structure exists in ADDRESS space, not EC space
5. THE FUNDAMENTAL BARRIER:
- Kangaroo operates on EC points (pre-hash)
- Vanity patterns exist in addresses (post-hash)
- Hash function is one-way and pseudorandom
- NO OPTIMIZATION POSSIBLE from vanity geometry
""")
def main():
    """Script entry point: run the prefix-clustering analysis with defaults."""
    analyze_address_prefix_clustering()


# Run the analysis only when executed as a script, not on import.
if __name__ == "__main__":
    main()