# Based on the MCD script.
import hashlib
import random
import time
import math
import statistics
import scipy.stats as stats
import statsmodels.stats.power as smp
from math import ceil
# Configuration
TOTAL_SIZE = 100_000
RANGE_SIZE = 4_096
PREFIX_LENGTH = 3
SIMULATIONS = 5000
SECP256K1_ORDER = int("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141", 16)
print(f"""
=== Configuration ===
Total numbers: {TOTAL_SIZE:,}
Block size: {RANGE_SIZE:,}
Total blocks needed: {ceil(TOTAL_SIZE/RANGE_SIZE)}
Prefix: {PREFIX_LENGTH} characters (16^{PREFIX_LENGTH} = {16**PREFIX_LENGTH:,} combinations)
Simulations: {SIMULATIONS}
secp256k1 order: {SECP256K1_ORDER}
""")
def generate_h160(data):
    h = hashlib.new('ripemd160', str(data).encode('utf-8'))
    return h.hexdigest()
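# Note: hashlib's 'ripemd160' is backed by OpenSSL and can be unavailable on
# OpenSSL 3.x builds (hashlib.new raises ValueError there). A fallback sketch,
# assuming the third-party pycryptodome package is installed:
#
#     from Crypto.Hash import RIPEMD160
#     def generate_h160(data):
#         return RIPEMD160.new(str(data).encode('utf-8')).hexdigest()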
def shuffled_block_order(total_blocks):
    blocks = list(range(total_blocks))
    random.shuffle(blocks)
    return blocks
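# Baseline: walk the shuffled blocks in order and hash every key until the
# target is found; the number of hash checks is the cost metric.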
def sequential_search(dataset, block_size, target_hash, block_order):
    checks = 0
    for block_idx in block_order:
        start = block_idx * block_size
        end = min(start + block_size, len(dataset))
        for i in range(start, end):
            checks += 1
            if generate_h160(dataset[i]) == target_hash:
                return {"checks": checks, "found": True, "index": i}
    return {"checks": checks, "found": False}
def prefix_search(dataset, block_size, prefix_len, target_hash, block_order):
    prefix_hash = target_hash[:prefix_len]
    checks = 0
    ranges_to_scan = []
    omitted_ranges = []
    omitted_keys = []
    num_keys_to_omit = ceil(block_size * 0.2)
    skip_counter = 0
    scan_increment = 1
    for block_idx in block_order:
        start = block_idx * block_size
        end = min(start + block_size, len(dataset))
        found_prefix = False
        i = start
        while i < end:
            current_key = dataset[i]
            checks += 1
            h = generate_h160(current_key)
            if h == target_hash:
                return {"checks": checks, "found": True, "index": i}
            if not found_prefix and h.startswith(prefix_hash):
                found_prefix = True
                omitted_start = i + 1
                omitted_end = min(omitted_start + num_keys_to_omit, end)
                omitted_ranges.append((omitted_start, omitted_end))
                omitted_keys.extend(dataset[omitted_start:omitted_end])
                ranges_to_scan.append({"start": omitted_end, "end": end})
                skip_counter += 1
                if skip_counter >= 4 and ranges_to_scan:
                    for _ in range(min(scan_increment, len(ranges_to_scan))):
                        r = ranges_to_scan.pop(0)
                        for j in range(r["start"], r["end"]):
                            checks += 1
                            if generate_h160(dataset[j]) == target_hash:
                                return {"checks": checks, "found": True, "index": j}
                    skip_counter = 0
                    scan_increment += 1
                i = omitted_end
                continue
            i += 1
    for r in omitted_ranges:
        start_omit, end_omit = r
        for i in range(start_omit, end_omit):
            checks += 1
            if generate_h160(dataset[i]) == target_hash:
                return {"checks": checks, "found": True, "index": i}
    for r in ranges_to_scan:
        for i in range(r["start"], r["end"]):
            checks += 1
            if generate_h160(dataset[i]) == target_hash:
                return {"checks": checks, "found": True, "index": i, "searched_omitted": True}
    return {"checks": checks, "found": False, "omitted_keys": omitted_keys, "omitted_ranges": omitted_ranges}
def compute_cohens_d(list1, list2):
    if len(list1) < 2 or len(list2) < 2:
        return float('nan')
    n1, n2 = len(list1), len(list2)
    m1, m2 = statistics.mean(list1), statistics.mean(list2)
    s1, s2 = statistics.stdev(list1), statistics.stdev(list2)
    pooled_std = math.sqrt(((n1-1)*s1**2 + (n2-1)*s2**2) / (n1+n2-2))
    if pooled_std == 0:
        return float('nan')
    return (m1 - m2) / pooled_std
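# Coefficient of variation: the standard deviation as a percentage of the
# mean, giving a scale-free measure of how spread out the check counts are.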
def correct_coefficient_of_variation(data):
    if not data or statistics.mean(data) == 0:
        return float('nan')
    return (statistics.stdev(data) / statistics.mean(data)) * 100
def longest_streak(outcomes, letter):
    max_streak = current = 0
    for o in outcomes:
        current = current + 1 if o == letter else 0
        max_streak = max(max_streak, current)
    return max_streak
def ascii_bar(label, value, max_value, bar_length=50):
    bar_count = int((value / max_value) * bar_length) if max_value > 0 else 0
    return f"{label:12}: {'#' * bar_count} ({value})"
def get_confidence_interval(data, confidence=0.95):
    if len(data) < 2:
        return (0, 0)
    try:
        return stats.t.interval(
            confidence=confidence,
            df=len(data)-1,
            loc=statistics.mean(data),
            scale=stats.sem(data)
        )
    except Exception:
        return (statistics.mean(data), statistics.mean(data))
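# Inferential battery: Welch's t-test (unequal variances), Mann-Whitney U
# (distribution-free), Cohen's d, and a post-hoc power estimate from
# statsmodels. In the sample run below the power comes out as nan: with
# n = 5000 per group the noncentral-t computation can fail numerically even
# though the true power is effectively 100%. Calling
# smp.TTestIndPower().power(effect_size, nobs1, alpha, ratio=...) directly is
# a possible workaround (an untested suggestion, not part of the original).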
def enhanced_statistical_analysis(seq_checks, pre_checks, seq_success, pre_success):
    analysis = {}
    analysis['seq_mean'] = statistics.mean(seq_checks) if seq_checks else 0
    analysis['pre_mean'] = statistics.mean(pre_checks) if pre_checks else 0
    analysis['seq_ci'] = get_confidence_interval(seq_checks)
    analysis['pre_ci'] = get_confidence_interval(pre_checks)
    if len(seq_checks) > 1 and len(pre_checks) > 1:
        analysis['t_test'] = stats.ttest_ind(seq_checks, pre_checks, equal_var=False)
        analysis['mann_whitney'] = stats.mannwhitneyu(seq_checks, pre_checks)
        analysis['cohen_d'] = compute_cohens_d(seq_checks, pre_checks)
        effect_size = abs(analysis['cohen_d'])
        if effect_size > 0:
            analysis['power'] = smp.tt_ind_solve_power(
                effect_size=effect_size,
                nobs1=len(seq_checks),
                alpha=0.05,
                ratio=len(pre_checks)/len(seq_checks)
            )
        else:
            analysis['power'] = 0
    else:
        analysis['t_test'] = None
        analysis['mann_whitney'] = None
        analysis['cohen_d'] = 0
        analysis['power'] = 0
    analysis['risk_ratio'] = (seq_success/SIMULATIONS) / (pre_success/SIMULATIONS) if pre_success > 0 else 0
    return analysis
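# Simulation driver: each trial picks a random 100,000-key window within the
# secp256k1 scalar range, hides one target key in it, and runs both search
# methods over the same shuffled block order so they compete under identical
# conditions. Wall-clock time and hash-check counts are both recorded.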
def compare_methods():
    results = {
        "sequential": {"wins": 0, "success": 0, "checks": [], "times": []},
        "prefix": {"wins": 0, "success": 0, "checks": [], "times": []},
        "ties": 0
    }
    outcome_history = []
    total_blocks = ceil(TOTAL_SIZE / RANGE_SIZE)
    for _ in range(SIMULATIONS):
        max_offset = SECP256K1_ORDER - TOTAL_SIZE - 1
        offset = random.randint(0, max_offset)
        dataset = [offset + i for i in range(TOTAL_SIZE)]
        target_num = random.choice(dataset)
        target_hash = generate_h160(target_num)
        block_order = shuffled_block_order(total_blocks)
        start = time.perf_counter()
        seq_res = sequential_search(dataset, RANGE_SIZE, target_hash, block_order)
        seq_time = time.perf_counter() - start
        start = time.perf_counter()
        pre_res = prefix_search(dataset, RANGE_SIZE, PREFIX_LENGTH, target_hash, block_order)
        pre_time = time.perf_counter() - start
        for method, res, t in [("sequential", seq_res, seq_time), ("prefix", pre_res, pre_time)]:
            if res["found"]:
                results[method]["success"] += 1
                results[method]["checks"].append(res["checks"])
                results[method]["times"].append(t)
        if seq_res["found"] and pre_res["found"]:
            if seq_res["checks"] < pre_res["checks"]:
                results["sequential"]["wins"] += 1
                outcome_history.append("S")
            elif pre_res["checks"] < seq_res["checks"]:
                results["prefix"]["wins"] += 1
                outcome_history.append("P")
            else:
                results["ties"] += 1
                outcome_history.append("T")
    def get_stats(data):
        if not data:
            return {"mean": 0, "min": 0, "max": 0, "median": 0, "stdev": 0}
        return {
            "mean": statistics.mean(data),
            "min": min(data),
            "max": max(data),
            "median": statistics.median(data),
            "stdev": statistics.stdev(data) if len(data) > 1 else 0
        }
    seq_stats = get_stats(results["sequential"]["checks"])
    pre_stats = get_stats(results["prefix"]["checks"])
    seq_time_stats = get_stats(results["sequential"]["times"])
    pre_time_stats = get_stats(results["prefix"]["times"])
    seq_success_rate = results["sequential"]["success"] / SIMULATIONS
    pre_success_rate = results["prefix"]["success"] / SIMULATIONS
    total_comparisons = results["sequential"]["wins"] + results["prefix"]["wins"] + results["ties"]
    seq_win_rate = results["sequential"]["wins"] / total_comparisons if total_comparisons > 0 else 0
    pre_win_rate = results["prefix"]["wins"] / total_comparisons if total_comparisons > 0 else 0
    cv_seq = correct_coefficient_of_variation(results["sequential"]["checks"])
    cv_pre = correct_coefficient_of_variation(results["prefix"]["checks"])
    stats_analysis = enhanced_statistical_analysis(
        seq_checks=results["sequential"]["checks"],
        pre_checks=results["prefix"]["checks"],
        seq_success=results["sequential"]["success"],
        pre_success=results["prefix"]["success"]
    )
print(f"""
=== FINAL ANALYSIS ===
[Success Rates]
Sequential: {seq_success_rate:.1%} ({results['sequential']['success']}/{SIMULATIONS})
Prefix: {pre_success_rate:.1%} ({results['prefix']['success']}/{SIMULATIONS})
[Performance Metrics]
| Sequential | Prefix
---------------+---------------------+--------------------
Checks (mean) | {seq_stats['mean']:>12,.1f} ± {seq_stats['stdev']:,.1f} | {pre_stats['mean']:>12,.1f} ± {pre_stats['stdev']:,.1f}
Time (mean ms) | {seq_time_stats['mean']*1000:>12.2f} ± {seq_time_stats['stdev']*1000:.2f} | {pre_time_stats['mean']*1000:>12.2f} ± {pre_time_stats['stdev']*1000:.2f}
Min checks | {seq_stats['min']:>12,} | {pre_stats['min']:>12,}
Max checks | {seq_stats['max']:>12,} | {pre_stats['max']:>12,}
Coef. Variation| {cv_seq:>11.1f}% | {cv_pre:>11.1f}%
[Comparison When Both Succeed]
Sequential wins: {results['sequential']['wins']} ({seq_win_rate:.1%})
Prefix wins: {results['prefix']['wins']} ({pre_win_rate:.1%})
Ties: {results['ties']}
=== ADVANCED STATISTICS ===
[Confidence Intervals 95%]
Checks Sequential: {seq_stats['mean']:.1f} ({stats_analysis['seq_ci'][0]:.1f} - {stats_analysis['seq_ci'][1]:.1f})
Checks Prefix: {pre_stats['mean']:.1f} ({stats_analysis['pre_ci'][0]:.1f} - {stats_analysis['pre_ci'][1]:.1f})
[Statistical Tests]
Welch's t-test: {'t = %.3f, p = %.4f' % (stats_analysis['t_test'].statistic, stats_analysis['t_test'].pvalue) if stats_analysis['t_test'] else 'N/A'}
Mann-Whitney U: {'U = %.1f, p = %.4f' % (stats_analysis['mann_whitney'].statistic, stats_analysis['mann_whitney'].pvalue) if stats_analysis['mann_whitney'] else 'N/A'}
Effect Size (Cohen's d): {stats_analysis['cohen_d']:.3f}
[Power Analysis]
Statistical Power: {stats_analysis['power']:.1%}
[Risk/Benefit Ratio]
Success Ratio (Seq/Pre): {stats_analysis['risk_ratio']:.2f}:1
""")
    non_tie_outcomes = [o for o in outcome_history if o != "T"]
    # Expected longest run for a fair 50/50 sequence is roughly log2(n); the
    # actual win probabilities here are not 50/50, so treat it as a loose baseline.
    expected_streak = math.log2(len(non_tie_outcomes)) if non_tie_outcomes else 0
    streak_analysis = f"""
=== STREAK ANALYSIS ===
Longest Sequential streak: {longest_streak(outcome_history, 'S')}
Longest Prefix streak: {longest_streak(outcome_history, 'P')}
Expected max streak: {expected_streak:.1f} (for {len(non_tie_outcomes)} trials)
"""
    print(streak_analysis)
    max_wins = max(results["sequential"]["wins"], results["prefix"]["wins"], results["ties"])
    print("=== WIN DISTRIBUTION ===")
    print(ascii_bar("Sequential", results["sequential"]["wins"], max_wins))
    print(ascii_bar("Prefix", results["prefix"]["wins"], max_wins))
    print(ascii_bar("Ties", results["ties"], max_wins))

if __name__ == '__main__':
    compare_methods()
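Sample output from one run: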
=== Configuration ===
Total numbers: 100,000
Block size: 4,096
Total blocks needed: 25
Prefix: 3 characters (16^3 = 4,096 combinations)
Simulations: 5000
secp256k1 order: 115792089237316195423570985008687907852837564279074904382605163141518161494337
=== FINAL ANALYSIS ===
[Success Rates]
Sequential: 100.0% (5000/5000)
Prefix: 100.0% (5000/5000)
[Performance Metrics]
| Sequential | Prefix
---------------+---------------------+--------------------
Checks (mean) | 50,296.4 ± 29,080.6 | 55,576.3 ± 33,445.2
Time (mean ms) | 123.08 ± 71.47 | 141.39 ± 85.28
Min checks | 12 | 12
Max checks | 99,998 | 126,151
Coef. Variation| 57.8% | 60.2%
[Comparison When Both Succeed]
Sequential wins: 1243 (24.9%)
Prefix wins: 3558 (71.2%)
Ties: 199
=== ADVANCED STATISTICS ===
[Confidence Intervals 95%]
Checks Sequential: 50296.4 (49490.1 - 51102.7)
Checks Prefix: 55576.3 (54649.0 - 56503.5)
[Statistical Tests]
Welch's t-test: t = -8.424, p = 0.0000
Mann-Whitney U: U = 11394783.5, p = 0.0000
Effect Size (Cohen's d): -0.168
[Power Analysis]
Statistical Power: nan%
[Risk/Benefit Ratio]
Success Ratio (Seq/Pre): 1.00:1
=== STREAK ANALYSIS ===
Longest Sequential streak: 6
Longest Prefix streak: 23
Expected max streak: 12.2 (for 4801 trials)
=== WIN DISTRIBUTION ===
Sequential : ################# (1243)
Prefix : ################################################## (3558)
Ties : ## (199)
In most head-to-head runs (71.2%), Prefix finds the target in fewer checks. Note, however, that its mean checks and mean time are higher than Sequential's: it tends to win by small margins and occasionally lose by large ones (max checks 126,151 vs 99,998), so "better on average" does not hold.
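That split between win rate and mean cost is worth making explicit: Prefix can only win most comparisons while showing a worse mean if its losses are heavier than its wins (the double-scanned ranges occasionally push its check count past the dataset size). A minimal, self-contained sketch with made-up check counts (not the script's data) showing mean, median, and head-to-head win rate pulling in different directions:

import statistics

# Hypothetical check counts, for illustration only: method B usually needs
# slightly fewer checks than A, but occasionally far more.
a = [100, 100, 100, 100, 100]
b = [90, 90, 90, 90, 260]

print("means:", statistics.mean(a), statistics.mean(b))        # 100 vs 124
print("medians:", statistics.median(a), statistics.median(b))  # 100 vs 90
b_wins = sum(x > y for x, y in zip(a, b))
print("B wins", b_wins, "of", len(a), "head-to-head")          # 4 of 5

The script's mean-based table and its win counts are both correct; they simply answer different questions.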