Re: Probabilistic search of prefixes vs random+sequential

What if we have limited resources and can scan only part of range, for example, only 10%?
Difference becomes even more obvious.
Here is modified script:

Code:

import hashlib
import random
import time

# Configuration
TOTAL_SIZE = 100000
RANGE_SIZE = 4096
PREFIX_LENGTH = 3
SIMULATIONS = 2000
SECP256K1_ORDER = int("0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141", 16)

print(f"""
=== Configuration ===
Total numbers: {TOTAL_SIZE:,}
Block size: {RANGE_SIZE:,}
Prefix: {PREFIX_LENGTH} characters (16^{PREFIX_LENGTH} combinations)
Simulations: {SIMULATIONS}
secp256k1 Order: {SECP256K1_ORDER}
""")

def generate_h160(data):
    h = hashlib.new('ripemd160', str(data).encode('utf-8'))
    return h.hexdigest()

def shuffled_range(n):
    arr = list(range(n + 1))
    random.shuffle(arr)
    return arr

def sequential_search(dataset, block, target_hash, order, max_iters):
    checks = 0
    for idx in order:
        start = idx * block
        end = start + block
        for i in range(start, end):
            checks += 1
            if checks > max_iters:
                return {"checks": TOTAL_SIZE, "found": False}
            if generate_h160(dataset[i]) == target_hash:
                return {"checks": checks, "found": True}
    return {"checks": checks, "found": False}

def precise_search(dataset, block, prefix, target_hash, order, max_iters):
    prefix_hash = target_hash[:prefix]
    checks = 0
    ranges = []
    for idx in order:
        start = idx * block
        end = start + block
        found_prefix = False
        for i in range(start, end):
            checks += 1
            if checks > max_iters:
                return {"checks": TOTAL_SIZE, "found": False}
            h = generate_h160(dataset[i])
            if h == target_hash:
                return {"checks": checks, "found": True}
            if not found_prefix and h.startswith(prefix_hash):
                found_prefix = True
                ranges.append({"start": i + 1, "end": end})
                break
    for r in ranges:
        for i in range(r["end"] - 1, r["start"] - 1, -1):
            checks += 1
            if checks > max_iters:
                return {"checks": TOTAL_SIZE, "found": False}
            if generate_h160(dataset[i]) == target_hash:
                return {"checks": checks, "found": True}
    return {"checks": checks, "found": False}

def compare_methods():
    results = {
        "sequential": {"wins": 0, "total_checks": 0, "total_time": 0},
        "precise": {"wins": 0, "total_checks": 0, "total_time": 0},
        "ties": 0
    }

    for i in range(SIMULATIONS):
        max_range = SECP256K1_ORDER - TOTAL_SIZE - 1
        random_offset = random.randrange(max_range)
        R = 1 + random_offset

        dataset = [R + i for i in range(TOTAL_SIZE)]
        target_num = random.choice(dataset)
        target_hash = generate_h160(target_num)
        blocks = TOTAL_SIZE // RANGE_SIZE
        order = shuffled_range(blocks - 1)

        #for test here we can scan only small part of range
        max_iters = TOTAL_SIZE / 10

        start_time = time.perf_counter()
        seq_result = sequential_search(dataset, RANGE_SIZE, target_hash, order, max_iters)
        end_time = time.perf_counter()
        seq_time = end_time - start_time

        start_time = time.perf_counter()
        pre_result = precise_search(dataset, RANGE_SIZE, PREFIX_LENGTH, target_hash, order, max_iters)
        end_time = time.perf_counter()
        pre_time = end_time - start_time

        if seq_result["found"]:
            results["sequential"]["total_checks"] += seq_result["checks"]
            results["sequential"]["total_time"] += seq_time

        if pre_result["found"]:
            results["precise"]["total_checks"] += pre_result["checks"]
            results["precise"]["total_time"] += pre_time

        if seq_result["checks"] < pre_result["checks"]:
            results["sequential"]["wins"] += 1
        elif seq_result["checks"] > pre_result["checks"]:
            results["precise"]["wins"] += 1
        else:
            results["ties"] += 1

        print(f"Simulation {i + 1}: Sequential = {seq_result['checks']} checks in {seq_time:.6f}s | Prefix = {pre_result['checks']} checks in {pre_time:.6f}s")

    avg_success_rate_sequential = (results["sequential"]["total_checks"] / results["sequential"]["wins"]
                                   if results["sequential"]["wins"] > 0 else float('inf'))
    avg_success_rate_precise = (results["precise"]["total_checks"] / results["precise"]["wins"]
                                if results["precise"]["wins"] > 0 else float('inf'))
    avg_time_sequential = (results["sequential"]["total_time"] / results["sequential"]["wins"]
                           if results["sequential"]["wins"] > 0 else float('inf'))
    avg_time_precise = (results["precise"]["total_time"] / results["precise"]["wins"]
                        if results["precise"]["wins"] > 0 else float('inf'))

    print(f"""
=== FINAL RESULTS ===
Wins:
Sequential: {results['sequential']['wins']}
Prefix: {results['precise']['wins']}
Ties+Fails: {results['ties']}

Total Checks:

Sequential: {results['sequential']['total_checks']}
Prefix: {results['precise']['total_checks']}
Total Time:

Sequential: {results['sequential']['total_time']:.6f} seconds
Prefix: {results['precise']['total_time']:.6f} seconds

Averages (Total Time / Wins):

Sequential : {avg_time_sequential:.6f} seconds/victory
Prefix : {avg_time_precise:.6f} seconds/victory

Checks per Win:
Sequential : {avg_success_rate_sequential:.2f} checks/win
Prefix : {avg_success_rate_precise:.2f} checks/win
""")

if __name__ == '__main__':
    compare_methods()

Code:

=== FINAL RESULTS ===
Wins:
Sequential: 56
Prefix: 128
Ties+Fails: 1816