This probably depends on the batch size. Since an uncontended atomic operation costs only a few clock cycles, the slowdown might indicate that multiple cores are often accessing overlapping memory regions, forcing traffic between the CPU caches and main memory.
I also have a problem with my implementation.
The current implementation loads the entire baby table into memory before starting the search phase. For large puzzles (like puzzle 70), this becomes impractical.
For puzzle 70, the baby table would contain approximately 2^35 entries (34 billion+). With each entry being ~12 bytes (8-byte hash + 4-byte index), the raw entries alone would require about 412 GB (384 GiB) of RAM, and in-memory container overhead likely pushes that toward ~512 GB. Instead of loading the entire table at once, process each compressed partition one at a time.
For each DB part: decompress it into memory, search against the current DB part, and discard it before loading the next DB part.
// Modified search function
void partitioned_search(const Point& S, const mpz_class& start_range,
const mpz_class& end_range, const Point& m_P,
const string& puzzle_pubkey) {
bool found = false;
mpz_class found_key;
int part_num = 1;
auto st = chrono::high_resolution_clock::now();
while (!found) {
string filename = "baby_table_part_" + to_string(part_num) + ".gz";
ifstream test(filename);
if (!test.good()) {
if (part_num == 1) {
cerr << "[error] No baby table parts found!" << endl;
return;
}
break; // No more parts
}
test.close();
// Load just this part
auto baby_table_part = load_baby_table_part(filename);
if (baby_table_part.empty()) {
part_num++;
continue;
}
if (verbose) {
cout << "[+] Searching part " << part_num << " with "
<< baby_table_part.size() << " entries" << endl;
}
#pragma omp parallel
{
Point local_S = S;
mpz_class local_step = 0;
#pragma omp for schedule(dynamic)
for (int i = 0; i < omp_get_num_threads(); ++i) {
while (local_step < (end_range - start_range)) {
if (found) break;
string cpub = point_to_cpub(local_S);
string cpub_hash = hash_cpub(cpub);
auto it = baby_table_part.find(cpub_hash);
if (it != baby_table_part.end()) {
int b = it->second;
mpz_class k = start_range + local_step + b;
if (point_to_cpub(mul(k)) == puzzle_pubkey) {
#pragma omp critical
{
if (!found) {
found = true;
found_key = k;
auto et = chrono::high_resolution_clock::now();
chrono::duration<double> elapsed = et - st;
cout << "\n\033[01;32m[+] Solution found!\033[0m" << endl;
cout << "[+] Private key: " << k << endl;
cout << "[+] Hex: 0x" << hex << k << dec << endl;
cout << "[+] Time elapsed: " << elapsed.count() << " seconds\n";
}
}
break;
}
}
local_S = point_subtraction(local_S, m_P);
local_step += m;
}
}
}
part_num++;
}
if (!found) {
auto et = chrono::high_resolution_clock::now();
chrono::duration<double> elapsed = et - st;
cout << "\n\033[01;31m[!] Key not found in the specified range\033[0m" << endl;
cout << "[+] Time elapsed: " << elapsed.count() << " seconds\n";
}
}