Post
Topic
Board Bitcoin Discussion
Re: Bitcoin puzzle transaction ~32 BTC prize to who solves it
by
nomachine
on 13/03/2025, 17:36:58 UTC
Any chance you’d be willing to "accidentally" paste that code here? I promise I won’t tell anyone… except my CPU, which is very trustful.  Grin

Code:
git clone https://github.com/Dookoo2/Cyclone.git


Then copy and paste my version below into Cyclone.cpp, replacing its contents:
Code:
#include <iostream>
#include <iomanip>
#include <vector>
#include <string>
#include <sstream>
#include <fstream>
#include <cmath>
#include <chrono>
#include <cstring>
#include <algorithm>
#include <omp.h>
#include <random>
#include <immintrin.h>
#include <array>

// Adding program modules
#include "SECP256K1.h"
#include "Point.h"
#include "Int.h"
#include "IntGroup.h"

#define BISIZE 256
#if BISIZE == 256
#define NB64BLOCK 5
#define NB32BLOCK 10
#else
#error Unsupported size
#endif

// Small 64-bit pseudo-random generator with four words of state.
// next() returns the sum of the first and last state words and then
// scrambles the state with xor/shift/rotate steps.
class Xoshiro256plus {
public:
    // Expands one 64-bit seed into the four state words: word 0 is the seed
    // itself, every following word is derived from the word before it.
    Xoshiro256plus(uint64_t seed = 0) {
        uint64_t previous = seed;
        state[0] = previous;
        for (int index = 1; index < 4; ++index) {
            previous = 1812433253ULL * (previous ^ (previous >> 30)) + index;
            state[index] = previous;
        }
    }

    // Returns the next 64-bit output and advances the generator.
    uint64_t next() {
        uint64_t &s0 = state[0];
        uint64_t &s1 = state[1];
        uint64_t &s2 = state[2];
        uint64_t &s3 = state[3];

        // Output and shifted word are taken from the state before it is updated.
        const uint64_t output  = s0 + s3;
        const uint64_t shifted = s1 << 17;

        s2 ^= s0;
        s3 ^= s1;
        s1 ^= s2;
        s0 ^= s3;

        s2 ^= shifted;
        s3 = rotl(s3, 45);

        return output;
    }

private:
    // Rotates x left by k bits (callers pass 0 < k < 64).
    static inline uint64_t rotl(const uint64_t x, int k) {
        const uint64_t high = x << k;
        const uint64_t low  = x >> (64 - k);
        return high | low;
    }

    std::array<uint64_t, 4> state;
};

//------------------------------------------------------------------------------
// Constants
// Number of precomputed +i*G / -i*G points per thread; one batch holds
// 2 * POINTS_BATCH_SIZE candidate points.
static constexpr int POINTS_BATCH_SIZE = 256;
// Number of serialized public keys buffered before they are compared with the target.
static constexpr int HASH_BATCH_SIZE   = 8;

// Status output and progress saving frequency
// Minimum number of seconds between two status blocks on stdout.
static constexpr double statusIntervalSec = 5.0;
// Progress-save interval in seconds (not referenced by the code shown in this file).
static constexpr double saveProgressIntervalSec = 300.0;

// Progress-save counter; in this file it only appears in the OpenMP shared() clause.
static int g_progressSaveCount = 0;
// One hex string per worker thread holding the key that thread is currently working on.
static std::vector<std::string> g_threadPrivateKeys;

//------------------------------------------------------------------------------
// Appends `progressStr` plus a newline to progress.txt in the working directory.
// If the file cannot be opened, a message is written to stderr and nothing is saved.
void saveProgressToFile(const std::string &progressStr)
{
    std::ofstream progressFile("progress.txt", std::ios::app);
    if (!progressFile) {
        std::cerr << "Cannot open progress.txt for writing\n";
        return;
    }
    progressFile << progressStr << "\n";
}

//------------------------------------------------------------------------------
// Converts a hex string into a big number stored as 64-bit limbs, least
// significant limb first. The string is consumed from its end in groups of
// up to 16 hex digits; an empty string yields an empty vector.
// std::stoull throws if a group cannot be parsed.
std::vector<uint64_t> hexToBigNum(const std::string& hex) {
    std::vector<uint64_t> limbs;
    limbs.reserve((hex.size() + 15) / 16);

    size_t remaining = hex.size();   // digits not yet converted (a prefix of hex)
    while (remaining > 0) {
        const size_t take = std::min<size_t>(remaining, 16);
        remaining -= take;
        limbs.push_back(std::stoull(hex.substr(remaining, take), nullptr, 16));
    }
    return limbs;
}

// Converts a little-endian limb vector back to a lower-case hex string.
// The most significant non-zero limb is printed without padding; every lower
// limb is zero-padded to 16 digits. Zero high limbs are skipped, and a value
// of zero (including the empty vector that bigNumSubtract/bigNumDivide return
// for zero) is rendered as "0" instead of an empty string.
std::string bigNumToHex(const std::vector<uint64_t>& num) {
    // Find the number of significant limbs.
    size_t top = num.size();
    while (top > 0 && num[top - 1] == 0ULL)
        --top;
    if (top == 0)
        return "0";

    std::ostringstream oss;
    oss << std::hex << num[top - 1];
    oss << std::setfill('0');
    for (size_t i = top - 1; i-- > 0; ) {
        // setw applies to the next insertion only, so it is repeated per limb.
        oss << std::setw(16) << num[i];
    }
    return oss.str();
}

// Wraps a single 64-bit value as a one-limb big number.
std::vector<uint64_t> singleElementVector(uint64_t val) {
    return std::vector<uint64_t>(1, val);
}

// Adds two little-endian limb vectors and returns the sum. The shorter
// operand is treated as zero-extended; a final carry becomes an extra limb.
std::vector<uint64_t> bigNumAdd(const std::vector<uint64_t>& a, const std::vector<uint64_t>& b) {
    const size_t limbCount = std::max(a.size(), b.size());
    std::vector<uint64_t> total;
    total.reserve(limbCount + 1);

    uint64_t carryIn = 0;
    for (size_t pos = 0; pos < limbCount; ++pos) {
        const uint64_t lhs = (pos < a.size()) ? a[pos] : 0ULL;
        const uint64_t rhs = (pos < b.size()) ? b[pos] : 0ULL;

        // Unsigned wrap-around signals a carry; at most one of the two
        // additions can wrap, so the carry is always 0 or 1.
        const uint64_t partial = lhs + rhs;
        const uint64_t limb    = partial + carryIn;
        carryIn = (partial < lhs ? 1ULL : 0ULL) + (limb < partial ? 1ULL : 0ULL);

        total.push_back(limb);
    }
    if (carryIn != 0)
        total.push_back(carryIn);
    return total;
}

std::vector<uint64_t> bigNumSubtract(const std::vector<uint64_t>& a, const std::vector<uint64_t>& b) {
    std::vector<uint64_t> diff = a;
    uint64_t borrow = 0;
    for (size_t i = 0; i < b.size(); ++i) {
        uint64_t subtrahend = b[i];
        if (diff[i] < subtrahend + borrow) {
            diff[i] = diff[i] + (~0ULL) - subtrahend - borrow + 1ULL; // eqv diff[i] = diff[i] - subtrahend - borrow
            borrow = 1ULL;
        } else {
            diff[i] -= (subtrahend + borrow);
            borrow = 0ULL;
        }
    }
   
    for (size_t i = b.size(); i < diff.size() && borrow; ++i) {
        if (diff[i] == 0ULL) {
            diff[i] = ~0ULL;
        } else {
            diff[i] -= 1ULL;
            borrow = 0ULL;
        }
    }
    // delete leading zeros
    while (!diff.empty() && diff.back() == 0ULL)
        diff.pop_back();
    return diff;
}


std::pair<std::vector<uint64_t>, uint64_t> bigNumDivide(const std::vector<uint64_t>& a, uint64_t divisor) {
    std::vector<uint64_t> quotient(a.size(), 0ULL);
    uint64_t remainder = 0ULL;
    for (int i = (int)a.size() - 1; i >= 0; --i) {
        __uint128_t temp = ((__uint128_t)remainder << 64) | a[i];
        uint64_t q = (uint64_t)(temp / divisor);
        uint64_t r = (uint64_t)(temp % divisor);
        quotient[i] = q;
        remainder   = r;
    }
    while (!quotient.empty() && quotient.back() == 0ULL)
        quotient.pop_back();
    return { quotient, remainder };
}

// Interprets a hex string as a number and returns it as a long double.
// Every character shifts the accumulator by one hex digit; characters that
// are not hex digits contribute a digit value of zero.
long double hexStrToLongDouble(const std::string &hex) {
    long double acc = 0.0L;
    for (const char ch : hex) {
        int digit = 0;
        if (ch >= '0' && ch <= '9') {
            digit = ch - '0';
        } else if (ch >= 'a' && ch <= 'f') {
            digit = ch - 'a' + 10;
        } else if (ch >= 'A' && ch <= 'F') {
            digit = ch - 'A' + 10;
        }
        acc *= 16.0L;
        acc += digit;
    }
    return acc;
}

//------------------------------------------------------------------------------
// Left-pads a hex string with '0' to 64 characters; longer strings are returned unchanged.
static inline std::string padHexTo64(const std::string &hex) {
    if (hex.size() >= 64) {
        return hex;
    }
    std::string padded(64 - hex.size(), '0');
    padded += hex;
    return padded;
}
// Parses a hex string into an Int. Only the first 64 characters are used;
// they are copied into a zero-filled mutable buffer for Int::SetBase16.
static inline Int hexToInt(const std::string &hex) {
    char digits[65];
    std::memset(digits, 0, sizeof(digits));
    std::strncpy(digits, hex.c_str(), sizeof(digits) - 1);

    Int parsed;
    parsed.SetBase16(digits);
    return parsed;
}
static inline std::string intToHex(const Int &value) {
    Int temp;
    temp.Set((Int*)&value);
    return temp.GetBase16();
}
static inline bool intGreater(const Int &a, const Int &b) {
    std::string ha = ((Int&)a).GetBase16();
    std::string hb = ((Int&)b).GetBase16();
    if (ha.size() != hb.size()) return (ha.size() > hb.size());
    return (ha > hb);
}
static inline bool isEven(const Int &number) {
    return ((Int&)number).IsEven();
}

static inline std::string intXToHex64(const Int &x) {
    Int temp;
    temp.Set((Int*)&x);
    std::string hex = temp.GetBase16();
    if (hex.size() < 64)
        hex.insert(0, 64 - hex.size(), '0');
    return hex;
}

static inline std::string pointToCompressedHex(const Point &point) {
    return (isEven(point.y) ? "02" : "03") + intXToHex64(point.x);
}
static inline void pointToCompressedBin(const Point &point, uint8_t outCompressed[33]) {
    outCompressed[0] = isEven(point.y) ? 0x02 : 0x03;
    Int temp;
    temp.Set((Int*)&point.x);
    for (int i = 0; i < 32; i++) {
        outCompressed[1 + i] = (uint8_t)temp.GetByte(31 - i);
    }
}

//------------------------------------------------------------------------------
// Prints the one-line command synopsis to stderr.
static void printUsage(const char* programName) {
    std::cerr << "Usage: " << programName;
    std::cerr << " -p <puzzle> -k <public_key_hex>\n";
}

// Formats a duration in seconds as HH:MM:SS. The fraction is truncated and
// each field is zero-padded to at least two digits.
static std::string formatElapsedTime(double seconds) {
    const int whole = (int)seconds;
    const int fields[3] = { whole / 3600, (whole % 3600) / 60, whole % 60 };

    std::ostringstream text;
    text << std::setfill('0');
    for (int k = 0; k < 3; ++k) {
        if (k != 0) {
            text << ":";
        }
        text << std::setw(2) << fields[k];
    }
    return text.str();
}

//------------------------------------------------------------------------------
static void printStatsBlock(int numCPUs, const std::string &targetPublicKeyHex,
                            const std::string &rangeStr, double mkeysPerSec,
                            unsigned long long totalChecked, double elapsedTime,
                            int puzzle)
{
    static bool firstPrint = true;
    if (!firstPrint) {
        std::cout << "\033[6A";
    } else {
        firstPrint = false;
    }
    std::cout << "================= WORK IN PROGRESS =================\n";
    std::cout << "Puzzle        : " << puzzle << "\n";  // Print puzzle value
    //std::cout << "Range         : " << rangeStr << "\n";
    //std::cout << "Target Public Key: " << targetPublicKeyHex << "\n";
    std::cout << "CPU Threads   : " << numCPUs << "\n";
    std::cout << "Mkeys/s       : " << std::fixed << std::setprecision(2) << mkeysPerSec << "\n";
    std::cout << "Total Checked : " << totalChecked << "\n";
    std::cout << "Elapsed Time  : " << formatElapsedTime(elapsedTime) << "\n";
    std::cout.flush();
}


// Key sub-range assigned to one worker thread, stored as hex strings.
struct ThreadRange {
    std::string startHex;   // first key of the sub-range
    std::string endHex;     // last key of the sub-range (inclusive)
};

// One entry per worker thread; filled by main() when it splits the full range.
static std::vector<ThreadRange> g_threadRanges;

// Clock-based seed helper.
class Timer {
public:
    // Returns up to `length` leading hex digits of the current
    // high_resolution_clock reading in nanoseconds since its epoch.
    static std::string getSeed(int length) {
        const auto sinceEpoch = std::chrono::high_resolution_clock::now().time_since_epoch();
        const auto nanos = std::chrono::duration_cast<std::chrono::nanoseconds>(sinceEpoch).count();

        std::ostringstream hexText;
        hexText << std::hex << nanos;

        const std::string allDigits = hexText.str();
        return allDigits.substr(0, length);
    }
};

// Draws a pseudo-random private key in the inclusive interval
// [minKey, minKey + range].
//
// minKey : lower bound of the interval.
// range  : width of the interval, i.e. maxKey - minKey as computed by main().
// rng    : per-thread generator; advanced by NB64BLOCK calls to next().
//
// NB64BLOCK random 64-bit words are concatenated and then reduced modulo
// (range + 1). Reducing modulo `range` itself would make the upper bound
// unreachable and would be a modulo-by-zero for a single-key interval.
Int generateRandomPrivateKey(Int minKey, Int range, Xoshiro256plus &rng) {
    Int randomPrivateKey((uint64_t)0);

    // Generate random values in chunks of 64 bits using Xoshiro256plus
    for (int i = 0; i < NB64BLOCK; ++i) {
        uint64_t randVal = rng.next();
        randomPrivateKey.ShiftL(64);  // Shift left by 64 bits
        randomPrivateKey.Add(randVal);
    }

    // `range` is a by-value copy, so turning it into the number of keys in
    // the interval does not affect the caller.
    range.Add((uint64_t)1);

    // Apply modulo operation and add minKey
    randomPrivateKey.Mod(&range);
    randomPrivateKey.Add(&minKey);

    return randomPrivateKey;
}

// Inclusive bounds of the searched key range; assigned by main() from the -p puzzle number.
Int minKey, maxKey;

int main(int argc, char *argv[]) {
    bool publicKeyProvided = false, rangeProvided = false;
    std::string targetPublicKeyHex;
    std::vector<uint8_t> targetPublicKey;
    int puzzle = 0;  // Declare puzzle variable

    // Parse command-line arguments
    for (int i = 1; i < argc; i++) {
        if (!std::strcmp(argv[i], "-k") && i + 1 < argc) {  // Use -k for public_key_hex
            targetPublicKeyHex = argv[++i];
            publicKeyProvided = true;
            // Convert the hex string to a byte array
            targetPublicKey.resize(33);
            for (size_t j = 0; j < 33; j++) {
                targetPublicKey[j] = std::stoul(targetPublicKeyHex.substr(j * 2, 2), nullptr, 16);
            }
        } else if (!std::strcmp(argv[i], "-p") && i + 1 < argc) {
            puzzle = std::stoi(argv[++i]);
            if (puzzle <= 0) {
                std::cerr << "Invalid puzzle value. Must be greater than 0.\n";
                return 1;
            }

            Int one, range;
            one.SetBase10(const_cast<char*>("1"));
            minKey = one;
            minKey.ShiftL(puzzle - 1); // Start of range: 2^(puzzle-1)
            maxKey = one;
            maxKey.ShiftL(puzzle);     // End of range: 2^puzzle - 1
            maxKey.Sub(&one);
            range = maxKey;
            range.Sub(&minKey);

            rangeProvided = true;
        } else {
            std::cerr << "Unknown parameter: " << argv[i] << "\n";
            printUsage(argv[0]);
            return 1;
        }
    }

    if (!publicKeyProvided || !rangeProvided) {
        std::cerr << "Both -k and -p are required!\n";
        printUsage(argv[0]);
        return 1;
    }

    // Convert range to big numbers
    auto rangeStart = hexToBigNum(intToHex(minKey));
    auto rangeEnd = hexToBigNum(intToHex(maxKey));

    // Validate range
    bool validRange = false;
    if (rangeStart.size() < rangeEnd.size()) {
        validRange = true;
    } else if (rangeStart.size() > rangeEnd.size()) {
        validRange = false;
    } else {
        validRange = true;
        for (int i = (int)rangeStart.size() - 1; i >= 0; --i) {
            if (rangeStart[i] < rangeEnd[i]) {
                break;
            } else if (rangeStart[i] > rangeEnd[i]) {
                validRange = false;
                break;
            }
        }
    }
    if (!validRange) {
        std::cerr << "Range start must be less than range end.\n";
        return 1;
    }

    auto rangeSize = bigNumSubtract(rangeEnd, rangeStart);
    rangeSize = bigNumAdd(rangeSize, singleElementVector(1ULL));

    const std::string rangeSizeHex = bigNumToHex(rangeSize);
   
    const long double totalRangeLD = hexStrToLongDouble(rangeSizeHex);

    const int numCPUs = omp_get_num_procs();
    g_threadPrivateKeys.resize(numCPUs, "0");

    auto [chunkSize, remainder] = bigNumDivide(rangeSize, (uint64_t)numCPUs);
    g_threadRanges.resize(numCPUs);

    std::vector<uint64_t> currentStart = rangeStart;
    for (int t = 0; t < numCPUs; t++) {
        auto currentEnd = bigNumAdd(currentStart, chunkSize);
        if (t < (int)remainder) {
            currentEnd = bigNumAdd(currentEnd, singleElementVector(1ULL));
        }
        currentEnd = bigNumSubtract(currentEnd, singleElementVector(1ULL));

        g_threadRanges[t].startHex = bigNumToHex(currentStart);
        g_threadRanges[t].endHex   = bigNumToHex(currentEnd);

        currentStart = bigNumAdd(currentEnd, singleElementVector(1ULL));
    }
    const std::string displayRange = g_threadRanges.front().startHex + ":" + g_threadRanges.back().endHex;

    unsigned long long globalComparedCount = 0ULL;
    double globalElapsedTime = 0.0;
    double mkeysPerSec       = 0.0;

    const auto tStart = std::chrono::high_resolution_clock::now();
    auto lastStatusTime = tStart;
    auto lastSaveTime   = tStart;

    bool matchFound            = false;
    std::string foundPrivateKeyHex;

    Int one; one.SetBase10(const_cast<char*>("1"));
    Int minKey = one;
    minKey.ShiftL(puzzle - 1); // Start of range: 2^(puzzle-1)
    Int maxKey = one;
    maxKey.ShiftL(puzzle);     // End of range: 2^puzzle - 1
    maxKey.Sub(&one);
    Int range = maxKey;
    range.Sub(&minKey);

    Secp256K1 secp;
    secp.Init();

#pragma omp parallel num_threads(numCPUs) \
    shared(globalComparedCount, globalElapsedTime, mkeysPerSec, matchFound, \
           foundPrivateKeyHex, lastStatusTime, lastSaveTime, g_progressSaveCount, \
           g_threadPrivateKeys, tStart)
{
    const int threadId = omp_get_thread_num();

    // Initialize Xoshiro256plus PRNG for this thread
    Xoshiro256plus rng(std::chrono::steady_clock::now().time_since_epoch().count() + threadId);

    Int privateKey = hexToInt(g_threadRanges[threadId].startHex);
    const Int threadRangeEnd = hexToInt(g_threadRanges[threadId].endHex);

    #pragma omp critical
    {
        g_threadPrivateKeys[threadId] = padHexTo64(intToHex(privateKey));
    }

    // Precomputing +i*G and -i*G for i=0..255
    std::vector<Point> plusPoints(POINTS_BATCH_SIZE);
    std::vector<Point> minusPoints(POINTS_BATCH_SIZE);
    for (int i = 0; i < POINTS_BATCH_SIZE; i++) {
        Int tmp; tmp.SetInt32(i);
        Point p = secp.ComputePublicKey(&tmp);
        plusPoints[i] = p;
        p.y.ModNeg();
        minusPoints[i] = p;
    }

    // Arrays for batch-adding
    std::vector<Int>  deltaX(POINTS_BATCH_SIZE);
    IntGroup modGroup(POINTS_BATCH_SIZE);

    // Save 512 publickeys
    const int fullBatchSize = 2 * POINTS_BATCH_SIZE;
    std::vector<Point> pointBatch(fullBatchSize);

    // Buffers for hashing
    uint8_t localPubKeys[fullBatchSize][33];
    uint8_t localHashResults[HASH_BATCH_SIZE][20];
    int localBatchCount = 0;
    int pointIndices[HASH_BATCH_SIZE];

    // Local count
    unsigned long long localComparedCount = 0ULL;

    // Download the target (public key) for fast compare
    __m128i target16 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(targetPublicKey.data()));

    // Main loop for generating random private keys
    while (!matchFound) {
        // Generate a random private key within the thread's range using Xoshiro256++
        Int currentBatchKey = generateRandomPrivateKey(minKey, range, rng);

        currentBatchKey.Set(&privateKey);
        Point startPoint = secp.ComputePublicKey(&currentBatchKey);

        #pragma omp critical
        {
            g_threadPrivateKeys[threadId] = padHexTo64(intToHex(privateKey));
        }

        // Divide the batch of 512 keys into 2 blocks of 256 keys, count +256 and -256 from the center G-point of the batch
        // First pointBatch[0..255] +
        for (int i = 0; i < POINTS_BATCH_SIZE; i++) {
            deltaX[i].ModSub(&plusPoints[i].x, &startPoint.x);
        }
        modGroup.Set(deltaX.data());
        modGroup.ModInv();
        for (int i = 0; i < POINTS_BATCH_SIZE; i++) {
            Point tempPoint = startPoint;
            Int deltaY;
            deltaY.ModSub(&plusPoints[i].y, &startPoint.y);
            Int slope;
            slope.ModMulK1(&deltaY, &deltaX[i]);
            Int slopeSq;
            slopeSq.ModSquareK1(&slope);

            Int tmpX;
            tmpX.Set(&startPoint.x);
            tmpX.ModNeg();
            tmpX.ModAdd(&slopeSq);
            tmpX.ModSub(&plusPoints[i].x);
            tempPoint.x.Set(&tmpX);

            Int diffX;
            diffX.Set(&startPoint.x);
            diffX.ModSub(&tempPoint.x);
            diffX.ModMulK1(&slope);
            tempPoint.y.ModNeg();
            tempPoint.y.ModAdd(&diffX);

            pointBatch[i] = tempPoint;
        }

        // Second pointBatch[256..511] -
        for (int i = 0; i < POINTS_BATCH_SIZE; i++) {
            Point tempPoint = startPoint;
            Int deltaY;
            deltaY.ModSub(&minusPoints[i].y, &startPoint.y);
            Int slope;
            slope.ModMulK1(&deltaY, &deltaX[i]);
            Int slopeSq;
            slopeSq.ModSquareK1(&slope);

            Int tmpX;
            tmpX.Set(&startPoint.x);
            tmpX.ModNeg();
            tmpX.ModAdd(&slopeSq);
            tmpX.ModSub(&minusPoints[i].x);
            tempPoint.x.Set(&tmpX);

            Int diffX;
            diffX.Set(&startPoint.x);
            diffX.ModSub(&tempPoint.x);
            diffX.ModMulK1(&slope);
            tempPoint.y.ModNeg();
            tempPoint.y.ModAdd(&diffX);

            pointBatch[POINTS_BATCH_SIZE + i] = tempPoint;
        }

        // Construct local buffer
        for (int i = 0; i < fullBatchSize; i++) {
            pointToCompressedBin(pointBatch[i], localPubKeys[localBatchCount]);
            pointIndices[localBatchCount] = i;
            localBatchCount++;

            // 8 keys are ready - time to use avx2
            if (localBatchCount == HASH_BATCH_SIZE) {
                // Results check
                for (int j = 0; j < HASH_BATCH_SIZE; j++) {
                    __m128i cand16 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(localPubKeys[j]));
                    __m128i cmp = _mm_cmpeq_epi8(cand16, target16);
                    if (_mm_movemask_epi8(cmp) == 0xFFFF) {
                        // Checking last 4 bytes (33 - 16)
                        if (!matchFound && std::memcmp(localPubKeys[j], targetPublicKey.data(), 33) == 0) {
                            #pragma omp critical
                            {
                                if (!matchFound) {
                                    matchFound = true;
                                    auto tEndTime = std::chrono::high_resolution_clock::now();
                                    globalElapsedTime = std::chrono::duration<double>(tEndTime - tStart).count();
                                    mkeysPerSec = (double)(globalComparedCount + localComparedCount) / globalElapsedTime / 1e6;

                                    // Recovering private key
                                    Int matchingPrivateKey;
                                    matchingPrivateKey.Set(&currentBatchKey);
                                    int idx = pointIndices[j];
                                    if (idx < 256) {
                                        Int offset; offset.SetInt32(idx);
                                        matchingPrivateKey.Add(&offset);
                                    } else {
                                        Int offset; offset.SetInt32(idx - 256);
                                        matchingPrivateKey.Sub(&offset);
                                    }
                                    foundPrivateKeyHex = padHexTo64(intToHex(matchingPrivateKey));
                                }
                            }
                            #pragma omp cancel parallel
                        }
                        localComparedCount++;
                    } else {
                        localComparedCount++;
                    }
                }
                localBatchCount = 0;
            }
        }

        // Next step
        {
            Int step;
            step.SetInt32(fullBatchSize - 2); // 510
            privateKey.Add(&step);
        }

        // Time to show status
        auto now = std::chrono::high_resolution_clock::now();
        double secondsSinceStatus = std::chrono::duration<double>(now - lastStatusTime).count();
        if (secondsSinceStatus >= statusIntervalSec) {
            #pragma omp critical
            {
                globalComparedCount += localComparedCount;
                localComparedCount = 0ULL;
                globalElapsedTime = std::chrono::duration<double>(now - tStart).count();
                mkeysPerSec = (double)globalComparedCount / globalElapsedTime / 1e6;

                printStatsBlock(numCPUs, targetPublicKeyHex, displayRange,
                mkeysPerSec, globalComparedCount,
                globalElapsedTime, puzzle);
                lastStatusTime = now;
            }
        }

        if (matchFound) {
            break;
        }
    } // while(true)

    // Adding local count
    #pragma omp atomic
    globalComparedCount += localComparedCount;
} // end of parallel section
    // Main results
    auto tEnd = std::chrono::high_resolution_clock::now();
    globalElapsedTime = std::chrono::duration<double>(tEnd - tStart).count();

    if (!matchFound) {
        mkeysPerSec = (double)globalComparedCount / globalElapsedTime / 1e6;
        std::cout << "\nNo match found.\n";
        std::cout << "Total Checked : " << globalComparedCount << "\n";
        std::cout << "Elapsed Time  : " << formatElapsedTime(globalElapsedTime) << "\n";
        std::cout << "Speed         : " << mkeysPerSec << " Mkeys/s\n";
        return 0;
    }


    // If the key was found
    std::cout << "================== FOUND MATCH! ==================\n";
    std::cout << "Private Key   : " << foundPrivateKeyHex << "\n";
    std::cout << "Total Checked : " << globalComparedCount << "\n";
    std::cout << "Elapsed Time  : " << formatElapsedTime(globalElapsedTime) << "\n";
    std::cout << "Speed         : " << mkeysPerSec << " Mkeys/s\n";
    return 0;
}

Makefile
Code:
# Compiler
CXX = g++

# Compiler flags
# -march=native ties the binary to the build machine's CPU; -fopenmp is
# required by the worker threads in Cyclone.cpp.
CXXFLAGS = -m64 -std=c++17 -Ofast -mssse3 -Wall -Wextra \
           -Wno-write-strings -Wno-unused-variable -Wno-deprecated-copy \
           -Wno-unused-parameter -Wno-sign-compare -Wno-strict-aliasing \
           -Wno-unused-but-set-variable \
           -march=native -mtune=native \
           -funroll-loops -ftree-vectorize -fstrict-aliasing -fno-semantic-interposition \
           -fvect-cost-model=unlimited -fno-trapping-math -fipa-ra -flto \
           -fassociative-math -fopenmp -mavx2 -mbmi2 -madx

# Source files
SRCS = Cyclone.cpp SECP256K1.cpp Int.cpp Timer.cpp IntGroup.cpp IntMod.cpp \
       Point.cpp

# Object files
OBJS = $(SRCS:.cpp=.o)

# Target executable
TARGET = Cyclone

# Default target
all: fix_rdtsc $(TARGET)

# Target to replace __rdtsc with my_rdtsc
# Note: this edits every .cpp file below the current directory in place.
fix_rdtsc:
	find . -type f -name '*.cpp' -exec sed -i 's/__rdtsc/my_rdtsc/g' {} +

# Link the object files to create the executable and then delete .o files
# (recipe lines must start with a TAB character)
$(TARGET): $(OBJS)
	$(CXX) $(CXXFLAGS) -o $(TARGET) $(OBJS)
	rm -f $(OBJS) && chmod +x $(TARGET)

# Compile each source file into an object file
%.o: %.cpp
	$(CXX) $(CXXFLAGS) -c $< -o $@

# Clean up build files
clean:
	echo "Cleaning..."
	rm -f $(OBJS) $(TARGET)

# Phony targets
.PHONY: all clean fix_rdtsc

This version uses Xoshiro256plus. I get about 380M keys/s.

Almost feels like smoke is coming out of the 7985WX!  Grin