Post
Topic
Board Bitcoin Discussion
Re: Bitcoin puzzle transaction ~32 BTC prize to who solves it
by
kTimesG
on 14/07/2024, 17:04:07 UTC
You are going off topic and pretending to be too smart without need. Just like Digaran... This is my answer @WanderingPhilospher what would happen if we switch same script from GMP to iceland. And I don't intend to talk with with you at all. You just raise the pressure without any script ever being shown..

It's OK, not as if it's the second time you insist on calling names after you fail to understand what people are trying to explain to you.

Like page 67 of this book which you probably think was written by Digaran:

https://members.loria.fr/PZimmermann/mca/mca-cup-0.5.9.pdf

Here's my benchmarking C routine that runs 12 M affine point additions per second. For simplicity, the second point is the same, but all the source and target points are random points on the curve. It's also SIMD-friendly for the product-tree construction (independent items).

I don't provide "scripts" exactly for the reason that, unlike you, I don't sell false hopes. This routine will probably be uncomprehensible to you simply because it's part of a larger code set that is only meant to benchmark things, not solve problems top to bottom. The caller of this method does a post-priori verification check to confirm that the batched addition worked correctly. But that is, like you say, "off-topic", so it's up to you, if you care (you don't), that it indeed works correctly and also that it surpasses well over 10 M additions/s, making your scrips sit in the dust, if applied into a full program. I think I will retire from this forum and leave you with this as a goodbye. BTW I only use C stuff to validate that the GPU software I'm working on runs correctly. When I will first crack #120 to confirm that my software stack is in a good shape, I will post the key here. It's gonna cost me around 1000$. Have fun guys.

Code:
#define BATCH_MODE 1

SECP256K1_INLINE
static void _ge_add_ge_batch(
    secp256k1_ge_ext *r,                // array out
    const secp256k1_ge_ext *a,          // array in
    const secp256k1_ge_ext *b           // one element for now
) {
    secp256k1_fe t0, t1, t2, t3, x2n;

#if BATCH_MODE == 1
    // product tree leafs + parent nodes
    secp256k1_fe xz[BATCH_SIZE * 2 - 1];
    secp256k1_fe xzOut[BATCH_SIZE * 2 - 1];
#elif BATCH_MODE == 2
    secp256k1_fe xz[BATCH_SIZE * 2];
#endif

    int i;

//    _NEG(x2n, b->x, 1);                                     // -x2                         m = 2
    for (i = 0; i < BATCH_SIZE; i++) {
        xz[i] = a[i].x;
//        _ADD(xz[i], x2n);                                   // XZ[i] = x1 - x2
        _ADD(xz[i], b->xn);                                 // XZ[i] = x1 - x2
    }

    // batched inversions
#if BATCH_MODE == 1
    // tree-like products, parallelization-friendly
    for (i = 0; i < BATCH_SIZE - 1; i++) {
        _MUL(xz[BATCH_SIZE + i], xz[i * 2], xz[i * 2 + 1]);
    }

    _INV(xzOut[BATCH_SIZE * 2 - 2], xz[2 * BATCH_SIZE - 2]);    // out[14] = inv(14)

    for (i = BATCH_SIZE - 2; i >= 0; i--) {
        _MUL(xzOut[i * 2], xz[i * 2 + 1], xzOut[BATCH_SIZE + i]);
        _MUL(xzOut[i * 2 + 1], xz[i * 2], xzOut[BATCH_SIZE + i]);
    }
#elif BATCH_MODE == 2
//    secp256k1_fe *p = &xz[BATCH_SIZE];
    xz[BATCH_SIZE] = xz[0];
    for (i = 1; i < BATCH_SIZE; i++) {
        _MUL(xz[BATCH_SIZE + i], xz[BATCH_SIZE + i - 1], xz[i]);                 // z[i] = z[i-1] * x[i]
//        p = &xz[BATCH_SIZE + i];
    }

    _INV(t3, xz[2 * BATCH_SIZE - 1]);                       // T3 = (x1 * x2 * ... * xN) ** -1

    for (i = BATCH_SIZE - 1; i > 0; i--) {
        _MUL(xz[BATCH_SIZE + i], t3, xz[BATCH_SIZE + i - 1]);   // z[i] = q * z[i-1]
        _MUL(t3, t3, xz[i]);                                // q = q * x[i]
    }
    xz[BATCH_SIZE] = t3;
#endif

    const secp256k1_ge_ext * _a = a;
    secp256k1_ge_ext * _r = r;
    const secp256k1_fe * _inv =
#if BATCH_MODE == 1
        xzOut;
#elif BATCH_MODE == 2
        xz + BATCH_SIZE;
#endif

    // this can be constant if b is constant
//    _NEG(t1, b->y, 1);                                      // T1 = -y2                         m = 1 + 1 = 2

    for (i = 0; i < BATCH_SIZE; i++) {
        t2 = _a->y;                                         // T2 = y1                          m = max_y

//        _ADD(t2, t1);                                       // T2 = y1 - y2                     m = max_y + 2(1)
        _ADD(t2, b->yn);                                       // T2 = y1 - y2                     m = max_y + 2(1)

        _MUL(t3, *_inv, t2);                                // T3 = (y1 - y2) / (x1 - x2)       m = 1
        _SQR(_r->x, t3);                                    // X3 = m**2                        m = 1

//        _ADD(_r->x, x2n);                                   // X3 = m**2 - x2                   m = 1 + 2(1) = 3(2)
        _ADD(_r->x, b->xn);                                   // X3 = m**2 - x2                   m = 1 + 2(1) = 3(2)

//        _NEG(t0, _a->x, 1);                                 // T0 = -x1                         m = max_x + 1
//        _ADD(_r->x, t0);                                    // X3 = m**2 - x1 - x2              max_x = 3 + max_x + 1
        _ADD(_r->x, _a->xn);

        fe_normalize_weak(&_r->x);

        _NEG(t2, _r->x, 1);                                 // T2 = -x3                         m = 1 + 1 = 2
        _r->xn = t2;

        _ADD(t2, b->x);                                     // T2 = x2 - x3                     m = 2 + 1 = 3
        _MUL(_r->y, t3, t2);                                // Y3 = m * (x2 - x3)               m = 1

//        _ADD(_r->y, t1);                                    // Y3 = m * (x2 - x3) - y2          m = 1 + 2 = 3
        _ADD(_r->y, b->yn);                                    // Y3 = m * (x2 - x3) - y2          m = 1 + 2 = 3

        fe_normalize(&_r->y);

        ++_a;
        ++_r;
        ++_inv;
    }
}