Post Reply 
Fast remainder
05-12-2021, 12:05 AM
Post: #3
RE: Fast remainder
(05-10-2021 02:44 PM)Albert Chan Wrote:  Update2: unroll loop 1 time (but, 2 fastmod per loop), n=60 timing at 14.5 sec (1.5 sec saving)
The reason for the update is to compare effect of replacing 2 16-bits fastmod with 1 32-bits mod

First experiment, for the inner loop:

< d = fastmod((d + x[i-1])*n, k, c);
< d = fastmod((d + x[i])*n, k, c);

> d = (d + x[i-1])*n;
> d = (d + x[i])*n % k

For n=60, timings at 19.5 sec. (slower, by 5 sec)
This confirmed Lemire's chart. fastmod is about 3 times as fast (blue vs red)

[Image: hashbenches-skylake-clang-300x180.png]

---

We could pre-compute multiply constants and shifts, to speed-up quotient (x%k = x-q*k)
Unfortunately, code is more involved ... I use libdivide.h, for fast divide.
(It is just a header file, very easy to use; for C++, it is even easier)

libdivide version is good. n=60 timings at 14.8 sec (slower, by only 0.3 sec)
For this tiny cost, we have both quotient and remainder !

And, it cover more bases, 2n^3 ≤ 2^32 → Base n ≤ 1290

Code:
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <libdivide.h>

// 32-bits mod, x % k = x-q*k, where q is the quotient
static uint32_t fastmod(uint32_t x, uint32_t k, struct libdivide_u32_t *c)
{
    return x - libdivide_u32_do(x, c) * k;  // = x % k
}

uint32_t gcd(uint32_t a, uint32_t b)
{
    for(;;) {
        if (b == 0) return a;
        a %= b;
        if (a == 0) return b;
        b %= a;
    }
}

void printx(uint32_t *x, uint32_t n)
{
    printf("{ ");
    for(uint32_t i=1; i<n; i++) printf("%d ", x[i]);
    printf("}\n");
}

void recurse(uint32_t *a, uint32_t n, uint32_t k,
             struct libdivide_u32_t *fast_d, uint32_t *x)
{
    if (k==n) return printx(x, n);
    uint32_t d = 0, step = k+k;
    struct libdivide_u32_t *c = &fast_d[k];
    for(uint32_t i=2; i < k; i+=2) {    // assumed n <= 1290
        d = (d + x[i-1])*n;
        d = fastmod((d + x[i])*n, k, c);
    }

    if (k%2) {  // odd k
        d = k - d;                      // valid (mod k)
        if (d%2 == 0) d += k;           // k = d = 1 (mod 2)
    } else {    // even k
        d = (d + x[k-1])*n;             // missed iteration
        d = k - fastmod(d, k, c);       // valid (mod k)
        if (k%4 == 0) step = k;         // step  = 0 (mod 4)
        else if ((n+d)%4 == 0) d += k;  // n+d+k = 0 (mod 4)
    }

    for(uint32_t bucket = a[2*n+k]; d < n; d += step) {
        if (a[d] && a[n+d] == bucket) {
            x[k] = d;
            a[d] = 0;                   // mark taken
            recurse(a, n, k+1, fast_d, x);
            a[d] = d;                   // restore
        }
    }
}

void puzzle(uint32_t n) {       // polydivisible number, base n
    if (n%2) return;            // n must be even
    uint32_t a[3*n], x[n], i, j, g, bad = 0;
    struct libdivide_u32_t fast_d[n];
    for(i=1; i < n; i++) {
        a[i] = i;               //   +1 to  n-1 = duplicate check
        fast_d[i] = libdivide_u32_gen(i);
        if ((g = a[n+i] = gcd(n,i)) != (a[2*n+i] = gcd(2*n,i))) {
            for (x[bad] = g, j=0; x[j] != g; j++) ;
            if (j == bad) bad++;    // bad buckets
        }
    }
    for(i=0; i < bad; i++) {        // fix bad buckets
        for(g = x[i], j=2*n+1; j < 3*n; j++)
            if (g == a[j]) a[j-n] = g+g;
    }
    recurse(a, n, 1, fast_d, x-1);  // arrays all 1-based
}

int main(int argc, char **argv)
{
    uint32_t n = 0;
    if (argc > 1) n = atoi(argv[1]);
    if (n-1 < 1290) {puzzle(n); return 0;}
    return printf("Usage %s n, 0 < n <= 1290\n", argv[0]);
}

Perhaps of interest:
How do compilers optimize divisions?
http://homepage.divms.uiowa.edu/~jones/bcd/
Find all posts by this user
Quote this message in a reply
Post Reply 


Messages In This Thread
Fast remainder - Albert Chan - 05-09-2021, 08:48 PM
RE: Fast remainder - Albert Chan - 05-10-2021, 02:44 PM
RE: Fast remainder - Albert Chan - 05-12-2021 12:05 AM
RE: Fast remainder - Albert Chan - 05-12-2021, 03:10 PM
RE: Fast remainder - Albert Chan - 05-12-2021, 11:03 PM
RE: Fast remainder - Albert Chan - 05-12-2021, 04:12 PM
RE: Fast remainder - Albert Chan - 05-14-2021, 06:15 PM



User(s) browsing this thread: 2 Guest(s)