Post Reply 
Fast remainder
05-10-2021, 02:44 PM (This post was last modified: 08-20-2021 10:56 PM by Albert Chan.)
Post: #2
RE: Fast remainder
With better digit buckets, we improved n=60 timing, from 58 to 16 sec. Smile
Here is the reason the better bucket work.

Time savings is directly related to the much reduced search cases.
Original version, using gcd(n, d) buckets, required 529,631,194 recursive calls.
Updated version, with gcd(2n,d) buckets, required 148,843,222 recursive calls.

148843222 / 529631194 ≈ 28%       → 58 * 28% = 16 sec (as expected)

Code:
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

// note: with 32-bits c, x and k really limited to 16-bits
static uint32_t fastmod(uint32_t x, uint64_t k, const uint32_t c)
{
    return (x * c * k) >> 32;   // = x % k
}

uint32_t gcd(uint32_t a, uint32_t b)
{
    for(;;) {
        if (b == 0) return a;
        a %= b;
        if (a == 0) return b;
        b %= a;
    }
}

void printx(uint32_t *x, uint32_t n)
{
    printf("{ ");
    for(uint32_t i=1; i<n; i++) printf("%d ", x[i]);
    printf("}\n");
}

void recurse(uint32_t *a, uint32_t n, uint32_t k, uint32_t *x)
{
    if (k==n) return printx(x, k);
    uint32_t c = a[3*n+k], d = 0, step = k+k;
    for(uint32_t i=2; i < k; i+=2) {
        d = fastmod((d + x[i-1])*n, k, c);
        d = fastmod((d + x[i])*n, k, c);
    }

    if (k%2) {  // odd k
        d = k - d;                      // valid (mod k)
        if (d%2 == 0) d += k;           // k = d = 1 (mod 2)
    } else {    // even k
        d = (d + x[k-1])*n;             // missed iteration
        d = k - fastmod(d, k, c);       // valid (mod k)
        if (k%4 == 0) step = k;         // step  = 0 (mod 4)
        else if ((n+d)%4 == 0) d += k;  // n+d+k = 0 (mod 4)
    }

    for(uint32_t bucket = a[2*n+k]; d < n; d += step) {
        if (a[d] && a[n+d] == bucket) {
            x[k] = d;
            a[d] = 0;                   // mark taken
            recurse(a, n, k+1, x);
            a[d] = d;                   // restore
        }
    }
}

void puzzle(uint32_t n) {       // polydivisible number, base n
    if (n%2) return;            // n must be even
    uint32_t a[4*n], x[n], i, j, g, bad = 0;
    for(i=1; i < n; i++) {
        a[i] = i;               //   +1 to  n-1 = duplicate check
        a[3*n+i] = (~0U)/i + 1; // 3n+1 to 4n-1 = fastmod factor
        if ((g = a[n+i] = gcd(n,i)) != (a[2*n+i] = gcd(2*n,i))) {
            for (x[bad] = g, j=0; x[j] != g; j++) ;
            if (j == bad) bad++;    // bad buckets
        }
    }
    for(i=0; i < bad; i++) {        // fix bad buckets
        for(g = x[i], j=2*n+1; j < 3*n; j++)
            if (g == a[j]) a[j-n] = g+g;
    }
    recurse(a, n, 1, x-1);          // arrays all 1-based
}

int main(int argc, char **argv)
{
    uint32_t n = 0;
    if (argc > 1) n = atoi(argv[1]);
    if (n-1 < 181) {puzzle(n); return 0;}
    return printf("Usage %s n, 0 < n <= 181\n", argv[0]);
}

Update1: fixed fastmod domain problem (see previous post)
Update2: unroll loop 1 time (but, 2 fastmod per loop), n=60 timing at 14.5 sec (1.5 sec saving)
Find all posts by this user
Quote this message in a reply
Post Reply 


Messages In This Thread
Fast remainder - Albert Chan - 05-09-2021, 08:48 PM
RE: Fast remainder - Albert Chan - 05-10-2021 02:44 PM
RE: Fast remainder - Albert Chan - 05-12-2021, 12:05 AM
RE: Fast remainder - Albert Chan - 05-12-2021, 03:10 PM
RE: Fast remainder - Albert Chan - 05-12-2021, 11:03 PM
RE: Fast remainder - Albert Chan - 05-12-2021, 04:12 PM
RE: Fast remainder - Albert Chan - 05-14-2021, 06:15 PM



User(s) browsing this thread: 1 Guest(s)