"Fossies" - the Fresh Open Source Software Archive

Member "dmd-2.089.0/src/dmd/backend/divcoeff.d" (2 Nov 2019, 7763 Bytes) of package /linux/misc/dmd-2.089.0.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) D source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. See also the last Fossies "Diffs" side-by-side code changes report for "divcoeff.d": 2.086.1_vs_2.087.0.

    1 /**
    2  * Compiler implementation of the
    3  * $(LINK2 http://www.dlang.org, D programming language).
    4  *
    5  * Copyright:   Copyright (C) 2013-2019 by The D Language Foundation, All Rights Reserved
    6  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
    7  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
    8  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/divcoeff.d, backend/divcoeff.d)
    9  */
   10 
   11 /***************************************************
   12  * Algorithms from "Division by Invariant Integers using Multiplication"
   13  * by Torbjoern Granlund and Peter L. Montgomery
   14  */
   15 
   16 import core.stdc.stdio;
   17 
   18 extern (C++):
   19 
   20 nothrow:
   21 
   22 import core.stdc.stdint : uint64_t;
   23 alias ullong = uint64_t;
   24 
   25 /* unsigned 128 bit math
   26  */
   27 
   28 bool SIGN64(ullong x)
   29 {
   30     return cast(long)x < 0;
   31 }
   32 
   33 void SHL128(out ullong dh, out ullong dl, ullong xh,ullong xl)
   34 {
   35     dh = (xh << 1) | SIGN64(xl);
   36     dl = xl << 1;
   37 }
   38 
   39 void SHR128(out ullong dh, out ullong dl, ullong xh,ullong xl)
   40 {
   41     dl = (xl >> 1) | ((xh & 1) << 63);
   42     dh = xh >> 1;
   43 }
   44 
   45 bool XltY128(ullong xh, ullong xl, ullong yh, ullong yl)
   46 {
   47     return xh < yh || (xh == yh && xl < yl);
   48 }
   49 
   50 void u128Div(ullong xh, ullong xl, ullong yh, ullong yl, ullong *pqh, ullong *pql)
   51 {
   52     /* Use auld skool shift & subtract algorithm.
   53      * Not very efficient.
   54      */
   55 
   56     //ullong xxh = xh, xxl = xl, yyh = yh, yyl = yl;
   57 
   58     assert(yh || yl);           // no div-by-0 bugs
   59 
   60     // left justify y
   61     uint shiftcount = 1;
   62     if (!yh)
   63     {   yh = yl;
   64         yl = 0;
   65         shiftcount += 64;
   66     }
   67     while (!SIGN64(yh))
   68     {
   69         SHL128(yh,yl, yh,yl);
   70         shiftcount += 1;
   71     }
   72 
   73     ullong qh = 0;
   74     ullong ql = 0;
   75     do
   76     {
   77         SHL128(qh,ql, qh,ql);
   78         if (XltY128(yh,yl,xh,xl))
   79         {
   80             // x -= y;
   81             if (xl < yl)
   82             {   xl -= yl;
   83                 xh -= yh + 1;
   84             }
   85             else
   86             {   xl -= yl;
   87                 xh -= yh;
   88             }
   89 
   90             ql |= 1;
   91         }
   92         SHR128(yh,yl, yh,yl);
   93     } while (--shiftcount);
   94 
   95     *pqh = qh;
   96     *pql = ql;
   97 
   98     // Remainder is xh,xl
   99 
  100     version (none)
  101     {
  102         printf("%016llx_%016llx / %016llx_%016llx = %016llx_%016llx\n", xxh,xxl,yyh,yyl,qh,ql);
  103         if (xxh == 0 && yyh == 0)
  104             printf("should be %llx\n", xxl / yyl);
  105     }
  106 }
  107 
  108 /************************************
  109  * Implement Algorithm 6.2: Selection of multiplier and shift count
  110  * Params:
  111  *      N =     32 or 64
  112  *      d =     divisor (must not be 0 or a power of 2)
  113  *      prec =  bits of precision desired
  114  * Output:
  115  *      *pm =      factor
  116  *      *pshpost = post shift
  117  * Returns:
  118  *      true    m >= 2**N
  119  */
  120 
  121 extern (C) bool choose_multiplier(int N, ullong d, int prec, ullong *pm, int *pshpost)
  122 {
  123     assert(N == 32 || N == 64);
  124     assert(prec <= N);
  125     assert(d > 1 && (d & (d - 1)));
  126 
  127     // Compute b such that 2**(b-1) < d <= 2**b
  128     // which is the number of significant bits in d
  129     int b = 0;
  130     ullong d1 = d;
  131     while (d1)
  132     {
  133         ++b;
  134         d1 >>= 1;
  135     }
  136 
  137     int shpost = b;
  138 
  139     bool mhighbit = false;
  140     if (N == 32)
  141     {
  142         // mlow = (2**(N + b)) / d
  143         ullong mlow = (1UL << (N + b)) / d;
  144 
  145         // uhigh = (2**(N + b) + 2**(N + b - prec)) / d
  146         ullong mhigh = ((1UL << (N + b)) + (1UL << (N + b - prec))) / d;
  147 
  148         while (mlow/2 < mhigh/2 && shpost)
  149         {
  150             mlow /= 2;
  151             mhigh /= 2;
  152             --shpost;
  153         }
  154 
  155         *pm = mhigh & 0xFFFFFFFF;
  156         mhighbit = (mhigh >> N) != 0;
  157     }
  158     else if (N == 64)
  159     {
  160         // Same as for N==32, but use 128 bit unsigned arithmetic
  161 
  162         // mlow = (2**(N + b)) / d
  163         ullong mlowl = 0;
  164         ullong mlowh = 1UL << b;
  165 
  166         // mlow /= d
  167         u128Div(mlowh, mlowl, 0, d, &mlowh, &mlowl);
  168 
  169         // mhigh = (2**(N + b) + 2**(N + b - prec)) / d
  170         ullong mhighl = 0;
  171         ullong mhighh = 1UL << b;
  172         int e = N + b - prec;
  173         if (e < 64)
  174             mhighl = 1UL << e;
  175         else
  176             mhighh |= 1UL << (e - 64);
  177 
  178         // mhigh /= d
  179         u128Div(mhighh, mhighl, 0, d, &mhighh, &mhighl);
  180 
  181         while (1)
  182         {
  183             // mlowb = mlow / 2
  184             ullong mlowbh,mlowbl;
  185             SHR128(mlowbh,mlowbl, mlowh,mlowl);
  186 
  187             // mhighb = mhigh / 2
  188             ullong mhighbh,mhighbl;
  189             SHR128(mhighbh,mhighbl, mhighh,mhighl);
  190 
  191             // if (mlowb < mhighb && shpost)
  192             if (XltY128(mlowbh,mlowbl, mhighbh,mhighbl) && shpost)
  193             {
  194                 // mlow = mlowb
  195                 mlowl = mlowbl;
  196                 mlowh = mlowbh;
  197 
  198                 // mhigh = mhighb
  199                 mhighl = mhighbl;
  200                 mhighh = mhighbh;
  201 
  202                 --shpost;
  203             }
  204             else
  205                 break;
  206         }
  207 
  208         *pm = mhighl;
  209         mhighbit = mhighh & 1;
  210     }
  211     else
  212         assert(0);
  213 
  214     *pshpost = shpost;
  215     return mhighbit;
  216 }
  217 
  218 /*************************************
  219  * Find coefficients for Algorithm 4.2:
  220  * Optimized code generation of unsigned q=n/d for constant nonzero d
  221  * Input:
  222  *      N       32 or 64 (width of divide)
  223  *      d       divisor (not a power of 2)
  224  * Output:
  225  *      *pshpre  pre-shift
  226  *      *pm      factor
  227  *      *pshpost post-shift
  228  * Returns:
  229  *      true    Use algorithm:
  230  *              t1 = MULUH(m, n)
  231  *              q = SRL(t1 + SRL(n - t1, 1), shpost - 1)
  232  *
  233  *      false   Use algorithm:
  234  *              q = SRL(MULUH(m, SRL(n, shpre)), shpost)
  235  */
  236 
  237 extern (C) bool udiv_coefficients(int N, ullong d, int *pshpre, ullong *pm, int *pshpost)
  238 {
  239     bool mhighbit = choose_multiplier(N, d, N, pm, pshpost);
  240     if (mhighbit && (d & 1) == 0)
  241     {
  242         int e = 0;
  243         while ((d & 1) == 0)
  244         {   ++e;
  245             d >>= 1;
  246         }
  247         *pshpre = e;
  248         mhighbit = choose_multiplier(N, d, N - e, pm, pshpost);
  249         assert(mhighbit == false);
  250     }
  251     else
  252         *pshpre = 0;
  253     return mhighbit;
  254 }
  255 
  256 unittest
  257 {
  258     struct S
  259     {
  260         int N;
  261         ullong d;
  262         int shpre;
  263         int highbit;
  264         ullong m;
  265         int shpost;
  266     }
  267 
  268     static immutable S[14] table =
  269     [
  270         { 32, 10,    0, 0, 0xCCCCCCCD, 3 },
  271         { 32, 13,    0, 0, 0x4EC4EC4F, 2 },
  272         { 32, 14,    1, 0, 0x92492493, 2 },
  273         { 32, 15,    0, 0, 0x88888889, 3 },
  274         { 32, 17,    0, 0, 0xF0F0F0F1, 4 },
  275         { 32, 14007, 0, 1, 0x2B71840D, 14 },
  276 
  277         { 64, 7,     0, 1, 0x2492492492492493, 3 },
  278         { 64, 10,    0, 0, 0xCCCCCCCCCCCCCCCD, 3 },
  279         { 64, 13,    0, 0, 0x4EC4EC4EC4EC4EC5, 2 },
  280         { 64, 14,    1, 0, 0x4924924924924925, 1 },
  281         { 64, 15,    0, 0, 0x8888888888888889, 3 },
  282         { 64, 17,    0, 0, 0xF0F0F0F0F0F0F0F1, 4 },
  283         { 64, 100,   2, 0, 0x28F5C28F5C28F5C3, 2 },
  284         { 64, 14007, 0, 1, 0x2B71840C5ADF02C3, 14 },
  285     ];
  286 
  287     for (int i = 0; i < table.length; i++)
  288     {   const ps = &table[i];
  289 
  290         ullong m;
  291         int shpre;
  292         int shpost;
  293         bool mhighbit = udiv_coefficients(ps.N, ps.d, &shpre, &m, &shpost);
  294 
  295         //printf("[%d] %d %d %llx %d\n", i, shpre, mhighbit, m, shpost);
  296         assert(shpre == ps.shpre);
  297         assert(mhighbit == ps.highbit);
  298         assert(m == ps.m);
  299         assert(shpost == ps.shpost);
  300     }
  301 }
  302 
  303 version (none)
  304 {
  305     import core.stdc.stdlib;
  306 
  307     extern (D) int main(string[] args)
  308     {
  309         if (args.length == 2)
  310         {
  311             ullong d = atoi(args[1].ptr);
  312             ullong m;
  313             int shpre;
  314             int shpost;
  315             bool mhighbit = udiv_coefficients(64, d, &shpre, &m, &shpost);
  316 
  317             printf("%d %d %llx, %d\n", shpre, mhighbit, m, shpost);
  318         }
  319         return 0;
  320     }
  321 }