// TKBMS v1.0 -----------------------------------------------------
//
// PLATFORM   : ALL
// PRODUCT   : COMMON
// VISIBILITY   : PUBLIC
//
// ------------------------------------------------------TKBMS v1.0

#include <Common/Base/hkBase.h>
#include <Common/Base/Math/LargeInt/hkLargeIntTypes.h>

// Shifts the N-limb integer a (least significant limb first) left by shift bits and stores the result in out.
// Limbs that would move past index N - 1 are dropped and the vacated low limbs are set to zero.
// Output limbs are produced from the top down.
void hkLargeIntImpl::shiftLeft(_In_reads_(N) const hkUint64* a, _Out_writes_all_(N) hkUint64* out, unsigned int N, unsigned int shift)
{
    const int numLimbs = int(N);

    // Whole-limb part of the shift, clamped to the limb count. Without the clamp, a shift of
    // 64 * (N + 1) bits or more makes the zero-fill loop below write past the end of out.
    int blockShift = int(shift / 64);
    if (blockShift > numLimbs)
    {
        blockShift = numLimbs;
    }

    // Remaining sub-limb shift, in [0, 63]
    const unsigned int relShift = shift % 64;

    // Each output limb is built from the source limb that lands on it and the one just below it
    // (zero when there is no lower limb).
    for (int i = numLimbs - 1; i >= blockShift; --i)
    {
        const int src = i - blockShift;
        const hkUint64 high = a[src];
        const hkUint64 low = (src >= 1) ? a[src - 1] : 0;
        out[i] = shiftLeftUnsigned128(low, high, relShift);
    }

    // Limbs below the block shift receive no input bits
    for (int i = blockShift - 1; i >= 0; --i)
    {
        out[i] = 0;
    }
}

// Shifts the N-limb integer a (least significant limb first) right by shift bits and stores the result in out.
// This is a logical shift: the vacated high limbs are set to zero. Output limbs are produced from the bottom up.
void hkLargeIntImpl::shiftRight(_In_reads_(N) const hkUint64* a, _Out_writes_all_(N) hkUint64* out, unsigned int N, unsigned int shift)
{
    // Whole-limb part of the shift, clamped to the limb count. Without the clamp, N - blockShift
    // wraps around (unsigned) for shifts of 64 * (N + 1) bits or more and both loops run out of bounds.
    unsigned int blockShift = shift / 64;
    if (blockShift > N)
    {
        blockShift = N;
    }

    // Remaining sub-limb shift, in [0, 63]
    const unsigned char relShift = shift % 64;

    // Number of output limbs that still receive input bits
    const unsigned int numKept = N - blockShift;

    for (unsigned int i = 0; i < numKept; ++i)
    {
        const unsigned int src = i + blockShift;
        const hkUint64 high = (src + 1 < N) ? a[src + 1] : 0;
        const hkUint64 low = a[src];
        out[i] = shiftRightUnsigned128(low, high, relShift);
    }

    // Everything above the kept limbs is zero
    for (unsigned int i = numKept; i < N; ++i)
    {
        out[i] = 0;
    }
}

// Schoolbook multiplication of two unsigned multi-limb integers (least significant limb first).
//   a, Na       : first factor and its limb count
//   b, Nb       : second factor and its limb count
//   prod, Nprod : receives the low Nprod limbs of a * b; must not alias a or b
// Each limb of a is multiplied with b into a temporary row, which is then added into prod.
void hkLargeIntImpl::multiplyUnsigned(_In_reads_(Na) const hkUint64* a, _In_reads_(Nb) const hkUint64* b, _Out_writes_all_(Nprod) hkUint64* prod, unsigned int Na, unsigned int Nb, unsigned int Nprod)
{
    HK_ASSERT(0x5559fd29, a != prod && b != prod, "Input must not be aliased");

    // Element count is in 32-bit words
    hkString::memSet4(prod, 0, Nprod * sizeof(*prod) / 4);

    // Temp vector for intermediate products,
    // with 2 extra elements so we can write overflowing carry without bounds check
    HK_ALIGN16(hkUint64 temp[16 + 2]);
    HK_ASSERT_NO_MSG(0x626aa38e, Nprod + 2 < HK_COUNT_OF(temp));

    // Rows starting at or above limb Nprod cannot contribute to the truncated product. Skipping them also
    // keeps (Nprod - i) from wrapping around when Na > Nprod, which would otherwise index temp out of bounds.
    const unsigned int numRows = hkMath::min2(Na, Nprod);

    for (unsigned int i = 0; i < numRows; ++i)
    {
        const hkUint64 ai = a[i];

        Carry carryBit = 0;
        hkUint64 carryLimb = 0;

        hkString::memClear16(temp, sizeof(temp) / 16);

        // Compute product between a[i] and b[0...Nb], limited to the limbs that fit in prod
        const unsigned int upperBound = hkMath::min2(Nb, Nprod - i);
        for (unsigned int j = 0; j < upperBound; ++j)
        {
            const hkUint64 bj = b[j];

            hkUint64 lowProduct, highProduct;
            multiplyUnsigned128(ai, bj, lowProduct, highProduct);

            // Low half of this partial product plus the high half of the previous one
            carryBit = addWithCarry(carryBit, lowProduct, carryLimb, temp[i + j]);
            carryLimb = highProduct;
        }

        // Ripple through any remaining carry terms (these land in the 2 spare elements at most)
        carryBit = addWithCarry(carryBit, 0, carryLimb, temp[i + upperBound]);
        addWithCarry(carryBit, 0, 0, temp[i + upperBound + 1]);

        // Accumulate into final result; limbs below i are zero in temp and can be skipped
        carryBit = 0;
        for (unsigned int j = i; j < Nprod; ++j)
        {
            carryBit = addWithCarry(carryBit, temp[j], prod[j], prod[j]);
        }
    }
}

// Implementation of Knuth's algorithm D, adapted from http://www.hackersdelight.org/hdcodetxt/divmnu.c.txt
//
// Original license: http://www.hackersdelight.org/permissions.htm
//   "You are free to use, copy, and distribute any of the code on this web site, whether modified by you or not. You need not give attribution."
//
// Divides the unsigned integer a by the unsigned integer b. Both are given as N 32-bit limbs, least significant first.
//   quotient  : receives all N limbs of a / b
//   remainder : if not HK_NULL, receives all N limbs of a % b
// If b is zero an assert fires and the function returns without writing quotient or remainder.
static void hkLargeIntDivideUnsigned(_In_reads_(N) const hkUint32* a, _In_reads_(N) const hkUint32* b, _Out_writes_all_(N) hkUint32* quotient, _Out_writes_all_opt_(N) hkUint32* remainder,
    _In_range_(1, 16) int N)
{
    const hkUint64 base = hkUint64(1) << 32; // Number base (32 bits).

    // Scan actual number of used limbs of A and B.
    // The count must be tested before the limb is read, otherwise an all-zero operand reads index -1.
    int numLimbsA = N;
    while (numLimbsA > 0 && a[numLimbsA - 1] == 0)
    {
        numLimbsA--;
    }

    int numLimbsB = N;
    while (numLimbsB > 0 && b[numLimbsB - 1] == 0)
    {
        numLimbsB--;
    }

    if (numLimbsB == 0)
    {
        HK_ASSERT(0x72638521, false, "Division by zero");
        return;
    }

    // B is clearly greater than A, quotient is 0, remainder is A
    if (numLimbsB > numLimbsA)
    {
        hkString::memSet4(quotient, 0, N);

        if (remainder != HK_NULL)
        {
            hkString::memCpy4(remainder, a, N);
        }
        return;
    }

    // Optimized case for dividing by a single digit
    if (numLimbsB == 1)
    {
        // k carries the running remainder from the more significant digits
        hkInt64 k = 0;
        hkUint32 divisor = b[0];
        for (int j = numLimbsA - 1; j >= 0; j--)
        {
            hkUint64 dividend = k * base + a[j];
            quotient[j] = hkUint32(dividend / divisor);
            k = dividend % divisor;
        }

        // Unused upper quotient limbs
        hkString::memSet4(quotient + numLimbsA, 0, N - (numLimbsA));

        if (remainder != HK_NULL)
        {
            hkString::memSet4(remainder + 1, 0, N - 1);
            remainder[0] = hkUint32(k);
        }
        return;
    }

    // Normalize by shifting v left just enough so that
    // its high-order bit is on, and shift u left the
    // same amount.  We may have to append a high-order
    // digit on the dividend; we do that unconditionally.

    int shift = hkMath::countLeadingZeros(b[numLimbsB - 1]);

    // Just enough space for 512 bits
    hkUint32 aNormalized[17];
    hkUint32 bNormalized[16];
    HK_ASSERT_NO_MSG(0x26586394, HK_COUNT_OF(aNormalized) >= numLimbsA + 1);
    HK_ASSERT_NO_MSG(0x6877aa60, HK_COUNT_OF(bNormalized) >= numLimbsB);

    // Don't remove this special case; if shift == 0 we would shift by 32 in the else branch which is U.B.
    if (shift == 0)
    {
        hkString::memCpy(aNormalized, a, numLimbsA * sizeof(a[0]));
        aNormalized[numLimbsA] = 0;

        hkString::memCpy(bNormalized, b, numLimbsB * sizeof(b[0]));
    }
    else
    {
        aNormalized[numLimbsA] = a[numLimbsA - 1] >> (32 - shift);
        for (int i = numLimbsA - 1; i > 0; i--)
        {
            aNormalized[i] = (a[i] << shift) | (a[i - 1] >> (32 - shift));
        }
        aNormalized[0] = a[0] << shift;

        for (int i = numLimbsB - 1; i > 0; i--)
        {
            bNormalized[i] = (b[i] << shift) | (b[i - 1] >> (32 - shift));
        }
        bNormalized[0] = b[0] << shift;
    }

    // Main loop: one quotient digit per iteration, most significant first
    for (int j = numLimbsA - numLimbsB; j >= 0; j--)
    {
        // Compute estimate qhat of q[j]
        hkUint64 dividend = aNormalized[j + numLimbsB] * base + aNormalized[j + numLimbsB - 1];
        hkUint32 divisor = bNormalized[numLimbsB - 1];

        hkUint64 qhat = dividend / divisor;
        hkUint64 rhat = dividend % divisor;

        // Correct the estimate using the second divisor digit (numLimbsB >= 2 on this path)
    again:
        if (qhat >= base || qhat * bNormalized[numLimbsB - 2] > base * rhat + aNormalized[j + numLimbsB - 2])
        {
            qhat = qhat - 1;
            rhat = rhat + divisor;
            if (rhat < base)
            {
                goto again;
            }
        }

        // Multiply and subtract.
        hkInt64 k = 0;
        for (int i = 0; i < numLimbsB; i++)
        {
            hkUint64 p = qhat * bNormalized[i];
            hkInt64 t = aNormalized[i + j] - k - (p & 0xFFFFFFFF);
            aNormalized[i + j] = hkUint32(t);
            k = (p >> 32) - (t >> 32);
        }
        hkInt64 t = aNormalized[j + numLimbsB] - k;
        aNormalized[j + numLimbsB] = hkUint32(t);

        // Store quotient digit.
        quotient[j] = hkUint32(qhat);
        if (t < 0)
        {
            // If we subtracted too much, add back.
            quotient[j] = quotient[j] - 1;
            k = 0;
            for (int i = 0; i < numLimbsB; i++)
            {
                t = k + aNormalized[i + j] + bNormalized[i];
                aNormalized[i + j] = hkUint32(t);
                k = t >> 32;
            }
            aNormalized[j + numLimbsB] = hkUint32(aNormalized[j + numLimbsB] + k);
        }
    }

    // Zero out upper quotient bits
    hkString::memSet4(quotient + numLimbsA - numLimbsB + 1, 0, N - (numLimbsA - numLimbsB + 1));

    // If the caller wants the remainder, unnormalize it and pass it back.
    if (remainder != HK_NULL)
    {
        if (shift == 0)
        {
            hkString::memCpy4(remainder, aNormalized, numLimbsB);
        }
        else
        {
            for (int i = 0; i < numLimbsB; i++)
            {
                remainder[i] = (aNormalized[i] >> shift) | (aNormalized[i + 1] << (32 - shift));
            }
        }

        hkString::memSet4(remainder + numLimbsB, 0, N - numLimbsB);
    }
}

// Counts the zero bits above the most significant set bit of the N-limb integer a
// (limbs stored least significant first). Scanning stops at the first limb whose own count is not 64.
int hkLargeIntImpl::countLeadingZeros(_In_reads_(N) const hkUint64* a, unsigned int N)
{
    int total = 0;
    int limb = N - 1;
    while (limb >= 0)
    {
        const int zerosInLimb = hkMath::countLeadingZeros(a[limb]);
        total += zerosInLimb;
        if (zerosInLimb != 64)
        {
            return total;
        }
        --limb;
    }

    return total;
}


// Counts the zero bits below the least significant set bit of the N-limb integer a
// (limbs stored least significant first). Scanning stops at the first limb whose own count is not 64.
int hkLargeIntImpl::countTrailingZeros(_In_reads_(N) const hkUint64* a, unsigned int N)
{
    int total = 0;
    unsigned int limb = 0;
    while (limb < N)
    {
        const int zerosInLimb = hkMath::countTrailingZeros(a[limb]);
        total += zerosInLimb;
        if (zerosInLimb != 64)
        {
            return total;
        }
        ++limb;
    }

    return total;
}

// Returns true when every one of the N limbs of a is zero (also true for N == 0).
bool hkLargeIntImpl::equalZero(_In_reads_(N) const hkUint64* a, unsigned int N)
{
    unsigned int limb = 0;
    while (limb < N && a[limb] == 0)
    {
        ++limb;
    }

    // We reached the end only if no non-zero limb stopped the scan
    return limb == N;
}

// Returns true when the N-limb integers a and b match limb for limb (also true for N == 0).
bool hkLargeIntImpl::equal(_In_reads_(N) const hkUint64* a, _In_reads_(N) const hkUint64* b, unsigned int N)
{
    unsigned int limb = 0;
    while (limb < N && a[limb] == b[limb])
    {
        ++limb;
    }

    // We reached the end only if no differing limb stopped the scan
    return limb == N;
}

// Compares the fractions (origA / origB) and (origC / origD) without forming cross products.
// Returns -1 if a/b < c/d, 0 if they are equal and 1 if a/b > c/d.
// The integer parts are compared first; if they match, the comparison continues on the reciprocals
// of the fractional parts (continued-fraction style), flipping the sense of the result each round.
template<typename T>
static int hkLargeIntCompareFractions(const T& origA, const T& origB, const T& origC, const T& origD)
{
    // Compute signs of a, b, c, d and reduce the problem to comparing positive quantities
    bool flipResult;
    {
        int signA = origA.getSign();
        int signB = origB.getSign();
        int signC = origC.getSign();
        int signD = origD.getSign();

        int signAB = signA * signB;
        int signCD = signC * signD;

        // See if we can early exit based on signs!
        if (signAB < signCD)
        {
            return -1;
        }
        else if (signAB > signCD)
        {
            return 1;
        }

        // We only need to compute if both are positive or both are negative, i.e signAB == signCD == -1 or signAB == signCD == 1.
        // For signAB == -1 we need to flip the result (this also implies signCD == -1).
        flipResult = signAB == -1;
    }

    // Work on the absolute values
    T a;    a.setAbs(origA);
    T b;    b.setAbs(origB);
    T c;    c.setAbs(origC);
    T d;    d.setAbs(origD);

    // Start iterating
    do
    {
        // Divide a by b and c by d
        T aDivB, aModB; T::computeUnsignedDivMod(a, b, aDivB, aModB);   // a = aDivB * b + aModB, aModB < b
        T cDivD, cModD; T::computeUnsignedDivMod(c, d, cDivD, cModD);   // c = cDivD * d + cModD, cModD < d

        // Compare aDivB against cDivD. If inequality, we can stop!
        T abSubCd;      abSubCd.setSub(aDivB, cDivD);   // (aDivB - cDivD)
        if (!abSubCd.equalZero())
        {
            // Integer parts not equal, we can stop!
            const int ret = abSubCd.lessZero() ? -1 : 1;    // aDivB < cDivD ? -1 : 1
            return flipResult ? -ret : ret;
        }

        // At this point aDivB == cDivD. We've reduced the problem to comparing aModB / b with cModD / d.
        // If either remainder is zero the comparison is decided:
        //   both zero       ->  0 (fractions are equal)
        //   only aModB zero -> -1 (0 < cModD / d)
        //   only cModD zero -> +1 (aModB / b > 0)
        {
            const bool aModZero = aModB.equalZero();
            const bool cModZero = cModD.equalZero();
            if (aModZero || cModZero)
            {
                const int ret = (cModZero ? 1 : 0) - (aModZero ? 1 : 0);
                return flipResult ? -ret : ret;
            }
        }

        // Reverse the fractions and flip the result
        a = b;  b = aModB;
        c = d;  d = cModD;
        flipResult = !flipResult;
    } while (true);

    return 0;
}



template <int N, typename T>
int hkLargeIntComputeDiv32(const T& origA, const T& origB)
{
    // Make values absolute
    T a;    a.setAbs(origA);
    T b;    b.setAbs(origB);

    // Copy to 32bit limb format
    const int numLimbs = N / 32;
    hkUint32 aLimbs[numLimbs];
    hkUint32 bLimbs[numLimbs];

    for (int i = 0; i < numLimbs; i += 2)
    {
        aLimbs[i + 0] = hkUint32(a.getDoubleWord(i / 2));
        aLimbs[i + 1] = hkUint32(a.getDoubleWord(i / 2) >> 32);

        bLimbs[i + 0] = hkUint32(b.getDoubleWord(i / 2));
        bLimbs[i + 1] = hkUint32(b.getDoubleWord(i / 2) >> 32);
    }

    hkUint32 quotientLimbs[numLimbs];

    hkLargeIntDivideUnsigned(aLimbs, bLimbs, quotientLimbs, HK_NULL, numLimbs);

    hkInt64 result = origA.getSign() * origB.getSign() * hkInt64(quotientLimbs[0]);

#if defined(HK_DEBUG)
    for (int i = 1; i < numLimbs; ++i)
    {
        HK_ASSERT(0x72638520, quotientLimbs[i] == 0, "Quotient overflow");
    }

    HK_ASSERT(0x72638520, result >= HK_INT32_MIN && result <= HK_INT32_MAX, "Quotient overflow");
#endif

    return int(result);
}

template <int N, typename T>
static void hkLargeIntComputeUnsignedDivMod(const T& origA, const T& origB, T& divOut, T& modOut)
{
    // Copy to 32bit limb format
    const int numLimbs = N / 32;
    hkUint32 aLimbs[numLimbs];
    hkUint32 bLimbs[numLimbs];

    for (int i = 0; i < numLimbs; i += 2)
    {
        aLimbs[i + 0] = hkUint32(origA.getDoubleWord(i / 2));
        aLimbs[i + 1] = hkUint32(origA.getDoubleWord(i / 2) >> 32);

        bLimbs[i + 0] = hkUint32(origB.getDoubleWord(i / 2));
        bLimbs[i + 1] = hkUint32(origB.getDoubleWord(i / 2) >> 32);
    }

    hkUint32 quotientLimbs[numLimbs];
    hkUint32 remainderLimbs[numLimbs];

    hkLargeIntDivideUnsigned(aLimbs, bLimbs, quotientLimbs, remainderLimbs, numLimbs);

    // Copy to 64bit limb format
    for (int i = 0; i < numLimbs; i += 2)
    {
        divOut.setDoubleWord(i / 2, quotientLimbs[i + 0] | (hkUint64(quotientLimbs[i + 1]) << 32));
        modOut.setDoubleWord(i / 2, remainderLimbs[i + 0] | (hkUint64(remainderLimbs[i + 1]) << 32));
    }
}

void hkInt128::setUnsignedDiv(const hkInt128& origA, const hkInt128& origB)
{
    // Copy to 32bit limb format
    const int numLimbs = 128 / 32;
    hkUint32 aLimbs[numLimbs];
    hkUint32 bLimbs[numLimbs];

    for (int i = 0; i < numLimbs; i += 2)
    {
        aLimbs[i + 0] = hkUint32(origA.getDoubleWord(i / 2));
        aLimbs[i + 1] = hkUint32(origA.getDoubleWord(i / 2) >> 32);

        bLimbs[i + 0] = hkUint32(origB.getDoubleWord(i / 2));
        bLimbs[i + 1] = hkUint32(origB.getDoubleWord(i / 2) >> 32);
    }

    hkUint32 quotientLimbs[numLimbs];

    hkLargeIntDivideUnsigned(aLimbs, bLimbs, quotientLimbs, HK_NULL, numLimbs);

    // Copy to 64bit limb format
    for (int i = 0; i < numLimbs; i += 2)
    {
        this->setDoubleWord(i / 2, quotientLimbs[i + 0] | (hkUint64(quotientLimbs[i + 1]) << 32));
    }
}

void hkInt128::setGreatestCommonDivisor(const hkInt128& a, const hkInt128& b)
{
    if (a.equalZero()) { *this = b; return; }
    if (b.equalZero()) { *this = a; return; }

    // Divide by 2 as much as possible
    int shift;
    hkInt128 u, v;
    {
        const int numTrailingZerosA = a.countTrailingZeros();
        const int numTrailingZerosB = b.countTrailingZeros();
        shift = hkMath::min2(numTrailingZerosA, numTrailingZerosB);

        u.setShiftRight(a, numTrailingZerosA);
        v.setShiftRight(b, shift);
    }

    // From here on, u is always odd.
    do
    {
        // Remove all factors of 2 in v; they are not common. Note: v is not zero!
        const int numTrailingZerosV = v.countTrailingZeros();
        v.setShiftRight(v, numTrailingZerosV);

        // Now u and v are both odd. Swap if necessary so u <= v, then set v = v - u (which is even).
        hkInt128 v_minus_u;         v_minus_u.setSub(v, u);         // (v - u)
        const bool u_gt_v = v_minus_u.lessZero();           // (u > v)
        hkInt128 u_minus_v;         u_minus_v.setNeg(v_minus_u);    // (u - v)

        u.setSelect(u_gt_v, v, u);
        v.setSelect(u_gt_v, u_minus_v, v_minus_u);
        // Here v >= u.
    } while (!v.equalZero());

    // Restore common factors of 2
    setShiftLeft(u, shift);
}

//

// Returns origA / origB as an int; forwards to the shared helper instantiated for 128 bits.
int hkInt128::computeDiv32(const hkInt128& origA, const hkInt128& origB)
{
    const int quotient = hkLargeIntComputeDiv32<128, hkInt128>(origA, origB);
    return quotient;
}

void hkInt128::computeUnsignedDivMod(const hkInt128& a, const hkInt128& b, hkInt128& divOut, hkInt128& modOut)
{
    hkLargeIntComputeUnsignedDivMod<128>(a, b, divOut, modOut);
}

int hkInt128::compareFractions(const hkInt128& a, const hkInt128& b, const hkInt128& c, const hkInt128& d)
{
    return hkLargeIntCompareFractions(a, b, c, d);
}

void hkInt256::setZero()
{
    m_limbs[0] = 0;
    m_limbs[1] = 0;
    m_limbs[2] = 0;
    m_limbs[3] = 0;
}

// Sets this to the unsigned 32-bit value i by widening it and delegating to setFromUint64.
void hkInt256::setFromUint32(hkUint32 i)
{
    const hkUint64 widened = hkUint64(i);
    setFromUint64(widened);
}

// Sets this to the unsigned 64-bit value i: the lowest limb takes the value, the rest are zero.
void hkInt256::setFromUint64(hkUint64 i)
{
    m_limbs[0] = i;

    for (int limb = 1; limb < 4; ++limb)
    {
        m_limbs[limb] = 0;
    }
}

// Sets this to the signed 32-bit value i by widening it and delegating to setFromInt64.
void hkInt256::setFromInt32(hkInt32 i)
{
    const hkInt64 widened = hkInt64(i);
    setFromInt64(widened);
}

// Sets this to the signed 64-bit value i, sign-extending into the upper limbs.
void hkInt256::setFromInt64(hkInt64 i)
{
    // Fill pattern for the upper limbs, taken from the sign bit of i via a right shift by 63
    const hkUint64 signFill = hkUint64(i >> 63);

    m_limbs[0] = hkUint64(i);
    for (int limb = 1; limb < 4; ++limb)
    {
        m_limbs[limb] = signFill;
    }
}

void hkInt256::setFromInt128(const hkInt128& i)
{
    m_limbs[0] = i.m_limbs[0];
    m_limbs[1] = i.m_limbs[1];

    hkUint64 signExt = hkInt64(i.m_limbs[1]) >> 63;
    m_limbs[2] = signExt;
    m_limbs[3] = signExt;
}

void hkInt256::setAnd(const hkInt256& a, const hkInt256& b)
{
    m_limbs[0] = a.m_limbs[0] & b.m_limbs[0];
    m_limbs[1] = a.m_limbs[1] & b.m_limbs[1];
    m_limbs[2] = a.m_limbs[2] & b.m_limbs[2];
    m_limbs[3] = a.m_limbs[3] & b.m_limbs[3];
}

void hkInt256::setOr(const hkInt256& a, const hkInt256& b)
{
    m_limbs[0] = a.m_limbs[0] | b.m_limbs[0];
    m_limbs[1] = a.m_limbs[1] | b.m_limbs[1];
    m_limbs[2] = a.m_limbs[2] | b.m_limbs[2];
    m_limbs[3] = a.m_limbs[3] | b.m_limbs[3];
}

void hkInt256::setXor(const hkInt256& a, const hkInt256& b)
{
    m_limbs[0] = a.m_limbs[0] ^ b.m_limbs[0];
    m_limbs[1] = a.m_limbs[1] ^ b.m_limbs[1];
    m_limbs[2] = a.m_limbs[2] ^ b.m_limbs[2];
    m_limbs[3] = a.m_limbs[3] ^ b.m_limbs[3];
}

void hkInt256::setNot(const hkInt256& a)
{
    m_limbs[0] = ~a.m_limbs[0];
    m_limbs[1] = ~a.m_limbs[1];
    m_limbs[2] = ~a.m_limbs[2];
    m_limbs[3] = ~a.m_limbs[3];
}

// Adds the unsigned 64-bit value a to this in place, rippling the carry through the upper limbs.
// The carry out of the top limb is discarded.
void hkInt256::increment(hkUint64 a)
{
    hkLargeIntImpl::Carry carry;
    carry = hkLargeIntImpl::add(m_limbs[0], a, m_limbs[0]);
    for (int limb = 1; limb < 4; ++limb)
    {
        carry = hkLargeIntImpl::addWithCarry(carry, m_limbs[limb], 0, m_limbs[limb]);
    }
}

// Subtracts the unsigned 64-bit value a from this in place, rippling the borrow through the upper limbs.
// The borrow out of the top limb is discarded.
void hkInt256::decrement(hkUint64 a)
{
    hkLargeIntImpl::Borrow borrow;
    borrow = hkLargeIntImpl::sub(m_limbs[0], a, m_limbs[0]);
    for (int limb = 1; limb < 4; ++limb)
    {
        borrow = hkLargeIntImpl::subWithBorrow(borrow, m_limbs[limb], 0, m_limbs[limb]);
    }
}

// Sets this to the two's complement negation of a, computed as (~a) + 1 with the carry rippled upwards.
void hkInt256::setNeg(const hkInt256& a)
{
    hkLargeIntImpl::Carry carry;
    carry = hkLargeIntImpl::add(~a.m_limbs[0], 1, m_limbs[0]);
    for (int limb = 1; limb < 4; ++limb)
    {
        carry = hkLargeIntImpl::addWithCarry(carry, ~a.m_limbs[limb], 0, m_limbs[limb]);
    }
}

// Sets this to a + b, adding limb by limb from the least significant end.
// The carry out of the top limb is discarded.
void hkInt256::setAdd(const hkInt256& a, const hkInt256& b)
{
    hkLargeIntImpl::Carry carry;
    carry = hkLargeIntImpl::add(a.m_limbs[0], b.m_limbs[0], m_limbs[0]);
    for (int limb = 1; limb < 4; ++limb)
    {
        carry = hkLargeIntImpl::addWithCarry(carry, a.m_limbs[limb], b.m_limbs[limb], m_limbs[limb]);
    }
}

// Sets this to a - b, subtracting limb by limb from the least significant end.
// The borrow out of the top limb is discarded.
void hkInt256::setSub(const hkInt256& a, const hkInt256& b)
{
    hkLargeIntImpl::Borrow borrow;
    borrow = hkLargeIntImpl::sub(a.m_limbs[0], b.m_limbs[0], m_limbs[0]);
    for (int limb = 1; limb < 4; ++limb)
    {
        borrow = hkLargeIntImpl::subWithBorrow(borrow, a.m_limbs[limb], b.m_limbs[limb], m_limbs[limb]);
    }
}

void hkInt256::setMul(const hkInt128& a, const hkInt128& b)
{
    hkInt128 aAbs;
    aAbs.setAbs(a);

    hkInt128 bAbs;
    bAbs.setAbs(b);

    hkInt256 product;

    hkInt128 prod0, prod1, prod2, prod3;
    prod0.setMul(aAbs.m_limbs[0], bAbs.m_limbs[0]);
    prod1.setMul(aAbs.m_limbs[0], bAbs.m_limbs[1]);
    prod2.setMul(aAbs.m_limbs[1], bAbs.m_limbs[0]);
    prod3.setMul(aAbs.m_limbs[1], bAbs.m_limbs[1]);

    product.m_limbs[0] = prod0.m_limbs[0];

    hkLargeIntImpl::Carry carry;
    carry = hkLargeIntImpl::add(prod0.m_limbs[1], prod1.m_limbs[0], product.m_limbs[1]);
    carry = hkLargeIntImpl::addWithCarry(carry, prod1.m_limbs[1], prod3.m_limbs[0], product.m_limbs[2]);
    hkLargeIntImpl::addWithCarry(carry, 0, prod3.m_limbs[1], product.m_limbs[3]);

    carry = hkLargeIntImpl::add(prod2.m_limbs[0], product.m_limbs[1], product.m_limbs[1]);
    carry = hkLargeIntImpl::addWithCarry(carry, prod2.m_limbs[1], product.m_limbs[2], product.m_limbs[2]);
    hkLargeIntImpl::addWithCarry(carry, 0, product.m_limbs[3], product.m_limbs[3]);

    setFlipSign(product, a.lessZero() ^ b.lessZero());
}

void hkInt256::setMul(const hkInt128& a, hkInt64 b)
{
    hkInt128 aAbs;
    aAbs.setAbs(a);

    hkUint64 bAbs;
    bAbs = hkMath::abs(b);

    hkInt256 product;
    hkLargeIntImpl::multiplyUnsigned(aAbs.m_limbs, &bAbs, product.m_limbs, HK_COUNT_OF(aAbs.m_limbs), 1, HK_COUNT_OF(product.m_limbs));
    setFlipSign(product, a.lessZero() ^ (b < 0));
}

void hkInt256::addMul(const hkInt128& a, const hkInt128& b)
{
    hkInt256 prod;
    prod.setMul(a, b);
    setAdd(*this, prod);
}

void hkInt256::subMul(const hkInt128& a, const hkInt128& b)
{
    hkInt256 prod;
    prod.setMul(a, b);
    setSub(*this, prod);
}

// Returns origA / origB as an int; forwards to the shared helper instantiated for 256 bits.
int hkInt256::computeDiv32(const hkInt256& origA, const hkInt256& origB)
{
    const int quotient = hkLargeIntComputeDiv32<256, hkInt256>(origA, origB);
    return quotient;
}

int hkInt256::compareFractions(const hkInt256& a, const hkInt256& b, const hkInt256& c, const hkInt256& d)
{
    return hkLargeIntCompareFractions(a, b, c, d);
}

void hkInt256::computeUnsignedDivMod(const hkInt256& a, const hkInt256& b, hkInt256& divOut, hkInt256& modOut)
{
    hkLargeIntComputeUnsignedDivMod<256>(a, b, divOut, modOut);
}

void hkInt512::setZero()
{
    m_limbs[0] = 0;
    m_limbs[1] = 0;
    m_limbs[2] = 0;
    m_limbs[3] = 0;
    m_limbs[4] = 0;
    m_limbs[5] = 0;
    m_limbs[6] = 0;
    m_limbs[7] = 0;
}

// Sets this to the unsigned 32-bit value i by widening it and delegating to setFromUint64.
void hkInt512::setFromUint32(hkUint32 i)
{
    const hkUint64 widened = hkUint64(i);
    setFromUint64(widened);
}

// Sets this to the unsigned 64-bit value i: the lowest limb takes the value, the rest are zero.
void hkInt512::setFromUint64(hkUint64 i)
{
    m_limbs[0] = i;

    for (int limb = 1; limb < 8; ++limb)
    {
        m_limbs[limb] = 0;
    }
}

// Sets this to the signed 32-bit value i by widening it and delegating to setFromInt64.
void hkInt512::setFromInt32(hkInt32 i)
{
    const hkInt64 widened = hkInt64(i);
    setFromInt64(widened);
}

// Sets this to the signed 64-bit value i, sign-extending into the upper limbs.
void hkInt512::setFromInt64(hkInt64 i)
{
    // Fill pattern for the upper limbs, taken from the sign bit of i via a right shift by 63
    const hkUint64 signFill = hkUint64(i >> 63);

    m_limbs[0] = hkUint64(i);
    for (int limb = 1; limb < 8; ++limb)
    {
        m_limbs[limb] = signFill;
    }
}

void hkInt512::setFromInt128(const hkInt128& i)
{
    m_limbs[0] = i.m_limbs[0];
    m_limbs[1] = i.m_limbs[1];

    hkUint64 signExt = hkInt64(i.m_limbs[1]) >> 63;
    m_limbs[2] = signExt;
    m_limbs[3] = signExt;
    m_limbs[4] = signExt;
    m_limbs[5] = signExt;
    m_limbs[6] = signExt;
    m_limbs[7] = signExt;
}

void hkInt512::setFromInt256(const hkInt256& i)
{
    m_limbs[0] = i.m_limbs[0];
    m_limbs[1] = i.m_limbs[1];
    m_limbs[2] = i.m_limbs[2];
    m_limbs[3] = i.m_limbs[3];

    hkUint64 signExt = hkInt64(i.m_limbs[3]) >> 63;
    m_limbs[4] = signExt;
    m_limbs[5] = signExt;
    m_limbs[6] = signExt;
    m_limbs[7] = signExt;
}

void hkInt512::setAnd(const hkInt512& a, const hkInt512& b)
{
    m_limbs[0] = a.m_limbs[0] & b.m_limbs[0];
    m_limbs[1] = a.m_limbs[1] & b.m_limbs[1];
    m_limbs[2] = a.m_limbs[2] & b.m_limbs[2];
    m_limbs[3] = a.m_limbs[3] & b.m_limbs[3];
    m_limbs[4] = a.m_limbs[4] & b.m_limbs[4];
    m_limbs[5] = a.m_limbs[5] & b.m_limbs[5];
    m_limbs[6] = a.m_limbs[6] & b.m_limbs[6];
    m_limbs[7] = a.m_limbs[7] & b.m_limbs[7];
}

void hkInt512::setOr(const hkInt512& a, const hkInt512& b)
{
    m_limbs[0] = a.m_limbs[0] | b.m_limbs[0];
    m_limbs[1] = a.m_limbs[1] | b.m_limbs[1];
    m_limbs[2] = a.m_limbs[2] | b.m_limbs[2];
    m_limbs[3] = a.m_limbs[3] | b.m_limbs[3];
    m_limbs[4] = a.m_limbs[4] | b.m_limbs[4];
    m_limbs[5] = a.m_limbs[5] | b.m_limbs[5];
    m_limbs[6] = a.m_limbs[6] | b.m_limbs[6];
    m_limbs[7] = a.m_limbs[7] | b.m_limbs[7];
}

void hkInt512::setXor(const hkInt512& a, const hkInt512& b)
{
    m_limbs[0] = a.m_limbs[0] ^ b.m_limbs[0];
    m_limbs[1] = a.m_limbs[1] ^ b.m_limbs[1];
    m_limbs[2] = a.m_limbs[2] ^ b.m_limbs[2];
    m_limbs[3] = a.m_limbs[3] ^ b.m_limbs[3];
    m_limbs[4] = a.m_limbs[4] ^ b.m_limbs[4];
    m_limbs[5] = a.m_limbs[5] ^ b.m_limbs[5];
    m_limbs[6] = a.m_limbs[6] ^ b.m_limbs[6];
    m_limbs[7] = a.m_limbs[7] ^ b.m_limbs[7];
}

void hkInt512::setNot(const hkInt512& a)
{
    m_limbs[0] = ~a.m_limbs[0];
    m_limbs[1] = ~a.m_limbs[1];
    m_limbs[2] = ~a.m_limbs[2];
    m_limbs[3] = ~a.m_limbs[3];
    m_limbs[4] = ~a.m_limbs[4];
    m_limbs[5] = ~a.m_limbs[5];
    m_limbs[6] = ~a.m_limbs[6];
    m_limbs[7] = ~a.m_limbs[7];
}

// Adds the unsigned 64-bit value a to this in place, rippling the carry through the upper limbs.
// The carry out of the top limb is discarded.
void hkInt512::increment(hkUint64 a)
{
    hkLargeIntImpl::Carry carry;
    carry = hkLargeIntImpl::add(m_limbs[0], a, m_limbs[0]);
    for (int limb = 1; limb < 8; ++limb)
    {
        carry = hkLargeIntImpl::addWithCarry(carry, m_limbs[limb], 0, m_limbs[limb]);
    }
}

// Subtracts the unsigned 64-bit value a from this in place, rippling the borrow through the upper limbs.
// The borrow out of the top limb is discarded.
void hkInt512::decrement(hkUint64 a)
{
    hkLargeIntImpl::Borrow borrow;
    borrow = hkLargeIntImpl::sub(m_limbs[0], a, m_limbs[0]);
    for (int limb = 1; limb < 8; ++limb)
    {
        borrow = hkLargeIntImpl::subWithBorrow(borrow, m_limbs[limb], 0, m_limbs[limb]);
    }
}

// Sets this to the two's complement negation of a, computed as (~a) + 1 with the carry rippled upwards.
void hkInt512::setNeg(const hkInt512& a)
{
    hkLargeIntImpl::Carry carry;
    carry = hkLargeIntImpl::add(~a.m_limbs[0], 1, m_limbs[0]);
    for (int limb = 1; limb < 8; ++limb)
    {
        carry = hkLargeIntImpl::addWithCarry(carry, ~a.m_limbs[limb], 0, m_limbs[limb]);
    }
}

// Sets this to a + b, adding limb by limb from the least significant end.
// The carry out of the top limb is discarded.
void hkInt512::setAdd(const hkInt512& a, const hkInt512& b)
{
    hkLargeIntImpl::Carry carry;
    carry = hkLargeIntImpl::add(a.m_limbs[0], b.m_limbs[0], m_limbs[0]);
    for (int limb = 1; limb < 8; ++limb)
    {
        carry = hkLargeIntImpl::addWithCarry(carry, a.m_limbs[limb], b.m_limbs[limb], m_limbs[limb]);
    }
}

// Sets this to a - b, subtracting limb by limb from the least significant end.
// The borrow out of the top limb is discarded.
void hkInt512::setSub(const hkInt512& a, const hkInt512& b)
{
    hkLargeIntImpl::Borrow borrow;
    borrow = hkLargeIntImpl::sub(a.m_limbs[0], b.m_limbs[0], m_limbs[0]);
    for (int limb = 1; limb < 8; ++limb)
    {
        borrow = hkLargeIntImpl::subWithBorrow(borrow, a.m_limbs[limb], b.m_limbs[limb], m_limbs[limb]);
    }
}

void hkInt512::setMul(const hkInt256& a, const hkInt256& b)
{
    hkInt256 aAbs;
    aAbs.setAbs(a);

    hkInt256 bAbs;
    bAbs.setAbs(b);

    hkInt512 product;
    hkLargeIntImpl::multiplyUnsigned(aAbs.m_limbs, bAbs.m_limbs, product.m_limbs, HK_COUNT_OF(aAbs.m_limbs), HK_COUNT_OF(bAbs.m_limbs), HK_COUNT_OF(product.m_limbs));
    setFlipSign(product, a.lessZero() ^ b.lessZero());
}

void hkInt512::setMul(const hkInt512& origA, const hkInt512& origB)
{
    hkInt512 a = origA;
    hkInt512 b = origB;
    hkLargeIntImpl::multiplyUnsigned(a.m_limbs, b.m_limbs, m_limbs, HK_COUNT_OF(a.m_limbs), HK_COUNT_OF(b.m_limbs), HK_COUNT_OF(m_limbs));
}

void hkInt512::setMul(const hkInt512& origA, hkUint64 b)
{
    hkInt512 a = origA;
    hkLargeIntImpl::multiplyUnsigned(a.m_limbs, &b, m_limbs, HK_COUNT_OF(a.m_limbs), 1, HK_COUNT_OF(m_limbs));
}

void hkInt512::addMul(const hkInt256& a, const hkInt256& b)
{
    hkInt512 prod;
    prod.setMul(a, b);
    setAdd(*this, prod);
}

void hkInt512::subMul(const hkInt256& a, const hkInt256& b)
{
    hkInt512 prod;
    prod.setMul(a, b);
    setSub(*this, prod);
}

// Returns origA / origB as an int; forwards to the shared helper instantiated for 512 bits.
int hkInt512::computeDiv32(const hkInt512& origA, const hkInt512& origB)
{
    const int quotient = hkLargeIntComputeDiv32<512, hkInt512>(origA, origB);
    return quotient;
}

int hkInt512::compareFractions(const hkInt512& a, const hkInt512& b, const hkInt512& c, const hkInt512& d)
{
    return hkLargeIntCompareFractions(a, b, c, d);
}

void hkInt512::computeUnsignedDivMod(const hkInt512& a, const hkInt512& b, hkInt512& divOut, hkInt512& modOut)
{
    hkLargeIntComputeUnsignedDivMod<512>(a, b, divOut, modOut);
}

/*
 * Havok SDK - Base file, BUILD(#20180110)
 * 
 * Confidential Information of Microsoft Corporation.
 * Not for disclosure or distribution without Microsoft's prior written
 * consent.  This software contains code, techniques and know-how which
 * is confidential and proprietary to Microsoft.  Product and Trade Secret
 * source code contains trade secrets of Microsoft.  Havok Software (C)
 * Copyright 1999-2018 Microsoft Corporation.
 * All Rights Reserved. Use of this software is subject to the
 * terms of an end user license agreement.
 * 
 * The Havok Logo, and the Havok buzzsaw logo are trademarks of Microsoft.
 * Title, ownership rights, and intellectual property rights in the Havok
 * software remain in Microsoft and/or its suppliers.
 * 
 * Use of this software for evaluation purposes is subject to and
 * indicates acceptance of the End User licence Agreement for this
 * product. A copy of the license is included with this software and is
 * also available from Havok Support.
 * 
 */
