// TKBMS v1.0 -----------------------------------------------------
//
// PLATFORM   : ALL
// PRODUCT   : COMMON
// VISIBILITY   : PUBLIC
//
// ------------------------------------------------------TKBMS v1.0

#include <Common/Base/hkBase.h>
#include <Common/Base/Types/Geometry/Aabb/hkAabbUtil.h>
#include <Common/Base/hkBaseHeartbeat.h>

namespace hkAabbUtil
{

//
//  The maximum extents of an hkAabbUint32 (in integer space). We cannot use the
//  full 32 bits, because the sign bit (without the overflow bit) is needed to
//  compare two values.
//

// Quad constant with all four lanes set to AABB_UINT32_MAX_FVALUE.
hkQuadReal hkAabbUint32MaxVal = HK_QUADREAL_CONSTANT(
    hkReal(AABB_UINT32_MAX_FVALUE), hkReal(AABB_UINT32_MAX_FVALUE),
    hkReal(AABB_UINT32_MAX_FVALUE), hkReal(AABB_UINT32_MAX_FVALUE)
);

// When HK_AABBUTIL_convertAabbToUint32 is not defined, verify at compile time that
// hkAabbUint32 stores m_min at byte offset 0 and m_max at byte offset 0x10.
// NOTE(review): presumably the default convertAabbToUint32 implementation relies on this layout - confirm.
#if !defined(HK_AABBUTIL_convertAabbToUint32)
        HK_COMPILE_TIME_ASSERT( HK_OFFSET_EQUALS( hkAabbUint32, m_min, 0 ) );
        HK_COMPILE_TIME_ASSERT( HK_OFFSET_EQUALS( hkAabbUint32, m_max, 0x10 ) );
#endif


// Builds the AABB of numVertices points read from a strided hkReal buffer.
// Each point is loaded as 3 reals; consecutive points start `striding` bytes apart.
// aabbOut is left empty when numVertices <= 0.
void HK_CALL calcAabb(_In_reads_bytes_(numVertices * striding) const hkReal* vertexArray, int numVertices, int striding, hkAabb& aabbOut)
{
    aabbOut.setEmpty();

    if ( numVertices > 0 )
    {
        hkVector4 point; point.setZero();
        for (int vertexIdx = 0; vertexIdx != numVertices; ++vertexIdx)
        {
            // The stride is a byte offset from the start of the buffer
            point.load<3,HK_IO_NATIVE_ALIGNED>( hkAddByteOffsetConst(vertexArray, vertexIdx*striding) );
            aabbOut.includePoint(point);
        }

        // Clear the W component of both corners
        aabbOut.m_min.zeroComponent<3>();
        aabbOut.m_max.zeroComponent<3>();
    }
}

// Builds the AABB of a contiguous array of numVertices points.
// aabbOut is left empty when numVertices <= 0.
void HK_CALL calcAabb( _In_reads_(numVertices) const hkVector4* vertexArray, int numVertices, hkAabb& aabbOut )
{
    aabbOut.setEmpty();

    if ( numVertices > 0 )
    {
        const hkVector4* const endOfVertices = vertexArray + numVertices;
        for (const hkVector4* vertex = vertexArray; vertex != endOfVertices; ++vertex)
        {
            aabbOut.includePoint(*vertex);
        }

        // Clear the W component of both corners
        aabbOut.m_min.zeroComponent<3>();
        aabbOut.m_max.zeroComponent<3>();
    }
}

// Same as the plain hkVector4 array overload, but iterates through heartbeat.forRange()
// inside a heartbeat scope. aabbOut is left empty when numVertices <= 0.
void HK_CALL calcAabb( const hkVector4* vertexArray, int numVertices, hkAabb& aabbOut, hkAsyncHeartbeat::Heartbeat& heartbeat )
{
    hkAsyncHeartbeat::Scope heartbeatScope(heartbeat, 0xb32c99b1,
        hkAsyncHeartbeat::HeartbeatFile::BASE, "hkAabbUtil::calcAabb");

    aabbOut.setEmpty();

    if ( numVertices > 0 )
    {
        heartbeat.forRange<0x76c1b08a>(0, numVertices,
            [&](int vertexIdx)
            {
                aabbOut.includePoint(vertexArray[vertexIdx]);
            });

        // Clear the W component of both corners
        aabbOut.m_min.zeroComponent<3>();
        aabbOut.m_max.zeroComponent<3>();
    }
}

// Transforms aabb by BvToWorld and forwards extraRadius to the center / half-extents overload.
// An empty input is copied to aabbOut unchanged.
void HK_CALL calcAabb(const hkTransform& BvToWorld, const hkAabb& aabb, hkSimdRealParameter extraRadius, hkAabb& aabbOut)
{
    if ( !aabb.isEmpty() )
    {
        hkVector4 boxHalfExtents;  aabb.getHalfExtents(boxHalfExtents);
        hkVector4 boxCenter;       aabb.getCenter(boxCenter);

        calcAabb(BvToWorld, boxHalfExtents, boxCenter, extraRadius, aabbOut);
    }
    else
    {
        // Transforming an empty AABB would produce NaNs, so pass it through as is
        aabbOut = aabb;
    }
}


// Transforms aabb by the matrix BvToWorld via the center / half-extents overload.
// An empty input is copied to aabbOut unchanged.
void HK_CALL calcAabb(const hkMatrix4& BvToWorld, const hkAabb& aabb, hkAabb& aabbOut)
{
    if ( !aabb.isEmpty() )
    {
        hkVector4 boxHalfExtents;  aabb.getHalfExtents(boxHalfExtents);
        hkVector4 boxCenter;       aabb.getCenter(boxCenter);

        calcAabb(BvToWorld, boxHalfExtents, boxCenter, aabbOut);
    }
    else
    {
        // Transforming an empty AABB would produce NaNs, so pass it through as is
        aabbOut = aabb;
    }
}


// Transforms aabb by BvToWorld via the center / half-extents overload.
// An empty input is copied to aabbOut unchanged.
void HK_CALL calcAabb(const hkTransform& BvToWorld, const hkAabb& aabb, hkAabb& aabbOut)
{
    if ( !aabb.isEmpty() )
    {
        hkVector4 boxHalfExtents;  aabb.getHalfExtents(boxHalfExtents);
        hkVector4 boxCenter;       aabb.getCenter(boxCenter);

        calcAabb(BvToWorld, boxHalfExtents, boxCenter, aabbOut);
    }
    else
    {
        // Transforming an empty AABB would produce NaNs, so pass it through as is
        aabbOut = aabb;
    }
}


// Transforms aabb by a scaled transform: the scale is applied to the box center and
// half extents first, then the scale-free part of bvToWorld is applied.
// An empty input is copied to aabbOut unchanged.
void HK_CALL calcAabb(const hkQsTransform& bvToWorld, const hkAabb& aabb, hkAabb& aabbOut)
{
    if ( aabb.isEmpty() )
    {
        // Transforming an empty AABB would produce NaNs, so pass it through as is
        aabbOut = aabb;
        return;
    }

    const hkVector4& scaleFactors = bvToWorld.getScale();

    // Center in scaled space
    hkVector4 scaledCenter; aabb.getCenter(scaledCenter);
    scaledCenter.mul(scaleFactors);

    // Half extents in scaled space; the absolute value keeps them non-negative for negative scales
    hkVector4 scaledHalfExtents; aabb.getHalfExtents(scaledHalfExtents);
    scaledHalfExtents.mul(scaleFactors);
    scaledHalfExtents.setAbs(scaledHalfExtents);

    // Remaining transform with the scale stripped
    hkTransform unscaledTransform; bvToWorld.copyToTransformNoScale(unscaledTransform);
    calcAabb(unscaledTransform, scaledHalfExtents, scaledCenter, aabbOut);
}


// Transforms aabb by bvToWorld: the rotation and translation are converted to an
// hkTransform and passed to the center / half-extents overload.
// An empty input is copied to aabbOut unchanged.
void HK_CALL calcAabb( const hkQTransform& bvToWorld, const hkAabb& aabb, hkAabb& aabbOut )
{
    if ( aabb.isEmpty() )
    {
        // Transforming an empty AABB would produce NaNs, so pass it through as is
        aabbOut = aabb;
        return;
    }

    // Box center (used as is, no scale is applied in this overload)
    hkVector4 boxCenter; aabb.getCenter(boxCenter);

    // Absolute half extents
    hkVector4 boxHalfExtents; aabb.getHalfExtents(boxHalfExtents);
    boxHalfExtents.setAbs(boxHalfExtents);

    // Equivalent hkTransform built from the rotation and translation
    hkTransform asTransform; asTransform.set( bvToWorld.getRotation(), bvToWorld.getTranslation() );
    calcAabb(asTransform, boxHalfExtents, boxCenter, aabbOut);
}


//
// Builds the AABB of numVertices points referenced through an array of pointers.
// aabbOut is left empty when numVertices <= 0.
void HK_CALL calcAabb(_In_reads_(numVertices) const hkVector4*const* vectorArray, int numVertices, hkAabb& aabbOut )
{
    aabbOut.setEmpty();

    if ( numVertices > 0 )
    {
        const hkVector4*const* const endOfPointers = vectorArray + numVertices;
        for (const hkVector4*const* vertexPtr = vectorArray; vertexPtr != endOfPointers; ++vertexPtr)
        {
            aabbOut.includePoint(**vertexPtr);
        }

        // Clear the W component of both corners
        aabbOut.m_min.zeroComponent<3>();
        aabbOut.m_max.zeroComponent<3>();
    }
}

// Calculation of screen space bounding box follows:

// Returns the point on the edge (edgeStart, edgeEnd) where the linearly interpolated clip
// distance is zero:
//     (edgeStart * clipDistanceEnd - edgeEnd * clipDistanceStart) / (clipDistanceEnd - clipDistanceStart)
// The reciprocal uses 12-bit accuracy and does not guard against a zero denominator (HK_DIV_IGNORE).
HK_INLINE hkVector4 getClipIntersection(hkVector4Parameter edgeStart, hkVector4Parameter edgeEnd, hkSimdRealParameter clipDistanceStart, hkSimdRealParameter clipDistanceEnd)
{
    const hkSimdReal distanceSpan = clipDistanceEnd - clipDistanceStart;
    hkSimdReal invDistanceSpan;
    invDistanceSpan.setReciprocal<HK_ACC_12_BIT, HK_DIV_IGNORE>(distanceSpan);

    hkVector4 intersection;
    intersection.setMul(edgeStart, clipDistanceEnd);
    intersection.subMul(edgeEnd, clipDistanceStart);
    intersection.mul(invDistanceSpan);

    return intersection;
}

// Clip a convex polygon against a given plane
// Clips the convex polygon clipIn[0 .. inCount) against clipPlane and writes the result to clipOut.
// A vertex is kept when its 4-component dot product with clipPlane is >= 0; an intersection
// vertex is emitted for every edge whose end points lie on different sides.
// Returns the number of vertices written. inCount must be at least 1, since clipIn[inCount - 1]
// is read unconditionally.
HK_INLINE hkUint32 clipPolygon(_In_reads_(inCount) const hkVector4* clipIn, _Out_writes_(inCount * 2) hkVector4* clipOut, hkVector4Parameter clipPlane, hkUint32 inCount)
{
    hkUint32 numWritten = 0;

    // Start with the closing edge: last vertex -> first vertex
    hkVector4 prevVertex = clipIn[inCount - 1];
    hkSimdReal prevDistance = prevVertex.dot<4>(clipPlane);

    for (const hkVector4* cur = clipIn; cur != clipIn + inCount; ++cur)
    {
        const hkVector4 curVertex = *cur;
        const hkSimdReal curDistance = curVertex.dot<4>(clipPlane);

        // Both flags come from isGreaterEqualZero() so that a NaN distance is consistently treated
        // as "outside". Mixing in isLessZero() could disagree for NaNs and emit more vertices than
        // the output buffer allows.
        const bool prevInside = prevDistance.isGreaterEqualZero() ? true : false;
        const bool curInside = curDistance.isGreaterEqualZero() ? true : false;

        // The edge crosses the plane: emit the crossing point first
        if (prevInside != curInside)
        {
            clipOut[numWritten++] = getClipIntersection(prevVertex, curVertex, prevDistance, curDistance);
        }

        // Keep the end point if it is on the inside
        if (curInside)
        {
            clipOut[numWritten++] = curVertex;
        }

        prevVertex = curVertex;
        prevDistance = curDistance;
    }

    return numWritten;
}

// Adds four homogeneous points, given in transposed form (v0 = the X values, v1 = Y, v2 = Z,
// v3 = W of the four points), to screenSpace after dividing X, Y and Z by W.
// The W component of the included points is zero.
HK_INLINE void addVerticesToScreenAabb(hkVector4Parameter v0, hkVector4Parameter v1, hkVector4Parameter v2, hkVector4Parameter v3, hkAabb& screenSpace)
{
    // 1 / W for all four points at once
    hkVector4 invW;
    invW.setReciprocal<HK_ACC_12_BIT, HK_DIV_IGNORE>(v3);

    hkVector4 p0; p0.setMul(v0, invW);
    hkVector4 p1; p1.setMul(v1, invW);
    hkVector4 p2; p2.setMul(v2, invW);
    hkVector4 p3; p3.setZero();

    // Back to one point per vector
    HK_TRANSPOSE4(p0, p1, p2, p3);

    screenSpace.includePoint(p0);
    screenSpace.includePoint(p1);
    screenSpace.includePoint(p2);
    screenSpace.includePoint(p3);
}

// Writes the eight corners of worldSpace, transformed by viewProjection, to corners[0 .. 8).
// Despite the name, the corners written are the transformed ones (clip space).
// Corner index layout relative to the transformed m_min: bit 0 adds the X edge, bit 1 the Y edge,
// bit 2 the Z edge (so corners[0] is m_min and corners[7] is the opposite corner).
// corners is a pure output buffer; callers pass an uninitialized array.
HK_INLINE static void getWorldSpaceCorners(const hkAabb &worldSpace, const hkMatrix4 &viewProjection, _Out_writes_(8) hkVector4 * corners)
{
    hkVector4 extents;
    worldSpace.getExtents(extents);

    // Box edges along X, Y and Z: the matrix columns scaled by the box extents
    hkVector4 edge0 = viewProjection.getColumn<0>();
    edge0.mul(extents.getComponent<0>());

    hkVector4 edge1 = viewProjection.getColumn<1>();
    edge1.mul(extents.getComponent<1>());

    hkVector4 edge2 = viewProjection.getColumn<2>();
    edge2.mul(extents.getComponent<2>());

    // Walk along the edges of the AABB in clip space instead of transforming each corner separately
    viewProjection.transformPosition(worldSpace.m_min, corners[0]);
    corners[1].setAdd(corners[0], edge0);
    corners[2].setAdd(corners[0], edge1);
    corners[4].setAdd(corners[0], edge2);

    corners[3].setAdd(corners[1], edge1);
    corners[5].setAdd(corners[4], edge0);
    corners[6].setAdd(corners[2], edge2);

    corners[7].setAdd(corners[6], edge0);
}

// Computes the screen-space AABB (after the perspective divide) of worldSpace transformed by
// viewProjection, clipping the box against the left, right, bottom, top and near planes where needed.
//
//  - If no corner is clipped by any of the five planes, all corners are divided by W and included directly.
//  - If a single plane clips all eight corners, screenSpace is left empty.
//  - Otherwise the faces of the box (or a cheaper 4-quad stand-in when nothing is behind the near
//    plane) are clipped one by one and the surviving vertices are included.
//
// The far plane is deliberately not clipped against.
void HK_CALL calculateScreenSpaceAabb(const hkAabb& worldSpace, const hkMatrix4& viewProjection, hkAabb& screenSpace)
{
    screenSpace.setEmpty();

    hkVector4 corners[8];
    getWorldSpaceCorners(worldSpace, viewProjection, corners);

    // Transpose all so we can save some operations
    // After this, corners[0..3] hold the X, Y, Z and W values of corners 0-3 and corners[4..7] those of corners 4-7.
    HK_TRANSPOSE4(corners[0], corners[1], corners[2], corners[3]);
    HK_TRANSPOSE4(corners[4], corners[5], corners[6], corners[7]);

    // Now check if there's any plane to clip against by computing clip codes for the 4 sides and near.
    // We specifically don't clip against far, since this would clip away everything if the view frustum is fully contained in the AABB.

    // Bitmask containing information which vertices are clipped. If vertex #v is clipped by plane #p, then bit 8 * p + v is set to 1,
    // so we have 5 groups for each plane of 8 bits for each vertex.
    hkVector4 temp;

    hkUint64 clipCodeA, clipCodeB;

    // Left (-w < x)
    temp.setAdd(corners[3], corners[0]);
    clipCodeA = hkUint64(temp.lessZero().getMask()) << 0;

    temp.setAdd(corners[7], corners[4]);
    clipCodeB = hkUint64(temp.lessZero().getMask()) << 4;

    // Right (x < w)
    clipCodeA |= hkUint64(corners[3].less(corners[0]).getMask()) << 8;
    clipCodeB |= hkUint64(corners[7].less(corners[4]).getMask()) << 12;

    // Bottom (-w < y)
    temp.setAdd(corners[3], corners[1]);
    clipCodeA |= hkUint64(temp.lessZero().getMask()) << 16;
    temp.setAdd(corners[7], corners[5]);
    clipCodeB |= hkUint64(temp.lessZero().getMask()) << 20;

    // Top (y < w)
    clipCodeA |= hkUint64(corners[3].less(corners[1]).getMask()) << 24;
    clipCodeB |= hkUint64(corners[7].less(corners[5]).getMask()) << 28;

    // Near (-w < z)
    temp.setAdd(corners[3], corners[2]);
    clipCodeA |= hkUint64(temp.lessZero().getMask()) << 32;
    temp.setAdd(corners[7], corners[6]);
    clipCodeB |= hkUint64(temp.lessZero().getMask()) << 36;

    hkUint64 clipCode = clipCodeA | clipCodeB;

    // Fast path - no clipping
    if (clipCode == 0)
    {
        addVerticesToScreenAabb(corners[0], corners[1], corners[2], corners[3], screenSpace);
        addVerticesToScreenAabb(corners[4], corners[5], corners[6], corners[7], screenSpace);
        return;
    }

    // Check if any single plane clips all 8 vertices. AND each group of 8 bits into its lowest bit.
    hkUint64 allClippedByPlane = clipCode;
    allClippedByPlane = allClippedByPlane & (allClippedByPlane >> 4);
    allClippedByPlane = allClippedByPlane & (allClippedByPlane >> 2);
    allClippedByPlane = allClippedByPlane & (allClippedByPlane >> 1);

    // If the low bit of any of the 5 planes remains, that plane clips the entire AABB.
    // The groups are 8 bits wide, so the per-plane result bits are bits 0, 8, 16, 24 and 32.
    // Any other bit position holds an AND that straddles two planes and must be ignored.
    if (allClippedByPlane & 0x0101010101ULL)
    {
        return;
    }

    // Undo the transpose: from here on corners[i] is corner i again
    HK_TRANSPOSE4(corners[0], corners[1], corners[2], corners[3]);
    HK_TRANSPOSE4(corners[4], corners[5], corners[6], corners[7]);

    // Corner indices of the 6 faces of the box, 4 per face
    static const hkUint32 facesFull[] = {
        0, 1, 3, 2,
        0, 2, 6, 4,
        1, 3, 7, 5,
        4, 5, 7, 6,
        0, 1, 5, 4,
        2, 3, 7, 6,
    };

    // For each face of facesFull, the clip code bits (in every 8-bit plane group) of its 4 corners
    static const hkUint64 faceMasksFull[] = {
        0x0F0F0F0F0F0F0F0FULL,
        0x5555555555555555ULL,
        0xAAAAAAAAAAAAAAAAULL,
        0xF0F0F0F0F0F0F0F0ULL,
        0x3333333333333333ULL,
        0xCCCCCCCCCCCCCCCCULL,
    };

    // Top and bottom of the AABB, and two diagonally crossing quads - this shape has the same silhouette as the AABB as long as the AABB is in front of the near plane.
    static const hkUint32 facesApprox[] = {
        0, 1, 3, 2,
        4, 5, 7, 6,
        0, 3, 7, 4,
        1, 2, 6, 5,
    };

    // For each quad of facesApprox, the clip code bits (in every 8-bit plane group) of its 4 corners
    static const hkUint64 faceMasksApprox[] = {
        0x0F0F0F0F0F0F0F0FULL,
        0xF0F0F0F0F0F0F0F0ULL,
        0x9999999999999999ULL,
        0x6666666666666666ULL,
    };

    // Planes to clip against
    // Same order as the clip code groups: left, right, bottom, top, near
    HK_ALIGN16(static const hkReal clipPlanes[]) =
    {
        +1.0f, 0.0f, 0.0f, +1.0f,
        -1.0f, 0.0f, 0.0f, +1.0f,
        0.0f, +1.0f, 0.0f, +1.0f,
        0.0f, -1.0f, 0.0f, +1.0f,
        0.0f, 0.0f, +1.0f, +1.0f,
    };

    const hkUint32* faces;
    const hkUint64* faceMasks;
    hkUint32 numFaces;

    // If no vertex is behind the near plane, use the silhouette approximation
    if ((clipCode & 0xFF00000000ULL) == 0)
    {
        faces = facesApprox;
        faceMasks = faceMasksApprox;
        numFaces = 4;
    }
    else
    {
        // Fallback to clipping full AABB
        faces = facesFull;
        faceMasks = faceMasksFull;
        numFaces = 6;
    }

    const hkUint32 numFloats = HK_COUNT_OF(clipPlanes);
    const hkUint32 numClipPlanes = numFloats / 4;
    const hkUint32 verticesPerFace = 4;

    // Room for the clipped output of up to 6 faces, rounded up so it can be consumed in batches of 4
    hkVector4 finalClipResult[HK_NEXT_MULTIPLE_OF(4U, (verticesPerFace + numClipPlanes) * 6)];
    hkVector4* finalClipOut = finalClipResult;

    for (hkUint32 faceIdx = 0; faceIdx < numFaces; faceIdx++)
    {
        // Mask of the vertices used by this face
        const hkUint64 faceMask = *faceMasks++;
        hkUint64 clipCodeCopy = clipCode & faceMask;

        // If none of the vertices of this face were clipped, add all 4 to the bounding box directly
        if (!clipCodeCopy)
        {
            hkVector4 v0 = corners[*faces++];
            hkVector4 v1 = corners[*faces++];
            hkVector4 v2 = corners[*faces++];
            hkVector4 v3 = corners[*faces++];
            HK_TRANSPOSE4(v0, v1, v2, v3);
            addVerticesToScreenAabb(v0, v1, v2, v3, screenSpace);
            continue;
        }

        // Set up arrays for clipping process. Every clipping edge adds at most one vertex.
        hkVector4 clipBuffers[2][verticesPerFace + numClipPlanes];

        // Pointers to input and output of clipping, will be swapped after every clip operation.
        hkVector4* clipIn = clipBuffers[0];
        hkVector4* clipOut = clipBuffers[1];

        hkUint32 clipCount = verticesPerFace;

        // Extract primitive
        for (hkUint32 vertexIdx = 0; vertexIdx < clipCount; vertexIdx++)
        {
            hkUint32 cornerIdx = *faces++;
            clipIn[vertexIdx] = corners[cornerIdx];
        }

        const hkVector4* clipPlanePtr = reinterpret_cast<const hkVector4*>(clipPlanes);

        // Iterate over groups with bits set
        do
        {
            // Number of whole 8-bit plane groups below the lowest set bit
#if defined(HK_PLATFORM_WIN64)
            unsigned long emptyGroups;
            _BitScanForward64(&emptyGroups, clipCodeCopy);
            emptyGroups >>= 3;
#else

            hkUint32 emptyGroups = (hkMath::countTrailingZeros(clipCodeCopy) >> 3);
#endif

            // Skip empty groups
            clipPlanePtr += emptyGroups;
            clipCodeCopy >>= (hkUint64(emptyGroups) << 3);

            // Consume the group of the plane we are about to clip against
            clipCodeCopy >>= 8;

            // When clipping for the last time, clip directly into the target buffer
            if (!clipCodeCopy)
            {
                clipCount = clipPolygon(clipIn, finalClipOut, *clipPlanePtr, clipCount);
                break;
            }

            clipCount = clipPolygon(clipIn, clipOut, *clipPlanePtr++, clipCount);

            hkMath::swap(clipIn, clipOut);
        } while (clipCount);    // stop early once the polygon has been clipped away completely

        finalClipOut += clipCount;
    }

    // Fill up to a multiple of 4
    // Padding repeats the first vertex, which does not change the resulting bounds.
    hkUint32 totalClipCount = hkUint32(finalClipOut - finalClipResult);
    hkUint32 padding = HK_NEXT_MULTIPLE_OF(4U, totalClipCount) - totalClipCount;
    for (hkUint32 padIdx = 0; padIdx < padding; padIdx++)
    {
        finalClipResult[totalClipCount++] = finalClipResult[0];
    }

    finalClipOut = finalClipResult;

    // Perspective divide in batches of 4
    for (hkUint32 vertexIdx = 0; vertexIdx < totalClipCount; vertexIdx += 4)
    {
        hkVector4 v0 = *finalClipOut++;
        hkVector4 v1 = *finalClipOut++;
        hkVector4 v2 = *finalClipOut++;
        hkVector4 v3 = *finalClipOut++;
        HK_TRANSPOSE4(v0, v1, v2, v3);
        addVerticesToScreenAabb(v0, v1, v2, v3, screenSpace);
    }
}

// Computes a screen-space AABB of worldSpace transformed by viewProjection without clipping any polygons.
//
//  - If no corner has W < 0, all corners are divided by W and included directly.
//  - If every corner has W < 0, screenSpace is left empty.
//  - Otherwise X and Y bounds are gathered separately for the corners with W >= 0 and those with
//    W < 0, and each bound is either taken from the W >= 0 set or widened to -1 / +1.
//    In that case min Z is set to -1 and max Z is the largest Z/W of the corners with W >= 0.
HK_EXPORT_COMMON void HK_CALL calculateScreenSpaceAabbUnclipped(const hkAabb& worldSpace, const hkMatrix4& viewProjection, hkAabb& screenSpace)
{
    screenSpace.setEmpty();

    hkVector4 corners[8];
    getWorldSpaceCorners(worldSpace, viewProjection, corners);

    // Switch to component-major layout: [0..3] = X, Y, Z, W of corners 0-3, [4..7] = same for corners 4-7
    HK_TRANSPOSE4(corners[0], corners[1], corners[2], corners[3]);
    HK_TRANSPOSE4(corners[4], corners[5], corners[6], corners[7]);

    // Per-corner "W is negative" flags for both halves
    hkVector4fComparison negWLo = corners[3].lessZero();
    hkVector4fComparison negWHi = corners[7].lessZero();

    hkVector4fComparison someNegW;
    someNegW.setOr(negWLo, negWHi);

    if (!someNegW.anyIsSet())
    {
        // No corner has negative W: plain perspective divide
        addVerticesToScreenAabb(corners[0], corners[1], corners[2], corners[3], screenSpace);
        addVerticesToScreenAabb(corners[4], corners[5], corners[6], corners[7], screenSpace);
        return;
    }

    hkVector4fComparison everyNegW;
    everyNegW.setAnd(negWLo, negWHi);

    if (everyNegW.allAreSet())
    {
        // Every corner has negative W: leave the result empty
        return;
    }

    // Clamp W to 1 / sqrt(FLT_MAX) - this prevents overflow of X/W as long as X and 1/W are both < sqrt(FLT_MAX)
    hkVector4f clampW;
    clampW.setAll(5.421011e-20f);

    // Replaces w by 1 / w after clamping |w| from below, preserving the original sign
    auto invertClampedW = [&](hkVector4& w, const hkVector4fComparison& wasNegative)
    {
        w.setAbs(w);
        w.setMax(w, clampW);
        w.setFlipSign(w, wasNegative);
        w.setReciprocal<HK_ACC_12_BIT, HK_DIV_IGNORE>(w);
    };

    invertClampedW(corners[3], negWLo);
    corners[0].mul(corners[3]);
    corners[1].mul(corners[3]);
    corners[2].mul(corners[3]);

    invertClampedW(corners[7], negWHi);
    corners[4].mul(corners[7]);
    corners[5].mul(corners[7]);
    corners[6].mul(corners[7]);

    hkVector4f posInf = hkVector4f::getConstant<HK_QUADREAL_INF>();
    hkVector4f negInf = hkVector4f::getConstant<HK_QUADREAL_MINUS_INF>();

    // Reductions over one component of all 8 corners. Corners that do not belong to the requested
    // set are replaced by +inf (for a minimum) or -inf (for a maximum) so they cannot win.
    auto minOverPosW = [&](const hkVector4f& lo, const hkVector4f& hi) -> hkSimdFloat32
    {
        hkVector4f a; a.setSelect(negWLo, posInf, lo);
        hkVector4f b; b.setSelect(negWHi, posInf, hi);
        hkVector4f m; m.setMin(a, b);
        return m.horizontalMin<4>();
    };

    auto maxOverPosW = [&](const hkVector4f& lo, const hkVector4f& hi) -> hkSimdFloat32
    {
        hkVector4f a; a.setSelect(negWLo, negInf, lo);
        hkVector4f b; b.setSelect(negWHi, negInf, hi);
        hkVector4f m; m.setMax(a, b);
        return m.horizontalMax<4>();
    };

    auto minOverNegW = [&](const hkVector4f& lo, const hkVector4f& hi) -> hkSimdFloat32
    {
        hkVector4f a; a.setSelect(negWLo, lo, posInf);
        hkVector4f b; b.setSelect(negWHi, hi, posInf);
        hkVector4f m; m.setMin(a, b);
        return m.horizontalMin<4>();
    };

    auto maxOverNegW = [&](const hkVector4f& lo, const hkVector4f& hi) -> hkSimdFloat32
    {
        hkVector4f a; a.setSelect(negWLo, lo, negInf);
        hkVector4f b; b.setSelect(negWHi, hi, negInf);
        hkVector4f m; m.setMax(a, b);
        return m.horizontalMax<4>();
    };

    // Find bounds separately for points with W > 0 and W < 0
    const hkSimdFloat32 minX_posW = minOverPosW(corners[0], corners[4]);
    const hkSimdFloat32 maxX_posW = maxOverPosW(corners[0], corners[4]);
    const hkSimdFloat32 minY_posW = minOverPosW(corners[1], corners[5]);
    const hkSimdFloat32 maxY_posW = maxOverPosW(corners[1], corners[5]);
    const hkSimdFloat32 maxZ_posW = maxOverPosW(corners[2], corners[6]);

    const hkSimdFloat32 minX_negW = minOverNegW(corners[0], corners[4]);
    const hkSimdFloat32 maxX_negW = maxOverNegW(corners[0], corners[4]);
    const hkSimdFloat32 minY_negW = minOverNegW(corners[1], corners[5]);
    const hkSimdFloat32 maxY_negW = maxOverNegW(corners[1], corners[5]);

    hkVector4f lowPosW, highPosW;
    lowPosW.set(minX_posW, minY_posW, hkSimdFloat32_0, hkSimdFloat32_0);
    highPosW.set(maxX_posW, maxY_posW, hkSimdFloat32_0, hkSimdFloat32_0);

    hkVector4f lowNegW, highNegW;
    lowNegW.set(minX_negW, minY_negW, hkSimdFloat32_0, hkSimdFloat32_0);
    highNegW.set(maxX_negW, maxY_negW, hkSimdFloat32_0, hkSimdFloat32_0);

    // Depending on ordering of intervals, include either +/- 1 (full screen) or the min/max of points with W > 0
    hkVector4f candidateA, candidateB;
    candidateA.setSelect(highNegW.greater(lowPosW), hkVector4f::getConstant<HK_QUADREAL_MINUS1>(), lowPosW);
    candidateB.setSelect(lowNegW.less(highPosW), hkVector4f::getConstant<HK_QUADREAL_1>(), highPosW);

    hkVector4f lowXY, highXY;
    lowXY.setMin(candidateA, candidateB);
    highXY.setMax(candidateA, candidateB);

    // Assemble AABB
    screenSpace.m_min.set(lowXY.getComponent<0>(), lowXY.getComponent<1>(), hkSimdFloat32_Minus1, hkSimdFloat32_0);
    screenSpace.m_max.set(highXY.getComponent<0>(), highXY.getComponent<1>(), maxZ_posW, hkSimdFloat32_0);
}

// Appends the bounding planes of aabb to planesOut, in the order +X, +Y, +Z, -X, -Y, -Z.
// Each plane is (normal.xyz, offset), with the offset chosen so the plane passes through the
// corresponding face of the box. Nothing is appended for an empty AABB.
// The two planes of an axis are only emitted if at least one of the other two axes has a
// positive extent.
void HK_CALL getPlanes(const hkAabb& aabb, hkArray<hkVector4>& planesOut)
{
    HK_ASSERT(0x2ca9b759, aabb.isValid(), "cannot build plane equations for invalid aabbs");
    if (aabb.isEmpty())
    {
        return;
    }

    hkVector4 boxSize; aabb.getExtents(boxSize);
    const hkVector4ComparisonMask::Mask positiveAxes = boxSize.greaterZero().getMask();

    // Which plane pairs to emit
    const bool emitX = (positiveAxes & hkVector4ComparisonMask::MASK_YZ) != 0;
    const bool emitY = (positiveAxes & hkVector4ComparisonMask::MASK_XZ) != 0;
    const bool emitZ = (positiveAxes & hkVector4ComparisonMask::MASK_XY) != 0;

    hkVector4 planeX = hkVector4::getConstant<HK_QUADREAL_1000>();
    hkVector4 planeY = hkVector4::getConstant<HK_QUADREAL_0100>();
    hkVector4 planeZ = hkVector4::getConstant<HK_QUADREAL_0010>();
    hkVector4 offsets;

    // Positive facing planes: the transpose moves -max.x / -max.y / -max.z into the W slot of the
    // X / Y / Z plane (assuming hkVector4Util::transpose is a 4x4 transpose of its arguments).
    offsets.setNeg<4>(aabb.m_max);
    hkVector4Util::transpose(planeX, planeY, planeZ, offsets);
    if (emitX) { planesOut.pushBack(planeX); }
    if (emitY) { planesOut.pushBack(planeY); }
    if (emitZ) { planesOut.pushBack(planeZ); }

    // Negative facing planes: flip the previous planes, then transpose again so that the W slots
    // are replaced by min.x / min.y / min.z.
    planeX.setNeg<4>(planeX);
    planeY.setNeg<4>(planeY);
    planeZ.setNeg<4>(planeZ);
    offsets = aabb.m_min;
    hkVector4Util::transpose(planeX, planeY, planeZ, offsets);
    if (emitX) { planesOut.pushBack(planeX); }
    if (emitY) { planesOut.pushBack(planeY); }
    if (emitZ) { planesOut.pushBack(planeZ); }
}

} // namespace hkAabbUtil

/*
 * Havok SDK - Base file, BUILD(#20180110)
 * 
 * Confidential Information of Microsoft Corporation.
 * Not for disclosure or distribution without Microsoft's prior written
 * consent.  This software contains code, techniques and know-how which
 * is confidential and proprietary to Microsoft.  Product and Trade Secret
 * source code contains trade secrets of Microsoft.  Havok Software (C)
 * Copyright 1999-2018 Microsoft Corporation.
 * All Rights Reserved. Use of this software is subject to the
 * terms of an end user license agreement.
 * 
 * The Havok Logo, and the Havok buzzsaw logo are trademarks of Microsoft.
 * Title, ownership rights, and intellectual property rights in the Havok
 * software remain in Microsoft and/or its suppliers.
 * 
 * Use of this software for evaluation purposes is subject to and
 * indicates acceptance of the End User licence Agreement for this
 * product. A copy of the license is included with this software and is
 * also available from Havok Support.
 * 
 */
