// TKBMS v1.0 -----------------------------------------------------
//
// PLATFORM     : ALL
// PRODUCT      : PHYSICS_2012
// VISIBILITY   : CLIENT
//
// ------------------------------------------------------TKBMS v1.0

// MOPP Virtual Machine implementation

// include all default MOPP headers
#include <Physics2012/Collide/hkpCollide.h>
#include <Physics2012/Collide/Shape/Compound/Tree/Mopp/Machine/hkpMoppRayBundleVirtualMachine.h>
#include <Physics2012/Collide/Shape/Compound/Tree/Mopp/Code/hkpMoppCommands.h>
#include <Physics2012/Collide/Shape/Compound/Tree/Mopp/Machine/hkpMoppVirtualMachineMacros.h>
#include <Physics2012/Collide/Shape/Compound/Collection/hkpShapeCollection.h>
#include <Physics2012/Collide/Shape/Query/hkpRayShapeCollectionFilter.h>
#include <Physics2012/Collide/Shape/Query/hkpRayHitCollector.h>

#include <Common/Base/Math/Vector/hkIntVector.h>

#include <Physics2012/Collide/Shape/Query/hkpShapeRayBundleCastInput.h>
#include <Physics2012/Collide/Shape/Query/hkpShapeRayCastInput.h>
#include <Physics2012/Collide/Shape/Query/hkpShapeRayCastOutput.h>

#if defined(HK_PLATFORM_SPU)
#   include <Physics2012/Collide/Filter/Spu/hkpSpuCollisionFilterUtil.h>
#endif

#if 0
#include <Common/Base/Fwd/hkcstdio.h>
#define G_OUT(x) extern hkOstream* g_out; (*g_out) << x << "\n";
#define PRINTF(x) extern hkOstream* g_out; g_out->printf x; (*g_out) << "\n"
#else
#define G_OUT(x)
#define PRINTF(x)
#endif

// Virtual Machine command definitions
struct hkpMoppRayBundleVirtualMachine::QueryInt
{
    //the offset of the all previous scales are accumulated here (in B space)
    hkVector4 m_FtoBoffset;

    // pc
    //const unsigned char* m_pc;

    //the shifts from all previous scale commands are accumulated here
    int m_shift;

    // this converts  floating point space into form byte space
    hkVector4 m_FtoBScale; // all components equal

    // the current offset for the primitives
    unsigned int m_primitiveOffset;
    unsigned int m_properties[hkpMoppCode::MAX_PRIMITIVE_PROPERTIES];
};

struct hkpMoppRayBundleVirtualMachine::QueryBundle
{
    // this is the ray in local int coordinate space
    RayPointBundle m_rayEnds[2];
    hkVector4Comparison m_activeMask;
};

// Try to save a few instructions by leaving out the mask
#define COPY_BUNDLE(A, B) { (A).m_vec[0] = (B).m_vec[0]; (A).m_vec[1] = (B).m_vec[1]; (A).m_vec[2] = (B).m_vec[2]; }
#define COPY_QUERY_NO_MASK(QUERYA, QUERYB) { COPY_BUNDLE((QUERYA).m_rayEnds[0], (QUERYB).m_rayEnds[0]); COPY_BUNDLE((QUERYA).m_rayEnds[1], (QUERYB).m_rayEnds[1]); }

// ratio = a / (a-b)
// oneMinusRatio = 1-ratio = -b / (a-b)
static inline void setRatio(hkVector4Parameter a, hkVector4Parameter b, hkVector4& ratio, hkVector4& oneMinusRatio )
{
    hkVector4 diff; diff.setSub(a, b);
    hkVector4 invDiff; invDiff.setReciprocal(diff);

    ratio.setMul(a, invDiff);
    oneMinusRatio.setMul(b, invDiff);
    oneMinusRatio.setNeg<4>(oneMinusRatio);
}

static HK_INLINE void setBundleInterpolateWithMasks(const RayPointBundle& inStart, const RayPointBundle& inEnd,
                                                 RayPointBundle& outStart, RayPointBundle& outEnd,
                                                 hkVector4Parameter t, hkVector4Parameter oneMinusT,
                                                 hkVector4ComparisonParameter end0Mask, hkVector4ComparisonParameter end1Mask)
{
    hkVector4 interp;
    //for(int i=0; i<3; i++)
    /*{
        interp.setMul4(oneMinusT, inStart.m_vec[i]);
        interp.addMul4(t, inEnd.m_vec[i]);
        outStart.m_vec[i].select32(outStart.m_vec[i], interp, end0Mask);
        outEnd.m_vec[i].select32(outEnd.m_vec[i], interp, end1Mask);
    }*/
    {
        interp.setMul(oneMinusT, inStart.m_vec[0]);
        interp.addMul(t, inEnd.m_vec[0]);
        outStart.m_vec[0].setSelect(end0Mask, interp, outStart.m_vec[0]);
        outEnd.m_vec[0].setSelect(end1Mask, interp, outEnd.m_vec[0]);

        interp.setMul(oneMinusT, inStart.m_vec[1]);
        interp.addMul(t, inEnd.m_vec[1]);
        outStart.m_vec[1].setSelect(end0Mask, interp, outStart.m_vec[1]);
        outEnd.m_vec[1].setSelect(end1Mask, interp, outEnd.m_vec[1]);

        interp.setMul(oneMinusT, inStart.m_vec[2]);
        interp.addMul(t, inEnd.m_vec[2]);
        outStart.m_vec[2].setSelect(end0Mask, interp, outStart.m_vec[2]);
        outEnd.m_vec[2].setSelect(end1Mask, interp, outEnd.m_vec[2]);
    }
}


static inline hkBool32 shouldDoLeftBranchFirst(hkVector4Parameter la, hkVector4Parameter lb, hkVector4ComparisonParameter activeMask)
{
    const hkVector4 one = hkVector4::getConstant<HK_QUADREAL_1>();
    hkVector4 zero; zero.setZero();

    hkVector4Comparison wantLeftMask = la.less(lb);
    wantLeftMask.setAnd(wantLeftMask, activeMask);
    hkVector4 activeVec; activeVec.setSelect(activeMask, one, zero);
    hkVector4 wantLeftVec; wantLeftVec.setSelect(wantLeftMask, one, zero);

    const hkSimdReal numActive = activeVec.horizontalAdd<4>(); // equal to # of non-zero elements in activeMask
    const hkSimdReal numWantLeft = wantLeftVec.horizontalAdd<4>();

    // take the left branch  first if more than half the rays "want" it
    return (numWantLeft + numWantLeft).isGreaterEqual(numActive);
}

static inline hkVector4Comparison getDifferentSignMask(hkVector4Parameter a, hkVector4Parameter b)
{
    hkVector4 prod; prod.setMul(a, b);
    return prod.lessZero();
}

// static inline void loadcommandFloatVec(hkVector4& v, const hkUint8* PC)
// {
// #if 0
//  // Can't do this in SSE because PC isn't 4-byte aligned??
//  hkIntVector ipc;
//  ipc.loadUnaligned4((hkUint32*)PC);
//  hkIntVector zero; zero.setZero();
//  ipc.setMergeHead8(zero, ipc);
//  ipc.setMergeHead8(zero, ipc);
//  ipc.convertFromF32toU32(v);
// #else
//  v(0) = hkReal(PC[0]);
//  v(1) = hkReal(PC[1]);
//  v(2) = hkReal(PC[2]);
//  v(3) = hkReal(PC[3]);
// #endif
// }

static inline hkBool32 allMaskedAreSet(hkVector4ComparisonParameter v, hkVector4ComparisonParameter mask)
{
    //return v.allAreSet( (hkVector4ComparisonMask::Mask)mask.getMask() );
    hkVector4Comparison notMask; notMask.setNot(mask);
    notMask.setOr(notMask, v);
    return notMask.allAreSet();
}


void hkpMoppRayBundleVirtualMachine::addHit(unsigned int key, const unsigned int properties[hkpMoppCode::MAX_PRIMITIVE_PROPERTIES], QueryBundle* fQuery)
{
#ifdef HK_MOPP_DEBUGGER_ENABLED
    if ( hkpMoppDebugger::getInstance().find() )
    {
        hkprintf("Adding correct triangle as %i %i\n", key, properties[0]);
    }
#endif

    if ( m_rays->m_rayShapeCollectionFilter )
    {
        // <ce.todo> Fix this up - either add another isCollisionEnabled method to the filter, or change it to only consider the filter info
        hkpShapeRayCastInput temp;
        temp.m_filterInfo = m_rays->m_filterInfo;
#if defined(HK_PLATFORM_SPU)
        if ( hkpSpuCollisionFilterUtil::s_rayShapeContainerIsCollisionEnabled( temp, *m_collection, key) )
#else
        if ( m_rays->m_rayShapeCollectionFilter->isCollisionEnabled( temp, *m_collection, key ) )
#endif
        {
            // Allocate on Havok stack to avoid blowing the program stack, since this function gets inlined into the recursive query function
            hkpShapeBuffer* shapeBuffer = hkAllocateStack<hkpShapeBuffer>(1);
            const hkpShape* childShape = m_collection->getChildShape( key, *shapeBuffer );

            PRINTF(("key: %d", key));

            //if ( m_rayResultPtr )
            HK_ASSERT_NO_MSG(0x364c0c01, m_rayResultPtr);
            {
                //
                //  data driven query
                //
                for (int i=0; i<4; i++)
                {
                    m_rayResultPtr->m_outputs[i].changeLevel(1);
                }

                hkVector4Comparison newHits = childShape->castRayBundle( *m_rays, *m_rayResultPtr, fQuery->m_activeMask );

                for (int i=0; i<4; i++)
                {
                    m_rayResultPtr->m_outputs[i].changeLevel(-1);

                    if( newHits.anyIsSet(hkVector4Comparison::getMaskForComponent(i) ) )
                    {
                        m_earlyOutHitFraction(i) = m_rayResultPtr->m_outputs[i].m_hitFraction ;
                        m_rayResultPtr->m_outputs[i].setKey( key );
                    }
                }

                // Make sure that none of the reported hits are on supposedly inactive rays.
                // E.g. if rays 0, 2, and 3 are active, and castRayBundle reports that 0, 1, and 2 had hits, something is wrong
                HK_ON_DEBUG( int activeFlags = fQuery->m_activeMask.getMask();  int hitFlags = newHits.getMask(); );
                HK_ASSERT_NO_MSG(0x2e5deabc, (activeFlags | hitFlags) == activeFlags);

                m_hitsFound.setOr(m_hitsFound, newHits);
            }

            //else
            //{
                //
                //  callback driven query
                //

                //hkpCdBody childBody( m_body );
                //childBody.setShape( childShape, key );
                //childShape->castRayWithCollector( m_ray, childBody, *m_collector );
                //m_earlyOutHitFraction.setAll(m_collector->m_earlyOutHitFraction);
            //}

            hkDeallocateStack(shapeBuffer, 1);
        }
    }

}

#define SPLIT2VEC( COMPA, OP, COMPB, NEG )                                  \
            { \
            hkVector4 offset; offset.setAdd(commandFloatVec, commandFloatVec);  \
            offset.subMul(hkVector4::getConstant<HK_QUADREAL_255>(), hkSimdReal::getConstant<(HK_QUADREAL_0 + NEG)>());                                     \
            foffseth.setBroadcast<1>(offset);                           \
            foffsetl.setBroadcast<2>(offset);                           \
} \
            {                                                                   \
            positionl.OP(fQuery->m_rayEnds[0].m_vec[COMPA], fQuery->m_rayEnds[0].m_vec[COMPB]); \
            positionh.OP(fQuery->m_rayEnds[1].m_vec[COMPA], fQuery->m_rayEnds[1].m_vec[COMPB]); \
}                                                           \
    goto do_compareN4;

#define SPLIT3VEC( FUNCA, FUNCB, NEG )                                  \
            { \
                hkVector4 offset; offset.setAdd(commandFloatVec, commandFloatVec);  \
                offset.add(commandFloatVec);                                    \
                offset.subMul(hkVector4::getConstant<HK_QUADREAL_255>(), hkSimdReal::getConstant<(HK_QUADREAL_0 + NEG)>());                                     \
                foffseth.setBroadcast<1>(offset);                           \
                foffsetl.setBroadcast<2>(offset);                           \
            } \
            {                                                                   \
                positionl.FUNCA(fQuery->m_rayEnds[0].m_vec[0], fQuery->m_rayEnds[0].m_vec[1]);\
                positionl.FUNCB(fQuery->m_rayEnds[0].m_vec[2]);\
                positionh.FUNCA(fQuery->m_rayEnds[1].m_vec[0], fQuery->m_rayEnds[1].m_vec[1]);\
                positionh.FUNCB(fQuery->m_rayEnds[1].m_vec[2]);\
            }                                                           \
            goto do_compareN4;

//
//  The spaces:
//  - original floating point space:  O-space (not used in this class)
//  - the relative MOPP space:        F-space  = O-space - m_code->m_info.m_offset
//  - the root level integer space:   I-space  = F-space * m_code->m_info.m_scale = F-space / m_ItoFScale
//  - the MOPP byte space:            B-space  = F-space * quary->m_FtoBScale - query->m_FtoBoffset]
//
#if defined(HK_PLATFORM_SPU)
static inline const hkUint8* recheckCache( int chunkId, const hkUint8* originalBaseAddress, int dmaGroup )
{
    // Fetch the chunk
    const unsigned char* chunkAddress = originalBaseAddress + HK_MOPP_CHUNK_SIZE * chunkId;
    unsigned char* chunkBase = (unsigned char*)g_SpuMoppCache->getFromMainMemoryInlined( chunkAddress, HK_MOPP_CHUNK_SIZE, dmaGroup, true );
    return chunkBase;
}
#endif

void hkpMoppRayBundleVirtualMachine::queryRayOnTree( const hkpMoppRayBundleVirtualMachine::QueryInt* query, const unsigned char* PC,hkpMoppRayBundleVirtualMachine::QueryBundle* const fQuery, int chunkId)
{
    hkpMoppRayBundleVirtualMachine::QueryInt scaledQuery; // for fast scale commands
    while (1)
    {
        HK_MOPP_LOAD_PC();

        // Zero component is never actually accessed
        hkVector4 commandFloatVec; commandFloatVec.set(0.0f, PC1_float, PC2_float, PC3_float);

        hkVector4 positionh,positionl;

        //a temporary direction ( used when subtractions are involved)
        int tdir=999;
        int offsetRB;
        int offsetLB;

        //these need to be changed later
        hkVector4 foffseth;
        hkVector4 foffsetl;

        int offsetl;
        int offseth;

        switch (command)
        {
        case HK_MOPP_SPLIT_YZ:      SPLIT2VEC( 1, setAdd, 2, 0);
        case HK_MOPP_SPLIT_YMZ:     SPLIT2VEC( 1, setSub, 2, 1);
        case HK_MOPP_SPLIT_XZ:      SPLIT2VEC( 0, setAdd, 2, 0);
        case HK_MOPP_SPLIT_XMZ:     SPLIT2VEC( 0, setSub, 2, 1);
        case HK_MOPP_SPLIT_XY:      SPLIT2VEC( 0, setAdd, 1, 0);
        case HK_MOPP_SPLIT_XMY:     SPLIT2VEC( 0, setSub, 1, 1);


        case HK_MOPP_SPLIT_XYZ:     SPLIT3VEC( setAdd, add, 0);
        case HK_MOPP_SPLIT_XYMZ:    SPLIT3VEC( setAdd, sub, 1);
        case HK_MOPP_SPLIT_XMYZ:    SPLIT3VEC( setSub, add, 1);
        case HK_MOPP_SPLIT_XMYMZ:   SPLIT3VEC( setSub, sub, 2);

        case HK_MOPP_SINGLE_SPLIT_Z:
        case HK_MOPP_SINGLE_SPLIT_Y:
        case HK_MOPP_SINGLE_SPLIT_X:
            tdir = command - HK_MOPP_SINGLE_SPLIT_X;
            positionl = fQuery->m_rayEnds[0].m_vec[tdir];
            positionh = fQuery->m_rayEnds[1].m_vec[tdir];
            foffsetl.setBroadcast<1>( commandFloatVec );
            foffseth.setAdd(foffsetl, hkSimdReal_1);

            offsetLB = 0;
            offsetRB = PC2;
            PC += 3;
            goto do_compare;
        case HK_MOPP_SPLIT_JUMP_Z:
        case HK_MOPP_SPLIT_JUMP_Y:
        case HK_MOPP_SPLIT_JUMP_X:
            tdir = command - HK_MOPP_SPLIT_JUMP_X;
            foffseth.setBroadcast<1>( commandFloatVec );
            foffsetl.setBroadcast<2>( commandFloatVec );
            positionl = fQuery->m_rayEnds[0].m_vec[tdir];
            positionh = fQuery->m_rayEnds[1].m_vec[tdir];

            offsetLB = (PC3 << 8) + (PC4);
            offsetRB = (PC5 << 8) + (PC6);
            PC += 7;
            goto do_compare;

        case HK_MOPP_SPLIT_Z:
        case HK_MOPP_SPLIT_Y:
        case HK_MOPP_SPLIT_X:
            tdir = command - HK_MOPP_SPLIT_X;
            foffseth.setBroadcast<1>( commandFloatVec );
            foffsetl.setBroadcast<2>( commandFloatVec );
            positionl = fQuery->m_rayEnds[0].m_vec[tdir];
            positionh = fQuery->m_rayEnds[1].m_vec[tdir];

do_compareN4:
            offsetLB = 0;
            offsetRB = PC3;
            PC += 4;
do_compare:
            {
                // not in right branch -> traverse left only
                //if ( (positionh < foffsetl) && (positionl < foffsetl) )
                hkVector4Comparison wantOnlyLeft, wantOnlyRight;
                {
                    const hkVector4Comparison comp1 = positionh.less(foffsetl);
                    const hkVector4Comparison comp2 = positionl.less(foffsetl);
                    wantOnlyLeft.setAnd(comp1, comp2);
                    wantOnlyLeft.setAnd(fQuery->m_activeMask, wantOnlyLeft);
                }
                {
                    const hkVector4Comparison comp3 = positionl.greater(foffseth);
                    const hkVector4Comparison comp4 = positionh.greater(foffseth);
                    wantOnlyRight.setAnd(comp3, comp4);
                    wantOnlyRight.setAnd(fQuery->m_activeMask, wantOnlyRight);
                }

                if ( allMaskedAreSet(wantOnlyLeft, fQuery->m_activeMask) )
                {
                    HK_QVM_DBG( hkpMoppDebugger::getInstance().rejectRight() );
                    HK_QVM_DBG( hkpMoppDebugger::getInstance().recurseLeft() );
                    PC += offsetLB;
                    //the PC is ready for the left branch
                    continue;
                }


                // now we have to check for traversing the right
                //if ((positionl > foffseth) && (positionh > foffseth))
                if ( allMaskedAreSet(wantOnlyRight, fQuery->m_activeMask) )
                {
                    HK_QVM_DBG( hkpMoppDebugger::getInstance().rejectLeft() );
                    HK_QVM_DBG( hkpMoppDebugger::getInstance().recurseRight() );
                    PC += offsetRB;
                    //the PC is ready for the right branch
                    continue;
                }

                //the ray may cross the plane
                {
                    //the distance from the start and end to the planePos
                    //these values are for clipping purposes
                    hkVector4 la; la.setSub(positionl, foffseth);
                    hkVector4 ha; ha.setSub(positionl, foffsetl);
                    hkVector4 lb; lb.setSub(positionh, foffseth);
                    hkVector4 hb; hb.setSub(positionh, foffsetl);

                    //if the ray goes from a low number to a high number,
                    //then we go left first, otherwise we go right first
                    //now that the ray must be split, we actually create 2 smaller rays
                    hkpMoppRayBundleVirtualMachine::QueryBundle leftBundle;

                    hkpMoppRayBundleVirtualMachine::QueryBundle rightBundle;

                    // Don't do a full copy here since we'll set the masks in a minute
                    COPY_QUERY_NO_MASK(leftBundle, *fQuery);
                    COPY_QUERY_NO_MASK(rightBundle, *fQuery);

                    {
                        hkVector4Comparison dontWantLeft; dontWantLeft.setNot(wantOnlyRight);
                        leftBundle.m_activeMask.setAnd(fQuery->m_activeMask, dontWantLeft);
                    }
                    {
                        hkVector4Comparison dontWantRight; dontWantRight.setNot(wantOnlyLeft);
                        rightBundle.m_activeMask.setAnd(fQuery->m_activeMask, dontWantRight);
                    }

                    hkVector4Comparison clipLeftQuery = getDifferentSignMask(la, lb);
                    // Could skip if clipLeftQuery is all 0, but checking actually takes more time than it saves
                    {
                        hkVector4 ratio, oneMinusRatio;
                        setRatio(la, lb, ratio, oneMinusRatio);
                        //const int leftEndIdx = (la < lb) ? 1 : 0;
                        hkVector4Comparison leftEndIdx; leftEndIdx = la.less(lb);
                        hkVector4Comparison notLeftEndIdx; notLeftEndIdx.setNot(leftEndIdx);

                        hkVector4Comparison clipRayEnd0Mask; clipRayEnd0Mask.setAnd(notLeftEndIdx, clipLeftQuery);
                        hkVector4Comparison clipRayEnd1Mask; clipRayEnd1Mask.setAnd(leftEndIdx, clipLeftQuery);

                        setBundleInterpolateWithMasks(leftBundle.m_rayEnds[0], leftBundle.m_rayEnds[1],
                                                      leftBundle.m_rayEnds[0], leftBundle.m_rayEnds[1],
                                                      ratio, oneMinusRatio, clipRayEnd0Mask, clipRayEnd1Mask );
                    }

                    hkVector4Comparison clipRightQuery = getDifferentSignMask(ha, hb);
                    // Could skip if clipRightQuery is all 0, but checking actually takes more time than it saves
                    {
                        hkVector4 ratio, oneMinusRatio;
                        setRatio(ha, hb, ratio, oneMinusRatio);
                        //const int rightEndIdx = (la < lb)? 0 : 1;
                        hkVector4Comparison rightEndIdx; rightEndIdx = la.greater(lb);
                        hkVector4Comparison notRightEndIdx; notRightEndIdx.setNot(rightEndIdx);

                        hkVector4Comparison clipRayEnd0Mask; clipRayEnd0Mask.setAnd(notRightEndIdx, clipRightQuery);
                        hkVector4Comparison clipRayEnd1Mask; clipRayEnd1Mask.setAnd(rightEndIdx, clipRightQuery);

                        setBundleInterpolateWithMasks(rightBundle.m_rayEnds[0], rightBundle.m_rayEnds[1],
                            rightBundle.m_rayEnds[0], rightBundle.m_rayEnds[1],
                            ratio, oneMinusRatio, clipRayEnd0Mask, clipRayEnd1Mask );
                    }


                    hkpMoppRayBundleVirtualMachine::QueryBundle* firstBundle  = &leftBundle;
                    hkpMoppRayBundleVirtualMachine::QueryBundle* secondBundle = &rightBundle;
                    int firstOffset = offsetLB;
                    int secondOffset = offsetRB;

                    hkBool32 doRightFirst;
                    {
                        doRightFirst = !(shouldDoLeftBranchFirst(la, lb, fQuery->m_activeMask));
                    }

                    if (doRightFirst)
                    {
                        secondBundle = &leftBundle;
                        firstBundle  = &rightBundle;
                        firstOffset  = offsetRB;
                        secondOffset = offsetLB;
                    }

                    queryRayOnTree( query,  PC+firstOffset, firstBundle, chunkId);

#if defined ( HK_PLATFORM_SPU )
                    const hkUint8* spuAddress = recheckCache( chunkId, m_originalBaseAddress, hkSpuShapeRayCastDmaGroups::GET_MOPP_CHUNK );

                    // We may have been given a different way in the cache so our local PC is potentially incorrect
                    // Since each chunk is aligned we can use the bottom bits and the new address
                    PC = (const unsigned char*) ( (hkUlong)spuAddress | ((hkUlong)PC & HK_MOPP_CHUNK_MASK ) );
#endif

                    {
                        const hkVector4& one = hkVector4::getConstant<HK_QUADREAL_1>();
                        hkVector4 oneMinusDistance; oneMinusDistance.setSub(one, m_earlyOutHitFraction);
                        hkVector4Comparison distanceLt1Mask = m_earlyOutHitFraction.less(one);

                        // Could skip if distanceLt1Mask is all 0 - on 360 there's no difference either way
                        {
                            hkVector4 hitPoint; // actually the x, y, or z components of 4 hitpoints, but you get the idea
                            hkVector4 codeOffset, FtoBScale, FtoBoffset; // will splat the corresponding vector into these

                            hitPoint.setMul(oneMinusDistance, m_from.m_vec[0]);
                            hitPoint.addMul(m_earlyOutHitFraction, m_to.m_vec[0]);

                            codeOffset.setBroadcast<0>(m_code->m_info.m_offset);
                            FtoBScale.setBroadcast<0>(query->m_FtoBScale);
                            FtoBoffset.setBroadcast<0>(query->m_FtoBoffset);

                            hitPoint.sub(codeOffset);
                            hitPoint.mul(FtoBScale);
                            hitPoint.sub(FtoBoffset);

                            secondBundle->m_rayEnds[1].m_vec[0].setSelect(distanceLt1Mask, hitPoint, secondBundle->m_rayEnds[1].m_vec[0]);


                            hitPoint.setMul(oneMinusDistance, m_from.m_vec[1]);
                            hitPoint.addMul(m_earlyOutHitFraction, m_to.m_vec[1]);

                            codeOffset.setBroadcast<1>(m_code->m_info.m_offset);
                            FtoBScale.setBroadcast<1>(query->m_FtoBScale);
                            FtoBoffset.setBroadcast<1>(query->m_FtoBoffset);

                            hitPoint.sub(codeOffset);
                            hitPoint.mul(FtoBScale);
                            hitPoint.sub(FtoBoffset);

                            secondBundle->m_rayEnds[1].m_vec[1].setSelect(distanceLt1Mask, hitPoint, secondBundle->m_rayEnds[1].m_vec[1]);


                            hitPoint.setMul(oneMinusDistance, m_from.m_vec[2]);
                            hitPoint.addMul(m_earlyOutHitFraction, m_to.m_vec[2]);

                            codeOffset.setBroadcast<2>(m_code->m_info.m_offset);
                            FtoBScale.setBroadcast<2>(query->m_FtoBScale);
                            FtoBoffset.setBroadcast<2>(query->m_FtoBoffset);

                            hitPoint.sub(codeOffset);
                            hitPoint.mul(FtoBScale);
                            hitPoint.sub(FtoBoffset);

                            secondBundle->m_rayEnds[1].m_vec[2].setSelect(distanceLt1Mask, hitPoint, secondBundle->m_rayEnds[1].m_vec[2]);
                        }

                        if (tdir<3)
                        {
                            la.setSub(positionl, foffseth);
                            lb.setSub(positionh, foffseth);
                            hkVector4Comparison laLessThanLb = la.less(lb);

                            hkVector4Comparison rayEndLessThanOffsetLow = secondBundle->m_rayEnds[1].m_vec[tdir].less(foffsetl);
                            hkVector4Comparison rayEndGreaterThanOffsetHigh = secondBundle->m_rayEnds[1].m_vec[tdir].greater(foffseth);

                            hkVector4Comparison pruneMask;
                            {
                                pruneMask.setSelect(rayEndGreaterThanOffsetHigh, rayEndLessThanOffsetLow, laLessThanLb);
                            }

                            hkVector4Comparison wantBothMask;
                            {
                                // = NOT(wantOnlyLeft) AND NOT(wantOnlyRight)
                                hkVector4Comparison dontWantLeft; dontWantLeft.setNot(wantOnlyLeft);
                                hkVector4Comparison dontWantRight; dontWantRight.setNot(wantOnlyRight);
                                wantBothMask.setAnd(dontWantLeft, dontWantRight);

                            }
                            pruneMask.setAnd(wantBothMask, pruneMask);

                            // Only prune if earlyOutDistance is less than 1
                            // <ce.todo> Optimization? Recompute the mask instead of reusing it (reusing probably has to pull it from memory)
                            pruneMask.setAnd(pruneMask, distanceLt1Mask);

                            // newly-dead rays have their components set, so we take the complement and AND it with the activeMask
                            pruneMask.setNot(pruneMask);

                            secondBundle->m_activeMask.setAnd(secondBundle->m_activeMask, pruneMask);
                        }
                    }

                    if (secondBundle->m_activeMask.anyIsSet())
                    {
                        PC += secondOffset;
                        *fQuery = *secondBundle;
                        continue;
                    }
                    goto end_of_function;

                }
            }

        case HK_MOPP_DOUBLE_CUT24_X:
        case HK_MOPP_DOUBLE_CUT24_Y:
        case HK_MOPP_DOUBLE_CUT24_Z:
            {
                // <ce.todo> Optimization?  This case doesn't happen very often, although there are still some LHS removal possible here
                tdir = command - HK_MOPP_DOUBLE_CUT24_X;

                foffsetl.setMul(hkSimdReal::fromFloat(hkReal((PC1<<16) + (PC2<<8) + PC3)), m_ItoFScale);
                foffseth.setMul(hkSimdReal::fromFloat(hkReal((PC4<<16) + (PC5<<8) + PC6)), m_ItoFScale);

                foffsetl.mul(query->m_FtoBScale);
                foffseth.mul(query->m_FtoBScale);

                hkVector4 sub; sub.setBroadcast(tdir, query->m_FtoBoffset);
                foffsetl.sub(sub);
                foffseth.sub(sub);
                PC += 7;
                goto doCut;
            }

        case HK_MOPP_DOUBLE_CUT_X:
        case HK_MOPP_DOUBLE_CUT_Y:
        case HK_MOPP_DOUBLE_CUT_Z:
            {
                tdir = command - HK_MOPP_DOUBLE_CUT_X;
                foffsetl.setBroadcast<1>( commandFloatVec );
                foffseth.setBroadcast<2>( commandFloatVec );
                PC += 3;
            }
doCut:
            {
                positionl = fQuery->m_rayEnds[0].m_vec[tdir];
                positionh = fQuery->m_rayEnds[1].m_vec[tdir];

                hkVector4Comparison zeroMask = positionl.less(positionh);
                hkVector4Comparison oneMask = positionl.greaterEqual(positionh);

                hkVector4Comparison pruneMask;
                {
                    const hkVector4Comparison comp1 = positionh.less(foffsetl);
                    const hkVector4Comparison comp2 = positionl.greater(foffseth);
                    hkVector4Comparison compBoth12; compBoth12.setOr(comp1, comp2);

                    const hkVector4Comparison comp3 = positionl.less(foffsetl);
                    const hkVector4Comparison comp4 = positionh.greater(foffseth);
                    hkVector4Comparison compBoth34; compBoth34.setOr(comp3, comp4);

                    pruneMask.setSelect(compBoth34, compBoth12, zeroMask);
                    pruneMask.setNot(pruneMask);
                }

                fQuery->m_activeMask.setAnd(fQuery->m_activeMask, pruneMask);

                // All of the rays in the bundle are pruned, so we can quit (on this branch at least)
                if (!fQuery->m_activeMask.anyIsSet())
                {
                    goto end_of_function;
                }

                // Can't avoid the copy here. We need a duplicate so that the "interp"s below are between the right values...
                const RayPointBundle rayStart = fQuery->m_rayEnds[0];
                const RayPointBundle rayEnd = fQuery->m_rayEnds[1];

                {
                    hkVector4 R0; R0.setSub(positionl, foffseth);
                    hkVector4 R1; R1.setSub(positionh, foffseth);
                    hkVector4Comparison differentSignMask = getDifferentSignMask(R0, R1);
                    hkVector4 ratio, oneMinusRatio;
                    setRatio(R0, R1, ratio, oneMinusRatio);

                    hkVector4Comparison clipRayEnd0Mask; clipRayEnd0Mask.setAnd(oneMask, differentSignMask);
                    hkVector4Comparison clipRayEnd1Mask; clipRayEnd1Mask.setAnd(zeroMask, differentSignMask);

                    setBundleInterpolateWithMasks(rayStart, rayEnd, fQuery->m_rayEnds[0], fQuery->m_rayEnds[1], ratio, oneMinusRatio, clipRayEnd0Mask, clipRayEnd1Mask);
                }

                {
                    hkVector4 L0; L0.setSub(positionl, foffsetl);
                    hkVector4 L1; L1.setSub(positionh, foffsetl);
                    hkVector4Comparison differentSignMask = getDifferentSignMask(L0, L1);
                    hkVector4 ratio, oneMinusRatio;
                    setRatio(L0, L1, ratio, oneMinusRatio);

                    hkVector4Comparison clipRayEnd0Mask; clipRayEnd0Mask.setAnd(zeroMask, differentSignMask);
                    hkVector4Comparison clipRayEnd1Mask; clipRayEnd1Mask.setAnd(oneMask, differentSignMask);

                    setBundleInterpolateWithMasks(rayStart, rayEnd, fQuery->m_rayEnds[0], fQuery->m_rayEnds[1], ratio, oneMinusRatio, clipRayEnd0Mask, clipRayEnd1Mask);
                }

                continue;
            }




        HK_MOPP_JUMP_MACRO

        case HK_MOPP_JUMP_CHUNK:
            {
                chunkId = ( PC1 << 8 ) |  PC2;

#if defined(HK_PLATFORM_SPU)
                const hkUint8* spuAddress = recheckCache( chunkId, m_originalBaseAddress, hkSpuShapeRayCastDmaGroups::GET_MOPP_CHUNK );
                PC = spuAddress;
#else
                const unsigned char* jumpAddress = m_code->m_data.begin() + HK_MOPP_CHUNK_SIZE * chunkId;
                PC = jumpAddress;
#endif
                continue;
            }

        case HK_MOPP_DATA_OFFSET:
            {
                // Skip over the command
                PC+=5;
                continue;
            }

        case HK_MOPP_SCALE1:
        case HK_MOPP_SCALE2:
        case HK_MOPP_SCALE3:
        case HK_MOPP_SCALE4:
            {
                const unsigned int shift = command - HK_MOPP_SCALE0;

                hkVector4 extraOffset;
                extraOffset.set( PC1_float, PC2_float, PC3_float );

                hkSimdReal f = hkSimdReal::getConstant((hkVectorConstant)(HK_QUADREAL_0+(1<<shift)));

                {
                    hkVector4 offsetSplat;
                    offsetSplat.setBroadcast<0>(extraOffset);
                    fQuery->m_rayEnds[0].m_vec[0].sub( offsetSplat );
                    fQuery->m_rayEnds[0].m_vec[0].mul( f );

                    offsetSplat.setBroadcast<1>(extraOffset);
                    fQuery->m_rayEnds[0].m_vec[1].sub( offsetSplat );
                    fQuery->m_rayEnds[0].m_vec[1].mul( f );

                    offsetSplat.setBroadcast<2>(extraOffset);
                    fQuery->m_rayEnds[0].m_vec[2].sub( offsetSplat );
                    fQuery->m_rayEnds[0].m_vec[2].mul( f );

                    offsetSplat.setBroadcast<0>(extraOffset);
                    fQuery->m_rayEnds[1].m_vec[0].sub( offsetSplat );
                    fQuery->m_rayEnds[1].m_vec[0].mul( f );

                    offsetSplat.setBroadcast<1>(extraOffset);
                    fQuery->m_rayEnds[1].m_vec[1].sub( offsetSplat );
                    fQuery->m_rayEnds[1].m_vec[1].mul( f );

                    offsetSplat.setBroadcast<2>(extraOffset);
                    fQuery->m_rayEnds[1].m_vec[2].sub( offsetSplat );
                    fQuery->m_rayEnds[1].m_vec[2].mul( f );

                }



                //accumulate the offset in root node space
                scaledQuery.m_FtoBoffset.setAdd( query->m_FtoBoffset, extraOffset );
                scaledQuery.m_FtoBoffset.mul( f );

                //the shift has been accumulating
                scaledQuery.m_shift = query->m_shift + shift;
                scaledQuery.m_FtoBScale.setMul(f, query->m_FtoBScale);

                //no need to copy anything except for the primitive toffset and the properties
                for(int p = 0; p < hkpMoppCode::MAX_PRIMITIVE_PROPERTIES;p++)
                {
                    scaledQuery.m_properties[p] = query->m_properties[p];
                }

                scaledQuery.m_primitiveOffset = query->m_primitiveOffset;
                //no need to copy anything except for the primitive toffset
                query = &scaledQuery;

                PC += 4;
                continue;
            }

        // Slightly modified version of the contents of HK_MOPP_CHUNK_TERMINAL_MACRO
        // We need to be able to pass on the array of "live" rays, so we had to add an extra parameter to addHit
        HK_MOPP_TERMINAL_COMMON_MACRO
        {
        add_Terminal:
            offsetl += query->m_primitiveOffset;
            unsigned int keyRemap = ( (chunkId << 8) & m_reindexingMask ) | offsetl;
            addHit(keyRemap, query->m_properties, fQuery );
            goto end_of_function;
        }

        HK_MOPP_REOFFSET_MACRO

        HK_MOPP_PROPERTY_MACRO

        HK_MOPP_DEFAULT_MACRO
        }
    }

end_of_function:
    return;
}


void hkpMoppRayBundleVirtualMachine::queryRayBundleSub(const hkpMoppCode* code,   const hkpShapeRayBundleCastInput& input, hkVector4ComparisonParameter mask) // true on successful execution
{
    //this will be handy for accessing code properties later
    m_code = code;

    const unsigned char* programCounter = &m_code->m_data[0];

    hkpMoppRayBundleVirtualMachine::QueryBundle fQuery;
    hkpMoppRayBundleVirtualMachine::QueryInt    query;

    query.m_FtoBoffset.setZero();

    const hkSimdReal sscale = hkSimdReal::fromFloat(m_code->m_info.getScale());
    hkSimdReal inv_sscale; inv_sscale.setReciprocal(sscale);
    m_ItoFScale.setAll(inv_sscale);

    const hkSimdReal bias = hkSimdReal::fromInt32(1 << 0x10);
    hkSimdReal scale_bias; scale_bias.setDiv(sscale, bias);
    query.m_FtoBScale.setAll(scale_bias);


    //***************FLOAT QUERY SETUP
    //fQuery.m_rayEnds[0].setSub4( input.m_from, code->m_info.m_offset);
    //fQuery.m_rayEnds[1].setSub4( input.m_to,   code->m_info.m_offset);
    //fQuery.m_rayEnds[0].mul4( query.m_FtoBScale );
    //fQuery.m_rayEnds[1].mul4( query.m_FtoBScale );

    hkVector4 offset;
    offset.setBroadcast<0>(code->m_info.m_offset);
    fQuery.m_rayEnds[0].m_vec[0].setSub(m_from.m_vec[0], offset);
    fQuery.m_rayEnds[1].m_vec[0].setSub(m_to.m_vec[0],   offset);
    fQuery.m_rayEnds[0].m_vec[0].mul( query.m_FtoBScale );
    fQuery.m_rayEnds[1].m_vec[0].mul( query.m_FtoBScale );

    offset.setBroadcast<1>(code->m_info.m_offset);
    fQuery.m_rayEnds[0].m_vec[1].setSub(m_from.m_vec[1], offset);
    fQuery.m_rayEnds[1].m_vec[1].setSub(m_to.m_vec[1],   offset);
    fQuery.m_rayEnds[0].m_vec[1].mul( query.m_FtoBScale );
    fQuery.m_rayEnds[1].m_vec[1].mul( query.m_FtoBScale );

    offset.setBroadcast<2>(code->m_info.m_offset);
    fQuery.m_rayEnds[0].m_vec[2].setSub(m_from.m_vec[2], offset);
    fQuery.m_rayEnds[1].m_vec[2].setSub(m_to.m_vec[2],   offset);
    fQuery.m_rayEnds[0].m_vec[2].mul( query.m_FtoBScale );
    fQuery.m_rayEnds[1].m_vec[2].mul( query.m_FtoBScale );

    // <ce.todo> ID
    HK_ASSERT(0x6fb7a3bf, mask.anyIsSet(), "Calling castRayBundle with no active rays!");
    fQuery.m_activeMask = mask;

    m_rays = &input;

    //no hits yet
    m_hitsFound.set<hkVector4ComparisonMask::MASK_NONE>();

    //any re-offsetting will occur in the tree
    query.m_primitiveOffset = 0;
    query.m_shift = 0;

    for(int p = 0; p < hkpMoppCode::MAX_PRIMITIVE_PROPERTIES; p++)
    {
        query.m_properties[p] = 0;
    }

    // If the MOPP code contains terminal data then we need to reindex the terminals
    m_reindexingMask = (*programCounter == HK_MOPP_DATA_OFFSET) ? 0xffffffff : 0x00000000;

    //call the query with the left-of-decimal place part
    HK_QVM_DBG( hkpMoppDebugger::getInstance().startRecurse() );
    queryRayOnTree( &query, programCounter, &fQuery, 0 );
}


hkVector4Comparison hkpMoppRayBundleVirtualMachine::queryRayBundle(const HK_SHAPE_CONTAINER* collection, const hkpMoppCode* code,
                                               const hkpShapeRayBundleCastInput& input, hkpShapeRayBundleCastOutput& rayResult, hkVector4ComparisonParameter mask) // true on successful execution
{
    m_collection = collection;
    m_rayResultPtr = &rayResult;
    m_earlyOutHitFraction.setConstant<HK_QUADREAL_1>();

    const hkVector4* vFrom = (const hkVector4*)input.m_from.m_vertices;
    m_from.m_vec[0] = vFrom[0];
    m_from.m_vec[1] = vFrom[1];
    m_from.m_vec[2] = vFrom[2];

    const hkVector4* vTo = (const hkVector4*)input.m_to.m_vertices;
    m_to.m_vec[0] = vTo[0];
    m_to.m_vec[1] = vTo[1];
    m_to.m_vec[2] = vTo[2];


#ifdef HK_PLATFORM_SPU
    m_originalBaseAddress = code->m_data.begin();
    const int moppBytes = hkMath::min2( HK_MOPP_CHUNK_SIZE, HK_NEXT_MULTIPLE_OF(16, code->m_data.getSize()) );
    const hkpMoppChunk* spuCode = (const hkpMoppChunk*)g_SpuMoppCache->getFromMainMemoryInlined( m_originalBaseAddress, moppBytes, hkSpuShapeRayCastDmaGroups::GET_MOPP_CHUNK, true );

    HK_DECLARE_ALIGNED_LOCAL_PTR( hkpMoppCode, spuMoppCode, 16 );
    spuMoppCode->initialize( code->m_info, (const hkUint8*)spuCode, moppBytes );
    code = spuMoppCode;
#endif

    queryRayBundleSub( code, input, mask);

    return m_hitsFound;
}


//void hkpMoppRayBundleVirtualMachine::queryRayBundle(const HK_SHAPE_CONTAINER* collection, const hkpMoppCode* code,
//                                             const hkpShapeRayCastInput& input, const hkpCdBody& body, hkpRayHitCollector& collector) // true on successful execution
//{
//  m_collection = collection;
//  m_rayResultPtr = HK_NULL;
//  m_body = &body;
//  m_collector = &collector;
//
//  m_earlyOutHitFraction.setAll(m_collector->m_earlyOutHitFraction);
//
//  queryRayBundleSub( code, input );
//}

/*
 * Havok SDK - Product file, BUILD(#20171210)
 * 
 * Confidential Information of Microsoft Corporation.
 * Not for disclosure or distribution without Microsoft's prior written
 * consent.  This software contains code, techniques and know-how which
 * is confidential and proprietary to Microsoft.  Product and Trade Secret
 * source code contains trade secrets of Microsoft.  Havok Software (C)
 * Copyright 1999-2017 Microsoft Corporation.
 * All Rights Reserved. Use of this software is subject to the
 * terms of an end user license agreement.
 * 
 * The Havok Logo, and the Havok buzzsaw logo are trademarks of Microsoft.
 * Title, ownership rights, and intellectual property rights in the Havok
 * software remain in Microsoft and/or its suppliers.
 * 
 * Use of this software for evaluation purposes is subject to and
 * indicates acceptance of the End User licence Agreement for this
 * product. A copy of the license is included with this software and is
 * also available from Havok Support.
 * 
 */
