/*===========================================================================*
 *--- Include files ---------------------------------------------------------*
 *===========================================================================*/
#include "rwcore.h"
#include "rpworld.h"

#include "rpplugin.h"
#include "rpdbgerr.h"

#include "rpskin.h"

#include "skin.h"
#include "skinskycommon.h"

/*===========================================================================*
 *--- Private Types ---------------------------------------------------------*
 *===========================================================================*/

/*===========================================================================*
 *--- Private Global Variables ----------------------------------------------*
 *===========================================================================*/
/*
 * Plugin-wide skin state, statically initialized to zero / NULL.
 *
 * The initializer is positional, so its entries must stay in the same order
 * as the members of SkinGlobals (declared outside this file).  The platform
 * sub-structure holds three pipeline tables with three slots each, plus a
 * lighting callback; the slot names are given in the per-entry comments.
 */
SkinGlobals _rpSkinGlobals =
{
    0,
    0,
    0,
    { (RwMatrix *)NULL, NULL },       /* presumably matrixCache - TODO confirm */
    (RwFreeList *)NULL,
    { 0, 0 },
    {                                 /* SkinGlobalPlatform  platform    */
        {                             /* RxPipeline *managerPipelines    */
            (RxPipeline *)NULL,       /* rpSKINSKYPIPELINEGENERIC        */
            (RxPipeline *)NULL,       /* rpSKINSKYPIPELINEMATFX          */
            (RxPipeline *)NULL        /* rpSKINSKYPIPELINEMATFXUV2       */
        },
        {                             /* RxPipeline *allPipelines        */
            (RxPipeline *)NULL,       /* rpSKINSKYPIPELINEGENERIC        */
            (RxPipeline *)NULL,       /* rpSKINSKYPIPELINEMATFX          */
            (RxPipeline *)NULL        /* rpSKINSKYPIPELINEMATFXUV2       */
        },
        {                             /* RxPipeline *allMaterialPipelines */
            (RxPipeline *)NULL,       /* rpSKINSKYPIPELINEGENERIC        */
            (RxPipeline *)NULL,       /* rpSKINSKYPIPELINEMATFX          */
            (RxPipeline *)NULL        /* rpSKINSKYPIPELINEMATFXUV2       */
        },
        (RxWorldLightingCallBack)NULL /* lightingCallBack                */
    }
};

/*===========================================================================*
 *--- Private Defines -------------------------------------------------------*
 *===========================================================================*/

/*===========================================================================*
 *--- Local Types -----------------------------------------------------------*
 *===========================================================================*/

/*===========================================================================*
 *--- Local Global Variables ------------------------------------------------*
 *===========================================================================*/
/* Revision identification string; compiled out when DOXYGEN is defined. */
#if (!defined(DOXYGEN))
static const char rcsid[] __RWUNUSED__ =
    "@@@@(#)$Id: ";
#endif /* (!defined(DOXYGEN)) */

/*===========================================================================*
 *--- Local Defines ---------------------------------------------------------*
 *===========================================================================*/
/*
 * SkinMatrixMultiplyMacro1
 *
 * Batched matrix concatenation written with COP2 vector instructions
 * (lqc2 / sqc2 / vmulax / vmadda* / vmadd*).
 *
 * Each pass of the loop loads four quadwords a[0..3] from the current
 * matrixArray1 entry (vf1-vf4) and four quadwords b[0..3] from the current
 * matrixArray2 entry (vf5-vf8), then stores four result quadwords
 * (vf9-vf12) at offsets 0x00..0x30 of the current target entry:
 *
 *     out[i] = a[i].x * b[0] + a[i].y * b[1] + a[i].z * b[2]      (i = 0..2)
 *     out[3] = a[3].x * b[0] + a[3].y * b[1] + a[3].z * b[2]
 *              + vf0.w * b[3]
 *
 * The w field of the a[] quadwords is never read.  vf0 is not loaded by
 * this macro; presumably it is the constant register whose w is 1.0 --
 * TODO confirm.
 *
 * Parameters:
 *     matrixArray1 - first source array, advanced by stride1 bytes per pass.
 *     matrixArray2 - second source array, advanced by stride2 bytes per pass.
 *     target       - destination array, advanced by a fixed 0x40 bytes
 *                    per pass.
 *     number       - number of matrices to process.
 *     stride1      - byte stride of matrixArray1.
 *     stride2      - byte stride of matrixArray2.
 *
 * Caveats visible in the code:
 *     - The loop end address is target + number * stride2 (the leading
 *       mult), while target itself moves 0x40 bytes per pass, so the loop
 *       only stops after `number` passes when stride2 is 0x40.
 *     - The loop is bottom-tested with an equality compare (bne), so the
 *       body runs before the first test; number must be at least 1.
 *     - Register $1 is used as scratch under ".set noat" but is not named
 *       in the clobber list.
 *     - SKMacLoop1 is a fixed label name, so every expansion of this macro
 *       emits the same label.
 */
#define SkinMatrixMultiplyMacro1( matrixArray1,         \
                                  matrixArray2,         \
                                  target,               \
                                  number,               \
                                  stride1,              \
                                  stride2 )             \
do                                                      \
{                                                       \
    RwMatrix *r0, *r1, *r2;                             \
    asm volatile ("                                     \
    .set noreorder                                 ;    \
    .set noat                                      ;    \
        mult $1, %6, %8                            ;    \
        add  %0, %3, $0                            ;    \
        add  %1, %4, $0                            ;    \
        add  %2, %5, $0                            ;    \
        add  $1, %2, $1                            ;    \
    SKMacLoop1:                                         \
        lqc2 vf1, 0x00(%0)                         ;    \
        lqc2 vf2, 0x10(%0)                         ;    \
        lqc2 vf3, 0x20(%0)                         ;    \
        lqc2 vf4, 0x30(%0)                         ;    \
        lqc2 vf5, 0x00(%1)                         ;    \
        lqc2 vf6, 0x10(%1)                         ;    \
        lqc2 vf7, 0x20(%1)                         ;    \
        lqc2 vf8, 0x30(%1)                         ;    \
        vmulax.xyzw  ACC, vf5, vf1                 ;    \
        vmadday.xyzw ACC, vf6, vf1                 ;    \
        vmaddz.xyzw  vf9, vf7, vf1                 ;    \
        vmulax.xyzw  ACC, vf5, vf2                 ;    \
        vmadday.xyzw ACC, vf6, vf2                 ;    \
        vmaddz.xyzw vf10, vf7, vf2                 ;    \
        vmulax.xyzw  ACC, vf5, vf3                 ;    \
        vmadday.xyzw ACC, vf6, vf3                 ;    \
        vmaddz.xyzw vf11, vf7, vf3                 ;    \
        vmulax.xyzw  ACC, vf5, vf4                 ;    \
        vmadday.xyzw ACC, vf6, vf4                 ;    \
        vmaddaz.xyzw ACC, vf7, vf4                 ;    \
        vmaddw.xyzw vf12, vf8, vf0                 ;    \
        sqc2 vf9,  0x00(%2)                        ;    \
        sqc2 vf10, 0x10(%2)                        ;    \
        sqc2 vf11, 0x20(%2)                        ;    \
        sqc2 vf12, 0x30(%2)                        ;    \
        addi %2, %2, 0x40                          ;    \
        add  %0, %0, %7                            ;    \
        add  %1, %1, %8                            ;    \
        bne  $1, %2, SKMacLoop1                    ;    \
        nop                                        ;    \
    .set reorder                                   ;    \
    .set at                                             \
        "                                               \
        : "=r&" (r0),                                   \
          "=r&" (r1),                                   \
          "=r&" (r2)                                    \
        :   "r" (matrixArray1),                         \
            "r" (matrixArray2),                         \
            "r" (target),                               \
            "r" (number),                               \
            "r" (stride1),                              \
            "r" (stride2)                               \
        :   "cc", "memory" );                           \
}                                                       \
while (0)

/*
 * SkinMatrixMultiplyMacro2
 *
 * Batched double matrix concatenation written with COP2 vector
 * instructions.  The four quadwords m[0..3] of `matrix` are loaded once
 * (vf13-vf16) before the loop.  Each pass then:
 *
 *   1. loads a[0..3] from the current matrixArray1 entry (vf1-vf4) and
 *      b[0..3] from the current matrixArray2 entry (vf5-vf8);
 *   2. forms the intermediate product t[0..3] in vf9-vf12:
 *          t[i] = a[i].x * b[0] + a[i].y * b[1] + a[i].z * b[2]   (i = 0..2)
 *          t[3] = a[3].x * b[0] + a[3].y * b[1] + a[3].z * b[2]
 *                 + vf0.w * b[3]
 *   3. combines t with m in the same way, reusing vf1-vf4 for the result:
 *          out[i] = t[i].x * m[0] + t[i].y * m[1] + t[i].z * m[2] (i = 0..2)
 *          out[3] = t[3].x * m[0] + t[3].y * m[1] + t[3].z * m[2]
 *                   + vf0.w * m[3]
 *   4. stores out[0..3] at offsets 0x00..0x30 of the current target entry.
 *
 * vf0 is not loaded by this macro; presumably it is the constant register
 * whose w is 1.0 -- TODO confirm.
 *
 * Parameters:
 *     matrixArray1 - first source array, advanced by stride1 bytes per pass.
 *     matrixArray2 - second source array, advanced by stride2 bytes per pass.
 *     matrix       - single matrix applied to every intermediate product.
 *     target       - destination array, advanced by a fixed 0x40 bytes
 *                    per pass.
 *     number       - number of matrices to process.
 *     stride1      - byte stride of matrixArray1.
 *     stride2      - byte stride of matrixArray2.
 *
 * Caveats visible in the code:
 *     - The loop end address is target + number * stride2 (the leading
 *       mult), while target itself moves 0x40 bytes per pass, so the loop
 *       only stops after `number` passes when stride2 is 0x40.
 *     - The loop is bottom-tested with an equality compare (bne), so the
 *       body runs before the first test; number must be at least 1.
 *     - Register $1 is used as scratch under ".set noat" but is not named
 *       in the clobber list.
 *     - SKMacLoop2 is a fixed label name, so every expansion of this macro
 *       emits the same label.
 */
#define SkinMatrixMultiplyMacro2( matrixArray1,         \
                                  matrixArray2,         \
                                  matrix,               \
                                  target,               \
                                  number,               \
                                  stride1,              \
                                  stride2 )             \
do                                                      \
{                                                       \
    RwMatrix *r0, *r1, *r2;                             \
    asm volatile ("                                     \
    .set noreorder                                 ;    \
    .set noat                                      ;    \
        mult $1, %6, %9                            ;    \
        add  %0, %3, $0                            ;    \
        add  %1, %4, $0                            ;    \
        add  %2, %5, $0                            ;    \
        lqc2 vf13, 0x00(%7)                        ;    \
        lqc2 vf14, 0x10(%7)                        ;    \
        lqc2 vf15, 0x20(%7)                        ;    \
        lqc2 vf16, 0x30(%7)                        ;    \
        add  $1, %2, $1                            ;    \
    SKMacLoop2:                                         \
        lqc2 vf1, 0x00(%0)                         ;    \
        lqc2 vf2, 0x10(%0)                         ;    \
        lqc2 vf3, 0x20(%0)                         ;    \
        lqc2 vf4, 0x30(%0)                         ;    \
        lqc2 vf5, 0x00(%1)                         ;    \
        lqc2 vf6, 0x10(%1)                         ;    \
        lqc2 vf7, 0x20(%1)                         ;    \
        lqc2 vf8, 0x30(%1)                         ;    \
        vmulax.xyzw  ACC, vf5, vf1                 ;    \
        vmadday.xyzw ACC, vf6, vf1                 ;    \
        vmaddz.xyzw  vf9, vf7, vf1                 ;    \
        vmulax.xyzw  ACC, vf5, vf2                 ;    \
        vmadday.xyzw ACC, vf6, vf2                 ;    \
        vmaddz.xyzw vf10, vf7, vf2                 ;    \
        vmulax.xyzw  ACC, vf5, vf3                 ;    \
        vmadday.xyzw ACC, vf6, vf3                 ;    \
        vmaddz.xyzw vf11, vf7, vf3                 ;    \
        vmulax.xyzw  ACC, vf5, vf4                 ;    \
        vmadday.xyzw ACC, vf6, vf4                 ;    \
        vmaddaz.xyzw ACC, vf7, vf4                 ;    \
        vmaddw.xyzw vf12, vf8, vf0                 ;    \
        vmulax.xyzw  ACC, vf13, vf9                ;    \
        vmadday.xyzw ACC, vf14, vf9                ;    \
        vmaddz.xyzw  vf1, vf15, vf9                ;    \
        vmulax.xyzw  ACC, vf13, vf10               ;    \
        vmadday.xyzw ACC, vf14, vf10               ;    \
        vmaddz.xyzw  vf2, vf15, vf10               ;    \
        vmulax.xyzw  ACC, vf13, vf11               ;    \
        vmadday.xyzw ACC, vf14, vf11               ;    \
        vmaddz.xyzw  vf3, vf15, vf11               ;    \
        vmulax.xyzw  ACC, vf13, vf12               ;    \
        vmadday.xyzw ACC, vf14, vf12               ;    \
        vmaddaz.xyzw ACC, vf15, vf12               ;    \
        vmaddw.xyzw  vf4, vf16, vf0                ;    \
        sqc2 vf1, 0x00(%2)                         ;    \
        sqc2 vf2, 0x10(%2)                         ;    \
        sqc2 vf3, 0x20(%2)                         ;    \
        sqc2 vf4, 0x30(%2)                         ;    \
        addi %2, %2, 0x40                          ;    \
        add  %0, %0, %8                            ;    \
        add  %1, %1, %9                            ;    \
        bne  $1, %2, SKMacLoop2                    ;    \
        nop                                        ;    \
    .set reorder                                   ;    \
    .set at                                             \
        "                                               \
        : "=r&" (r0),                                   \
          "=r&" (r1),                                   \
          "=r&" (r2)                                    \
          : "r" (matrixArray1),                         \
            "r" (matrixArray2),                         \
            "r" (target),                               \
            "r" (number),                               \
            "r" (matrix),                               \
            "r" (stride1),                              \
            "r" (stride2)                               \
        :   "cc", "memory" );                           \
}                                                       \
while (0)

/*
 * SkinMatrixMultiplyMacro5900
 *
 * Single matrix concatenation written with COP2 vector instructions; there
 * is no loop.  Four quadwords a[0..3] are loaded from _pMat1 (vf1-vf4) and
 * four quadwords b[0..3] from _pMat2 (vf5-vf8); the four result quadwords
 * (vf9-vf12) are stored at byte offsets 0, 16, 32 and 48 of _pOutMatrix:
 *
 *     out[i] = a[i].x * b[0] + a[i].y * b[1] + a[i].z * b[2]      (i = 0..2)
 *     out[3] = a[3].x * b[0] + a[3].y * b[1] + a[3].z * b[2]
 *              + vf0.w * b[3]
 *
 * vf0 is not loaded by this macro; presumably it is the constant register
 * whose w is 1.0 -- TODO confirm.
 *
 * Parameters:
 *     _pOutMatrix - address the result is written to.
 *     _pMat1      - address of the first source matrix.
 *     _pMat2      - address of the second source matrix.
 *
 * All three pointers are passed as input operands; the store to
 * _pOutMatrix is covered by the "memory" clobber.  Both sources are fully
 * loaded before the first store.  The expansion is a bare statement without
 * a trailing semicolon, so the caller supplies it.
 */
#define SkinMatrixMultiplyMacro5900(_pOutMatrix, _pMat1, _pMat2)  \
    asm volatile ("                                               \
    .set noreorder                              ;                 \
    lqc2 vf1, 0(%0)                             ;                 \
    lqc2 vf2, 16(%0)                            ;                 \
    lqc2 vf3, 32(%0)                            ;                 \
    lqc2 vf4, 48(%0)                            ;                 \
                                                                  \
    lqc2 vf5, 0(%1)                             ;                 \
    lqc2 vf6, 16(%1)                            ;                 \
    lqc2 vf7, 32(%1)                            ;                 \
    lqc2 vf8, 48(%1)                            ;                 \
                                                                  \
    vmulax.xyzw  ACC, vf5, vf1                  ;                 \
    vmadday.xyzw ACC, vf6, vf1                  ;                 \
    vmaddz.xyzw  vf9, vf7, vf1                  ;                 \
                                                                  \
    vmulax.xyzw  ACC, vf5, vf2                  ;                 \
    vmadday.xyzw ACC, vf6, vf2                  ;                 \
    vmaddz.xyzw  vf10, vf7, vf2                 ;                 \
                                                                  \
    vmulax.xyzw  ACC, vf5, vf3                  ;                 \
    vmadday.xyzw ACC, vf6, vf3                  ;                 \
    vmaddz.xyzw  vf11, vf7, vf3                 ;                 \
                                                                  \
    vmulax.xyzw  ACC, vf5, vf4                  ;                 \
    vmadday.xyzw ACC, vf6, vf4                  ;                 \
    vmaddaz.xyzw ACC, vf7, vf4                  ;                 \
    vmaddw.xyzw  vf12, vf8, vf0                 ;                 \
                                                                  \
    sqc2 vf9, 0(%2)                             ;                 \
    sqc2 vf10, 16(%2)                           ;                 \
    sqc2 vf11, 32(%2)                           ;                 \
    sqc2 vf12, 48(%2)                           ;                 \
    .set reorder                                                  \
    " : : "r" (_pMat1),                                           \
          "r" (_pMat2),                                           \
          "r" (_pOutMatrix)                                       \
        : "cc", "memory" )

/*===========================================================================*
 *--- Local functions -------------------------------------------------------*
 *===========================================================================*/

/*===========================================================================*
 *--- Private functions -----------------------------------------------------*
 *===========================================================================*/

/****************************************************************************
 _rpSkinInitialize

 Initialize a geometry's matrix weight data for dispatch to the VU.  Each
 weight is stored in a 32 bit RwReal; the least significant 10 bits of its
 bit pattern are overwritten with the matrix offset (index + 1) * 4, where
 index is the matching byte of the vertex's packed matrix indices.

 The first bone of every vertex is required to be in use (asserted).  For
 bones 2 to 4, an index of 0xFF or a weight of exactly 0.0f marks the slot
 as unused and the whole word is stored as 0.  Letting an index of 0xFF
 through would encode 0x400, which no longer fits in the 10 bit field.

 Inputs :   RpGeometry * - Pointer to a skinned geometry.
 Outputs:   RpGeometry * - Pointer to the skinned geometry on success.
 */
RpGeometry *
_rpSkinInitialize(RpGeometry *geometry)
{
    RpSkin *skin;

    RwMatrixWeights *matrixWeights;
    RwUInt32 *matrixIndices;

    RwUInt32 numVertices;
    RwUInt32 iVertices;

    RWFUNCTION(RWSTRING("_rpSkinInitialize"));
    RWASSERT(NULL != geometry);

    skin = RpSkinGeometryGetSkin(geometry);
    RWASSERT(NULL != skin);
    matrixWeights = skin->vertexMaps.matrixWeights;
    RWASSERT(NULL != matrixWeights);
    matrixIndices = skin->vertexMaps.matrixIndices;
    RWASSERT(NULL != matrixIndices);

    numVertices = RpGeometryGetNumVertices(geometry);

    for (iVertices = 0; iVertices < numVertices; iVertices++)
    {
        RwUInt32 weight;
        RwUInt32 index;

        /*--------------- Index and weight for Bone 1 ----------------*/

        /* Get the weight bits, dropping the low 10 bits. */
        weight = *((RwUInt32 *)&(matrixWeights[iVertices].w0));
        weight &= 0xFFFFFC00;

        /* Get the index; the first bone must always be in use. */
        index = (matrixIndices[iVertices] & 0xFF);
        RWASSERT((index != 0xFF) && (matrixWeights[iVertices].w0 > 0.0f));

        /* Calculate index in memory. */
        weight |= (index + 1) * 4;

        /* Store the weight. */
        *((RwUInt32 *) &(matrixWeights[iVertices].w0)) = weight;

        /*--------------- Index and weight for Bone 2 ----------------*/

        /* Get the weight bits, dropping the low 10 bits. */
        weight = *((RwUInt32 *) &(matrixWeights[iVertices].w1));
        weight &= 0xFFFFFC00;

        /* Get the index. */
        index = ((matrixIndices[iVertices] >> 8) & 0xFF);
        if( (index == 0xFF) || (matrixWeights[iVertices].w1 == 0.0f) )
        {
            /* Unused slot: no weight and no matrix offset. */
            weight = 0;
        }
        else
        {
            /* Calculate index in memory. */
            weight |= (index + 1) * 4;
        }

        /* Store the weight. */
        *((RwUInt32 *) &(matrixWeights[iVertices].w1)) = weight;

        /*--------------- Index and weight for Bone 3 ----------------*/

        /* Get the weight bits, dropping the low 10 bits. */
        weight = *((RwUInt32 *) &(matrixWeights[iVertices].w2));
        weight &= 0xFFFFFC00;

        /* Get the index. */
        index = ((matrixIndices[iVertices] >> 16) & 0xFF);
        if( (index == 0xFF) || (matrixWeights[iVertices].w2 == 0.0f) )
        {
            /* Unused slot: no weight and no matrix offset. */
            weight = 0;
        }
        else
        {
            /* Calculate index in memory. */
            weight |= (index + 1) * 4;
        }

        /* Store the weight. */
        *((RwUInt32 *) &(matrixWeights[iVertices].w2)) = weight;

        /*--------------- Index and weight for Bone 4 ----------------*/

        /* Get the weight bits, dropping the low 10 bits. */
        weight = *((RwUInt32 *) &(matrixWeights[iVertices].w3));
        weight &= 0xFFFFFC00;

        /* Get the index. */
        index = ((matrixIndices[iVertices] >> 24) & 0xFF);
        if( (index == 0xFF) || (matrixWeights[iVertices].w3 == 0.0f) )
        {
            /* Unused slot: no weight and no matrix offset. */
            weight = 0;
        }
        else
        {
            /* Calculate index in memory. */
            weight |= (index + 1) * 4;
        }

        /* Store the weight. */
        *((RwUInt32 *) &(matrixWeights[iVertices].w3)) = weight;
    }

    RWRETURN(geometry);
}


/****************************************************************************
 _rpSkinDeinitialize

 Platform specific deinitialize function for skinned geometries.  This
 implementation has nothing to release: it checks the pointer with RWASSERT
 and hands the same geometry straight back.

 Inputs :  *geometry    - Pointer to the skinned geometry.
 Outputs:  RpGeometry * - The geometry pointer that was passed in.
 */
RpGeometry *
_rpSkinDeinitialize(RpGeometry *geometry)
{
    RWFUNCTION(RWSTRING("_rpSkinDeinitialize"));
    RWASSERT(NULL != geometry);

    RWRETURN(geometry);
}


/****************************************************************************
 _rpSkinWeightsInstancing

 Vertex weights and indices instance function.  Walks the mesh's index list
 and, for every index, copies the four weights (w0..w3) of the referenced
 vertex from the skin's matrix weight table into destMBI, four RwReals per
 index, in index order.

 Inputs :  *mesh     - Pointer to the current mesh
           *skin     - Pointer to the current skin
           *destMBI  - Pointer to the weights/indices cache; must have room
                       for 4 * mesh->numIndices RwReals
 Outputs:   None
 */
void
_rpSkinWeightsInstancing( const RpMesh *mesh,
                          RpSkin *skin,
                          RwReal *destMBI )
{
    const RxVertexIndex   *meshIndices;
    const RwMatrixWeights *srcMBI;
    RwInt32               numIndices;
    RwInt32               iVertices;

    RWFUNCTION(RWSTRING("_rpSkinWeightsInstancing"));
    RWASSERT(NULL != mesh);
    RWASSERT(NULL != skin);
    RWASSERT(NULL != destMBI);

    /* Only touch mesh and skin once they have been checked above. */
    meshIndices = mesh->indices;
    numIndices  = mesh->numIndices;
    srcMBI      = skin->vertexMaps.matrixWeights;
    RWASSERT(NULL != srcMBI);

    /*--- Filling cluster number 5 ---*/
    for( iVertices = 0; iVertices < numIndices; iVertices++ )
    {
        /* Weights of the vertex referenced by the current mesh index. */
        const RwMatrixWeights *vertexWeights = &srcMBI[*meshIndices];

        destMBI[0] = vertexWeights->w0;
        destMBI[1] = vertexWeights->w1;
        destMBI[2] = vertexWeights->w2;
        destMBI[3] = vertexWeights->w3;

        meshIndices++;
        destMBI += 4;
    }

    RWRETURNVOID();
}


/****************************************************************************
 _rpSkinMatrixUpdating

 Skin matrices update function.  Fills the global aligned matrix cache with
 one matrix per hierarchy node, choosing the source according to the
 hierarchy flags:

   rpHANIMHIERARCHYNOMATRICES
       skinToBone[i] * LTM of node i's frame * inverse of the atomic's LTM,
       computed one node at a time.
   rpHANIMHIERARCHYLOCALSPACEMATRICES
       skinToBone[i] * hierarchy matrix i, batched.
   otherwise
       skinToBone[i] * hierarchy matrix i * inverse of the atomic's LTM,
       batched.

 The batched paths are bottom-tested loops, so they are skipped when the
 hierarchy reports no nodes; the cache is then returned untouched.

 Inputs :  *atomic     - Pointer to the current atomic
           *skin       - Pointer to the current skin
 Outputs:   RwMatrix * - Pointer to the matrix cache, or NULL when the
                         atomic has no hierarchy attached
 */
RwMatrix *
_rpSkinMatrixUpdating( RpAtomic *atomic,
                       RpSkin *skin )
{
    RpHAnimHierarchy *hierarchy;
    SkinAtomicData *atomicData;

    RWFUNCTION(RWSTRING("_rpSkinMatrixUpdating"));
    RWASSERT(NULL != atomic);
    RWASSERT(NULL != skin);

    /* Get the atomic's extension data. */
    atomicData = RPSKINATOMICGETDATA(atomic);
    RWASSERT(NULL != atomicData);

    /* Then its hierarchy. */
    hierarchy = atomicData->hierarchy;

    if(NULL != hierarchy)
    {
        RwMatrix *mba;
        RwInt32 numNodes;
        RwMatrix *matrixArray;
        const RwMatrix *skinToBone;

        /* Get the matrix cache. */
        mba = _rpSkinGlobals.matrixCache.aligned;
        RWASSERT(NULL != mba);

        /* Get the number of frames in the hierarchy and its matrix array. */
        numNodes = hierarchy->numNodes;
        matrixArray = hierarchy->pMatrixArray;
        RWASSERT(NULL != matrixArray);

        /* Get the bone information. */
        skinToBone = RpSkinGetSkinToBoneMatrices(skin);
        RWASSERT(NULL != skinToBone);

        if(hierarchy->flags & rpHANIMHIERARCHYNOMATRICES)
        {
            RwFrame *atomicFrame;
            RwMatrix *atomicLtm;

            RwMatrix inverseLtm;
            RwMatrix prodMatrix;
            RwInt32 iFrame;

            RpHAnimNodeInfo *nodeInfo;

            /* Get the hierarchy's frame information. */
            nodeInfo = hierarchy->pNodeInfo;
            RWASSERT(NULL != nodeInfo);

            /* Setup the matrix flags. */
            inverseLtm.flags = 0;
            prodMatrix.flags = 0;

            /* Invert the atomic's ltm. */
            atomicFrame = RpAtomicGetFrame(atomic);
            RWASSERT(NULL != atomicFrame);
            atomicLtm = RwFrameGetLTM(atomicFrame);
            RWASSERT(NULL != atomicLtm);
            RwMatrixInvert(&inverseLtm, atomicLtm);

            for( iFrame = 0; iFrame < numNodes; iFrame++ )
            {
                RwFrame *nodeFrame;
                RwMatrix *nodeLtm;

                /* The node's matrix comes from its own frame's ltm. */
                nodeFrame = nodeInfo[iFrame].pFrame;
                RWASSERT(NULL != nodeFrame);
                nodeLtm = RwFrameGetLTM(nodeFrame);
                RWASSERT(NULL != nodeLtm);

                SkinMatrixMultiplyMacro5900( &prodMatrix,
                                             &skinToBone[iFrame],
                                             nodeLtm );

                SkinMatrixMultiplyMacro5900( &mba[iFrame],
                                             &prodMatrix,
                                             &inverseLtm);
            }
        }
        else if( numNodes > 0 )
        {
            /*
             * The batched macros run their loop body before testing for
             * the end address, so they must never be entered with an
             * empty (or negative) node count.
             */
            if( hierarchy->flags & rpHANIMHIERARCHYLOCALSPACEMATRICES )
            {
                SkinMatrixMultiplyMacro1(
                    skinToBone,
                    &(matrixArray[0]),
                    &(mba[0]),
                    numNodes,
                    sizeof(RwMatrix),
                    sizeof(RwMatrix) );
            }
            else
            {
                RwFrame *atomicFrame;
                RwMatrix *atomicLtm;

                RwMatrix inverseLtm;

                /* Setup the matrix flags. */
                inverseLtm.flags = 0;

                /* Invert the atomic's ltm. */
                atomicFrame = RpAtomicGetFrame(atomic);
                RWASSERT(NULL != atomicFrame);
                atomicLtm = RwFrameGetLTM(atomicFrame);
                RWASSERT(NULL != atomicLtm);
                RwMatrixInvert(&inverseLtm, atomicLtm);

                SkinMatrixMultiplyMacro2(
                    skinToBone,
                    &(matrixArray[0]),
                    &(inverseLtm),
                    &(mba[0]),
                    numNodes,
                    sizeof(RwMatrix),
                    sizeof(RwMatrix) );
            }
        }

        RWRETURN(mba);
    }

    RWRETURN((RwMatrix *)NULL);
}


/*===========================================================================*
 *--- Plugin Engine Functions -----------------------------------------------*
 *===========================================================================*/

/*===========================================================================*
 *--- Plugin API Functions --------------------------------------------------*
 *===========================================================================*/
