/****************************************************************************
 *                                                                          *
 *  Module  :   gcnmatbl.c                                                  *
 *                                                                          *
 *  Purpose :   Matrix-blending pipeline (GCN)                              *
 *                                                                          *
 ****************************************************************************/

/****************************************************************************
 Includes
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <math.h>
#include <float.h>

#include <rpcriter.h>
#include "rpplugin.h"
#include <rpdbgerr.h>

#include <rwcore.h>
#include <rpworld.h>
#include <rprandom.h>
#include <rpspline.h>
#include <rpskin.h>

#include "dolphin/os.h"
#include "skinpriv.h"
#include "genmatbl.h"

#if (!defined(DOXYGEN_SHOULD_SKIP_THIS))
static const char   rcsid[] __RWUNUSED__ =
    "@@(#)$Id: gcnmatbl.c,v 1.11 2001/08/17 13:28:31 Colinh Exp $";
#endif /* (!defined(DOXYGEN_SHOULD_SKIP_THIS)) */

/****************************************************************************
 Local Types
 */

/****************************************************************************
 Local (Static) Prototypes
 */

/* Build-time switch: when defined, skinning is done inside a dedicated
 * instance pipeline (see _rpAtomicSkinPipelineCreate); otherwise the
 * atomic's render callback is replaced instead. It is unconditionally
 * defined here, so the #else branches in this file are currently inactive. */
#define SKIN_PIPE

#ifdef SKIN_PIPE
/* Pipeline created by _rpSkinMBPluginAttach and assigned to skinned atomics
 * by _rpSkinMBInitAtomic. */
static RxPipeline  *SkinPipeline = NULL;
/* Render callback that was installed on the all-in-one node before ours;
 * SkinMatBlendAtomicRender chains to it once blending is done. */
static RxGameCubeAllInOneCallBack OldRender = NULL;
/* Forward declaration: referenced by _rpAtomicSkinPipelineCreate before the
 * definition further down the file. */
static void* SkinMatBlendAtomicRender(void *object,
                                      RxGameCubePipeData *pipeData);
#else /* SKIN_PIPE */
/* Render callback of the first atomic passed to _rpSkinMBInitAtomic; invoked
 * after blending by the callback-based SkinMatBlendAtomicRender. */
static RpAtomicCallBackRender pDefRenderCallback = NULL;
#endif /* SKIN_PIPE */

/****************************************************************************
 Local Defines
 */

/* Tolerance for matrix validity checks. In this file it only appears inside
 * commented-out RWASSERTs in the C blending path. */
#define _EPSILON          ((RwReal)(0.001))

/****************************************************************************
 Globals (across program)
 */


/****************************************************************************
 Local (static) Globals
 */

/* Defined elsewhere in the skin plugin. Not named directly in this file;
 * presumably consumed by the RPSKINATOMICGETDATA macro - TODO confirm. */
extern RwInt32      RpSkinAtomicSkinOffset;

/* !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

   Functions

   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! */

/****************************************************************************/

#ifdef SKIN_PIPE
/****************************************************************************
 _rpAtomicSkinPipelineCreate

 Builds the skinning instance pipeline: a single GameCube atomic all-in-one
 node whose render callback is replaced by SkinMatBlendAtomicRender. The
 callback that was previously installed is remembered in OldRender.

 Inputs :   None

 Outputs:   RxPipeline *        The new pipeline, or NULL on failure
 */
static RxPipeline*
_rpAtomicSkinPipelineCreate(void)
{
    RxPipeline         *newPipe;
    RxLockedPipe       *locked;
    RxNodeDefinition   *allInOneDfn;
    RxPipelineNode     *renderNode;

    RWFUNCTION(RWSTRING("_rpAtomicSkinPipelineCreate"));

    newPipe = RxPipelineCreate();
    if (NULL == newPipe)
    {
        RWRETURN(NULL);
    }

    locked = RxPipelineLock(newPipe);
    if (NULL == locked)
    {
        RxPipelineDestroy(newPipe);
        RWRETURN(NULL);
    }

    /* Add the all-in-one node and unlock; the unlock result tells us
     * whether the pipeline was built successfully. */
    allInOneDfn = RxNodeDefinitionGetGameCubeAtomicAllInOne();

    RWASSERT(NULL != allInOneDfn);

    locked = RxLockedPipeAddFragment(locked, NULL, allInOneDfn, NULL);
    locked = RxLockedPipeUnlock(locked);

    RWASSERT(newPipe == (RxPipeline *)locked);

    if (NULL == locked)
    {
        RxPipelineDestroy(newPipe);
        RWRETURN(NULL);
    }

    renderNode = RxPipelineFindNodeByName(newPipe, allInOneDfn->name,
                                          NULL, NULL);

    RWASSERT(NULL != renderNode);

    /* Remember the stock callback, ask the node not to instance positions
     * and normals, then hook in our blending callback. */
    OldRender = RxGameCubeAllInOneGetRenderCallBack(renderNode);
    _rxGameCubeAllInOneSetNoInstanceFlags(renderNode,
                                          rxGEOMETRY_POSITIONS
                                          | rxGEOMETRY_NORMALS);
    RxGameCubeAllInOneSetRenderCallBack(renderNode,
                                        SkinMatBlendAtomicRender);

    RWRETURN(newPipe);
}
#endif /* SKIN_PIPE */

/**********************************************************************/

/*
 * _rpSkinMBPipelineDestroy
 *
 * Destroys the skinning pipeline created by _rpSkinMBPluginAttach, if any.
 * Safe to call when the pipeline was never created (attach failed) or has
 * already been destroyed: the pipeline pointer is only handed to
 * RxPipelineDestroy when it is non-NULL.
 */
void
_rpSkinMBPipelineDestroy(void)
{
    RWFUNCTION(RWSTRING("_rpSkinMBPipelineDestroy"));

#ifdef SKIN_PIPE
    if (NULL != SkinPipeline)
    {
        RxPipelineDestroy(SkinPipeline);
        SkinPipeline = NULL;
    }
#endif /* SKIN_PIPE */

    RWRETURNVOID();
}

/****************************************************************************
 _rpSkinMBPluginAttach

 Initialise the matrix blending pipeline. With SKIN_PIPE defined this creates
 the skinning pipeline and stores it in SkinPipeline; without it there is
 nothing to set up.

 Inputs :   None

 Outputs:   RwBool              TRUE on success
 */

RwBool
_rpSkinMBPluginAttach(void)
{
    RwBool              attached = TRUE;

    RWFUNCTION(RWSTRING("_rpSkinMBPluginAttach"));

#ifdef SKIN_PIPE
    SkinPipeline = _rpAtomicSkinPipelineCreate();
    if (NULL == SkinPipeline)
    {
        attached = FALSE;
    }
#endif /* SKIN_PIPE */

    RWRETURN(attached);
}

#if !defined(NOASM)
/****************************************************************************
 asmSkinLoop

 A GCN replacement for the skinning main loop. roughly x2 speed up for single
 weight. Probably more for more weights due to dual dispatch of later matrix
 loads

 The body uses the arguments in registers r3..r10, in declaration order:

   r3  pMatrixWeightsMap   four floats (w0..w3) read per vertex
   r4  pOriginalVertices   three floats read per vertex
   r5  pOriginalNormals    three floats read per vertex
   r6  pVertices           three floats written per vertex
   r7  pNormals            three floats written per vertex (see below)
   r8  pMatrixArray        base address; bone matrices are indexed with a
                           64-byte stride
   r9  pMatrixIndexMap     one 32-bit word per vertex, four 8-bit indices
   r10 numVerts            loop count

 Normals are only written when r4 != r5 on entry; blendBody passes
 pOriginalVertices for both when it has no normal buffer.

 Unlike the C fallback in blendBody, the first weight is never tested: the
 first matrix is always applied and every vertex is always written.

 NOTE(review): only numVerts < 0 is rejected on entry. With numVerts == 0 the
 count register is loaded with 0 and bdnz decrements before testing, so the
 loop would not stop after zero iterations - confirm callers never pass 0.

 NOTE(review): the 172-byte frame is not a multiple of 8, so (assuming r1 is
 8-byte aligned on entry) the stfd/lfd save slots are only 4-byte aligned -
 confirm this is acceptable on the target.
 */
asm static void
asmSkinLoop(register void *pMatrixWeightsMap, register void *pOriginalVertices,
            register void *pOriginalNormals, register void *pVertices,
            register void *pNormals, register void *pMatrixArray,
            register void *pMatrixIndexMap, register int numVerts)
{
    nofralloc

    /* Prologue: open a 172-byte frame and save every register the loop
     * clobbers that is restored again in the epilogue (fp14-fp31, r14-r17). */
    stwu r1, -172(r1)

    stfd fp14, 8(r1)
    stfd fp15, 16(r1)
    stfd fp16, 24(r1)
    stfd fp17, 32(r1)
    stfd fp18, 40(r1)
    stfd fp19, 48(r1)
    stfd fp20, 56(r1)
    stfd fp21, 64(r1)
    stfd fp22, 72(r1)
    stfd fp23, 80(r1)
    stfd fp24, 88(r1)
    stfd fp25, 96(r1)
    stfd fp26, 104(r1)
    stfd fp27, 112(r1)
    stfd fp28, 120(r1)
    stfd fp29, 128(r1)
    stfd fp30, 136(r1)
    stfd fp31, 144(r1)

    stw r14, 152(r1)
    stw r15, 156(r1)
    stw r16, 160(r1)
    stw r17, 164(r1)

    /* Compare numVerts with zero and use it as the loop count. */
    cmpi cr0, 0, r10, 0
    mtspr CTR, r10

    /* Bias the input pointers by -4 so the pre-incrementing loads
     * (lfsu / lwzu with displacement 4) start at the first element. */
    addi r3, r3, -4

    addi r4, r4, -4
    addi r5, r5, -4
    addi r9, r9, -4

    /* Leave immediately when numVerts < 0 (LT from the compare above). */
    bt- lt, skinLoopExit

    /* cr0.EQ now records whether the vertex and normal source pointers are
     * the same. Nothing inside the loop writes cr0, so "beq local6" below
     * reuses this result on every iteration. */
    cmp cr0, 0, r4, r5

skinLoop:
    /* fp0 = fp0 - fp0: the zero operand for the weight compares below. */
    ps_sub fp0, fp0, fp0
    /* fp28..fp31 = w0..w3 of this vertex. */
    lfsu fp28, 4(r3)
    lfsu fp29, 4(r3)
    lfsu fp30, 4(r3)
    lfsu fp31, 4(r3)

    /* r11 = packed bone indices of this vertex. */
    lwzu r11, 4(r9)

    /* cr5/cr6/cr7 = w1/w2/w3 compared against zero. */
    ps_cmpo0 cr5, fp29, fp0
    ps_cmpo0 cr6, fp30, fp0
    ps_cmpo0 cr7, fp31, fp0

    /* Byte offsets of the four matrices:
     *   r14 = ( r11        & 0xFF) * 64
     *   r15 = ((r11 >>  8) & 0xFF) * 64
     *   r16 = ((r11 >> 16) & 0xFF) * 64
     *   r17 = ((r11 >> 24) & 0xFF) * 64 */
    rlwinm r14, r11, 6, 18, 25
    rlwinm r15, r11, 30, 18, 25
    rlwinm r16, r11, 22, 18, 25
    rlwinm r17, r11, 14, 18, 25

    /* Turn the offsets into addresses. */
    add r14, r14, r8
    add r15, r15, r8
    add r16, r16, r8
    add r17, r17, r8

    /* CR bits 21/25/29 are the GT bits of cr5/cr6/cr7. If weight N is not
     * greater than zero, matrix pointer N and all later ones are replaced by
     * the first matrix pointer (the labels fall through into each other),
     * presumably so the matrix preloads below always read a valid matrix. */
    bc 4, 21, local1
    bc 4, 25, local2
    bc 4, 29, local3
    b local4

local1:
    addi r15, r14, 0
local2:
    addi r16, r14, 0
local3:
    addi r17, r14, 0
local4:

    /* fp16..fp18 = source position x, y, z. */
    lfsu fp16, 4(r4)
    lfsu fp17, 4(r4)
    lfsu fp18, 4(r4)

    /* fp21..fp23 = source normal x, y, z (the position data again when the
     * caller passed the same pointer for both). */
    lfsu fp21, 4(r5)
    lfsu fp22, 4(r5)
    lfsu fp23, 4(r5)

    /* Load matrix 0 as four rows at byte offsets 0, 16, 32 and 48. Each row
     * is fetched as a pair of floats followed by a single float. Judging by
     * the C fallback in blendBody the rows are right, up, at and pos -
     * assumes that RwMatrix layout, TODO confirm. */
    psq_l fp0, 0(r14), 0, 0
    psq_l fp1, 8(r14), 1, 0
    psq_l fp2, 16(r14), 0, 0
    psq_l fp3, 24(r14), 1, 0
    psq_l fp4, 32(r14), 0, 0
    psq_l fp5, 40(r14), 1, 0
    psq_l fp6, 48(r14), 0, 0
    psq_l fp7, 56(r14), 1, 0

    /* Bone 0. The differing indentation marks four interleaved chains:
     *   fp6  / fp7   position (xy / z): row3 + row0*x + row1*y + row2*z
     *   fp24 / fp25  normal   (xy / z): row0*x + row1*y + row2*z
     * The results are scaled by w0 into the accumulators fp19/fp20
     * (position) and fp26/fp27 (normal). In between, matrix 1 is loaded
     * into fp8..fp15. */
    ps_madds0 fp6, fp0, fp16, fp6
    psq_l fp8, 0(r15), 0, 0
      ps_madds0 fp7, fp1, fp16, fp7
        ps_muls0 fp24, fp0, fp21
        psq_l fp9, 8(r15), 1, 0
          ps_muls0 fp25, fp1, fp21
    ps_madds0 fp6, fp2, fp17, fp6
    psq_l fp10, 16(r15), 0, 0
      ps_madds0 fp7, fp3, fp17, fp7
        ps_madds0 fp24, fp2, fp22, fp24
        psq_l fp11, 24(r15), 1, 0
          ps_madds0 fp25, fp3, fp22, fp25
    ps_madds0 fp6, fp4, fp18, fp6
    psq_l fp12, 32(r15), 0, 0
      ps_madds0 fp7, fp5, fp18, fp7
        ps_madds0 fp24, fp4, fp23, fp24
        psq_l fp13, 40(r15), 1, 0
          ps_madds0 fp25, fp5, fp23, fp25
    ps_muls0 fp19, fp6, fp28
    psq_l fp14, 48(r15), 0, 0
      ps_muls0 fp20, fp7, fp28
        ps_muls0 fp26, fp24, fp28
        psq_l fp15, 56(r15), 1, 0
          ps_muls0 fp27, fp25, fp28
          /* Finished with this vertex if w1 is not greater than zero. */
          bc 4, 21, local5
 nop /* Nop added due to suspected speculative execution bug on 486MHz CPU */

    /* Bone 1: same pattern using matrix 1 (fp8..fp15), accumulated with w1
     * (fp29), while matrix 2 is loaded into fp0..fp7. */
    ps_madds0 fp14, fp8, fp16, fp14
    psq_l fp0, 0(r16), 0, 0
      ps_madds0 fp15, fp9, fp16, fp15
        ps_muls0 fp24, fp8, fp21
        psq_l fp1, 8(r16), 1, 0
          ps_muls0 fp25, fp9, fp21
    ps_madds0 fp14, fp10, fp17, fp14
    psq_l fp2, 16(r16), 0, 0
      ps_madds0 fp15, fp11, fp17, fp15
        ps_madds0 fp24, fp10, fp22, fp24
        psq_l fp3, 24(r16), 1, 0
          ps_madds0 fp25, fp11, fp22, fp25
    ps_madds0 fp14, fp12, fp18, fp14
    psq_l fp4, 32(r16), 0, 0
      ps_madds0 fp15, fp13, fp18, fp15
        ps_madds0 fp24, fp12, fp23, fp24
        psq_l fp5, 40(r16), 1, 0
          ps_madds0 fp25, fp13, fp23, fp25
    ps_madds0 fp19, fp14, fp29, fp19
    psq_l fp6, 48(r16), 0, 0
      ps_madds0 fp20, fp15, fp29, fp20
        ps_madds0 fp26, fp24, fp29, fp26
        psq_l fp7, 56(r16), 1, 0
          ps_madds0 fp27, fp25, fp29, fp27
          /* Finished with this vertex if w2 is not greater than zero. */
          bc 4, 25, local5
 nop

    /* Bone 2: matrix 2 (fp0..fp7), accumulated with w2 (fp30), while
     * matrix 3 is loaded into fp8..fp15. */
    ps_madds0 fp6, fp0, fp16, fp6
    psq_l fp8, 0(r17), 0, 0
      ps_madds0 fp7, fp1, fp16, fp7
        ps_muls0 fp24, fp0, fp21
        psq_l fp9, 8(r17), 1, 0
          ps_muls0 fp25, fp1, fp21
    ps_madds0 fp6, fp2, fp17, fp6
    psq_l fp10, 16(r17), 0, 0
      ps_madds0 fp7, fp3, fp17, fp7
        ps_madds0 fp24, fp2, fp22, fp24
        psq_l fp11, 24(r17), 1, 0
          ps_madds0 fp25, fp3, fp22, fp25
    ps_madds0 fp6, fp4, fp18, fp6
    psq_l fp12, 32(r17), 0, 0
      ps_madds0 fp7, fp5, fp18, fp7
        ps_madds0 fp24, fp4, fp23, fp24
        psq_l fp13, 40(r17), 1, 0
          ps_madds0 fp25, fp5, fp23, fp25
    ps_madds0 fp19, fp6, fp30, fp19
    psq_l fp14, 48(r17), 0, 0
      ps_madds0 fp20, fp7, fp30, fp20
        ps_madds0 fp26, fp24, fp30, fp26
        psq_l fp15, 56(r17), 1, 0
          ps_madds0 fp27, fp25, fp30, fp27
          /* Finished with this vertex if w3 is not greater than zero. */
          bc 4, 29, local5
 nop

    /* Bone 3: matrix 3 (fp8..fp15), accumulated with w3 (fp31). There is no
     * further matrix to preload. */
    ps_madds0 fp14, fp8, fp16, fp14
      ps_madds0 fp15, fp9, fp16, fp15
        ps_muls0 fp24, fp8, fp21
          ps_muls0 fp25, fp9, fp21
    ps_madds0 fp14, fp10, fp17, fp14
      ps_madds0 fp15, fp11, fp17, fp15
        ps_madds0 fp24, fp10, fp22, fp24
          ps_madds0 fp25, fp11, fp22, fp25
    ps_madds0 fp14, fp12, fp18, fp14
      ps_madds0 fp15, fp13, fp18, fp15
        ps_madds0 fp24, fp12, fp23, fp24
          ps_madds0 fp25, fp13, fp23, fp25
    ps_madds0 fp19, fp14, fp31, fp19
      ps_madds0 fp20, fp15, fp31, fp20
        ps_madds0 fp26, fp24, fp31, fp26
          ps_madds0 fp27, fp25, fp31, fp27

local5:

    /* Store the blended position as three single floats: x from fp19, y via
     * a copy of fp19's second element in fp14, z from fp20. */
    ps_merge11 fp14, fp19, fp19
    psq_st fp19, 0(r6), 1, 0
    psq_st fp14, 4(r6), 1, 0
    psq_st fp20, 8(r6), 1, 0
    addi r6, r6, 12
    /* Skip the normal store when the source pointers compared equal before
     * the loop (cr0.EQ). */
    beq local6

    /* Store the blended normal the same way from fp26/fp27. */
    ps_merge11 fp24, fp26, fp26
    psq_st fp26, 0(r7), 1, 0
    psq_st fp24, 4(r7), 1, 0
    psq_st fp27, 8(r7), 1, 0
    addi r7, r7, 12

local6:

    /* Decrement the count register and loop while it is non-zero. */
    bdnz skinLoop
 nop

skinLoopExit:

    /* Epilogue: restore the saved registers and release the frame. */
    lfd fp14, 8(r1)
    lfd fp15, 16(r1)
    lfd fp16, 24(r1)
    lfd fp17, 32(r1)
    lfd fp18, 40(r1)
    lfd fp19, 48(r1)
    lfd fp20, 56(r1)
    lfd fp21, 64(r1)
    lfd fp22, 72(r1)
    lfd fp23, 80(r1)
    lfd fp24, 88(r1)
    lfd fp25, 96(r1)
    lfd fp26, 104(r1)
    lfd fp27, 112(r1)
    lfd fp28, 120(r1)
    lfd fp29, 128(r1)
    lfd fp30, 136(r1)
    lfd fp31, 144(r1)

    lwz r14, 152(r1)
    lwz r15, 156(r1)
    lwz r16, 160(r1)
    lwz r17, 164(r1)

    addi r1, r1, 172

    blr
}
#endif /* !defined(NOASM) */

/****************************************************************************
 blendBody

 Matrix blending worker - builds the per-bone skinning matrices for the
 atomic's current skeleton/hierarchy and then performs the weighted
 transform of the skin's original vertices (and normals) into the supplied
 output buffers.

 The matrix array is taken from the current skeleton when there is one. If a
 hierarchy is attached it takes precedence: the matrices are rebuilt into
 RpSkinAtomicGlobals.SkinMatrixCache from the bone-to-skin matrices and,
 depending on the hierarchy flags, either the node frames' LTMs, the
 hierarchy's local-space matrices, or the hierarchy's matrices brought into
 the atomic's space through the inverse of the atomic's LTM.

 Unless NOASM is defined the per-vertex work is done by asmSkinLoop;
 otherwise a plain C loop is used. The C loop leaves a vertex untouched when
 its first weight is not greater than zero and stops accumulating at the
 first later weight that is not greater than zero.

 Nothing is blended (the output buffers are left as they were) when the skin
 has no vertices or when neither a skeleton nor a hierarchy supplies a
 matrix array.

 Inputs :   RpAtomic *    A pointer to the atomic (must carry skin data).
            RwV3d *       Destination for the blended positions.
            RwV3d *       Destination for the blended normals, or NULL.

 Outputs:   RpAtomic *    The atomic that was passed in.
 */

/* Rename to TIMEME to time blendBody with OSGetTick and report via OSReport */
#define TIMEMEx

#if defined(NOASM)
/****************************************************************************
 blendBoneContribution

 Adds one bone's weighted contribution to a single vertex (C fallback only).
 Positions use the full matrix including translation; normals use the
 rotation rows only. When firstBone is TRUE the destination is overwritten,
 otherwise the contribution is accumulated. pDstNormal may be NULL, in which
 case pSrcNormal is not read.
 */
static void
blendBoneContribution(const RwMatrix *pMatrix, RwReal weight,
                      const RwV3d *pSrcVertex, const RwV3d *pSrcNormal,
                      RwV3d *pDstVertex, RwV3d *pDstNormal,
                      RwBool firstBone)
{
    RwV3d               blended;

    RWFUNCTION(RWSTRING("blendBoneContribution"));

    blended.x = ((pMatrix->right.x * pSrcVertex->x) +
                 (pMatrix->up.x * pSrcVertex->y) +
                 (pMatrix->at.x * pSrcVertex->z) +
                 (pMatrix->pos.x)) * weight;
    blended.y = ((pMatrix->right.y * pSrcVertex->x) +
                 (pMatrix->up.y * pSrcVertex->y) +
                 (pMatrix->at.y * pSrcVertex->z) +
                 (pMatrix->pos.y)) * weight;
    blended.z = ((pMatrix->right.z * pSrcVertex->x) +
                 (pMatrix->up.z * pSrcVertex->y) +
                 (pMatrix->at.z * pSrcVertex->z) +
                 (pMatrix->pos.z)) * weight;

    if (firstBone)
    {
        *pDstVertex = blended;
    }
    else
    {
        pDstVertex->x += blended.x;
        pDstVertex->y += blended.y;
        pDstVertex->z += blended.z;
    }

    if (pDstNormal)
    {
        blended.x = ((pMatrix->right.x * pSrcNormal->x) +
                     (pMatrix->up.x * pSrcNormal->y) +
                     (pMatrix->at.x * pSrcNormal->z)) * weight;
        blended.y = ((pMatrix->right.y * pSrcNormal->x) +
                     (pMatrix->up.y * pSrcNormal->y) +
                     (pMatrix->at.y * pSrcNormal->z)) * weight;
        blended.z = ((pMatrix->right.z * pSrcNormal->x) +
                     (pMatrix->up.z * pSrcNormal->y) +
                     (pMatrix->at.z * pSrcNormal->z)) * weight;

        if (firstBone)
        {
            *pDstNormal = blended;
        }
        else
        {
            pDstNormal->x += blended.x;
            pDstNormal->y += blended.y;
            pDstNormal->z += blended.z;
        }
    }

    RWRETURNVOID();
}
#endif /* defined(NOASM) */

static RpAtomic*
blendBody(RpAtomic *atomic, RwV3d *pVertices, RwV3d *pNormals)
{

    RpSkin             *pSkin;
    RwV3d              *pOriginalVertices;
    RwV3d              *pOriginalNormals;
    RwMatrix           *pMatrixArray = NULL;
    RwInt32             i;
#ifdef TIMEME
    OSTick              TimeStart;
    OSTick              TimeMid;
    OSTick              TimeFinish;
#endif /* TIMEME */

    RWFUNCTION(RWSTRING("blendBody"));
    RWASSERT(atomic);

#ifdef TIMEME
    TimeStart = OSGetTick();
#endif /* TIMEME */

    pSkin = *RPSKINATOMICGETDATA(atomic);

    /* The platform data holds the original vertices, directly followed by
     * the original normals (see _rpSkinMBInitAtomic) */
    pOriginalVertices = (RwV3d *) pSkin->pPlatformData;
    if (pNormals)
    {
        pOriginalNormals =
            &(pOriginalVertices[pSkin->totalVertices]);
    }
    else
    {
        pOriginalNormals = NULL;
    }

    /* Select / build the bone matrices */
    if (pSkin->pCurrentSkeleton)
    {
        pMatrixArray = pSkin->pCurrentSkeleton->pMatrixArray;
    }
    if (pSkin->pCurrentHierarchy)
    {
        /* A hierarchy overrides the skeleton's matrices */
        pMatrixArray = RpSkinAtomicGlobals.SkinMatrixCache;

        if (pSkin->pCurrentHierarchy->
            flags & rpHANIMHIERARCHYNOMATRICES)
        {
            /* The hierarchy keeps no matrices: use the node frames' LTMs */
            RwMatrix            inverseAtomicLTM;
            RwMatrix            tempMatrix;

            RwMatrixInvert(&inverseAtomicLTM,
                           RwFrameGetLTM(RpAtomicGetFrame
                                         (atomic)));

            for (i = 0; i < pSkin->pCurrentHierarchy->numNodes;
                 i++)
            {
                RwFrame            *pFrame =
                    pSkin->pCurrentHierarchy->pNodeInfo[i].
                    pFrame;
                RwMatrixMultiply(&tempMatrix,
                                 &pSkin->pBoneInfo[i].
                                 boneToSkinMat,
                                 RwFrameGetLTM(pFrame));
                RwMatrixMultiply(&pMatrixArray[i], &tempMatrix,
                                 &inverseAtomicLTM);
            }
        }
        else if (pSkin->pCurrentHierarchy->
                 flags & rpHANIMHIERARCHYLOCALSPACEMATRICES)
        {
            /* Hierarchy matrices are used without the inverse atomic LTM */
            for (i = 0; i < pSkin->pCurrentHierarchy->numNodes;
                 i++)
            {
                RwMatrixMultiply(&pMatrixArray[i],
                                 &pSkin->pBoneInfo[i].
                                 boneToSkinMat,
                                 &pSkin->pCurrentHierarchy->
                                 pMatrixArray[i]);
            }
        }
        else
        {
            /* Bring the hierarchy matrices into the atomic's space */
            RwMatrix            inverseAtomicLTM;
            RwMatrix            tempMatrix;

            RwMatrixInvert(&inverseAtomicLTM,
                           RwFrameGetLTM(RpAtomicGetFrame
                                         (atomic)));

            for (i = 0; i < pSkin->pCurrentHierarchy->numNodes;
                 i++)
            {
                RwMatrixMultiply(&tempMatrix,
                                 &pSkin->pBoneInfo[i].
                                 boneToSkinMat,
                                 &pSkin->pCurrentHierarchy->
                                 pMatrixArray[i]);
                RwMatrixMultiply(&pMatrixArray[i],
                                 &tempMatrix,
                                 &inverseAtomicLTM);
            }
        }
    }

#ifdef TIMEME
    TimeMid = OSGetTick();
#endif /* TIMEME */

    /* Without a skeleton or hierarchy there are no matrices to blend with */
    RWASSERT(NULL != pMatrixArray);

    /* asmSkinLoop only rejects negative counts: a count of zero would load
     * its count register with 0 and the decrement-and-branch would not
     * terminate as intended, so empty skins must never reach it. */
    if ((NULL != pMatrixArray) && (pSkin->totalVertices > 0))
    {
#if !defined(NOASM)
        /* The assembly loop skips the normals when it is handed the same
         * source pointer for vertices and normals */
        asmSkinLoop(pSkin->pMatrixWeightsMap, pOriginalVertices,
                    (pNormals ? pOriginalNormals:pOriginalVertices),
                    pVertices, pNormals, pMatrixArray,
                    pSkin->pMatrixIndexMap, pSkin->totalVertices);

#else /* !defined(NOASM) */

        for (i = 0; i < pSkin->totalVertices; i++)
        {
            /* Four 8-bit matrix indices, lowest byte first */
            const RwUInt32      indices = pSkin->pMatrixIndexMap[i];
            const RwReal        w0 = pSkin->pMatrixWeightsMap[i].w0;
            const RwReal        w1 = pSkin->pMatrixWeightsMap[i].w1;
            const RwReal        w2 = pSkin->pMatrixWeightsMap[i].w2;
            const RwReal        w3 = pSkin->pMatrixWeightsMap[i].w3;
            const RwV3d        *pSrcNormal =
                (pNormals ? &pOriginalNormals[i] : NULL);
            RwV3d              *pDstNormal =
                (pNormals ? &pNormals[i] : NULL);

            /* Weights are consumed in order; the first one that is not
             * greater than zero ends the work for this vertex */
            if (!(w0 > (RwReal) 0.0))
            {
                continue;
            }
            blendBoneContribution(&pMatrixArray[indices & 0xFF], w0,
                                  &pOriginalVertices[i], pSrcNormal,
                                  &pVertices[i], pDstNormal, TRUE);

            if (!(w1 > (RwReal) 0.0))
            {
                continue;
            }
            blendBoneContribution(&pMatrixArray[(indices >> 8) & 0xFF], w1,
                                  &pOriginalVertices[i], pSrcNormal,
                                  &pVertices[i], pDstNormal, FALSE);

            if (!(w2 > (RwReal) 0.0))
            {
                continue;
            }
            blendBoneContribution(&pMatrixArray[(indices >> 16) & 0xFF], w2,
                                  &pOriginalVertices[i], pSrcNormal,
                                  &pVertices[i], pDstNormal, FALSE);

            if (!(w3 > (RwReal) 0.0))
            {
                continue;
            }
            blendBoneContribution(&pMatrixArray[(indices >> 24) & 0xFF], w3,
                                  &pOriginalVertices[i], pSrcNormal,
                                  &pVertices[i], pDstNormal, FALSE);
        }

#endif /* !defined(NOASM) */
    }

#ifdef TIMEME
    TimeFinish = OSGetTick();

    OSReport("Time = %d Prep = %d Verts = %d\n", OSTicksToCycles(OSDiffTick(TimeFinish, TimeStart)), OSTicksToCycles(OSDiffTick(TimeMid, TimeStart)), pSkin->totalVertices);
    /* OSReport("Bus = %d, time = %d core = %d\n", OS_BUS_CLOCK, OS_TIMER_CLOCK, OS_CORE_CLOCK); */
#endif /* TIMEME */

    RWRETURN(atomic);
}

#ifdef SKIN_PIPE

/****************************************************************************
 mainResEntryCB

 Destroy-notify callback installed on the atomic's main resource entry by
 SkinMatBlendAtomicRender. Waits for the entry's display list token, frees
 the child resource entry that holds the blended positions/normals (found
 through the pointer stored just before the positions), and clears the
 header's position and normal pointers.
 */
static void
mainResEntryCB(RwResEntry *resEntry)
{
    RxGameCubeResEntryHeader *header;

    RWFUNCTION(RWSTRING("mainResEntryCB"));

    /* The header lives directly after the RwResEntry itself */
    header = (RxGameCubeResEntryHeader *)(resEntry + 1);

    /* Wait on the main resEntry */
    _rwDlTokenWaitDone(header->token);

    /* Destroy the child */
    if (NULL != header->positions)
    {
        RwResEntry        **childSlot =
            ((RwResEntry **)(header->positions)) - 1;

        RwResourcesFreeResEntry(*childSlot);
    }

    header->positions = NULL;
    header->normals = NULL;

    RWRETURNVOID();
}
/****************************************************************************
 _rpSkinResEntryWaitDone

 Destroy-notify callback for the resource entry holding the blended data:
 waits on the token stored in the first 16 bits after the RwResEntry
 (stamped there by SkinMatBlendAtomicRender).
 */
static void
_rpSkinResEntryWaitDone(RwResEntry *resEntry)
{
    RwUInt16           *stamp;

    RWFUNCTION(RWSTRING("_rpSkinResEntryWaitDone"));

    stamp = (RwUInt16 *)(resEntry + 1);
    _rwDlTokenWaitDone(*stamp);

    RWRETURNVOID();
}
/****************************************************************************
 SkinMatBlendAtomicRender

 Render callback installed on the all-in-one node of the skinning pipeline.
 Makes sure a child resource entry exists to hold the blended positions (and
 normals, when the geometry has them), blends into it with blendBody,
 flushes it from the data cache and then chains to the original render
 callback (OldRender).

 Layout of the child entry's data, as used below: a 16-bit token directly
 after the RwResEntry, then padding up to a 32-byte boundary with the
 RwResEntry pointer itself stored in the word just before that boundary,
 then the positions followed by the normals.

 Inputs :   void *                  The atomic being rendered.
            RxGameCubePipeData *    Pipeline data holding the main resEntry.

 Outputs:   void *                  Result of the original render callback,
                                    or NULL if the atomic has no skin or a
                                    resource entry was lost.
 */
static void*
SkinMatBlendAtomicRender(void *object, RxGameCubePipeData *pipeData)
{
    RpAtomic *atomic = (RpAtomic *)object;
    RpGeometry *geometry = atomic->geometry;
    RwUInt32 numVerts = geometry->numVertices;
    RxGameCubeResEntryHeader *header;
    RwResEntry *skinEntry;

    RWFUNCTION(RWSTRING("SkinMatBlendAtomicRender"));

    if (NULL == *RPSKINATOMICGETDATA(atomic))
    {
        /* No skin */
        RWRETURN(NULL);
    }

    /* Try to make it unlikely that the main resEntry will vanish */
    RwResourcesUseResEntry(pipeData->resEntry);

    /* Replace the destroy callback to ensure we clean up */
    pipeData->resEntry->destroyNotify = mainResEntryCB;

    header = (RxGameCubeResEntryHeader *)(pipeData->resEntry + 1);

    if (header->positions)
    {
        /* Do we need to orphan current set? */
        RwUInt16 token;

        skinEntry = *((RwResEntry **)(header->positions) - 1);
        token = *(RwUInt16 *)(skinEntry + 1);

        if (!_rwDlTokenQueryDone(token))
        {
            /* Its token is not done yet: detach the entry from the header
             * so that a fresh one is allocated below */
            skinEntry->ownerRef = (RwResEntry **)NULL;
            header->positions = NULL;
            header->normals = NULL;
        }
    }

    if (!header->positions)
    {
        RwUInt32 wantNormals = RpGeometryGetFlags(geometry)
                               & rxGEOMETRY_NORMALS;
        RwUInt32 size;

        /* Figure out how big this is going to be */
        size = sizeof(RwV3d) * numVerts;
        if (wantNormals)
        {
            size += sizeof(RwV3d) * numVerts;
        }
        size = OSRoundUp32B(sizeof(void*)+sizeof(RwUInt16)+size);

        /* The header's positions field doubles as the owner reference */
        RwResourcesAllocateResEntry(object,
                                    (RwResEntry**)&(header->positions),
                                    size,
                                    _rpSkinResEntryWaitDone);
        if ((!pipeData->resEntry) || (!header->positions))
        {
            /* We lost one of the resource entries */
            RWRETURN(NULL);
        }

        /* At this point positions still holds the RwResEntry pointer */
        skinEntry = (RwResEntry *)header->positions;

        /* Skip the entry, the back pointer and the token, then round up to
         * a 32 byte boundary */
        header->positions = (RwV3d*)(((RwUInt32)skinEntry
                                      + sizeof(RwResEntry)
                                      + sizeof(RwResEntry *)
                                      + sizeof(RwUInt16) + 31)
                                     & ~31);

        /* Keep the RwResEntry pointer in the word before the positions */
        *((RwResEntry **)(header->positions) - 1) = skinEntry;

        if (wantNormals)
        {
            header->normals = (RwV3d*)header->positions + numVerts;
        }
        else
        {
            header->normals = NULL;
        }
    }

    skinEntry = *((RwResEntry **)(header->positions) - 1);

    RwResourcesUseResEntry(skinEntry);
    RwResourcesUseResEntry(pipeData->resEntry);

    /* Stamp resource entry */
    *(RwUInt16 *)(skinEntry + 1) = _rwDlTokenGetCurrent();

    blendBody(atomic, (RwV3d*)header->positions, (RwV3d*)header->normals);

    /* Flushes slightly more than is required */
    DCFlushRange(skinEntry + 1, skinEntry->size);

    RWRETURN(OldRender(object, pipeData));
}

#else /* SKIN_PIPE */
/****************************************************************************
 SkinMatBlendAtomicRender

 Render-callback variant used when SKIN_PIPE is not defined. Blends either
 into the geometry's resource entry (persistent geometry) or into morph
 target 0 of the locked geometry, then calls the default render callback.

 NOTE(review): the geometry is unlocked on both paths although only the
 non-persistent path locks it - confirm that this is intended.

 Inputs :   RpAtomic *    A pointer to the atomic.

 Outputs:   RpAtomic *    The atomic that was passed in.
 */

static RpAtomic*
SkinMatBlendAtomicRender(RpAtomic * atomic)
{
    RpSkin *skin;
    RwV3d *dstVertices;
    RwV3d *dstNormals;

    RWFUNCTION(RWSTRING("SkinMatBlendAtomicRender"));
    RWASSERT(atomic);

    if (!atomic)
    {
        RWRETURN(atomic);
    }

    skin = *RPSKINATOMICGETDATA(atomic);
    if (!skin)
    {
        /* Nothing to blend, and nothing is rendered either */
        RWRETURN(atomic);
    }

    if (!(rpGEOMETRYPERSISTENT & skin->pGeometry->instanceFlags))
    {
        /* Blend straight into morph target 0 of the geometry */
        RpGeometryLock(skin->pGeometry,
                       rpGEOMETRYLOCKVERTICES |
                       rpGEOMETRYLOCKNORMALS);

        dstVertices =
            RpMorphTargetGetVertices(RpGeometryGetMorphTarget
                                     (skin->pGeometry, 0));
        dstNormals =
            RpMorphTargetGetVertexNormals(RpGeometryGetMorphTarget
                                          (skin->pGeometry, 0));
    }
    else
    {
        /* Blend into the buffers of the geometry's resource entry */
        RxGameCubeResEntryHeader *header;

        header = (RxGameCubeResEntryHeader *)(skin->pGeometry
                                              ->repEntry + 1);

        dstVertices = header->positions;
        dstNormals = header->normals;
    }

    blendBody(atomic, dstVertices, dstNormals);

    RpGeometryUnlock(skin->pGeometry);
    (*pDefRenderCallback) (atomic);

    RWRETURN(atomic);
}
#endif /* SKIN_PIPE */
/****************************************************************************
 _rpSkinMBInitAtomic

 Initialise an atomic's matrix-blending skin data.

 Hooks the atomic up for skinned rendering (instance pipeline with SKIN_PIPE,
 render callback otherwise) and stores a private copy of the geometry's
 original vertices - directly followed by the original normals when morph
 target 0 has them - in the skin's platform data pointer. The copy is
 released by _rpSkinMBDestroyAtomic.

 Note that the pipeline / render callback is installed before the atomic is
 checked for skin data.

 Inputs :   RpAtomic *          A pointer to a skin atomic.

 Outputs:   RwBool              TRUE on success; FALSE if the atomic has no
                                skin data, the geometry has no vertices to
                                copy, or the copy could not be allocated
 */

RwBool
_rpSkinMBInitAtomic(RpAtomic * atomic)
{
    RpSkin             *pSkin;

    RwV3d              *pOriginalVertices;
    RwV3d              *pSrcVertices;
    RwV3d              *pSrcNormals;
    RwUInt32            vertexBytes;
    RwUInt32            totalBytes;

    RWFUNCTION(RWSTRING("_rpSkinMBInitAtomic"));

#ifdef SKIN_PIPE
    RpAtomicSetInstancePipeline(atomic, SkinPipeline);
#else /* SKIN_PIPE */
    if (pDefRenderCallback == NULL)
    {
        pDefRenderCallback = RpAtomicGetRenderCallBack(atomic);
    }

    RpAtomicSetRenderCallBack(atomic, SkinMatBlendAtomicRender);
#endif /* SKIN_PIPE */

    pSkin = *RPSKINATOMICGETDATA(atomic);

    if (pSkin == NULL)
    {
        RWRETURN(FALSE);
    }

    /* Add a copy of the original vertices and normals to the
     * platform-specific skin data pointer */

    RpGeometryLock(pSkin->pGeometry,
                   rpGEOMETRYLOCKVERTICES | rpGEOMETRYLOCKNORMALS);

    pSrcVertices =
        RpMorphTargetGetVertices(RpGeometryGetMorphTarget
                                 (pSkin->pGeometry, 0));
    pSrcNormals =
        RpMorphTargetGetVertexNormals(RpGeometryGetMorphTarget
                                      (pSkin->pGeometry, 0));

    /* One buffer holds the vertices and, if present, the normals */
    vertexBytes = sizeof(RwV3d) * pSkin->totalVertices;
    totalBytes = (pSrcNormals ? (vertexBytes * 2) : vertexBytes);

    pOriginalVertices = NULL;
    if (pSrcVertices)
    {
        pOriginalVertices = (RwV3d *) RwMalloc(totalBytes);
    }

    if (NULL == pOriginalVertices)
    {
        /* Nothing to copy from, or out of memory: do not leave the
         * geometry locked behind us */
        RpGeometryUnlock(pSkin->pGeometry);
        RWRETURN(FALSE);
    }

    /* Every byte of the buffer is overwritten by these copies, so it does
     * not need clearing first */
    memcpy(pOriginalVertices, pSrcVertices, vertexBytes);
    if (pSrcNormals)
    {
        memcpy(&(pOriginalVertices[pSkin->totalVertices]), pSrcNormals,
               vertexBytes);
    }

    RpGeometryUnlock(pSkin->pGeometry);

    pSkin->pPlatformData = pOriginalVertices;

    RWRETURN(TRUE);
}

/*
 * _rpSkinMBDestroyAtomic
 *
 * Releases the platform-specific copy of the original vertices/normals that
 * _rpSkinMBInitAtomic attached to the skin, and clears the pointer. Does
 * nothing for a NULL skin or a skin without platform data.
 */
void
_rpSkinMBDestroyAtomic(RpSkin * pSkin)
{
    RWFUNCTION(RWSTRING("_rpSkinMBDestroyAtomic"));

    if ((NULL == pSkin) || (NULL == pSkin->pPlatformData))
    {
        RWRETURNVOID();
    }

    RwFree(pSkin->pPlatformData);
    pSkin->pPlatformData = NULL;

    RWRETURNVOID();
}

