#include <string.h>

#include "batypes.h"
#include "batype.h"
#include "balibtyp.h"
#include "bavector.h"
#include "devprofile.h"
#include "baskytran.h"

/* RCS identification string for this file, kept in a file-scope array.
 * NOTE(review): __RWUNUSED__ presumably suppresses the unused-variable
 * warning for this otherwise unreferenced array - confirm against its
 * definition. */
static const char rcsid[] __RWUNUSED__ =
    "@@(#)$Id: baskytran.c,v 1.11 2001/02/13 14:26:52 hy01 Exp $";

/*
 * Byte displacements used by the inline assembly in this file.
 *
 * Nobody will really use MW for anything serious, so we just hard-code the
 * values and hope they stay in step with the real structure layouts.
 * MW does not provide .equ, hence plain #defines.
 * NOTE(review): for non-MW builds matrixASMMult also pulls "baequate.i" into
 * the assembler; presumably that file supplies the same symbols - confirm.
 *
 * mat_m<row><component> is the displacement of one 4-byte component inside a
 * matrix.  The assembly below uses row x as "right", y as "up", z as "at"
 * and w as "pos"; rows are 16 bytes apart and components 4 bytes apart.
 * mat_mxx / mat_mxy / mat_mxz (0 / 4 / 8) are also reused as the x / y / z
 * displacements inside an RwV3d, and sizeof_rwV3d is the stride used to step
 * through RwV3d arrays.
 */
#define mat_mxx 0
#define mat_myx 16
#define mat_mzx 32
#define mat_mxy 4
#define mat_myy 20
#define mat_mzy 36
#define mat_mxz 8
#define mat_myz 24
#define mat_mzz 40
#define mat_mwz 56
#define mat_mwy 52
#define mat_mwx 48
#define sizeof_rwV3d 12

/****************************************************************************
 matrixASMMult

 Multiplies two matrices on VU0 (COP2 macro instructions) and stores the
 result in dstMat.  For each of the right / up / at rows of matA:

     dst.row = matA.row.x * matB.right
             + matA.row.y * matB.up
             + matA.row.z * matB.at

 and for the pos row matB.pos is added as well (scaled by vf0.w, the VU
 constant register whose w component is 1.0).

 On entry   : Dest matrix pointer, two source matrix pointers
              (all three are asserted non-NULL and RWMATRIXALIGNMENT-aligned)
 On exit    : Matrix pointer contains result

 Comments   : WARNING!!! THIS FUNCTION PRESERVES FLAGS AND STATECOUNT BUT
              NOT THE TWO PAD WORDS, WHICH GET FILLED WITH RUBBISH.

              All five quadword loads are issued before the first store, so
              dstMat may be the same matrix as matA or matB.

              NOTE(review): only the w word of the first row is reloaded
              from dstMat and written back (vmove.w vf9, vf13).  The w words
              of the other three rows are stored from vf10 / vf11 / vf12,
              whose w lanes this code never writes.  Confirm against the
              RwMatrix layout that this matches the "flags and statecount"
              claim above.
 */
RWASMAPI(void)
matrixASMMult(RwMatrix *dstMat, const RwMatrix *matA, const RwMatrix *matB)
{
    RWFUNCTION(RWSTRING("matrixASMMult"));
    PFENTRY(PFmatrixASMMult);
    RWASSERT(dstMat);
    RWASSERT(RWMATRIXALIGNMENT(dstMat));
    RWASSERT(matA);
    RWASSERT(RWMATRIXALIGNMENT(matA));
    RWASSERT(matB);
    RWASSERT(RWMATRIXALIGNMENT(matB));

#ifndef __MWERKS__
    /* We make no attempt to provide error checking on the above MW constants */
    asm __volatile__ (" .include \"baequate.i\" ");
#endif
    /*
     * Operands: %0 = matA, %1 = matB, %2 = dstMat.
     *
     * Register use inside the block:
     *   vf1..vf4   matA right / up / at / pos
     *   vf5..vf8   matB right / up / at / pos
     *   vf13       current dstMat first row (only its w word is kept)
     *   vf9..vf12  result right / up / at / pos
     *
     * The block runs under ".set noreorder", so the instruction order below
     * is exactly what is executed.
     */
    asm __volatile__ ("

        .set noreorder

        lqc2        vf1,  mat_mxx(%0)                   # matA.right    -> VU0
        lqc2        vf2,  mat_myx(%0)                   # matA.up       -> VU0
        lqc2        vf3,  mat_mzx(%0)                   # matA.at       -> VU0
        lqc2        vf4,  mat_mwx(%0)                   # matA.pos      -> VU0
        
        lqc2        vf5,  mat_mxx(%1)                   # matB.right    -> VU0
        lqc2        vf6,  mat_myx(%1)                   # matB.up       -> VU0
        lqc2        vf7,  mat_mzx(%1)                   # matB.at       -> VU0
        lqc2        vf8,  mat_mwx(%1)                   # matB.pos      -> VU0

        lqc2        vf13, mat_mxx(%2)                   # dstMat.right  -> VU0
	
        vmulax.xyz  ACC,  vf5, vf1
        vmadday.xyz ACC,  vf6, vf1
        vmaddz.xyz  vf9,  vf7, vf1                      # Multiply dest.right.xyz                

        vmulax.xyz  ACC,  vf5, vf2
        vmadday.xyz ACC,  vf6, vf2
        vmaddz.xyz  vf10, vf7, vf2                      # Multiply dest.up.xyz        
        
        vmove.w     vf9,  vf13                          # copy flags

        vmulax.xyz  ACC,  vf5, vf3
        vmadday.xyz ACC,  vf6, vf3
        vmaddz.xyz  vf11, vf7, vf3                      # Multiply dest.at.xyz

        vmulax.xyz  ACC,  vf5, vf4
        vmadday.xyz ACC,  vf6, vf4
        vmaddaz.xyz ACC,  vf7, vf4
        vmaddw.xyz  vf12, vf8, vf0                      # Multiply dest.pos.xyz

        sqc2   		vf9,  mat_mxx(%2)                   # dstMat.right  <- VU0
        sqc2   		vf10, mat_myx(%2)                   # dstMat.up     <- VU0
        sqc2   		vf11, mat_mzx(%2)                   # dstMat.at     <- VU0
        sqc2   		vf12, mat_mwx(%2)                   # dstMat.pos    <- VU0

        .set reorder
        "
        /* No outputs; the "memory" clobber covers the stores through %2. */
        : : "r" (matA), "r" (matB), "r" (dstMat) : "memory");

    /* And that's all folks */
    PFEXIT(PFmatrixASMMult);
    RWRETURNVOID();
}

/****************************************************************************
 vectorASMMultPoint

 Transforms an array of points by a matrix on VU0 (COP2 macro instructions):

     out[i] = in[i].x * right + in[i].y * up + in[i].z * at + pos

 On entry   : pointsOut - destination array (numPoints elements are written)
              pointsIn  - source array
              numPoints - number of points; nothing is written if <= 0
              matrix    - transform, asserted RWMATRIXALIGNMENT-aligned
 On exit    : pointsOut (the original pointer, not the advanced cursor)

 Comments   : The loop is software pipelined: the next input element is
              fetched while the current one is being transformed.  As a
              result pointsIn[0] is read even when numPoints <= 0, and one
              element past the last one transformed is read (and discarded).

              Each input element is loaded before the matching output element
              is stored, so pointsOut may be the same array as pointsIn.

              The source cursor, destination cursor and counter are advanced
              inside the asm.  GCC requires input-only operands to be left
              unmodified, so the three are passed as outputs tied to their
              initial values with matching constraints; the parameters
              themselves are never changed.
 */

RwV3d *
vectorASMMultPoint(RwV3d *pointsOut,  const RwV3d *pointsIn,
                   RwInt32 numPoints, const RwMatrix *matrix)
{
    const RwV3d *srcCursor;     /* advanced by the asm; final value unused */
    RwV3d       *dstCursor;     /* advanced by the asm; final value unused */
    RwInt32      remaining;     /* counted down by the asm                 */

    RWFUNCTION(RWSTRING("vectorASMMultPoint"));
    PFENTRY(PFvectorASMMultPoint);
    RWASSERT(pointsIn);
    RWASSERT(pointsOut);
    RWASSERT(matrix);
    RWASSERT(RWMATRIXALIGNMENT(matrix));

    /*
     * Operands: %0 = source cursor, %1 = destination cursor,
     *           %2 = remaining count, %3 = matrix.
     *
     * vf1..vf4 hold the matrix rows, vf5 the point being transformed and
     * vf6 the result.  A point is assembled in a 128-bit GPR as
     * (z in the upper 64 bits, y:x in the lower 64 bits) and moved to VU0
     * with qmtc2; the result is unpacked the other way round.
     */
    asm __volatile__ ("

        .set    noreorder

        lqc2    vf1, mat_mxx(%3)                        # right.xyz -> VU0
        lqc2    vf2, mat_myx(%3)                        # up.xyz -> VU0
        lqc2    vf3, mat_mzx(%3)                        # at.xyz -> VU0
        lqc2    vf4, mat_mwx(%3)                        # pos.xyz -> VU0
            lwu     $13, mat_mxx(%0)                    # Vector.x
            lwu     $11, mat_mxy(%0)                    # Vector.y
            lwu     $12, mat_mxz(%0)                    # Vector.z
            dsll32  $11, $11, 0x0                       # Assemble vector
            or      $11, $11, $13
            pcpyld  $12, $12, $11
            qmtc2   $12, vf5

  multPointNext:
        blez    %2, doneMultPoints                      # Quick bypass
            addi    %0, %0, sizeof_rwV3d                # delay slot: next input
        vmulax.xyz  ACC, vf1, vf5                       # Multiply vector
        vmadday.xyz ACC, vf2, vf5
        vmaddaz.xyz ACC, vf3, vf5
        vmaddw.xyz  vf6, vf4, vf0
            lwu     $13, mat_mxx(%0)                    # Vector.x
            lwu     $11, mat_mxy(%0)                    # Vector.y
            lwu     $12, mat_mxz(%0)                    # Vector.z
            addi    %2, %2, -1                          # Loop management
            dsll32  $11, $11, 0x0                       # Assemble vector
            or      $11, $11, $13
            pcpyld  $12, $12, $11
            qmtc2   $12, vf5
        qmfc2   $25, vf6                                # Vector <- VU0
        dsrl32  $2, $25, 0x0
        pcpyud  $3, $25, $25
        sw      $25, mat_mxx(%1)                        # Vector.x
        sw      $2, mat_mxy(%1)                         # Vector.y
        sw      $3, mat_mxz(%1)                         # Vector.z
        b       multPointNext
        addi    %1, %1, sizeof_rwV3d                    # delay slot: next output

  doneMultPoints:
        .set    reorder
        "
         : "=r" (srcCursor), "=r" (dstCursor), "=r" (remaining)
         : "r" (matrix), "0" (pointsIn), "1" (pointsOut), "2" (numPoints)
         : "cc",  "memory",  "$2", "$3", "$11", "$12", "$13", "$25");

    PFEXIT(PFvectorASMMultPoint);
    RWRETURN(pointsOut);
}


/****************************************************************************
 vectorASMMultVector

 Transforms an array of vectors by the right / up / at rows of a matrix on
 VU0 (COP2 macro instructions); the pos row is not applied:

     out[i] = in[i].x * right + in[i].y * up + in[i].z * at

 On entry   : pointsOut - destination array (numPoints elements are written)
              pointsIn  - source array
              numPoints - number of vectors; nothing is written if <= 0
              matrix    - transform, asserted RWMATRIXALIGNMENT-aligned
 On exit    : pointsOut (the original pointer, not the advanced cursor)

 Comments   : The loop is software pipelined: the next input element is
              fetched while the current one is being transformed.  As a
              result pointsIn[0] is read even when numPoints <= 0, and one
              element past the last one transformed is read (and discarded).

              Each input element is loaded before the matching output element
              is stored, so pointsOut may be the same array as pointsIn.

              The source cursor, destination cursor and counter are advanced
              inside the asm.  GCC requires input-only operands to be left
              unmodified, so the three are passed as outputs tied to their
              initial values with matching constraints; the parameters
              themselves are never changed.
 */

RwV3d *
vectorASMMultVector(RwV3d *pointsOut, const RwV3d *pointsIn,
                    RwInt32 numPoints,  const RwMatrix *matrix)
{
    const RwV3d *srcCursor;     /* advanced by the asm; final value unused */
    RwV3d       *dstCursor;     /* advanced by the asm; final value unused */
    RwInt32      remaining;     /* counted down by the asm                 */

    RWFUNCTION(RWSTRING("vectorASMMultVector"));
    PFENTRY(PFvectorASMMultVector);
    RWASSERT(pointsIn);
    RWASSERT(pointsOut);
    RWASSERT(matrix);
    RWASSERT(RWMATRIXALIGNMENT(matrix));

    /*
     * Operands: %0 = source cursor, %1 = destination cursor,
     *           %2 = remaining count, %3 = matrix.
     *
     * vf1..vf3 hold the right / up / at rows, vf5 the vector being
     * transformed and vf6 the result.  A vector is assembled in a 128-bit
     * GPR as (z in the upper 64 bits, y:x in the lower 64 bits) and moved to
     * VU0 with qmtc2; the result is unpacked the other way round.
     */
    asm __volatile__ ("

        .set noreorder

        lqc2    vf1, mat_mxx(%3)                        # right.xyz -> VU0
        lqc2    vf2, mat_myx(%3)                        # up.xyz -> VU0
        lqc2    vf3, mat_mzx(%3)                        # at.xyz -> VU0
            lwu     $24, mat_mxx(%0)                    # Vector.x
            lwu     $14, mat_mxy(%0)                    # Vector.y
            lwu     $15, mat_mxz(%0)                    # Vector.z
            dsll32  $14, $14, 0x0
            or      $14, $14, $24
            pcpyld  $15, $15, $14
            qmtc2   $15, vf5                            # Assemble vector

    multVecNext:
        blez    %2, doneMultVects                       # Quick bypass
            addi    %0, %0, sizeof_rwV3d                # delay slot: next input
        vmulax.xyz  ACC, vf1, vf5                       # Multiply vector
        vmadday.xyz ACC, vf2, vf5
        vmaddz.xyz  vf6, vf3, vf5
            lwu     $24, mat_mxx(%0)                    # Vector.x
            lwu     $14, mat_mxy(%0)                    # Vector.y
            lwu     $15, mat_mxz(%0)                    # Vector.z
            addi    %2, %2, -1                          # Loop management
            dsll32  $14, $14, 0x0
            or      $14, $14, $24
            pcpyld  $15, $15, $14
            qmtc2   $15, vf5                            # Assemble vector
        qmfc2   $25, vf6                                # Vector <- VU0
        dsrl32  $2, $25, 0x0
        pcpyud  $3, $25, $25
        sw      $25, mat_mxx(%1)                        # Vector.x
        sw      $2, mat_mxy(%1)                         # Vector.y
        sw      $3, mat_mxz(%1)                         # Vector.z
        b       multVecNext
        addi    %1, %1, sizeof_rwV3d                    # delay slot: next output

    doneMultVects:
        .set reorder
        "

         : "=r" (srcCursor), "=r" (dstCursor), "=r" (remaining)
         : "r" (matrix), "0" (pointsIn), "1" (pointsOut), "2" (numPoints)
         : "cc",  "memory",  "$24", "$14", "$15", "$25", "$2",  "$3");

    PFEXIT(PFvectorASMMultVector);
    RWRETURN(pointsOut);
}

