// TKBMS v1.0 -----------------------------------------------------
//
// PLATFORM   : ALL
// PRODUCT   : COMMON
// VISIBILITY   : PUBLIC
//
// ------------------------------------------------------TKBMS v1.0
#pragma once

#ifndef HK_MATH_MATH_H
#   error Please include Common/Base/hkBase.h instead of this file.
#endif


// Components that aren't loaded during hkVector4f::load() are initialized to this in Debug and Dev configurations.
// The value is the HK_QUADREAL_NAN entry of the g_vectorfConstants table.
#define HK_VECTOR4f_DEBUG_FILL_VALUE (g_vectorfConstants[HK_QUADREAL_NAN])

// Shorthand aliases for the parameter/comparison types, used by the declarations in this header.
#define hkSimdFloat32_   hkSimdFloat32Parameter
#define hkVector4fComp  hkVector4fComparison
#define hkVector4fComp_ hkVector4fComparisonParameter

class hkVector4f;

#if !defined(HK_REAL_IS_DOUBLE)
    // When HK_REAL_IS_DOUBLE is not defined, the precision-agnostic hkVector4 names
    // are aliases of the float vector types declared in this header.
    typedef hkVector4f  hkVector4;
    typedef hkVector4f_ hkVector4_;
    typedef hkVector4fParameter hkVector4Parameter;
#endif

/// \class hkVector4f
///
/// A four component vector. It holds four floating point numbers according to the
/// definition of float. There are optimized SIMD implementations
/// available for several platforms. They use platform specific data structures and
/// code to provide efficient processing. Thus to be portable, nothing should be assumed
/// about the internal layout of the four floating point numbers.
///
/// The vector has several methods which take a template parameter for the number of
/// components to work on, for example dot<3>. This guarantees to be able to use
/// optimized implementations for the method as well as using the vector as a 3-component
/// point or a 4-component homogeneous vector which can be verified at compile time.
///
/// Methods for component access or component modification are declared both taking
/// a template argument or an integer index (broadcast() for example). Using the template
/// parameter version is preferred because it allows for compile time address calculation.
/// Use the integer index version only if the index is not known at compile time.
///
/// For accessing the contents of a vector, it is strongly recommended to use the
/// get/setComponent methods as they operate with hkSimdFloat32 which in turn has
/// efficient methods for performing arithmetic and other operations which avoid
/// leaving the SIMD processing pipeline. Using the operator() is only useful for
/// debugging or explicit storage purposes.
///
/// The methods which take a matrix and a vector to transform are available both
/// as forced inline (with _ prefix) or as out-of-line calls. The inline versions produce
/// faster code because of better interleaving of instructions but can bloat the
/// code considerably. In situations where code size matters, use the out-of-line versions.
///
/// Some floating point operations which involve division or square root calculations can be quite
/// slow. The implementations in this class offer improved performance for these operations
/// at the expense of accuracy and handling of denormalized floating point numbers.
/// We have balanced performance with the behavior one expects from a standard math operator,
/// but these methods will in general not be IEEE compliant the way the system math library
/// calls are. See the comments on setDiv(), setReciprocal() and similar methods for how they behave.
///
/// If you need to control denormal handling, can make do with even lower floating point
/// accuracy, or your algorithm requires fully IEEE-compliant behavior, the vector offers an
/// advanced interface in the form of member template methods for the division and square
/// root routines. Each has explicit flags for controlling accuracy and denormal handling.
/// The given bit width is the calculated accuracy in the floating point mantissa.
/// The 23Bit versions are almost as precise as the full version (24Bit) but offer
/// a substantial speedup on most platforms because of less strict denormals handling.
/// The 12Bit versions again offer faster execution with even less precision. Their
/// usefulness depends on the algorithm at hand. When writing new code, it is
/// recommended to start with the full precision version and then check whether a
/// lower precision is acceptable. The template flags for controlling handling of
/// division-by-zero or negative square root arguments offer easy writing of checked
/// fail-safe math code.
///
/// The load() and store() methods take as a template argument the number of components to load/store.
/// The advanced interface in addition has template methods which take a flag for the
/// alignment of the memory pointer and for the rounding mode to apply in case of storing
/// to a lower precision floating point format.
///
/// \sa hkSimdFloat32 hkVector4fComp hkHalf16 hkFloat16 hkMathAccuracyMode hkMathDivByZeroMode hkMathNegSqrtMode hkMathIoMode hkMathRoundingMode
class hkVector4f
{
    public:

        // COM-2625: Work around bad code generation in x64, vs2013 (fixed in Update 4)
        // COM-3428: Work around broken operator overloading in debug builds using GCC 4.7.2 on linux
#if !defined(HK_DISABLE_MATH_CONSTRUCTORS) \
    || (defined(HK_ARCH_X64) && defined(HK_COMPILER_MSVC) && (HK_COMPILER_MSVC_VERSION >= 1800) && (_MSC_FULL_VER < 180031101)) \
    || (defined(HK_PLATFORM_LINUX) && defined(HK_DEBUG) && defined(HK_COMPILER_GCC) && (HK_COMPILER_GCC_VERSION == 40702))

        /// Empty constructor; the body performs no work.
        HK_ALWAYS_INLINE hkVector4f()
        {
        }

        /// Broadcast constructor: sets all four components to \a x.
        /// Declared explicit, so a plain float does not convert to a vector implicitly.
        explicit HK_ALWAYS_INLINE hkVector4f(float x)
        {
            set(x, x, x, x);
        }

        /// Component constructor: sets the components to xyzw. \a w defaults to zero.
        HK_ALWAYS_INLINE hkVector4f(float x, float y, float z, float w = 0.0f)
        {
            set(x, y, z, w);
        }

        /// Creates a new hkVector4f, copying all 4 values from \a v. ( self.xyzw = v.xyzw ).
        /// \warning This constructor causes the compiler NOT to pass const hkVector4f's by register.
        HK_ALWAYS_INLINE hkVector4f(const hkVector4f& v)
        {
            set(v);
        }

#endif
        // Class declaration and reflection metadata for hkVector4f.
        // The type is reflected as a fixed array of 4 floats with a default of (0,0,0,0).
        // NOTE(review): the HK_* macros are defined outside this file; their exact expansion is
        // not visible here, so confirm before reordering or removing any of these lines.
        HK_DECLARE_CLASS(hkVector4f, New, Pod);
        HK_DETAIL_DECLARE_REFLECT_EXPORT(hkVector4f, HK_EXPORT_COMMON);
        HK_REFLECT_AS_ARRAY_FIXED(4,float);
        HK_RECORD_ATTR(hk::TypeLayout(hkMath::computeTypeSizeAlign));
        HK_RECORD_ATTR( hk::ReflectDetails(fields=false) );
        HK_RECORD_ATTR( hk::Default(0,0,0,0) );
        HK_RECORD_ATTR( hk::DiffAtomic );
        HK_RECORD_ATTR(hk::IncludeInMgd(false));
        HK_RECORD_ATTR(hk::Ui_NotExpandable);
        typedef hkAlignedQuad<float> ReflectDefaultType;

        /// \name Vector initialization
        ///@{

        HK_ALWAYS_INLINE void operator= ( hkVector4f_ v );      ///< self = v. Returns void, so assignments cannot be chained.

        HK_INLINE void set( hkSimdFloat32_ x, hkSimdFloat32_ y, hkSimdFloat32_ z, hkSimdFloat32_ w );   ///< Sets the components to xyzw
        HK_INLINE void set(float x, float y, float z, float w=float(0));    ///< Sets the components to xyzw. \a w defaults to zero.
        HK_ALWAYS_INLINE void set(const hkQuadFloat32& q);          ///< Sets self by copying all 4 values from \a q. ( self = q ).

        HK_ALWAYS_INLINE void setAll( hkSimdFloat32_ x);        ///< Sets all components of self to the same value \a x.
        HK_ALWAYS_INLINE void setAll(const float& x);               ///< Sets all components of self to the same value \a x.
        HK_ALWAYS_INLINE void setZero();                            ///< Sets all components to zero.

        /// Copies \a a into self by forwarding to operator=. ( self = a )
        HK_ALWAYS_INLINE void set( hkVector4f_ a )
        {
            *this = a;
        }
        HK_ALWAYS_INLINE void set( hkVector4dParameter a);

        HK_ALWAYS_INLINE void zeroComponent(const int i);   ///< Set component \a i to zero. ( self[i] = 0 ).

        template <int I>
        HK_ALWAYS_INLINE void zeroComponent();              ///< This is the faster version of zeroComponent(int i).

        /// Returns a vector whose four components are all set to \a a (via setAll).
        HK_ALWAYS_INLINE static hkVector4f HK_CALL ctor( hkSimdFloat32_ a )
        {
            hkVector4f splat;
            splat.setAll( a );
            return splat;
        }

        /// Returns a vector whose four components are all set to the float \a a (via setAll).
        HK_ALWAYS_INLINE static hkVector4f HK_CALL ctor( float a )
        {
            hkVector4f splat;
            splat.setAll( a );
            return splat;
        }

        ///@}

        /// \name Vector calc methods
        ///@{
        HK_ALWAYS_INLINE void setAdd( hkVector4f_ a, hkVector4f_   b );     ///<    self = a+b
        HK_ALWAYS_INLINE void setAdd( hkVector4f_ a, hkSimdFloat32_ b );        ///<    self = a+b, b is splatted to all 4 components
        HK_ALWAYS_INLINE void setSub( hkVector4f_ a, hkVector4f_   b );     ///<    self = a-b
        HK_ALWAYS_INLINE void setSub( hkVector4f_ a, hkSimdFloat32_ b );        ///<    self = a-b, b is splatted to all 4 components
        HK_ALWAYS_INLINE void setMul( hkVector4f_ a, hkVector4f_   b );     ///<    self = a*b
        HK_ALWAYS_INLINE void setMul( hkVector4f_ a, hkSimdFloat32_ b );        ///<    self = a*b
        HK_ALWAYS_INLINE void setDiv( hkVector4f_ a, hkVector4f_   b );     ///<    self = a/b  23bit accuracy, no division by zero checks
        HK_ALWAYS_INLINE void setDiv( hkVector4f_ a, hkSimdFloat32_ b );        ///<    self = a/b  23bit accuracy, no division by zero checks

        /// Scalar-first overload of setMul; forwards to setMul(b, a). ( self = a*b )
        HK_ALWAYS_INLINE void setMul( hkSimdFloat32_ a, hkVector4f_ b )
        {
            setMul( b, a );
        }

        HK_INLINE        void setReciprocal( hkVector4f_ v);        ///<    self = 1/v  23bit accuracy, no division by zero checks
        HK_INLINE        void setSqrt( hkVector4f_ a );         ///<    self = sqrt(a)  23bit accuracy, if (a<=0), zero is returned.
        HK_INLINE        void setSqrtInverse( hkVector4f_ a );      ///<    self = 1.0f/sqrt(a)  23bit accuracy, if (a<=0), zero is returned.

        HK_ALWAYS_INLINE void setAddMul( hkVector4f_ a, hkVector4f_   b, hkVector4f_   c);      ///< self = a + b*c
        HK_ALWAYS_INLINE void setAddMul( hkVector4f_ a, hkVector4f_   b, hkSimdFloat32_ c);     ///< self = a + b*c
        HK_ALWAYS_INLINE void setSubMul( hkVector4f_ a, hkVector4f_   b, hkVector4f_   c);      ///< self = a - b*c
        HK_ALWAYS_INLINE void setSubMul( hkVector4f_ a, hkVector4f_   b, hkSimdFloat32_ c);     ///< self = a - b*c

        /// Component-wise in-place addition. ( this += a )
        HK_ALWAYS_INLINE void add( hkVector4f_ a )
        {
            setAdd( *this, a );
        }

        /// Component-wise in-place subtraction. ( this -= a )
        HK_ALWAYS_INLINE void sub( hkVector4f_ a )
        {
            setSub( *this, a );
        }

        /// Component-wise in-place multiplication. ( this *= a )
        HK_ALWAYS_INLINE void mul( hkVector4f_ a )
        {
            setMul( *this, a );
        }

        /// In-place multiplication by the scalar \a a. ( this *= a )
        HK_ALWAYS_INLINE void mul( hkSimdFloat32_ a )
        {
            setMul( *this, a );
        }

        /// Component-wise in-place division. ( this /= a ), 23bit accuracy, no division by zero checks
        HK_INLINE void div( hkVector4f_ a )
        {
            setDiv( *this, a );
        }

        /// this += a (component-wise). Returns void.
        HK_ALWAYS_INLINE void operator+=( hkVector4f_ a )
        {
            setAdd( *this, a );
        }

        /// this -= a (component-wise). Returns void.
        HK_ALWAYS_INLINE void operator-=( hkVector4f_ a )
        {
            setSub( *this, a );
        }

        /// this *= a (component-wise). Returns void.
        HK_ALWAYS_INLINE void operator*=( hkVector4f_ a )
        {
            setMul( *this, a );
        }

        /// this *= a, with the scalar \a a. Returns void.
        HK_ALWAYS_INLINE void operator*=( hkSimdFloat32_ a )
        {
            setMul( *this, a );
        }

        /// this /= a (component-wise), 23bit accuracy, no division by zero checks
        HK_INLINE void operator/=( hkVector4f_ a )
        {
            setDiv( *this, a );
        }

        /// this /= a, with the scalar \a a, 23bit accuracy, no division by zero checks
        HK_INLINE void operator/=( hkSimdFloat32_ a )
        {
            setDiv( *this, a );
        }

        /// Returns this + a (component-wise); self is not modified.
        HK_ALWAYS_INLINE hkVector4f operator+( hkVector4f_ a ) const
        {
            hkVector4f sum;
            sum.setAdd( *this, a );
            return sum;
        }

        /// Returns this - a (component-wise); self is not modified.
        HK_ALWAYS_INLINE hkVector4f operator-( hkVector4f_ a ) const
        {
            hkVector4f difference;
            difference.setSub( *this, a );
            return difference;
        }

        /// Returns -this, computed as getZero() minus self.
        HK_ALWAYS_INLINE hkVector4f operator-() const
        {
            hkVector4f negated;
            negated.setSub( getZero(), *this );
            return negated;
        }

        /// Returns this * a (component-wise); self is not modified.
        HK_ALWAYS_INLINE hkVector4f operator*( hkVector4f_ a ) const
        {
            hkVector4f product;
            product.setMul( *this, a );
            return product;
        }

        /// Returns this * a with the scalar \a a; self is not modified.
        HK_ALWAYS_INLINE hkVector4f operator*( hkSimdFloat32_ a ) const
        {
            hkVector4f product;
            product.setMul( *this, a );
            return product;
        }

        /// Returns this / a (component-wise), 23bit accuracy, no division by zero checks
        HK_INLINE hkVector4f operator/( hkVector4f_ a ) const
        {
            hkVector4f quotient;
            quotient.setDiv( *this, a );
            return quotient;
        }

        /// Returns this / a with the scalar \a a, 23bit accuracy, no division by zero checks
        HK_INLINE hkVector4f operator/( hkSimdFloat32_ a ) const
        {
            hkVector4f quotient;
            quotient.setDiv( *this, a );
            return quotient;
        }


        /// self += a*b (component-wise product of two vectors)
        HK_ALWAYS_INLINE void addMul( hkVector4f_ a, hkVector4f_ b )
        {
            setAddMul( *this, a, b );
        }

        /// self += a*b, vector \a a times scalar \a b
        HK_ALWAYS_INLINE void addMul( hkVector4f_ a, hkSimdFloat32_ b )
        {
            setAddMul( *this, a, b );
        }

        /// self += a*b, scalar \a a times vector \a b (arguments are swapped when forwarding)
        HK_ALWAYS_INLINE void addMul( hkSimdFloat32_ a, hkVector4f_ b )
        {
            setAddMul( *this, b, a );
        }

        /// self -= a*b (component-wise product of two vectors)
        HK_ALWAYS_INLINE void subMul( hkVector4f_ a, hkVector4f_ b )
        {
            setSubMul( *this, a, b );
        }

        /// self -= a*b, vector \a a times scalar \a b
        HK_ALWAYS_INLINE void subMul( hkVector4f_ a, hkSimdFloat32_ b )
        {
            setSubMul( *this, a, b );
        }

        /// self -= a*b, scalar \a a times vector \a b (arguments are swapped when forwarding)
        HK_ALWAYS_INLINE void subMul( hkSimdFloat32_ a, hkVector4f_ b )
        {
            setSubMul( *this, b, a );
        }

        HK_ALWAYS_INLINE void setCross( hkVector4f_ a, hkVector4f_   b );                   ///< self = a cross b
        /// Returns self cross \a a; self is not modified.
        HK_ALWAYS_INLINE hkVector4f cross( hkVector4f_ a ) const
        {
            hkVector4f product;
            product.setCross( *this, a );
            return product;
        }

        template <int N>
        HK_ALWAYS_INLINE const hkSimdFloat32 dot( hkVector4f_ a ) const;    ///< return self dot<N> a
        HK_ALWAYS_INLINE hkSimdFloat32 dot3( hkVector4f_ a)  const;     ///< return self dot<3> a
        HK_ALWAYS_INLINE hkSimdFloat32 dot4( hkVector4f_ a ) const;     ///< return self dot<4> a

        HK_INLINE        void setInterpolate( hkVector4f_ a, hkVector4f_   b, hkSimdFloat32_ t );   ///< self = (1-t)*a + t*b


        ///@}

        /// \name Comparisons by-value and selection
        ///@{

        HK_ALWAYS_INLINE const hkVector4fComp less( hkVector4f_ a ) const;          ///< result[i] = this[i] <  a[i]
        HK_ALWAYS_INLINE const hkVector4fComp lessEqual( hkVector4f_ a ) const;     ///< result[i] = this[i] <= a[i]
        HK_ALWAYS_INLINE const hkVector4fComp greater( hkVector4f_ a ) const;       ///< result[i] = this[i] >  a[i]
        HK_ALWAYS_INLINE const hkVector4fComp greaterEqual( hkVector4f_ a ) const;  ///< result[i] = this[i] >= a[i]
        HK_ALWAYS_INLINE const hkVector4fComp equal( hkVector4f_ a ) const;         ///< result[i] = this[i] == a[i]
        HK_ALWAYS_INLINE const hkVector4fComp notEqual( hkVector4f_ a ) const;      ///< result[i] = this[i] != a[i]

        HK_ALWAYS_INLINE const hkVector4fComp lessZero() const;                     ///< result[i] = this[i] < 0
        HK_ALWAYS_INLINE const hkVector4fComp lessEqualZero() const;                ///< result[i] = this[i] <=0
        HK_ALWAYS_INLINE const hkVector4fComp greaterZero() const;                  ///< result[i] = this[i] >0
        HK_ALWAYS_INLINE const hkVector4fComp greaterEqualZero() const;             ///< result[i] = this[i] >=0
        HK_ALWAYS_INLINE const hkVector4fComp equalZero() const;                    ///< result[i] = this[i] ==0
        HK_ALWAYS_INLINE const hkVector4fComp notEqualZero() const;                 ///< result[i] = this[i] !=0

        /// result[i] = this[i] <  a[i]
        HK_ALWAYS_INLINE hkVector4fComp operator<( hkVector4f_ a ) const
        {
            return less( a );
        }

        /// result[i] = this[i] <= a[i]
        HK_ALWAYS_INLINE hkVector4fComp operator<=( hkVector4f_ a ) const
        {
            return lessEqual( a );
        }

        /// result[i] = this[i] >  a[i]
        HK_ALWAYS_INLINE hkVector4fComp operator>( hkVector4f_ a ) const
        {
            return greater( a );
        }

        /// result[i] = this[i] >= a[i]
        HK_ALWAYS_INLINE hkVector4fComp operator>=( hkVector4f_ a ) const
        {
            return greaterEqual( a );
        }

        /// result[i] = this[i] == a[i]. Note: yields a per-component mask, not a single bool.
        HK_ALWAYS_INLINE hkVector4fComp operator==( hkVector4f_ a ) const
        {
            return equal( a );
        }

        /// result[i] = this[i] != a[i]. Note: yields a per-component mask, not a single bool.
        HK_ALWAYS_INLINE hkVector4fComp operator!=( hkVector4f_ a ) const
        {
            return notEqual( a );
        }

        /// result[i] = this[i] <  a. The scalar is first expanded to a vector with ctor().
        HK_ALWAYS_INLINE hkVector4fComp operator<( hkSimdFloat32_ a ) const
        {
            const hkVector4f splat = hkVector4f::ctor( a );
            return less( splat );
        }

        /// result[i] = this[i] <= a. The scalar is first expanded to a vector with ctor().
        HK_ALWAYS_INLINE hkVector4fComp operator<=( hkSimdFloat32_ a ) const
        {
            const hkVector4f splat = hkVector4f::ctor( a );
            return lessEqual( splat );
        }

        /// result[i] = this[i] >  a. The scalar is first expanded to a vector with ctor().
        HK_ALWAYS_INLINE hkVector4fComp operator>( hkSimdFloat32_ a ) const
        {
            const hkVector4f splat = hkVector4f::ctor( a );
            return greater( splat );
        }

        /// result[i] = this[i] >= a. The scalar is first expanded to a vector with ctor().
        HK_ALWAYS_INLINE hkVector4fComp operator>=( hkSimdFloat32_ a ) const
        {
            const hkVector4f splat = hkVector4f::ctor( a );
            return greaterEqual( splat );
        }

        /// result[i] = this[i] == a. The scalar is first expanded to a vector with ctor().
        HK_ALWAYS_INLINE hkVector4fComp operator==( hkSimdFloat32_ a ) const
        {
            const hkVector4f splat = hkVector4f::ctor( a );
            return equal( splat );
        }

        /// result[i] = this[i] != a. The scalar is first expanded to a vector with ctor().
        HK_ALWAYS_INLINE hkVector4fComp operator!=( hkSimdFloat32_ a ) const
        {
            const hkVector4f splat = hkVector4f::ctor( a );
            return notEqual( splat );
        }


        /// Compares the first N components of self to \a v and returns true if all the components are the same respective value. ( self == v )
        template <int N> HK_INLINE        hkBool32 allExactlyEqual( hkVector4f_ v) const;
        /// Compares the first N components of self to the scalar \a v and returns true if all of them equal \a v.
        /// The scalar is expanded to a vector with ctor() and forwarded to the vector overload.
        template <int N>
        HK_ALWAYS_INLINE hkBool32 allExactlyEqual( hkSimdFloat32_ v ) const
        {
            const hkVector4f splat = hkVector4f::ctor( v );
            return allExactlyEqual<N>( splat );
        }

        /// Compares the first N components of self to zero and returns true if all the components are zero. ( self == 0 )
        template <int N> HK_INLINE hkBool32 allExactlyEqualZero() const;

        /// Compares the first N components of self to \a v and returns true if all the differences of all components are within \a epsilon range (inclusive). ( self >= v-epsilon && self <= v+epsilon ).
        /// \remark This is not the Euclidean epsilon distance.
        template <int N> HK_INLINE hkBool32 allEqual( hkVector4f_ v, hkSimdFloat32_ epsilon) const;

        /// Compares the first N components of self to zero and returns true if all the differences of all components are within \a epsilon range (inclusive). ( self >= -epsilon && self <= +epsilon ).
        /// \remark This is not the Euclidean epsilon distance.
        template <int N> HK_INLINE hkBool32 allEqualZero( hkSimdFloat32_ epsilon) const;

        /// Compares the first N components and returns true if all the components of self are less than the components of \a a. ( self < a )
        template <int N> HK_INLINE hkBool32 allLess( hkVector4f_ a ) const;

        /// Compares the first N components and returns true if all the components of self are less than zero. ( self < 0 )
        template <int N> HK_INLINE hkBool32 allLessZero() const;

        /// Component-wise select values from \a trueValue or \a falseValue depending on whether
        /// the component is marked true or false in the \a compareMask. ( self = mask ? trueValue : falseValue )
        HK_ALWAYS_INLINE void setSelect( hkVector4fComp_ compareMask, hkVector4f_ trueValue, hkVector4f_ falseValue );

        /// Set every component to zero if the corresponding \a compareMask component is false. (this = compareMask ? this : zero)
        HK_ALWAYS_INLINE void zeroIfFalse( hkVector4fComp_ compareMask );

        /// Set every component to zero if the corresponding \a compareMask component is true. (this = compareMask ? zero : this)
        HK_ALWAYS_INLINE void zeroIfTrue( hkVector4fComp_ compareMask );

        /// Component-wise select values from \a trueValue or \a falseValue depending on whether
        /// the component is marked true or false in the template mask. ( self = mask ? trueValue : falseValue )
        template<hkVector4ComparisonMask::Mask M> HK_ALWAYS_INLINE void setSelect( hkVector4f_ trueValue, hkVector4f_ falseValue );

        ///@}

        /// \name Sign and clamping operations
        ///@{

        /// Store \a v with self while flipping the signs of the first N components. ( self = N ? -v : v )
        template <int N> HK_ALWAYS_INLINE void setNeg( hkVector4f_ v);

        /// Component-wise copy values from \a v while flipping the sign of the value if the corresponding flag in the \a mask is true. ( self = mask ? -v : v )
        HK_ALWAYS_INLINE void setFlipSign( hkVector4f_ v, hkVector4fComp_ mask);

        /// Component-wise copy values from \a v while flipping the sign of the value if the corresponding component in \a vSign is negative. ( self = (vSign < 0) ? -v : v )
        HK_ALWAYS_INLINE void setFlipSign( hkVector4f_ v, hkVector4f_ vSign);

        /// Component-wise copy values from \a v while flipping the sign of each value if the scalar value of \a sSign is negative. ( self = (sSign < 0) ? -v : v )
        HK_ALWAYS_INLINE void setFlipSign( hkVector4f_ v, hkSimdFloat32_ sSign);

        /// Sets the components of self to the absolute value of the components in \a v. ( self = abs(v) )
        HK_ALWAYS_INLINE void setAbs( hkVector4f_ v);

        /// Component-wise compare \a a and \a b and store the smaller value to self. ( self = (a < b) ? a : b )
        HK_ALWAYS_INLINE void setMin( hkVector4f_ a, hkVector4f_   b );

        /// Component-wise compare \a a and \a b and store the larger value to self. ( self = (a > b) ? a : b )
        HK_ALWAYS_INLINE void setMax( hkVector4f_ a, hkVector4f_   b );

        /// setMax() but using integer compare if available, else using floating point compare
        HK_ALWAYS_INLINE void setMaxI( hkVector4f_ a, hkVector4f_ b );

        /// setMin() but using integer compare if available, else using floating point compare
        HK_ALWAYS_INLINE void setMinI( hkVector4f_ a, hkVector4f_ b );


        /// Component-wise clamp \a a between \a minVal and \a maxVal and store to self. ( self = min( maxVal, max(a, minVal) ) ).
        /// Note that if \a a is NaN, the result will be \a maxVal.
        HK_INLINE void setClamped( hkVector4f_ a, hkVector4f_ minVal, hkVector4f_ maxVal );

        /// Component-wise clamp \a a between zero and one and store to self. ( self = min( 1, max(a, 0) ) ).
        /// Note that if \a a is NaN, the result will be one.
        HK_INLINE void setClampedZeroOne( hkVector4f_ a );

        /// Sets self to a copy of \a vSrc that is rescaled to have a maximum length of \a maxLen.
        /// If \a vSrc is shorter than \a maxLen, no rescaling is performed.
        HK_INLINE void setClampedToMaxLength( hkVector4f_ vSrc, hkSimdFloat32_ maxLen);

        ///@}

        /// \name Out-of-line matrix operations
        ///@{

        /// Sets self to the vector \a b rotated by matrix \a a.
        HK_EXPORT_COMMON void setRotatedDir(const hkMatrix3f& a, hkVector4f_   b);

        /// Sets self to the vector \a b rotated by the inverse matrix of \a a.
        HK_EXPORT_COMMON void setRotatedInverseDir(const hkMatrix3f& a, hkVector4f_   b);

        /// Sets self to the vector \a b transformed by matrix \a a.
        HK_EXPORT_COMMON void setTransformedPos(const hkTransformf& a, hkVector4f_   b);

        /// Sets self to the vector \a b transformed by the inverse matrix of \a a.
        HK_EXPORT_COMMON void setTransformedInversePos(const hkTransformf& a, hkVector4f_   b);

        /// Sets self to the vector \a direction rotated by the quaternion \a quat.
        HK_EXPORT_COMMON void setRotatedDir(hkQuaternionfParameter quat, hkVector4f_ direction);

        /// Sets self to the vector \a direction rotated by the inverse of quaternion \a quat.
        HK_EXPORT_COMMON void setRotatedInverseDir(hkQuaternionfParameter quat, hkVector4f_ direction);

        /// Sets self to the vector \a b transformed by matrix \a a.
        /// \remark You need to use this method if scale is present.
        HK_EXPORT_COMMON void setTransformedPos(const hkQsTransformf& a, hkVector4f_   b);

        /// Sets self to the vector \a b transformed by the inverse matrix of \a a.
        /// \remark You need to use this method if scale is present.
        HK_EXPORT_COMMON void setTransformedInversePos(const hkQsTransformf& a, hkVector4f_   b);

        /// Sets self to the vector \a b transformed by matrix \a a.
        HK_EXPORT_COMMON void setTransformedPos(const hkQTransformf& a, hkVector4f_   b);

        /// Sets self to the vector \a b transformed by the inverse matrix of \a a.
        HK_EXPORT_COMMON void setTransformedInversePos(const hkQTransformf& a, hkVector4f_   b);

        ///@}

        /// \name Forced in-line matrix operations
        ///@{

        /// Sets self to the vector \a b rotated by matrix \a a.
        /// This method is enforced inline.
        HK_INLINE void _setRotatedDir(const hkMatrix3f& a, hkVector4f_   b );

        /// Sets self to the vector \a b rotated by the inverse matrix of \a a.
        /// This method is enforced inline.
        HK_INLINE void _setRotatedInverseDir(const hkMatrix3f& a, hkVector4f_   b );

        /// Sets self to the vector \a b transformed by matrix \a a.
        /// This method is enforced inline.
        HK_INLINE void _setTransformedPos(const hkTransformf& a, hkVector4f_   b );

        /// Sets self to the vector \a b transformed by the inverse matrix of \a a.
        /// This method is enforced inline.
        HK_INLINE void _setTransformedInversePos(const hkTransformf& a, hkVector4f_   b );

        /// Sets self to the vector \a b transformed by matrix \a a.
        /// This method is enforced inline.
        /// \remark You need to use this method if scale is present.
        HK_INLINE void _setTransformedPos(const hkQsTransformf& a, hkVector4f_   b );

        /// Sets self to the vector \a b transformed by the inverse matrix of \a a.
        /// This method is enforced inline.
        /// \remark You need to use this method if scale is present.
        HK_INLINE void _setTransformedInversePos(const hkQsTransformf& a, hkVector4f_   b );

        /// Sets self to the vector \a b transformed by matrix \a a.
        /// This method is enforced inline.
        HK_INLINE void _setTransformedPos(const hkQTransformf& a, hkVector4f_   b );

        /// Sets self to the vector \a b transformed by the inverse matrix of \a a.
        /// This method is enforced inline.
        HK_INLINE void _setTransformedInversePos(const hkQTransformf& a, hkVector4f_   b );

        /// Sets self to the vector \a direction rotated by the quaternion \a quat.
        /// This method is enforced inline.
        HK_INLINE void _setRotatedDir(hkQuaternionfParameter quat, hkVector4f_ direction);

        /// Sets self to the vector \a direction rotated by the inverse of quaternion \a quat.
        /// This method is enforced inline.
        HK_INLINE void _setRotatedInverseDir(hkQuaternionfParameter quat, hkVector4f_ direction);

        ///@}

        /// \name Length and normalization
        ///@{



        /// Sets all components of self to the N component dot product of \a a and \a b. ( self = a dot b )
        template <int N> HK_ALWAYS_INLINE void setDot( hkVector4f_ a, hkVector4f_   b );

        /// Sums up N components. ( return x+y+z+w )
        template <int N> HK_ALWAYS_INLINE const hkSimdFloat32 horizontalAdd() const;

        /// Sums up N components of \a v and stores to all components of self. ( self = v(x+y+z+w) )
        template <int N> HK_ALWAYS_INLINE void setHorizontalAdd( hkVector4f_ v);

        /// Product of N components. ( return x*y*z*w )
        template <int N> HK_INLINE const hkSimdFloat32 horizontalMul() const;

        /// Product of N components of \a v and stores to all components of self. ( self = v(x*y*z*w) )
        template <int N> HK_INLINE void setHorizontalMul( hkVector4f_ v);

        /// Returns the maximum value occurring in N components. ( return max(x,y,z,w) )
        template <int N> HK_INLINE const hkSimdFloat32 horizontalMax() const;

        /// horizontalMax() but using integer compare if available, else using floating point compare
        template <int N> HK_INLINE const hkSimdFloat32 horizontalMaxI() const;

        /// Calculates the maximum value occurring in N components of \a v and stores to all components of self. ( self = v.max(x,y,z,w) )
        template <int N> HK_INLINE void setHorizontalMax( hkVector4f_ v);

        /// setHorizontalMax() but using integer compare if available, else using floating point compare
        template <int N> HK_INLINE void setHorizontalMaxI(hkVector4f_ v);

        /// Returns the minimum value occurring in N components. ( return min(x,y,z,w) )
        template <int N> HK_INLINE const hkSimdFloat32 horizontalMin() const;

        /// horizontalMin() but using integer compare if available, else using floating point compare
        template <int N> HK_INLINE const hkSimdFloat32 horizontalMinI() const;

        /// Calculates the minimum value occurring in N components of \a v and stores to all components of self. ( self = v.min(x,y,z,w) )
        template <int N> HK_INLINE void setHorizontalMin( hkVector4f_ v);

        /// setHorizontalMin() but using integer compare if available, else using floating point compare
        template <int N> HK_INLINE void setHorizontalMinI( hkVector4f_ v );

        /// this = (a.x+a.y),(a.z+a.w), (b.x+b.y),(b.z+b.w), this maps to a nice intrinsic on SSE3.0 or ARM neon
        HK_INLINE void setPairedAdd(hkVector4f_ a, hkVector4f_ b);

        /// Returns the length of the vector represented by N of its components. ( return sqrt(self dot self) ).
        /// Accuracy: 23 bit, negative values checked and set to zero
        template <int N> HK_ALWAYS_INLINE const hkSimdFloat32 length() const;

        /// Returns the squared length of the vector represented by N of its components. ( return (self dot self) )
        template <int N> HK_ALWAYS_INLINE const hkSimdFloat32 lengthSquared() const;

        /// Returns the inverse length of the vector represented by N of its components. ( return 1 / sqrt(self dot self) ).
        /// Accuracy: 23 bit, negative sqrt values checked and set to zero, divide-by-0 checked and set to zero
        template <int N> HK_INLINE const hkSimdFloat32 lengthInverse() const;


        /// Normalizes self as an N-component vector. Unused components in self are undefined afterwards. ( self = self / |self| ).
        /// Accuracy: 23 bit, negative sqrt values checked and set to zero, divide-by-0 checked and set to zero
        template <int N> HK_INLINE void normalize();

        /// Normalizes self as an N-component vector and returns the length of self before normalization. Unused components in self are undefined afterwards. ( self = self / |self|; return the old |self| ).
        /// Accuracy: 23 bit, negative sqrt values checked and set to zero, divide-by-0 checked and set to zero
        template <int N> HK_INLINE const hkSimdFloat32 normalizeWithLength();

        /// Normalizes self as an N-component vector. Unused components in self are undefined afterwards. ( self = self / |self| ).
        /// If self is the zero vector, no normalization will occur and false is returned, else true.
        /// Accuracy: 23 bit, negative sqrt values checked and set to zero, divide-by-0 checked and set to zero
        template <int N> HK_INLINE hkBool32 normalizeIfNotZero();

        /// Normalizes \a v as an N-component vector, stores with self and returns the length of \a v before normalization. ( self = v / |v|; return |v| ).
        /// If \a v is zero, self is set to \a defaultValue (default: xyzw=(1,0,0,0) ).
        /// Accuracy: 23 bit.
        template <int N> HK_ALWAYS_INLINE const hkSimdFloat32 setNormalizedEnsureUnitLength( hkVector4f_ v, hkVector4f_ defaultValue = *(const hkVector4f*) (g_vectorfConstants + HK_QUADREAL_1000) );

        ///@}

        /// \name Special functionality
        ///@{

        /// Set the w component of self to the negative 3-component dot product of \a aPointOnPlane with self. The xyz components of self are unchanged.
        /// ( self.w = -p.dot<3>(self) )
        HK_INLINE void setPlaneConstant( hkVector4f_ aPointOnPlane );

        /// Returns the dot product of self and \a a with the w-component of \a a replaced by 1. ( return self.xyzw dot a.xyz1 )
        /// Written out: self.x*a.x + self.y*a.y + self.z*a.z + self.w.
        /// \param a Vector whose xyz components are used; its w component is ignored in favor of 1.
        HK_ALWAYS_INLINE const hkSimdFloat32 dot4xyz1( hkVector4f_ a ) const;

        /// Returns the Euclidean distance between self as a point and the point \a p. ( return len(self - p) ).
        /// Accuracy: 23 bit, negative sqrt values checked and set to zero, divide-by-0 checked and set to zero
        /// \param p The point to measure the distance to.
        HK_INLINE const hkSimdFloat32 distanceTo( hkVector4f_ p ) const;

        /// Returns the squared Euclidean distance between self as a point and the point \a p. ( return len(self - p) * len(self - p) )
        /// \param p The point to measure the squared distance to.
        HK_INLINE const hkSimdFloat32 distanceToSquared( hkVector4f_ p ) const;

        ///@}

        /// \name Component access
        ///@{

        /// Set the xyz components of self from \a xyz. Set the w component of self from \a w.
        /// Vector overload: \a w is passed as a vector.
        /// NOTE(review): presumably only the w component of \a w is read - confirm against the implementation.
        HK_ALWAYS_INLINE void setXYZ_W( hkVector4f_ xyz, hkVector4f_ w);

        /// Set the xyz components of self from \a xyz. Set the w component of self from \a w.
        /// Scalar overload: \a w is passed as a SIMD scalar (hkSimdFloat32).
        HK_ALWAYS_INLINE void setXYZ_W( hkVector4f_ xyz, hkSimdFloat32_ w);

        /// Set the w component of self from \a v. The xyz components are unchanged.
        /// Vector overload: \a v is passed as a vector.
        /// NOTE(review): presumably only the w component of \a v is read - confirm against the implementation.
        HK_ALWAYS_INLINE void setW( hkVector4f_ v);

        /// Set the w component of self from \a w. The xyz components are unchanged.
        /// Scalar overload: \a w is passed as a SIMD scalar (hkSimdFloat32).
        HK_ALWAYS_INLINE void setW( hkSimdFloat32_ w);

        /// Set the xyz components of self from \a v. The w component is unchanged.
        /// Vector overload: x, y and z are taken from the vector \a v.
        HK_ALWAYS_INLINE void setXYZ( hkVector4f_ v);

        /// Set the xyz components of self all to the same value \a v. The w component is unchanged.
        /// Plain float overload.
        HK_ALWAYS_INLINE void setXYZ(float v);

        /// Set the xyz components of self all to the same value \a v. The w component is unchanged.
        /// SIMD scalar (hkSimdFloat32) overload.
        HK_ALWAYS_INLINE void setXYZ( hkSimdFloat32_ v);

        /// Set the xyz components of self from \a xyz. Set the w component of self to zero.
        /// \param xyz Vector providing the x, y and z components.
        HK_ALWAYS_INLINE void setXYZ_0( hkVector4f_ xyz);

        /// Stores a 24 bit integer value in the w component of self. The integer can be retrieved using the getInt24W() method.
        /// Note for storing negative integers, the sign handling needs to be done prior to using this method.
        /// \warning Using the w component in floating point calculations after this can destroy the integer value and
        ///          will most likely produce denormalized floating point numbers.
        /// \param value The integer to store; only 24 bits are kept according to the description above.
        HK_INLINE void setInt24W( int value );

        /// Returns a 24 bit integer stored in the w component of self. The integer value must have been set with
        /// setInt24W() before.
        /// \see setInt24W()
        HK_ALWAYS_INLINE int getInt24W( ) const ;

        /// Returns a 16 bit integer stored in the w component of self. The integer value must have been set with
        /// setInt24W() before.
        /// NOTE(review): how the 16 bit result relates to the 24 bit value written by setInt24W() is not stated here - confirm against the implementation.
        /// \see setInt24W()
        HK_ALWAYS_INLINE int getInt16W( ) const ;


        /// Gives read/write access to element i.  (0,1,2,3) correspond to the (x,y,z,w) components respectively.
        /// \remark Use this method only for storage purposes or immediate manipulation when the component index is not a compile time constant.
        /// \param i Component index, 0 to 3.
        /// \return Mutable reference to the selected float component.
        HK_ALWAYS_INLINE float& operator() (int i);

        /// Gives read only access to element i.  (0,1,2,3) correspond to the (x,y,z,w) components respectively.
        /// \remark Use this method only for storage purposes when the component index is not a compile time constant.
        /// \param i Component index, 0 to 3.
        /// \return Const reference to the selected float component.
        HK_ALWAYS_INLINE const float& operator() (int i) const;

        /// Return component I.  (0,1,2,3) correspond to the (x,y,z,w) components respectively.
        /// \tparam I Compile time component index, 0 to 3.
        template <int I> HK_ALWAYS_INLINE const hkSimdFloat32 getComponent() const;


        // Named shortcuts for the compile time indexed getComponent<I>().
        HK_ALWAYS_INLINE const hkSimdFloat32 getX() const;  ///< Return the x component, short for getComponent<0>()
        HK_ALWAYS_INLINE const hkSimdFloat32 getY() const;  ///< Return the y component, short for getComponent<1>()
        HK_ALWAYS_INLINE const hkSimdFloat32 getZ() const;  ///< Return the z component, short for getComponent<2>()
        HK_ALWAYS_INLINE const hkSimdFloat32 getW() const;  ///< Return the w component, short for getComponent<3>()

        /// Return component \a i.  (0,1,2,3) correspond to the (x,y,z,w) components respectively.
        /// \remark Use this method only when the component index is not a compile time constant.
        /// \param i Runtime component index, 0 to 3.
        HK_INLINE const hkSimdFloat32 getComponent(const int i) const;

        /// Sets value of component I.  (0,1,2,3) correspond to the (x,y,z,w) components respectively.
        /// \tparam I  Compile time component index, 0 to 3.
        /// \param val New value for the selected component.
        template <int I> HK_INLINE void setComponent( hkSimdFloat32_ val);

        /// Sets value of component \a i.  (0,1,2,3) correspond to the (x,y,z,w) components respectively.
        /// \remark Use this method only when the component index is not a compile time constant.
        /// \param i   Runtime component index, 0 to 3.
        /// \param val New value for the selected component.
        HK_INLINE void setComponent(const int i, hkSimdFloat32_ val);

        /// Returns the index of the component for the first occurrence of the exact given \a value among the first N components.
        /// Returns -1 if no component matches. ( return self ?= component )
        /// \tparam N    Number of leading components to search.
        /// \param value Value to look for; the match is exact, no tolerance is applied.
        /// \return Index of the first matching component, or -1 if there is none.
        template <int N> HK_INLINE int findComponent( hkSimdFloat32_ value) const;

        /// Set self to a component permutation of \a v. See the symbols in hkVectorPermutation.
        /// \tparam P Compile time permutation selector, a value of hkVectorPermutation::Permutation.
        /// \param v  Vector whose components are permuted into self.
        template <hkVectorPermutation::Permutation P> HK_INLINE void setPermutation( hkVector4f_ v);

        /// Copy the value of component I of self into all components of self.
        /// \tparam I Compile time index of the component to replicate.
        template <int I> HK_INLINE void broadcast();

        /// Copy the value of component \a i of self into all components of self.
        /// \remark Use this method only when the component index is not a compile time constant.
        /// \param i Runtime index of the component to replicate.
        HK_INLINE void broadcast(int i);

        /// Copy the value of component I of \a v into all components of self.
        /// \tparam I Compile time index of the component of \a v to replicate.
        /// \param v  Source vector.
        template <int I> HK_INLINE void setBroadcast( hkVector4f_ v);

        /// Copy the value of component \a i of \a v into all components of self.
        /// \remark Use this method only when the component index is not a compile time constant.
        /// \param i Runtime index of the component of \a v to replicate.
        /// \param v Source vector.
        HK_INLINE void setBroadcast(const int i, hkVector4f_ v);

        /// Returns the index of the component with the largest absolute value among the first N components.
        /// In case of equality, returns the last component index given X,Y,Z,W ordering.
        /// \tparam N Number of leading components to inspect.
        template <int N> HK_INLINE int getIndexOfMaxAbsComponent() const;

        /// Returns the index of the component with the largest signed value among the first N components.
        /// In case of equality, returns the last component index given X,Y,Z,W ordering.
        /// \tparam N Number of leading components to inspect.
        template <int N> HK_INLINE int getIndexOfMaxComponent() const;

        /// Same as getIndexOfMaxComponent(), but uses an integer compare. The result is therefore only
        /// reliable as long as the max component is positive. Slightly faster than getIndexOfMaxComponent().
        /// \tparam N Number of leading components to inspect.
        template <int N> HK_INLINE int getIndexOfMaxIComponent() const;

        /// Returns the index of the component with the smallest absolute value among the first N components.
        /// In case of equality, returns the first component index given X,Y,Z,W ordering.
        /// \tparam N Number of leading components to inspect.
        template <int N> HK_INLINE int getIndexOfMinAbsComponent() const;

        /// Returns the index of the component with the smallest signed value among the first N components.
        /// In case of equality, returns the first component index given X,Y,Z,W ordering.
        /// \tparam N Number of leading components to inspect.
        template <int N> HK_INLINE int getIndexOfMinComponent() const;

        ///@}

        /// \name Checking, loading and storing
        ///@{

        /// Returns true if the first N components are valid finite floating point numbers.
        /// \tparam N Number of leading components to check.
        template <int N> HK_INLINE hkBool32 isOk() const;

        /// Returns true if the length of self is one within the given \a epsilon bounds.
        /// NOTE(review): presumably the length is taken over the first N components, as for the other N-component methods - confirm.
        /// \param epsilon Allowed deviation; defaults to 1e-4.
        template <int N> HK_INLINE bool isNormalized(float epsilon = float(1e-4f)) const;

        /// Get a constant vector with all components zero.
        /// \return Const reference to the zero vector.
        HK_ALWAYS_INLINE static const hkVector4f& HK_CALL getZero();

        /// Get a constant vector. See the symbols in hkVectorConstant.
        /// \tparam vectorConstant Compile time selector of the constant.
        /// \return Const reference to the selected constant vector.
        template<int vectorConstant>
        HK_ALWAYS_INLINE static const hkVector4f& HK_CALL getConstant();

        /// Get a constant vector. See the symbols in hkVectorConstant.
        /// \remark Use this method only when the constant is not known at compile time.
        /// \param constant Runtime selector of the constant.
        /// \return Const reference to the selected constant vector.
        HK_ALWAYS_INLINE static const hkVector4f& HK_CALL getConstant( hkVectorConstant constant);

        /// Set this vector to a constant.
        /// \tparam vectorConstant Compile time selector of the constant to copy into self.
        template<int vectorConstant>
        HK_INLINE void setConstant();

        // Note for all load() overloads: components that are not loaded are undefined; per the
        // HK_VECTOR4f_DEBUG_FILL_VALUE definition at the top of this file they are filled with that
        // value in Debug and Dev configurations.

        /// Load double precision floating point values for N components from linear addresses at \a p. Not loaded components are undefined.
        /// The pointer \a p must be aligned for SIMD operations.
        /// \param p Source array of hkDouble64, annotated as holding at least N readable elements.
        template <int N> HK_INLINE void load(_In_reads_(N) const hkDouble64* p);

        /// Load single precision floating point values for N components from linear addresses at \a p. Not loaded components are undefined.
        /// The pointer \a p must be aligned for SIMD operations.
        /// \param p Source array of float, annotated as holding at least N readable elements.
        template <int N> HK_INLINE void load(_In_reads_(N) const float* p);

        /// Load single precision floating point values for N components from linear addresses at \a p. Not loaded components are undefined.
        /// The pointer \a p must be aligned for SIMD operations.
        /// \param p Source array of hkHalf32, annotated as holding at least N readable elements.
        template <int N> HK_INLINE void load(_In_reads_(N) const hkHalf32* p);

        /// Load and unpack floating point values for N components from linear addresses at \a p. Not loaded components are undefined.
        /// The pointer \a p must be aligned for SIMD operations.
        /// \param p Source array of packed hkHalf16 values, annotated as holding at least N readable elements.
        template <int N> HK_INLINE void load(_In_reads_(N) const hkHalf16* p);

        /// Load and unpack floating point values for N components from linear addresses at \a p. Not loaded components are undefined.
        /// The pointer \a p must be aligned for SIMD operations.
        /// \param p Source array of packed hkFloat16 values, annotated as holding at least N readable elements.
        template <int N> HK_INLINE void load(_In_reads_(N) const hkFloat16* p);

        /// Store double precision floating point values of N components to linear addresses at \a p.
        /// The rounding is the system default.
        /// The pointer \a p must be aligned for SIMD operations.
        /// \param p Destination array of hkDouble64, annotated as having all N elements written.
        template <int N> HK_INLINE void store(_Out_writes_all_(N) hkDouble64* p) const;

        /// Store single precision floating point values of N components to linear addresses at \a p.
        /// The rounding is the system default.
        /// The pointer \a p must be aligned for SIMD operations.
        /// \param p Destination array of float, annotated as having all N elements written.
        template <int N> HK_INLINE void store(_Out_writes_all_(N) float* p) const;

        /// Store single precision floating point values of N components to linear addresses at \a p.
        /// The rounding is the system default.
        /// The pointer \a p must be aligned for SIMD operations.
        /// \param p Destination array of hkHalf32, annotated as having all N elements written.
        template <int N> HK_INLINE void store(_Out_writes_all_(N) hkHalf32* p) const;

        /// Pack and store floating point values of N components to linear addresses at \a p.
        /// The rounding is the system default.
        /// The pointer \a p must be aligned for SIMD operations.
        /// \param p Destination array of packed hkHalf16 values, annotated as having all N elements written.
        template <int N> HK_INLINE void store(_Out_writes_all_(N) hkHalf16* p) const;

        /// Pack and store floating point values of N components to linear addresses at \a p.
        /// The rounding is the system default.
        /// The pointer \a p must be aligned for SIMD operations.
        /// \param p Destination array of packed hkFloat16 values, annotated as having all N elements written.
        template <int N> HK_INLINE void store(_Out_writes_all_(N) hkFloat16* p) const;

        ///@}


        /// \name Advanced interface
        ///@{

        // In the overloads below, template parameter A (hkMathIoMode) selects the IO mode and
        // N is the number of components transferred.

        /// Load double precision floating point values for N components from linear addresses at \a p. Not loaded components are undefined.
        /// See the documentation at the template values for the requested IO mode.
        /// \tparam A IO mode, a value of hkMathIoMode.
        /// \param p  Source array of hkDouble64, annotated as holding at least N readable elements.
        template <int N, hkMathIoMode A> HK_INLINE void load(_In_reads_(N) const hkDouble64* p);

        /// Load single precision floating point values for N components from linear addresses at \a p. Not loaded components are undefined.
        /// See the documentation at the template values for the requested IO mode.
        /// \tparam A IO mode, a value of hkMathIoMode.
        /// \param p  Source array of float, annotated as holding at least N readable elements.
        template <int N, hkMathIoMode A> HK_INLINE void load(_In_reads_(N) const float* p);

        /// Load single precision floating point values for N components from linear addresses at \a p. Not loaded components are undefined.
        /// See the documentation at the template values for the requested IO mode.
        /// \tparam A IO mode, a value of hkMathIoMode.
        /// \param p  Source array of hkHalf32, annotated as holding at least N readable elements.
        template <int N, hkMathIoMode A> HK_INLINE void load(_In_reads_(N) const hkHalf32* p);

        /// Load and unpack floating point values for N components from linear addresses at \a p. Not loaded components are undefined.
        /// See the documentation at the template values for the requested IO mode.
        /// \tparam A IO mode, a value of hkMathIoMode.
        /// \param p  Source array of packed hkHalf16 values, annotated as holding at least N readable elements.
        template <int N, hkMathIoMode A> HK_INLINE void load(_In_reads_(N) const hkHalf16* p);

        /// Load and unpack floating point values for N components from linear addresses at \a p. Not loaded components are undefined.
        /// See the documentation at the template values for the requested IO mode.
        /// \tparam A IO mode, a value of hkMathIoMode.
        /// \param p  Source array of packed hkFloat16 values, annotated as holding at least N readable elements.
        template <int N, hkMathIoMode A> HK_INLINE void load(_In_reads_(N) const hkFloat16* p);

        // In the overloads below, template parameter A (hkMathIoMode) selects the IO mode and
        // R (hkMathRoundingMode) selects the rounding mode.

        /// Store double precision floating point values of N components to linear addresses at \a p.
        /// See the documentation at the template values for the requested IO mode and rounding mode.
        /// \tparam A IO mode, a value of hkMathIoMode.
        /// \tparam R Rounding mode, a value of hkMathRoundingMode.
        /// \param p  Destination array of hkDouble64, annotated as having all N elements written.
        template <int N, hkMathIoMode A, hkMathRoundingMode R> HK_INLINE void store(_Out_writes_all_(N) hkDouble64* p) const;

        /// Store single precision floating point values of N components to linear addresses at \a p.
        /// See the documentation at the template values for the requested IO mode and rounding mode.
        /// \tparam A IO mode, a value of hkMathIoMode.
        /// \tparam R Rounding mode, a value of hkMathRoundingMode.
        /// \param p  Destination array of float, annotated as having all N elements written.
        template <int N, hkMathIoMode A, hkMathRoundingMode R> HK_INLINE void store(_Out_writes_all_(N) float* p) const;

        /// Store single precision floating point values of N components to linear addresses at \a p.
        /// See the documentation at the template values for the requested IO mode and rounding mode.
        /// \tparam A IO mode, a value of hkMathIoMode.
        /// \tparam R Rounding mode, a value of hkMathRoundingMode.
        /// \param p  Destination array of hkHalf32, annotated as having all N elements written.
        template <int N, hkMathIoMode A, hkMathRoundingMode R> HK_INLINE void store(_Out_writes_all_(N) hkHalf32* p) const;

        /// Pack and store floating point values of N components to linear addresses at \a p.
        /// See the documentation at the template values for the requested IO mode and rounding mode.
        /// \tparam A IO mode, a value of hkMathIoMode.
        /// \tparam R Rounding mode, a value of hkMathRoundingMode.
        /// \param p  Destination array of packed hkHalf16 values, annotated as having all N elements written.
        template <int N, hkMathIoMode A, hkMathRoundingMode R> HK_INLINE void store(_Out_writes_all_(N) hkHalf16* p) const;

        /// Pack and store floating point values of N components to linear addresses at \a p.
        /// See the documentation at the template values for the requested IO mode and rounding mode.
        /// \tparam A IO mode, a value of hkMathIoMode.
        /// \tparam R Rounding mode, a value of hkMathRoundingMode.
        /// \param p  Destination array of packed hkFloat16 values, annotated as having all N elements written.
        template <int N, hkMathIoMode A, hkMathRoundingMode R> HK_INLINE void store(_Out_writes_all_(N) hkFloat16* p) const;

        // The overloads below take only the IO mode A (hkMathIoMode); rounding is the system default.

        /// Store double precision floating point values of N components to linear addresses at \a p.
        /// The rounding is the system default.
        /// See the documentation at the template values for the requested IO mode.
        /// \tparam A IO mode, a value of hkMathIoMode.
        /// \param p  Destination array of hkDouble64, annotated as having all N elements written.
        template <int N, hkMathIoMode A> HK_INLINE void store(_Out_writes_all_(N) hkDouble64* p) const;

        /// Store single precision floating point values of N components to linear addresses at \a p.
        /// The rounding is the system default.
        /// See the documentation at the template values for the requested IO mode.
        /// \tparam A IO mode, a value of hkMathIoMode.
        /// \param p  Destination array of float, annotated as having all N elements written.
        template <int N, hkMathIoMode A> HK_INLINE void store(_Out_writes_all_(N) float* p) const;

        /// Store single precision floating point values of N components to linear addresses at \a p.
        /// The rounding is the system default.
        /// See the documentation at the template values for the requested IO mode.
        /// \tparam A IO mode, a value of hkMathIoMode.
        /// \param p  Destination array of hkHalf32, annotated as having all N elements written.
        template <int N, hkMathIoMode A> HK_INLINE void store(_Out_writes_all_(N) hkHalf32* p) const;

        /// Pack and store floating point values of N components to linear addresses at \a p.
        /// The rounding is the system default.
        /// See the documentation at the template values for the requested IO mode.
        /// \tparam A IO mode, a value of hkMathIoMode.
        /// \param p  Destination array of packed hkHalf16 values, annotated as having all N elements written.
        template <int N, hkMathIoMode A> HK_INLINE void store(_Out_writes_all_(N) hkHalf16* p) const;

        /// Pack and store floating point values of N components to linear addresses at \a p.
        /// The rounding is the system default.
        /// See the documentation at the template values for the requested IO mode.
        /// \tparam A IO mode, a value of hkMathIoMode.
        /// \param p  Destination array of packed hkFloat16 values, annotated as having all N elements written.
        template <int N, hkMathIoMode A> HK_INLINE void store(_Out_writes_all_(N) hkFloat16* p) const;

        /// Set self to the component-wise reciprocal of \a v. ( self = 1/v ).
        /// See the documentation at the template values for the requested behavior.
        /// \tparam A Accuracy mode, a value of hkMathAccuracyMode.
        /// \tparam D Divide-by-zero mode, a value of hkMathDivByZeroMode.
        template <hkMathAccuracyMode A, hkMathDivByZeroMode D> HK_INLINE void setReciprocal( hkVector4f_ v);

        /// Set self to the component-wise quotient of \a a over \a b. ( self = a/b ).
        /// See the documentation at the template values for the requested behavior.
        /// \tparam A Accuracy mode, a value of hkMathAccuracyMode.
        /// \tparam D Divide-by-zero mode, a value of hkMathDivByZeroMode.
        /// \param a  Dividend.
        /// \param b  Divisor.
        template <hkMathAccuracyMode A, hkMathDivByZeroMode D> HK_INLINE void setDiv( hkVector4f_ a, hkVector4f_   b );

        /// Component-wise division by \a a. ( self /= a ).
        /// See the documentation at the template values for the requested behavior.
        /// \tparam A Accuracy mode, a value of hkMathAccuracyMode.
        /// \tparam D Divide-by-zero mode, a value of hkMathDivByZeroMode.
        template <hkMathAccuracyMode A, hkMathDivByZeroMode D> HK_INLINE void div( hkVector4f_ a );

        /// Set self to the component-wise square root of \a a. ( self = sqrt(a) ).
        /// See the documentation at the template values for the requested behavior.
        /// \tparam A Accuracy mode, a value of hkMathAccuracyMode.
        /// \tparam S Negative-sqrt mode, a value of hkMathNegSqrtMode.
        template <hkMathAccuracyMode A, hkMathNegSqrtMode S> HK_INLINE void setSqrt( hkVector4f_ a );

        /// Set self component-wise to one over square root of \a a. ( self = 1/sqrt(a) ).
        /// See the documentation at the template values for the requested behavior.
        /// \tparam A Accuracy mode, a value of hkMathAccuracyMode.
        /// \tparam S Negative-sqrt mode, a value of hkMathNegSqrtMode.
        template <hkMathAccuracyMode A, hkMathNegSqrtMode S> HK_INLINE void setSqrtInverse( hkVector4f_ a );

        /// Returns the length of the vector represented by N of its components. ( return sqrt(self dot self) ).
        /// See the documentation at the template values for the requested behavior.
        /// \tparam N Number of components the vector is treated as having.
        /// \tparam A Accuracy mode, a value of hkMathAccuracyMode.
        /// \tparam S Negative-sqrt mode, a value of hkMathNegSqrtMode.
        template <int N, hkMathAccuracyMode A, hkMathNegSqrtMode S> HK_INLINE const hkSimdFloat32 length() const;

        /// Returns the inverse length of the vector represented by N of its components. ( return 1 / sqrt(self dot self) ).
        /// See the documentation at the template values for the requested behavior.
        /// \tparam N Number of components the vector is treated as having.
        /// \tparam A Accuracy mode, a value of hkMathAccuracyMode.
        /// \tparam S Negative-sqrt mode, a value of hkMathNegSqrtMode.
        template <int N, hkMathAccuracyMode A, hkMathNegSqrtMode S> HK_INLINE const hkSimdFloat32 lengthInverse() const;

        /// Normalizes self in place as an N-component vector. ( self = self / len(self) ).
        /// Unused components in self are undefined afterwards.
        /// See the documentation at the template values for the requested behavior.
        /// \tparam A Accuracy mode, a value of hkMathAccuracyMode.
        /// \tparam S Negative-sqrt mode, a value of hkMathNegSqrtMode.
        template <int N, hkMathAccuracyMode A, hkMathNegSqrtMode S> HK_INLINE void normalize();

        /// Normalizes self in place as an N-component vector and returns the length self had before normalization.
        /// ( result = len(self); self = self / len(self) ). Unused components in self are undefined afterwards.
        /// See the documentation at the template values for the requested behavior.
        /// \tparam A Accuracy mode, a value of hkMathAccuracyMode.
        /// \tparam S Negative-sqrt mode, a value of hkMathNegSqrtMode.
        template <int N, hkMathAccuracyMode A, hkMathNegSqrtMode S> HK_INLINE const hkSimdFloat32 normalizeWithLength();

        /// Normalizes self in place as an N-component vector. ( self = self / len(self) ).
        /// Unused components in self are undefined afterwards.
        /// If self is the zero vector, no normalization will occur and false is returned, else true.
        /// See the documentation at the template values for the requested behavior.
        /// \tparam A Accuracy mode, a value of hkMathAccuracyMode.
        /// \tparam S Negative-sqrt mode, a value of hkMathNegSqrtMode.
        template <int N, hkMathAccuracyMode A, hkMathNegSqrtMode S> HK_INLINE hkBool32 normalizeIfNotZero();

        /// Returns the Euclidean distance between self as a point and the point \a p. ( return len(self - p) ).
        /// See the documentation at the template values for the requested behavior.
        /// \tparam A Accuracy mode, a value of hkMathAccuracyMode.
        /// \tparam S Negative-sqrt mode, a value of hkMathNegSqrtMode.
        /// \param p  The point to measure the distance to.
        template <hkMathAccuracyMode A, hkMathNegSqrtMode S> HK_INLINE const hkSimdFloat32 distanceTo( hkVector4f_ p ) const;

        /// Normalizes \a v as an N-component vector, stores the result in self and returns the length of \a v before normalization.
        /// ( result = len(v); self = v / len(v) ).
        /// If \a v is zero (within machine epsilon), self is set to \a defaultValue, which defaults to the unit vector (1,0,0,0).
        /// See the documentation at the template values for the requested behavior.
        /// \tparam A           Accuracy mode, a value of hkMathAccuracyMode.
        /// \param v            Vector to normalize.
        /// \param defaultValue Value stored in self when \a v is zero. Defaults to hkVector4f::getConstant(HK_QUADREAL_1000).
        /// \return The length of \a v before normalization.
        template <int N, hkMathAccuracyMode A> HK_INLINE const hkSimdFloat32 setNormalizedEnsureUnitLength( hkVector4f_ v, hkVector4f_ defaultValue = hkVector4f::getConstant(HK_QUADREAL_1000) );

        /// Sets self to a copy of \a vSrc that is rescaled to have a maximum length of \a maxLen.
        /// If \a vSrc is shorter than \a maxLen, no rescaling is performed.
        /// \tparam N      Number of components the vector is treated as having.
        /// \tparam A      Accuracy mode, a value of hkMathAccuracyMode.
        /// \param vSrc    Source vector.
        /// \param maxLen  Upper bound for the length of the result.
        template <int N, hkMathAccuracyMode A>
        HK_INLINE void setClampedToMaxLength( hkVector4f_ vSrc, hkSimdFloat32_ maxLen);

        ///@}


        /// Internal data storage of the vector components (platform dependent).
        /// For writing portable code, nothing can be assumed about the internal layout of the values.
        /// The member is declared through HK_ALIGN_FLOAT, which supplies its alignment.
        HK_ALIGN_FLOAT( hkQuadFloat32 m_quad ); 


#ifndef HK_DISABLE_OLD_VECTOR4_INTERFACE

        //
        // old interface
        //
        // Legacy method names. This whole section is only declared when
        // HK_DISABLE_OLD_VECTOR4_INTERFACE is not defined.
        // Apart from add3clobberW() and sub3clobberW() near the end, which are defined inline,
        // only declarations are given here; their behavior is not documented in this section.
        //

        HK_ALWAYS_INLINE  hkQuadFloat32& getQuad();
        HK_ALWAYS_INLINE  const hkQuadFloat32& getQuad() const;
        HK_ALWAYS_INLINE  void operator= ( const hkQuadFloat32& v );
        HK_ALWAYS_INLINE  void add4( hkVector4f_ v);
        HK_ALWAYS_INLINE  void sub4( hkVector4f_ v);
        HK_ALWAYS_INLINE  void mul4( hkVector4f_ a );
        HK_ALWAYS_INLINE  void mul4( hkSimdFloat32_ a );
        HK_ALWAYS_INLINE  void div4( hkVector4f_ a );
        HK_ALWAYS_INLINE  void div4fast( hkVector4f_ a );
        HK_ALWAYS_INLINE  float dot3fpu( hkVector4f_ a ) const;
        HK_ALWAYS_INLINE  void setMul4( hkVector4f_ a, hkVector4f_   b );
        HK_ALWAYS_INLINE  void setMul4( hkSimdFloat32_ a, hkVector4f_   b );
        HK_ALWAYS_INLINE  void _setMul3(const hkMatrix3f& m, hkVector4f_ v);
        HK_ALWAYS_INLINE  void setMul3(const hkMatrix3f& a, hkVector4f_   b );
        HK_ALWAYS_INLINE  void subMul4( hkVector4f_ a, hkVector4f_   b );
        HK_ALWAYS_INLINE  void subMul4( hkSimdFloat32_ a, hkVector4f_   b );
        HK_ALWAYS_INLINE  void setSubMul4( hkVector4f_ a, hkVector4f_ x, hkVector4f_ y);
        HK_ALWAYS_INLINE  void setSubMul4( hkVector4f_ a, hkVector4f_ x, hkSimdFloat32_ y);
        HK_ALWAYS_INLINE  void setDot3( hkVector4f_ a, hkVector4f_   b );
        HK_ALWAYS_INLINE  void setDot4( hkVector4f_ a, hkVector4f_   b );
        HK_ALWAYS_INLINE  void setSelect4( hkVector4fComp_ comp, hkVector4f_ trueValue, hkVector4f_ falseValue);
        HK_ALWAYS_INLINE  void select32( hkVector4f_ falseValue, hkVector4f_ trueValue, hkVector4fComp_ comp);
        HK_ALWAYS_INLINE  void setBroadcast( hkVector4f_ v, int i);
        HK_ALWAYS_INLINE  void setBroadcast3clobberW( hkVector4f_ v, int i);
        HK_ALWAYS_INLINE  void setXYZW( hkVector4f_ xyz, hkVector4f_ w);
        HK_ALWAYS_INLINE  void setXYZW( hkVector4f_ xyz, hkSimdFloat32_ w);
        HK_ALWAYS_INLINE  void setXYZ0( hkVector4f_ xyz);
        HK_ALWAYS_INLINE  void addMul4( hkVector4f_ a, hkVector4f_   b );
        HK_ALWAYS_INLINE  void addMul4( hkSimdFloat32_ a, hkVector4f_   b );
        HK_ALWAYS_INLINE  void setZero4();
        HK_ALWAYS_INLINE  void zeroElement( int i );
        HK_ALWAYS_INLINE  void setAll3(float x);
        HK_ALWAYS_INLINE  void setSwapXY(const hkVector4f& x);
        HK_ALWAYS_INLINE  void setNeg3( hkVector4f_ v);
        HK_ALWAYS_INLINE  void setNeg4( hkVector4f_ v);
        HK_ALWAYS_INLINE  void setNegMask4( hkVector4f_ v, int mask);
        HK_ALWAYS_INLINE  void setDiv4( hkVector4f_ a, hkVector4f_   b );
        HK_ALWAYS_INLINE  void setDiv4fast( hkVector4f_ a, hkVector4f_   b );
        HK_ALWAYS_INLINE  hkSimdFloat32 getSimdAt(int i) const;
        HK_ALWAYS_INLINE  void normalize3();
        HK_ALWAYS_INLINE  void normalize4();
        HK_ALWAYS_INLINE  void fastNormalize3();
        HK_ALWAYS_INLINE  void setFastNormalize3NonZero( hkVector4f_ other);
        HK_ALWAYS_INLINE  hkSimdFloat32 normalizeWithLength3();
        HK_ALWAYS_INLINE  hkSimdFloat32 normalizeWithLength4();
        HK_ALWAYS_INLINE  hkSimdFloat32 fastNormalizeWithLength3();
        HK_ALWAYS_INLINE  void fastNormalize3NonZero();
        HK_ALWAYS_INLINE  hkResult normalize3IfNotZero ();
        HK_ALWAYS_INLINE  hkBool isNormalized3(float eps = 1e-4f) const;
        HK_ALWAYS_INLINE  hkBool isNormalized4(float eps = 1e-4f) const;
        HK_ALWAYS_INLINE  hkSimdFloat32 length3() const;
        HK_ALWAYS_INLINE  hkSimdFloat32 length4() const;
        HK_ALWAYS_INLINE  hkSimdFloat32 lengthSquared3() const;
        HK_ALWAYS_INLINE  hkSimdFloat32 lengthSquared4() const;
        HK_ALWAYS_INLINE  hkSimdFloat32 lengthInverse3() const;
        HK_ALWAYS_INLINE  hkSimdFloat32 lengthInverse4() const;
        HK_ALWAYS_INLINE  void setMulSigns4( hkVector4f_ a, hkVector4f_ signs);
        HK_ALWAYS_INLINE  void setMulSigns4( hkVector4f_ a, hkSimdFloat32_ sharedSign);
        HK_ALWAYS_INLINE  void setAdd4( hkVector4f_ a, hkVector4f_   b );
        HK_ALWAYS_INLINE  void setSub4( hkVector4f_ a, hkVector4f_   b );
        HK_ALWAYS_INLINE  void setSqrtInverse4( hkVector4f_ v);
        HK_ALWAYS_INLINE  void setSqrtInverse4_7BitAccuracy( hkVector4f_ v);
        HK_ALWAYS_INLINE  void setReciprocal3( hkVector4f_ v);
        HK_ALWAYS_INLINE  void setReciprocal4( hkVector4f_ v);
        HK_ALWAYS_INLINE  void setAddMul4( hkVector4f_ a, hkVector4f_ x, hkVector4f_ y);
        HK_ALWAYS_INLINE  void setAddMul4( hkVector4f_ a, hkVector4f_   b, hkSimdFloat32_ r);
        HK_ALWAYS_INLINE  void setAbs4( hkVector4f_ v);
        HK_ALWAYS_INLINE  void setMin4( hkVector4f_ a, hkVector4f_   b );
        HK_ALWAYS_INLINE  void setMax4( hkVector4f_ a, hkVector4f_   b );
        HK_ALWAYS_INLINE  hkBool32 equals3(const hkVector4f &a, float epsilon = 1e-3f ) const;
        HK_ALWAYS_INLINE  hkBool32 equals4(const hkVector4f &a, float epsilon = 1e-3f ) const;
        HK_ALWAYS_INLINE  hkVector4fComp compareEqual4( hkVector4f_ a ) const;
        HK_ALWAYS_INLINE  hkVector4fComp compareLessThan4( hkVector4f_ a ) const;
        HK_ALWAYS_INLINE  hkVector4fComp compareLessThanEqual4( hkVector4f_ a ) const;
        HK_ALWAYS_INLINE  hkVector4fComp compareGreaterThan4( hkVector4f_ a ) const;
        HK_ALWAYS_INLINE  hkVector4fComp compareGreaterThanEqual4( hkVector4f_ a ) const;
        HK_ALWAYS_INLINE  hkVector4fComp compareLessThanZero4() const;
        HK_ALWAYS_INLINE  hkBool32 allLessThan3( hkVector4f_ a ) const;
        HK_ALWAYS_INLINE  hkBool32 allLessThan4( hkVector4f_ a ) const;
        HK_ALWAYS_INLINE  void setInterpolate4( hkVector4f_ a, hkVector4f_   b, hkSimdFloat32_ t );
        HK_ALWAYS_INLINE  hkSimdFloat32 distanceTo3( hkVector4f_ p ) const;
        HK_ALWAYS_INLINE  hkSimdFloat32 distanceToSquared3( hkVector4f_ p ) const;
        HK_ALWAYS_INLINE  hkSimdFloat32 horizontalAdd3() const;
        HK_ALWAYS_INLINE  void setHorizontalMax4( hkVector4f_ p);
        HK_ALWAYS_INLINE  hkSimdFloat32 getHorizontalMin3() const;
        HK_ALWAYS_INLINE  hkSimdFloat32 getHorizontalMax3() const;
        HK_ALWAYS_INLINE  void _setMul4xyz1(const hkTransformf& a, hkVector4f_   b );
        template <int S> HK_ALWAYS_INLINE  void setShuffle( hkVector4f_ v);
        HK_ALWAYS_INLINE  hkBool isOk3() const;
        HK_ALWAYS_INLINE  hkBool isOk4() const;
        HK_ALWAYS_INLINE  const hkVector4fComp isNegative() const;
        HK_ALWAYS_INLINE  const hkVector4fComp isPositive() const;
        HK_ALWAYS_INLINE  void load3(_In_reads_(3) const float* p);
        HK_ALWAYS_INLINE  void load4(_In_reads_(4) const float* p);
        HK_ALWAYS_INLINE  void load4a(_In_reads_(4) const float* p);
        HK_ALWAYS_INLINE  void store3(_Out_writes_all_(3) float* p) const;
        HK_ALWAYS_INLINE  void store4(_Out_writes_all_(4) float* p) const;
        HK_ALWAYS_INLINE  void store4a(_Out_writes_all_(4) float* p) const;
        HK_ALWAYS_INLINE  void storeX(_Out_writes_all_(1) float* dest) const;
        HK_ALWAYS_INLINE  void load3(_In_reads_(3) const hkDouble64* p);
        HK_ALWAYS_INLINE  void load4(_In_reads_(4) const hkDouble64* p);
        HK_ALWAYS_INLINE  void load4a(_In_reads_(4) const hkDouble64* p);
        HK_ALWAYS_INLINE  void store3(_Out_writes_all_(3) hkDouble64* p) const;
        HK_ALWAYS_INLINE  void store4(_Out_writes_all_(4) hkDouble64* p) const;
        HK_ALWAYS_INLINE  void store4a(_Out_writes_all_(4) hkDouble64* p) const;
        HK_ALWAYS_INLINE  void storeX(_Out_writes_all_(1) hkDouble64* dest) const;
        HK_ALWAYS_INLINE  hkSimdFloat32 horizontalAdd4() const;
        HK_ALWAYS_INLINE  void setClamped( hkVector4f_ vSrc, const hkSimdFloat32& maxLen);
        HK_ALWAYS_INLINE    int getMaxElementIndex4() const;
        HK_ALWAYS_INLINE    int getMajorAxis3() const;
        HK_ALWAYS_INLINE  void storeUncached( void* dest) const;
        HK_ALWAYS_INLINE  void _setMul4(const hkMatrix3f& a, hkVector4f_   b );
        // Defined inline: forwards to setAdd(*this, a).
        // NOTE(review): the name suggests the w component is overwritten as a side effect - confirm against setAdd().
        HK_ALWAYS_INLINE  void add3clobberW( hkVector4f_ a ){ setAdd(*this,a); }
        // Defined inline: forwards to setSub(*this, a).
        // NOTE(review): the name suggests the w component is overwritten as a side effect - confirm against setSub().
        HK_ALWAYS_INLINE  void sub3clobberW( hkVector4f_ a ){ setSub(*this,a); }
        HK_ALWAYS_INLINE  void setAnd( hkVector4f_ v0, hkVector4f_ v1 );

#endif
};

//HK_ALWAYS_INLINE hkVector4f HK_CALL hkSelect( hkVector4fComp_ sel, hkVector4f_ a, hkVector4f_ b){ hkVector4f r; r.setSelect(sel,a,b); return r; }

/*
 * Havok SDK - Base file, BUILD(#20180110)
 * 
 * Confidential Information of Microsoft Corporation.
 * Not for disclosure or distribution without Microsoft's prior written
 * consent.  This software contains code, techniques and know-how which
 * is confidential and proprietary to Microsoft.  Product and Trade Secret
 * source code contains trade secrets of Microsoft.  Havok Software (C)
 * Copyright 1999-2018 Microsoft Corporation.
 * All Rights Reserved. Use of this software is subject to the
 * terms of an end user license agreement.
 * 
 * The Havok Logo, and the Havok buzzsaw logo are trademarks of Microsoft.
 * Title, ownership rights, and intellectual property rights in the Havok
 * software remain in Microsoft and/or its suppliers.
 * 
 * Use of this software for evaluation purposes is subject to and
 * indicates acceptance of the End User licence Agreement for this
 * product. A copy of the license is included with this software and is
 * also available from Havok Support.
 * 
 */
