// TKBMS v1.0 -----------------------------------------------------
//
// PLATFORM   : ALL
// PRODUCT   : COMMON
// VISIBILITY   : CLIENT
//
// ------------------------------------------------------TKBMS v1.0
#pragma once
// this: #include <Geometry/Collide/DataStructures/SimdTree/hkcdSimdTree.h>

#include <Common/Base/System/Hardware/hkHardwareInfo.h>
#include <Common/Base/Thread/TaskQueue/hkTask.h>
#include <Common/Base/Thread/TaskQueue/hkTaskGraph.h>
#include <Common/Base/Types/hkTrait.h>
#include <Common/Base/Types/Geometry/Aabb24_16_24/hkAabb24_16_24_Codec.h>
#include <Common/Base/Math/Vector/hkVectorSort.h>
#include <Common/Base/Container/BitField/hkBitField.h>

#include <Geometry/Collide/Types/hkcdRay.h>
#include <Geometry/Collide/Algorithms/Intersect/hkcdIntersectRayAabb.h>
#include <Geometry/Internal/Types/hkcdFourAabb.h>

/// Enable or disable some internal counters used for profiling.
#define HKCD_SIMDTREE_ENABLE_INTERNAL_COUNTERS 0

/// Use centroid method for NMP computations.
#define HKCD_SIMDTREE_USE_CENTER_METHOD_FOR_NMP 0

/// Enable or disable profiling timers.
#define HKCD_ENABLE_SIMDTREE_TIMERS 0

/// Forward decl.
struct hkGeometry;

///
/// hkcdSimdTree is designed to deliver better collision query performance on SIMD platforms by using a branching factor of four instead of two.
/// Nodes can contain a mixture of: leaves (data first bit set); internal nodes (data first bit cleared); empty nodes (empty AABB and data set to zero).
/// Note that even an empty tree has at least two nodes:
///  - Node #0 : Null node, always empty (both AABBs and data).
///  - Node #1 : Root of the tree, can be empty.
///
struct HK_EXPORT_COMMON hkcdSimdTree
{
    //+version(2)
    HK_DECLARE_NONVIRTUAL_CLASS_ALLOCATOR( HK_MEMORY_CLASS_COLLIDE, hkcdSimdTree );
    HK_DECLARE_REFLECTION();
    HK_RECORD_ATTR( hk::IncludeInMgd( false ) );

    /// A tree node: four child slots, each made of an AABB (from hkcdFourAabb) and a 32-bit data word.
    /// At least two slots are valid; an invalid slot is marked by an empty AABB and a data word of zero.
    struct Node : hkcdFourAabb
    {
        HK_DECLARE_NONVIRTUAL_CLASS_ALLOCATOR( HK_MEMORY_CLASS_COLLIDE, Node );
        HK_DECLARE_REFLECTION();
        HK_RECORD_ATTR( hk::IncludeInMgd( false ) );

        /// Bit flags describing the node state, see getFlags().
        enum Flags
        {
            HAS_INTERNALS = 1,
            HAS_LEAVES = 2,
            HAS_NULLS = 4
        };

        /// Default constructor, leaves the node uninitialized.
        HK_INLINE           Node() {}

        /// Copy constructor, duplicates the four AABBs and the four data words of \p other.
        HK_INLINE           Node( const Node& other )
            : hkcdFourAabb( other )
        {
            m_data[ 0 ] = other.m_data[ 0 ];
            m_data[ 1 ] = other.m_data[ 1 ];
            m_data[ 2 ] = other.m_data[ 2 ];
            m_data[ 3 ] = other.m_data[ 3 ];
        }

        /// Reset the node: AABBs are set to empty and every data word to zero.
        HK_INLINE void      clear()
        {
            setEmpty();
            for ( int slot = 0; slot < 4; ++slot )
            {
                m_data[ slot ] = 0;
            }
        }

        /// Returns true if allocated (by testing if the bounding box is not empty).
        HK_INLINE hkBool32  isAllocated() const;

        /// Build the union of all children AABB of the node.
        HK_INLINE void      getLeavesCompoundAabb(_Inout_ hkAabb* HK_RESTRICT aabb ) const;

        /// Assign both the AABB and the data word of the child slot \p index.
        HK_INLINE void      setChild( int index, const hkAabb& aabb, hkUint32 data )
        {
            setAabb( index, aabb );
            setChildData( index, data );
        }

        /// Assign the data word of the child slot \p index.
        HK_INLINE void      setChildData( int index, hkUint32 data )
        {
            m_data[ index ] = data;
        }

        /// Read the data word of the child slot \p index.
        HK_INLINE hkUint32  getChildData( int index ) const
        {
            return m_data[ index ];
        }

        /// Compute and return the flags (see Flags) of this node.
        HK_INLINE int       getFlags() const;

        /// Return the number of valid children (either leaves or internals).
        HK_INLINE int       countChildren() const;

        /// Return the number of valid leaves.
        HK_INLINE int       countLeaves() const;

        /// Return the number of internal children.
        HK_INLINE int       countInternals() const;

        /// Read the data word of the child slot \p INDEX (compile-time index).
        template <int INDEX>
        HK_INLINE hkUint32  getChildData() const
        {
            return m_data[ INDEX ];
        }

        /// Per-child data words, one for each of the four slots.
        hkUint32    m_data[ 4 ];
    };

    /// Constants.
    enum
    {
        /// Maximum stack depth for unary queries, such as ray-cast, AABB overlaps, etc.
        UNARY_STACK_SIZE = 256,

        /// Maximum stack depth for binary queries, such as treeOverlaps.
        BINARY_STACK_SIZE = 512,
    };

    /// Transposed AABB.
    /// Stored as an hkcdFourAabb; the splat() overloads fill all four slots with the same box.
    struct TransposedAabb : public hkcdFourAabb
    {
        HK_DECLARE_NONVIRTUAL_CLASS_ALLOCATOR( HK_MEMORY_CLASS_COLLIDE, TransposedAabb );

        /// Default constructor, leaves the content uninitialized.
        HK_INLINE       TransposedAabb() {}

        /// Construct from a single AABB by splatting it (see splat( const hkAabb& )).
        /// Note: this constructor is not explicit, so an hkAabb converts implicitly to a TransposedAabb.
        HK_INLINE       TransposedAabb( const hkAabb& aabb ) { splat( aabb ); }

        /// Set the value of all four AABBs to the value of the \p INDEX AABB of \p aabbs.
        template <int INDEX>
        HK_INLINE void  splat( const hkcdFourAabb& aabbs );

        /// Set the value of all four AABBs to the value of the \p index AABB of \p aabbs.
        HK_INLINE void  splat( const hkcdFourAabb& aabbs, int index );

        /// Set the value of all four AABBs to the value of \p aabb.
        HK_INLINE void  splat( const hkAabb& aabb );

        /// Set \p aabb to the intersection of all four AABBs.
        HK_INLINE void  gatherInner( hkAabb* HK_RESTRICT aabb ) const;

        /// Set \p aabb to the union of all four AABBs.
        HK_INLINE void  gatherOuter( hkAabb* HK_RESTRICT aabb ) const;
    };

    /// Leaf collector.
    /// Collector used for out-of-line unary queries (see aabbOverlaps_OutOfLine and aabbOverlapsNearMiss_OutOfLine).
    struct HK_EXPORT_COMMON LeafCollector
    {
        HK_DECLARE_NONVIRTUAL_CLASS_ALLOCATOR( HK_MEMORY_CLASS_COLLIDE, LeafCollector );

        /// Virtual destructor.
        virtual         ~LeafCollector() {}

        /// Called by queries for a set of leaves to be added.
        /// \param leaves is a pointer to an array of \p numLeaves leaf data values.
        /// \param numLeaves is the number of leaves \p leaves points to.
        /// \param nmp is for internal use.
        virtual void    addLeaves( const hkUint32* leaves, int numLeaves, hkAabb* nmp ) = 0;
    };

    /// Leaf pair collector.
    /// Collector used for out-of-line binary queries (see treeOverlaps).
    struct HK_EXPORT_COMMON PairCollector
    {
        HK_DECLARE_NONVIRTUAL_CLASS_ALLOCATOR( HK_MEMORY_CLASS_COLLIDE, PairCollector );

        /// Virtual destructor.
        virtual                     ~PairCollector() {}

        /// Called by queries for a set of leaf pairs to be added.
        /// \param a is a pointer to an array of \p numPairs leaf data values.
        /// \param b is a pointer to an array of \p numPairs leaf data values.
        /// \param numPairs is the number of entries in both \p a and \p b.
        virtual void                addPairs( const hkUint32* a, const hkUint32* b, int numPairs ) = 0;

        /// Return a node indexed bit field where a bit is set for valid nodes or cleared for invalid ones.
        /// Implementation must return a valid bit field or HK_NULL.
        /// The default implementation returns HK_NULL.
        virtual const hkBitField*   getQueryMaskA() const { return HK_NULL; }

        /// Return a node indexed bit field where a bit is set for valid nodes or cleared for invalid ones.
        /// Implementation must return a valid bit field or HK_NULL.
        /// The default implementation returns HK_NULL.
        virtual const hkBitField*   getQueryMaskB() const { return HK_NULL; }
    };

    /// Process ray-cast leaves interface.
    /// An implementation is passed to rayCast_OutOfLine, which hands it the leaves to be processed.
    struct HK_EXPORT_COMMON ProcessRayCastLeaves
    {
        /// Virtual destructor.
        virtual ~ProcessRayCastLeaves() {}

        /// Called by rayCast_OutOfLine for a set of leaves to be processed.
        /// Returns the new hit fraction.
        /// \param leaves is a pointer to an array of \p numLeaves leaf data values.
        /// \param numLeaves is the number of leaves \p leaves points to.
        /// \param ray is the ray passed to the query.
        virtual hkReal process( const hkUint32* leaves, int numLeaves, const hkcdRay& ray ) = 0;
    };

    /// Parallel build interface.
    /// A valid implementation of this interface must be set on BuildContext::m_parallel in order to enable multi-threaded builds.
    struct HK_EXPORT_COMMON IParallelBuild
    {
        HK_DECLARE_NONVIRTUAL_CLASS_ALLOCATOR( HK_MEMORY_CLASS_COLLIDE, IParallelBuild );

        /// Runnable object, handed to buildBranches().
        struct IRunnable
        {
            HK_DECLARE_NONVIRTUAL_CLASS_ALLOCATOR( HK_MEMORY_CLASS_COLLIDE, IRunnable );

            /// Virtual destructor.
            virtual ~IRunnable() {}

            /// Execute the work carried by this runnable.
            virtual void run() = 0;
        };

        /// Virtual destructor.
        virtual ~IParallelBuild() {}

        /// Called from the main thread to begin the build of tree branches.
        /// NOTE(review): \p multiplicity is presumably the number of times \p runnable should be run concurrently - confirm against the implementation.
        virtual void buildBranches( IRunnable* runnable, int multiplicity ) = 0;

        /// Called when the build of a tree branch is completed.
        /// This is called from the thread in which the runnable previously passed to buildBranches is running.
        /// At this stage, the branch is completed and can be used for collision queries but the tree is not.
        /// WARNING: If the tree is built with m_compact set to <true> (default) its nodes indices might be changed after this call.
        virtual void branchCompleted( int branchIndex, int branchNode, int firstPoint, int numPoints ) = 0;

        /// Called from the main thread.
        virtual void waitForCompletion() = 0;
    };

    /// Range, used during build.
    /// Describes a contiguous run of leaves (m_origin, m_size), its domain and the node it is rooted at.
    struct Range
    {
        HK_DECLARE_POD_TYPE();

        // Default constructor.
        // With HK_DEBUG defined, the integer members are filled with 0x7fffffff; m_domain is left untouched.
        // Without HK_DEBUG, nothing is initialized.
        #if defined(HK_DEBUG)
        HK_ALWAYS_INLINE   Range() : m_origin( 0x7fffffff ), m_size( 0x7fffffff ), m_root( 0x7fffffff ) {}
        #else
        HK_ALWAYS_INLINE   Range() {}
        #endif
        /// Construct from origin, size and root; m_domain is NOT initialized by this constructor.
        HK_ALWAYS_INLINE   Range( int origin, int size, hkUint32 root ) : m_origin( origin ), m_size( size ), m_root( root ) {}
        /// Copy the domain, origin and size from \p other but use \p root as the root node.
        HK_ALWAYS_INLINE   Range( const Range& other, hkUint32 root ) : m_domain(other.m_domain), m_origin( other.m_origin ), m_size( other.m_size ), m_root( root ) {}

        /// Split this range in two, writing the results to \p range0 and \p range1.
        /// NOTE(review): \p size is presumably the number of leaves assigned to \p range0 - confirm in the .inl.
        HK_ALWAYS_INLINE void  split( int size, Range* HK_RESTRICT range0, Range* HK_RESTRICT range1 ) const;

        hkAabb      m_domain;       ///< Range domain.
        int         m_origin;       ///< Index of the first leaf of the range.
        int         m_size;         ///< Number of leaves in the range.
        int         m_root;         ///< Root node. NOTE(review): stored as int while the constructors take hkUint32.
    };

    /// Trait used to check if QUERY implements processSimdTreeLeaves.
    /// The macro arguments spell out the expected method: return type hkReal, parameters ( const hkUint32*, int, const hkcdRay& ).
    HK_DECLARE_HAS_METHOD_TRAIT( hkReal, processSimdTreeLeaves, const hkUint32*, int, const hkcdRay& );

    /// Custom refit interface.
    /// Supplies leaf AABBs to the build; passed to buildCustom() and carried in BuildContext::m_refit.
    struct HK_EXPORT_COMMON ICustomRefit
    {
        HK_DECLARE_NONVIRTUAL_CLASS_ALLOCATOR( HK_MEMORY_CLASS_COLLIDE, ICustomRefit );

        /// Virtual destructor.
        virtual ~ICustomRefit() {}

        /// Implementation must set \p aabbOut to the AABB for the given leaf data.
        /// \param leaf is the leaf data value.
        /// \param aabbOut receives the AABB of that leaf.
        virtual void getLeafAabb( hkUint32 leaf, hkAabb* HK_RESTRICT aabbOut ) const = 0;
    };

    /// Build context.
    /// This type stores information required by the build methods as well as information set by the user to configure the build itself.
    struct HK_EXPORT_COMMON BuildContext
    {
        HK_DECLARE_NONVIRTUAL_CLASS_ALLOCATOR( HK_MEMORY_CLASS_COLLIDE, BuildContext );

        /// Build mode.
        enum Mode
        {
            BUILD_FULL,         ///< Perform a full build (default).
            BUILD_REFIT_ONLY    ///< Only refit an existing tree.
        };

        /// Partition method.
        /// Note: the enumerator names are spelled "PARTION"; they are part of the public interface.
        enum Partition
        {
            PARTION_FAST,       ///< Fast (default) partition method.
            PARTION_SAH_1_AXIS, ///< 1 axis surface area heuristic partition method.
            PARTION_SAH_3_AXIS  ///< 3 axis surface area heuristic partition method.
        };
        
        /// Constructor.
        /// Defaults: full build, fast partition, no parallel interface, one level to unfold, compaction enabled, no custom refit.
        HK_INLINE       BuildContext() : m_buildMode( BUILD_FULL ), m_partitionMethod( PARTION_FAST ), m_parallel( HK_NULL ), m_numLevelToUnfold( 1 ), m_compact( true ), m_refit(HK_NULL) {}

        /// Virtual destructor.
        virtual         ~BuildContext() {}

        /// Copy another BuildContext into this one.
        /// Note that m_parallel and m_refit are left unchanged.
        virtual void    copyFrom( BuildContext& other );

        /// Called from the main thread when the tree is about to be built.
        /// Note that even if m_parallel is set, \p multithreaded may be false if the workload is too small.
        /// The default implementation does nothing.
        virtual void    onBuildHierarchy( bool multithreaded, int numBranches, int reservedNodes ) {}

        /// Split a range in four sub-ranges.
        /// \param range is the range to split.
        /// \param subRanges receives the sub-ranges.
        virtual void    splitRangeInFour( const Range& range, Range* HK_RESTRICT subRanges );

        /// Build mode, BUILD_FULL by default.
        Mode            m_buildMode;

        /// Partition method, PARTION_FAST by default.
        Partition       m_partitionMethod;

        /// IParallelBuild interface.
        /// If set to HK_NULL, the build is done single-threaded.
        IParallelBuild* m_parallel;

        /// Number of levels to unfold for multi-threaded builds.
        /// Note: Currently must always be set to 1.
        int             m_numLevelToUnfold;

        /// When built in parallel, the tree contains gaps in the nodes array; setting this to true (default) compacts the tree.
        /// This trades slightly slower build time for memory.
        bool            m_compact;

        /// Points and indices buffer.
        /// Refer to the appropriate build method for information on how this buffer is interpreted.
        /// Note that build methods benefit from having this buffer reused and its ordering kept as stable as possible.
        hkArray<hkVector4>  m_points;
        
        /// Carry refit interface during build.
        const ICustomRefit* m_refit;
    };

    /// Associates a range index with the size of that range.
    struct RangeSizeAndIndex
    {
        /// Note the inverted comparison: this entry is "less" than \p rhs when its range size is GREATER,
        /// so an ascending sort using this operator places the largest ranges first.
        HK_INLINE bool operator <( const RangeSizeAndIndex& rhs ) const
        {
            return rhs.m_rangeSize < m_rangeSize;
        }

        hkUint32 m_rangeIndex;              ///< Index of the range.
        hkUint32 m_rangeSize;               ///< Size of the range, used as the sort key.
        hkUint32 m_rangeFirstNodeOffset;    ///< First node offset of the range.
    };

    /// Shared data used by multiple hkTasks for building the hkcdSimdTree
    struct TaskBuildContext
    {
        TaskBuildContext() : m_numRanges(0), m_onCompletedCallback(nullptr) {}

        typedef void(*onBranchCompletedCallback)(void*, const hkcdSimdTree& tree, int branchIndex, int root);

        hkcdSimdTree::Range m_rangeStacksBuffers[2][64];
        hkcdSimdTree::Range* m_ranges;
        int m_numRanges;
        hkcdSimdTree::Node* m_branchNodes;
        hkArray<hkAabb> m_compounds;
        int m_numNodesToRefit;
        hkUint32 m_iterationNextIndex;
        hkInplaceArray<RangeSizeAndIndex, 16> m_rangeMapBySize;
        hkInplaceArray<int, 16> m_numNodesPerBranch;
        onBranchCompletedCallback m_onCompletedCallback;
        void* m_onCompletedCallbackContext;
    };
    
    /// Constructor.
    /// Calls clear(), so a newly constructed tree is in its cleared state.
    HK_INLINE       hkcdSimdTree() { clear(); }

    /// Serialization constructor.
    /// The body is empty: clear() is not called.
    HK_INLINE       hkcdSimdTree(hkReflect::BypassCtorFlag f)  {;}

    /// Serialization.
    /// NOTE(review): presumably invoked by the reflection system once the object has been created - confirm.
    void            afterReflectNew();

    /// Clear the tree.
    inline void     clear();

    /// Return the memory footprint of the tree in bytes.
    HK_INLINE int   getMemoryFootPrint() const;

    /// Return true if the tree is empty.
    HK_INLINE bool  isEmpty() const;

    /// Check the integrity of the tree.
    /// For internal use only, the implementation triggers HK_ERROR if an error is found in the tree.
    void            checkIntegrity() const;

    /// Return the tree AABB in \p aabb.
    HK_INLINE void  getDomain( hkAabb* HK_RESTRICT aabb ) const;

    /// Return the depth of the tree starting at \p root.
    /// \p root defaults to 1, the index of the root node of the tree.
    int             getDepth( hkUint32 root = 1 ) const;

    /// Copy an existing tree into this one.
    void            copyFrom( const hkcdSimdTree& other );

    /// Build from another tree (hkcdDynamicTree or hkcdStaticTree).
    template <typename TREE>
    void            buildFromGenericTree( const TREE& tree );

    /// Build from hkGeometry.
    /// Note that this method automatically sets the buildContext m_points member.
    void            buildFromGeometry( BuildContext& buildContext, const hkGeometry& geometry );

    /// Generate tasks to build the tree from AABBs.
    static void generateBuildFromAabbsTasks(
        // TaskId to which the first task of the tree build will add a dependency.
        hkTaskGraph::TaskId previousTaskId,
        // Input AABBs which should be organized into a tree.
        hkArrayBase<hkAabb> const& aabbs,
        // Temporary data passed between the tasks involved in building the tree.
        TaskBuildContext& taskBuildContext,
        // Options for tweaking the tree building.
        BuildContext& buildContext,
        // Temporary points buffer used during the tree building.
        hkArray<hkVector4>& points,
        // Number of threads used for tree building.
        int numThreads,
        // Task priority set on all tasks for tree building.
        hkTask::Priority::Enum taskPriority,
        // Resulting tree.
        hkcdSimdTree* tree,
        // Task graph to which tree build tasks are appended.
        hkTaskGraph& graph,
        // Output TaskId for the task which builds the tree sub ranges.
        hkTaskGraph::TaskId& buildRangesIdOut,
        // Output TaskId for the task which finalizes the tree building.
        hkTaskGraph::TaskId& finalizeTreeIdOut);

    /// Build from indexed hkAabb.
    /// When doing a full build the BuildContext m_points member must contain all AABB centers,
    /// and the W component of each m_points array element is expected to be the index of the
    /// corresponding AABB in the 'aabbs' buffer as 24-bit int (use setInt24W).
    void            buildFromAabbs( BuildContext& buildContext, const hkAabb* aabbs );

    /// Build from indexed AABB 24.
    /// When doing a full build the BuildContext m_points member must contain all AABB centers,
    /// and the W component of each m_points array element is expected to be the index of the
    /// corresponding AABB in the strided 'aabbs' buffer as 24-bit int (use setInt24W).
    void            buildFromAabbs( BuildContext& buildContext, const hkAabb24_16_24_Codec& intSpace, const hkAabb24_16_24* aabbs, int stride );

    /// Build from indexed AABB 16.
    /// When doing a full build the BuildContext m_points member must contain all AABB centers,
    /// and the W component of each m_points array element is expected to be the index of the
    /// corresponding AABB in the strided 'aabbs' buffer as 24-bit int (use setInt24W).
    void            buildFromAabbs( BuildContext& buildContext, const hkIntSpaceUtil& intSpace, const hkAabb16* aabbs, int stride );

    /// Build from indexed spheres.
    /// The W component of each element in the 'spheres' buffer is the radius of the sphere.
    /// When doing a full build the BuildContext m_points member must contain all sphere centers,
    /// and the W component of each m_points array element is expected to be the index of the
    /// corresponding sphere in the 'spheres' buffer as 24-bit int (use setInt24W).
    void            buildFromSpheres( BuildContext& buildContext, const hkVector4* spheres );

    /// Build from points.
    /// When doing a full build the BuildContext m_points member must contain all points,
    /// and the W component of each m_points array element is expected to be the index of the
    /// corresponding point in the 'points' buffer as 24-bit int (use setInt24W).
    /// Note: Do not simply pass a pointer to the BuildContext m_points array data for the 'points'
    /// buffer! (The BuildContext m_points array gets rearranged during the build process.)
    void            buildFromPoints( BuildContext& buildContext, const hkVector4* points );

    /// Build from user indexed data.
    /// When doing a full build the BuildContext m_points member must contain all pivot points,
    /// and the W component of each m_points array element is the index that will be passed as the
    /// leaf index to the custom getLeafAabb() implementation as 24-bit int (use setInt24W).
    void            buildCustom( BuildContext& buildContext, const ICustomRefit* customRefit );

    /// Build from user indexed data, the leaf AABBs being provided by a callable instead of an ICustomRefit.
    /// When doing a full build the BuildContext m_points member must contain all pivot points,
    /// and the W component of each m_points array element is the index that will be passed as the
    /// leaf index to the custom getLeafAabb() implementation as 24-bit int (use setInt24W).
    template <typename GET_LEAF_AABB>
    HK_INLINE void  buildCustomLambda( BuildContext& buildContext, const GET_LEAF_AABB& getLeafAabb );

    /// Insert an AABB in the tree.
    /// Note that this method should NOT be used to insert a large amount of leaves in the tree,
    /// as no tree restructuring is performed and thus query performance will suffer.
    void            insert( const hkAabb& aabb, hkUint32 leafData );

    /// Count the number of leaves in or under a given node.
    /// \param nodeIndex is the index of the node to inspect.
    /// \param recurse NOTE(review): presumably selects between counting only the leaves directly in the node and counting the whole sub-tree - confirm in the implementation.
    int             countLeaves( int nodeIndex, bool recurse ) const;

    /// Store the number of leaves present under each node in an array.
    void            countLeaves( hkArray<int>& counters ) const;

    /// Build query mask from leaves bit field.
    /// \param bitField is the input leaves bit field.
    /// \param queryMaskOut receives the resulting query mask.
    void            buildQueryMask( const hkBitField& bitField, hkBitField& queryMaskOut ) const;

    /// Traverse each leaf, starting at \p root (1, the tree root, by default).
    template <typename FUNCTOR>
    void            traverse( FUNCTOR& functor, hkUint32 root = 1 ) const;

    // Note for all queries below: \p root defaults to 1, the index of the root node of the tree.

    /// Ray-cast with fraction maintained by the query.
    template <typename QUERY>
    void            rayCast( const hkcdRay& ray, QUERY& query, hkUint32 root = 1 ) const;

    /// Out-of-line ray-cast.
    /// Leaves are handed to \p processor, which returns the new hit fraction (see ProcessRayCastLeaves::process).
    void            rayCast_OutOfLine( const hkcdRay& ray, hkReal initialFraction, ProcessRayCastLeaves& processor, hkUint32 root = 1 ) const;

    /// AABB-cast with fraction maintained by the query.
    /// If sort is true, the method will try to process nodes from the closest to the farthest.
    template <typename QUERY>
    void            aabbCast( const hkcdRay& ray, const hkVector4& halfExtents, QUERY& query, bool sort, hkUint32 root = 1 ) const;

    /// Enumerate all overlaps between the given AABB and the tree leaves.
    template <typename QUERY>
    void            aabbOverlaps( const hkAabb& aabb, QUERY& query, hkUint32 root = 1 ) const;

    /// Enumerate all overlaps between the given AABB and the tree leaves (out-of-line version, reporting to \p collector).
    void            aabbOverlaps_OutOfLine( const hkAabb& aabb, LeafCollector* collector, hkUint32 root = 1 ) const;

    /// Enumerate all overlaps between the given AABB and the tree leaves (the AABB being given in transposed form).
    template <typename QUERY>
    void            aabbOverlaps( const TransposedAabb& aabbT, QUERY& query, hkUint32 root = 1 ) const;

    /// Enumerate all overlaps between the given AABB and the tree leaves.
    /// The difference with aabbOverlaps is that the query will be called for every single node traversed via 'processSimdTreeNode' with the following parameters:
    /// - const Node* node : Current node.
    /// - hkVector4Comparison&          overlappingInternals    :   Overlapping internals mask ( read-write, can be modified to affect traversal ).
    /// - const hkVector4Comparison&    missedLeaves            :   Non-overlapping leaves mask ( const ).
    /// - const hkVector4Comparison&    overlappingLeaves       :   Overlapping leaves mask ( const ).
    /// - const hkVector4Comparison&    missedInternals         :   Non-overlapping internals mask ( const ).
    template <typename QUERY>
    void            aabbOverlapsNodes( const hkAabb& aabb, QUERY& query, hkUint32 root = 1 ) const;

    /// Enumerate all overlaps between the given AABB and the tree leaves. Also compute the NMP AABB.
    template <typename QUERY>
    void            aabbOverlapsNearMiss( const hkAabb& aabb, QUERY& query, hkAabb& nmp, hkUint32 root = 1 ) const;

    // NOTE(review): undocumented in the original; same signature as aabbOverlapsNearMiss,
    // presumably an alternative implementation - confirm in the .inl before relying on it.
    template <typename QUERY>
    void            aabbOverlapsNearMissNew( const hkAabb& aabb, QUERY& query, hkAabb& nmp, hkUint32 root = 1 ) const;

    /// Enumerate all overlaps between the given AABB and the tree leaves. Also compute the NMP AABB.
    /// Out-of-line version, reporting to \p collector.
    void            aabbOverlapsNearMiss_OutOfLine( const hkAabb& aabb, LeafCollector* collector, hkAabb& nmp, hkUint32 root = 1 ) const;

    /// Enumerate all tree leaves inside or at the convex boundaries defined by the given planes.
    /// Note about plane orientation: a point is classified as inside if its distance to all planes is less than or equal to zero.
    template <typename QUERY>
    void            convexOverlaps( const hkVector4* planes, int numPlanes, QUERY& query, hkUint32 root = 1 ) const;

    /// Find the closest leaf or leaves from a given point.
    template <typename QUERY>
    void            closestFromPoint( hkVector4Parameter point, hkSimdRealParameter initialDistanceSquared, QUERY& query, hkUint32 root = 1 ) const;

    /// Find the closest leaf or leaves from a given point.
    /// Specific to particles, as it supports dynamically increasing the early out threshold.
    /// Relies on the collector counting all hits.
    template <typename QUERY>
    void            closestFromPointParticles(hkVector4Parameter point, hkSimdRealParameter initialDistanceSquared, QUERY& query, hkUint32 root = 1) const;

    /// Find the closest leaf or leaves from a given point.
    /// This minimizes the number of calls to processLeaf if the minimum distance is expected to be reduced during traversal.
    template <typename QUERY>
    void            closestFromPointStrict( hkVector4Parameter point, hkSimdRealParameter initialDistanceSquared, QUERY& query, hkUint32 root = 1 ) const;

    /// Find the closest leaf or leaves from a given point, using \p filter.
    template <typename FILTER, typename QUERY>
    void            closestFromPointWithFilter( hkVector4Parameter point, hkSimdRealParameter initialDistanceSquared, const FILTER& filter, QUERY& query, hkUint32 root = 1 ) const;

    /// Find the closest leaf or leaves from a given AABB.
    template <typename QUERY>
    void            closestFromAabb( const hkAabb& aabb, hkSimdRealParameter initialDistanceSquared, QUERY& query, hkUint32 root = 1 ) const;

    /// Generic functional query.
    template <typename ACCEPT, typename COLLECT>
    void            genericQuery( const ACCEPT& accept, const COLLECT& collect, hkUint32 root = 1 ) const;

    /// Generic functional processing (non-const).
    template <typename ACCEPT>
    void            genericProcess( ACCEPT& accept, hkUint32 root = 1 );

    /// Generic functional processing (const).
    template <typename ACCEPT>
    void            genericProcess( ACCEPT& accept, hkUint32 root = 1 ) const;

    /// Enumerate all overlaps between two trees (these can be the same tree).
    /// Overlapping leaf pairs are reported to \p collector.
    static void     treeOverlaps( const hkcdSimdTree& treeA, const hkcdSimdTree& treeB, PairCollector* collector, hkUint32 rootA = 1, hkUint32 rootB = 1 );    

    /// Run self diagnostics.
    static void     runDiagnostics();

    /// Array of tree nodes.
    /// Node #0 is the null node (always empty) and node #1 is the root of the tree.
    hkArray<Node>   m_nodes;

    // Internal profiling counters, only compiled in when HKCD_SIMDTREE_ENABLE_INTERNAL_COUNTERS is non-zero.
    #if HKCD_SIMDTREE_ENABLE_INTERNAL_COUNTERS
    struct ProfileCounters
    {
        /// Zero all counters (byte-wise memset over the whole struct).
        HK_INLINE void reset() { hkString::memSet( this, 0, sizeof( *this ) ); }

        // One call counter per internal routine named in the member.
        hkUint64    m_processBxAs_calls;
        hkUint64    m_processAxBs_calls;
        hkUint64    m_processAA_calls;
        hkUint64    m_processAB_calls;
        hkUint64    m_processAB_II_calls;
        hkUint64    m_processAB_LL_calls;
        hkUint64    m_processAB_IM_calls;
        hkUint64    m_processAB_MI_calls;
    };
    /// Global counters instance shared by all trees.
    static ProfileCounters  g_profileCounters;
    #endif
};

#include <Geometry/Collide/DataStructures/SimdTree/hkcdSimdTree.inl>

/*
 * Havok SDK - Product file, BUILD(#20180110)
 * 
 * Confidential Information of Microsoft Corporation.
 * Not for disclosure or distribution without Microsoft's prior written
 * consent.  This software contains code, techniques and know-how which
 * is confidential and proprietary to Microsoft.  Product and Trade Secret
 * source code contains trade secrets of Microsoft.  Havok Software (C)
 * Copyright 1999-2018 Microsoft Corporation.
 * All Rights Reserved. Use of this software is subject to the
 * terms of an end user license agreement.
 * 
 * The Havok Logo, and the Havok buzzsaw logo are trademarks of Microsoft.
 * Title, ownership rights, and intellectual property rights in the Havok
 * software remain in Microsoft and/or its suppliers.
 * 
 * Use of this software for evaluation purposes is subject to and
 * indicates acceptance of the End User licence Agreement for this
 * product. A copy of the license is included with this software and is
 * also available from Havok Support.
 * 
 */
