// TKBMS v1.0 -----------------------------------------------------
//
// PLATFORM     : ALL
// PRODUCT   : COMMON
// VISIBILITY   : PUBLIC
//
// ------------------------------------------------------TKBMS v1.0

#include <Common/Base/hkBase.h>
#include <Common/Base/Thread/TaskQueue/Default/hkDefaultTaskQueue.h>
#include <Common/Base/Thread/TaskQueue/Default/hkDefaultTaskQueue_HandleImpl.h>
#include <Common/Base/Thread/TaskQueue/Default/hkDefaultTaskQueue_HandleAllocator.h>
#include <Common/Base/Thread/TaskQueue/Default/hkDefaultTaskQueue_HandleGraph.h>
#include <Common/Base/Thread/TaskQueue/Default/hkDefaultTaskQueue_Logger.h>

#include <Common/Base/Thread/TaskQueue/hkTask.h>
#include <Common/Base/Thread/Async/hkAsyncThreadPool.h>
#include <Common/Base/Thread/CriticalSection/hkCriticalSection.h>
#include <Common/Base/Thread/Semaphore/hkSemaphore.h>
#include <Common/Base/Container/MinHeap/hkMinHeap.h>
#include <Common/Base/Thread/Async/hkAsyncThreadPoolLocking.h>

#include <Common/Base/Container/PointerMap/hkMap.hxx>



// Uncomment to compile in the timers that this file places around its own locking,
// waiting and signaling.
// #define HK_TASK_QUEUE_TIMERS

// HK_TASK_QUEUE_ON_TIMERS_ENABLED(X) expands to X when HK_TASK_QUEUE_TIMERS is defined
// and to nothing otherwise.
#ifdef HK_TASK_QUEUE_TIMERS
#define HK_TASK_QUEUE_ON_TIMERS_ENABLED(X) X
#else
#define HK_TASK_QUEUE_ON_TIMERS_ENABLED(X)
#endif


//////////////////////////////////////////////////////////////////////////
// Thread contexts
//////////////////////////////////////////////////////////////////////////

// Base class for the per-thread state used by hkDefaultTaskQueue.
// It carries data only; the wait()/signal() operations are provided by the derived
// context types, which the queue code reaches through static_cast.
class hkDefaultTaskQueue::ThreadContext
{
    public:

        // Inplace capacity of m_freeHandles.
        enum { MAX_CACHED_HANDLES = 32 };

        HK_DECLARE_CLASS( ThreadContext, New );

        // Construct the context for slot 'index'. Every other field starts cleared:
        // not waiting, no monitor stream, no owner and no current handle.
        HK_INLINE ThreadContext( int index)
        :   m_index( index ), m_recursiveCount(0),
            m_isWaiting( false ),
            m_monitorStream( HK_NULL ),
            m_monitorStreamSafeEnd( HK_NULL ),
            m_clearTimerData( false ),
            m_owner( HK_NULL ),
            m_currentHandle( HK_NULL )
        {}

        virtual ~ThreadContext() {}

        // This parent class doesn't implement virtual functions because for the sake of efficiency
        // we always know exactly what child class we are using at a given time.

    public:

        // The index of this context in hkDefaultTaskQueue::m_threadContexts
        const int m_index;

        // Incremented each time a thread uses this data
        int m_recursiveCount;

        // Whether the thread is waiting for its semaphore (or is about to).
        // Set by the waiting thread before it leaves the queue lock, and cleared by the
        // waking thread before it calls signal() on this context.
        volatile bool m_isWaiting;

        //
        // Timers
        //

        // The thread's monitor stream (taken from TLS). If this is null, m_monitorStreamSafeEnd has no meaning
        hkMonitorStream* m_monitorStream;

        // A safe end point of the monitor stream, equal to or less than the real end point.
        // This is guaranteed to be outside any timer scopes, therefore the stream can be safely parsed up to this point.
        const char* m_monitorStreamSafeEnd;

        // Whether to reset the monitor stream as soon as possible.
        volatile bool m_clearTimerData;

        // A list of thread local cached handles (up to MAX_CACHED_HANDLES are stored inplace)
        hkInplaceArray<hkDefaultTaskQueue::HandleImpl*, MAX_CACHED_HANDLES> m_freeHandles;

        // The owning task queue (used to confirm that hkDefaultTaskQueueThreadContext is appropriate)
        hkDefaultTaskQueue* m_owner;

        /// The handle being executed by the deepest call to processingLoop() on this thread.
        hkDefaultTaskQueue::HandleImpl* m_currentHandle;
};

// Thread local cache of the current context.
// _allocateThreadContext() reads it and reuses the cached context when its m_owner is the calling queue.
HK_THREAD_LOCAL( hkDefaultTaskQueue::ThreadContext* ) hkDefaultTaskQueueThreadContext;

// Implementations
namespace hkDefaultTaskQueueDetail
{
    // Thread context whose thread blocks on an hkSemaphore while it waits for handles.
    class BlockingThreadContext : public hkDefaultTaskQueue::ThreadContext
    {
        public:
            HK_DECLARE_CLASS(BlockingThreadContext, New);

            // 'index' is forwarded to ThreadContext and 'spinCount' to the semaphore.
            // NOTE(review): the semaphore arguments (0, 1, spinCount) presumably mean
            // initial count 0 and maximum count 1 - confirm against hkSemaphore.
            HK_INLINE BlockingThreadContext(int index, int spinCount)
            :   ThreadContext(index),
                m_semaphore(0, 1, spinCount)
            {}

            // Helper function to wait for the semaphore.
            // 'monitorStream' is only used by the optional "WaitForTasks" timer.
            HK_INLINE void wait(_Inout_opt_ hkMonitorStream* monitorStream)
            {
                HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_TIMER_BEGIN2(monitorStream, "WaitForTasks", HK_NULL));
                m_semaphore.acquire();
                HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_TIMER_END2(monitorStream));
            }

            // Helper function to signal the semaphore.
            // The caller must have cleared m_isWaiting before calling this.
            // 'monitorStream' is only used by the optional "SignalThread" timer.
            HK_INLINE void signal(_Inout_opt_ hkMonitorStream* monitorStream)
            {
                HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_TIMER_BEGIN2(monitorStream, "SignalThread", HK_NULL));
                HK_ASSERT_NO_MSG(0x786293de, m_isWaiting == false); // must have been set beforehand
        #ifdef HK_DEBUG
                // Debug builds release through tryRelease() so that an unsuccessful release is reported.
                HK_ASSERT_NO_MSG(0x786293df, m_semaphore.tryRelease().isSuccess());
        #else
                m_semaphore.release();
        #endif
                // Close the timer scope in both configurations. It used to be closed only in the
                // non-debug branch, which left "SignalThread" open in debug builds with timers enabled.
                HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_TIMER_END2(monitorStream));
            }

            // Assert that the semaphore cannot currently be acquired, i.e. that it is not signaled.
            HK_INLINE void assertUnsignaled()
            {
                HK_ASSERT_NO_MSG(0x184c93d4, m_semaphore.tryAcquire().isFailure());
            }

            // Nothing to do for this context type.
            HK_ALWAYS_INLINE void tendToThreadPool(hkUint32 id) { }

        protected:

            // A semaphore for waiting for handles to become available.
            hkSemaphore m_semaphore;
    };

    // Thread context that busy-waits on an atomic flag instead of blocking on a semaphore.
    // The flag is 1 while a signal is pending and 0 otherwise.
    class SpinningThreadContext : public hkDefaultTaskQueue::ThreadContext
    {
        public:
            HK_DECLARE_CLASS(SpinningThreadContext, New);

            // Starts with no signal pending.
            HK_INLINE SpinningThreadContext(int index)
            :   ThreadContext(index),
                m_exchangeVariable(0)
            {
            }

            // Spin until a pending signal can be consumed (flag flipped from 1 back to 0),
            // backing off after every failed attempt.
            HK_INLINE void wait(_Inout_opt_ hkMonitorStream* monitorStream)
            {
                HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_TIMER_BEGIN2(monitorStream, "WaitForTasks", HK_NULL));
                hkAtomic::Backoff<> backoff;
                while (!hkAtomic::compareAndSwap32(&m_exchangeVariable, 1, 0))
                {
                    backoff.pause();
                }
                HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_TIMER_END2(monitorStream));
            }

            // Post a signal by flipping the flag from 0 to 1.
            // The caller must have cleared m_isWaiting first, and no signal may be pending already.
            HK_INLINE void signal(_Inout_opt_ hkMonitorStream* monitorStream)
            {
                HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_TIMER_BEGIN2(monitorStream, "SignalThread", HK_NULL));
                HK_ASSERT_NO_MSG(0x786293de, m_isWaiting == false); // must have been set beforehand
                HK_ON_DEBUG(bool ret = ) hkAtomic::compareAndSwap32(&m_exchangeVariable, 0, 1);
                HK_ASSERT_NO_MSG(0x786293df, ret == true); // Must not be released already.
                HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_TIMER_END2(monitorStream));
            }

            // Assert that no signal is pending.
            HK_INLINE void assertUnsignaled()
            {
                HK_ASSERT_NO_MSG(0x184c93d4, m_exchangeVariable == 0);
            }

            // Nothing to do for this context type.
            HK_ALWAYS_INLINE void tendToThreadPool(hkUint32 id) { }

        protected:
            // The flag exchanged atomically between the waiting and the signaling thread.
            HK_ALIGN(hkUint32, 64) m_exchangeVariable;

            // Fills the rest of the flag's cache line to avoid false sharing.
            hkUchar  m_padding[hkAtomic::CACHELINE_SIZE - sizeof(hkUint32)];
    };

    // Thread context for a worker thread of an hkAsyncThreadPool. Waiting and signaling go
    // through an hkAsyncThreadPoolSimpleSemaphore that is given the pool and the worker's index.
    class AsyncThreadPoolContext : public hkDefaultTaskQueue::ThreadContext
    {
        public:
            HK_DECLARE_CLASS(AsyncThreadPoolContext, New);

            // The thread index starts at -1 and must be assigned with setThreadPoolThreadIdx()
            // before signal() is used (signal() asserts this).
            AsyncThreadPoolContext(int index, hkAsyncThreadPool* threadPool)
                : ThreadContext(index),
                m_threadPool(threadPool),
                m_threadPoolThreadIdx(-1)
            {
            }

            // Helper function to wait for the semaphore.
            // Asserts that the calling thread is the pool thread this context was set up for.
            // 'monitorStream' is not used by this context type.
            HK_INLINE void wait(hkMonitorStream* monitorStream)
            {
                HK_ASSERT(0x1e568816, m_threadPool == hkAsyncThreadPool::getCurrentThreadPool(),
                    "The m_threadPool must be the thread pool the thread which "
                    "is currently running belongs to.");
                HK_ASSERT(0x5dad518b, m_threadPoolThreadIdx == hkAsyncThreadPool::getCurrentThreadIndex(),
                    "m_threadPoolThreadIdx must be the index of the thread "
                    "pool thread from which this context's wait function is called.");

                m_semaphore.acquire(m_threadPool, m_threadPoolThreadIdx);
            }

            // Helper function to signal the semaphore
            // 'monitorStream' is not used by this context type.
            HK_INLINE void signal(hkMonitorStream* monitorStream)
            {
                HK_ASSERT(0x52cc8499, m_threadPoolThreadIdx != -1,
                    "Call setThreadPoolThreadIdx before using the AsyncThreadPoolContext.");

                m_semaphore.release(m_threadPool, m_threadPoolThreadIdx);
            }

            // Record the index of the pool thread that uses this context.
            HK_INLINE void setThreadPoolThreadIdx(int i)
            {
                m_threadPoolThreadIdx = i;
            }

            // Forward to hkAsyncThreadPool::tendToThreadPool() on the owning pool.
            HK_INLINE void tendToThreadPool(hkUint32 id)
            {
                m_threadPool->tendToThreadPool(id);
            }

        private:
            /// The semaphore for this context's worker thread. The thread pool
            /// and thread index passed to the semaphore's acquire and release
            /// functions should be this class' m_threadPool and m_threadPoolThreadIdx.
            hkAsyncThreadPoolSimpleSemaphore m_semaphore;

            /// The thread pool and thread index of the async thread pool worker
            /// thread this context is for.
            hkAsyncThreadPool* m_threadPool;
            int m_threadPoolThreadIdx;
    };
}


// SpinningThreadContext aligns its exchange variable to a literal 64 bytes but sizes its padding
// from hkAtomic::CACHELINE_SIZE; presumably this assert is what keeps the two in agreement.
HK_COMPILE_TIME_ASSERT(hkAtomic::CACHELINE_SIZE == 64);

//////////////////////////////////////////////////////////////////////////
// Critical sections
//////////////////////////////////////////////////////////////////////////

// Enter the critical section.
// Forwards to enterImpl(). When HK_TASK_QUEUE_TIMERS is defined, the call is additionally wrapped
// in an "EnterQueue" timer on the stream returned by hkMonitorStream::getInstancePtr().
void hkDefaultTaskQueue::CriticalSection::enter()
{
    HK_TASK_QUEUE_ON_TIMERS_ENABLED( HK_ON_MONITORS_ENABLED( hkMonitorStream* monitorStream = hkMonitorStream::getInstancePtr() ) );
    HK_TASK_QUEUE_ON_TIMERS_ENABLED( HK_TIMER_BEGIN2( monitorStream, "EnterQueue", HK_NULL ) );
    enterImpl();
    HK_TASK_QUEUE_ON_TIMERS_ENABLED( HK_TIMER_END2( monitorStream ) );
}

// Override with explicit monitor stream (for efficiency).
// Forwards to enterImpl(); 'monitorStream' is only used by the optional "EnterQueue" timer,
// which saves looking the stream up again.
void hkDefaultTaskQueue::CriticalSection::enter(_Inout_opt_ hkMonitorStream* monitorStream )
{
    HK_TASK_QUEUE_ON_TIMERS_ENABLED( HK_TIMER_BEGIN2( monitorStream, "EnterQueue", HK_NULL ) );
    enterImpl();
    HK_TASK_QUEUE_ON_TIMERS_ENABLED( HK_TIMER_END2( monitorStream ) );
}

// Leave the critical section.
// Forwards to leaveImpl(). When HK_TASK_QUEUE_TIMERS is defined, the call is additionally wrapped
// in an "ExitQueue" timer on the stream returned by hkMonitorStream::getInstancePtr().
void hkDefaultTaskQueue::CriticalSection::leave()
{
    HK_TASK_QUEUE_ON_TIMERS_ENABLED( HK_ON_MONITORS_ENABLED( hkMonitorStream* monitorStream = hkMonitorStream::getInstancePtr() ) );
    HK_TASK_QUEUE_ON_TIMERS_ENABLED( HK_TIMER_BEGIN2( monitorStream, "ExitQueue", HK_NULL ) );
    leaveImpl();
    HK_TASK_QUEUE_ON_TIMERS_ENABLED( HK_TIMER_END2( monitorStream ) );
}

// Override with explicit monitor stream (for efficiency).
// Forwards to leaveImpl(); 'monitorStream' is only used by the optional "ExitQueue" timer,
// which saves looking the stream up again.
void hkDefaultTaskQueue::CriticalSection::leave(_Inout_opt_ hkMonitorStream* monitorStream )
{
    HK_TASK_QUEUE_ON_TIMERS_ENABLED( HK_TIMER_BEGIN2( monitorStream, "ExitQueue", HK_NULL ) );
    leaveImpl();
    HK_TASK_QUEUE_ON_TIMERS_ENABLED( HK_TIMER_END2( monitorStream ) );
}


// Implementations
namespace
{
    // Regular hkCriticalSection
    // Wraps an hkCriticalSection behind the hkDefaultTaskQueue::CriticalSection interface.
    class BlockingCriticalSection : public hkDefaultTaskQueue::CriticalSection
    {
        public:

            HK_DECLARE_CLASS(BlockingCriticalSection, New);

            // 'spinCount' is passed straight to the hkCriticalSection constructor.
            HK_INLINE BlockingCriticalSection(int spinCount)
            :   m_lock(spinCount)
            {}

            // This can be called directly in special cases.
            // Enters the lock through a class-qualified (hence non-virtual) call to enterImpl();
            // 'monitorStream' is only used by the optional "EnterQueue" timer.
            HK_INLINE void enterNonVirtual(_Inout_opt_ hkMonitorStream* monitorStream)
            {
                HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_TIMER_BEGIN2(monitorStream, "EnterQueue", HK_NULL));
                BlockingCriticalSection::enterImpl();
                HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_TIMER_END2(monitorStream));
            }

            // This can be called directly in special cases.
            // Leaves the lock through a class-qualified (hence non-virtual) call to leaveImpl();
            // 'monitorStream' is only used by the optional "ExitQueue" timer.
            HK_INLINE void leaveNonVirtual(_Inout_opt_ hkMonitorStream* monitorStream)
            {
                HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_TIMER_BEGIN2(monitorStream, "ExitQueue", HK_NULL));
                BlockingCriticalSection::leaveImpl();
                HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_TIMER_END2(monitorStream));
            }

        private:

            // Enter the critical section.
            HK_INLINE virtual void enterImpl() HK_OVERRIDE
            {
                m_lock.enter();
            }

            // Leave the critical section.
            HK_INLINE virtual void leaveImpl() HK_OVERRIDE
            {
                m_lock.leave();
            }

        protected:

            // The underlying lock.
            hkCriticalSection m_lock;
    };

    /// Spin lock that admits one thread at a time into the critical section. Threads are let in
    /// in no particular order and there is no fairness guarantee between contending threads,
    /// similarly to Windows' CriticalSection objects.
    class SpinningCriticalSection : public hkDefaultTaskQueue::CriticalSection
    {
        public:
            HK_DECLARE_CLASS(SpinningCriticalSection, New);

            // Starts out free. The whole object is required to be exactly one cache line in size.
            HK_INLINE SpinningCriticalSection()
            :   m_exchangeVariable(0)
            {
                HK_COMPILE_TIME_ASSERT(sizeof(SpinningCriticalSection) == hkAtomic::CACHELINE_SIZE);
            }

            // Direct entry point for callers that know the concrete type: calls this class'
            // enterImpl() non-virtually. 'monitorStream' only feeds the optional "EnterQueue" timer.
            HK_INLINE void enterNonVirtual(_Inout_opt_ hkMonitorStream* monitorStream)
            {
                HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_TIMER_BEGIN2(monitorStream, "EnterQueue", HK_NULL));
                SpinningCriticalSection::enterImpl();
                HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_TIMER_END2(monitorStream));
            }

            // Direct exit point for callers that know the concrete type: calls this class'
            // leaveImpl() non-virtually. 'monitorStream' only feeds the optional "ExitQueue" timer.
            HK_INLINE void leaveNonVirtual(_Inout_opt_ hkMonitorStream* monitorStream)
            {
                HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_TIMER_BEGIN2(monitorStream, "ExitQueue", HK_NULL));
                SpinningCriticalSection::leaveImpl();
                HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_TIMER_END2(monitorStream));
            }

        private:

            // Acquire: keep retrying the 0 -> 1 transition, backing off after each failed attempt.
            virtual void enterImpl() HK_OVERRIDE
            {
                hkAtomic::Backoff<> backoff;
                while (!hkAtomic::compareAndSwap32(&m_exchangeVariable, 0, 1))
                {
                    backoff.pause();
                }
            }

            // Release: flip the variable from 1 back to 0.
            virtual void leaveImpl() HK_OVERRIDE
            {
                HK_ON_DEBUG(bool ret = ) hkAtomic::compareAndSwap32(&m_exchangeVariable, 1, 0);
                HK_ASSERT_NO_MSG(0xf0a7087e, ret == true); // Must have been busy before leave.
            }

        protected:

            // Lock state: 1 while a thread is inside the critical section, 0 while it is free.
            hkUint32 m_exchangeVariable;

            // Pads the object up to a full cache line to avoid false sharing.
            hkUchar m_padding[hkAtomic::CACHELINE_SIZE - sizeof(hkUint32) - sizeof(hkDefaultTaskQueue::CriticalSection)];
    };

    // Critical section backed by an hkAsyncThreadPoolMutex.
    class AsyncThreadPoolCriticalSection : public hkDefaultTaskQueue::CriticalSection
    {
    public:
        // This can be called directly in special cases.
        // Enters m_lock without a virtual call; 'monitorStream' is only used by the
        // optional "EnterQueue" timer.
        HK_INLINE void enterNonVirtual(hkMonitorStream* monitorStream)
        {
            HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_TIMER_BEGIN2(monitorStream, "EnterQueue", HK_NULL));
            m_lock.enter();
            HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_TIMER_END2(monitorStream));
        }

        // This can be called directly in special cases.
        // Leaves m_lock without a virtual call; 'monitorStream' is only used by the
        // optional "ExitQueue" timer.
        HK_INLINE void leaveNonVirtual(hkMonitorStream* monitorStream)
        {
            HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_TIMER_BEGIN2(monitorStream, "ExitQueue", HK_NULL));
            m_lock.leave();
            HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_TIMER_END2(monitorStream));
        }

    private:

        // Enter the critical section.
        virtual void enterImpl() HK_OVERRIDE
        {
            m_lock.enter();
        }

        // Leave the critical section.
        virtual void leaveImpl() HK_OVERRIDE
        {
            m_lock.leave();
        }

        // The underlying lock.
        hkAsyncThreadPoolMutex m_lock;
    };
}


//////////////////////////////////////////////////////////////////////////
// An internal version of hkDefaultTaskQueue with added functionality
//////////////////////////////////////////////////////////////////////////

// Adds the internal helper functions used by this file on top of hkDefaultTaskQueue.
// It declares no data members of its own and its default constructor is private.
class hkDefaultTaskQueueEx : public hkDefaultTaskQueue
{
    public:

        /// Make the given handle available for processing in the appropriate queue.
        /// Note: This is NOT thread safe.
        HK_INLINE void _enqueueHandle(_In_ HandleImpl* handle );

        /// Add the given handles for scheduling. Any with no unfinished predecessors are immediately enqueued.
        /// The handle pointers are read 'striding' bytes apart, starting at 'handles'.
        /// Returns the number which were enqueued (each enqueued handle counts with its multiplicity).
        /// Note: This is NOT thread safe.
        HK_INLINE int _addHandles(_In_reads_bytes_(numHandles * striding) HandleImpl** handles, int numHandles, Order::Enum order, int striding = sizeof(Handle) );

        /// Add a dependency between handles. Can only be called before the successor is added.
        /// Note: This is NOT thread safe.
        HK_INLINE void _addDependency(_Inout_ HandleImpl* predecessor, _Inout_ HandleImpl* successor );

        /// NOTE(review): presumably returns a context obtained from _allocateThreadContext();
        /// the definition is not part of this section - confirm there.
        HK_INLINE void _freeThreadContext(_Inout_ ThreadContext* threadContext);

        /// Get a thread context for the calling thread, reusing the thread-locally cached one
        /// when it belongs to this queue.
        template < typename CriticalSectionType >
        HK_INLINE _Ret_notnull_ ThreadContext* _allocateThreadContext(_Inout_ hkMonitorStream* monitorStream);

        /// Get the next available handle from the given queue.
        /// ShouldWait specifies whether to wait for a signal whenever no handles are immediately available.
        /// If handleToWaitFor is set, this function will exit after that handle has been processed by *any* thread.
        /// multiplicityIdOut is only written when a handle is returned.
        /// Note: This is NOT thread safe.
        template < bool ShouldWait, typename CriticalSectionType, typename ThreadContextType >
        HK_INLINE _Ret_maybenull_ HandleImpl* _getNextHandle(
            ThreadContextType& threadContext, hkMinHeap<QueueEntry>& queue, _Inout_opt_ HandleImpl* handleToWaitFor, _Out_ int* multiplicityIdOut);

        /// Finish the given handle, and enqueue any newly available successors.
        /// This should be called after processing the handle's task.
        /// Returns the number which were enqueued.
        /// Note: This is NOT thread safe.
        template < typename ThreadContextType >
        HK_INLINE int _finishHandle(_Inout_ HandleImpl* handle );

        /// Grab up to the given number of sleeping threads in preparation for waking them.
        /// Call ThreadContext::signal() later on each to actually wake them.
        /// Note: This is NOT thread safe.
        HK_INLINE void _preWakeThreads( int numThreadsToWake, hkInplaceArray<ThreadContext*, MAX_NUM_THREADS>& threadsOut );

        /// Wake all threads that are sleeping.
        /// Note: This is NOT thread safe.
        template < typename ThreadContextType >
        HK_INLINE void _wakeAllThreads();

        /// Wake threads in the given list.
        template < typename ThreadContextType >
        HK_INLINE void _wakeThreads(const hkArrayBase<ThreadContext*>& threadsToWake);

        /// Check the consistency of all thread contexts.
        template < typename ThreadContextType >
        HK_INLINE void _checkConsistency();

        /// General thread worker function.
        /// Keeps grabbing any available handle from the queue and processing its task.
        template < bool ShouldLoop, bool FREE_HANDLE, typename CriticalSectionType, typename ThreadContextType >
        bool processingLoop(_Inout_opt_ HandleImpl* handleToWaitFor = HK_NULL);

        /// Find if there is a dependency path between predecessor and successor
        bool hasTransitiveDependency( HandleImpl* predecessor, HandleImpl* sucessor );

    private:

        /// Task execution context
        struct ExecutionContext : public hkTask::ExecutionContext
        {
            HK_DECLARE_CLASS(ExecutionContext, New);

            /// Returns true if the execution of the task should be aborted.
            /// A null m_abortRequest is treated as "no abort requested".
            virtual bool isAbortRequested() const HK_OVERRIDE
            {
                return m_abortRequest ? bool(*m_abortRequest) : false;
            }

            /// The abort request
            hkAtomic::Variable<hkBool32>* m_abortRequest;
        };

        // Can't construct this class
        hkDefaultTaskQueueEx() {}
};

// hkDefaultTaskQueueEx must not add any data to hkDefaultTaskQueue.
// NOTE(review): presumably because hkDefaultTaskQueue objects are accessed through this type - confirm at the call sites.
HK_COMPILE_TIME_ASSERT( sizeof(hkDefaultTaskQueueEx) == sizeof(hkDefaultTaskQueue) );

/// Put 'handle' into m_queue, prioritized by the handle's sort key.
/// Not thread safe.
HK_INLINE void hkDefaultTaskQueueEx::_enqueueHandle(_In_ HandleImpl* handle)
{
    QueueEntry entry;
    entry.m_priority = handle->m_sortKey;
    entry.m_handle = handle;

    m_queue.addEntry( entry );
}

/// Submit 'numHandles' handles whose pointers are stored 'striding' bytes apart starting at 'handles'.
/// Every handle gets a sort key built from its priority and a freshly reserved serial number, and has
/// the implicit dependency from allocation removed; handles left without dependencies are enqueued.
/// Returns the summed multiplicity of the handles that were enqueued.
/// Not thread safe.
HK_INLINE int hkDefaultTaskQueueEx::_addHandles( _In_reads_bytes_(numHandles * striding) HandleImpl** handles, int numHandles, Order::Enum order, int striding )
{
    // A serial with only the highest serial bit set, i.e. the middle of the allowed serial range
    const hkUint32 baseSerial = 1 << (HandleImpl::NUM_SERIAL_BITS - 1);

    // Reserve a run of consecutive serials, restarting at 1 (zero is reserved) if the run would
    // reach the middle of the range
    hkUint32 firstSerial = m_serial;
    if( HK_VERY_UNLIKELY( firstSerial + numHandles >= baseSerial ) )
    {
        HK_ASSERT( 0x13b06321, numHandles < baseSerial-1, "Too many handles" );
        firstSerial = 1;
    }
    m_serial = firstSerial + numHandles;

    int numEnqueuedHandles = 0;
    HandleImpl** cursor = handles;
    for( int i = 0; i < numHandles; ++i )
    {
        const hkUint32 serial = firstSerial + hkUint32(i);

        HandleImpl* h = *cursor;
        cursor = hkAddByteOffset( cursor, striding );
        HK_ASSERT( 0x13306990, h && h->isAllocated(), "Invalid handle" );
        HK_ASSERT( 0x13306991, h->isInitialized(), "Handle not initialized" );
        HK_ASSERT( 0x6d7d1a60, !h->isSubmitted(), "Handle already submitted" );

        // The priority occupies the bits above the serial.
        const hkUint32 priority = hkUint32(h->m_priority) << HandleImpl::NUM_SERIAL_BITS;

        // FIFO keys shrink as the serial grows while LIFO keys grow with it; LIFO keys are
        // always above baseSerial, so any LIFO handle outranks any FIFO handle of equal priority.
        const hkUint32 serialPart = ( order == Order::FIFO ) ? (baseSerial - serial) : (baseSerial + serial);
        h->m_sortKey = priority | serialPart;
        HK_ASSERT( 0x6d7d1a61, h->m_sortKey != 0, "Invalid serial" );   // zero is reserved

        // Drop the implicit dependency that was added during allocation
        HK_ASSERT_NO_MSG( 0x18961baa, h->m_dependencyCount > 0 );
        --h->m_dependencyCount;
        if( h->m_dependencyCount == 0 )
        {
            _enqueueHandle( h );
            numEnqueuedHandles += h->m_multiplicity;
        }
    }
    return numEnqueuedHandles;
}

/// Returns true if 'successor' can be reached from 'predecessor' by following m_successors links,
/// either directly or through intermediate handles.
HK_INLINE bool hkDefaultTaskQueueEx::hasTransitiveDependency( HandleImpl* predecessor, HandleImpl* successor )
{
    // Look for a direct link first, then search below each successor in turn.
    bool found = ( predecessor->m_successors.indexOf(successor) != -1 );
    for (int i = 0; !found && i < predecessor->m_successors.getSize(); ++i)
    {
        found = hasTransitiveDependency(predecessor->m_successors[i], successor);
    }
    return found;
}

/// Make 'successor' depend on 'predecessor'.
/// Both handles must be allocated and distinct, the successor must be neither available nor
/// finished, and the new link must not close a cycle (all of this is asserted).
/// Not thread safe.
HK_INLINE void hkDefaultTaskQueueEx::_addDependency(_Inout_ HandleImpl* predecessor, _Inout_ HandleImpl* successor)
{
    HK_ASSERT( 0x4b1a97f5, predecessor && predecessor->isAllocated() && successor && successor->isAllocated(), "Invalid handle(s)" );
    HK_ASSERT( 0x3daefc7d, successor != predecessor, "A handle can't depend on itself" );
    HK_ASSERT( 0x554f104c, !successor->isAvailable(), "Not safe to add dependency: successor might be running" );
    HK_ASSERT( 0x554f16dc, !successor->isFinished(), "Successor has already run" );

    HK_ASSERT(0x43cdf4f9, hasTransitiveDependency(successor, predecessor) == false, "Cyclic dependency detected");

    // A finished predecessor has nothing left to wait for, so only unfinished ones are linked.
    const bool dependencyAlreadySatisfied = predecessor->isFinished();
    if( !dependencyAlreadySatisfied )
    {
        HK_WARN_ON_DEBUG_IF( predecessor->m_successors.indexOf(successor) != -1,
            0x23a9d80c, "Adding the same dependency twice" );
        predecessor->m_successors.pushBack( successor );
        ++successor->m_dependencyCount;
    }

#ifdef HK_DEBUG
    // Debug builds report the dependency to the logger, if one is installed.
    if( m_logger )
    {
        m_logger->logDependency( predecessor, successor );
    }
#endif
}

/// Take the next handle to work on from 'queue'.
/// Returns HK_NULL when 'handleToWaitFor' has finished, or when the queue is empty and waiting is
/// not possible (ShouldWait is false, or the queue is closed and there is no handle to wait for).
/// When a handle is returned, its active thread count has been incremented, its multiplicity
/// decremented, and the remaining multiplicity is written to multiplicityIdOut[0].
/// While waiting, the queue lock is released and re-acquired around the wait.
/// Not thread safe.
template < bool ShouldWait, typename CriticalSectionType, typename ThreadContextType >
HK_INLINE _Ret_maybenull_ hkDefaultTaskQueue::HandleImpl* hkDefaultTaskQueueEx::_getNextHandle(
    ThreadContextType& threadContext, hkMinHeap<QueueEntry>& queue, _Inout_opt_ HandleImpl* handleToWaitFor, _Out_ int* multiplicityIdOut)
{
    HK_ASSERT_NO_MSG( 0x5b56293d, !threadContext.m_isWaiting );

    CriticalSectionType* lock = static_cast<CriticalSectionType*>(m_lock);

    hkMonitorStream* monitorStream = HK_NULL;
    HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_ON_MONITORS_ENABLED( monitorStream = threadContext.m_monitorStream ));

    HandleImpl* handleOut = HK_NULL;
    for(;;)
    {
        // Done, without a handle, once the handle we are waiting for has finished
        if( handleToWaitFor && handleToWaitFor->isFinished() )
        {
            break;
        }

        // Claim one unit of work from the handle at the top of the queue, if there is one
        if( !queue.isEmpty() )
        {
            handleOut = queue.getTop().m_handle;
            handleOut->m_numActiveThreads++;
            const int remainingMultiplicity = --handleOut->m_multiplicity;
            multiplicityIdOut[0] = remainingMultiplicity;
            if( remainingMultiplicity == 0 )
            {
                // That was the last unit, so the handle leaves the queue
                queue.popTop();
            }
            break;
        }

        // Nothing is available right now: give up unless waiting is allowed
        const bool mayWait = ShouldWait && ( !m_isClosed || handleToWaitFor );
        if( !mayWait )
        {
            break;
        }

        // Register as a waiter, then sleep outside the lock until signaled
        if( handleToWaitFor )
        {
            handleToWaitFor->m_waitingThreadsBitfield.setBit(threadContext.m_index);
        }
        threadContext.m_isWaiting = true;

        lock->leaveNonVirtual(monitorStream);
        threadContext.wait(monitorStream);
        lock->enterNonVirtual(monitorStream);

        // The waker clears m_isWaiting before it signals
        HK_ASSERT_NO_MSG( 0xe8c97d37, !threadContext.m_isWaiting );
        if( handleToWaitFor )
        {
            handleToWaitFor->m_waitingThreadsBitfield.clearBit(threadContext.m_index);
        }
    }

    HK_ASSERT_NO_MSG( 0x19a260eb, !handleOut || (handleOut->isSubmitted() && (handleOut->m_dependencyCount == 0)) );
    return handleOut;
}

/// Record that the calling thread has finished its share of 'handle'.
/// If other threads are still working on the handle, or multiplicity remains, this returns 0.
/// Otherwise it decrements the successors' dependency counts, signals threads that are waiting
/// for this handle, enqueues the successors that became available, and marks the handle finished.
/// Returns the summed multiplicity of the newly enqueued handles.
/// Not thread safe.
template < typename ThreadContextType >
HK_INLINE int hkDefaultTaskQueueEx::_finishHandle(_Inout_ HandleImpl* handle)
{
    HK_ASSERT_NO_MSG( 0xd30c8c91, handle->m_dependencyCount == 0 );

    // A handle flagged m_onFinishZeroMultiplicity drops its remaining multiplicity as soon as
    // one thread finishes; it is still in the queue at that point and has to be taken out.
    if( handle->m_onFinishZeroMultiplicity && handle->m_multiplicity != 0 )
    {
        // Remove it from the queue now
        handle->m_multiplicity = 0;
        HK_ON_DEBUG( bool somethingRemoved = false; );
        for (int i =0; i < m_queue.getContents().getSize(); i++ )
        {
            if (handle == m_queue.getEntry(i).m_handle )
            {
                HK_ON_DEBUG( somethingRemoved = true; );
                m_queue.removeEntry(i);
                break;
            }
        }
        HK_ASSERT_NO_MSG( 0xf0456567,  somethingRemoved );
    }

    handle->m_numActiveThreads--;
    if( handle->m_multiplicity + handle->m_numActiveThreads > 0 )
    {
        // this is not the last thread to work on this task
        return 0;
    }

    // Decrement any successors
    // (those reaching zero are collected here and enqueued further down)
    hkInplaceArray<HandleImpl*, 16>::Temp newlyAvailableHandles;
    for( int i=0, n=handle->m_successors.getSize(); i<n; ++i )
    {
        HandleImpl* successor = handle->m_successors[i];
        HK_ASSERT_NO_MSG( 0x1737c570, successor->m_dependencyCount > 0 );
        if( --successor->m_dependencyCount == 0 )
        {
            HK_ASSERT_NO_MSG( 0x69c34bb2, successor->isSubmitted() );
            newlyAvailableHandles.pushBack( successor );
        }
    }
    handle->m_successors.clear();

    // Immediately wake any other threads that are waiting for this specific handle
    if( handle->m_waitingThreadsBitfield.anyIsSet() )
    {
        hkMonitorStream* monitorStream = HK_NULL;
        HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_ON_MONITORS_ENABLED(monitorStream = hkMonitorStream::getInstancePtr()));

        // Walk the bitfield one unit at a time; the inner loop shifts the unit right until no set bits remain.
        for (int ui = 0; ui < HandleImpl::WaitingThreadsBitfield::NUM_UNIT; ++ui)
        {
            HandleImpl::WaitingThreadsBitfield::Unit bitfield = handle->m_waitingThreadsBitfield.getUnit(ui);
            for (int bi = 0; bitfield != 0; bi++, bitfield >>= 1)
            {
                if( bitfield & 1 )
                {
                    // Note: The bitfield isn't updated until after the semaphore is acquired, so the thread might already be awake.
                    // Therefore we also check m_isWaiting (which is guaranteed to be cleared before the semaphore is signaled).
                    // 'i' is the thread context index that this bit stands for.
                    int i = bi + ui * HandleImpl::WaitingThreadsBitfield::BITS_PER_UNIT;
                    HK_ASSERT_NO_MSG(0x786395d0, i < m_maxUsedThreadContext);
                    if(m_threadContexts[i]->m_isWaiting)
                    {
                        m_threadContexts[i]->m_isWaiting = false;
                        ThreadContextType* threadContext = static_cast<ThreadContextType*>(m_threadContexts[i]);
                        threadContext->signal(monitorStream);
                    }
                }
            }
        }
        handle->m_waitingThreadsBitfield.clear();
    }

    // Add any newly available handles to the queue
    int numNewTasks = 0;
    for( int i=0; i<newlyAvailableHandles.getSize(); ++i )
    {
        HandleImpl* newAvailable = newlyAvailableHandles[i];
        numNewTasks += newAvailable->m_multiplicity;
        _enqueueHandle( newAvailable );
    }

    // mark the handle as finished
    // (the dependency count is set to hkUint16(-1) for this)
    handle->m_dependencyCount = hkUint16(-1);

    return numNewTasks;
}

/// Pick up to 'numThreadsToWake' waiting thread contexts, in index order, clear their m_isWaiting
/// flag and append them to 'threadsOut'. The threads are not signaled here.
/// Not thread safe.
HK_INLINE void hkDefaultTaskQueueEx::_preWakeThreads( int numThreadsToWake, hkInplaceArray<ThreadContext*, MAX_NUM_THREADS>& threadsOut )
{
    int numStillWanted = numThreadsToWake;
    for( int contextIdx = 0; contextIdx < m_maxUsedThreadContext && numStillWanted > 0; ++contextIdx )
    {
        ThreadContext* candidate = m_threadContexts[contextIdx];
        if( !candidate->m_isWaiting )
        {
            continue;
        }

        // Claim the thread now so that nobody else picks it before it is signaled.
        candidate->m_isWaiting = false;
        threadsOut.pushBackUnchecked( candidate );
        --numStillWanted;
    }
}

/// Clear m_isWaiting on, and signal, every thread context that is currently waiting.
/// Not thread safe.
template < typename ThreadContextType >
HK_INLINE void hkDefaultTaskQueueEx::_wakeAllThreads()
{
    hkMonitorStream* monitorStream = HK_NULL;
    HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_ON_MONITORS_ENABLED(monitorStream = hkMonitorStream::getInstancePtr()));

    for (int contextIdx = 0; contextIdx < m_maxUsedThreadContext; ++contextIdx)
    {
        ThreadContextType* sleeper = static_cast<ThreadContextType*>(m_threadContexts[contextIdx]);
        if( !sleeper->m_isWaiting )
        {
            continue;
        }

        // The flag has to be cleared before the signal is sent.
        sleeper->m_isWaiting = false;
        sleeper->signal(monitorStream);
    }
}

/// Signals each thread context listed in threadsToWake, in array order.
/// The contexts are cast to ThreadContextType before signal() is called; their m_isWaiting
/// flags are not touched here.
template < typename ThreadContextType >
HK_INLINE void hkDefaultTaskQueueEx::_wakeThreads(const hkArrayBase<ThreadContext*>& threadsToWake)
{
    // The monitor stream is only fetched when task queue timers are compiled in
    hkMonitorStream* monitorStream = HK_NULL;
    HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_ON_MONITORS_ENABLED(monitorStream = hkMonitorStream::getInstancePtr()));

    // Waking the threads is thread safe. An empty array simply skips the loop.
    for( int idx = 0; idx < threadsToWake.getSize(); ++idx )
    {
        ThreadContextType* ctx = static_cast<ThreadContextType*>(threadsToWake[idx]);
        ctx->signal(monitorStream);
    }
}

/// Verifies, for every thread context in [0, m_maxUsedThreadContext), that no monitor stream
/// is attached and that the context reports itself as unsignaled.
/// ThreadContextType is the concrete context type the stored base pointers are cast to.
template < typename ThreadContextType >
HK_INLINE void hkDefaultTaskQueueEx::_checkConsistency()
{
    int idx = 0;
    while( idx < m_maxUsedThreadContext )
    {
        ThreadContextType* threadContext = static_cast<ThreadContextType*>(m_threadContexts[idx]);
        ++idx;

        // A context that still has a monitor stream attached is still borrowed by a thread
        HK_ASSERT(0xb3890c2, threadContext->m_monitorStream == HK_NULL, "Cannot reset a queue while it is being processed");
        threadContext->assertUnsignaled();
    }
}


/// Returns the thread context the calling thread should use with this queue.
///
/// If the thread-local cached context belongs to this queue it is reused: its recursion count
/// is incremented and, if the monitor stream differs from the stored one, the timer fields are
/// refreshed under the lock. Otherwise an index is popped from m_freeThreadContexts under the
/// lock, the matching context is initialized (timer fields, recursion count of 1), and it is
/// stored in the thread-local slot.
///
/// CriticalSectionType is the concrete type m_lock is cast to.
/// monitorStream may be HK_NULL, in which case the timer fields are set to HK_NULL.
template < typename CriticalSectionType >
HK_INLINE _Ret_notnull_ hkDefaultTaskQueueEx::ThreadContext* hkDefaultTaskQueueEx::_allocateThreadContext(_Inout_ hkMonitorStream* monitorStream)
{
    CriticalSectionType* lock = static_cast<CriticalSectionType*>(m_lock);

    // Try to reuse the cached thread context
    ThreadContext* tc = HK_THREAD_LOCAL_GET(hkDefaultTaskQueueThreadContext);
    if( tc && tc->m_owner == this )
    {
        if( tc->m_monitorStream != monitorStream )  // only take the lock when the timer data actually changes
        {
            lock->enter(monitorStream);
            tc->m_monitorStream = monitorStream;
            tc->m_monitorStreamSafeEnd = monitorStream ? monitorStream->getEnd() : HK_NULL;
            lock->leave(monitorStream);
        }
        // Matched by the decrement in _freeThreadContext()
        tc->m_recursiveCount++;
        return tc;
    }

    // Borrow a thread context from the pool
    lock->enter(monitorStream);
    {
        HK_ASSERT(0x527bc01a, !m_freeThreadContexts.isEmpty(), "No thread context available");
        const int threadIndex = m_freeThreadContexts.back();
        // Track the highest index handed out so far; loops over contexts only visit [0, m_maxUsedThreadContext)
        m_maxUsedThreadContext = hkMath::max2(threadIndex+1, m_maxUsedThreadContext);
        m_freeThreadContexts.popBack();
        HK_ASSERT_NO_MSG(0x5b7be0d9, threadIndex < m_maxNumThreads);
        tc = m_threadContexts[threadIndex];
        HK_ASSERT_NO_MSG(0x5b7be0d9, threadIndex == tc->m_index);

        // Initialize timer data (in critical section to avoid race with getTimerData())
        tc->m_monitorStream = monitorStream;
        tc->m_monitorStreamSafeEnd = monitorStream ? monitorStream->getEnd() : HK_NULL;
        tc->m_recursiveCount = 1;

        // Cache the context so later calls on this thread take the fast path above
        HK_THREAD_LOCAL_SET(hkDefaultTaskQueueThreadContext, tc);

        if(m_schedulingMode == MODE_ASYNC_THREAD_POOL)
        {
            // Record which thread pool thread this context is now bound to
            auto asyncThreadPoolCtx = static_cast<hkDefaultTaskQueueDetail::AsyncThreadPoolContext*>(tc);
            asyncThreadPoolCtx->setThreadPoolThreadIdx(hkAsyncThreadPool::getCurrentThreadIndex());
        }
    }
    lock->leave(monitorStream);

    return tc;
}

/// Releases one reference to a thread context obtained from _allocateThreadContext().
/// While the recursion count stays above zero nothing else happens. When it reaches zero the
/// timer fields are cleared, the context index is returned to m_freeThreadContexts and the
/// thread-local cached context is reset to HK_NULL, all under the queue lock.
HK_INLINE void hkDefaultTaskQueueEx::_freeThreadContext(_Inout_ ThreadContext* threadContext )
{
    --threadContext->m_recursiveCount;
    if (threadContext->m_recursiveCount > 0)
    {
        // Still borrowed by an outer call on this thread
        HK_ASSERT(0x32980611, threadContext->m_currentHandle != nullptr, "A recursively obtained thread context should have a current task handle.");
        return;
    }

    HK_ASSERT(0x44b5b8aa, threadContext->m_currentHandle == nullptr, "Shouldn't free a thread context while a task is executing in it.");

    // The monitor stream is only fetched when task queue timers are compiled in
    hkMonitorStream* monitorStream = HK_NULL;
    HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_ON_MONITORS_ENABLED(monitorStream = hkMonitorStream::getInstancePtr()));

    m_lock->enter(monitorStream);

    // De-initialize timer data (in critical section to avoid race with getTimerData())
    threadContext->m_monitorStream = HK_NULL;
    threadContext->m_monitorStreamSafeEnd = HK_NULL;
    threadContext->m_clearTimerData = false;

    // Hand the context back to the pool and forget it on this thread
    m_freeThreadContexts.pushBackUnchecked(threadContext->m_index);
    HK_THREAD_LOCAL_SET(hkDefaultTaskQueueThreadContext, HK_NULL);

    m_lock->leave(monitorStream);
}


/// Main worker loop: repeatedly finishes the previously processed handle, fetches the next
/// handle from the queue and runs its task, until _getNextHandle() yields no handle.
///
/// Template parameters:
///  - ShouldLoop: when false, no further handle is fetched once one handle has been processed,
///    so the loop exits after finishing it.
///  - FREE_HANDLE: when true, handleToWaitFor is placed in the thread context's free-handle
///    cache after the loop (if the cache has room).
///  - CriticalSectionType / ThreadContextType: concrete types that m_lock and the thread
///    context are cast to.
///
/// handleToWaitFor is optional; when given, the handle currently executing on this thread
/// (or HK_NULL) is recorded in its m_waitingHandles for the duration of the loop.
///
/// Returns true if at least one task was processed.
template < bool ShouldLoop, bool FREE_HANDLE, typename CriticalSectionType, typename ThreadContextType >
bool hkDefaultTaskQueueEx::processingLoop(_Inout_opt_ HandleImpl* handleToWaitFor )
{
    hkMemoryRouterSubframeGuard memoryRouterGuard(&hkMemoryRouter::getInstance());

    hkMonitorStream* monitorStream = hkMonitorStream::getInstancePtr();
    ThreadContextType* threadContext = static_cast<ThreadContextType*>(_allocateThreadContext<CriticalSectionType>( monitorStream ));

    ExecutionContext executionCtx;

    hkTask::Input input;
    input.m_taskQueue = this;
    input.m_monitorStream = threadContext->m_monitorStream;
    input.m_executionContext = &executionCtx;

    // Process and finish any available handles until our exit condition is met
    HandleImpl* handle = HK_NULL;
    hkInplaceArray<ThreadContext*, MAX_NUM_THREADS> threadsToWake;
    bool processedAny = false;
    while(1)
    {
        threadContext->tendToThreadPool(0x08db8cde);

        //
        // Serial section
        //

        const char* monitorStreamEnd = monitorStream ? monitorStream->getEnd() : HK_NULL;       // must read this outside of any timer scope

        CriticalSectionType* lock = static_cast<CriticalSectionType*>(m_lock);
        lock->enter(monitorStream);

        threadContext->m_monitorStreamSafeEnd = monitorStreamEnd;   // must write this inside the critical section

        // The handle (if any) that was already executing on this thread when we were called
        HandleImpl* outerHandle = threadContext->m_currentHandle;

        if(handleToWaitFor && !handle) // first time through the loop
        {
            // Record the outer task (or the null handle, if this is from outside a task) as waiting for the target handle
            handleToWaitFor->m_waitingHandles.pushBack(outerHandle);
        }

        // Finish the previous handle, if any
        HK_TIMER_BEGIN_LIST2(monitorStream, "GetNextTask", "FinishPrevious");
        int numNewHandles = 0;
        if( handle )
        {
            HK_ASSERT_NO_MSG( 0x2ee6a42c, handle->m_waitingThreadsBitfield.isClear(threadContext->m_index) );
            numNewHandles = _finishHandle<ThreadContextType>( handle );
        }

        if( ShouldLoop || !handle )
        {
            // Try to get the next one
            HK_TIMER_SPLIT_LIST2( monitorStream, "GetNext" );
            handle = _getNextHandle<ShouldLoop, CriticalSectionType, ThreadContextType>( *threadContext, m_queue, handleToWaitFor, &input.m_multiplicityIndex );
        }
        else
        {
            // Clear this to exit the loop
            handle = HK_NULL;
        }

        HK_TIMER_END_LIST2( monitorStream );

        if(handleToWaitFor && !handle) // last time through the loop
        {
            // Remove the record added earlier
            handleToWaitFor->m_waitingHandles.removeAt(handleToWaitFor->m_waitingHandles.indexOf(outerHandle));
        }

        // Clear timers now if requested
        if( HK_VERY_UNLIKELY( threadContext->m_clearTimerData ) )
        {
            if (monitorStream )
            {
                monitorStream->reset();
            }
            threadContext->m_clearTimerData = false;
        }

        // Update timers endpoint (outside any timer scope)
        threadContext->m_monitorStreamSafeEnd = monitorStream ? monitorStream->getEnd() : HK_NULL;

        // Prepare to wake enough other threads to allow the new handles to be processed in parallel
        if( numNewHandles > 0 )
        {
            int numThreadsToWake = (handle) ? numNewHandles-1 : numNewHandles; // this thread may have taken one
            _preWakeThreads( numThreadsToWake, threadsToWake );
        }

        lock->leave(monitorStream);

        threadContext->tendToThreadPool(0xb011ac3e);

        //
        // Concurrent section
        //

        // Wake the other threads now (outside the critical section because it can be expensive)
        if( !threadsToWake.isEmpty() )
        {
            for( int i=0; i<threadsToWake.getSize(); ++i )
            {
                static_cast<ThreadContextType*>(threadsToWake[i])->signal(monitorStream);
            }
            threadsToWake.clear();
        }

        threadContext->tendToThreadPool(0xf0e3c62c);

        if( handle )
        {
            // Process the task, using the timer stack of thread that submitted it
            HK_MONITOR_ENTER_TAG( monitorStream, handle->m_parentMonitorStreamTag );
            {
                threadContext->m_currentHandle = handle;

                executionCtx.m_abortRequest = &handle->m_abortRequest;
                handle->m_task->process( input );

                // Restore the outer handle so nested processing loops unwind correctly
                HK_ASSERT_NO_MSG(0x5e6c8026, threadContext->m_currentHandle == handle);
                threadContext->m_currentHandle = outerHandle;
            }
            HK_MONITOR_EXIT_TAG( monitorStream );
            processedAny = true;

            hkMemoryRouter::getInstance().advanceFrame();
        }
        else
        {
            // Exit condition met
            break;
        }

        threadContext->tendToThreadPool(0xe52d5b60);
    }

    if( FREE_HANDLE )
    {
        // The loop above only exits once 'handle' is HK_NULL, so the handle to recycle is the one
        // we were waiting for. Caching 'handle' here would put a null pointer into the free list.
        // NOTE(review): when the cache is already full the handle is not returned anywhere, as before.
        if( handleToWaitFor && threadContext->m_freeHandles.getSize() < ThreadContext::MAX_CACHED_HANDLES )
        {
            threadContext->m_freeHandles.pushBackUnchecked(handleToWaitFor);
        }
    }

    threadContext->tendToThreadPool(0x77bbddcf);

    _freeThreadContext( threadContext );

    // NOTE(review): the context may already be back in the free pool at this point - confirm
    // that tendToThreadPool() is safe to call on it.
    threadContext->tendToThreadPool(0x51ba5d33);

    return processedAny;
}

//////////////////////////////////////////////////////////////////////////
// hkDefaultTaskQueue
//////////////////////////////////////////////////////////////////////////

/// Constructs the queue from cinfo: creates the lock and one thread context per thread for the
/// requested scheduling mode, creates the handle allocator, and marks all contexts as free.
/// cinfo.m_maxNumThreads is clamped to the size of m_threadContexts (and asserted not to exceed it).
hkDefaultTaskQueue::hkDefaultTaskQueue( const Cinfo& cinfo )
:   m_serial( 1 ),
    m_maxNumThreads( hkMath::min2( cinfo.m_maxNumThreads, HK_COUNT_OF(m_threadContexts) ) ),
    m_isClosed( false ),
    m_numThreadsHint( 0 ),
    m_schedulingMode( cinfo.m_schedulingMode )
{
    // The clamp above only changes the value when the caller asked for too many threads
    HK_ASSERTV( 0xb6ca15f5, m_maxNumThreads == cinfo.m_maxNumThreads,
        "m_maxNumThreads cannot be greater than {}", HK_COUNT_OF( m_threadContexts ) );

    HK_ON_DEBUG( m_logger = HK_NULL );

    // Create the critical section type that matches the scheduling mode
    switch(m_schedulingMode)
    {
        HK_NO_DEFAULT_CASE(0x7a0772ec, "Invalid m_schedulingMode.");

    case MODE_BLOCKING:
        m_lock = new BlockingCriticalSection(cinfo.m_spinCount);
        break;

    case MODE_SPINNING:
        m_lock = new SpinningCriticalSection();
        break;

    case MODE_ASYNC_THREAD_POOL:
        m_lock = new AsyncThreadPoolCriticalSection();
        break;
    }

    m_handleAllocator = new HandleAllocator( cinfo.m_spinCount );
    m_queue.reserve(256);

    // Allocate all thread contexts
    for( int i = 0; i < m_maxNumThreads; i++ )
    {
        switch(m_schedulingMode)
        {
            HK_NO_DEFAULT_CASE(0x4494c0dd, "Invalid m_schedulingMode.");

        case MODE_BLOCKING:
            m_threadContexts[i] = new hkDefaultTaskQueueDetail::BlockingThreadContext(i, cinfo.m_spinCount);
            break;

        case MODE_SPINNING:
            m_threadContexts[i] = new hkDefaultTaskQueueDetail::SpinningThreadContext(i);
            break;

        case MODE_ASYNC_THREAD_POOL:
            m_threadContexts[i] = new hkDefaultTaskQueueDetail::AsyncThreadPoolContext(i, cinfo.m_asyncThreadPool);
            break;
        }

        m_threadContexts[i]->m_owner = this;

        // Indices are stored in descending order, so back() hands out the lowest index first
        m_freeThreadContexts.pushBackUnchecked( m_maxNumThreads - i - 1 );
    }

    // Null out the unused tail of the context array
    for( int i = m_maxNumThreads; i < HK_COUNT_OF(m_threadContexts); ++i )
    {
        m_threadContexts[i] = HK_NULL;
    }
    m_maxUsedThreadContext = 0;
}

/// Closes the queue, then deletes all thread contexts, the handle allocator, the lock and
/// (in debug builds) the logger. Asserts that every thread context has been returned to the pool.
hkDefaultTaskQueue::~hkDefaultTaskQueue()
{
    // Instruct all worker threads to exit
    close();

    HK_ASSERT( 0xf0456567, m_freeThreadContexts.getSize() == m_maxNumThreads,
        "You called the destructor while threads are still working on the queue");

    // Delete all thread contexts
    for (int i = 0; i < m_maxNumThreads; i++)
    {
        ThreadContext* tc = m_threadContexts[i];
#ifdef HK_DEBUG
        // Debug only: return cached handles first so they are not counted as allocated by the check below
        if( !tc->m_freeHandles.isEmpty() )
        {
            freeHandles( (Handle*)tc->m_freeHandles.begin(), tc->m_freeHandles.getSize(), sizeof(Handle) );
            tc->m_freeHandles.clear();
        }
#endif
        delete tc;
        m_threadContexts[i] = HK_NULL;
    }

    // Check for any handles still in use
#ifdef HK_DEBUG
    for( HandleAllocator::Iterator it = m_handleAllocator->getIterator(); it.isValid(); it.next() )
    {
        if( it.getHandle()->isAllocated() )
        {
            // Warn once, on the first handle found still allocated
            HK_WARN( 0xb8e01a44, "Your task queue still has some handles allocated. Please check for leaks." );
            break;
        }
    }
#endif

    // Delete everything
    delete m_handleAllocator;
    delete m_lock;
    m_lock = HK_NULL;

    HK_ON_DEBUG( delete m_logger );
}

/// Allocates numHandles handles and writes them to handlesOut, advancing by striding bytes
/// between consecutive outputs.
/// Requests smaller than half of ThreadContext::MAX_CACHED_HANDLES are served from the calling
/// thread's cached thread context when that context belongs to this queue; the cache is topped
/// up from the handle allocator when it holds too few handles. All other requests go straight
/// to the handle allocator.
void hkDefaultTaskQueue::allocateHandles(_Inout_updates_bytes_(numHandles * striding) Handle* handlesOut, int numHandles, int striding )
{
    if ( numHandles >= ThreadContext::MAX_CACHED_HANDLES / 2 )
    {
        // Too large for the per-thread cache. Thread safe.
        m_handleAllocator->allocate( (HandleImpl**)handlesOut, numHandles, striding );
        return;
    }

    ThreadContext* cache = HK_THREAD_LOCAL_GET(hkDefaultTaskQueueThreadContext);
    if ( !cache || cache->m_owner != this )
    {
        // No cached context for this queue on this thread. Thread safe.
        m_handleAllocator->allocate( (HandleImpl**)handlesOut, numHandles, striding );
        return;
    }

    // Refill the cache if it cannot satisfy the request
    const int numCached = cache->m_freeHandles.getSize();
    if ( numHandles > numCached )
    {
        const int refillCount = ThreadContext::MAX_CACHED_HANDLES / 2;
        m_handleAllocator->allocate((HandleImpl**)cache->m_freeHandles.begin() + numCached, refillCount, sizeof(Handle) );
        cache->m_freeHandles.setSizeUnchecked(numCached + refillCount);
    }

    // Hand out handles starting from the end of the cache, then drop them from it
    const int lastIndex = cache->m_freeHandles.getSize() - 1;
    for ( int out = 0; out < numHandles; ++out )
    {
        HandleImpl* taken = cache->m_freeHandles[lastIndex - out];
        m_handleAllocator->initializeHandle(taken);
        hkAddByteOffset(handlesOut, out * striding)[0] = (_Handle*)taken;
    }
    cache->m_freeHandles.popBack(numHandles);
}

/// Frees numHandles handles read from handlesOut, advancing by striding bytes between
/// consecutive entries.
/// Requests smaller than half of ThreadContext::MAX_CACHED_HANDLES are stored in the calling
/// thread's cached thread context when that context belongs to this queue; if they would fill
/// the cache, the current cache contents are first returned to the handle allocator.
/// All other requests go straight to the handle allocator.
void hkDefaultTaskQueue::freeHandles(_Inout_updates_bytes_(numHandles * striding) Handle* handlesOut, int numHandles, int striding )
{
    if ( numHandles >= ThreadContext::MAX_CACHED_HANDLES / 2 )
    {
        // Too large for the per-thread cache. Thread safe.
        m_handleAllocator->free( (HandleImpl**)handlesOut, numHandles, striding );
        return;
    }

    ThreadContext* cache = HK_THREAD_LOCAL_GET(hkDefaultTaskQueueThreadContext);
    if ( !cache || cache->m_owner != this )
    {
        // No cached context for this queue on this thread. Thread safe.
        m_handleAllocator->free( (HandleImpl**)handlesOut, numHandles, striding );
        return;
    }

    // Make room by flushing the whole cache to the allocator
    const int numCached = cache->m_freeHandles.getSize();
    if ( numHandles + numCached >= ThreadContext::MAX_CACHED_HANDLES )
    {
        m_handleAllocator->free((HandleImpl**)cache->m_freeHandles.begin(), numCached, sizeof(Handle) );
        cache->m_freeHandles.setSizeUnchecked(0);
    }

    // Keep the returned handles in the cache for later reuse by this thread
    for ( int in = 0; in < numHandles; ++in )
    {
        HandleImpl* returned = (HandleImpl*)hkAddByteOffset(handlesOut, in * striding)[0];
        cache->m_freeHandles.pushBackUnchecked(returned);
    }
}

/// Binds a task and priority to an allocated handle and clears its abort request.
/// A null task is replaced by the queue's m_emptyTask. In debug builds the handle is also
/// reported to the logger when tracing is active.
/// Not thread safe; a given handle should only be initialized from one thread.
void hkDefaultTaskQueue::initHandle(Handle handle, _In_opt_ hkTask* task, hkTask::Priority::Enum priority )
{
    HandleImpl* h = (HandleImpl*)handle;
    HK_ASSERT( 0x7b72f952, h && h->isAllocated(), "Invalid handle" );
    HK_ASSERT( 0x7b72f953, (priority & ((1<<HandleImpl::NUM_PRIORITY_BITS)-1)) == priority, "Priority out of range" );

    // Never store HK_NULL as the task, so that no null check is needed when processing
    if( task )
    {
        h->m_task = task;
    }
    else
    {
        h->m_task = &m_emptyTask;
    }
    h->m_abortRequest = false;
    h->m_priority = priority;

#ifdef HK_DEBUG
    if( m_logger )
    {
        m_logger->logHandle( h );
    }
#endif
}

/// Stores the multiplicity of an allocated handle (as a 16 bit value) and records whether the
/// mode is ABORT_ON_FIRST_FINISHED_TASK.
/// Not thread safe; a given handle should only be configured from one thread.
void hkDefaultTaskQueue::setMultiplicity( Handle handle, int multiplicity, hkTask::MultiplicityMode::Enum mode )
{
    HandleImpl* h = (HandleImpl*)handle;
    HK_ASSERT( 0x7b72f952, h && h->isAllocated(), "Invalid handle" );
    HK_ASSERT( 0xf043df56, multiplicity < 0x10000, "Multiplicity must be less than 2^16" );

    const bool abortOnFirstFinished = (mode == hkTask::MultiplicityMode::ABORT_ON_FIRST_FINISHED_TASK);
    h->m_multiplicity = (hkUint16)multiplicity;
    h->m_onFinishZeroMultiplicity = abortOnFirstFinished;
}

/// Registers a single dependency between predecessor and successor, under the queue lock.
/// Asserts that the queue is not closed.
void hkDefaultTaskQueue::addDependency( Handle predecessor, Handle successor )
{
    HK_ASSERT( 0x63062888, !m_isClosed, "Cannot modify a closed queue. Call reset() first." );

    // The monitor stream is only fetched when task queue timers are compiled in
    hkMonitorStream* monitorStream = HK_NULL;
    HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_ON_MONITORS_ENABLED(monitorStream = hkMonitorStream::getInstancePtr()));

    hkDefaultTaskQueueEx* queueEx = (hkDefaultTaskQueueEx*)this;
    HandleImpl* before = (HandleImpl*)predecessor;
    HandleImpl* after = (HandleImpl*)successor;

    // Queue isn't thread safe, so lock this
    m_lock->enter(monitorStream);
    queueEx->_addDependency( before, after );
    m_lock->leave(monitorStream);
}

/// Registers numDependencies dependencies in array order, holding the queue lock once for
/// the whole batch. Asserts that the queue is not closed.
void hkDefaultTaskQueue::addDependencies( _Inout_updates_(numDependencies) Dependency* dependencies, int numDependencies )
{
    HK_ASSERT( 0x6306289, !m_isClosed, "Cannot modify a closed queue. Call reset() first." );

    // The monitor stream is only fetched when task queue timers are compiled in
    hkMonitorStream* monitorStream = HK_NULL;
    HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_ON_MONITORS_ENABLED(monitorStream = hkMonitorStream::getInstancePtr()));

    hkDefaultTaskQueueEx* queueEx = (hkDefaultTaskQueueEx*)this;

    // Queue isn't thread safe, so lock this
    m_lock->enter(monitorStream);
    for( int idx = 0; idx < numDependencies; ++idx )
    {
        Dependency& dep = dependencies[idx];
        queueEx->_addDependency( (HandleImpl*)(dep.m_predecessor), (HandleImpl*)(dep.m_successor) );
    }
    m_lock->leave(monitorStream);
}

/// Aborts the given handles through a HandleGraph built on the handle allocator, under the
/// queue lock. Does nothing when numHandles is zero.
void hkDefaultTaskQueue::abortHandles(_Inout_updates_bytes_(numHandles * striding) Handle* handles, int numHandles, int striding )
{
    if( numHandles == 0 )
    {
        return;
    }

    // The monitor stream is only fetched when task queue timers are compiled in
    hkMonitorStream* monitorStream = HK_NULL;
    HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_ON_MONITORS_ENABLED(monitorStream = hkMonitorStream::getInstancePtr()));

    m_lock->enter(monitorStream);
    {
        // Scoped so that the graph is destroyed before the lock is released
        HandleGraph handleGraph(m_handleAllocator);
        handleGraph.abortHandles(handles, numHandles, striding);
    }
    m_lock->leave(monitorStream);
}

namespace HK_UNITY_ANONYMOUS_NAMESPACE
{
    /// Returns true if the calling thread has a cached thread context whose owner is taskQueue.
    bool currentThreadBelongsToTaskQueue(hkDefaultTaskQueue const* taskQueue)
    {
        hkDefaultTaskQueue::ThreadContext* cached = HK_THREAD_LOCAL_GET(hkDefaultTaskQueueThreadContext);
        if( !cached )
        {
            return false;
        }
        return cached->m_owner == taskQueue;
    }
}

/// Submits numHandles handles (read from handles with the given byte striding) to the queue.
/// Each handle is stamped with a monitor stream tag from the submitting thread, the handles are
/// added under the queue lock, and afterwards up to one waiting thread per enqueued handle is
/// signaled. Does nothing when numHandles is zero.
void hkDefaultTaskQueue::submitHandles(_Inout_updates_bytes_(numHandles * striding) Handle* handles, int numHandles, Order::Enum order, int striding )
{
    HK_UNITY_USING_ANONYMOUS_NAMESPACE;

    if( numHandles == 0 )
    {
        return;
    }

    hkDefaultTaskQueueEx* queueEx = (hkDefaultTaskQueueEx*)this;

    hkMonitorStream* monitorStream = HK_NULL;
    HK_ON_MONITORS_ENABLED(monitorStream = hkMonitorStream::getInstancePtr());

    // Add a unique tag to the caller's monitor stream, and set it in the handle.
    // A timer with this tag will then wrap each processed task in each worker threads monitor stream.
    {
        // If an async task is submitted from a non-async thread, we can't associate the async task with a tag
        // from the submitting thread, because it wouldn't be valid past the current frame.
        const bool submittedFromOutsidePool =
            (m_schedulingMode == MODE_ASYNC_THREAD_POOL) && !currentThreadBelongsToTaskQueue(this);

        hkUint32 parentTag = 0;
        if( !submittedFromOutsidePool && monitorStream )
        {
            parentTag = monitorStream->addTag();
        }

        for( int idx = 0; idx < numHandles; ++idx )
        {
            Handle* slot = hkAddByteOffset( handles, idx * striding );
            ((HandleImpl**)slot)[0]->m_parentMonitorStreamTag = parentTag;
        }
    }

    hkInplaceArray<ThreadContext*, MAX_NUM_THREADS> sleepers;

    // Queue isn't thread safe, so lock this
    m_lock->enter(monitorStream);
    const int numEnqueued = queueEx->_addHandles( (HandleImpl**)handles, numHandles, order, striding );
    queueEx->_preWakeThreads( numEnqueued, sleepers );
    m_lock->leave(monitorStream);

    // Signal the selected threads outside the lock, using the context type of the scheduling mode
    switch(m_schedulingMode)
    {
        HK_NO_DEFAULT_CASE(0x2dc30671, "Invalid m_schedulingMode.");

    case MODE_BLOCKING:
        queueEx->_wakeThreads<hkDefaultTaskQueueDetail::BlockingThreadContext>(sleepers);
        break;

    case MODE_SPINNING:
        queueEx->_wakeThreads<hkDefaultTaskQueueDetail::SpinningThreadContext>(sleepers);
        break;

    case MODE_ASYNC_THREAD_POOL:
        queueEx->_wakeThreads<hkDefaultTaskQueueDetail::AsyncThreadPoolContext>(sleepers);
        break;
    }
}

void hkDefaultTaskQueue::process()
{
    hkDefaultTaskQueueEx* self = (hkDefaultTaskQueueEx*)this;
    switch(m_schedulingMode)
    {
        HK_NO_DEFAULT_CASE(0x601f9b85, "Invalid m_schedulingMode.");

    case MODE_BLOCKING:
        self->processingLoop<true, false, BlockingCriticalSection, hkDefaultTaskQueueDetail::BlockingThreadContext>();
        break;

    case MODE_SPINNING:
        self->processingLoop<true, false, SpinningCriticalSection, hkDefaultTaskQueueDetail::SpinningThreadContext>();
        break;

    case MODE_ASYNC_THREAD_POOL:
        self->processingLoop<true, false, AsyncThreadPoolCriticalSection, hkDefaultTaskQueueDetail::AsyncThreadPoolContext>();
        break;
    }
}

/// Convenience wrapper: allocates one handle, initializes it with task and priority, applies
/// multiplicity and mode when multiplicity differs from 1, submits it in LIFO order and
/// returns it. The caller receives the handle and is presumably responsible for freeing it.
hkTaskQueue::Handle hkDefaultTaskQueue::addAndSubmitTask(
    hkTask* task, int multiplicity, hkTask::MultiplicityMode::Enum mode, hkTask::Priority::Enum priority )
{
    Handle newHandle;
    allocateHandles( &newHandle, 1 );
    initHandle( newHandle, task, priority );

    // setMultiplicity() is skipped for a multiplicity of exactly 1, so mode is not applied then
    const bool needsMultiplicity = (multiplicity != 1);
    if( needsMultiplicity )
    {
        setMultiplicity( newHandle, multiplicity, mode );
    }

    submitHandles( &newHandle, 1, Order::LIFO );
    return newHandle;
}

/// Returns whether the handle is finished; forwards to isHandleFinished().
bool hkDefaultTaskQueue::isFinished( Handle handle )
{
    const bool finished = hkDefaultTaskQueue::isHandleFinished( handle );
    return finished;
}

/// Processes tasks on the calling thread until the handle is finished, then frees the handle.
void hkDefaultTaskQueue::processAndFree(Handle handle)
{
    processUntilFinished( handle );

    Handle finishedHandle = handle;
    freeHandles( &finishedHandle, 1 );
}

/// Runs the processing loop on the calling thread with ShouldLoop = false and no handle to
/// wait for, using the lock and thread context types of the current scheduling mode.
/// Returns the result of processingLoop(), i.e. true if a task was processed. Returns false
/// if the scheduling mode is invalid and the default case does not terminate.
bool hkDefaultTaskQueue::processOnce()
{
    hkDefaultTaskQueueEx* self = (hkDefaultTaskQueueEx*)this;

    // Every case funnels into the single return below, so control can never flow off the end
    // of this non-void function, whatever HK_NO_DEFAULT_CASE expands to.
    bool processedAny = false;
    switch(m_schedulingMode)
    {
        HK_NO_DEFAULT_CASE(0x5826fefd, "Invalid m_schedulingMode.");

    case MODE_BLOCKING:
        processedAny = self->processingLoop<false, false, BlockingCriticalSection, hkDefaultTaskQueueDetail::BlockingThreadContext>();
        break;

    case MODE_SPINNING:
        processedAny = self->processingLoop<false, false, SpinningCriticalSection, hkDefaultTaskQueueDetail::SpinningThreadContext>();
        break;

    case MODE_ASYNC_THREAD_POOL:
        processedAny = self->processingLoop<false, false, AsyncThreadPoolCriticalSection, hkDefaultTaskQueueDetail::AsyncThreadPoolContext>();
        break;
    }
    return processedAny;
}

/// Processes tasks on the calling thread until the given handle is finished.
/// Returns immediately if the handle is already finished; otherwise runs the processing loop
/// with ShouldLoop = true and the handle as the one to wait for.
void hkDefaultTaskQueue::processUntilFinished( Handle handle )
{
    HandleImpl* h = (HandleImpl*)handle;
    HK_ASSERT( 0x578367fa, h && h->isAllocated(), "Invalid handle" );
    if( h->isFinished() )
    {
        return;
    }

    typedef hkDefaultTaskQueueDetail::BlockingThreadContext BlockingCtx;
    typedef hkDefaultTaskQueueDetail::SpinningThreadContext SpinningCtx;
    typedef hkDefaultTaskQueueDetail::AsyncThreadPoolContext AsyncCtx;

    // Work until it becomes finished
    hkDefaultTaskQueueEx* queueEx = (hkDefaultTaskQueueEx*)this;
    switch(m_schedulingMode)
    {
        HK_NO_DEFAULT_CASE(0x3af3480c, "Invalid m_schedulingMode.");

    case MODE_BLOCKING:
        queueEx->processingLoop<true, false, BlockingCriticalSection, BlockingCtx>(h);
        break;

    case MODE_SPINNING:
        queueEx->processingLoop<true, false, SpinningCriticalSection, SpinningCtx>(h);
        break;

    case MODE_ASYNC_THREAD_POOL:
        queueEx->processingLoop<true, false, AsyncThreadPoolCriticalSection, AsyncCtx>(h);
        break;
    }
}

/// Processes until all given handles are finished.
/// Null entries and handles that are already finished are filtered out first; the remaining
/// handles are passed to the hkTaskQueue base implementation.
void hkDefaultTaskQueue::processUntilFinished(_Inout_updates_(numHandles) Handle* handles, int numHandles )
{
    hkInplaceArray<Handle,16>::Temp pending;
    pending.reserve( numHandles );

    for( int idx = 0; idx < numHandles; ++idx )
    {
        // A null array or null entry is skipped
        if( !handles || !handles[idx] )
        {
            continue;
        }
        if( ((HandleImpl*)handles[idx])->isFinished() )
        {
            continue;
        }
        pending.pushBackUnchecked( handles[idx] );
    }

    // Pass the remaining ones to the default implementation
    hkTaskQueue::processUntilFinished( pending.begin(), pending.getSize() );
}

void hkDefaultTaskQueue::close()
{
    hkDefaultTaskQueueEx* self = (hkDefaultTaskQueueEx*)this;

    hkMonitorStream* monitorStream = HK_NULL;
    HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_ON_MONITORS_ENABLED(monitorStream = hkMonitorStream::getInstancePtr()));

    m_lock->enter(monitorStream);
    if( !m_isClosed )
    {
        m_isClosed = true;
        // Wake all threads so that they can exit from taskProcessingLoop()
        switch(m_schedulingMode)
        {
            HK_NO_DEFAULT_CASE(0x5338dec8, "Invalid m_schedulingMode.");

        case MODE_BLOCKING:
            self->_wakeAllThreads<hkDefaultTaskQueueDetail::BlockingThreadContext>();
            break;

        case MODE_SPINNING:
            self->_wakeAllThreads<hkDefaultTaskQueueDetail::SpinningThreadContext>();
            break;

        case MODE_ASYNC_THREAD_POOL:
            self->_wakeAllThreads<hkDefaultTaskQueueDetail::AsyncThreadPoolContext>();
            break;
        }
    }
    m_lock->leave(monitorStream);
}

void hkDefaultTaskQueue::reset()
{
#ifdef HK_DEBUG
    hkDefaultTaskQueueEx* self = (hkDefaultTaskQueueEx*)this;
    if (m_schedulingMode == MODE_SPINNING)
    {
        self->_checkConsistency<hkDefaultTaskQueueDetail::SpinningThreadContext>();
    }
    else
    {
        self->_checkConsistency<hkDefaultTaskQueueDetail::BlockingThreadContext>();
    }
#endif

    HK_ASSERT( 0xf0456567, m_freeThreadContexts.getSize() == m_maxNumThreads,
        "You called reset() while threads are still working on the queue");

    for (int i = 0; i < m_maxUsedThreadContext; i++)
    {
        ThreadContext* tc = m_threadContexts[i];
        if (!tc->m_freeHandles.isEmpty())
        {
            freeHandles( (Handle*)tc->m_freeHandles.begin(), tc->m_freeHandles.getSize() );
            tc->m_freeHandles.clear();
        }
        tc->m_monitorStream = HK_NULL;
        tc->m_monitorStreamSafeEnd = HK_NULL;
    }
    // get the order right
    for (int j = 0; j < m_freeThreadContexts.getSize(); j++ )
    {
        m_freeThreadContexts[j] = m_freeThreadContexts.getSize() - j - 1;
    }
    m_maxUsedThreadContext = 0;

#if 0 //defined(HK_DEBUG)
    for (HandleAllocator::Iterator it = m_handleAllocator->getIterator(); it.isValid(); it.next())
    {
        HK_ASSERT(0xf0456567, !it.getHandle()->isAllocated(), "Your task queue still has some handles allocated" );
    }
#endif

    m_handleAllocator->freeAll();
    m_serial = 1;
    m_isClosed = false;
}

/// Returns the current value of the closed flag. The flag is read without taking the lock.
bool hkDefaultTaskQueue::isClosed() const
{
    const bool closed = m_isClosed;
    return closed;
}

/// Returns whether the given allocated handle reports itself as finished.
bool hkDefaultTaskQueue::isHandleFinished( Handle handle )
{
    HandleImpl* h = (HandleImpl*)handle;
    HK_ASSERT( 0x494e0d25, h && h->isAllocated(), "Invalid handle" );

    const bool finished = h->isFinished();
    return finished;
}

/// Returns the abort request flag stored in the given allocated handle.
bool hkDefaultTaskQueue::isHandleAborted(Handle handle)
{
    HandleImpl* h = (HandleImpl*)handle;
    HK_ASSERT(0x494e0d25, h && h->isAllocated(), "Invalid handle");

    const bool abortRequested = h->m_abortRequest;
    return abortRequested;
}

/// Appends one hkTimerData entry to timerDataOut for every used thread context that has a
/// monitor stream attached, under the queue lock.
/// Each entry spans from the stream start to the context's safe end, or is empty (end equal
/// to start) when a timer clear is pending for that context.
void hkDefaultTaskQueue::getTimerData( hkArray<hkTimerData>& timerDataOut )
{
    // The monitor stream is only fetched when task queue timers are compiled in
    hkMonitorStream* monitorStream = HK_NULL;
    HK_TASK_QUEUE_ON_TIMERS_ENABLED(HK_ON_MONITORS_ENABLED(monitorStream = hkMonitorStream::getInstancePtr()));

    m_lock->enter( monitorStream );
    timerDataOut.reserve(timerDataOut.getSize() + m_maxUsedThreadContext);
    for (int idx = 0; idx < m_maxUsedThreadContext; ++idx)
    {
        ThreadContext* threadContext = m_threadContexts[idx];
        if( !threadContext->m_monitorStream )
        {
            // Context is not attached to a monitor stream
            continue;
        }

        HK_ASSERT_NO_MSG(0x5a2e81fa,
            threadContext->m_monitorStreamSafeEnd >= threadContext->m_monitorStream->getStart() &&
            threadContext->m_monitorStreamSafeEnd <= threadContext->m_monitorStream->getEnd() );

        hkTimerData& entry = timerDataOut.expandOne();
        entry.m_streamBegin = threadContext->m_monitorStream->getStart();
        if( threadContext->m_clearTimerData )
        {
            // A clear is pending: report an empty range
            entry.m_streamEnd = threadContext->m_monitorStream->getStart();
        }
        else
        {
            entry.m_streamEnd = threadContext->m_monitorStreamSafeEnd;
        }
    }
    m_lock->leave(monitorStream);
}

void hkDefaultTaskQueue::clearTimerData()
{
    for (int i = 0; i < m_maxUsedThreadContext; ++i)
    {
        ThreadContext* threadContext = m_threadContexts[i];

        if( threadContext->m_monitorStream )
        {
            threadContext->m_clearTimerData = true;
        }
    }
}

/// Starts tracing by creating the logger if there is none yet.
/// Only has an effect when HK_DEBUG is defined.
void hkDefaultTaskQueue::startTrace()
{
#ifdef HK_DEBUG
    if( m_logger )
    {
        // Already tracing
        return;
    }
    m_logger = new Logger();
#endif
}

/// Stops tracing: appends the logger's graph to traceGraphOut and destroys the logger.
/// Does nothing if no trace is active. Only has an effect when HK_DEBUG is defined.
void hkDefaultTaskQueue::endTrace( hkTaskGraph& traceGraphOut )
{
#ifdef HK_DEBUG
    if( !m_logger )
    {
        return;
    }

    traceGraphOut.append( m_logger->m_graph );
    delete m_logger;
    m_logger = HK_NULL;
#endif
}

#if 0
/* threadcontext is passed to input structure
    taskids can be allocated/freed from thread context

    */
hkDefaultTaskQueue::;


// example app
async async(function, multiplicity, multiType)
{
    taskqueue->getThreadContext();

    Handle = threadContext->allocateAndInitHandle(&task, multiplicity);
    submitHandle();
    return Handle;
}

wait(async()
{

}


maint
{

    auto async = Async(dostuff);

    int x = dostuff();
    async.waitAndFree();
}

#endif

/*
 * Havok SDK - Base file, BUILD(#20180110)
 * 
 * Confidential Information of Microsoft Corporation.
 * Not for disclosure or distribution without Microsoft's prior written
 * consent.  This software contains code, techniques and know-how which
 * is confidential and proprietary to Microsoft.  Product and Trade Secret
 * source code contains trade secrets of Microsoft.  Havok Software (C)
 * Copyright 1999-2018 Microsoft Corporation.
 * All Rights Reserved. Use of this software is subject to the
 * terms of an end user license agreement.
 * 
 * The Havok Logo, and the Havok buzzsaw logo are trademarks of Microsoft.
 * Title, ownership rights, and intellectual property rights in the Havok
 * software remain in Microsoft and/or its suppliers.
 * 
 * Use of this software for evaluation purposes is subject to and
 * indicates acceptance of the End User licence Agreement for this
 * product. A copy of the license is included with this software and is
 * also available from Havok Support.
 * 
 */
