// TKBMS v1.0 -----------------------------------------------------
//
// PLATFORM   : WIN32 LINUX32 LINUX64 X64 MAC IOS ANDROID WIIU NACL32 NACL64 DURANGO APOLLO_ARM APOLLO_X86 METRO_X86 METRO_X64 METRO_ARM PS4 UWP OSINTERNAL NX32 NX64
// PRODUCT   : COMMON
// VISIBILITY   : PUBLIC
//
// ------------------------------------------------------TKBMS v1.0

#include <Common/Base/hkBase.h>
#include <Common/Base/Thread/Pool/hkCpuThreadPool.h>
#include <Common/Base/DebugUtil/DeterminismUtil/hkCheckDeterminismUtil.h>
#include <Common/Base/Memory/System/hkMemorySystem.h>
#include <Common/Base/System/hkBaseSystem.h>
#include <Common/Base/Thread/CriticalSection/hkCriticalSection.h>
#include <Common/Base/Thread/Thread/hkWorkerThreadContext.h>
#include <Common/Base/Thread/TaskQueue/Default/hkDefaultTaskQueue.h>
#include <Common/Base/System/Hardware/hkHardwareInfo.h>
#include <Common/Base/Container/PointerMap/hkPointerMap.h>

#if defined(HK_PLATFORM_WIN32) && !defined(HK_PLATFORM_WINRT)
    #include <Common/Base/Fwd/hkwindows.h>
#endif


// Default construction info: one worker thread named "HavokWorkerThread" with the default
// hkThread stack size, a zero per-thread timer buffer allocation and an unspecified hardware
// thread binding.
hkCpuThreadPoolCinfo::hkCpuThreadPoolCinfo()
    : m_hardwareThreadBinding( HardwareThreadBinding::UNSPECIFIED )
    , m_numThreads( 1 )
    , m_stackSize( hkThread::HK_THREAD_DEFAULT_STACKSIZE )
    , m_timerBufferPerThreadAllocation( 0 )
    , m_threadName( "HavokWorkerThread" )
{
}


// Puts the per-thread bookkeeping into a fully defined "not started" state: no owning pool,
// invalid thread ID (-1), unspecified hardware binding, no kill request, no pending timer
// reset and an empty (null) timer stream range.
hkCpuThreadPool::ThreadData::ThreadData()
    : m_threadPool( HK_NULL ),
    m_threadId( -1 ),
    m_hardwareThreadBinding(hkCpuThreadPoolCinfo::HardwareThreadBinding::UNSPECIFIED ),
    m_killThread( false ),
    m_semaphore( 0, 1 )
{
    // These members are assigned in the body rather than the initializer list so that the
    // initialization does not depend on their declaration order in the header.
    m_hardwareThreadMasksOrId = 0;
    m_clearTimers = false;

    // The worker thread only fills in its monitor stream range once it reaches threadMain().
    // Until then the pool may already read or copy m_timerData (appendTimerData(),
    // clearTimerData()), so it must never hold indeterminate pointers.
    m_timerData.m_streamBegin = HK_NULL;
    m_timerData.m_streamEnd = HK_NULL;
}

// Returns the default hardware thread for the worker with the given pool thread ID.
// Used whenever the caller did not supply explicit per-thread masks/IDs.
HK_INLINE static int calcHardwareThreadId( int threadId )
{
#if defined(HK_PLATFORM_WIN32)
    // Distribute the workers round-robin over the reported hardware threads. With one thread
    // per core the sibling slot is always 0, so the result is simply threadId % numCores.
    const int numThreadsPerCore = 1;
    const int numCores = hkHardwareInfo::getNumHardwareThreads();
    const int coreSlot = threadId % numCores;
    const int siblingSlot = ( numThreadsPerCore > 1 ) ? ( ( threadId / numCores ) % numThreadsPerCore ) : 0;
    return ( coreSlot * numThreadsPerCore ) + siblingSlot;
#else
    // Everywhere else the pool thread ID is used directly as the hardware thread ID
    return threadId;
#endif
}


// Builds an idle pool from the given construction info. The worker threads are launched
// immediately only when threadStartMode is START_THREADS_ON_CONSTRUCTION; otherwise the caller
// has to call startThreads() later.
hkCpuThreadPool::hkCpuThreadPool( const hkCpuThreadPoolCinfo& cinfo, ThreadStartMode threadStartMode )
:   m_timerBufferAllocation( cinfo.m_timerBufferPerThreadAllocation ),
    m_gcThreadMemoryOnCompletion( false )
{
    // Nothing is attached or being processed yet
    m_jobQueue = HK_NULL;
    m_taskQueue = HK_NULL;
    m_isRunning = false;

    // Settings shared by every worker thread this pool creates
    m_stackSize = cinfo.m_stackSize;
    m_threadName = cinfo.m_threadName;

    const bool startNow = ( threadStartMode == START_THREADS_ON_CONSTRUCTION );
    if ( startNow )
    {
        startThreads( cinfo );
    }
}


void hkCpuThreadPool::startThreads( const hkCpuThreadPoolCinfo& cinfo )
{
    if ( !m_workerThreads.isEmpty() )
    {
        HK_WARN( 0x5f450f2e, "Pool threads have already been started" );
        return;
    }

    if ( m_taskQueue )
    {
        m_taskQueue->setNumThreadsHint( cinfo.m_numThreads );
    }

    // Create threads
    m_workerThreads.setSize( cinfo.m_numThreads );
    for ( int i = 0; i < cinfo.m_numThreads; i++ )
    {
        ThreadData* threadData = new ThreadData();
        m_workerThreads[i] = threadData;

        threadData->m_threadPool = this;
        threadData->m_killThread = false;
        threadData->m_clearTimers = false;

        // Worker thread IDs start from 1, 0 is reserved for the main thread
        threadData->m_threadId = i + 1;

        // Set the hardware thread ID for platforms that support HW thread affinity/preference
        if ( cinfo.m_hardwareThreadMasksOrIds.getSize() > 0 )
        {
            HK_ASSERT( 0x975fe134, cinfo.m_hardwareThreadMasksOrIds.getSize() >= cinfo.m_numThreads,
                "If you initialize hardware thread ids, you must give an ID to all threads" );
            HK_ASSERT(0x40a4bd94, cinfo.m_hardwareThreadBinding != hkCpuThreadPoolCinfo::HardwareThreadBinding::UNSPECIFIED,
                "You've supplied preferred HW thread IDs or affinity masks, but haven't specified how they should be interpretted" );
            threadData->m_hardwareThreadBinding = cinfo.m_hardwareThreadBinding;
            threadData->m_hardwareThreadMasksOrId = cinfo.m_hardwareThreadMasksOrIds[i];
        }
        else
        {
            HK_WARN_ONCE_ON_DEBUG_IF( cinfo.m_hardwareThreadBinding != hkCpuThreadPoolCinfo::HardwareThreadBinding::UNSPECIFIED, 0x4caf0713,
                "You've requested HW thread affinity/preference but haven't supplied masks/IDs - applying default preferences");
            // Unspecified per-thread masks force SCHEDULER_HINT usage
            threadData->m_hardwareThreadBinding = hkCpuThreadPoolCinfo::HardwareThreadBinding::SCHEDULER_HINT;
            threadData->m_hardwareThreadMasksOrId = calcHardwareThreadId( threadData->m_threadId );
        }

        // Start thread
        threadData->m_thread.startThread( &threadMainForwarder, threadData, m_threadName, m_stackSize );
    }
}


// Static thread entry point: recovers the ThreadData passed to startThread() and forwards to
// the owning pool's threadMain(). Always returns null.
_Ret_null_ void* HK_CALL hkCpuThreadPool::threadMainForwarder(_Inout_ void* threadDataAsVoid)
{
    ThreadData* const self = static_cast<ThreadData*>( threadDataAsVoid );
    self->m_threadPool->threadMain( self );
    return HK_NULL;
}


// Body of a worker thread: sets up the thread context and monitor stream, applies the requested
// hardware thread binding and then enters the processing loop until the thread is killed.
void hkCpuThreadPool::threadMain( ThreadData* threadDataPtr )
{
    // The worker thread context must exist before anything else runs on this thread
    hkWorkerThreadContext workerContext( threadDataPtr->m_threadId );

    // Give this thread a monitor stream buffer if requested (this enables timers) and publish
    // the stream range so that the pool can collect the timer data later
    hkMonitorStream& stream = hkMonitorStream::getInstance();
    if ( m_timerBufferAllocation > 0 )
    {
        stream.resize( m_timerBufferAllocation );
    }
    threadDataPtr->m_timerData.m_streamBegin = stream.getStart();
    threadDataPtr->m_timerData.m_streamEnd = stream.getEnd();

    // HARD_AFFINITY treats the stored value as an affinity mask, every other binding treats it
    // as the ideal processor
    const bool useAffinityMask =
        ( threadDataPtr->m_hardwareThreadBinding == hkCpuThreadPoolCinfo::HardwareThreadBinding::HARD_AFFINITY );
    if ( !useAffinityMask )
    {
        threadDataPtr->m_thread.setIdealProcessor( threadDataPtr->m_hardwareThreadMasksOrId );
    }
    else
    {
        threadDataPtr->m_thread.setThreadAffinityMask( static_cast<hkUint32>( threadDataPtr->m_hardwareThreadMasksOrId ) );
    }

    threadProcessingLoop( threadDataPtr, &workerContext.m_memoryRouter );
}


// Worker loop: sleeps on the thread's semaphore, runs the pool's current worker function on the
// current work load each time it is released, and signals m_workerThreadFinished afterwards.
// Returns once the thread is released with m_killThread set.
void hkCpuThreadPool::threadProcessingLoop( _In_ ThreadData* threadDataPtr, _Inout_ hkMemoryRouter* memoryRouter )
{
    for ( ;; )
    {
        // Sleep until the main thread releases this worker
        threadDataPtr->m_semaphore.acquire();
        if ( threadDataPtr->m_killThread )
        {
            return;
        }

        // Apply a timer reset requested through clearTimerData()
        hkMonitorStream& stream = hkMonitorStream::getInstance();
        if ( threadDataPtr->m_clearTimers )
        {
            stream.reset();
            threadDataPtr->m_timerData.m_streamEnd = stream.getEnd();
            threadDataPtr->m_clearTimers = false;
        }

        hkCheckDeterminismUtil::workerThreadStartFrame( false );

        // Critical section timers are only enabled while the work load is being processed
        hkCriticalSection::setTimersEnabled();

        // Process the work load
        m_workerFunction( m_workLoad );

        // The work is done, switch the critical section timers off again
        hkCriticalSection::setTimersDisabled();

        // Remember how far the monitor stream has been written
        threadDataPtr->m_timerData.m_streamEnd = stream.getEnd();

        hkCheckDeterminismUtil::workerThreadFinishFrame();

        // Garbage collect this thread's memory if it was requested for this completion
        if ( m_gcThreadMemoryOnCompletion )
        {
            hkMemorySystem::getInstance().garbageCollectThread( *memoryRouter );
        }

        // Wake whoever is waiting for the workers to finish (usually the main thread)
        m_workerThreadFinished.release();
    }
}

// Returns true if one of the pool's worker threads reports the given thread ID.
// Only implemented for Win32 (non-WinRT); asserts and returns false on other platforms.
bool HK_CALL hkCpuThreadPool::doesThreadBelongToPool(hkUint64 threadId) const
{
#if defined(HK_PLATFORM_WIN32) && !defined(HK_PLATFORM_WINRT)
    bool found = false;
    for ( int i = 0; !found && ( i < m_workerThreads.getSize() ); ++i )
    {
        found = ( m_workerThreads[i]->m_thread.getChildThreadId() == threadId );
    }
    return found;
#else
    HK_ASSERT_NOT_IMPLEMENTED(0x3ff72d58);
    return false;
#endif
}

// Destroy threads: finish any work in flight, ask every worker to exit, then join and free them.
hkCpuThreadPool::~hkCpuThreadPool()
{
    waitForCompletion();

    const int numWorkers = m_workerThreads.getSize();

    // First pass: flag every worker for termination and wake it up so that it can exit
    for ( int i = 0; i < numWorkers; ++i )
    {
        ThreadData* const worker = m_workerThreads[i];
        worker->m_killThread = true;
        worker->m_semaphore.release();
    }

    // Second pass: wait for each thread to finish before releasing its data
    for ( int i = 0; i < numWorkers; ++i )
    {
        ThreadData* const worker = m_workerThreads[i];
        worker->m_thread.joinThread();
        delete worker;
    }
}


// Requests that every worker garbage collects its thread memory after it next finishes a
// work load. The request is cleared again by waitForCompletion().
void hkCpuThreadPool::gcThreadMemoryOnNextCompletion()
{
    m_gcThreadMemoryOnCompletion = true;
}


// Appends one worker thread to the pool and starts it. With an UNSPECIFIED binding the thread
// gets the default SCHEDULER_HINT placement, otherwise the supplied binding and mask/ID are used.
// Must not be called while the pool is processing.
void hkCpuThreadPool::addThread(hkCpuThreadPoolCinfo::HardwareThreadBinding::Enum hardwareBinding, hkUint32 maskOrId)
{
    HK_ASSERT( 0xad67bd88, !m_isRunning,
        "You can only add or remove working threads via calls from the master thread and not between processWorkLoad() "
        "and waitForCompletion() calls. " );

    ThreadData* const worker = new ThreadData();
    m_workerThreads.pushBack( worker );

    // Thread IDs are 1-based, so the new ID equals the array size after the push
    worker->m_threadId = m_workerThreads.getSize();
    worker->m_threadPool = this;
    worker->m_killThread = false;
    worker->m_clearTimers = false;
    worker->m_timerData.m_streamBegin = HK_NULL;
    worker->m_timerData.m_streamEnd = HK_NULL;

    const bool useDefaultPlacement = ( hardwareBinding == hkCpuThreadPoolCinfo::HardwareThreadBinding::UNSPECIFIED );
    if ( !useDefaultPlacement )
    {
        worker->m_hardwareThreadBinding = hardwareBinding;
        worker->m_hardwareThreadMasksOrId = maskOrId;
    }
    else
    {
        worker->m_hardwareThreadBinding = hkCpuThreadPoolCinfo::HardwareThreadBinding::SCHEDULER_HINT;
        worker->m_hardwareThreadMasksOrId = calcHardwareThreadId( worker->m_threadId );
    }

    worker->m_thread.startThread( &threadMainForwarder, worker, m_threadName, m_stackSize );
}


void hkCpuThreadPool::removeThread()
{
    HK_ASSERT( 0xad67bd89, !m_isRunning,
        "You can only add or remove working threads via calls from the master thread and not between processWorkLoad() "
        "and waitForCompletion() calls. " );
    HK_ASSERT( 0xcede9735, m_workerThreads.getSize() > 0, "You cannot set a negative number of threads" );

    ThreadData* data = m_workerThreads.back();

    // Signal the thread to be killed, and release the thread
    data->m_killThread = true;
    data->m_semaphore.release();

    
    // Close handle to thread to avoid resource leak
    data->m_thread.joinThread();

    delete data;
    m_workerThreads.popBack(1);
}


// Hands the given work load to all worker threads: stores the worker function and its argument,
// marks the pool as running and releases every worker. Returns without waiting for the work;
// call waitForCompletion() before starting another work load.
void hkCpuThreadPool::processWorkLoad(WorkerFunction workerFunction, _Inout_ void* workLoad)
{
    HK_ASSERT( 0xad56dd77, m_isRunning == false,
        "Calling hkCpuThreadPool::processWorkLoad() for the second time, without having called "
        "hkCpuThreadPool::waitForCompletion()." );

    // Publish the work before any worker is woken up
    m_workerFunction = workerFunction;
    m_workLoad = workLoad;
    m_isRunning = true;

    // Release the workers, last thread first
    int workerIdx = m_workerThreads.getSize();
    while ( workerIdx-- > 0 )
    {
        m_workerThreads[workerIdx]->m_semaphore.release();
    }
}


// Attaches the job queue to the pool and lets all worker threads process its jobs.
// The second parameter is ignored.
void hkCpuThreadPool::processJobQueue(_Inout_ hkJobQueue* jobQueue, hkJobType notUsed )
{
    HK_ASSERT( 0x22440613, ( m_jobQueue == HK_NULL ), "This thread pool already has a job queue" );

    m_jobQueue = jobQueue;
    processWorkLoad( &jobQueueWorkerFunction, jobQueue );
}


// Attaches the task queue to the pool, tells it how many worker threads the pool has and lets
// all worker threads process it.
void hkCpuThreadPool::processTaskQueue(_Inout_ hkDefaultTaskQueue* taskQueue )
{
    HK_ASSERT( 0x5a81fc8e, ( m_taskQueue == HK_NULL ), "This thread pool already has a task queue" );
    HK_WARN_ON_DEBUG_IF( taskQueue->getNumThreadsHint() != 0, 0x22440533, "Sharing a task queue between pools makes the num threads hint inaccurate" );

    m_taskQueue = taskQueue;
    m_taskQueue->setNumThreadsHint( m_workerThreads.getSize() );

    processWorkLoad( &taskQueueWorkerFunction, taskQueue );
}


// Worker function used by processJobQueue(): the work load is the hkJobQueue to process.
void HK_CALL hkCpuThreadPool::jobQueueWorkerFunction(_Inout_ void* workLoad )
{
    static_cast<hkJobQueue*>( workLoad )->processAllJobs();
}


// Worker function used by processTaskQueue(): the work load is the hkDefaultTaskQueue to process.
void HK_CALL hkCpuThreadPool::taskQueueWorkerFunction(_Inout_ void* workLoad )
{
    static_cast<hkDefaultTaskQueue*>( workLoad )->process();
}


// Blocks until every worker thread has finished the current work load (if one is running) and
// then detaches the job queue / task queue from the pool.
void hkCpuThreadPool::waitForCompletion()
{
    if ( m_isRunning )
    {
        // Each worker releases m_workerThreadFinished exactly once per work load
        int numPending = m_workerThreads.getSize();
        while ( numPending-- > 0 )
        {
            m_workerThreadFinished.acquire();
        }

        m_gcThreadMemoryOnCompletion = false;
        m_isRunning = false;
    }

    m_jobQueue = HK_NULL;

    // The task queue is no longer served by this pool's threads
    if ( m_taskQueue != HK_NULL )
    {
        m_taskQueue->setNumThreadsHint( 0 );
        m_taskQueue = HK_NULL;
    }
}


// Returns true between a processWorkLoad() call and the matching waitForCompletion() call.
bool hkCpuThreadPool::isProcessing() const
{
    const bool running = m_isRunning;
    return running;
}

// Returns the job queue attached to the pool while it is processing, null otherwise.
_Ret_maybenull_
hkJobQueue* hkCpuThreadPool::getProcessingJobQueue() const
{
    if ( !isProcessing() )
    {
        return HK_NULL;
    }
    return m_jobQueue;
}

// Returns the task queue attached to the pool while it is processing, null otherwise.
_Ret_maybenull_
hkDefaultTaskQueue* hkCpuThreadPool::getProcessingTaskQueue() const
{
    if ( !isProcessing() )
    {
        return HK_NULL;
    }
    return m_taskQueue;
}


// Appends one hkTimerData entry per worker thread to timerDataOut. When a task queue is
// attached, the cached stream ends are first refreshed from the task queue's own timer data.
void hkCpuThreadPool::appendTimerData( hkArray<hkTimerData>& timerDataOut )
{
    const int numWorkers = m_workerThreads.getSize();

    if ( m_taskQueue )
    {
        // Fetch the task queue's view of the streams and index the stream ends by stream start
        hkInplaceArray< hkTimerData, 32 > queueStreams;
        m_taskQueue->getTimerData( queueStreams );

        hkPointerMap< const char*, const char* > endByBegin;
        for ( int s = 0; s < queueStreams.getSize(); ++s )
        {
            const hkTimerData& stream = queueStreams[s];
            endByBegin.insert( stream.m_streamBegin, stream.m_streamEnd );
        }

        // Update the cached end of every worker stream the task queue knows about
        for ( int i = 0; i < numWorkers; ++i )
        {
            const char* streamEnd = HK_NULL;
            const bool isKnownStream = endByBegin.get( m_workerThreads[i]->m_timerData.m_streamBegin, &streamEnd ).isSuccess();
            if ( isKnownStream )
            {
                HK_ASSERT_NO_MSG( 0x552c8540, streamEnd >= m_workerThreads[i]->m_timerData.m_streamEnd );
                m_workerThreads[i]->m_timerData.m_streamEnd = streamEnd;
            }
        }
    }

    // Copy the per-thread timer data behind whatever the output array already contains
    hkTimerData* const dst = timerDataOut.expandBy( numWorkers );
    for ( int w = 0; w < numWorkers; ++w )
    {
        dst[w] = m_workerThreads[w]->m_timerData;
    }
}


void hkCpuThreadPool::clearTimerData()
{
    if ( m_taskQueue )
    {
        // Tell the task queue to clear its streams ASAP
        m_taskQueue->clearTimerData();
    }

    for ( int i = 0; i < m_workerThreads.getSize(); ++i )
    {
        m_workerThreads[i]->m_timerData.m_streamEnd = m_workerThreads[i]->m_timerData.m_streamBegin;
        m_workerThreads[i]->m_clearTimers = true;
    }
}


// Returns the number of worker threads currently owned by the pool.
int hkCpuThreadPool::getNumThreads() const
{
    const int numWorkers = m_workerThreads.getSize();
    return numWorkers;
}


void hkCpuThreadPool::setNumThreads( int numThreads )
{
    // Stop processing the task queue while we change the number of threads     
    hkDefaultTaskQueue* taskQueue = m_taskQueue;
    if ( taskQueue )
    {
        taskQueue->close();
        waitForCompletion();
    }

    while ( m_workerThreads.getSize() < numThreads )
    {
        addThread();
    }

    while ( m_workerThreads.getSize() > numThreads )
    {
        removeThread();
    }

    if ( taskQueue )
    {
        taskQueue->reset();
        processTaskQueue( taskQueue );
    }
}


#if 0

// This isn't called anywhere, it is only used to generate documentation.
// It is compiled out (#if 0) and sketches a hand-written worker thread main function:
// persistent per-thread setup, a loop doing temporary setup/teardown around each unit of work,
// and the matching persistent teardown in reverse order.
// NOTE(review): the two whitespace-only lines inside the body look like doc-extraction markers;
// presumably the documentation tooling depends on them - confirm before reformatting.
static void* HK_CALL Function_For_Docs_Do_not_Delete( void *v )
{
    // Placeholders for the user's own signalling and work functions
    extern hkResult waitForStartSignal();
    extern void doWork();
    extern void sendWorkDoneSignal();

    
    // One-time (persistent) initialization of this thread's memory router and base system
    hkMemorySystem& memSystem = hkMemorySystem::getInstance();
    hkMemoryRouter memRouter;
    memSystem.threadInit( memRouter, "worker", hkMemorySystem::FLAG_PERSISTENT );
    hkBaseSystem::initThread( &memRouter );

    // Keep working for as long as waiting for the start signal succeeds
    while ( waitForStartSignal().isSuccess() )
    {
        // Temporary thread memory is set up and torn down around every unit of work
        memSystem.threadInit( memRouter, "worker", hkMemorySystem::FLAG_TEMPORARY );
        doWork();
        memSystem.threadQuit( memRouter, hkMemorySystem::FLAG_TEMPORARY );
        sendWorkDoneSignal();
    }

    // Tear down in the reverse order of the persistent initialization
    hkBaseSystem::quitThread();
    memSystem.threadQuit( memRouter, hkMemorySystem::FLAG_PERSISTENT );
    

    return 0;
}

#endif

/*
 * Havok SDK - Base file, BUILD(#20180110)
 * 
 * Confidential Information of Microsoft Corporation.
 * Not for disclosure or distribution without Microsoft's prior written
 * consent.  This software contains code, techniques and know-how which
 * is confidential and proprietary to Microsoft.  Product and Trade Secret
 * source code contains trade secrets of Microsoft.  Havok Software (C)
 * Copyright 1999-2018 Microsoft Corporation.
 * All Rights Reserved. Use of this software is subject to the
 * terms of an end user license agreement.
 * 
 * The Havok Logo, and the Havok buzzsaw logo are trademarks of Microsoft.
 * Title, ownership rights, and intellectual property rights in the Havok
 * software remain in Microsoft and/or its suppliers.
 * 
 * Use of this software for evaluation purposes is subject to and
 * indicates acceptance of the End User licence Agreement for this
 * product. A copy of the license is included with this software and is
 * also available from Havok Support.
 * 
 */
