// TKBMS v1.0 -----------------------------------------------------
//
// PLATFORM   : ALL
// PRODUCT   : COMMON
// VISIBILITY   : PUBLIC
//
// ------------------------------------------------------TKBMS v1.0

#include <Common/Base/hkBase.h>

#include <Common/Base/System/Hardware/hkHardwareInfo.h>


#include <Common/Base/Thread/JobQueue/hkJobQueue.h>
#include <Common/Base/System/hkBaseSystem.h>

// Fail the build unless JobQueueEntry occupies exactly JOB_QUEUE_ENTRY_SIZE bytes.
HK_COMPILE_TIME_ASSERT(sizeof(hkJobQueue::JobQueueEntry) == hkJobQueue::JOB_QUEUE_ENTRY_SIZE);

// Per-thread index used by the job queue code in this file. The hkJobQueue constructor sets it
// to 0 on the constructing thread, and the "master thread only" asserts below check for 0.
HK_THREAD_LOCAL( int ) hkThreadNumber;

//#define JOB_QUEUE_PRINTF(A) printf("Thread: %d: ", HK_THREAD_LOCAL_GET(hkThreadNumber)); printf(A)

//#include <stdio.h>
//void hkPrintJobQueue( hkJobQueue* queue, hkJobQueue::DynamicData* data )
//{
//  printf("Thread %d", HK_THREAD_LOCAL_GET(hkThreadNumber) );
//
//  for (int i = 0; i < queue->m_numJobQueues; ++i)
//  {
//      printf("\nQ%d\t", i);
//      for (int j = 0; j < data->m_jobQueue[i].getSize(); ++j)
//      {
//          hkJobQueue::JobQueueEntry entry;
//          data->m_jobQueue[i].peek( entry );
//          printf("Type: %d SubType: %d\n",entry.m_jobType, entry.m_jobSubType ) ;
//      }
//  }
//}


/// Default hardware setup: the number of CPU threads is the hardware thread count
/// reported by hkHardwareInfo.
hkJobQueueHwSetup::hkJobQueueHwSetup()
:   m_numCpuThreads( hkHardwareInfo::getNumHardwareThreads() )
{
}


/// Default pop dispatcher: looks up the handler table entry for the job's type and forwards
/// all arguments to its pop function, returning that function's result.
hkJobQueue::JobPopFuncResult HK_CALL defaultPopDispatchFunc( hkJobQueue& queue, _Inout_ hkJobQueue::DynamicData* data, hkJobQueue::JobQueueEntry& jobIn, hkJobQueue::JobQueueEntry& jobOut )
{
    const hkJobType typeOfJob = jobIn.m_jobType;
    return queue.m_jobFuncs[typeOfJob].m_popJobFunc( queue, data, jobIn, jobOut );
}

/// Default finish dispatcher: looks up the handler table entry for the job's type and forwards
/// all arguments to its finish function, returning that function's result.
hkJobQueue::JobCreationStatus HK_CALL defaultFinishDispatchFunc( hkJobQueue& queue, _Inout_ hkJobQueue::DynamicData* data, const hkJobQueue::JobQueueEntry& jobIn, hkJobQueue::JobQueueEntryInput& newJobCreatedOut )
{
    const hkJobType typeOfJob = jobIn.m_jobType;
    return queue.m_jobFuncs[typeOfJob].m_finishJobFunc( queue, data, jobIn, newJobCreatedOut );
}


/// Placeholder pop handler. It always fires an assert that names the job type and then
/// returns POP_QUEUE_ENTRY. None of the other arguments are used.
static hkJobQueue::JobPopFuncResult HK_CALL hkDefaultPopJobFunc(
    hkJobQueue& queue,
    _Inout_ hkJobQueue::DynamicData* data,
    hkJobQueue::JobQueueEntry& jobIn,
    hkJobQueue::JobQueueEntry& jobOut )
{
    // Reaching this function means no pop function was registered for the job type.
    HK_ASSERTV(0x9762fe35, 0, "Unregistered pop job function for job type {}.", jobIn.m_jobType );

    return hkJobQueue::POP_QUEUE_ENTRY;
}

/// Placeholder finish handler. It always fires an assert that names the job type and then
/// returns NO_JOB_CREATED. None of the other arguments are used.
static hkJobQueue::JobCreationStatus HK_CALL hkDefaultFinishJobFunc(
    hkJobQueue& queue,
    _Inout_ hkJobQueue::DynamicData* data,
    const hkJobQueue::JobQueueEntry& jobIn,
    hkJobQueue::JobQueueEntryInput& newJobCreatedOut )
{
    // Reaching this function means no finish function was registered for the job type.
    HK_ASSERTV(0x9762fe36, 0, "Unregistered finish job function for job type {}.", jobIn.m_jobType );

    return hkJobQueue::NO_JOB_CREATED;
}

/// Builds a job queue from the construction info.
///
/// The number of job types and the hardware setup are copied from cinfo. Every job type starts
/// with the placeholder pop/finish handlers and no process functions, the wait policy starts as
/// WAIT_UNTIL_ALL_WORK_COMPLETE, and the master thread finishing flags start with one bit set
/// per job type. The calling thread's hkThreadNumber is set to 0.
hkJobQueue::hkJobQueue( const hkJobQueueCinfo& cinfo )
:   m_criticalSection(0),
    m_numJobTypes( cinfo.m_maxNumJobTypes ),
    m_externalJobProfiler(HK_NULL)
{
    HK_THREAD_LOCAL_SET( hkThreadNumber, 0);

    // State shared between threads lives in a separately allocated DynamicData.
    m_data = new DynamicData();
    m_data->m_outOfMemory = false;
    m_data->m_waitPolicy = WAIT_UNTIL_ALL_WORK_COMPLETE;
    m_data->m_masterThreadFinishingFlags = 0;

    m_queryRulesAreUpdated = false;
    m_hwSetup = cinfo.m_jobQueueHwSetup;

    // Per job type: placeholder handlers, no active jobs, and one finishing-flag bit.
    for (int jobType = 0; jobType < m_numJobTypes; ++jobType)
    {
        m_jobFuncs[jobType].m_numProcessJobFuncs = 0;
        m_jobFuncs[jobType].m_processJobFuncs    = HK_NULL;
        m_jobFuncs[jobType].m_popJobFunc         = hkDefaultPopJobFunc;
        m_jobFuncs[jobType].m_finishJobFunc      = hkDefaultFinishJobFunc;

        m_data->m_numActiveJobs[jobType] = 0;
        m_data->m_masterThreadFinishingFlags |= 1 << jobType;
    }

    // Queue-level dispatchers forward to the per-type handlers above.
    m_popJobFunc    = defaultPopDispatchFunc;
    m_finishJobFunc = defaultFinishDispatchFunc;

    // No semaphores exist yet; updateJobQueryRules() allocates them.
    m_numQueueSemaphores = 0;
    for (int semaphoreIdx = 0; semaphoreIdx < MAX_NUM_THREAD_TYPES; ++semaphoreIdx)
    {
        m_queueSemaphores[semaphoreIdx] = HK_NULL;
    }

    // Initializes the queue layout, the search tables and m_cpuThreadIndexToSemaphoreIndex.
    updateJobQueryRules();
}

/// Frees the dynamic data and, if the query rules were ever set up, every semaphore slot.
hkJobQueue::~hkJobQueue()
{
    delete m_data;

    if ( !m_queryRulesAreUpdated )
    {
        // updateJobQueryRules() never completed, so no semaphores are released here.
        return;
    }

    for (int semaphoreIdx = 0; semaphoreIdx < MAX_NUM_THREAD_TYPES; ++semaphoreIdx)
    {
        delete m_queueSemaphores[semaphoreIdx];
    }
}




/// Registers a (job type, sub type) pair with a specific CPU thread.
///
/// Does nothing if an identical registration already exists. Otherwise the registration is
/// appended to m_customJobSetup and the query rules are rebuilt.
void hkJobQueue::registerJobWithCpuThread( hkJobType jobType, hkJobSubType subType, int threadId )
{
    // Skip the request if the same triple has been registered before.
    const int numRegistered = m_customJobSetup.getSize();
    for ( int idx = 0; idx < numRegistered; ++idx )
    {
        const CustomJobTypeSetup& existing = m_customJobSetup[idx];
        if ( ( existing.m_jobType == jobType ) &&
             ( existing.m_jobSubType == subType ) &&
             ( existing.m_threadId == threadId ) )
        {
            return;
        }
    }

    CustomJobTypeSetup& added = m_customJobSetup.expandOne();
    added.m_jobType    = jobType;
    added.m_jobSubType = subType;
    added.m_threadId   = threadId;

    updateJobQueryRules();
}

/// Rebuilds everything derived from m_hwSetup and m_customJobSetup:
///   - the queue index layout: CPU cache queues first, then one queue per custom job,
///     then one queue per job type;
///   - m_nextQueueToGet, the per-queue search order tables, each terminated by -1;
///   - m_cpuThreadIndexToSemaphoreIndex, the thread index to semaphore index map;
///   - the semaphores themselves, which are freed and re-allocated on every call after the first.
///
/// Called from the constructor and from registerJobWithCpuThread().
void hkJobQueue::updateJobQueryRules()
{
    // With no cache grouping specified, all threads share a single cache queue.
    int numSharedCaches = m_hwSetup.m_threadIdsSharingCaches.getSize() == 0 ? 1 : m_hwSetup.m_threadIdsSharingCaches.getSize();
    m_numCustomJobs = m_customJobSetup.getSize();

    // Queue index layout: [cache queues][custom job queues][job type queues]
    m_cpuCacheQueuesBegin = 0;
    m_cpuCustomQueuesBegin = m_cpuCacheQueuesBegin + numSharedCaches;
    m_cpuTypesQueuesBegin = m_cpuCustomQueuesBegin + m_customJobSetup.getSize();
    m_numJobQueues = m_cpuTypesQueuesBegin + m_numJobTypes;

    // Set capacity for standard CPU queues
    for (int  qIdx = m_cpuCacheQueuesBegin ; qIdx < m_cpuCustomQueuesBegin ; qIdx++)
    {
        m_data->m_jobQueue[qIdx].setCapacity( 128 );
    }


    // Duplicate value - only here for readability
    m_cpuSemaphoreBegin = m_cpuCacheQueuesBegin;

    // Cache and custom queues each have a semaphore with the same index as the queue.
    m_directMapSemaphoreEnd = m_cpuTypesQueuesBegin;

    //
    // Next, setup the CPU queues, using specified cache groupings, if present
    //

    m_cpuThreadIndexToSemaphoreIndex.clear();
#if defined HK_PLATFORM_WIN32

    if ( m_hwSetup.m_threadIdsSharingCaches.getSize() > 0)
    {
        // Start with every thread unassigned (-1), then map each listed thread to its cache queue.
        m_cpuThreadIndexToSemaphoreIndex.setSize( m_hwSetup.m_numCpuThreads, -1 );
        for ( int i = 0; i < m_hwSetup.m_threadIdsSharingCaches.getSize(); ++i )
        {
            for ( int j = 0; j < m_hwSetup.m_threadIdsSharingCaches[i].getSize(); ++j )
            {
                m_cpuThreadIndexToSemaphoreIndex[ m_hwSetup.m_threadIdsSharingCaches[i][j] ] = hkUint8(m_cpuCacheQueuesBegin + i);
            }
        }
        for (int i = 0; i < m_hwSetup.m_numCpuThreads; ++i)
        {
            HK_ASSERT(0x278ff346, m_cpuThreadIndexToSemaphoreIndex[i] != -1, "Incomplete thread cache specification. You must specify caches all threads in a contiguous block up to m_hwSetup.m_numCpuThreads" );
        }
    }
#endif

    if ( m_hwSetup.m_threadIdsSharingCaches.getSize() == 0)
    {
        // No grouping: every thread maps to the first (only) cache queue.
        m_cpuThreadIndexToSemaphoreIndex.setSize( m_hwSetup.m_numCpuThreads, hkUint8(m_cpuCacheQueuesBegin) );
    }



    // Build the search order table for each cache queue.
    for (int i = 0; i < numSharedCaches; ++i)
    {
        hkInt8* table = m_nextQueueToGet[i + m_cpuCacheQueuesBegin];

        // A thread always looks in its own cache queue first.
        *(table++) = (hkUint8)(i + m_cpuCacheQueuesBegin);

        #if defined HK_PLATFORM_WIN32
            // Next, assign a thread to the shared cache
            for (int j = 0; j < m_hwSetup.m_threadIdsSharingCaches.getSize(); ++j)
            {
                // NOTE: This is not always the best thing to do - it might be better to take jobs from type queues first
                if (i != j)
                {
                    *(table++) = hkUint8(j + m_cpuCacheQueuesBegin);
                }
            }
        #endif

        // Next look in all CPU type queues starting from the first type
        for (int j = 0; j < m_numJobTypes; ++j)
        {
            *(table++) = hkUint8(j + m_cpuTypesQueuesBegin);
        }

        // Finally look in the custom job queues
        for ( int j = 0; j < m_numCustomJobs; ++j)
        {
            *(table++) = hkUint8(j + m_cpuCustomQueuesBegin);
        }


        // Place marker at end
        *(table++) = -1;
    }

    //
    // Handle custom jobs
    //
    for ( int i = 0; i < m_numCustomJobs; ++i)
    {
        m_customJobs[i].m_jobType = m_customJobSetup[i].m_jobType;
        m_customJobs[i].m_jobSubType = m_customJobSetup[i].m_jobSubType;
        m_customJobs[i].m_queueId = hkUint8( i + m_cpuCustomQueuesBegin );

        // The thread id indexes m_cpuThreadIndexToSemaphoreIndex, which holds m_numCpuThreads
        // entries, so the id must be strictly less than m_numCpuThreads.
        HK_ASSERT(0x27836482, m_customJobSetup[i].m_threadId < m_hwSetup.m_numCpuThreads, "You cannot register a job with a thread with an id greater than the job queue is set up to handle.");
        HK_ASSERT(0x27836434, m_cpuThreadIndexToSemaphoreIndex[ m_customJobSetup[i].m_threadId ] < m_cpuCustomQueuesBegin, "Multiple custom jobs per thread not currently supported");

        // Redirect the thread to the custom queue, remembering which table it used before.
        int originalSemaphoreIndex = m_cpuThreadIndexToSemaphoreIndex[m_customJobSetup[i].m_threadId];
        m_cpuThreadIndexToSemaphoreIndex[ m_customJobSetup[i].m_threadId ] = m_customJobs[i].m_queueId;

        // Fill out the table entry for this custom job
        hkInt8* table = m_nextQueueToGet[i + m_cpuCustomQueuesBegin];
        *(table++) = (hkUint8)(i + m_cpuCustomQueuesBegin);

        // Copy values from original list, skipping this custom queue (already placed first)
        for ( int j = 0; m_nextQueueToGet[originalSemaphoreIndex][j] != -1; ++j )
        {
            if (m_nextQueueToGet[originalSemaphoreIndex][j] != m_customJobs[i].m_queueId)
                *(table++) = m_nextQueueToGet[originalSemaphoreIndex][j];
        }
        *(table++) = -1;

    }

    // Setup master thread semaphore

    int numSemaphores = m_cpuTypesQueuesBegin;

    // Thread 0 keeps searching from its original queue, but gets its own semaphore if any other
    // thread would otherwise share the same one.
    m_masterThreadQueue = m_cpuThreadIndexToSemaphoreIndex[0];
    bool newSemaphoreNeeded = false;
    for (int i = 1; i < m_hwSetup.m_numCpuThreads; ++i)
    {
        if ( m_cpuThreadIndexToSemaphoreIndex[i] == m_cpuThreadIndexToSemaphoreIndex[0] )
        {
            newSemaphoreNeeded = true;
            break;
        }
    }
    if (newSemaphoreNeeded)
    {
        m_cpuThreadIndexToSemaphoreIndex[0] = (hkUint8)numSemaphores;
        numSemaphores++;
    }

    // Note: Semaphores are allocated and deallocated here for CPU threads as these change dynamically
    if (m_queryRulesAreUpdated)
    {
        // deallocate cpu and custom job semaphores
        for (int i = m_cpuCacheQueuesBegin ; i < m_numQueueSemaphores; i++)
        {
            delete m_queueSemaphores[i];
            m_queueSemaphores[i] = HK_NULL;
        }
    }

    m_numQueueSemaphores = numSemaphores;
    HK_ASSERT(0x44443331, m_numQueueSemaphores < MAX_NUM_THREAD_TYPES, "Max num thread types exceeded");

    for (int i =0; i < m_numQueueSemaphores; i++)
    {
        m_data->m_numThreadsWaiting[i] = 0;

        // Only allocate semaphores for CPU queues
        if ( i >= m_cpuCacheQueuesBegin)
        {
            m_queueSemaphores[i] = new hkSemaphoreBusyWait( 0,1000 );
        }
    }

    m_queryRulesAreUpdated = true;
}


/// Placeholder process handler: fires an assert, then finishes the given job and waits for
/// the next one, returning the resulting status.
static hkJobQueue::JobStatus HK_CALL hkDefaultProcessJobFunc( hkJobQueue& jobQueue, hkJobQueue::JobQueueEntry& jobInOut )
{
    HK_ASSERT(0x9762fe34, 0, "Unregistered process job function" );

    // The same entry is passed as both the finished job and the storage for the next job.
    const hkJobQueue::JobStatus nextStatus = jobQueue.finishJobAndGetNextJob( &jobInOut, jobInOut, hkJobQueue::WAIT_FOR_NEXT_JOB );
    return nextStatus;
}

/// Sets the capacity of the queue belonging to jobType and, with the queue locked, applies the
/// same capacity to every custom job queue and every CPU cache queue.
void hkJobQueue::setQueueCapacityForJobType(hkJobType jobType, int queueCapacity )
{
    HK_ALIGN16(char dynamicDataStorage[sizeof(DynamicData)]);
    DynamicData* lockedData = lockQueue( dynamicDataStorage );

    // The queue dedicated to this job type.
    lockedData->m_jobQueue[m_cpuTypesQueuesBegin + jobType].setCapacity( queueCapacity );

    // This is a hack, but needed (this is only used for physics broad phase anyway)
    for (int customIdx = 0; customIdx < m_numCustomJobs; ++customIdx)
    {
        lockedData->m_jobQueue[ m_customJobs[customIdx].m_queueId ].setCapacity( queueCapacity );
    }

    // The cache queues are resized too (for post collide jobs).
    for (int cacheQueueIdx = m_cpuCacheQueuesBegin; cacheQueueIdx < m_cpuCustomQueuesBegin; ++cacheQueueIdx)
    {
        lockedData->m_jobQueue[cacheQueueIdx].setCapacity( queueCapacity );
    }

    unlockQueue( lockedData );
}

/// Currently a no-op: queueCapacity is ignored and no queue is resized.
void hkJobQueue::setQueueCapacityForCpuCache( int queueCapacity )
{
}

/// Stores the external job profiler pointer. processAllJobs() calls onStartJob/onEndJob on it
/// around each job when it is non-null; pass HK_NULL to disable those callbacks.
void hkJobQueue::setExternalProfiler(_In_opt_ hkExternalJobProfiler* p)
{
    m_externalJobProfiler = p;
}

/// Fetches a job with getNextJob() and keeps running the registered process function for each
/// job while the status is GOT_NEXT_JOB. Returns the first status that is not GOT_NEXT_JOB.
///
/// The process function's return value becomes the new loop status; no explicit fetch happens
/// inside the loop, so the process function is presumably expected to finish the current job
/// and place the next one in the same entry (as hkDefaultProcessJobFunc does) - confirm against
/// the registered handlers.
///
/// \param addTimers When true (and monitors are enabled), a timer named after the job type is
///                  written to the monitor stream around each job.
hkJobQueue::JobStatus hkJobQueue::processAllJobs( bool addTimers )
{
    hkJobQueue::JobQueueEntry job;


    hkJobQueue::JobStatus jobStatus = getNextJob( job);
    // Prefix prepended to every timer name below.
#define MONITOR_COMMAND_TIMER_BEGIN "Tt"
    const char* timerName = MONITOR_COMMAND_TIMER_BEGIN"Unknown";
    while ( jobStatus == hkJobQueue::GOT_NEXT_JOB )
    {
        hkJob& typedJob = reinterpret_cast<hkJob&>(job);

        // Cache job type because it may get overwritten in the m_processJobFuncs call
        const hkJobType jobType = typedJob.m_jobType;

        HK_ASSERT(0xafe1a255, jobType < m_numJobTypes, "Invalid job type. Type exceeds allowed m_numJobTypes.");
        HK_ASSERT(0xafe1a256, typedJob.m_jobSubType < m_jobFuncs[jobType].m_numProcessJobFuncs, "Invalid job type. No function registered");

        // Pick the timer label for this job type.
        switch (jobType)
        {
        case HK_JOB_TYPE_DYNAMICS:          timerName = MONITOR_COMMAND_TIMER_BEGIN"Physics 2012";          break;
        case HK_JOB_TYPE_COLLIDE_STATIC_COMPOUND:
        case HK_JOB_TYPE_COLLIDE:           timerName = MONITOR_COMMAND_TIMER_BEGIN"Physics 2012";          break;
        case HK_JOB_TYPE_COLLISION_QUERY:   timerName = MONITOR_COMMAND_TIMER_BEGIN"Collision Query";       break;
        case HK_JOB_TYPE_RAYCAST_QUERY:     timerName = MONITOR_COMMAND_TIMER_BEGIN"RayCast Query";         break;
        case HK_JOB_TYPE_ANIMATION_SAMPLE_AND_COMBINE:  timerName = MONITOR_COMMAND_TIMER_BEGIN"Animation Sample and Combine";  break;
        case HK_JOB_TYPE_ANIMATION_SAMPLE_AND_BLEND:timerName = MONITOR_COMMAND_TIMER_BEGIN"Animation Sample and Blend";    break;
        case HK_JOB_TYPE_ANIMATION_MAPPING: timerName = MONITOR_COMMAND_TIMER_BEGIN"Animation Mapping";     break;
        case HK_JOB_TYPE_BEHAVIOR:          timerName = MONITOR_COMMAND_TIMER_BEGIN"Behavior";          break;
        case HK_JOB_TYPE_CLOTH:             timerName = MONITOR_COMMAND_TIMER_BEGIN"Cloth";             break;
        case HK_JOB_TYPE_DESTRUCTION:       timerName = MONITOR_COMMAND_TIMER_BEGIN"Destruction";       break;
        case HK_JOB_TYPE_CHARACTER_PROXY:   timerName = MONITOR_COMMAND_TIMER_BEGIN"Character Proxy";   break;
        case HK_JOB_TYPE_VEHICLE:           timerName = MONITOR_COMMAND_TIMER_BEGIN"Vehicle";           break;
        case HK_JOB_TYPE_USER_0:            timerName = MONITOR_COMMAND_TIMER_BEGIN"UserJob";           break;
        default:                            timerName = MONITOR_COMMAND_TIMER_BEGIN"Other";             break;
        }

        // Write a timer-begin command to the monitor stream, if requested and there is room.
        HK_ON_MONITORS_ENABLED(
            hkMonitorStream* mStream = hkMonitorStream::getInstancePtr();
            if ( addTimers && mStream && mStream->memoryAvailable() )
            {
                hkMonitorStream::TimerCommand* h = reinterpret_cast<hkMonitorStream::TimerCommand*>(mStream->getEnd());
                h->m_commandAndMonitor = timerName;
                h->setTime();
                mStream->setEnd( (char*)(h+1) );
            }
        );

        if (m_externalJobProfiler) m_externalJobProfiler->onStartJob(jobType, typedJob.m_jobSubType );

        // Run the handler registered for this type/sub type; its result drives the loop.
        jobStatus = m_jobFuncs[jobType].m_processJobFuncs[typedJob.m_jobSubType]( *this, job );

        // Uses the cached jobType: the entry may already have been overwritten.
        if (m_externalJobProfiler) m_externalJobProfiler->onEndJob(jobType);

        // NOTE(review): the begin command above is guarded by mStream && memoryAvailable(), but
        // this end is guarded by addTimers only - confirm HK_TIMER_END2 handles those cases.
        HK_ON_MONITORS_ENABLED(
            if ( addTimers )
            {
                HK_TIMER_END2(mStream);
            }
        )
        // Call finish and get next here, don't get the process functions to do it
        // Need to clean up logic with finish
    }

    return jobStatus;
}

/// Returns the index of the queue a job belongs in: the custom queue whose registered type and
/// sub type both match the job, otherwise the queue for the job's type.
hkJobQueue::QueueIndex hkJobQueue::getQueueIndexForJob( const hkJob& job ) const
{
    // Custom registrations take precedence over the per-type queues.
    for (int customIdx = 0; customIdx < m_numCustomJobs; ++customIdx)
    {
        if ( job.m_jobType == m_customJobs[customIdx].m_jobType )
        {
            if ( job.m_jobSubType == m_customJobs[customIdx].m_jobSubType )
            {
                return m_customJobs[customIdx].m_queueId;
            }
        }
    }

#if defined HK_PLATFORM_WIN32
    // PC only. Thread affinity routing is disabled: the branch below intentionally does nothing.
    if (job.m_threadAffinity != -1)
    {

        //return m_cpuThreadIndexToSemaphoreIndex(job.m_threadAffinity); // The semaphore is also the index of the first queue
    }
#endif

    // Fall back to the queue dedicated to the job's type.
    return m_cpuTypesQueuesBegin + job.m_jobType;
}

/// Enters the queue's critical section and returns m_data.
/// The char* argument is unnamed and unused in this implementation.
/// Every call must be paired with unlockQueue().
hkJobQueue::DynamicData* hkJobQueue::lockQueue( _Reserved_ char* )
{
    //HK_TIME_CODE_BLOCK("Lock Q", HK_NULL);

    // HK_ASSERT_NO_MSG( 0xf03ef576, !m_criticalSection.haveEntered() );
    m_criticalSection.enter();
    return m_data;
}

/// Leaves the queue's critical section entered by lockQueue().
/// The DynamicData* argument is unnamed and unused in this implementation.
void hkJobQueue::unlockQueue(_Reserved_ DynamicData* )
{
    //HK_TIME_CODE_BLOCK("Unlock Q", HK_NULL);

    m_criticalSection.leave();
}


/// Gets the next job without finishing a previous one: forwards to finishJobAndGetNextJob()
/// with a null old job.
hkJobQueue::JobStatus hkJobQueue::getNextJob( JobQueueEntry& job, WaitStatus waitStatus )
{
    const JobQueueEntry* noFinishedJob = HK_NULL;
    return finishJobAndGetNextJob( noFinishedJob, job, waitStatus );
}

/// Wakes at most one waiting thread, and only if some queue holds a job.
///
/// Preference goes to a thread waiting on the semaphore of a non-empty direct-mapped queue
/// (cache or custom queue). Failing that, if any queue is non-empty, the first semaphore with
/// a waiting thread is released. Does nothing when all queues are empty or no thread waits.
void hkJobQueue::releaseOneWaitingThread(_Inout_ DynamicData* data)
{
    bool semaphoreFound = false;
    int semaphoreIndex = 0;
    bool cpuJobAvailable = false;

    // Direct-mapped queues: each has a semaphore with the same index, and threads waiting on
    // that semaphore look in that queue first.
    int queueIdx = m_cpuSemaphoreBegin;
    for (; queueIdx < m_directMapSemaphoreEnd; ++queueIdx)
    {
        if ( data->m_jobQueue[queueIdx].isEmpty() )
        {
            continue;
        }

        cpuJobAvailable = true;
        if ( data->m_numThreadsWaiting[queueIdx] )
        {
            semaphoreIndex = queueIdx;
            semaphoreFound = true;
            break;
        }
    }

    if ( !semaphoreFound )
    {
        // Remaining queues (the CPU type queues) only tell us whether any work exists.
        for (; !cpuJobAvailable && (queueIdx < m_numJobQueues); ++queueIdx)
        {
            cpuJobAvailable = !data->m_jobQueue[queueIdx].isEmpty();
        }

        if ( cpuJobAvailable )
        {
            // There is work somewhere: wake the first thread waiting on any semaphore.
            for (int candidate = m_cpuSemaphoreBegin; candidate < m_numQueueSemaphores; ++candidate)
            {
                if ( data->m_numThreadsWaiting[candidate] )
                {
                    semaphoreIndex = candidate;
                    semaphoreFound = true;
                    break;
                }
            }
        }
    }

    if ( !semaphoreFound )
    {
        return;
    }

    data->m_numThreadsWaiting[semaphoreIndex]--;
    hkSemaphoreBusyWait::release( m_queueSemaphores[semaphoreIndex] );
}

/// Wakes one thread for a queue that has just received a job (the queue must not be empty).
///
/// If the queue index also names a semaphore with a waiting thread, that thread is released;
/// otherwise the choice is delegated to releaseOneWaitingThread().
void hkJobQueue::checkQueueAndReleaseOneWaitingThread( QueueIndex queueIndex, _Inout_ DynamicData* data )
{
    HK_ASSERT_NO_MSG( 0xf0323454, !data->m_jobQueue[queueIndex].isEmpty() );

    // There is a (kind of) one to one mapping between semaphores and queues. There are more
    // queues than semaphores, but the extra ones come later in the list of queues. A thread
    // waiting on a semaphore always looks first in the queue with the same index.
    const bool hasDirectWaiter = (queueIndex < m_numQueueSemaphores) && (data->m_numThreadsWaiting[queueIndex] > 0);
    if ( !hasDirectWaiter )
    {
        releaseOneWaitingThread( data );
        return;
    }

    data->m_numThreadsWaiting[queueIndex]--;
    hkSemaphoreBusyWait::release( m_queueSemaphores[queueIndex] );
}

/// Adds a job to its queue and wakes one waiting thread.
/// High priority jobs go to the front of the queue, all others to the back.
/// The caller passes the DynamicData obtained from lockQueue().
void hkJobQueue::addJobQueueLocked(_Inout_ DynamicData* data, const JobQueueEntry& job, JobPriority priority )
{
    const QueueIndex targetIndex = getQueueIndexForJob( (hkJob&)job );
    HK_ASSERT_NO_MSG(0xf032e454, targetIndex >= 0 && targetIndex < MAX_NUM_QUEUES );

    Queue& targetQueue = data->m_jobQueue[targetIndex];
    if ( priority != JOB_HIGH_PRIORITY )
    {
        targetQueue.enqueue( job );
    }
    else
    {
        targetQueue.enqueueInFront( job );
    }

    checkQueueAndReleaseOneWaitingThread( targetIndex, data );
}

/// Locks the queue, adds the entry with the given priority, and unlocks again.
void hkJobQueue::addJob( JobQueueEntry& job, JobPriority priority )
{
    HK_ASSERT(0x67556565, HK_THREAD_LOCAL_GET(hkThreadNumber) < m_hwSetup.m_numCpuThreads, "More thread using job queue than Job queue was initialized to handle");

    // Storage handed to lockQueue() (used on SPU to DMA the DynamicData into it).
    HK_ALIGN16(char dynamicDataStorage[sizeof(DynamicData)]);

    DynamicData* lockedData = lockQueue( dynamicDataStorage );
    {
        addJobQueueLocked( lockedData, job, priority );
    }
    unlockQueue( lockedData );
}

/// Copies a job into a fixed-size queue entry and adds that entry to the queue.
/// At most sizeof(JobQueueEntry) bytes of the job are copied.
void hkJobQueue::addJob( hkJob& job, JobPriority priority )
{
    // XXX temp - change hkQueue to take a size.
    JobQueueEntry queueEntry;

    // Never copy more than the entry can hold.
    const int numBytesToCopy = hkMath::min2(job.m_size, sizeof(queueEntry));
    hkString::memCpy( &queueEntry, &job, numBytesToCopy );

    addJob( queueEntry, priority );
}

void hkJobQueue::addJobBatch( const hkArrayBase<hkJob*>& jobs, JobPriority priority )
{
    HK_ASSERT(0x67556565, HK_THREAD_LOCAL_GET(hkThreadNumber) < m_hwSetup.m_numCpuThreads, "More thread using job queue than Job queue was initialized to handle");
    HK_TIME_CODE_BLOCK("AddJobBatch", HK_NULL);

    HK_ALIGN16(char dynamicDataStorage[sizeof(DynamicData)]);
    DynamicData* data = lockQueue( dynamicDataStorage );
    {
        for(int i = 0; i < jobs.getSize(); i++)
        {
            JobQueueEntry entry;
            const int copySize = hkMath::min2(jobs[i]->m_size, sizeof(entry));
            hkString::memCpy(&entry, jobs[i], copySize);

            QueueIndex queueIndex = getQueueIndexForJob( entry );

            // Add the jobEntry to the queue
            if ( priority == JOB_HIGH_PRIORITY )
            {
                data->m_jobQueue[queueIndex].enqueueInFront( entry );
            }
            else
            {
                data->m_jobQueue[queueIndex].enqueue( entry );
            }

            checkQueueAndReleaseOneWaitingThread( queueIndex, data );
        }
    }
    unlockQueue( data );
}


/// Sets the wait policy under the queue lock. Master thread (thread 0) only.
/// Switching to WAIT_UNTIL_ALL_WORK_COMPLETE also releases every waiting thread.
void hkJobQueue::setWaitPolicy( WaitPolicy waitPolicy )
{
    HK_ASSERT(0x5454dd52, HK_THREAD_LOCAL_GET(hkThreadNumber) == 0, "Only the master thread may call this function");

    HK_ALIGN16(char dynamicDataStorage[sizeof(DynamicData)]);
    DynamicData* lockedData = lockQueue( dynamicDataStorage );
    {
        m_data->m_waitPolicy = waitPolicy;

        const bool wakeEveryone = ( waitPolicy == WAIT_UNTIL_ALL_WORK_COMPLETE );
        if ( wakeEveryone )
        {
            releaseWaitingThreads( lockedData );
        }
    }
    unlockQueue( lockedData );
}

/// Returns the current wait policy (see setWaitPolicy). Master thread (thread 0) only.
/// The value is read without taking the queue lock.
hkJobQueue::WaitPolicy hkJobQueue::getWaitPolicy() const
{
    HK_ASSERT(0x5454dd52, HK_THREAD_LOCAL_GET(hkThreadNumber) == 0, "Only the master thread may call this function");

    const WaitPolicy currentPolicy = m_data->m_waitPolicy;
    return currentPolicy;
}

/// Returns the master thread finishing flags. Master thread (thread 0) only.
/// The value is read without taking the queue lock.
int hkJobQueue::getMasterThreadFinishingFlags() const
{
    HK_ASSERT(0x5454dd52, HK_THREAD_LOCAL_GET(hkThreadNumber) == 0, "Only the master thread may call this function");

    const int currentFlags = m_data->m_masterThreadFinishingFlags;
    return currentFlags;
}

void hkJobQueue::setMasterThreadFinishingFlags( int flags )
{
    HK_ASSERT(0x5454dd52, HK_THREAD_LOCAL_GET(hkThreadNumber) == 0, "Only the master thread may call this function");
    HK_ALIGN16(char dynamicDataStorage[sizeof(DynamicData)]);
    DynamicData* data = lockQueue( dynamicDataStorage );

    m_data->m_masterThreadFinishingFlags = flags;
    if ( data->m_numThreadsWaiting[m_cpuThreadIndexToSemaphoreIndex[0]] > 0 )
    {
        // Release the one master thread.
        data->m_numThreadsWaiting[m_cpuThreadIndexToSemaphoreIndex[0]]--;
        hkSemaphoreBusyWait::release( m_queueSemaphores[m_cpuThreadIndexToSemaphoreIndex[0]] );
    }

    unlockQueue( data );
}

/// Searches the calling thread's queues, in table order, for the first non-empty one and takes
/// a job from it.
///
/// WARNING: must only be called while the MT critical section is locked.
/// TODO - add isLocked to critical section and add assert
///
/// The dequeued entry is passed to m_popJobFunc, which fills jobOut. If the pop function
/// returns DO_NOT_POP_QUEUE_ENTRY, the entry goes back to the front of its queue and a waiting
/// thread is woken. The active job count for the entry's job type is incremented either way.
///
/// \return The index of the queue the job came from, or -1 if every searched queue was empty.
HK_INLINE hkJobQueue::QueueIndex hkJobQueue::findNextJob( JobQueueEntry& jobOut, _Inout_ DynamicData* data )
{
    // Thread 0 can be the master thread, in which case its search table may differ from its
    // semaphore index. Special-casing it here avoids keeping a second map for all threads.
    const int tableIndex = HK_THREAD_LOCAL_GET(hkThreadNumber) == 0 ? m_masterThreadQueue : getSemaphoreIndex(HK_THREAD_LOCAL_GET(hkThreadNumber));

    QueueIndex queueIndex;
    Queue* sourceQueue = HK_NULL;

    // Walk the table until a non-empty queue or the negative end marker is reached.
    for ( hkInt8* cursor = m_nextQueueToGet[ tableIndex ]; ( queueIndex = cursor[0] ) >= 0; ++cursor )
    {
        Queue* candidate = &data->m_jobQueue[ queueIndex ];
        if ( !candidate->isEmpty() )
        {
            sourceQueue = candidate;
            break;
        }
    }

    if ( sourceQueue == HK_NULL )
    {
        return -1;
    }

    HK_ALIGN16(JobQueueEntry job);
    sourceQueue->dequeue(job);

    const JobPopFuncResult popResult = m_popJobFunc(*this, data, job, jobOut);
    if ( popResult == DO_NOT_POP_QUEUE_ENTRY )
    {
        // The entry stays on the queue, so another thread can pick it up as well.
        sourceQueue->enqueueInFront(job);
        checkQueueAndReleaseOneWaitingThread( queueIndex, data );
    }
    data->m_numActiveJobs[job.m_jobType]++;

    return queueIndex;
}


/// Returns true when none of the m_numJobQueues queues holds an entry.
HK_INLINE hkBool hkJobQueue::allQueuesEmpty(_Inout_ hkJobQueue::DynamicData* data )
{
    for (int queueIdx = 0; queueIdx < m_numJobQueues; ++queueIdx)
    {
        if ( data->m_jobQueue[queueIdx].getSize() != 0 )
        {
            return false;
        }
    }
    return true;
}



/// Returns the semaphore index stored for the given thread number in
/// m_cpuThreadIndexToSemaphoreIndex. No range check is done here on threadNumber.
HK_INLINE int hkJobQueue::getSemaphoreIndex( int threadNumber ) const
{
    return m_cpuThreadIndexToSemaphoreIndex[ threadNumber ];
}

/// Tries to get a job for the calling thread. Assumes a locked queue; the queue is unlocked on
/// every return path.
///
/// \param queueIndexOfNewJob Queue that the caller has just added a job to, or -1 if none.
/// \param data               The locked dynamic data.
/// \param waitStatus         DO_NOT_WAIT_FOR_NEXT_JOB makes the function return instead of waiting.
/// \param jobOut             Receives the job when GOT_NEXT_JOB is returned.
///
/// \return GOT_NEXT_JOB      - a job was found;
///         ALL_JOBS_FINISHED - the master thread may finish, or all work is complete and the
///                             wait policy is not WAIT_INDEFINITELY;
///         NO_JOBS_AVAILABLE - nothing was found and waiting was not requested;
///         JOB_INVALID       - the thread waited on its semaphore and was released. The callers
///                             in this file loop and call again when they see this value.
hkJobQueue::JobStatus hkJobQueue::findJobInternal( QueueIndex queueIndexOfNewJob, _Inout_ DynamicData* data, WaitStatus waitStatus, JobQueueEntry& jobOut )
{
    // Try to find another job from available job queues
    QueueIndex queueIndexOfFoundJob = findNextJob( jobOut, data );

    // If we have just added a job prior to calling this function (from addJob... or finishJob...)
    // then release a thread. Note that if we have just got a job from that queue then we can skip this release.
    // If the job we got actually got split, then the release happens from findNextJob().
    if ( (queueIndexOfNewJob != -1) && (queueIndexOfNewJob != queueIndexOfFoundJob) )
    {
        checkQueueAndReleaseOneWaitingThread( queueIndexOfNewJob, data );
    }

    if ( queueIndexOfFoundJob >= 0 )
    {
        unlockQueue( data );
        return GOT_NEXT_JOB;
    }

    //
    // No job was found: work out whether the master thread may finish and whether all work is complete
    //

    bool masterThreadShouldFinish = false;
    bool allWorkComplete = false;
    {
        // First check if the cpu cache queues are empty
        // (the range below covers the direct-mapped queues, i.e. the cache and custom queues)
        int numCpuCacheJobsOnQueues = 0;
        for (int i = m_cpuSemaphoreBegin; i < m_directMapSemaphoreEnd; ++i)
        {
            numCpuCacheJobsOnQueues += data->m_jobQueue[i].getSize();
        }

        // If they are, then check the type queues: a type counts as active when it has jobs in
        // flight or its type queue is not empty
        if (numCpuCacheJobsOnQueues == 0)
        {
            // Create a bitfield for all the job types, where a 1 means the job type is still active
            // (so a 0 bit means that type is finished)
            int currentFlags = 0;
            for (int i = 0; i < m_numJobTypes; ++i)
            {
                // Note: This requires SPU to have all queues
                int typeActive = (data->m_numActiveJobs[i] > 0) || !data->m_jobQueue[m_cpuTypesQueuesBegin + i].isEmpty();
                currentFlags |= typeActive << i;
            }
            // The master thread may finish once none of the types it is flagged to wait for is active
            masterThreadShouldFinish = (currentFlags & data->m_masterThreadFinishingFlags) == 0;
            allWorkComplete = (currentFlags == 0);
        }
    }

    if (masterThreadShouldFinish)
    {
        if ( HK_THREAD_LOCAL_GET(hkThreadNumber) == 0 )
        {
            // This is the master thread: return now, waking everybody first if nothing is left to do
            if (allWorkComplete)
            {
                releaseWaitingThreads(data);
            }
            unlockQueue( data );
            return ALL_JOBS_FINISHED;
        }
        else
        if ( data->m_numThreadsWaiting[m_cpuThreadIndexToSemaphoreIndex[0]] > 0 )
        {
            // Release the one master thread.
            data->m_numThreadsWaiting[m_cpuThreadIndexToSemaphoreIndex[0]]--;
            hkSemaphoreBusyWait::release( m_queueSemaphores[m_cpuThreadIndexToSemaphoreIndex[0]] );
        }
    }

    if (allWorkComplete)
    {
        //printf("All work complete %d\n", HK_THREAD_LOCAL_GET(hkThreadNumber) );
        // Under WAIT_INDEFINITELY the thread falls through and may wait for new work instead
        if ( data->m_waitPolicy != WAIT_INDEFINITELY )
        {
            releaseWaitingThreads(data);
            unlockQueue( data );
            return ALL_JOBS_FINISHED;
        }
    }

    if ( waitStatus == DO_NOT_WAIT_FOR_NEXT_JOB )
    {
        unlockQueue( data );
        return NO_JOBS_AVAILABLE;
    }

    //
    //  Wait for a semaphore
    //
    {
        // Register as a waiter while still holding the lock, then unlock before acquiring
        int mySemaphoreIndex = getSemaphoreIndex(HK_THREAD_LOCAL_GET(hkThreadNumber));
        data->m_numThreadsWaiting[mySemaphoreIndex]++;
        unlockQueue( data );
        HK_TIMER_BEGIN("NoJobAvailable",HK_NULL);
        hkSemaphoreBusyWait::acquire( m_queueSemaphores[mySemaphoreIndex] );
        HK_TIMER_END();
    }

    // Woken up without a job in hand
    return JOB_INVALID;
}


/// Finishes a job without fetching another one.
///
/// With the queue locked, the finish function runs for oldJob. If it creates a follow-up job,
/// that job is enqueued (at the front when high priority) and one waiting thread is woken.
/// The active job count for the old job's type is decremented only for FINISH_FLAG_NORMAL.
///
/// \param oldJob The job that has finished; must not be null.
/// \param flag   Controls whether the active job count is decremented.
void hkJobQueue::finishJob(_In_ const JobQueueEntry* oldJob, FinishJobFlag flag)
{
    HK_ASSERT(0x67556565, HK_THREAD_LOCAL_GET(hkThreadNumber) < m_hwSetup.m_numCpuThreads, "More thread using job queue than Job queue was initialized to handle");

    HK_TIME_CODE_BLOCK("finishJob", HK_NULL);
    HK_ASSERT_NO_MSG(0x975efae9, oldJob != HK_NULL );

    HK_ALIGN16(char dynamicDataStorage[sizeof(DynamicData)]);
    DynamicData* lockedData = lockQueue( dynamicDataStorage );

    // Finishing the old job may trigger a new job.
    JobQueueEntryInput createdJob;
    const JobCreationStatus creationStatus = m_finishJobFunc( *this, lockedData, *oldJob, createdJob );

    if ( creationStatus == JOB_CREATED )
    {
        const QueueIndex newJobQueueIndex = getQueueIndexForJob( (hkJob&)createdJob.m_job );
        Queue& targetQueue = lockedData->m_jobQueue[newJobQueueIndex];

        if ( createdJob.m_jobPriority != JOB_HIGH_PRIORITY )
        {
            targetQueue.enqueue( (const JobQueueEntry&)createdJob.m_job );
        }
        else
        {
            targetQueue.enqueueInFront( (const JobQueueEntry&)createdJob.m_job );
        }

        checkQueueAndReleaseOneWaitingThread( newJobQueueIndex, lockedData );
    }

    if ( flag == FINISH_FLAG_NORMAL )
    {
        lockedData->m_numActiveJobs[oldJob->m_jobType]--;
    }

    unlockQueue( lockedData );
}

/// Optionally finishes a job, then fetches the next one.
///
/// On the first pass, if oldJob is non-null, its finish function runs, any job it creates is
/// enqueued, and the active job count for its type is decremented. The function then calls
/// findJobInternal() repeatedly until it returns something other than JOB_INVALID.
///
/// \param oldJob     The job that has finished, or null when there is none.
/// \param jobOut     Receives the next job when GOT_NEXT_JOB is returned.
/// \param waitStatus Whether to wait when no job is available.
hkJobQueue::JobStatus hkJobQueue::finishJobAndGetNextJob(_In_opt_ const JobQueueEntry* oldJob, JobQueueEntry& jobOut, WaitStatus waitStatus)
{
    HK_ASSERT(0x67556565, HK_THREAD_LOCAL_GET(hkThreadNumber) < m_hwSetup.m_numCpuThreads, "More thread using job queue than Job queue was initialized to handle");

    HK_TIME_CODE_BLOCK("GetNextJob", HK_NULL);

    for (;;)
    {
        HK_ALIGN16(char dynamicDataStorage[sizeof(DynamicData)]);
        DynamicData* lockedData = lockQueue( dynamicDataStorage );

        QueueIndex queueIndexOfNewJob = -1;
        JobQueueEntryInput createdJob;

        if ( oldJob != HK_NULL )
        {
            // Finishing the old job may trigger a new job, which is then added to its queue.
            const JobCreationStatus creationStatus = m_finishJobFunc( *this, lockedData, *oldJob, createdJob );
            if ( creationStatus == JOB_CREATED )
            {
                queueIndexOfNewJob = getQueueIndexForJob( (hkJob&)createdJob.m_job );
                Queue& targetQueue = lockedData->m_jobQueue[queueIndexOfNewJob];

                if ( createdJob.m_jobPriority != JOB_HIGH_PRIORITY )
                {
                    targetQueue.enqueue( (const JobQueueEntry&)createdJob.m_job );
                }
                else
                {
                    targetQueue.enqueueInFront( (const JobQueueEntry&)createdJob.m_job );
                }
            }

            lockedData->m_numActiveJobs[oldJob->m_jobType]--;

            // The old job is finished once only; later passes just look for a job.
            oldJob = HK_NULL;
        }

        // findJobInternal() unlocks the queue before returning.
        const hkJobQueue::JobStatus status = findJobInternal( queueIndexOfNewJob, lockedData, waitStatus, jobOut );
        if ( status != JOB_INVALID )
        {
            return status;
        }
    }
}

/// Installs the handler functions for a job type, replacing whatever m_jobFuncs held for it.
/// jobId must be less than m_numJobTypes (checked by assert only).
/// NOTE(review): the handler table is written without taking the queue lock - presumably this
/// is meant to be called during setup, before worker threads use the queue; confirm.
void hkJobQueue::registerJobHandler(hkJobType jobId, hkJobHandlerFuncs jobHandlerFuncs )
{
    HK_ASSERT(0xaf3526ea, jobId < int(m_numJobTypes), "You can only register a maximum of m_numJobTypes.");

    m_jobFuncs[jobId] = jobHandlerFuncs;
}

/// Finishes a job of type oldJobType, adds jobInOut to its queue, then fetches the next job
/// into jobInOut.
///
/// The finish and add steps happen once, on the first pass. The function then calls
/// findJobInternal() repeatedly until it returns something other than JOB_INVALID.
///
/// \param oldJobType Type of the job that has finished; its active job count is decremented.
/// \param priority   JOB_HIGH_PRIORITY enqueues at the front, anything else at the back.
/// \param jobInOut   In: the job to add. Out: the next job when GOT_NEXT_JOB is returned.
/// \param waitStatus Whether to wait when no job is available.
hkJobQueue::JobStatus hkJobQueue::finishAddAndGetNextJob( hkJobType oldJobType, JobPriority priority, JobQueueEntry& jobInOut, WaitStatus waitStatus )
{
    HK_ASSERT(0x67556565, HK_THREAD_LOCAL_GET(hkThreadNumber) < m_hwSetup.m_numCpuThreads, "More thread using job queue than Job queue was initialized to handle");

    HK_TIME_CODE_BLOCK("GetNextJob", HK_NULL);

    bool jobAlreadyAdded = false;
    for (;;)
    {
        HK_ALIGN16(char dynamicDataStorage[sizeof(DynamicData)]);
        DynamicData* lockedData = lockQueue( dynamicDataStorage );

        QueueIndex queueIndexOfNewJob = -1;
        if ( !jobAlreadyAdded )
        {
            // Finish the old job
            lockedData->m_numActiveJobs[oldJobType]--;

            // Add the new job to its queue
            queueIndexOfNewJob = getQueueIndexForJob( (hkJob&)jobInOut );
            Queue& targetQueue = lockedData->m_jobQueue[queueIndexOfNewJob];

            if ( priority != JOB_HIGH_PRIORITY )
            {
                targetQueue.enqueue( jobInOut );
            }
            else
            {
                targetQueue.enqueueInFront( jobInOut );
            }

            jobAlreadyAdded = true;
        }

        // findJobInternal() unlocks the queue before returning.
        const hkJobQueue::JobStatus status = findJobInternal( queueIndexOfNewJob, lockedData, waitStatus, jobInOut );
        if ( status != JOB_INVALID )
        {
            return status;
        }
    }
}

/// Wakes every waiting thread: for each semaphore, the waiting count is reset to zero and the
/// semaphore is released once per thread that was waiting on it.
void hkJobQueue::releaseWaitingThreads(_Inout_ DynamicData* data)
{
#if defined(HK_DEBUG)
    //{for (int i=0; i< m_numJobQueues; i++){   HK_ASSERT( 0xf032de21, data->m_jobQueue[i].isEmpty(), "Queues not empty" ); }}
#endif
    for (int semaphoreIdx = 0; semaphoreIdx < m_numQueueSemaphores; ++semaphoreIdx)
    {
        int pendingReleases = data->m_numThreadsWaiting[semaphoreIdx];
        data->m_numThreadsWaiting[semaphoreIdx] = 0;

        while ( pendingReleases > 0 )
        {
            hkSemaphoreBusyWait* semaphore = m_queueSemaphores[semaphoreIdx];
            hkSemaphoreBusyWait::release( semaphore );
            --pendingReleases;
        }
    }
}

/*
 * Havok SDK - Base file, BUILD(#20180110)
 * 
 * Confidential Information of Microsoft Corporation.
 * Not for disclosure or distribution without Microsoft's prior written
 * consent.  This software contains code, techniques and know-how which
 * is confidential and proprietary to Microsoft.  Product and Trade Secret
 * source code contains trade secrets of Microsoft.  Havok Software (C)
 * Copyright 1999-2018 Microsoft Corporation.
 * All Rights Reserved. Use of this software is subject to the
 * terms of an end user license agreement.
 * 
 * The Havok Logo, and the Havok buzzsaw logo are trademarks of Microsoft.
 * Title, ownership rights, and intellectual property rights in the Havok
 * software remain in Microsoft and/or its suppliers.
 * 
 * Use of this software for evaluation purposes is subject to and
 * indicates acceptance of the End User licence Agreement for this
 * product. A copy of the license is included with this software and is
 * also available from Havok Support.
 * 
 */
