// TKBMS v1.0 -----------------------------------------------------
//
// PLATFORM     : ALL
// PRODUCT      : COMMON
// VISIBILITY   : PUBLIC
//
// ------------------------------------------------------TKBMS v1.0

#include <Common/Base/hkBase.h>
#include <Common/Base/Thread/Atomic/hkIntegerDistributor.h>

/// Sizes the per-thread bookkeeping and splits the index range [0, numItems) over the queues.
///
/// maxNumThreads + 1 queues (and thread-data slots) are created. Slots maxNumThreads..1 each
/// receive a contiguous chunk of (numItems / maxNumThreads) indices, handed out in ascending
/// index order starting with the highest slot. Slot 0 receives whatever is left, i.e. the
/// highest indices up to numItems. Work stealing starts out enabled.
///
/// \param maxNumThreads  Used as a divisor, so it must not be 0.
/// \param numItems       Total number of indices to distribute.
void hkIntegerDistributor::initThreadInfo( int maxNumThreads, int numItems )
{
    // Both structures are expected to occupy exactly one cache line each.
    HK_COMPILE_TIME_ASSERT( sizeof(hkIntegerDistributor::Queue) == hkAtomic::CACHELINE_SIZE );
    HK_COMPILE_TIME_ASSERT( sizeof(hkIntegerDistributor::ThreadData) == hkAtomic::CACHELINE_SIZE );

    const int numSlots = maxNumThreads + 1;
    m_threadData.setSize( numSlots );
    m_queues.setSize( numSlots );

    m_enableWorkStealing = true;

    // Integer division: the remainder ends up on queue 0 below.
    const int chunkSize = numItems / maxNumThreads;

    // Walk the slots from the last one down to slot 1. Queue 0 is filled last so that it
    // ends up holding the highest indices, which is what addMoreItems() relies on.
    int rangeStart = 0;
    int slot = m_threadData.getSize();
    while ( --slot > 0 )
    {
        const int rangeEnd = rangeStart + chunkSize;
        m_queues[slot].set( rangeStart, rangeEnd );
        m_threadData[slot].m_lastSuccesfulQueue = slot;    // every slot starts on its own queue
        rangeStart = rangeEnd;
    }

    // Everything not handed out above (including the division remainder) goes to queue 0.
    m_queues[0].set( rangeStart, numItems );
    m_threadData[0].m_lastSuccesfulQueue = 0;
}

void hkIntegerDistributor::addMoreItems( int numItems )
{
    // add our items to the queue with the highest max value.
    // Since queue 0 always contains the highest max, no need to search.
    hkUint64* ptrCam = &m_queues[0].m_counterAndMax;

    // we need to set the max, and if the current index >= max, set it to the old max
    hkUint64 oldCAm;
    hkUint64 newCAm;
    for ( hkAtomic::Backoff<> b; ; b.pause() )
    {
        oldCAm = *ptrCam;   // atomically get the value

        hkUint32 index = counterOf(oldCAm);
        hkUint32 maxValue = maxValueOf(oldCAm);
        if ( index > maxValue )
        {
            index = maxValue;
        }
        maxValue += numItems;
        newCAm = (hkUint64(maxValue) << 32) | hkUint64(index);
        if ( hkAtomic::compareAndSwap64( ptrCam, oldCAm, newCAm ) )
        {
            break;
        }
    }

    if ( !m_enableWorkStealing && numItems >= MIN_QUEUE_LEN_FOR_WORK_STEALING  )
    {
        m_enableWorkStealing = true;
        hkAtomic::readWriteBarrier();
    }
}

/// Slow path used when a thread could not get an item from its current queue.
///
/// Two strategies are tried in order:
///  1. If work stealing is enabled, find the queue with the most remaining items and move a
///     quarter of them onto the queue at index currentThreadId, returning the first stolen item.
///  2. Otherwise (or if the steal came back empty), visit every queue once, starting with the
///     one after the last successful queue, and try to take a single item from each.
///
/// \param currentThreadId  Index into m_queues / m_threadData for the calling thread.
/// \param indexOut         Receives the obtained item index. Only written when HK_SUCCESS is returned.
/// \return HK_SUCCESS if an item was obtained, HK_FAILURE if no queue yielded one.
hkResult hkIntegerDistributor::refillLocalCounterAndGetItem( int currentThreadId, _Out_ hkUint32* indexOut )
{
    // First try to refill our own queue by stealing a batch of items from the largest queue.
    if ( m_enableWorkStealing )
    {
        // Search for the largest queue; this can be pretty expensive.
        // Note that the scan covers all queues and does not skip currentThreadId.
        int largestQueueIndex = 0;
        int largestSize = 0;
        for (int i = 0; i < m_threadData.getSize(); i++ )
        {
            Queue& queue = m_queues[i];
            hkUint64 cAm = queue.m_counterAndMax;   // one 64-bit read of the packed counter/max (NOTE(review): assumed to be an atomic load on the target - confirm)
            // Remaining items; not positive once the counter has reached or passed the max, so such queues never win.
            int numElements = maxValueOf(cAm) - counterOf(cAm);
            if ( numElements > largestSize )
            {
                largestSize = numElements;
                largestQueueIndex = i;
            }
        }
        if ( largestSize < MIN_QUEUE_LEN_FOR_WORK_STEALING  )
        {
            m_enableWorkStealing = false;   // queues are getting too small; stealing a batch is too expensive, steal single items instead (below)
        }
        else
        {
            // Steal items from the largest queue. Note that by now it might no longer be the largest,
            // or might even have been drained; this is handled by the clipping below.
            Queue& largestQueue = m_queues[largestQueueIndex];

            int numItemsToSteal = largestSize/4;
            // Advance the victim's counter by the batch size. The returned value is used as the
            // packed state from before the add.
            hkUint64 cAm = hkAtomic::exchangeAdd64( &largestQueue.m_counterAndMax, numItemsToSteal );

            hkUint32 index  = counterOf(cAm);       // first index of the range we just claimed
            hkUint32 oldMax = maxValueOf(cAm);      // this is the max value of the queue we stole our items from
            hkUint32 newMax = index + numItemsToSteal;  // this is our new max

            if ( newMax > oldMax )
            {
                newMax = oldMax;    // clip: the victim held fewer items than we tried to claim
            }

            // Put the stolen range onto my local queue, minus its first item which is returned to the caller directly.
            Queue& queue = m_queues[currentThreadId];
            queue.set( index+1, newMax );

            // Reset m_lastSuccesfulQueue so this thread uses its own queue again.
            m_threadData[currentThreadId].m_lastSuccesfulQueue = currentThreadId;
            if (index < newMax )
            {
                *indexOut = index;
                return HK_SUCCESS;
            }
            // The claimed range was empty: fall through to the single-item search below.

            // There is a very tiny chance that a thread will return HK_FAILURE even though work items
            // are still available. Details:
            //  - Stolen items are, for a short moment, not visible to any other thread.
            //    E.g. if there are 16 items on the queue, 4 get stolen. If the 12 remaining items are
            //    consumed before this thread has put the 4 onto its own queue, other threads see no work.
            //  In that case the 4 items will not be shared between all threads; at a minimum they are
            //  processed by this thread.
        }
    }

    // Now search all queues for a single item. We get here if work stealing is disabled
    // or if the batch steal above did not yield an item.
    int qi = m_threadData[currentThreadId].m_lastSuccesfulQueue;
    for( int i =0; i < m_threadData.getSize(); i++)
    {
        qi++;   // the last successful queue is exhausted, so start with the one after it
        if (qi == m_threadData.getSize() )      {       qi = 0;     }   // wrap around

        //      if ( qi == currentThreadId )
        //      {
        //          return HK_FAILURE;  // all queues searched // note this early out does not work with work stealing
        //      }
        Queue& queue = m_queues[ qi ];

        // Claim one item. The counter is incremented even when the queue turns out to be empty.
        hkUint64 counterAndMax = hkAtomic::exchangeAdd64( &queue.m_counterAndMax, 1 );
        hkUint32 index = counterOf(counterAndMax);
        if ( index < maxValueOf(counterAndMax) )
        {
            // Remember this queue so that the next request of this thread goes here first.
            m_threadData[currentThreadId].m_lastSuccesfulQueue = qi;
            *indexOut = index;
            return HK_SUCCESS;
        }
    }
    // Every queue was visited once and none had an item left.
    return HK_FAILURE;
}


/// Hands out a contiguous range of item indices [*startIndexOut, *maxIndexOut) to the calling thread.
///
/// Fast path: the requested number of items is claimed from the queue this thread last succeeded
/// on with a single atomic add. If that queue is exhausted, the slow path
/// refillLocalCounterAndGetItem() is used, which yields at most one item.
///
/// The batch size is adaptive: *numRequestedInOut is rewritten with a suggested size for the
/// next call. It is halved when the request is large compared to what is left in the queue,
/// and doubled while it is small compared to what is left.
///
/// \param currentThreadId     Caller's thread id. Slot (currentThreadId + 1) of the internal arrays is used.
/// \param numRequestedInOut   In: number of items wanted (a value of 0 is treated as 1).
///                            Out: suggested request size for the next call.
/// \param startIndexOut       Receives the first index of the returned range.
/// \param maxIndexOut         Receives one past the last index of the returned range.
/// \return HK_SUCCESS if a non-empty range was obtained, otherwise the result of the slow path.
hkResult hkIntegerDistributor::getItems(int currentThreadId, hkUint32* numRequestedInOut, hkUint32* startIndexOut, hkUint32* maxIndexOut)
{
    // A request for 0 items can never make progress: it would return an empty range on success
    // and, because doubling 0 stays 0, the batch growing loop below would never terminate.
    if (*numRequestedInOut == 0)
    {
        *numRequestedInOut = 1;
    }

    currentThreadId = currentThreadId + 1;
    int queueIndex = m_threadData[currentThreadId].m_lastSuccesfulQueue;
    Queue& counter = m_queues[queueIndex];

    // Claim the items and fetch the queue state in one atomic operation; counter and max
    // are both taken from that single returned value so they are consistent with each other.
    hkUint64 counterAndMax = hkAtomic::exchangeAdd64(&counter.m_counterAndMax, *numRequestedInOut);
    hkUint32 mav = maxValueOf(counterAndMax);
    hkUint32 index = counterOf(counterAndMax);

    *startIndexOut = index; // optimistically set the output

    if (index >= mav)
    {
        // The queue was already exhausted: fall back to the slow path, one item at a time.
        *numRequestedInOut = 1;
        hkResult res = refillLocalCounterAndGetItem(currentThreadId, startIndexOut);
        *maxIndexOut = 1 + *startIndexOut;
        return res;
    }

    hkUint32 totalToProcess = mav - index;  // items that were left in the queue before our claim
    hkUint32 batchSize = *numRequestedInOut;
    *maxIndexOut = index + batchSize;
    if (batchSize > 1+(totalToProcess>>3))
    {
        // The request is large relative to the remaining work: clip the range to what is
        // actually left and ask for half as much next time. batchSize is at least 2 here,
        // so the halved value never drops to 0.
        *maxIndexOut = index + hkMath::min2(totalToProcess, batchSize);
        batchSize >>= 1;
        *numRequestedInOut = batchSize;
    }
    else while (batchSize < totalToProcess >> 4)
    {
        // The request is small relative to the remaining work: grow the size of the next
        // request. The range returned by this call is left unchanged.
        batchSize <<= 1;
        *numRequestedInOut = batchSize;
    }
    return HK_SUCCESS;
}

/*
 * Havok SDK - Base file, BUILD(#20180110)
 * 
 * Confidential Information of Microsoft Corporation.
 * Not for disclosure or distribution without Microsoft's prior written
 * consent.  This software contains code, techniques and know-how which
 * is confidential and proprietary to Microsoft.  Product and Trade Secret
 * source code contains trade secrets of Microsoft.  Havok Software (C)
 * Copyright 1999-2018 Microsoft Corporation.
 * All Rights Reserved. Use of this software is subject to the
 * terms of an end user license agreement.
 * 
 * The Havok Logo, and the Havok buzzsaw logo are trademarks of Microsoft.
 * Title, ownership rights, and intellectual property rights in the Havok
 * software remain in Microsoft and/or its suppliers.
 * 
 * Use of this software for evaluation purposes is subject to and
 * indicates acceptance of the End User licence Agreement for this
 * product. A copy of the license is included with this software and is
 * also available from Havok Support.
 * 
 */
