// TKBMS v1.0 -----------------------------------------------------
//
// PLATFORM   : WIN32 X64 DURANGO METRO_X86 METRO_X64 METRO_ARM APOLLO_ARM APOLLO_X86 UWP OSINTERNAL
// PRODUCT   : COMMON
// VISIBILITY   : PUBLIC
//
// ------------------------------------------------------TKBMS v1.0

#include <Common/Base/hkBase.h>
#include <Common/Base/Config/hkConfigThread.h>
#include <Common/Base/System/hkBaseSystem.h>
#include <Common/Base/Thread/CriticalSection/hkCriticalSection.h>
#include <Common/Base/System/Hardware/hkHardwareInfo.h>
#include <Common/Base/Fwd/hkwindows.h>

#if HK_CONFIG_THREAD == HK_CONFIG_MULTI_THREADED

#if defined(HK_USE_FAST_WIN32_CRITICAL_SECTION)

// Makes one atomic attempt to claim the lock word for a thread.
//   lockingThread : lock word; the swap is only attempted against the value 0.
//   threadID      : value written into the lock word if the swap goes through.
// Returns the result of hkAtomic::compareAndSwap converted to bool; callers in
// this file treat 'true' as "the lock is now held by threadID".
HK_INLINE bool hkCriticalSection_lockUsingCmpExch(_Inout_ volatile hkUint32* lockingThread, hkUint32 threadID)
{
    const hkUint32 unownedMarker = 0;
    const bool acquired = hkAtomic::compareAndSwap<hkUint32>(lockingThread, unownedMarker, threadID);
    return acquired;
}

// Constructs an unlocked critical section (fast Win32 implementation).
//
//   dwSpinMax : number of compare-and-swap attempts enter() makes in its spin
//               phase before it falls back to waiting on the semaphore.
//               The value is narrowed to hkUint16 on storage, so it is clamped
//               to [0, 0xFFFF] first. Without the clamp an out-of-range request
//               wraps silently (e.g. 65536 would become 0 spins, and a negative
//               value would become a very large spin count).
//
// The semaphore is created eagerly here; if creation fails the handle is left
// null and enter() retries the creation on its slow path.
hkCriticalSection::hkCriticalSection(int dwSpinMax)
{
//#if !defined(HK_PLATFORM_WINRT) && !defined(HK_PLATFORM_DURANGO)
    m_semaphoreHandle = CreateSemaphore(NULL, 0, 0x7FFFFFFF, NULL);
// #else
//  m_semaphore = CreateSemaphoreExW(NULL, 0, 0x7FFFFFFF, NULL, 0, STANDARD_RIGHTS_ALL | SEMAPHORE_MODIFY_STATE);
// #endif
    m_lockingThread = 0;
    m_numThreadsWaitingOnSemaphore = 0;
    m_numRecursiveLocks = 0;

    // Saturate instead of wrapping when the request does not fit in 16 bits.
    const int maxSpinCount = 0xFFFF;
    int clampedSpinCount = dwSpinMax;
    if (clampedSpinCount < 0)
    {
        clampedSpinCount = 0;
    }
    else if (clampedSpinCount > maxSpinCount)
    {
        clampedSpinCount = maxSpinCount;
    }
    m_spinCount = (hkUint16)clampedSpinCount;
}

// Releases the semaphore handle owned by this critical section, if one exists.
hkCriticalSection::~hkCriticalSection()
{
    // No handle means there is nothing to close.
    if (!m_semaphoreHandle)
    {
        return;
    }

    CloseHandle((HANDLE)m_semaphoreHandle);
}


// Hands one wake-up to a thread blocked on the semaphore in enter().
//
// Atomically subtracts one from m_numThreadsWaitingOnSemaphore and then releases
// a single count on the semaphore. The caller is not visible in this file;
// presumably this is invoked by the unlock path when waiters are registered
// (TODO confirm against the header).
void hkCriticalSection::unlockImpl()
{
    // One waiter is about to be woken, so take it off the waiter count first.
    // NOTE(review): the original remark below appears to relate to the pointer cast; its exact intent is not clear from this file.
    hkAtomic::exchangeAdd32((hkUint32*)&m_numThreadsWaitingOnSemaphore, -1);    // low bits come first on intel
    // The semaphore is expected to exist by the time anybody needs waking.
    HK_ASSERT_NO_MSG(0xf04346f, m_semaphoreHandle);
    // Release exactly one count. The result is captured through HK_ON_DEBUG
    // (presumably only in debug builds) so that it can be asserted on below.
    HK_ON_DEBUG(hkBool32 success = ) ReleaseSemaphore((HANDLE)m_semaphoreHandle, 1, NULL);
    HK_ASSERT_NO_MSG(0xf04f2343, success);
}


// Acquires the critical section for the calling thread, blocking if necessary.
//
// The acquisition proceeds in stages:
//   1. Uncontended: claim the lock word with a single compare-and-swap.
//   2. Recursive: the calling thread already owns the lock, so only the
//      recursion counter is bumped.
//   3. Spin: retry the compare-and-swap up to m_spinCount times.
//   4. Block: register as a waiter and sleep on the semaphore until woken,
//      then try again.
// Every successful path ends at RETURN, which issues a read/write barrier.
void hkCriticalSection::enter()
{
    hkUint32 threadId = GetCurrentThreadId();
    // Stage 1: the lock word reads as free and we win the swap -> first lock by this thread.
    if (!m_lockingThread && hkCriticalSection_lockUsingCmpExch(&m_lockingThread, threadId))
    {
        m_numRecursiveLocks = 1;
        goto RETURN;
    }
    // Stage 2: the lock word already holds our thread id -> recursive lock.
    if (threadId == m_lockingThread)
    {
        m_numRecursiveLocks++;
        goto RETURN;
    }
    {
        // Attempt spin-lock
        // Stage 3: bounded number of swap attempts, pausing between attempts.
        for (hkUint32 dwSpin = 0; dwSpin < m_spinCount; dwSpin++)
        {
            if (hkCriticalSection_lockUsingCmpExch(&m_lockingThread, threadId))
            {
                // The lock was free when we took it, so the counter is presumably 0 here
                // and this yields 1 (TODO confirm against the unlock path).
                m_numRecursiveLocks++;
                goto RETURN;
            }
            hkAtomic::pauseTicks();
        }

        // Ensure we have the kernel semaphore created
        // The constructor normally creates it; this covers a null handle.
        if (!m_semaphoreHandle)
        {
            HANDLE semaphore = CreateSemaphore(NULL, 0, 0x7FFFFFFF, NULL);
            HK_ASSERT_NO_MSG(0xf03dfe34, semaphore);
            // Publish our handle only if the member is still null; otherwise another
            // thread installed one first and ours is discarded.
            if (!hkAtomic::compareAndSwap<HANDLE>((HANDLE*)&m_semaphoreHandle, NULL, semaphore))
            {
                CloseHandle(semaphore); // we're late
            }
        }

        // Stage 4: blocking loop.
        while (true)
        {
            // Register as a waiter BEFORE the final attempt, so the waiter count is
            // already raised if the attempt below fails and we go on to wait.
            hkAtomic::exchangeAdd32(&m_numThreadsWaitingOnSemaphore, 1);
            if (hkCriticalSection_lockUsingCmpExch(&m_lockingThread, threadId))
            {
                // Got the lock after all: withdraw the waiter registration.
                hkAtomic::exchangeAdd32(&m_numThreadsWaitingOnSemaphore, -1);
                m_numRecursiveLocks++;
                goto RETURN;
            }
            // Sleep until a semaphore count is released, then loop and retry.
            // The result is captured through HK_ON_DEBUG (presumably debug builds only).
            HK_ON_DEBUG(int errorCode = ) WaitForSingleObject((HANDLE)m_semaphoreHandle, INFINITE);
            HK_ASSERT_NO_MSG(0xf034fee5, errorCode == WAIT_OBJECT_0);
            //      case WAIT_OBJECT_0:     case WAIT_TIMEOUT:      default:
        }
    }
RETURN:
    hkAtomic::readWriteBarrier(); // The CS is locked, read barrier
    return;
}

/*
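Issue: Race condition on nWaiters (the waiter count; presumably m_numThreadsWaitingOnSemaphore in this file).
Answer: http://www.codeproject.com/Articles/18371/Fast-critical-sections-with-timeout#_comments
Notes on the implementation (quoted from the discussion at the link above; m_nWaiters, PerfLockKernel and WaiterPlus are names used in that discussion, not in this file):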
I understood your scenario. But it's ok. There is absolutely no problem if m_nWaiters becomes negative and extra ReleaseSemaphore is called.
BTW this may also happen without the intervention of Thread 3. For instance imagine Thread 1 gets suspended exactly where you suggested, meanwhile Thread 2 gets "tired" waiting (timeout), and returns from the PerfLockKernel function. At this point m_nWaiters is already 0. Then Thread 1 resumes, decrements the m_nWaiters and calls ReleaseSemaphore.
So you are left with an unlocked critical section, whereas m_nWaiters is negative, and the semaphore is already released appropriate amount of times. Let's see what happens then.
Since the critical section is unlocked - next thread that tries lock it will succeed, without even looking at m_nWaiters or the semaphore. During the unlock it will not release the semaphore, since m_nWaiters is not positive. So, everything will work normally (without any performance hit) until the next synchronization collision occurs.
Now let's see what happens during a synchronization collision. A particular thread enters PerfLockKernel. It eventually calls WaiterPlus, and then calls WaitForSingleObject, which returns immediately. At this time m_nWaiters becomes 0, and the extra semaphore charge is consumed. In this way the normal situation is restored. On the next loop iteration the locker will call WaiterPlus + WaitForSingleObject again; this time the latter won't return until either the critical section is unlocked or the timeout expires.
In conclusion: this situation is normal. A situation may arise where an extra semaphore count is released and m_nWaiters goes negative. But the only consequence of this is a small performance hit: every call to ReleaseSemaphore is expensive (it's a kernel-mode function), plus every such "unneeded" semaphore charge will be eaten later by WaitForSingleObject (which is also a kernel-mode function) on the next synchronization collision.
Since those situations are rare - the performance hit should be minor. Our critical sections are optimized mostly for situations where there are no collisions (otherwise one should just use a standard mutex).
OTOH imagine a reverse situation: Eventually under some sophisticated scenario the unlocker does not release a semaphore as needed. Here you are left with positive m_nWaiters, and there is a thread waiting for the semaphore. Which may never be released. This situation is problematic. And, if everything is designed correctly, this situation is impossible.
The starvation is prevented, at expense of a low probability of overfeeding+overeating.
*/


#else
// hkCriticalSection::CRITICAL_SECTION (declared outside this file) must have exactly the
// same size as the global CRITICAL_SECTION type (presumably the Windows SDK structure).
HK_COMPILE_TIME_ASSERT(sizeof(hkCriticalSection::CRITICAL_SECTION) == sizeof(CRITICAL_SECTION));

// Diagnostic state is saved here and restored by HK_DETAIL_DIAG_MSVC_POP() after the constructor.
HK_DETAIL_DIAG_MSVC_PUSH()
HK_DETAIL_DIAG_MSVC_OFF(4355) // "this" in init list

// Cached hardware thread count used to derive the default spin count.
// -1 means "not queried yet"; the constructor fills it in on first use.
static int s_numHardwareThreads = -1;

// Destroys the native critical section held in m_section.
hkCriticalSection::~hkCriticalSection()
{
    // m_section.cast() supplies the pointer that DeleteCriticalSection expects.
    DeleteCriticalSection(m_section.cast());
}

// Win32 style only impl here for the moment.
// Creates the critical section on top of a native Win32 critical section.
//
//   spinCount : spin count handed to the OS initialisation call. Passing 0
//               selects a default of 1000 spins per hardware thread (a fixed
//               2 * 1000 when HK_PLATFORM_SIM is defined). The value is not
//               used at all on pre-VC7 compilers.
hkCriticalSection::hkCriticalSection( int spinCount )
{
#if defined(HK_COMPILER_MSVC) && (HK_COMPILER_MSVC_VERSION < 1300)
    // Pre-VC7 path: plain initialisation without a spin count.
    InitializeCriticalSection( &m_section );
#else // VC7 and higher
    const bool wantsDefaultSpinCount = ( spinCount == 0 );
    if ( wantsDefaultSpinCount )
    {
        const int spinsPerHardwareThread = 1000;
    #if defined(HK_PLATFORM_SIM)
        const int hardwareThreads = 2;
    #else
        // Query the hardware thread count on first use and keep it in the file-level cache.
        if ( s_numHardwareThreads == -1 )
        {
            s_numHardwareThreads = hkHardwareInfo::calcNumHardwareThreads();
        }
        const int hardwareThreads = s_numHardwareThreads;
    #endif
        spinCount = hardwareThreads * spinsPerHardwareThread;
    }
    #ifdef HK_PLATFORM_WINRT
        // WinRT uses the Ex variant; debug info is only requested in HK_DEBUG builds.
        InitializeCriticalSectionEx( m_section.cast(), spinCount,
#ifdef HK_DEBUG
            0
#else
            CRITICAL_SECTION_NO_DEBUG_INFO
#endif
        );
    #else
        InitializeCriticalSectionAndSpinCount( m_section.cast(), spinCount );
    #endif
#endif
}
HK_DETAIL_DIAG_MSVC_POP()

#ifdef HK_TIME_CRITICAL_SECTION_LOCKS

// Acquires the native critical section, optionally timing the wait.
//
// A non-blocking attempt is made first. Only when that fails does the call
// block, and the blocking wait is wrapped in a "CriticalLock" timer when the
// thread-local hkCriticalSection__m_timeLocks value is set.
void hkCriticalSection::enter()
{
    // Uncontended case: nothing to wait for, nothing to time.
    if ( TryEnterCriticalSection(m_section.cast()) )
    {
        return;
    }

    // Contended and timing is enabled for this thread: measure the blocking wait.
    if ( HK_THREAD_LOCAL_GET(hkCriticalSection__m_timeLocks) )
    {
        HK_TIMER_BEGIN("CriticalLock", HK_NULL);
        EnterCriticalSection( m_section.cast() );
        HK_TIMER_END();
        return;
    }

    // Contended, timing disabled: plain blocking acquire.
    EnterCriticalSection( m_section.cast() );
}

#endif //! HK_TIME_CRITICAL_SECTION_LOCKS

#endif // HK_USE_FAST_WIN32_CRITICAL_SECTION
#endif // HK_CONFIG_THREAD == HK_CONFIG_MULTI_THREADED

/*
 * Havok SDK - Base file, BUILD(#20180110)
 * 
 * Confidential Information of Microsoft Corporation.
 * Not for disclosure or distribution without Microsoft's prior written
 * consent.  This software contains code, techniques and know-how which
 * is confidential and proprietary to Microsoft.  Product and Trade Secret
 * source code contains trade secrets of Microsoft.  Havok Software (C)
 * Copyright 1999-2018 Microsoft Corporation.
 * All Rights Reserved. Use of this software is subject to the
 * terms of an end user license agreement.
 * 
 * The Havok Logo, and the Havok buzzsaw logo are trademarks of Microsoft.
 * Title, ownership rights, and intellectual property rights in the Havok
 * software remain in Microsoft and/or its suppliers.
 * 
 * Use of this software for evaluation purposes is subject to and
 * indicates acceptance of the End User licence Agreement for this
 * product. A copy of the license is included with this software and is
 * also available from Havok Support.
 * 
 */
