// TKBMS v1.0 -----------------------------------------------------
//
// PLATFORM   : ALL
// PRODUCT   : COMMON
// VISIBILITY   : PUBLIC
//
// ------------------------------------------------------TKBMS v1.0
#include <Common/Base/hkBase.h>
#include <Common/Base/Container/String/hkUtf8.h>
#include <Common/Base/Container/String/hkUtf8Detail.h>
#include <wchar.h>

#ifndef WCHAR_MAX
    #error wchar.h should provide WCHAR_MAX
#endif

// Debug-only scrubbing helpers. When HK_DEBUG_SLOW is defined they overwrite the start of
// the m_utf8 / m_wide buffer with a pattern that cannot be mistaken for valid text in the
// respective encoding; otherwise both macros expand to nothing.
// They reference m_utf8 / m_wide directly, so they are only usable where those names are in scope.
// NOTE(review): the unit of the trailing '64' argument is defined by hkString::memSet4 - confirm.
#if defined(HK_DEBUG_SLOW)
    #define HK_SCRUB_UTF8_BUFFER_ON_DEBUG() \
        hkString::memSet4( m_utf8.begin(), 0xFFFFFFFFu, 64 ); // 0xFF is invalid in UTF-8
    #define HK_SCRUB_WIDE_BUFFER_ON_DEBUG() \
        hkString::memSet4( m_wide.begin(), 0xD800D800u, 64 ); // Double high-surrogate pair; invalid in UTF-16
#else
    #define HK_SCRUB_UTF8_BUFFER_ON_DEBUG()
    #define HK_SCRUB_WIDE_BUFFER_ON_DEBUG()
#endif


namespace
{
    // Compile-time "binary literal" helper.
    // The template argument is an octal literal whose digits are all 0 or 1; each octal
    // digit is reinterpreted as one binary digit, most significant digit first.
    template<unsigned OctalDigits>
    struct BinFromOct
    {
        // Peel off the lowest octal digit and recurse on the remaining, higher digits.
        enum { value = 2 * BinFromOct<OctalDigits / 8>::value + OctalDigits % 8 };
    };

    // Recursion terminator: no digits left.
    template<>
    struct BinFromOct<0>
    {
        enum { value = 0 };
    };

    // NIBBLE(1100,0000) pastes its arguments into the octal literal 011000000 and
    // evaluates to the byte 0xC0.
    #define NIBBLE( hi, lo ) BinFromOct<0##hi##lo>::value
}

// U+FFFD: used to replace an incoming character whose value is unknown or unrepresentable in Unicode.
static const hkUtf8::CodePoint UNICODE_REPLACEMENT_CHARACTER = 0xfffd;
// The same code point as a null-terminated UTF-8 byte sequence (EF BF BD).
static const char UNICODE_REPLACEMENT_CHARACTER_UTF8[] = "\xef\xbf\xbd";

// Returns true when the low byte of 'c' is a UTF-8 continuation byte, i.e. binary 10xxxxxx.
static HK_INLINE bool s_isUtf8ContinuationByte( int c )
{
    const int topTwoBits = c & 0xc0;
    return topTwoBits == 0x80;
}

// Returns true when the low byte of 'c' has both top bits set (binary 11xxxxxx),
// which is how the first byte of a multi-byte UTF-8 sequence looks.
static HK_INLINE bool s_isUtf8LeadByte( int c )
{
    const int topTwoBits = c & 0xc0;
    return topTwoBits == 0xc0;
}

// Returns true when bit 7 of 'c' is clear (binary 0xxxxxxx), i.e. a single-byte ASCII character.
static HK_INLINE bool s_isPlainAscii( int c )
{
    const int highBit = c & 0x80;
    return highBit == 0;
}

// Counts the code points in a null-terminated UTF-8 string by counting every byte
// that is not a continuation byte. A null pointer yields 0.
int hkUtf8::strLen(const char* s)
{
    int numCodePoints = 0;
    if ( s )
    {
        while ( *s != '\0' )
        {
            // ASCII bytes and lead bytes each start a new code point.
            if ( !s_isUtf8ContinuationByte(*s) )
            {
                numCodePoints += 1;
            }
            ++s;
        }
    }
    return numCodePoints;
}

// Encodes a single code point into 'buf' (no null terminator is written) and returns
// the number of bytes produced, 1 to 6. Values above 0x7fffffff trigger an assert and
// return 0 without touching 'buf'.
int hkUtf8::utf8FromCodePoint(_Out_writes_(6) char buf[6], CodePoint cp)
{
    // Step 1: how many bytes does this code point need?
    int numBytes;
    if (cp <= 0x007f)
    {
        numBytes = 1;
    }
    else if (cp <= 0x07ff)
    {
        numBytes = 2;
    }
    else if (cp <= 0xffff)
    {
        numBytes = 3;
    }
    else if (cp <= 0x001fffff)
    {
        numBytes = 4;
    }
    else if (cp <= 0x03ffffff)
    {
        numBytes = 5;
    }
    else if (cp <= 0x7fffffff)
    {
        numBytes = 6;
    }
    else
    {
        HK_ASSERTV(0x79ab8123, false, "Invalid code point: {}.", cp);
        return 0;
    }

    // Step 2: plain ASCII is stored as-is.
    if (numBytes == 1)
    {
        buf[0] = char(cp);
        return 1;
    }

    // Step 3: the lead byte carries the length marker plus the topmost payload bits,
    // every following byte carries 6 payload bits behind the 10xxxxxx marker.
    const int leadMarker[7] = { 0x00, 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
    int shift = 6 * (numBytes - 1);
    buf[0] = char(leadMarker[numBytes] | (cp >> shift));
    for (int i = 1; i < numBytes; ++i)
    {
        shift -= 6;
        buf[i] = char(0x80 | ((cp >> shift) & 0x3f));
    }
    return numBytes;
}

int hkUtf8::utf8FromWide(char* dst, int dstCap, const wchar_t* src, int srcCount)
{
#if WCHAR_MAX == 0xffff
    return Detail::utf8FromUtf16(dst, dstCap, (const hkUint16*)src, srcCount);
#else
    HK_COMPILE_TIME_ASSERT(WCHAR_MAX == 0xffffffff || WCHAR_MAX == 0x7fffffff);
    return Detail::utf8FromUtf32(dst, dstCap, (const hkUint32*)src, srcCount);
#endif
}

int hkUtf8::wideFromUtf8(wchar_t* dst, int dstCap, const char* src, int srcCount)
{
#if WCHAR_MAX == 0xffff
    return Detail::utf16FromUtf8((hkUint16*)dst, dstCap, src, srcCount);
#else
    HK_COMPILE_TIME_ASSERT(WCHAR_MAX == 0xffffffff || WCHAR_MAX == 0x7fffffff);
    return Detail::utf32FromUtf8((hkUint32*)dst, dstCap, src, srcCount);
#endif
}

// Returns the wchar_t buffer length reported by the UTF-16 or UTF-32 length routine,
// whichever matches the platform's wchar_t width (selected through WCHAR_MAX).
int hkUtf8::wideLengthForUtf8(const char* str, int srcCount)
{
#if WCHAR_MAX != 0xffff
    HK_COMPILE_TIME_ASSERT(WCHAR_MAX == 0xffffffff || WCHAR_MAX == 0x7fffffff);
    const int numUnits = Detail::utf32LengthForUtf8(str, srcCount);
#else
    const int numUnits = Detail::utf16LengthForUtf8(str, srcCount);
#endif
    return numUnits;
}

// Returns the UTF-8 buffer length reported by the UTF-16 or UTF-32 length routine,
// whichever matches the platform's wchar_t width (selected through WCHAR_MAX).
int HK_CALL hkUtf8::utf8LengthForWide(const wchar_t* str, int srcCount)
{
#if WCHAR_MAX != 0xffff
    HK_COMPILE_TIME_ASSERT(WCHAR_MAX == 0xffffffff || WCHAR_MAX == 0x7fffffff);
    const int numBytes = Detail::utf8LengthForUtf32((const hkUint32*)str, srcCount);
#else
    const int numBytes = Detail::utf8LengthForUtf16((const hkUint16*)str, srcCount);
#endif
    return numBytes;
}

int hkUtf8::Detail::utf8FromUtf16(char* dst, int dstCap, const hkUint16* src, int srcCount)
{
    HK_ASSERT(0x77649b, findFirstInvalidUtf16(src, srcCount) == -1, "The string passed to utf8FromUtf16 isn't a valid utf-16 string.");

    if(dstCap == 0)
    {
        return utf8LengthForUtf16(src, srcCount);
    }

    // Iterate over all utf-16 words, and output the utf-8 bytes.
    int dstIdx = 0;
    unsigned srcIdx = 0;
    while(srcIdx < (unsigned)srcCount)
    {
        hkUint16 c = src[srcIdx++];
        if(c == 0)
        {
            // We're done.
            break;
        }
        else if(c < 0x80)
        {
            // The current code point fits in 1 utf-8 byte.

            if(dstIdx + 1 < dstCap)
            {
                dst[dstIdx++] = (char)c;
            }
            else
            {
                // No output space left, so write the null terminator, and
                // compute the remaining length.
                dst[dstIdx] = 0;
                return dstIdx + 1 + utf8LengthForUtf16(src + srcIdx, srcCount - srcIdx);
            }

        }
        else if(c < 0x800)
        {
            // The current code point fits in 2 utf-8 bytes.

            if(dstIdx + 2 < dstCap)
            {
                dst[dstIdx++] = char(0xc0 | (c >> 6));
                dst[dstIdx++] = char(0x80 | (c & 0x3f));
            }
            else
            {
                // No output space left, so write the null terminator, and
                // compute the remaining length.
                dst[dstIdx] = 0;
                return dstIdx + 2 + utf8LengthForUtf16(src + srcIdx, srcCount - srcIdx);
            }
        }
        else if(c < 0xd800 || c >= 0xe000)
        {
            // The current code point fits in 3 utf-8 bytes.

            if(dstIdx + 3 < dstCap)
            {
                dst[dstIdx++] = char(0xe0 | (c >> 12));
                dst[dstIdx++] = char(0x80 | ((c >> 6) & 0x3f));
                dst[dstIdx++] = char(0x80 | (c & 0x3f));
            }
            else
            {
                // No output space left, so write the null terminator, and
                // compute the remaining length.
                dst[dstIdx] = 0;
                return dstIdx + 3 + utf8LengthForUtf16(src + srcIdx, srcCount - srcIdx);
            }
        }
        else
        {
            // The current code point is a surrogate pair, so we need to read
            // the second input utf-16 word. The output will be 4 utf-8 bytes.

            if(srcIdx >= (unsigned)srcCount || src[srcIdx] == 0)
            {
                // Incomplete input, so write the replacement character
                if(dstIdx + 3 < dstCap)
                {
                    dst[dstIdx++] = UNICODE_REPLACEMENT_CHARACTER_UTF8[0];
                    dst[dstIdx++] = UNICODE_REPLACEMENT_CHARACTER_UTF8[1];
                    dst[dstIdx++] = UNICODE_REPLACEMENT_CHARACTER_UTF8[2];

                    // Write the null terminator, and return.
                    dst[dstIdx++] = 0;
                    return dstIdx;
                }
                else
                {
                    // No output space left, so write the null terminator and
                    // compute the remaining length.
                    dst[dstIdx++] = 0;

                    // The replacement char needs 3 utf-8 bytes.
                    return dstIdx + 3;
                }

                break;
            }

            if(dstIdx + 4 < dstCap)
            {
                hkUint16 lower = c;
                hkUint16 higher = src[srcIdx++];

                hkUint32 cp = ((hkUint32)(lower - 0xd800) << 10) + (higher - 0xdc00) + 0x010000;
                dst[dstIdx++] = char(0xf0 | (cp >> 18));
                dst[dstIdx++] = char(0x80 | ((cp >> 12) & 0x3f));
                dst[dstIdx++] = char(0x80 | ((cp >> 6) & 0x3f));
                dst[dstIdx++] = char(0x80 | (cp & 0x3f));
            }
            else
            {
                // No output space left, so write the null terminator and
                // compute the remaining length.
                dst[dstIdx] = 0;
                srcIdx++;
                return dstIdx + 4 + utf8LengthForUtf16(src + srcIdx, srcCount - srcIdx);
            }
        }
    }

    // Output the null terminator.
    dst[dstIdx++] = 0;
    return dstIdx;
}

// Converts up to 'srcCount' UTF-8 bytes (stopping early at a null byte) into UTF-16.
//
// - dstCap == 0: nothing is written; the required number of words is returned.
// - Everything fits: the output is null terminated and the number of words written,
//   terminator included, is returned.
// - Out of space: the output is null terminated at the last complete character and the
//   size that would have been needed is returned.
// A multi-byte sequence cut short by the end of the input or by a null byte ends the
// conversion at that point.
int hkUtf8::Detail::utf16FromUtf8(hkUint16* dst, int dstCap, const char* src, int srcCount)
{
    HK_ASSERT(0x1eff3585, findFirstInvalidUtf8(src, srcCount) == -1, "The string passed to utf16FromUtf8 isn't a valid utf-8 string.");

    if(dstCap == 0)
    {
        return utf16LengthForUtf8(src, srcCount);
    }

    // Iterate over all utf-8 bytes, and output the utf-16 words.
    int dstIdx = 0;
    unsigned srcIdx = 0;
    while(srcIdx < (unsigned)srcCount)
    {
        const hkUint8 c = (hkUint8)src[srcIdx++];
        if(c == 0)
        {
            // We're done.
            break;
        }

        // Classify the lead byte: number of continuation bytes that follow, and the
        // payload bits stored in the lead byte itself.
        unsigned tailLen;
        hkUint32 cp32;
        if(c < NIBBLE(1100, 0000))
        {
            // If c < 0x80, it's a 7 bit code point, if 0x80 <= c < 0xc0, we
            // have an invalid utf-8 string, but we can safely convert it to
            // a single utf-16 word, so we'll do that instead.
            tailLen = 0;
            cp32 = c;
        }
        else if(c < NIBBLE(1110, 0000))
        {
            // At most 11 bits: 2 utf-8 bytes, 1 utf-16 word.
            tailLen = 1;
            cp32 = (hkUint32)(c & NIBBLE(0001, 1111));
        }
        else if(c < NIBBLE(1111, 0000))
        {
            // At most 16 bits: 3 utf-8 bytes, 1 utf-16 word.
            tailLen = 2;
            cp32 = (hkUint32)(c & NIBBLE(0000, 1111));
        }
        else if(c < NIBBLE(1111, 1000))
        {
            // 4 utf-8 bytes, needs a surrogate pair.
            tailLen = 3;
            cp32 = (hkUint32)(c & NIBBLE(0000, 0111));
        }
        else if(c < NIBBLE(1111, 1100))
        {
            // 5 utf-8 bytes.
            tailLen = 4;
            cp32 = (hkUint32)(c & NIBBLE(0000, 0011));
        }
        else
        {
            // 6 utf-8 bytes.
            tailLen = 5;
            cp32 = (hkUint32)(c & NIBBLE(0000, 0001));
        }

        // Sequences of 4 or more bytes encode more than 16 bits and need two words.
        const bool needsPair = (tailLen >= 3);

        if(!needsPair && dstIdx + 1 >= dstCap)
        {
            // No output space left, so write the null terminator, skip the rest of
            // this character and compute the remaining length.
            dst[dstIdx] = 0;
            srcIdx += tailLen;
            return dstIdx + 1 + utf16LengthForUtf8(src + srcIdx, srcCount - srcIdx);
        }

        // Stop if the continuation bytes are cut off by the end of the input or by a
        // null byte. The bytes are inspected in order and only up to the first null.
        bool truncated = (srcIdx + tailLen > (unsigned)srcCount);
        for(unsigned i = 0; !truncated && i < tailLen; i++)
        {
            truncated = (src[srcIdx + i] == 0);
        }
        if(truncated)
        {
            break;
        }

        // Append 6 payload bits per continuation byte. Doing this uniformly also
        // fixes the 5 byte case, which used to apply the wrong shift amounts.
        for(unsigned i = 0; i < tailLen; i++)
        {
            cp32 = (cp32 << 6) + (hkUint32)(src[srcIdx++] & NIBBLE(0011, 1111));
        }

        if(!needsPair)
        {
            dst[dstIdx++] = (hkUint16)cp32;
        }
        else if(dstIdx + 2 < dstCap)
        {
            // NOTE(review): code points above 0x10FFFF (possible with 4, 5 and 6 byte
            // sequences) cannot be represented in UTF-16; the words produced for them
            // here are not a valid surrogate pair.
            cp32 -= 0x10000;
            dst[dstIdx++] = (hkUint16)(cp32 >> 10) + 0xd800;
            dst[dstIdx++] = (hkUint16)(cp32 & 0x3ff) + 0xdc00;
        }
        else
        {
            // No output space left, so write the null terminator, and
            // compute the remaining length.
            dst[dstIdx] = 0;
            return dstIdx + 2 + utf16LengthForUtf8(src + srcIdx, srcCount - srcIdx);
        }
    }

    // Output the null terminator.
    dst[dstIdx++] = 0;
    return dstIdx;
}

// Returns the number of UTF-16 words, null terminator included, needed to hold the
// conversion of up to 'srcCount' UTF-8 bytes (stopping early at a null byte).
// Only the lead byte of each sequence is inspected; the continuation bytes are skipped.
int hkUtf8::Detail::utf16LengthForUtf8(const char* str, int srcCount)
{
    // Validate only the range the caller handed us: the input is not required to be
    // null terminated when srcCount bounds it.
    HK_ASSERT(0x7c81e7b9, findFirstInvalidUtf8(str, srcCount) == -1, "The string passed to utf16LengthForUtf8 isn't a valid utf-8 string.");

    unsigned srcIdx = 0;
    int ret = 0;
    while(srcIdx < (unsigned)srcCount)
    {
        hkUint8 c = (hkUint8)str[srcIdx];
        if(c == 0)
        {
            break;
        }
        else if(c < 0xc0)
        {
            // 1 byte -> 1 word.
            ret++;
            srcIdx++;
        }
        else if(c < 0xe0)
        {
            // 2 bytes -> 1 word.
            ret++;
            srcIdx += 2;
        }
        else if(c < 0xf0)
        {
            // 3 bytes -> 1 word.
            ret++;
            srcIdx += 3;
        }
        else if(c < 0xf8)
        {
            // 4 bytes -> surrogate pair.
            ret += 2;
            srcIdx += 4;
        }
        else if(c < 0xfc)
        {
            // 5 bytes -> counted as a surrogate pair.
            ret += 2;
            srcIdx += 5;
        }
        else
        {
            // 6 bytes -> counted as a surrogate pair.
            ret += 2;
            srcIdx += 6;
        }
    }

    // Account for the null terminator.
    return ret + 1;
}

int hkUtf8::Detail::utf8LengthForUtf16(const hkUint16* str, int srcCount)
{
    HK_ASSERT(0x1c79db42, findFirstInvalidUtf16(str, srcCount) == -1, "The string passed to utf8LengthForUtf16 isn't a valid utf-16 string.");

    int ret = 0;
    unsigned srcIdx = 0;
    while(srcIdx < (unsigned)srcCount)
    {
        hkUint16 c = str[srcIdx++];
        if(c == 0)
        {
            // We're done.
            break;
        }
        else if(c < 0x80)
        {
            ret++;
        }
        else if(c < 0x800)
        {
            ret += 2;
        }
        else if(c < 0xd800 || c >= 0xe000)
        {
            ret += 3;
        }
        else
        {
            // The current code point is a surrogate pair, so we need to read
            // the second input utf-16 word. The output will be 4 utf-8 bytes.

            if(srcIdx >= (unsigned)srcCount || str[srcIdx] == 0)
            {
                // The last surrogate pair is incomplete, so we need room for
                // the unicode replacement character.
                ret += 3;
                break;
            }

            ret += 4;
            srcIdx++;
        }
    }

    return ret + 1;
}

// Validates the TAILLENGTH continuation bytes that follow the lead byte 'head'.
// On entry 'srcIdx' is the index of the first continuation byte; it is advanced past
// every byte that gets examined. Returns false when the tail does not fit in
// 'srcCount', when a tail byte is not of the form 10xxxxxx, or when the sequence is an
// overlong encoding.
template <int TAILLENGTH>
static HK_INLINE bool validateUtf8Sequence(_In_z_ const char* str, int srcCount, char head, unsigned& srcIdx)
{
    const unsigned limit = (unsigned)srcCount;
    if(srcIdx + TAILLENGTH > limit)
    {
        return false;
    }

    // Remember the first tail byte; it is needed for the overlong check below.
    const char firstTail = str[srcIdx];

    int remaining = TAILLENGTH;
    while(remaining-- > 0)
    {
        const char tail = str[srcIdx++];
        if(!s_isUtf8ContinuationByte(tail))
        {
            return false;
        }
    }

    // Overlong sequences are not allowed (i.e. using, say, 16 bits for a code point
    // which would fit in 11 bits). A sequence is not overlong when at least one of the
    // payload bits that a shorter sequence could not carry is set. Those bits always
    // live in the first two bytes; the tables below select them, indexed by
    // TAILLENGTH - 1.
    static const char extraBitsInHead[] =
    {
        NIBBLE(0001, 1110),
        NIBBLE(0000, 1111),
        NIBBLE(0000, 0111),
        NIBBLE(0000, 0011),
        NIBBLE(0000, 0001),
    };

    static const char extraBitsInFirstTail[] =
    {
        NIBBLE(0000, 0000),
        NIBBLE(0010, 0000),
        NIBBLE(0011, 0000),
        NIBBLE(0011, 1000),
        NIBBLE(0011, 1100),
    };

    if((head & extraBitsInHead[TAILLENGTH - 1]) != 0)
    {
        return true;
    }
    return (firstTail & extraBitsInFirstTail[TAILLENGTH - 1]) != 0;
}

// Scans up to 'srcCount' bytes (stopping early at a null byte) and returns the index of
// the lead byte of the first malformed sequence, or -1 when none is found.
// A sequence is malformed when it starts with a continuation byte, starts with 0xFE or
// 0xFF, has a missing or malformed tail, or is an overlong encoding.
int hkUtf8::Detail::findFirstInvalidUtf8(const char* str, int srcCount)
{
    unsigned srcIdx = 0;
    while(srcIdx < (unsigned)srcCount)
    {
        int curCharIdx = srcIdx;
        hkUint8 c = (hkUint8)str[srcIdx++];
        if(c == 0)
        {
            break;
        }

        bool valid;
        if(c < NIBBLE(1000, 0000))
        {
            // Plain ASCII.
            valid = true;
        }
        else if(c < NIBBLE(1100, 0000))
        {
            // A continuation byte can't start a character.
            valid = false;
        }
        else if(c < NIBBLE(1110, 0000))
        {
            valid = validateUtf8Sequence<1>(str, srcCount, c, srcIdx);
        }
        else if(c < NIBBLE(1111, 0000))
        {
            valid = validateUtf8Sequence<2>(str, srcCount, c, srcIdx);
        }
        else if(c < NIBBLE(1111, 1000))
        {
            valid = validateUtf8Sequence<3>(str, srcCount, c, srcIdx);
        }
        else if(c < NIBBLE(1111, 1100))
        {
            valid = validateUtf8Sequence<4>(str, srcCount, c, srcIdx);
        }
        else if(c < NIBBLE(1111, 1110))
        {
            valid = validateUtf8Sequence<5>(str, srcCount, c, srcIdx);
        }
        else
        {
            // 0xFE and 0xFF are never valid in UTF-8, not even as the lead byte of a
            // 6 byte sequence (which looks like 1111110x).
            valid = false;
        }

        if(!valid)
        {
            return curCharIdx;
        }
    }

    return -1;
}

// Scans up to 'srcCount' words (stopping early at a null word) and returns the index of
// the first word that starts a malformed sequence, or -1 when none is found.
// A sequence is malformed when a high surrogate (0xD800-0xDBFF) is not followed by a low
// surrogate (0xDC00-0xDFFF), or when a low surrogate appears without a preceding high one.
int hkUtf8::Detail::findFirstInvalidUtf16(const hkUint16* str, int srcCount)
{
    unsigned srcIdx = 0;
    while(srcIdx < (unsigned)srcCount)
    {
        int curCharIdx = srcIdx;
        hkUint16 c = str[srcIdx++];
        if(c == 0)
        {
            break;
        }
        else if(c < 0xd800 || c >= 0xe000)
        {
            // Single word code point.
        }
        else if(c < 0xdc00)
        {
            // It's the first word of a surrogate pair (0xDC00 itself is already a
            // second word, so the bound is exclusive). See if it is followed by a
            // surrogate pair's second word.

            if(srcIdx + 1 > (unsigned)srcCount)
            {
                return curCharIdx;
            }

            // A null terminator here is < 0xdc00 and is therefore rejected as well.
            hkUint16 higher = str[srcIdx++];
            if(higher < 0xdc00 || higher >= 0xe000)
            {
                return curCharIdx;
            }
        }
        else
        {
            // The current word is a surrogate pair's second word, but isn't
            // preceded by a first word.
            return curCharIdx;
        }
    }

    return -1;
}

int hkUtf8::Detail::utf8FromUtf32(char* dst, int dstCap, const hkUint32* src, int srcCount)
{
    if(dstCap == 0)
    {
        return utf8LengthForUtf32(src, srcCount);
    }

    int dstIdx = 0;
    for(unsigned i = 0; i < (unsigned)srcCount && src[i]; i++)
    {
        hkUint32 c = src[i];
        if(c < 0x80)
        {
            // 1 utf-8 byte.
            if(dstIdx + 1 < dstCap)
            {
                dst[dstIdx++] = (char)c;
            }
            else
            {
                // No output space left, so write the null terminator, and
                // compute the remaining length.
                dst[dstIdx] = 0;
                return dstIdx + 1 + utf8LengthForUtf32(src + i + 1, srcCount - i - 1);
            }
        }
        else if(c < 0x800)
        {
            // 2 utf-8 bytes.
            if(dstIdx + 2 < dstCap)
            {
                dst[dstIdx++] = char(0xc0 | (c >> 6));
                dst[dstIdx++] = char(0x80 | (c & 0x3f));
            }
            else
            {
                // No output space left, so write the null terminator, and
                // compute the remaining length.
                dst[dstIdx] = 0;
                return dstIdx + 2 + utf8LengthForUtf32(src + i + 1, srcCount - i - 1);
            }
        }
        else if(c < 0x10000)
        {
            // 3 utf-8 bytes.
            if(dstIdx + 3 < dstCap)
            {
                dst[dstIdx++] = char(0xe0 | (c >> 12));
                dst[dstIdx++] = char(0x80 | ((c >> 6) & 0x3f));
                dst[dstIdx++] = char(0x80 | (c & 0x3f));
            }
            else
            {
                // No output space left, so write the null terminator, and
                // compute the remaining length.
                dst[dstIdx] = 0;
                return dstIdx + 3 + utf8LengthForUtf32(src + i + 1, srcCount - i - 1);
            }
        }
        else if(c < 0x200000)
        {
            // 4 utf-8 bytes.
            if(dstIdx + 4 < dstCap)
            {
                dst[dstIdx++] = char(0xf0 | (c >> 18));
                dst[dstIdx++] = char(0x80 | ((c >> 12) & 0x3f));
                dst[dstIdx++] = char(0x80 | ((c >> 6) & 0x3f));
                dst[dstIdx++] = char(0x80 | (c & 0x3f));
            }
            else
            {
                // No output space left, so write the null terminator, and
                // compute the remaining length.
                dst[dstIdx] = 0;
                return dstIdx + 4 + utf8LengthForUtf32(src + i + 1, srcCount - i - 1);
            }
        }
        else if(c < 0x4000000)
        {
            // 5 utf-8 bytes.
            if(dstIdx + 5 < dstCap)
            {
                dst[dstIdx++] = char(0xf8 | (c >> 24));
                dst[dstIdx++] = char(0x80 | ((c >> 18) & 0x3f));
                dst[dstIdx++] = char(0x80 | ((c >> 12) & 0x3f));
                dst[dstIdx++] = char(0x80 | ((c >> 6) & 0x3f));
                dst[dstIdx++] = char(0x80 | (c & 0x3f));
            }
            else
            {
                // No output space left, so write the null terminator, and
                // compute the remaining length.
                dst[dstIdx] = 0;
                return dstIdx + 5 + utf8LengthForUtf32(src + i + 1, srcCount - i - 1);
            }
        }
        else
        {
            // 6 utf-8 bytes.
            if(dstIdx + 6 < dstCap)
            {
                dst[dstIdx++] = char(0xfc | (c >> 30));
                dst[dstIdx++] = char(0x80 | ((c >> 24) & 0x3f));
                dst[dstIdx++] = char(0x80 | ((c >> 18) & 0x3f));
                dst[dstIdx++] = char(0x80 | ((c >> 12) & 0x3f));
                dst[dstIdx++] = char(0x80 | ((c >> 6) & 0x3f));
                dst[dstIdx++] = char(0x80 | (c & 0x3f));
            }
            else
            {
                // No output space left, so write the null terminator, and
                // compute the remaining length.
                dst[dstIdx] = 0;
                return dstIdx + 6 + utf8LengthForUtf32(src + i + 1, srcCount - i - 1);
            }
        }
    }

    dst[dstIdx++] = 0;
    return dstIdx;
}

// Converts up to 'srcCount' UTF-8 bytes (stopping early at a null byte) into UTF-32.
// Sequences of 1 to 6 bytes are decoded; each produces exactly one output value.
//
// - dstCap == 0: nothing is written; the required number of values is returned.
// - Everything fits: the output is null terminated and the number of values written,
//   terminator included, is returned.
// - Out of space: the output is null terminated at the last complete character and the
//   size that would have been needed is returned.
// A multi-byte sequence cut short by the end of the input or by a null byte ends the
// conversion at that point.
int hkUtf8::Detail::utf32FromUtf8(hkUint32* dst, int dstCap, const char* src, int srcCount)
{
    HK_ASSERT(0x4180641d, findFirstInvalidUtf8(src, srcCount) == -1, "The string passed to utf32FromUtf8 isn't a valid utf-8 string.");

    if(dstCap == 0)
    {
        return utf32LengthForUtf8(src, srcCount);
    }

    // Iterate over all utf-8 bytes, and output the utf-32 values. The null
    // terminator is appended after this loop.
    int dstIdx = 0;
    unsigned srcIdx = 0;
    while(srcIdx < (unsigned)srcCount)
    {
        const hkUint8 c = (hkUint8)src[srcIdx++];
        if(c == 0)
        {
            // We're done.
            break;
        }

        // Classify the lead byte: number of continuation bytes that follow, and the
        // payload bits stored in the lead byte itself.
        unsigned tailLen;
        hkUint32 cp32;
        if(c < NIBBLE(1100, 0000))
        {
            // If c < 0x80, it's a 7 bit code point, if 0x80 <= c < 0xc0, we
            // have an invalid utf-8 string, but we can safely convert it to
            // a single utf-32 value, so we'll do that.
            tailLen = 0;
            cp32 = c;
        }
        else if(c < NIBBLE(1110, 0000))
        {
            // At most 11 bits: 2 utf-8 bytes.
            tailLen = 1;
            cp32 = (hkUint32)(c & NIBBLE(0001, 1111));
        }
        else if(c < NIBBLE(1111, 0000))
        {
            // At most 16 bits: 3 utf-8 bytes.
            tailLen = 2;
            cp32 = (hkUint32)(c & NIBBLE(0000, 1111));
        }
        else if(c < NIBBLE(1111, 1000))
        {
            // At most 21 bits: 4 utf-8 bytes.
            tailLen = 3;
            cp32 = (hkUint32)(c & NIBBLE(0000, 0111));
        }
        else if(c < NIBBLE(1111, 1100))
        {
            // At most 26 bits: 5 utf-8 bytes.
            tailLen = 4;
            cp32 = (hkUint32)(c & NIBBLE(0000, 0011));
        }
        else
        {
            // At most 31 bits: 6 utf-8 bytes.
            tailLen = 5;
            cp32 = (hkUint32)(c & NIBBLE(0000, 0001));
        }

        if(dstIdx + 1 >= dstCap)
        {
            // No output space left, so write the null terminator, skip the rest of
            // this character and compute the remaining length.
            dst[dstIdx] = 0;
            srcIdx += tailLen;
            return dstIdx + 1 + utf32LengthForUtf8(src + srcIdx, srcCount - srcIdx);
        }

        // Stop if the continuation bytes are cut off by the end of the input or by a
        // null byte. The bytes are inspected in order and only up to the first null.
        bool truncated = (srcIdx + tailLen > (unsigned)srcCount);
        for(unsigned i = 0; !truncated && i < tailLen; i++)
        {
            truncated = (src[srcIdx + i] == 0);
        }
        if(truncated)
        {
            break;
        }

        // Append 6 payload bits per continuation byte. Doing this uniformly also
        // fixes the 5 byte case, which used to apply the wrong shift amounts and
        // therefore didn't round-trip with utf8FromUtf32.
        for(unsigned i = 0; i < tailLen; i++)
        {
            cp32 = (cp32 << 6) + (hkUint32)(src[srcIdx++] & NIBBLE(0011, 1111));
        }

        dst[dstIdx++] = cp32;
    }

    // Output the null terminator.
    dst[dstIdx++] = 0;
    return dstIdx;
}

int hkUtf8::Detail::utf32LengthForUtf8(const char* str, int srcCount)
{
    HK_ASSERT(0x4d609ca4, findFirstInvalidUtf8(str) == -1, "The string passed to utf16LengthForUtf8 isn't a valid utf-8 string.");

    unsigned srcIdx = 0;
    int ret = 0;
    while(srcIdx < (unsigned)srcCount)
    {
        hkUint8 c = (hkUint8)str[srcIdx];
        if(c == 0)
        {
            break;
        }
        else if(c < 0xc0)
        {
            ret++;
            srcIdx++;
        }
        else if(c < 0xe0)
        {
            ret++;
            srcIdx += 2;
        }
        else if(c < 0xf0)
        {
            ret++;
            srcIdx += 3;
        }
        else if(c < 0xf8)
        {
            ret++;
            srcIdx += 4;
        }
        else if(c < 0xfc)
        {
            ret++;
            srcIdx += 5;
        }
        else
        {
            ret++;
            srcIdx += 6;
        }
    }

    return ret + 1;
}

int hkUtf8::Detail::utf8LengthForUtf32(const hkUint32* str, int srcCount)
{
    int ret = 0;
    for(unsigned i = 0; i < (unsigned)srcCount && str[i]; i++)
    {
        hkUint32 c = str[i];
        if(c < 0x80)
        {
            ret++;
        }
        else if(c < 0x800)
        {
            ret += 2;
        }
        else if(c < 0x10000)
        {
            ret += 3;
        }
        else if(c < 0x200000)
        {
            ret += 4;
        }
        else if(c < 0x4000000)
        {
            ret += 5;
        }
        else
        {
            ret += 6;
        }
    }

    return ret + 1;
}

namespace hkUtf8
{
    // Tests whether 'b' is the lead byte of an N-byte UTF-8 sequence, i.e. its
    // top N bits are set and the following bit is clear.
    //   N=2 -> 110xxxxx, N=3 -> 1110xxxx, ... N=6 -> 1111110x
    // On a match the payload bits (the 'x' bits) are written to 'cp' and true
    // is returned. Otherwise 'cp' is left untouched and false is returned.
    template<unsigned N>
    static bool decodeLead(hkUint8 b, CodePoint& cp )
    {
        // Only 2..6 byte sequences have a lead byte of this shape; the bound
        // also keeps the shift counts below within range.
        HK_COMPILE_TIME_ASSERT(N>=2 && N<=6);
        enum {
            // Built by shifting 0xFF left and keeping the low byte, which
            // avoids right-shifting a negative value.
            mask = (0xFF << (7 - int(N))) & 0xFF, // top N+1 bits set
            bits = (0xFF << (8 - int(N))) & 0xFF  // top N bits set
        };

        if( (b & mask) == bits )
        {
            cp = b & ~mask;
            return true;
        }
        return false;
    }
}

bool hkUtf8::Iterator::advance(int* lenOut)
{
    if( m_utf8 == HK_NULL || m_utf8[0] == 0 )
    {
        m_current = CodePoint(-1);
        return false;
    }
    // default to "bad char", set the values if no decoding error
    CodePoint cp = UNICODE_REPLACEMENT_CHARACTER;
    int len = 1;
    if( s_isPlainAscii(m_utf8[0]) )
    {
        cp = m_utf8[0];
    }
    else if( s_isUtf8LeadByte(m_utf8[0]) )
    {
        // find out how many more bytes we expect and extract the initial bits
        if( decodeLead<2>(m_utf8[0], cp) )
        {
            len = 2;
        }
        else if( decodeLead<3>(m_utf8[0], cp) )
        {
            len = 3;
        }
        else if( decodeLead<4>(m_utf8[0], cp) )
        {
            len = 4;
        }
        else if( decodeLead<5>(m_utf8[0], cp) )
        {
            len = 5;
        }
        else if( decodeLead<6>(m_utf8[0], cp) )
        {
            len = 6;
        }

        // now extract the rest of the continuation
        for( int i = 1; i < len; ++i )
        {
            if( s_isUtf8ContinuationByte(m_utf8[i]) )
            {
                cp <<= 6;
                cp |= m_utf8[i] & 0x3f;
            }
            else // eh? it went bad, bail out and return a bad char.
            {
                cp = UNICODE_REPLACEMENT_CHARACTER;
                len = i;
                break;
            }
        }
    }

    if( lenOut )
    {
        *lenOut = len;
    }
    m_current = cp;
    m_utf8 += len;
    return true;
}

#if defined(HK_UTF8_SUPPORT_HSTRING)
#include <Winstring.h>
// Builds the UTF-8 copy of the characters held by the HSTRING 'h'.
// In HK_DEBUG_SLOW builds the buffer is filled with 0xFF bytes beforehand.
hkUtf8::Utf8FromWide::Utf8FromWide(HSTRING h)
{
    HK_SCRUB_UTF8_BUFFER_ON_DEBUG();
    // Only the raw character buffer is handed to init(); the length
    // out-parameter of WindowsGetStringRawBuffer is optional, so none is requested.
    // NOTE(review): init() receives no length, so an HSTRING with embedded
    // nulls is presumably converted only up to the first one - confirm.
    const wchar_t* s = WindowsGetStringRawBuffer(h, NULL);
    init(s);
}

// Creates a new HSTRING from the converted wide string and stores it in 'ret'.
// Returns the HRESULT reported by WindowsCreateString.
HRESULT hkUtf8::WideFromUtf8::dupHSTRING(_Outptr_ HSTRING* ret) const
{
    // No wide buffer: create from a null source of length zero.
    if( m_wide.isEmpty() )
    {
        return WindowsCreateString(NULL, 0, ret);
    }

    // One less than the stored size is passed as the length
    // (presumably to leave out the trailing null terminator - confirm).
    return WindowsCreateString(m_wide.begin(), m_wide.getSize() - 1, ret);
}
#endif

// Builds the UTF-8 copy of the wide string 's'; the conversion itself is done by init().
// In HK_DEBUG_SLOW builds the buffer is filled with 0xFF bytes (invalid in
// UTF-8) before the conversion; in other builds the scrub macro expands to nothing.
hkUtf8::Utf8FromWide::Utf8FromWide(const wchar_t* s)
{
    HK_SCRUB_UTF8_BUFFER_ON_DEBUG();
    init(s);
}

// Fills the UTF-8 buffer with the conversion of the wide string 's'.
// The buffer must be empty on entry. A null 's' releases the storage so
// that the buffer's begin() is null afterwards.
void hkUtf8::Utf8FromWide::init(const wchar_t* s)
{
    HK_ASSERT_NO_MSG(0x988fe82, m_utf8.isEmpty() );

    if ( s == HK_NULL )
    {
        // A null source is represented by a buffer with no storage at all.
        m_utf8.clearAndDeallocate();
        HK_ASSERT_NO_MSG( 0x65139c27, m_utf8.begin() == HK_NULL );
        return;
    }

    // Size the buffer from the computed length, then convert into it.
    const int requiredSize = utf8LengthForWide( s );
    m_utf8.setSize( requiredSize );
    utf8FromWide( m_utf8.begin(), requiredSize, s );
}

// Builds the wide character copy of the UTF-8 string 's'.
// A null 's' releases the storage so that the buffer's begin() is null.
// In HK_DEBUG_SLOW builds the buffer is filled with an invalid UTF-16
// pattern beforehand.
hkUtf8::WideFromUtf8::WideFromUtf8(const char* s)
{
    HK_SCRUB_WIDE_BUFFER_ON_DEBUG();

    if ( s == HK_NULL )
    {
        // A null source is represented by a buffer with no storage at all.
        m_wide.clearAndDeallocate();
        HK_ASSERT_NO_MSG( 0xc1fab09, m_wide.begin() == HK_NULL );
        return;
    }

    // Size the buffer from the computed length, then convert into it.
    const int requiredSize = wideLengthForUtf8( s );
    m_wide.setSize( requiredSize );
    wideFromUtf8( m_wide.begin(), requiredSize, s );
}

/*
 * Havok SDK - Base file, BUILD(#20180110)
 * 
 * Confidential Information of Microsoft Corporation.
 * Not for disclosure or distribution without Microsoft's prior written
 * consent.  This software contains code, techniques and know-how which
 * is confidential and proprietary to Microsoft.  Product and Trade Secret
 * source code contains trade secrets of Microsoft.  Havok Software (C)
 * Copyright 1999-2018 Microsoft Corporation.
 * All Rights Reserved. Use of this software is subject to the
 * terms of an end user license agreement.
 * 
 * The Havok Logo, and the Havok buzzsaw logo are trademarks of Microsoft.
 * Title, ownership rights, and intellectual property rights in the Havok
 * software remain in Microsoft and/or its suppliers.
 * 
 * Use of this software for evaluation purposes is subject to and
 * indicates acceptance of the End User licence Agreement for this
 * product. A copy of the license is included with this software and is
 * also available from Havok Support.
 * 
 */
