// TKBMS v1.0 -----------------------------------------------------
//
// PLATFORM     : ALL
// PRODUCT      : COMMON
// VISIBILITY       : PUBLIC
//
// ------------------------------------------------------TKBMS v1.0

#include <Common/ImageUtilities/hkImageUtilities.h>

#include <Common/ImageUtilities/Processing/hkImageProcessing.h>

#include <Common/Base/Types/Color/hkColor.h>
#include <Common/ImageUtilities/Image/hkImage.h>
#include <Common/ImageUtilities/Conversion/hkImageConversion.h>
#include <Common/ImageUtilities/Processing/hkImageFilter.h>

/// Fetches one sample of 'source' at 'index', which may lie outside of the valid range.
/// Out-of-range indices are resolved according to 'addressMode'; 'borderColor' is only returned
/// in BORDER mode. Unknown address modes trigger HK_ASSERT_NOT_IMPLEMENTED and yield magenta.
static hkColorf loadSample(hkArrayView<const hkColorf> source, hkInt32 index, hkImageAddressMode::Enum addressMode, hkColorfParameter borderColor)
{
    const hkInt32 numSamples = hkInt32(source.getSize());

    switch (addressMode)
    {
    case hkImageAddressMode::WRAP:
    {
        // The remainder lies in [-(n-1), (n-1)]; negative remainders are shifted up into [0, n-1].
        hkInt32 wrapped = index % numSamples;
        if (wrapped < 0)
        {
            wrapped += numSamples;
        }
        return source[wrapped];
    }

    case hkImageAddressMode::MIRROR:
        // Reflect at the lower / upper edge until the index ends up inside the range.
        while (index < 0 || index >= numSamples)
        {
            index = (index < 0) ? (-1 - index) : (2 * numSamples - 1 - index);
        }
        return source[index];

    case hkImageAddressMode::CLAMP:
        return source[hkMath::clamp<hkInt32>(index, 0, numSamples - 1)];

    case hkImageAddressMode::BORDER:
        // The unsigned comparison rejects negative indices and indices past the end in one test.
        if (hkUint32(index) < hkUint32(numSamples))
        {
            return source[index];
        }
        return borderColor;

    case hkImageAddressMode::MIRROR_ONCE:
    {
        // Reflect once around the lower edge, then clamp against the upper edge.
        const hkInt32 reflected = (index < 0) ? (-1 - index) : index;
        return source[(reflected >= numSamples) ? (numSamples - 1) : reflected];
    }

    default:
        HK_ASSERT_NOT_IMPLEMENTED(0x1dbe85c0);
        return hkColorf(1.0f, 0.0f, 1.0f, 1.0f);
    }
}


/// Convolves one line of samples ('source') into 'target' using precomputed filter weights.
/// For every output sample, getNumWeights() consecutive source samples are accumulated, starting at the
/// index reported by getFirstSourceSampleIndex(); source indices outside of the line are resolved by
/// loadSample() using 'addressMode' and 'borderColor'.
static void filterLine(hkArrayView<const hkColorf> source, hkArrayView<hkVector4f> target, const hkImageFilterWeights& weights, hkImageAddressMode::Enum addressMode, hkColorfParameter borderColor)
{
    const hkUint32 tapsPerSample = weights.getNumWeights();
    const hkUint32 numOutputSamples = target.getSize();

    // One weight is consumed from the front of this range per (output sample, tap) pair, in order.
    hkImageFilterWeightsRange weightCursor(weights.getWeightsRange());

    hkUint32 outIdx = 0;
    while (outIdx < numOutputSamples)
    {
        const hkInt32 windowStart = weights.getFirstSourceSampleIndex(outIdx);

        hkColorf accumulated;
        accumulated.setZero();

        for (hkUint32 tap = 0; tap < tapsPerSample; ++tap)
        {
            const hkInt32 tapIdx = windowStart + hkInt32(tap);
            accumulated.setAddMul(accumulated, loadSample(source, tapIdx, addressMode, borderColor), weightCursor.front());
            weightCursor.popFront();
        }

        target[outIdx] = accumulated;
        ++outIdx;
    }
}

/// Shrinks one line of 8-bit texels by averaging groups of (lengthIn / lengthOut) consecutive input texels.
/// 'pixelStride' is the number of byte channels per texel; 'strideIn' / 'strideOut' are the byte distances
/// between consecutive texels of the input / output line. The average is formed with a right shift by
/// log2(factor), so it is only an exact rounded division when the factor is a power of two.
static void downScaleFastLine(_In_range_(<=, strideOut) hkUint32 pixelStride, _In_reads_(lengthIn * strideIn) const hkUint8* src, _Out_writes_(lengthOut * strideOut) hkUint8* dest, _In_range_(>= , 1) hkUint32 lengthIn, _In_range_(>=, 1) hkUint32 strideIn, _In_range_(>= , 1) hkUint32 lengthOut, _In_range_(>= , 1) hkUint32 strideOut)
{
    const hkUint32 texelsPerGroup = lengthIn / lengthOut;
    const hkUint32 averageShift = hkMath::log2(static_cast<hkUint32>(texelsPerGroup));

    // Adding half of the divisor before shifting rounds to nearest instead of truncating.
    const hkUint32 roundingBias = texelsPerGroup / 2;

    hkUint32 outIdx = 0;
    while (outIdx < lengthOut)
    {
        hkUint32 c = 0;
        while (c < pixelStride)
        {
            // Sum up channel 'c' over all texels of the current input group.
            hkUint32 sum = roundingBias;
            for (hkUint32 k = 0; k < texelsPerGroup; ++k)
            {
                sum += static_cast<hkUint32>(src[c + k * strideIn]);
            }

            dest[outIdx * strideOut + c] = static_cast<hkUint8>(sum >> averageShift);
            ++c;
        }

        // Step to the first texel of the next input group.
        src += texelsPerGroup * strideIn;
        ++outIdx;
    }
}

static void downScaleFast(const hkImage& image, hkImage& out_Result, hkUint32 width, hkUint32 height)
{
    hkImageFormat::Enum format = image.getFormat();

    hkUint32 originalWidth = image.getWidth();
    hkUint32 originalHeight = image.getHeight();

    hkUint32 pixelStride = hkImageFormat::getBitsPerPixel(format) / 8;

    hkImageHeader intermediateHeader;
    intermediateHeader.setWidth(width);
    intermediateHeader.setHeight(originalHeight);
    intermediateHeader.setFormat(format);

    hkImage intermediate;
    intermediate.reset(intermediateHeader);

    for (hkUint32 row = 0; row < originalHeight; row++)
    {
        downScaleFastLine(pixelStride, image.getPixelPointer<hkUint8>(0, 0, 0, 0, row), intermediate.getPixelPointer<hkUint8>(0, 0, 0, 0, row), originalWidth, pixelStride, width, pixelStride);
    }

    // input and output images may be the same, so we can't access the original image below this point

    hkImageHeader outHeader;
    outHeader.setWidth(width);
    outHeader.setHeight(height);
    outHeader.setFormat(format);

    out_Result.reset(outHeader);

    for (hkUint32 col = 0; col < width; col++)
    {
        downScaleFastLine(pixelStride, intermediate.getPixelPointer<hkUint8>(0, 0, 0, col), out_Result.getPixelPointer<hkUint8>(0, 0, 0, col), originalHeight, intermediate.getRowPitch(), height, out_Result.getRowPitch());
    }
}

/// Returns the fraction of 'colors' whose alpha value passes the alpha test (alpha >= alphaThreshold),
/// in the range [0, 1]. An empty view has no covered pixels and yields 0 instead of the NaN that
/// a 0 / 0 division would produce.
static float evaluateAverageCoverage(hkArrayView<const hkColorf> colors, float alphaThreshold)
{
    const hkUint32 totalPixels = colors.getSize();
    if (totalPixels == 0)
    {
        return 0.0f;
    }

    hkUint32 count = 0;
    for (hkUint32 idx = 0; idx < totalPixels; ++idx)
    {
        if (colors[idx].getAlpha() >= alphaThreshold)
        {
            ++count;
        }
    }

    return float(count) / float(totalPixels);
}

/// Rescales the alpha channel of 'colors' so that the fraction of pixels passing the alpha test
/// (alpha >= alphaThreshold) approximates 'targetCoverage'.
///
/// Alpha values are quantized to 8 bit for the search, so the result is only accurate to 1/255.
/// The alpha values are left untouched if the current threshold already yields the best coverage.
static void normalizeCoverage(hkArrayView<hkColorf> colors, float alphaThreshold, float targetCoverage)
{
    // Based on the idea in http://the-witness.net/news/2010/09/computing-alpha-mipmaps/. Note we're using a histogram
    // to find the new alpha threshold here rather than bisecting.

    const hkUint32 totalPixels = colors.getSize();
    if (totalPixels == 0)
    {
        return;
    }

    // Generate histogram of alpha values
    hkUint32 alphaHistogram[256] = {};
    for (hkUint32 idx = 0; idx < totalPixels; ++idx)
    {
        alphaHistogram[hkColorf::floatToUint8(colors[idx].getAlpha())]++;
    }

    const hkInt32 targetCount = hkInt32(targetCoverage * totalPixels);

    // maxThreshold is the largest threshold whose coverage (number of pixels with alpha >= threshold) still
    // reaches targetCount. The loop stops at 0 at the latest so the value is always a valid histogram index.
    hkInt32 coverageCount = 0;
    hkInt32 maxThreshold = 255;
    for (; maxThreshold > 0; maxThreshold--)
    {
        coverageCount += alphaHistogram[maxThreshold];

        if (coverageCount >= targetCount)
        {
            break;
        }
    }

    // Lowering the threshold across empty histogram bins does not change the coverage, so extend the range
    // downwards for as long as the bin directly below is empty.
    hkInt32 minThreshold = maxThreshold;
    while (minThreshold > 0 && alphaHistogram[minThreshold - 1] == 0)
    {
        minThreshold--;
    }

    const hkInt32 currentThreshold = hkColorf::floatToUint8(alphaThreshold);

    // Each of the alpha test thresholds in the range [minThreshold; maxThreshold] will result in the same coverage. Pick a new threshold
    // close to the old one so we scale by the smallest necessary amount.
    hkInt32 newThreshold;
    if (currentThreshold < minThreshold)
    {
        newThreshold = minThreshold;
    }
    else if (currentThreshold > maxThreshold)
    {
        newThreshold = maxThreshold;
    }
    else
    {
        // Avoid rescaling altogether if the current threshold already preserves coverage
        return;
    }

    // A threshold of 0 cannot be reached by scaling (it would require an infinite scale factor, and pixels with
    // zero alpha stay at zero anyway). Fall back to the smallest non-zero threshold instead.
    if (newThreshold < 1)
    {
        newThreshold = 1;
    }
    if (newThreshold == currentThreshold)
    {
        return;
    }

    // Rescale alpha values so that the new threshold maps onto the threshold used by the alpha test
    const float alphaScale = alphaThreshold / (newThreshold / 255.0f);
    for (hkUint32 idx = 0; idx < totalPixels; ++idx)
    {
        colors[idx].setAlpha(colors[idx].getAlpha() * alphaScale);
    }
}


/// Resamples 'source' to 'width' x 'height' texels and writes the result to 'target'.
///
/// - If either requested dimension is zero, 'target' is reset to a header that only has the source format set.
/// - If the size is unchanged, 'source' is assigned to 'target'.
/// - If no filter is given, the format is one of the 8-bit formats listed below and both axes shrink by an exact
///   power-of-two factor, the integer averaging path (downScaleFast) is used.
/// - Otherwise the image is filtered separably in RGBA 32-bit float: first horizontally into an intermediate
///   image, then vertically column by column, converting from / to the source format where necessary.
///   A triangle filter is used when 'filter' is null.
///
/// 'addressModeU' / 'addressModeV' and 'borderColor' control how the horizontal / vertical filter reads samples
/// outside of the image. 'source' and 'target' may be the same image.
///
/// Returns HK_FAILURE if no conversion path between the source format and RGBA float can be built, or if a
/// row / column conversion fails. Note that 'target' has already been reset when a column conversion fails.
hkResult hkImageProcessing::scale(const hkImage& source, hkImage& target, hkUint32 width, hkUint32 height, _In_opt_ const hkImageFilter* filter,
    hkImageAddressMode::Enum addressModeU, hkImageAddressMode::Enum addressModeV, hkColorfParameter borderColor)
{
    // Degenerate target size: produce an image that only carries the source format.
    if (width == 0 || height == 0)
    {
        hkImageHeader header;
        header.setFormat(source.getFormat());
        target.reset(header);
        return HK_SUCCESS;
    }

    hkImageFormat::Enum format = source.getFormat();
    hkUint32 bytesPerPixel = hkImageFormat::getBitsPerPixel(format) / 8;

    hkUint32 originalWidth = source.getWidth();
    hkUint32 originalHeight = source.getHeight();

    // Nothing to resample if the size does not change.
    if (originalWidth == width && originalHeight == height)
    {
        target = source;
        return HK_SUCCESS;
    }

    // Scaling down by an even factor?
    const hkUint32 downScaleFactorX = originalWidth / width;
    const hkUint32 downScaleFactorY = originalHeight / height;

    // Formats that downScaleFast() can average byte by byte.
    hkImageFormat::Enum fastScaleFormatsArray[] =
    {
      hkImageFormat::R8_G8_B8_A8_UNSIGNED_NORMALIZED,
      hkImageFormat::R8_G8_B8_A8_UNSIGNED,
      hkImageFormat::B8_G8_R8_A8_UNSIGNED_NORMALIZED,
      hkImageFormat::B8_G8_R8_UNSIGNED_NORMALIZED,
      hkImageFormat::R8_UNSIGNED_NORMALIZED,
      hkImageFormat::R8_UNSIGNED,
    };

    // The fast path requires: no explicit filter, a supported format, and an exact power-of-two
    // reduction on both axes (the integer division above must have had no remainder).
    if (
        filter == HK_NULL &&
        hkArrayViewT::make(fastScaleFormatsArray).indexOf(format) != -1 &&
        downScaleFactorX * width == originalWidth &&
        downScaleFactorY * height == originalHeight &&
        hkMath::isPower2(downScaleFactorX) &&
        hkMath::isPower2(downScaleFactorY))
    {
        downScaleFast(source, target, width, height);
        return HK_SUCCESS;
    }

    // Fallback to default filter
    hkImageFilterTriangle defaultFilter;
    if (!filter)
    {
        filter = &defaultFilter;
    }

    // pathA converts source format -> RGBA float, pathB converts RGBA float -> source format.
    hkInplaceArray<hkImageConversion::ConversionPathNode, 16> pathA, pathB;
    hkUint32 numIntermediatesA, numIntermediatesB;

    if (hkImageConversion::buildPath(format, hkImageFormat::R32_G32_B32_A32_FLOAT, false, pathA, numIntermediatesA).isFailure() ||
        hkImageConversion::buildPath(hkImageFormat::R32_G32_B32_A32_FLOAT, format, false, pathB, numIntermediatesB).isFailure())
    {
        return HK_FAILURE;
    }

    // Result of the horizontal pass: target width, source height, RGBA float.
    hkImageHeader intermediateHeader;
    intermediateHeader.setWidth(width);
    intermediateHeader.setHeight(originalHeight);
    intermediateHeader.setFormat(hkImageFormat::R32_G32_B32_A32_FLOAT);

    hkImage intermediate;
    intermediate.reset(intermediateHeader);

    // buffer1: float source row (pass 1) / float source column (pass 2)
    // buffer2: filtered float column
    // buffer3: filtered column converted back to the target format
    hkArray<hkColorf> buffer1;
    hkArray<hkColorf> buffer2;
    hkArray<hkUint8> buffer3;

    hkImageFilterWeights horizontalFilterWeights(*filter, originalWidth, width);
    hkImageFilterWeights verticalFilterWeights(*filter, originalHeight, height);

    // Pass 1: filter every source row horizontally into 'intermediate'.
    buffer1.setSize(originalWidth);
    for (hkUint32 row = 0; row < originalHeight; row++)
    {
        hkArrayView<const hkColorf> rowView;
        if (format == hkImageFormat::R32_G32_B32_A32_FLOAT)
        {
            // Already float: read the row directly from the source image.
            rowView = hkArrayViewT::make(source.getPixelPointer<hkColorf>(0, 0, 0, 0, row), source.getWidth());
        }
        else
        {
            // Convert the row to float into buffer1 first.
            if (hkImageConversion::convertRaw(
                hkArrayViewT::make(source.getPixelPointer<void>(0, 0, 0, 0, row), source.getRowPitch()),
                hkArrayViewT::make(static_cast<void*>(buffer1.begin()), buffer1.getSize() * sizeof(buffer1[0])),
                originalWidth, pathA, numIntermediatesA).isFailure())
            {
                return HK_FAILURE;
            }
            rowView = buffer1;
        }

        if (width == originalWidth)
        {
            // Width is unchanged, so the row is copied without filtering.
            hkString::memCpy(intermediate.getPixelPointer<void>(0, 0, 0, 0, row), rowView.begin(), rowView.getSize() * sizeof(rowView[0]));
        }
        else
        {
            filterLine(rowView, intermediate.getRowView<hkColorf>(0, 0, 0, row), horizontalFilterWeights, addressModeU, borderColor);
        }
    }

    // input and output images may be the same, so we can't access the original image below this point

    hkImageHeader targetHeader;
    targetHeader.setFormat(format);
    targetHeader.setWidth(width);
    targetHeader.setHeight(height);

    target.reset(targetHeader);

    buffer1.setSize(originalHeight);
    buffer2.setSize(height);
    buffer3.setSize(height * bytesPerPixel);

    // Pass 2: filter every column of 'intermediate' vertically and scatter it into 'target'.
    for (hkUint32 col = 0; col < width; ++col)
    {
        // Read column into temp buffer for more efficient filtering
        {
            hkColorf* sourcePointer = intermediate.getPixelPointer<hkColorf>(0, 0, 0, col);
            hkUint32 stride = intermediate.getRowPitch();
            for (hkUint32 row = 0; row < originalHeight; ++row)
            {
                buffer1[row] = *sourcePointer;

                sourcePointer = hkAddByteOffset(sourcePointer, stride);
            }
        }

        if (height == originalHeight)
        {
            // Height is unchanged, so the column is copied without filtering.
            hkString::memCpy(buffer2.begin(), buffer1.begin(), buffer1.getSize() * sizeof(buffer1[0]));
        }
        else
        {
            filterLine(buffer1, buffer2, verticalFilterWeights, addressModeV, borderColor);
        }

        // Bring the filtered column into the target format (a no-op for float targets).
        hkArrayView<const hkUint8> colView;
        if (format == hkImageFormat::R32_G32_B32_A32_FLOAT)
        {
            colView = hkArrayViewT::make(reinterpret_cast<const hkUint8*>(buffer2.begin()), buffer2.getSize() * sizeof(buffer2[0]));
        }
        else
        {
            if (hkImageConversion::convertRaw(
                hkArrayViewT::make(static_cast<const void*>(buffer2.begin()), buffer2.getSize() * sizeof(buffer2[0])),
                hkArrayViewT::make(static_cast<void*>(buffer3.begin()), buffer3.getSize() * sizeof(buffer3[0])),
                height, pathB, numIntermediatesB).isFailure())
            {
                return HK_FAILURE;
            }
            colView = buffer3;
        }

        // Scatter the tightly packed column into the target image, one texel per row.
        {
            const void* sourcePointer = colView.begin();
            void* targetPointer = target.getPixelPointer<void>(0, 0, 0, col);

            hkUint32 sourceStride = bytesPerPixel;
            hkUint32 targetStride = target.getRowPitch();

            for (hkUint32 row = 0; row < height; ++row)
            {
                hkString::memCpy(targetPointer, sourcePointer, bytesPerPixel);

                sourcePointer = hkAddByteOffset(sourcePointer, sourceStride);
                targetPointer = hkAddByteOffset(targetPointer, targetStride);
            }
        }
    }

    return HK_SUCCESS;
}

/// Mirrors 'image' top to bottom in place. Every mip level of every face of every array element is
/// processed by swapping whole rows pairwise; the middle row of an odd-height level stays where it is.
void hkImageProcessing::flipVertical(hkImage& image)
{
    // Scratch storage for one row, resized to the row pitch of the mip level being processed.
    hkInplaceArray<hkUint8, 4096> scratchRow;

    for (hkUint32 element = 0; element < image.getNumArrayElements(); ++element)
    {
        for (hkUint32 faceIdx = 0; faceIdx < image.getNumFaces(); ++faceIdx)
        {
            for (hkUint32 level = 0; level < image.getNumMipLevels(); ++level)
            {
                const hkUint32 bytesPerRow = image.getRowPitch(level);
                const hkUint32 numRows = image.getHeight(level);

                scratchRow.setSize(bytesPerRow);

                // 'upper' walks down from the first row while 'lower' walks up from the last row.
                hkUint32 upper = 0;
                hkUint32 lower = numRows;
                while (upper < numRows / 2)
                {
                    --lower;

                    void* upperRow = image.getPixelPointer<void>(level, faceIdx, element, 0, upper);
                    void* lowerRow = image.getPixelPointer<void>(level, faceIdx, element, 0, lower);

                    // Three-way swap through the scratch row.
                    hkString::memCpy(scratchRow.begin(), upperRow, bytesPerRow);
                    hkString::memCpy(upperRow, lowerRow, bytesPerRow);
                    hkString::memCpy(lowerRow, scratchRow.begin(), bytesPerRow);

                    ++upper;
                }
            }
        }
    }
}

void hkImageProcessing::generateMipMaps(const hkImage& source, hkImage& target, const MipMapOptions& options)
{
    hkImageHeader header = source.getHeader();
    HK_ASSERT(0x6795cf0a, header.getFormat() == hkImageFormat::R32_G32_B32_A32_FLOAT, "The source image must be a RGBA 32-bit float format.");
    HK_ASSERT(0x2d625c25, header.getDepth() == 1, "3D textures are not supported.");
    HK_ASSERT(0x73f735ce, &source != &target, "Source and target must not be the same image.");

    // Make a local copy to be able to tweak some of the options
    hkImageProcessing::MipMapOptions mipMapOptions = options;

    // Enforce CLAMP addressing mode for cubemaps
    if (source.getNumFaces() == 6)
    {
        mipMapOptions.m_addressModeU = hkImageAddressMode::CLAMP;
        mipMapOptions.m_addressModeV = hkImageAddressMode::CLAMP;
    }

    const hkUint32 numMipMaps = hkImage::computeNumberOfMipMaps(header.getWidth(), header.getHeight());
    header.setNumMipLevels(numMipMaps);

    target.reset(header);

    for (hkUint32 arrayIndex = 0; arrayIndex < source.getNumArrayElements(); arrayIndex++)
    {
        for (hkUint32 face = 0; face < source.getNumFaces(); face++)
        {
            hkImageHeader currentMipMapHeader = header;
            currentMipMapHeader.setNumMipLevels(1);
            currentMipMapHeader.setNumFaces(1);
            currentMipMapHeader.setNumArrayElements(1);

            hkMemUtil::memCpy(
                target.getSubImagePointer<void>(0, face, arrayIndex),
                source.getSubImagePointer<void>(0, face, arrayIndex),
                source.getDepthPitch(0));

            float targetCoverage = 0.0f;
            if (mipMapOptions.m_preserveCoverage)
            {
                targetCoverage = evaluateAverageCoverage(
                    hkArrayView<const hkColorf>(source.getSubImagePointer<hkColorf>(0, face, arrayIndex), source.getWidth() * source.getHeight()),
                    mipMapOptions.m_alphaThreshold);
            }

            for (hkUint32 mipMapLevel = 0; mipMapLevel < numMipMaps - 1; mipMapLevel++)
            {
                hkImageHeader nextMipMapHeader = currentMipMapHeader;
                nextMipMapHeader.setWidth(hkMath::max2(1, nextMipMapHeader.getWidth() / 2));
                nextMipMapHeader.setHeight(hkMath::max2(1, nextMipMapHeader.getHeight() / 2));

                auto sourceData = hkArrayViewT::make(
                    target.getSubImagePointer<void>(mipMapLevel, face, arrayIndex),
                    currentMipMapHeader.getDepthPitch());
                hkImage currentMipMap(currentMipMapHeader, sourceData);

                auto dstData = hkArrayViewT::make(
                    target.getSubImagePointer<void>(mipMapLevel + 1, face, arrayIndex),
                    nextMipMapHeader.getDepthPitch());
                hkImage nextMipMap(nextMipMapHeader, dstData);

                hkImageProcessing::scale(
                    currentMipMap, nextMipMap,
                    nextMipMapHeader.getWidth(), nextMipMapHeader.getHeight(),
                    mipMapOptions.m_filter, mipMapOptions.m_addressModeU, mipMapOptions.m_addressModeV, mipMapOptions.m_borderColor);

                if (mipMapOptions.m_preserveCoverage)
                {
                    normalizeCoverage(
                        hkArrayView<hkColorf>(nextMipMap.getDataPointer<hkColorf>(), nextMipMap.getWidth() * nextMipMap.getHeight()),
                        mipMapOptions.m_alphaThreshold,
                        targetCoverage);
                }

                if (mipMapOptions.m_renormalizeNormals)
                {
                    renormalizeNormalMap(nextMipMap);
                }

                currentMipMapHeader = nextMipMapHeader;
            }
        }
    }
}

/// Recomputes the Z component of every normal in 'image' from its X and Y components, in place.
/// Texels are expected to store normals packed into [0, 1]; the image must be RGBA 32-bit float.
/// Every 16 bytes of the image data (all sub images) are treated as one texel.
void hkImageProcessing::reconstructNormalZ(hkImage& image)
{
    HK_ASSERT(0x732b338b, image.getFormat() == hkImageFormat::R32_G32_B32_A32_FLOAT, "This algorithm currently expects a RGBA 32 Float as input");

    // Constants for unpacking ([0,1] -> [-1,1]: n * 2 - 1) and packing ([-1,1] -> [0,1]: n * 0.5 + 0.5).
    hkVector4f scaleTwo;
    scaleTwo.setAll(2.0f);

    hkVector4f biasMinusOne;
    biasMinusOne.setAll(-1.0f);

    hkVector4f oneHalf;
    oneHalf.setAll(0.5f);

    const hkSimdFloat32 unit = hkSimdFloat32_1;

    hkVector4f* texel = image.getDataPointer<hkVector4f>();
    hkVector4f* const texelEnd = hkAddByteOffset(texel, image.getDataSize());

    while (texel < texelEnd)
    {
        hkVector4f unpacked;
        unpacked.setAddMul(biasMinusOne, *texel, scaleTwo);

        // z = sqrt(1 - (x*x + y*y)); HK_SQRT_SET_ZERO presumably yields 0 for non-positive arguments.
        unpacked.setComponent<2>((unit - unpacked.dot<2>(unpacked)).sqrt<HK_ACC_23_BIT, HK_SQRT_SET_ZERO>());

        texel->setAddMul(oneHalf, unpacked, oneHalf);
        ++texel;
    }
}

/// Normalizes the XYZ part of every normal stored in 'image' to unit length, in place.
/// Texels are expected to store normals packed into [0, 1]; the image must be RGBA 32-bit float.
/// Every 16 bytes of the image data are treated as one texel.
void hkImageProcessing::renormalizeNormalMap(hkImage& image)
{
    HK_ASSERT(0x18b08e93, image.getFormat() == hkImageFormat::R32_G32_B32_A32_FLOAT, "This algorithm currently expects a RGBA 32 Float as input");

    // Constants for unpacking ([0,1] -> [-1,1]: n * 2 - 1) and packing ([-1,1] -> [0,1]: n * 0.5 + 0.5).
    hkVector4f scaleTwo;
    scaleTwo.setAll(2.0f);

    hkVector4f biasMinusOne;
    biasMinusOne.setAll(-1.0f);

    hkVector4f oneHalf;
    oneHalf.setAll(0.5f);

    hkVector4f* texel = image.getPixelPointer<hkVector4f>();
    hkVector4f* const texelEnd = hkAddByteOffset(texel, image.getDataSize());

    while (texel < texelEnd)
    {
        hkVector4f unpacked;
        unpacked.setAddMul(biasMinusOne, *texel, scaleTwo);
        unpacked.normalize<3>();
        texel->setAddMul(oneHalf, unpacked, oneHalf);
        ++texel;
    }
}

/// Increases the roughness stored in the mip levels of 'roughnessMap' based on how much the normals of
/// 'normalMap' vary within the footprint of each texel.
///
/// Both images must be RGBA 32-bit float and 'roughnessMap' must be at least as large as 'normalMap'.
/// A mip chain of the normal map is generated with default MipMapOptions; the length of the resulting
/// averaged normals is turned into a variance that is added to the squared roughness. Only mip levels >= 1
/// of face 0 / array element 0 are modified, and the new roughness is written to all four channels.
void hkImageProcessing::adjustRoughness( hkImage& roughnessMap, const hkImage& normalMap )
{
    HK_ASSERT(0x5ee2286f, roughnessMap.getFormat() == hkImageFormat::R32_G32_B32_A32_FLOAT, "This algorithm currently expects a RGBA 32 Float as input" );
    HK_ASSERT(0x596c1b26, normalMap.getFormat() == hkImageFormat::R32_G32_B32_A32_FLOAT, "This algorithm currently expects a RGBA 32 Float as input" );

    HK_ASSERT(0x208a26a, roughnessMap.getWidth() >= normalMap.getWidth() && roughnessMap.getHeight() >= normalMap.getHeight(), "The roughness map needs to be bigger or same size than the normal map." );

    hkImage averagedNormals;
    hkImageProcessing::MipMapOptions mipOptions;

    // Filter the normal map without re-normalization so each mip level holds the average normal length.
    const bool sameSize = roughnessMap.getWidth() == normalMap.getWidth() && roughnessMap.getHeight() == normalMap.getHeight();
    if (sameSize)
    {
        hkImageProcessing::generateMipMaps( normalMap, averagedNormals, mipOptions );
    }
    else
    {
        // Bring the normal map to the size of the roughness map first; scaling changes the normal lengths,
        // so they are normalized again before the mip chain is built.
        hkImage resizedNormals;
        hkImageProcessing::scale( normalMap, resizedNormals, roughnessMap.getWidth(), roughnessMap.getHeight() );
        hkImageProcessing::renormalizeNormalMap( resizedNormals );
        hkImageProcessing::generateMipMaps( resizedNormals, averagedNormals, mipOptions );
    }

    HK_ASSERT(0x3bbd813, roughnessMap.getNumMipLevels() == averagedNormals.getNumMipLevels(), "Roughness and normal map must have the same number of mip maps" );

    // Constants for unpacking normals from [0,1] to [-1,1].
    hkVector4f scaleTwo; scaleTwo.setAll( 2.0f );
    hkVector4f biasMinusOne; biasMinusOne.setAll( -1.0f );

    const hkUint32 levelCount = roughnessMap.getNumMipLevels();
    for (hkUint32 level = 1; level < levelCount; ++level)
    {
        hkArrayView<hkVector4f> roughnessTexels = hkArrayViewT::make( roughnessMap.getSubImagePointer<hkVector4f>( level, 0, 0 ), roughnessMap.getDepthPitch( level ) / sizeof(hkVector4f) );
        hkArrayView<hkVector4f> normalTexels = hkArrayViewT::make( averagedNormals.getSubImagePointer<hkVector4f>( level, 0, 0 ), averagedNormals.getDepthPitch( level ) / sizeof( hkVector4f ) );

        for (int texelIdx = 0; texelIdx < roughnessTexels.getSize(); ++texelIdx)
        {
            hkVector4f unpacked;
            unpacked.setAddMul( biasMinusOne, normalTexels[texelIdx], scaleTwo );

            // Averaged normals that are still (at least) unit length indicate no variation; leave those texels alone.
            const float r = unpacked.length<3>().getReal();
            if (!(r < 1.0f))
            {
                continue;
            }

            const float rSquared = r * r;
            const float kappa = (3.0f * r - r * rSquared) / (1.0f - rSquared);
            const float variance = 1.0f / (2.0f * kappa);

            const float roughnessBefore = roughnessTexels[texelIdx].getComponent<0>().getReal();
            const float roughnessAfter = hkMath::sqrt( roughnessBefore * roughnessBefore + variance );

            roughnessTexels[texelIdx].setAll( roughnessAfter );
        }
    }
}

/// Converts the cube map 'src' with the Direct3D layout into the cube map 'dst' with the internal HKR layout.
static inline void convertCubemapLayout(const hkImage& src, hkImage& dst)
{
    HK_ASSERT(0x6969615, 1 == src.getNumArrayElements() &&
                 1 == src.getNumMipLevels() &&
                 6 == src.getNumFaces() &&
                 1 == src.getDepth(),
              "The source image is expected to be a cube map without MIP maps.");
    HK_ASSERT(0x35ac2632, src.getWidth() > 0 && src.getHeight() > 0,
              "The source image is expected to be of non-zero dimensions.");
    HK_ASSERT(0x28980e6f, src.getDataPointer<void*>() != dst.getDataPointer<void*>(),
              "The destination image cannot share its data pointer with the source image.");
    HK_ASSERT(0x65fbaccd, src.getFormat() == hkImageFormat::R32_G32_B32_A32_FLOAT,
             "The source image is expected to be 4 x 32-bit float RGBA.");

    // Create the destination image.
    dst.reset(src.getHeader());
    // Compute the size of a face of the cube map in QWORDs.
    const hkUint32 size = src.getWidth();
    const hkUint32 faceSizeQWords = size * size;
    // Iterate over the faces of the source cube map.
    for (hkUint32 f = 0; f < 6; ++f)
    {
        // Mapping from the source face indices to the destination face indices.
        static hkUint32 faceMap[6] = {5, 4, 3, 2, 0, 1};
        switch (f)
        {
            case 2:
            case 3:
                for (hkUint32 j = 0; j < size; ++j)
                {
                    for (hkUint32 i = 0; i < size; ++i)
                    {
                        const hkColorf* srcPixel = src.getPixelPointer<hkColorf>(0, f, 0, i, j);
                        hkColorf* dstPixel;
                        if (f == 2)
                        {
                            // Swap the top and the bottom faces, and rotate the face 90 degrees clockwise.
                            dstPixel = dst.getPixelPointer<hkColorf>(0, faceMap[f], 0, (size - 1) - j, i);
                        }
                        else
                        {
                            // Swap the top and the bottom faces, and rotate the face 90 degrees counter-clockwise.
                            dstPixel = dst.getPixelPointer<hkColorf>(0, faceMap[f], 0, j, (size - 1) - i);
                        }
                        *dstPixel = *srcPixel;
                    }
                }
                break;

            default:
                // Rotate side faces 90 degrees clockwise.
                hkString::memCpy16(dst.getPixelPointer<hkFloat32>(0, faceMap[f], 0, 0, 0),
                                   src.getPixelPointer<hkFloat32>(0, f, 0, 0, 0), faceSizeQWords);
        }
    }
}

// Spherical map (a.k.a. latitude-longitude or cylindrical).
// Parametrized as { phi: theta } in { X: Y }.
static void convertSphericalToCubemap(const hkImage& src, hkImage& dst, hkUint32 dstDim)
{
    static const float HK_M_2PI = 6.283185307f; // 2*pi
    static const float HK_M_PI = 3.141592653f; // pi
    static const float HK_M_PI_2 = 1.570796326f; // pi/2

    // Get the source image's dimensions.
    const hkUint32 srcHeight = src.getHeight();
    const hkUint32 srcWidth  = src.getWidth();
    // Copy and then patch the metadata for the resulting cube map.
    hkImageHeader dstDesc = src.getHeader();
    dstDesc.setWidth(dstDim);
    dstDesc.setHeight(dstDim);
    dstDesc.setNumFaces(6);
    // Create a temporary image with the Direct3D layout.
    hkImage img;
    img.reset(dstDesc);
    // Write (gather) the cube map image data.
    for (hkUint32 f = 0; f < 6; ++f)
    {
        // Compute the cube map faces axes: right, up, forward.
        hkVector4f axes[3];
        // Face mappings per https://msdn.microsoft.com/en-us/library/windows/desktop/bb204881
        switch (f)
        {
            case 0: // +X: right face.
                axes[0].set( 0.f,  0.f, -1.f);
                axes[1].set( 0.f,  1.f,  0.f);
                axes[2].set( 1.f,  0.f,  0.f);
                break;
            case 1: // -X: left face.
                axes[0].set( 0.f,  0.f,  1.f);
                axes[1].set( 0.f,  1.f,  0.f);
                axes[2].set(-1.f,  0.f,  0.f);
                break;
            case 2: // +Y: top face.
                axes[0].set( 1.f,  0.f,  0.f);
                axes[1].set( 0.f,  0.f, -1.f);
                axes[2].set( 0.f,  1.f,  0.f);
                break;
            case 3: // -Y: bottom face.
                axes[0].set( 1.f,  0.f,  0.f);
                axes[1].set( 0.f,  0.f,  1.f);
                axes[2].set( 0.f, -1.f,  0.f);
                break;
            case 4: // +Z: back face.
                axes[0].set( 1.f,  0.f,  0.f);
                axes[1].set( 0.f,  1.f,  0.f);
                axes[2].set( 0.f,  0.f,  1.f);
                break;
            default: // -Z: front face.
                axes[0].set(-1.f,  0.f,  0.f);
                axes[1].set( 0.f,  1.f,  0.f);
                axes[2].set( 0.f,  0.f, -1.f);
        }
        // Compute the bottom left corner of the face.
        const hkSimdFloat32 simdHalf   = hkSimdFloat32_Half;
        const hkSimdFloat32 simdDstDim = hkSimdFloat32::fromInt32(dstDim);
        const hkVector4f    bottomLeft = axes[2] * (simdHalf * simdDstDim)
                                       - axes[0] * (simdHalf * simdDstDim)
                                       - axes[1] * (simdHalf * simdDstDim);
        // Gather the cube map image data.
        for (hkUint32 j = 0; j < dstDim; ++j)
        {
            for (hkUint32 i = 0; i < dstDim; ++i)
            {
                // Compute the normalized cube map direction.
                // Apply the texel center offset of 0.5.
                hkVector4f dir = bottomLeft + axes[0] * hkSimdFloat32::fromFloat(i + 0.5f)
                                            + axes[1] * hkSimdFloat32::fromFloat(j + 0.5f);
                dir.normalize<3, HK_ACC_FULL, HK_SQRT_IGNORE>();
                // Represent 'dir' in the spherical coordinates.
                const hkFloat32 cosTheta = dir.getComponent<1>().getReal();
                const hkFloat32 phi      = hkMath::atan2(dir.getComponent<2>().getReal(),
                                                             dir.getComponent<0>().getReal());
                // Bias the phi angle into the (-1/2 * Pi, 3/2 * Pi] range.
                const hkFloat32 biasedPhi = (phi > -HK_M_PI_2) ? phi : phi + HK_M_2PI;
                // Convert (phi, theta) into (x, y) coordinates of the source image.
                // Source image parametrization:
                // X: [0, w) <- [3/2 * Pi, -1/2 * Pi)
                // Y: [0, h) <- [cos(0), cos(Pi)) = [1, -1)
                const hkFloat32 u = ((HK_M_PI + HK_M_PI_2) - biasedPhi) * (1.f / HK_M_2PI);
                // For some reason, 'v' has to be mirrored. Perhaps the D3D9 docs are wrong.
                const hkFloat32 v = 1.f - (hkMath::acos(cosTheta) * (1.f / HK_M_PI));
                // Rescale to account for the resolution of the source image.
                const hkFloat32 x = u * srcWidth  - 0.5f;
                const hkFloat32 y = v * srcHeight - 0.5f;
                // Compute sample positions for bilinear filtering.
                const int xL = hkMath::clamp(static_cast<int>(hkMath::floor(x)), 0, static_cast<int>(srcWidth  - 1));
                const int xH = hkMath::min2(xL + 1, srcWidth  - 1);
                const int yL = hkMath::clamp(static_cast<int>(hkMath::floor(y)), 0, static_cast<int>(srcHeight - 1));
                const int yH = hkMath::min2(yL + 1,  srcHeight - 1);
                // Fetch 4x samples[x][y].
                hkVector4f samples[2][2];
                samples[0][0].load<4, HK_IO_SIMD_ALIGNED>(src.getPixelPointer<hkFloat32>(0, 0, 0, xL, yL));
                samples[1][0].load<4, HK_IO_SIMD_ALIGNED>(src.getPixelPointer<hkFloat32>(0, 0, 0, xH, yL));
                samples[0][1].load<4, HK_IO_SIMD_ALIGNED>(src.getPixelPointer<hkFloat32>(0, 0, 0, xL, yH));
                samples[1][1].load<4, HK_IO_SIMD_ALIGNED>(src.getPixelPointer<hkFloat32>(0, 0, 0, xH, yH));
                // Compute bilinear weights.
                const hkSimdFloat32 wX = hkSimdFloat32::fromFloat(x - xL);
                const hkSimdFloat32 wY = hkSimdFloat32::fromFloat(y - yL);
                // Perform bilinear filtering of the source image.
                hkVector4f lerpX0, lerpX1, result;
                lerpX0.setInterpolate(samples[0][0], samples[1][0], wX);
                lerpX1.setInterpolate(samples[0][1], samples[1][1], wX);
                result.setInterpolate(lerpX0, lerpX1, wY);
                // Store the results.
                result.store<4, HK_IO_SIMD_ALIGNED>(img.getPixelPointer<hkFloat32>(0, f, 0, i, j));
            }
        }
    }
    // Perform conversion into the internal HKR layout.
    convertCubemapLayout(img, dst);
}

void hkImageProcessing::convertToCubemap(const hkImage& src, hkImage& dst)
{
    HK_ASSERT(0x78bcbca5, 1 == src.getNumArrayElements() &&
        1 == src.getNumMipLevels() &&
        1 == src.getNumFaces() &&
        1 == src.getDepth(),
        "The source image is expected to be a 2D texture.");
    HK_ASSERT(0x686c1340, src.getWidth() > 0 && src.getHeight() > 0,
        "The source image is expected to be of non-zero dimensions.");
    HK_ASSERT(0x1e1a393b, src.getDataPointer<void*>() != dst.getDataPointer<void*>(),
        "The destination image cannot share its data pointer with the source image.");
    HK_ASSERT(0x3e113e6b, src.getFormat() == hkImageFormat::R32_G32_B32_A32_FLOAT,
        "The source image is expected to be 4 x 32-bit float RGBA.");

    const hkUint32 srcWidth = src.getWidth();
    const hkUint32 srcHeight = src.getHeight();

    hkUint32 dstDim;
    hkUint32 tileCountX;
    hkUint32 tileCountY;

    const auto init = [&](hkUint32 a_tileCountX, hkUint32 a_tileCountY)
    {
        if (srcWidth % a_tileCountX == 0 && srcHeight % a_tileCountY == 0 && srcWidth / a_tileCountX == srcHeight / a_tileCountY)
        {
            dstDim = srcWidth / a_tileCountX;
            tileCountX = a_tileCountX;
            tileCountY = a_tileCountY;
            return true;
        }
        return false;
    };

    struct Transform
    {
        int srcBaseX;
        int srcBaseY;
        int dstX_mulSrcX, dstX_mulSrcY, dstX_add;
        int dstY_mulSrcX, dstY_mulSrcY, dstY_add;
    };
    enum { kFaceCount = 6 };
    Transform transform[kFaceCount];

    const auto setTransform = [&](int face, const int srcTileX, const int srcTileY, const int rotation)
    {
        Transform& t = transform[face];
        t.srcBaseX = (int)dstDim * srcTileX;
        t.srcBaseY = (int)dstDim * srcTileY;
        switch (rotation & 3)
        {
        case 0: // 0 degrees CCW
            t.dstX_mulSrcX = 1;  t.dstX_mulSrcY = 0;  t.dstX_add = 0;
            t.dstY_mulSrcX = 0;  t.dstY_mulSrcY = 1;  t.dstY_add = 0;
            break;
        case 1: // 90 degrees CCW
            t.dstX_mulSrcX = 0;  t.dstX_mulSrcY = 1;  t.dstX_add = 0;
            t.dstY_mulSrcX = -1; t.dstY_mulSrcY = 0;  t.dstY_add = dstDim - 1;
            break;
        case 2: // 180 degrees CCW
            t.dstX_mulSrcX = -1; t.dstX_mulSrcY = 0;  t.dstX_add = dstDim - 1;
            t.dstY_mulSrcX = 0;  t.dstY_mulSrcY = -1; t.dstY_add = dstDim - 1;
            break;
        default: // 270 degrees CCW
            t.dstX_mulSrcX = 0;  t.dstX_mulSrcY = -1; t.dstX_add = dstDim - 1;
            t.dstY_mulSrcX = 1;  t.dstY_mulSrcY = 0;  t.dstY_add = 0;
            break;
        }
    };

    // Determine layout of the input image and act based on it.
    // Note that HKR requires LRUDBF order of faces (Left, Right, Up, Down, Backward, Forward).
    if (init(4, 3))
    {
        // Horizontal cross
        //  U
        // LFRB
        //  D
        setTransform(0, 0, 1, 0);
        setTransform(1, 2, 1, 0);
        setTransform(2, 1, 0, 2);
        setTransform(3, 1, 2, 2);
        setTransform(4, 3, 1, 0);
        setTransform(5, 1, 1, 0);
    }
    else if (init(3, 4))
    {
        // Vertical cross
        //  U
        // LFR
        //  D
        //  B
        setTransform(0, 0, 1, 0);
        setTransform(1, 2, 1, 0);
        setTransform(2, 1, 0, 2);
        setTransform(3, 1, 2, 2);
        setTransform(4, 1, 3, 2);
        setTransform(5, 1, 1, 0);
    }
    else if (init(6, 1))
    {
        // Horizontal strip
        // LRUDBF
        setTransform(0, 0, 0, 0);
        setTransform(1, 1, 0, 0);
        setTransform(2, 2, 0, 2);
        setTransform(3, 3, 0, 2);
        setTransform(4, 4, 0, 0);
        setTransform(5, 5, 0, 0);
    }
    else if (init(1, 6))
    {
        // Vertical strip
        // LRUDBF
        setTransform(0, 0, 0, 0);
        setTransform(1, 0, 1, 0);
        setTransform(2, 0, 2, 2);
        setTransform(3, 0, 3, 2);
        setTransform(4, 0, 4, 0);
        setTransform(5, 0, 5, 0);
    }
    else
    {
        // TODO: add support for angular spherical map (a.k.a. mirror ball)

        // Latitude-Longitude
        convertSphericalToCubemap(src, dst, srcHeight);
        return;
    }

    /* Uncomment the following code (and use 'pSrc->' instead of 'src.' in the loop
    below) when/if we support producing cubemaps that have different resolution
    with the input image.

    // Rescale the image to match the output cubemap size
    const hkImage* pSrc = &src;
    hkImage tmp;
    if (srcWidth / tileCountX != dstDim)
    {
        const hkImageFilterBox filter;
        const hkResult result = hkImageProcessing::scale(
            src, tmp, dstDim * countWidth, dstDim * countHeight, &filter,
            hkImageAddressMode::CLAMP, hkImageAddressMode::CLAMP, hkColorf(0, 0, 0));
        HK_ASSERT(0xb8252bc, result.isSuccess(), "Unexpected failure in scaling cubemap image");
        pSrc = &tmp;
    }
    */

    HK_ASSERT(0x58eb2444, srcWidth / tileCountX == dstDim, "Unexpected wrong cubemap size.");

    {
        hkImageHeader dstDesc = src.getHeader();
        dstDesc.setWidth(dstDim);
        dstDesc.setHeight(dstDim);
        dstDesc.setNumFaces(kFaceCount);
        dst.reset(dstDesc);
    }

    for (int face = 0; face < kFaceCount; ++face)
    {
        const Transform& t = transform[face];
        hkVector4f sample;
        for (int y = 0; y < (int)dstDim; ++y)
        {
            for (int x = 0; x < (int)dstDim; ++x)
            {
                sample.load<4, HK_IO_SIMD_ALIGNED>(src.getPixelPointer<hkFloat32>(0, 0, 0, hkUint32(t.srcBaseX + x), hkUint32(t.srcBaseY + y)));
                const auto xx = hkUint32(t.dstX_mulSrcX * x + t.dstX_mulSrcY * y + t.dstX_add);
                const auto yy = hkUint32(t.dstY_mulSrcX * x + t.dstY_mulSrcY * y + t.dstY_add);
                sample.store<4, HK_IO_SIMD_ALIGNED>(dst.getPixelPointer<hkFloat32>(0, face, 0, xx, yy));
            }
        }
    }
}


// Scales the image in place by 2^bias.
//
// The image must be R32_G32_B32_A32_FLOAT (asserted). The data buffer is walked as a sequence
// of hkVector4f, from the pointer returned by getPixelPointer() up to getDataSize() bytes past
// it, and every vector is multiplied by the same factor in all four components (so alpha is
// scaled together with the color channels).
void hkImageProcessing::changeExposure(hkImage& image, hkFloat32 bias)
{
    HK_ASSERT(0x65940d4a, image.getFormat() == hkImageFormat::R32_G32_B32_A32_FLOAT, "This algorithm currently expects a RGBA 32 Float as input");

    hkVector4f* const firstPixel = image.getPixelPointer<hkVector4f>();

    // One bias unit doubles (or halves, when negative) the stored values.
    hkVector4f gain;
    gain.setAll(hkMath::pow(2.0f, bias));

    hkVector4f* const pastLastPixel = hkAddByteOffset(firstPixel, image.getDataSize());
    for (hkVector4f* pixel = firstPixel; pixel < pastLastPixel; ++pixel)
    {
        pixel->setMul(*pixel, gain);
    }
}

/*
 * Havok SDK - Base file, BUILD(#20180110)
 * 
 * Confidential Information of Microsoft Corporation.
 * Not for disclosure or distribution without Microsoft's prior written
 * consent.  This software contains code, techniques and know-how which
 * is confidential and proprietary to Microsoft.  Product and Trade Secret
 * source code contains trade secrets of Microsoft.  Havok Software (C)
 * Copyright 1999-2018 Microsoft Corporation.
 * All Rights Reserved. Use of this software is subject to the
 * terms of an end user license agreement.
 * 
 * The Havok Logo, and the Havok buzzsaw logo are trademarks of Microsoft.
 * Title, ownership rights, and intellectual property rights in the Havok
 * software remain in Microsoft and/or its suppliers.
 * 
 * Use of this software for evaluation purposes is subject to and
 * indicates acceptance of the End User licence Agreement for this
 * product. A copy of the license is included with this software and is
 * also available from Havok Support.
 * 
 */
