// TKBMS v1.0 -----------------------------------------------------
//
// PLATFORM   : ALL
// PRODUCT   : COMMON
// VISIBILITY   : PUBLIC
//
// ------------------------------------------------------TKBMS v1.0
#include <Common/Base/hkBase.h>
#include <Common/Base/Monitor/MonitorStreamAnalyzer/hkMonitorStreamParser.h>
#include <Common/Base/Monitor/MonitorStreamAnalyzer/hkMonitorStreamFlameGraph.h>
#include <Common/Base/Container/LocalArray/hkLocalArray.h>

// Height, in pixels, of one flame graph row (one nesting level). Quad Y extents are
// computed as level * PIXELS_PER_NODE, and the number of rows available to a thread is
// its pixel height divided by this value.
#define PIXELS_PER_NODE 15

namespace {

    // Timer name prefixes that are skipped when looking for the CPU node that gives context
    // to a GPU trace (see getMeaningfulNode, which matches them with hkString::beginsWithCase).
    // TODO: move to config
    static const char* _ignoredTimerNames[] = { "DrawCallHandle", "DrawCallRecording" };


    // Rounds n up to the next power of two (values that already are a power of two are
    // returned unchanged). Works by smearing the highest set bit of (n - 1) into every
    // lower bit position and then adding one. An input of 0 wraps around and yields 0.
    static inline hkUint32 hkRoundUpPow2(hkUint32 n)
    {
        --n;
        for (int shift = 1; shift <= 16; shift <<= 1)
        {
            n |= n >> shift;
        }
        ++n;
        return n;
    }

    // Describes where and how one thread's timers are mapped onto the graph area:
    // the visible time window plus the pixel rectangle it is projected into.
    struct TimerRange
    {
        hkUint16 threadNum;       // copied into each generated quad's m_threadNum
        double displayStartTime;  // start of the visible time window
        double displayEndTime;    // end of the visible time window
        hkUint16 startPixelX;     // left edge of the drawing area
        hkUint16 startPixelY;     // top edge of this thread's drawing area
        hkUint16 endPixelX;       // right edge of the drawing area
        hkUint16 maxLevels;       // number of PIXELS_PER_NODE high rows that fit in this thread's area
        hkUint32 pixelRange; // usually just feed main render target num pixels for this (only read by the GPU quad generation, as the "% of screen" denominator)
    };

    // Resolves the display color for a timer node.
    // Looks the node's name up in the color table; if it has no entry, walks up the parent
    // chain until a named ancestor with a color is found. If nothing matches, the default
    // (opaque white) is used. Whenever the node itself had no direct entry, the resolved
    // color is cached under the node's own name.
    hkColor::Argb findColor(_In_opt_ const hkMonitorStreamParser::Node* node, _In_ const hkMonitorStreamColorTableCache* colorTable)
    {
        hkColor::Argb resolved = 0xffffffff;
        bool needsCaching = false;

        for (const hkMonitorStreamParser::Node* cur = node; cur; cur = cur->m_parent)
        {
            if (colorTable->findColor(cur->m_name, resolved))
            {
                break;
            }
            // This level had no color of its own, so whatever we end up with is worth caching.
            needsCaching = true;
        }

        if (needsCaching)
        {
            colorTable->cacheColor(node->m_name, resolved);
        }
        return resolved;
    }

    // Recursively widens [minTime, maxTime] so that it covers every node in the subtree
    // rooted at 'node' that has an absolute, positive start time and a positive duration.
    // When highlightMask is non-zero only nodes with all of those flag bits set contribute
    // (the recursion still descends into every child).
    void findRange(_In_ const hkMonitorStreamParser::Node* node, int highlightMask, double& minTime, double& maxTime)
    {
        const bool passesHighlight = (highlightMask == 0) || node->m_flags.allAreSet( (hkUint16)highlightMask );
        if ( passesHighlight && !node->m_flags.anyIsSet( hkMonitorStreamParser::Node::FLAGS_TIME_IS_NOT_ABSOLUTE ) )
        {
            const double begin = node->m_absoluteStartTime;
            const float duration = node->m_value;
            const double end = begin + duration;
            if ( (begin > 0.f) && (end > begin) )
            {
                minTime = hkMath::min2(begin, minTime);
                maxTime = hkMath::max2(end, maxTime);
            }
        }

        const int numChildren = node->m_children.getSize();
        for (int childIdx = 0; childIdx < numChildren; ++childIdx)
        {
            findRange( node->m_children[childIdx], highlightMask, minTime, maxTime );
        }
    }

    void findRange( const hkArrayView<hkMonitorStreamParser::Tree*>& timers, double& totalStartTime, double& totalEndTime )
    {
        totalStartTime = HK_REAL_MAX;
        totalEndTime = 0.f;
        for (int n=0; n < timers.getSize(); ++n)
        {
            const hkMonitorStreamParser::Node* frameGraph = timers[n];
            for (int f=0; f < frameGraph->m_children.getSize(); ++f)
            {
                const hkMonitorStreamParser::Node* timerGraph = frameGraph->m_children[f];
                for (int c=0; c < timerGraph->m_children.getSize(); ++c)
                {
                    if ( !timerGraph->m_children[c]->m_flags.anyIsSet( hkMonitorStreamParser::Node::FLAGS_TIME_IS_NOT_ABSOLUTE ) && (timerGraph->m_children[c]->m_absoluteStartTime > 0.f) )
                    {
                        totalStartTime = hkMath::min2<double>( timerGraph->m_children[c]->m_absoluteStartTime, totalStartTime);
                        double endTime = timerGraph->m_children[c]->m_absoluteStartTime + timerGraph->m_children[c]->m_value;
                        totalEndTime = hkMath::max2<double>(totalEndTime, endTime );
                    }
                }
            }
        }
    }

    void findRange( const hkArrayView<hkGpuTraceResult>& gpuTrace, double& totalStartTime, double& totalEndTime )
    {
        totalStartTime = HK_REAL_MAX;
        totalEndTime = 0.f;
        for (int g=0; g < gpuTrace.getSize(); ++g)
        {
            totalEndTime = hkMath::max2<double>(totalEndTime, gpuTrace[g].m_gpuTimeEnd );
            if ( gpuTrace[g].m_type == hkGpuTraceResult::SCOPE_PROBE)
            {
                totalStartTime = hkMath::min2<double>( totalStartTime, gpuTrace[g].m_gpuTimeBegin );
            }
        }
    }

    // Returns 'n' itself, or its closest ancestor, whose name does not begin with any of the
    // prefixes in ignoreNames. Nodes without a name are never skipped. Returns null when the
    // input is null or the walk runs off the top of the tree.
    _Ret_maybenull_ const hkMonitorStreamParser::Node* getMeaningfulNode(_In_opt_ const hkMonitorStreamParser::Node* n, hkArrayView<const char*> ignoreNames)
    {
        const hkMonitorStreamParser::Node* candidate = n;
        while (candidate && candidate->m_name)
        {
            bool isIgnored = false;
            for (int i = 0; i < ignoreNames.getSize(); ++i)
            {
                if (hkString::beginsWithCase(candidate->m_name, ignoreNames[i]))
                {
                    isIgnored = true;
                    break;
                }
            }

            if (!isIgnored)
            {
                break;
            }
            // Ignored timer: try its parent instead.
            candidate = candidate->m_parent;
        }
        return candidate;
    }

    // Returns the first non-ignored node starting the search at n's parent
    // (see getMeaningfulNode). If n is null or has no parent, n itself is returned.
    _Ret_maybenull_ const hkMonitorStreamParser::Node* getMeaningfulParentNode(_In_opt_ const hkMonitorStreamParser::Node* n, hkArrayView<const char*> ignoreNames)
    {
        if (!n || !n->m_parent)
        {
            return n;
        }
        return getMeaningfulNode(n->m_parent, ignoreNames);
    }

    void findRange( const hkArrayView<hkMonitorStreamParser::Tree*>& cpuThreads, const hkArrayView<hkGpuTraceResult>& gpuTrace, int highlightMask, double& minTime, double& maxTime)
    {
        for (int c = 0; c < gpuTrace.getSize(); ++c)
        {
            const hkGpuTraceResult& gn = gpuTrace[c];

            if ( highlightMask )
            {
                hkArrayView<const char*> ignoredTimerNames = hkArrayViewT::make(_ignoredTimerNames);

                hkLocalArray<hkMonitorStreamGpuHandleCache::Mapping*> cpuContext(4);
                const hkMonitorStreamParser::Node* cpuNode = HK_NULL;

                for (int thread = 0; thread < cpuThreads.getSize(); ++thread  )
                {
                    hkMonitorStreamGpuHandleCache::Mapping* ctx = cpuThreads[thread]->m_gpuHandleCache.get( gn.m_id, false );
                    if (ctx) cpuContext.pushBack(ctx);
                }

                // addHandle node is in the cpu timer in the task that triggered the draw call etc
                // The timerNode is usually just in the executeCommandBuffer task so not as interesting a CPU context
                // so check all for that first.
                for (int ci = 0; (!cpuNode) && (ci < cpuContext.getSize()); ++ci)
                {
                    cpuNode = cpuContext[ci]->m_addHandleNode;
                }
                for (int cj = 0; (!cpuNode) && (cj < cpuContext.getSize()); ++cj)
                {
                    cpuNode = cpuContext[cj]->m_timerNode;
                }

                cpuNode = getMeaningfulNode(cpuNode, ignoredTimerNames);

                if ( !cpuNode || !cpuNode->m_flags.allAreSet( (hkUint16)highlightMask) )
                {
                    continue;
                }
            }

            if ( (gn.m_gpuTimeBegin > 0) && (gn.m_gpuTimeEnd > gn.m_gpuTimeBegin))
            {
                minTime = hkMath::min2(minTime, gn.m_gpuTimeBegin);
                maxTime = hkMath::max2(maxTime, gn.m_gpuTimeEnd);
            }
        }
    }


    // Appends a quad that is not backed by a parser node (e.g. an aggregate ".." entry) and
    // returns a reference to it. The quad is one row (PIXELS_PER_NODE) high, placed on row
    // currentNodeLevel, starts at the pixel matching startTime within the displayed window and
    // is pixelNodeWidth pixels wide. The returned reference refers to an element of 'quads',
    // so use it before appending anything else to that array.
    hkMonitorStreamFlameGraph::Quad& addQuad(_In_z_ const char* name, float duration, hkColor::Argb color,
        const TimerRange& timerInfo, int currentNodeLevel,
        hkArray<hkMonitorStreamFlameGraph::Quad>& quads,
        double startTime,
        float displayWidthTime,
        int displayWidthPixels,
        int pixelNodeWidth)
    {
        // Where, as a fraction of the displayed time window, the quad begins.
        const float windowFraction = float(startTime - timerInfo.displayStartTime) / displayWidthTime;
        const int rowTop = (hkUint16)currentNodeLevel*PIXELS_PER_NODE + timerInfo.startPixelY;
        const int rowBottom = (hkUint16)(currentNodeLevel + 1)*PIXELS_PER_NODE + timerInfo.startPixelY;

        hkMonitorStreamFlameGraph::Quad& quad = quads.expandOne();

        // Identification / payload
        quad.m_srcNode = HK_NULL;
        quad.m_srcGpuResult = HK_NULL;
        quad.m_timerName = name;
        quad.m_time = duration;
        quad.m_color = color;
        quad.m_threadNum = (hkUint16)timerInfo.threadNum;

        // Geometry
        quad.m_startX = timerInfo.startPixelX + (hkUint16)(windowFraction * displayWidthPixels);
        quad.m_endX = quad.m_startX + (hkUint16)pixelNodeWidth;
        quad.m_startY = rowTop;
        quad.m_endY = rowBottom;

        return quad;
    }

    // Same as the name-based addQuad, except that the quad's height is given explicitly in
    // pixels (yPixels) instead of being one full row. The top edge is still derived from
    // currentNodeLevel.
    hkMonitorStreamFlameGraph::Quad& addQuadWithHeight(_In_z_ const char* name, float duration, hkUint16 yPixels, hkColor::Argb color,
        const TimerRange& timerInfo, int currentNodeLevel,
        hkArray<hkMonitorStreamFlameGraph::Quad>& quads,
        double startTime,
        float displayWidthTime,
        int displayWidthPixels,
        int pixelNodeWidth )
    {
        // Where, as a fraction of the displayed time window, the quad begins.
        const float windowFraction = float(startTime - timerInfo.displayStartTime) / displayWidthTime;
        const int rowTop = (hkUint16)currentNodeLevel*PIXELS_PER_NODE + timerInfo.startPixelY;

        hkMonitorStreamFlameGraph::Quad& quad = quads.expandOne();

        // Identification / payload
        quad.m_srcNode = HK_NULL;
        quad.m_srcGpuResult = HK_NULL;
        quad.m_timerName = name;
        quad.m_time = duration;
        quad.m_color = color;
        quad.m_threadNum = (hkUint16)timerInfo.threadNum;

        // Geometry
        quad.m_startX = timerInfo.startPixelX + (hkUint16)(windowFraction * displayWidthPixels);
        quad.m_endX = quad.m_startX + (hkUint16)pixelNodeWidth;
        quad.m_startY = rowTop;
        quad.m_endY = quad.m_startY + yPixels;

        return quad;
    }

    // Appends a quad for a parser node and returns a reference to it. Name and time value are
    // taken from the node, which is also recorded as the quad's source. The quad is one row
    // (PIXELS_PER_NODE) high on row currentNodeLevel, starts at the pixel matching startTime
    // within the displayed window and is pixelNodeWidth pixels wide.
    hkMonitorStreamFlameGraph::Quad& addQuad(_In_ const hkMonitorStreamParser::Node* node, hkColor::Argb color,
        const TimerRange& timerInfo, int currentNodeLevel,
        hkArray<hkMonitorStreamFlameGraph::Quad>& quads,
        double startTime,
        float displayWidthTime,
        int displayWidthPixels,
        int pixelNodeWidth )
    {
        // Where, as a fraction of the displayed time window, the quad begins.
        const float windowFraction = float(startTime - timerInfo.displayStartTime) / displayWidthTime;
        const int rowTop = (hkUint16)currentNodeLevel*PIXELS_PER_NODE + timerInfo.startPixelY;
        const int rowBottom = (hkUint16)(currentNodeLevel + 1)*PIXELS_PER_NODE + timerInfo.startPixelY;

        hkMonitorStreamFlameGraph::Quad& quad = quads.expandOne();

        // Identification / payload
        quad.m_srcNode = node;
        quad.m_srcGpuResult = HK_NULL;
        quad.m_timerName = node->m_name;
        quad.m_time = node->m_value;
        quad.m_color = color;
        quad.m_threadNum = (hkUint16)timerInfo.threadNum;

        // Geometry
        quad.m_startX = timerInfo.startPixelX + (hkUint16)(windowFraction * displayWidthPixels);
        quad.m_endX = quad.m_startX + (hkUint16)pixelNodeWidth;
        quad.m_startY = rowTop;
        quad.m_endY = rowBottom;

        return quad;
    }


    // Emits one flame graph row for the direct children of 'node' and then recurses into
    // every child to emit the rows below.
    //
    //  node             - parent whose children are laid out on row currentNodeLevel
    //  colorTable       - color lookup / cache used by findColor
    //  timerInfo        - visible time window and target pixel rectangle
    //  currentNodeLevel - row index the children are drawn on
    //  quads            - output array the generated quads are appended to
    //  highlightMask    - if non-zero, nodes that do not have all of these flag bits set are
    //                     drawn darkened and mostly transparent
    //  smallestQuadSize - minimum width in pixels for a child to get its own quad
    //
    // Children narrower than smallestQuadSize are not drawn individually. Their widths are
    // accumulated and, once the sum reaches smallestQuadSize, drawn as a single ".." quad
    // colored like the parent node.
    void gerenateQuadsForChildren(_In_ const hkMonitorStreamParser::Node* node, _In_ const hkMonitorStreamColorTableCache* colorTable, const TimerRange& timerInfo, int currentNodeLevel,
                        hkArray<hkMonitorStreamFlameGraph::Quad>& quads, int highlightMask, int smallestQuadSize)
    {

        bool hadChildren = false;
        // State of the pending ".." aggregate: summed width (as a fraction of the window),
        // summed duration, and the start time of the first small node that went into it.
        float accumulatedIgnoredTimers = 0.f;
        float ignoredNodeDuration = 0.f;
        double ignoredStartTime = 0.;

        const int displayWidthPixels = timerInfo.endPixelX - timerInfo.startPixelX;
        const float displayWidthTime = (float)(timerInfo.displayEndTime - timerInfo.displayStartTime);

        for (int c = 0; c < node->m_children.getSize(); ++c)
        {
            const hkMonitorStreamParser::Node* cn = node->m_children[c];

            // multi timer things, normally don't show?
            if ( cn->m_flags.anyIsSet( hkMonitorStreamParser::Node::FLAGS_TIME_IS_NOT_ABSOLUTE) )
                continue;

            // see if in range (a slack of 1.0 time units past the window end is tolerated)
            double startTime = cn->m_absoluteStartTime;
            if ((startTime <= 0.f) || (startTime > (timerInfo.displayEndTime + 1.f)) )
                continue;


            float nodeDurationTime = cn->m_value;
            if (nodeDurationTime <= 0.f)
                continue;

            // when zooming etc, clamp to start pos
            if (startTime < timerInfo.displayStartTime)
            {
                float diff = float( timerInfo.displayStartTime - startTime );
                nodeDurationTime -= diff;
                startTime = timerInfo.displayStartTime;
            }

            double endTime = startTime + nodeDurationTime;
            if (endTime < (timerInfo.displayStartTime - 1.f))
                continue;

            hadChildren = true;

            // when zooming etc, clamp to end pos (so that text etc can center ok)
            if (endTime > timerInfo.displayEndTime)
            {
                float diff = float( endTime - timerInfo.displayEndTime );
                nodeDurationTime -= diff;
                endTime = timerInfo.displayEndTime;
            }

            // NOTE(review): for a node lying entirely outside the window but within the 1.0
            // slack allowed above, the clamping appears to leave nodeDurationTime negative,
            // and that negative width is then added to the ".." accumulator below - confirm
            // whether that is intended.

            //have a node to display, if it covers enough pixels to be seen
            float percentNodeWidth = nodeDurationTime/displayWidthTime;  // round down so that we don't have never ending set of 1 pixel nodes
            int pixelNodeWidth = hkMath::hkFloatToInt( percentNodeWidth * displayWidthPixels );
            if (pixelNodeWidth < smallestQuadSize)
            {
                // not big enough to display by itself but is a sample in range. Need to accumulate these ones
                accumulatedIgnoredTimers += percentNodeWidth;
                ignoredNodeDuration += nodeDurationTime;
                if (ignoredStartTime == 0)
                    ignoredStartTime = startTime;
                continue;
            }

            // See if we have ignored enough nodes to warrant a quad before adding new node
            int ignoredPixelNodeWidth = hkMath::hkFloatToInt(accumulatedIgnoredTimers * displayWidthPixels);
            if (ignoredPixelNodeWidth >= smallestQuadSize)
            {
                hkColor::Argb color = findColor( node, colorTable );
                if ( highlightMask && !node->m_flags.allAreSet( (hkUint16)highlightMask) )
                {
                    // then darken and alpha out bit
                    color = 0x3FFFFFFF & hkColor::darken(color);
                }
                addQuad("..", ignoredNodeDuration, color,
                        timerInfo, currentNodeLevel, quads, ignoredStartTime, displayWidthTime, displayWidthPixels, ignoredPixelNodeWidth);
                // start a fresh aggregate
                accumulatedIgnoredTimers = 0.f;
                ignoredNodeDuration = 0.f;
                ignoredStartTime = 0.0;
            }

            hkColor::Argb color = findColor( cn, colorTable );
            if ( highlightMask && !cn->m_flags.allAreSet( (hkUint16)highlightMask) )
            {
                // then darken and alpha out bit
                color = 0x3FFFFFFF & hkColor::darken(color);
            }

            // node is big enough, we know we have room, so lets display it
            addQuad(cn, color, timerInfo, currentNodeLevel, quads, startTime, displayWidthTime, displayWidthPixels, pixelNodeWidth);
        }

        // See if we have ignored enough nodes to warrant a quad before ending
        int ignoredPixelNodeWidth = hkMath::hkFloatToInt(accumulatedIgnoredTimers * displayWidthPixels);
        if (ignoredPixelNodeWidth >= smallestQuadSize)
        {
            hkColor::Argb color = findColor(node, colorTable);
            if (highlightMask && !node->m_flags.allAreSet((hkUint16)highlightMask))
            {
                // then darken and alpha out bit
                color = 0x3FFFFFFF & hkColor::darken(color);
            }

            addQuad("..", ignoredNodeDuration, color,
                timerInfo, currentNodeLevel, quads, ignoredStartTime, displayWidthTime, displayWidthPixels, ignoredPixelNodeWidth);
        }

        // Only move down a row if this row was actually used: at level 0 the row is kept
        // when no child was in range, so the first visible timers end up on the top row.
        if ( (currentNodeLevel != 0) || hadChildren)
            ++currentNodeLevel;

        // see if no more room
        // NOTE(review): rows 0..maxLevels-1 fit in the thread's area, yet a row index equal to
        // maxLevels still passes this test - possibly an off-by-one, confirm against the renderer.
        if ( currentNodeLevel > timerInfo.maxLevels )
            return;

        // otherwise continue on: every child is visited, whether or not it was drawn above
        for (int c=0; c < node->m_children.getSize(); ++c)
        {
            gerenateQuadsForChildren( node->m_children[c], colorTable, timerInfo, currentNodeLevel, quads, highlightMask, smallestQuadSize );
        }

    }


    // Fixed overlap tolerance in time units.
    // NOTE(review): not referenced anywhere in the visible part of this file; the overlap
    // tests below are passed a tolerance derived from the pixel width instead.
    #define OVERLAP_TOLERANCE 0.00001

    // A group of GPU trace results that were all issued from the same CPU timer node.
    // A block tracks the time span covered by its traces and the row (display level) it has
    // been assigned so that it does not overlap other blocks.
    struct Block
    {
        Block()
            : m_parentNode(HK_NULL)
            , m_minBlockStartTime(HK_REAL_MAX)
            , m_maxBlockEndTime(0)
            , m_displayLevel(0)
        {
        }

        Block(_In_ const hkMonitorStreamParser::Node* n)
            : m_parentNode(n)
            , m_minBlockStartTime(HK_REAL_MAX)
            , m_maxBlockEndTime(0)
            , m_displayLevel(0)
        {
        }

        // True if 'b' sits on the same display level and its time span intersects this
        // block's span by more than 'tolerance'.
        bool overlaps(const Block& b, double tolerance ) const
        {
            if (b.m_displayLevel != m_displayLevel)
            {
                return false;
            }
            if ( !(b.m_minBlockStartTime < (m_maxBlockEndTime - tolerance)) )
            {
                return false;
            }
            return b.m_maxBlockEndTime > (m_minBlockStartTime + tolerance);
        }

        // One GPU trace result inside a block, together with its (clamped) displayed time
        // span and the row offset it was assigned within the block.
        struct Trace
        {
            Trace()
                : m_t(HK_NULL)
                , m_displayStartTime(0.0)
                , m_displayEndTime(0.f)
                , m_displayLevel(0)
            {
            }

            Trace(_In_ const hkGpuTraceResult* res, double s = 0, double e = 0)
                : m_t(res)
                , m_displayStartTime(s)
                , m_displayEndTime(e)
                , m_displayLevel(0)
            {
            }

            // True if 't' sits on the same display level and its displayed span intersects
            // this trace's displayed span by more than 'tolerance'.
            bool overlaps(const Trace& t, double tolerance ) const
            {
                if (t.m_displayLevel != m_displayLevel)
                {
                    return false;
                }
                if ( !(t.m_displayStartTime < (m_displayEndTime - tolerance)) )
                {
                    return false;
                }
                return t.m_displayEndTime > (m_displayStartTime + tolerance);
            }

            // The traced GPU result (not owned).
            const hkGpuTraceResult* m_t;

            double m_displayStartTime;
            double m_displayEndTime;

            int m_displayLevel;
        };

        void appendTrace(const Trace& t)
        {
            m_traces.pushBack(t);
        }

        // Number of rows needed to show all traces: the highest trace display level plus one.
        int getTraceVerticalSize() const
        {
            int deepestLevel = 0;
            for (int i = m_traces.getSize() - 1; i >= 0; --i)
            {
                const int level = m_traces[i].m_displayLevel;
                if (level > deepestLevel)
                {
                    deepestLevel = level;
                }
            }
            return deepestLevel + 1;
        }

        // CPU node that issued the traces in this block.
        const hkMonitorStreamParser::Node* m_parentNode;
        hkArray<Trace> m_traces;

        // Union of the GPU time spans of all traces in the block.
        double m_minBlockStartTime;
        double m_maxBlockEndTime;

        int m_displayLevel;
    };

    // Builds the quads for the GPU flame graph.
    //
    //  cpuThreads       - per thread CPU timer trees; their GPU handle caches map a GPU trace id
    //                     back to the CPU node that issued the work
    //  gpuTrace         - the GPU trace results to display
    //  colorTable       - not used by this function
    //  timerInfo        - visible time window, target pixel rectangle and pixelRange
    //  quads            - output array the generated quads are appended to
    //  highlightMask    - if non-zero, blocks whose CPU node does not have all of these flag
    //                     bits set are drawn darkened and mostly transparent
    //  smallestQuadSize - minimum width in pixels for a trace to get its own quad
    //
    // The work is done in two passes:
    //  1. Every GPU result is assigned to a Block keyed by its (non-ignored) CPU context node.
    //     Results without any CPU context are dropped.
    //  2. For each block: pick a display level that does not overlap earlier blocks (stepping
    //     by 4 rows), emit a quad per trace on rows displayLevel + 2 + traceLevel, optionally a
    //     pixel coverage quad on a fixed row, an aggregate "..." quad for traces that were too
    //     narrow, and finally two quads for the CPU context on rows displayLevel and
    //     displayLevel + 1.
    //
    // GPU durations are multiplied by 1000 before being stored in the quads (presumably
    // seconds to milliseconds - confirm against hkGpuTraceResult).
    void gerenateQuadsForGpuTimers( const hkArrayView<hkMonitorStreamParser::Tree*>& cpuThreads, const hkArrayView<hkGpuTraceResult>& gpuTrace, _In_ const hkMonitorStreamColorTableCache* colorTable, const TimerRange& timerInfo, hkArray<hkMonitorStreamFlameGraph::Quad>& quads, int highlightMask, int smallestQuadSize)
    {
        const int displayWidthPixels = timerInfo.endPixelX - timerInfo.startPixelX;
        const float displayWidthTime = (float)(timerInfo.displayEndTime - timerInfo.displayStartTime);
        double overlapTol = (displayWidthTime / displayWidthPixels); // 1 pixel in time

        hkArray<Block> blocks;
        hkUlong totalNumPixelsTouched = 0;

        //
        // Pass 1: group the GPU results into blocks by their CPU context node
        //
        hkArrayView<const char*> ignoredTimerNames = hkArrayViewT::make(_ignoredTimerNames);
        for (int c = 0; c < gpuTrace.getSize(); ++c)
        {
            const hkGpuTraceResult& gn = gpuTrace[c];

            // 0xFFFFFFFF marks "no pixel count available"; counted for every result, even
            // those dropped below for lack of CPU context
            totalNumPixelsTouched += (gn.m_numPixelsTouched != 0xFFFFFFFFu ? gn.m_numPixelsTouched : 0);

            // find a cpu timer node that will give us the context in which the renderer call happened
            hkLocalArray<hkMonitorStreamGpuHandleCache::Mapping*> cpuContext(4);
            const hkMonitorStreamParser::Node* cpuNode = HK_NULL;

            for (int thread = 0; thread < cpuThreads.getSize(); ++thread  )
            {
                hkMonitorStreamGpuHandleCache::Mapping* ctx = cpuThreads[thread]->m_gpuHandleCache.get( gn.m_id, false );
                if (ctx) cpuContext.pushBack(ctx);
            }

            // addHandle node is in the cpu timer in the task that triggered the draw call etc
            // The timerNode is usually just in the executeCommandBuffer task so not as interesting a CPU context
            // so check all for that first.
            for (int ci = 0; (!cpuNode) && (ci < cpuContext.getSize()); ++ci)
            {
                cpuNode = cpuContext[ci]->m_addHandleNode;
            }
            for (int cj = 0; (!cpuNode) && (cj < cpuContext.getSize()); ++cj)
            {
                cpuNode = cpuContext[cj]->m_timerNode;
            }

            cpuNode = getMeaningfulNode(cpuNode, ignoredTimerNames);
            if (!cpuNode)
            {
                continue;
            }

            // Reuse the most recent block with the same CPU node, otherwise start a new one
            int foundTraceParent = -1;
            for (int bi = blocks.getSize()-1; bi >= 0; --bi)
            {
                if (blocks[bi].m_parentNode == cpuNode)
                {
                    foundTraceParent = bi;
                    break;
                }
            }
            if (foundTraceParent < 0)
            {
                foundTraceParent = blocks.getSize();
                blocks.expandOne();
            }

            Block& block = blocks[foundTraceParent];
            block.m_parentNode = cpuNode;
            Block::Trace gt(gpuTrace.begin() + c);
            block.appendTrace(gt);
            block.m_minBlockStartTime = hkMath::min2(block.m_minBlockStartTime, gn.m_gpuTimeBegin);
            block.m_maxBlockEndTime = hkMath::max2(block.m_maxBlockEndTime, gn.m_gpuTimeEnd);
        }


        //
        // Pass 2: lay out and emit the quads of each block
        //
        for (int b = 0; b < blocks.getSize(); ++b)
        {
            Block& block = blocks[b];

            for (int bb = b - 1; bb >= 0; --bb)
            {
                // see if we overlap with a block
                if (block.overlaps(blocks[bb], overlapTol ))
                {
                    // move down by one block height and re-test against all earlier blocks
                    // (the loop's --bb makes the scan restart at b - 1)
                    block.m_displayLevel += 4;
                    bb = b;
                }
            }

            // Full (unclamped) and displayed (clamped to the window) time span of the block
            double minNodeStartTime = HK_REAL_MAX;
            double maxNodeEndTime = 0;
            double minDisplayStartTime = HK_REAL_MAX;
            double maxDisplayEndTime = 0;

            bool notHighlighted = highlightMask && ( !block.m_parentNode->m_flags.allAreSet((hkUint16)highlightMask) );

            // Aggregate of the traces that were too narrow to be drawn on their own
            int ignoredPixels = 0;
            double ignoredStartTime = 0;
            float ignoredDuration = 0;
            int ignoredTracePixelCount = 0;

            for (int g=0; g < block.m_traces.getSize(); ++g)
            {
                const hkGpuTraceResult& gn = *(block.m_traces[g].m_t);

                // see if in range
                double startTime = gn.m_gpuTimeBegin;
                double endTime = gn.m_gpuTimeEnd;
                float nodeDurationTime = (float)(endTime - startTime);
                minNodeStartTime = hkMath::min2(startTime, minNodeStartTime);
                maxNodeEndTime = hkMath::max2(endTime, maxNodeEndTime);

                if (   (startTime <= 0.f)
                    || (startTime > (timerInfo.displayEndTime + 1.f) )
                    || (endTime < (timerInfo.displayStartTime - 0.0001) )
                    || (nodeDurationTime <= 0.f) )
                {
                    continue;
                }

                float origNodeDurationTime = nodeDurationTime;

                // when zooming etc, clamp to start pos
                if (startTime < timerInfo.displayStartTime)
                {
                    float diff = float(timerInfo.displayStartTime - startTime);
                    nodeDurationTime -= diff;
                    startTime = timerInfo.displayStartTime;
                }

                // when zooming etc, clamp to end pos (so that text etc can center ok)
                if (endTime > timerInfo.displayEndTime)
                {
                    float diff = float(endTime - timerInfo.displayEndTime);
                    nodeDurationTime -= diff;
                    endTime = timerInfo.displayEndTime;
                }

                minDisplayStartTime = hkMath::min2(startTime, minDisplayStartTime);
                maxDisplayEndTime = hkMath::max2(endTime, maxDisplayEndTime);

                block.m_traces[g].m_displayStartTime = startTime;
                block.m_traces[g].m_displayEndTime = endTime;

                // see if overlaps with any before in this block to get the level
                for (int ti = g-1; ti >= 0; --ti)
                {
                    if (block.m_traces[g].overlaps( block.m_traces[ti], overlapTol ))
                    {
                        // move down one row and re-test against all earlier traces
                        block.m_traces[g].m_displayLevel++;
                        ti = g;
                    }
                }

                //have a node to display, if it covers enough pixels to be seen
                float percentNodeWidth = nodeDurationTime / displayWidthTime;  // round down so that we don't have never ending set of 1 pixel nodes
                int pixelNodeWidth = hkMath::hkFloatToInt(percentNodeWidth * displayWidthPixels);
                if (pixelNodeWidth < smallestQuadSize)
                {
                    if (ignoredPixels == 0)
                    {
                        ignoredStartTime = startTime;
                    }
                    ignoredPixels += pixelNodeWidth;
                    ignoredDuration += origNodeDurationTime;
                    if (gn.m_numPixelsTouched < 0xFFFFFFFFu)
                    {
                        ignoredTracePixelCount += gn.m_numPixelsTouched;
                    }
                    continue;
                }

                if (ignoredPixels > 0)
                {
                    // block had some small timers, but now have a larger one, so add to start of our larger one (and hope viewer zooms in a bit if really interested)
                    // have to ignore for now as does not take into account the different displayLevels..
                    //startTime = ignoredStartTime;
                    //pixelNodeWidth += ignoredPixels;
                }

                // submit gpu probe quad
                const char* name = gn.m_type == hkGpuTraceResult::SCOPE_CALL ? "Call" : "Probe";
                hkColor::Argb traceColor = gn.m_type == hkGpuTraceResult::SCOPE_CALL ? hkColor::DARKRED : hkColor::RED;
                if (notHighlighted)
                {
                    traceColor = 0x3FFFFFFF & hkColor::darken(traceColor);
                }

                // 'g' indexes the traces of this block, not gpuTrace, so the source result has to
                // come from the trace entry itself rather than from gpuTrace.begin() + g.
                const hkGpuTraceResult* srcGpuResult = block.m_traces[g].m_t;
                {
                    hkMonitorStreamFlameGraph::Quad& q = addQuad( name, origNodeDurationTime*1000.f, traceColor, timerInfo, block.m_displayLevel + 2 + block.m_traces[g].m_displayLevel,
                                                                  quads, startTime, displayWidthTime, displayWidthPixels, pixelNodeWidth );
                    q.m_srcGpuResult = srcGpuResult;
                }

                // pixel count
                if (gn.m_numPixelsTouched < 0xFFFFFFFFu)
                {
                    int level = 5;

                    // Guard the denominators: a zero total / zero pixelRange would otherwise give
                    // NaN or infinity, and converting those to hkUint16 below is undefined.
                    float percent = (totalNumPixelsTouched != 0) ? ((100.f * gn.m_numPixelsTouched) / (float)totalNumPixelsTouched) : 0.f;
                    float percentRange = (timerInfo.pixelRange != 0) ? ((100.f * gn.m_numPixelsTouched) / (float)timerInfo.pixelRange) : 0.f;

                    // NOTE(review): nameStr is a local buffer. This assumes Quad::m_timerName takes a
                    // copy of the string; if it only stores the pointer it dangles after this scope - confirm.
                    hkStringBuf nameStr; nameStr.printf("Pixels:%d (%.2f%% of frame, %.2f%% of screen)", gn.m_numPixelsTouched, percent, percentRange);
                    float yscale = PIXELS_PER_NODE * 0.03f; // 100% == 3 levels
                    hkColor::Argb pixelColor =  hkColor::PINK;
                    if (notHighlighted)
                    {
                        pixelColor = 0x3FFFFFFF & hkColor::darken(pixelColor);
                    }
                    // drawn on a fixed row (level * 3), independent of the block's display level
                    hkMonitorStreamFlameGraph::Quad& qp = addQuadWithHeight(nameStr.cString(), origNodeDurationTime*1000.f, (hkUint16)(percentRange * yscale), pixelColor, timerInfo, level * 3, quads, startTime, displayWidthTime, displayWidthPixels, pixelNodeWidth);
                    qp.m_srcGpuResult = srcGpuResult;
                }

            }

            if (ignoredPixels >= smallestQuadSize)
            {
                // small quads at end of block, and big enough to show
                hkColor::Argb traceColor = hkColor::INDIANRED;
                if (notHighlighted)
                {
                    traceColor = 0x3FFFFFFF & hkColor::darken(traceColor);
                }
                // the aggregate is placed on the row of the block's first trace
                int assumedLevel = 0;
                /*hkMonitorStreamFlameGraph::Quad& q =*/ addQuad("...", ignoredDuration*1000.f, traceColor, timerInfo, block.m_displayLevel + 2 + block.m_traces[assumedLevel].m_displayLevel,
                                                             quads, ignoredStartTime, displayWidthTime, displayWidthPixels, ignoredPixels);

                //do ignoredTracePixelCount here if you want too
            }

            // Submit cpu block now (only if at least one trace of the block was in range)
            if (maxDisplayEndTime > 0)
            {
                const hkMonitorStreamParser::Node* cpuNode = block.m_parentNode;
                const hkMonitorStreamParser::Node* topLevelCpuNode = getMeaningfulParentNode(cpuNode, ignoredTimerNames);
                double totalTimeDuration = maxNodeEndTime - minNodeStartTime;
                float timeInMs = float( totalTimeDuration*1000.0 );
                double totalDisplayTimeDuration = maxDisplayEndTime - minDisplayStartTime;
                int pixelBlockDuration = int( ( totalDisplayTimeDuration / displayWidthTime) * displayWidthPixels );
                if ( (pixelBlockDuration > 0) && topLevelCpuNode && cpuNode)
                {
                    hkColor::Argb nodeColor = hkColor::BEIGE;
                    if (notHighlighted)
                    {
                        nodeColor = 0x3FFFFFFF & hkColor::darken(nodeColor);
                    }

                    // Each returned reference is used before the next addQuad call, which may
                    // grow 'quads' and invalidate it.
                    hkMonitorStreamFlameGraph::Quad& topQ = addQuad(topLevelCpuNode->m_name, timeInMs, nodeColor, timerInfo, block.m_displayLevel, quads, minDisplayStartTime, displayWidthTime, displayWidthPixels, pixelBlockDuration);
                    topQ.m_srcNode = topLevelCpuNode;
                    hkMonitorStreamFlameGraph::Quad& parentQ = addQuad(cpuNode->m_name, timeInMs, nodeColor, timerInfo,  block.m_displayLevel + 1, quads, minDisplayStartTime, displayWidthTime, displayWidthPixels, pixelBlockDuration);
                    parentQ.m_srcNode = cpuNode;

                }
            }
        }


    }
}

//
// hkMonitorStreamFlameGraph

// Creates an empty flame graph: zero sized graph area, no threads, a default frame time of
// 16.666 and a freshly created color table filled with the default colors.
hkMonitorStreamFlameGraph::hkMonitorStreamFlameGraph()
{
    // Nothing to draw into until init() supplies a graph area.
    m_graphArea.setPosition(0,0);
    m_graphArea.setSize(0,0);

    // Timing state
    m_numThreads = 0;
    m_frameTime = 16.666f;
    m_graphStartTime = 0.f;
    m_graphEndTime = 0.f;

    // Quads narrower than this many pixels are not drawn individually.
    m_smallestQuadSize = 3;

    // Take over the reference held by the newly allocated table.
    m_colorTable.setAndDontIncrementRefCount( new hkMonitorStreamColorTable() );
    m_colorTable->setupDefaultColorTable();
}

// Sets the pixel rectangle the graph is drawn into and discards any previously generated quads.
void hkMonitorStreamFlameGraph::init( hkRect16 graphArea )
{
    m_quads.setSize(0);
    m_graphArea = graphArea;
}

// Returns the number of threads covered by the last update; the update functions set this
// to 0 when they find nothing to display.
hkInt32 hkMonitorStreamFlameGraph::getNumThreads() const
{
    return m_numThreads;
}

// Returns the start of the displayed window, relative to the start of the frame, as computed
// by the last update.
float hkMonitorStreamFlameGraph::getGraphStartTime() const
{
    return m_graphStartTime;
}

// Returns the end of the displayed window, relative to the start of the frame, as computed
// by the last update.
float hkMonitorStreamFlameGraph::getGraphEndTime() const
{
    return m_graphEndTime;
}

// Regenerates the quads for the CPU timers of all threads.
//
//  timers                - one timer tree per thread; each thread gets an equal horizontal band
//                          of the graph area
//  startPos              - start of the displayed window as a fraction of the frame time
//  endPos                - end of the displayed window as a fraction of the frame time; values
//                          <= 0 select the whole frame (1.0)
//  ignoreFirstNCalls     - currently not used by this function
//  highlightNodeFlagMask - if non-zero, nodes without all of these flag bits are drawn dimmed
//
// If no timer provides a valid time range, or the requested window is empty, no quads are
// produced and the thread count is set to 0.
void hkMonitorStreamFlameGraph::updateCpu( const hkArrayView<hkMonitorStreamParser::Tree*>& timers, float startPos /*= 0*/, float endPos /*= -1.f*/, int ignoreFirstNCalls /*= 0*/, int highlightNodeFlagMask)
{
    m_quads.setSize(0);

    m_numThreads = timers.getSize();

    // Work out our full range (so max across all threads)
    double totalStartTime = HK_REAL_MAX;
    double totalEndTime = 0.f;
    findRange( timers, totalStartTime, totalEndTime);

    // Also covers the "no timers at all" case, so m_numThreads is non-zero further down
    if ( totalStartTime >= totalEndTime )
    {
        m_numThreads = 0;
        return;
    }

    double maxFrameTime = totalEndTime - totalStartTime;

    // round up to ms boundaries
    m_frameTime = ( int(maxFrameTime) + 1.f );

    //Clamp min to 60Hz frame
    m_frameTime = hkMath::max2(16.666f, m_frameTime);

    // See if we only want to get a sub range of that total time
    const float endFraction = ( endPos > 0 ? endPos : 1.f );
    double displayStartTime = totalStartTime + (startPos * m_frameTime);
    double displayEndTime = totalStartTime + (endFraction * m_frameTime );
    if (displayEndTime <= displayStartTime)
    {
        m_numThreads = 0;
        m_graphStartTime = m_graphEndTime = 0.f;
        return;
    }

    m_graphStartTime = float(startPos * m_frameTime);
    m_graphEndTime = float(endFraction * m_frameTime );

    // generate the quads, starting at ignoreFirstNCalls deep, stopping when reach too high for this thread or < 1 pixel.
    hkUint16 pixelsPerThread = hkUint16( m_graphArea.getHeight() / m_numThreads );

    TimerRange tr;
    tr.threadNum = 0;     // set per thread below
    tr.startPixelY = 0;   // set per thread below
    tr.pixelRange = 0;    // only meaningful for GPU graphs, but never hand out an indeterminate value
    tr.displayStartTime = displayStartTime;
    tr.displayEndTime = displayEndTime;
    tr.maxLevels = pixelsPerThread / PIXELS_PER_NODE;
    tr.startPixelX = m_graphArea.getX();
    tr.endPixelX   = m_graphArea.getX() + m_graphArea.getWidth();

    hkMonitorStreamColorTableCache cache;
    cache.m_colorTable = m_colorTable;
    for (int t=0; t < m_numThreads; ++t)
    {
        const hkMonitorStreamParser::Node* timerGraph = timers[t];

        // each thread draws into its own band, stacked top to bottom
        tr.threadNum = (hkUint16) t;
        tr.startPixelY =  (hkUint16) ( m_graphArea.getY() + t*pixelsPerThread );
        gerenateQuadsForChildren( timerGraph, &cache, tr, 0, m_quads, highlightNodeFlagMask, m_smallestQuadSize);
    }

}

void hkMonitorStreamFlameGraph::updateGpu( const hkArrayView<hkMonitorStreamParser::Tree*>& timers, const hkArrayView<hkGpuTraceResult>& gpuTrace, float startPos, float endPos, hkUint32 viewPixels, int highlightNodeFlagMask )
{
    m_quads.setSize(0);
    m_numThreads = 1;

    // Work out our full range (so max across all threads)
    double totalStartTime = HK_REAL_MAX;
    double totalEndTime = 0.f;
    findRange( gpuTrace, totalStartTime, totalEndTime);

    double maxFrameTime = totalEndTime - totalStartTime;
    if (maxFrameTime < 0.0)
    {
        return; // no SCOPE_PROBE normally
    }

    // round up to ms boundaries
    m_frameTime = ( int(maxFrameTime * 1000.0) + 1 ) / 1000.f;

    //Clamp min to 60Hz frame
    //m_frameTime = hkMath::max2(0.0166f, m_frameTime);

    // See if we only want to get a sub range of that total time
    double displayStartTime = totalStartTime + (startPos * m_frameTime);
    double displayEndTime = totalStartTime + (( endPos > 0? endPos : 1.f) * m_frameTime );
    if (displayEndTime <= displayStartTime)
    {
        m_numThreads = 0;
        m_graphStartTime = m_graphEndTime = 0.f;
        return;
    }

    m_graphStartTime = float(startPos * m_frameTime) * 1000.f;
    m_graphEndTime = float((endPos > 0? endPos : 1.f) * m_frameTime ) * 1000.f;

    // generate the quads, starting at ignoreFirstNCalls deep, stopping when reach too high for this thread or < 1 pixel.
    hkUint16 pixelsPerThread = hkUint16( m_graphArea.getHeight() / m_numThreads );

    TimerRange tr;
    tr.threadNum = (hkUint16) 0;
    tr.displayStartTime = displayStartTime;
    tr.displayEndTime = displayEndTime;
    tr.maxLevels = pixelsPerThread / PIXELS_PER_NODE;
    tr.startPixelX = m_graphArea.getX();
    tr.endPixelX = m_graphArea.getX() + m_graphArea.getWidth();
    tr.startPixelY = (hkUint16) ( m_graphArea.getY() );
    tr.pixelRange = viewPixels;

    hkMonitorStreamColorTableCache cache;
    cache.m_colorTable = m_colorTable;
    gerenateQuadsForGpuTimers(timers, gpuTrace, &cache, tr, m_quads, highlightNodeFlagMask, m_smallestQuadSize);

}

// Returns the first quad (in m_quads order) whose inclusive bounds contain the point (x, y),
// or HK_NULL when no quad covers that point.
_Ret_maybenull_ const hkMonitorStreamFlameGraph::Quad* hkMonitorStreamFlameGraph::getQuadUnderPos(int x, int y) const
{
    // Plain linear scan; could be accelerated if the quad count ever makes this slow.
    const int numQuads = m_quads.getSize();
    for (int idx = 0; idx < numQuads; ++idx)
    {
        const Quad& candidate = m_quads[idx];

        const bool withinX = ( candidate.m_startX <= x ) && ( candidate.m_endX >= x );
        if ( !withinX )
        {
            continue;
        }

        const bool withinY = ( candidate.m_startY <= y ) && ( candidate.m_endY >= y );
        if ( withinY )
        {
            return &candidate;
        }
    }

    return HK_NULL;
}

void hkMonitorStreamFlameGraph::findTightZoomCpu( const hkArrayView<hkMonitorStreamParser::Tree*>& timers, int nodeFlagMask, float& startPos, float& endPos )
{
    startPos = 0.f;
    endPos = 1.f;

    double totalStartTime = HK_REAL_MAX;
    double totalEndTime = 0.f;
    findRange( timers, totalStartTime, totalEndTime);

    if ( totalStartTime >= totalEndTime )
    {
        return;
    }

    double maxFrameTime = totalEndTime - totalStartTime;
    float displayFrameTime = ( int(maxFrameTime) + 1.f );
    displayFrameTime = hkMath::max2(16.666f, displayFrameTime);

    if (nodeFlagMask)
    {
        double minTime = HK_REAL_MAX;
        double maxTime = 0;
        for (int t=0; t < timers.getSize(); ++t)
        {
            // ignore root
            for (int c=0; c < timers[t]->m_children.getSize(); ++c)
            {
                findRange(timers[t]->m_children[c], nodeFlagMask, minTime, maxTime);
            }
        }
        if (maxTime > 0)
        {
            startPos = float(minTime - totalStartTime) / (displayFrameTime);
            endPos = float(maxTime - totalStartTime) / (displayFrameTime);
            startPos = hkMath::clamp(startPos, 0.f, 1.f);
            endPos = hkMath::clamp(endPos, 0.f, 1.f);
            return;
        }
    }

    // just remove the padding the main func does, buit normally this func called with a node mask
    endPos = float(totalEndTime - totalStartTime) / (displayFrameTime);
}

// Computes the [startPos, endPos] window (as fractions of the padded frame time) that tightly
// encloses the GPU timers of interest.
//
// With a non-zero nodeFlagMask, the window covers the time range reported by the masked
// findRange() overload, clamped to [0, 1]. Otherwise, or if that search reports nothing, the
// window starts at 0 and ends where the real trace data ends, i.e. the frame padding is removed.
// If the trace has no positive duration, the outputs are left at 0 and 1.
void hkMonitorStreamFlameGraph::findTightZoomGpu( const hkArrayView<hkMonitorStreamParser::Tree*>& timers, const hkArrayView<hkGpuTraceResult>& gpuTrace, int nodeFlagMask, float& startPos, float& endPos )
{
    // Default to the full, un-zoomed view.
    startPos = 0.f;
    endPos = 1.f;

    // Overall range of the GPU trace.
    double traceBegin = HK_REAL_MAX;
    double traceEnd = 0.0;
    findRange( gpuTrace, traceBegin, traceEnd );

    const double traceSpan = traceEnd - traceBegin;
    if ( traceSpan <= 0.0 )
    {
        return;
    }

    // Same rounded-up frame length the GPU update uses.
    const float paddedSpan = ( int(traceSpan * 1000.0) + 1 ) / 1000.f;

    bool zoomedToMarked = false;
    if ( nodeFlagMask != 0 )
    {
        double markedBegin = HK_REAL_MAX;
        double markedEnd = 0;
        findRange( timers, gpuTrace, nodeFlagMask, markedBegin, markedEnd );

        if ( markedEnd > 0 )
        {
            const float rawStart = float(markedBegin - traceBegin) / paddedSpan;
            const float rawEnd = float(markedEnd - traceBegin) / paddedSpan;
            startPos = hkMath::clamp(rawStart, 0.f, 1.f);
            endPos = hkMath::clamp(rawEnd, 0.f, 1.f);
            zoomedToMarked = true;
        }
    }

    if ( !zoomedToMarked )
    {
        // Just remove the padding the main update function adds; normally this function is
        // called with a node mask, so this is the fallback.
        endPos = float(traceSpan) / paddedSpan;
    }
}

// Sets the color table handed to the color cache on subsequent updateCpu()/updateGpu() calls.
// 'c' may be null.
void hkMonitorStreamFlameGraph::setColorTable(_In_opt_ hkMonitorStreamColorTable* c)
{
    this->m_colorTable = c;
}

/*
 * Havok SDK - Base file, BUILD(#20180110)
 * 
 * Confidential Information of Microsoft Corporation.
 * Not for disclosure or distribution without Microsoft's prior written
 * consent.  This software contains code, techniques and know-how which
 * is confidential and proprietary to Microsoft.  Product and Trade Secret
 * source code contains trade secrets of Microsoft.  Havok Software (C)
 * Copyright 1999-2018 Microsoft Corporation.
 * All Rights Reserved. Use of this software is subject to the
 * terms of an end user license agreement.
 * 
 * The Havok Logo, and the Havok buzzsaw logo are trademarks of Microsoft.
 * Title, ownership rights, and intellectual property rights in the Havok
 * software remain in Microsoft and/or its suppliers.
 * 
 * Use of this software for evaluation purposes is subject to and
 * indicates acceptance of the End User licence Agreement for this
 * product. A copy of the license is included with this software and is
 * also available from Havok Support.
 * 
 */
