/*
 *
 * Confidential Information of Telekinesys Research Limited (t/a Havok). Not for disclosure or distribution without Havok's
 * prior written consent. This software contains code, techniques and know-how which is confidential and proprietary to Havok.
 * Product and Trade Secret source code contains trade secrets of Havok. Havok Software (C) Copyright 1999-2014 Telekinesys Research Limited t/a Havok. All Rights Reserved. Use of this software is subject to the terms of an end user license agreement.
 *
 */

#include <Demos/demos.h>
#include <Demos/Animation/Api/Multithreading/SampleAndBlend/SampleAndBlendMultithreadingDemo.h>
#include <Animation/Animation/Animation/Quantized/hkaQuantizedAnimation.h>
#include <Animation/Animation/Animation/Mirrored/hkaMirroredAnimation.h>
#include <Animation/Animation/Animation/Mirrored/hkaMirroredSkeleton.h>
#include <Animation/Animation/Animation/PredictiveCompressed/hkaPredictiveCompressedAnimation.h>
#include <Animation/Animation/Animation/SplineCompressed/hkaSplineCompressedAnimation.h>
#include <Animation/Animation/Animation/ReferencePose/hkaReferencePoseAnimation.h>
#include <Animation/Animation/Animation/Util/hkaAdditiveAnimationUtility.h>
#include <Animation/Animation/hkaAnimationContainer.h>
#include <Animation/Animation/Playback/Control/Default/hkaDefaultAnimationControl.h>
#include <Animation/Animation/Playback/hkaAnimatedSkeleton.h>
#include <Animation/Animation/Playback/Multithreaded/hkaMultithreadedAnimationUtils.h>
#include <Animation/Animation/Playback/Multithreaded/Common/hkaCommonAnimation.h>
#include <Animation/Animation/Playback/SampleAndBlend/hkaSampleBlendJob.h>
#include <Animation/Animation/Playback/SampleAndBlend/hkaSampleBlendJobQueueUtils.h>
#include <Animation/Animation/Playback/Multithreaded/SampleAndCombine/hkaAnimationSampleAndCombineJobQueueUtils.h>
#include <Animation/Animation/Rig/hkaPose.h>
#include <Common/Serialize/Util/hkLoader.h>
#include <Common/Serialize/Util/hkRootLevelContainer.h>
#include <Demos/DemoCommon/Utilities/Animation/AnimationUtils.h>
#include <Demos/DemoCommon/Utilities/Asset/hkAssetManagementUtil.h>
#include <Demos/DemoCommon/Utilities/WindowedAverage/WindowedAverage.h>

// Number of animated skeleton instances (and output poses) the demo creates in its constructor.
#define NUM_SKELETONS 50

// When true, every control after the first on a skeleton gets its playback speed set to the ratio of
// its own animation duration to the duration of the first control's animation (see the constructor).
static const bool SYNCHRONIZE_ANIMATIONS = true;

// Assets loaded by the demo. Builds with HK_REAL_IS_DOUBLE defined use the "_DP" versions of the
// same files; both branches list the animations in the same order.
//  - ANIMATION_FILE_NAMES: one animation file per entry; the constructor loads all of them.
//  - RIG_FILE_NAME: file the skeleton is taken from.
//  - ANIM_PATH: when not HK_NULL, the constructor combines it with each animation file name
//    before resolving the asset path.
#if defined(HK_REAL_IS_DOUBLE)
static const char* ANIMATION_FILE_NAMES[] =
{
	"Resources/Animation/HavokGirl/hkWalkLoop_DP.hkt",
	"Resources/Animation/HavokGirl/hkRunLoop_DP.hkt",

 	"Resources/Animation/HavokGirl/hkHeadMovement_DP.hkt",
	"Resources/Animation/HavokGirl/hkIdle_DP.hkt",

	"Resources/Animation/HavokGirl/hkJumpLandLoop_DP.hkt",
	"Resources/Animation/HavokGirl/hkLongAnim_DP.hkt",

	"Resources/Animation/HavokGirl/hkRunTurnLLoop_DP.hkt",
	"Resources/Animation/HavokGirl/hkRunTurnRLoop_DP.hkt",

	"Resources/Animation/HavokGirl/hkWalkTurnLLoop_DP.hkt",
	"Resources/Animation/HavokGirl/hkWalkTurnRLoop_DP.hkt",

	"Resources/Animation/HavokGirl/hkWaveLoop_DP.hkt",
};
static const char* RIG_FILE_NAME = "Resources/Animation/HavokGirl/hkRig_DP.hkt";
static const char* ANIM_PATH = HK_NULL;
#else
static const char* ANIMATION_FILE_NAMES[] =
{
	"Resources/Animation/HavokGirl/hkWalkLoop.hkt",
	"Resources/Animation/HavokGirl/hkRunLoop.hkt",

	"Resources/Animation/HavokGirl/hkHeadMovement.hkt",
	"Resources/Animation/HavokGirl/hkIdle.hkt",

	"Resources/Animation/HavokGirl/hkJumpLandLoop.hkt",
	"Resources/Animation/HavokGirl/hkLongAnim.hkt",

	"Resources/Animation/HavokGirl/hkRunTurnLLoop.hkt",
	"Resources/Animation/HavokGirl/hkRunTurnRLoop.hkt",

	"Resources/Animation/HavokGirl/hkWalkTurnLLoop.hkt",
	"Resources/Animation/HavokGirl/hkWalkTurnRLoop.hkt",

	"Resources/Animation/HavokGirl/hkWaveLoop.hkt",
};
static const char* RIG_FILE_NAME = "Resources/Animation/HavokGirl/hkRig.hkt";
static const char* ANIM_PATH = HK_NULL;
#endif

/*
static const char* ANIMATION_FILE_NAMES[] =
{
	"Resources/Animation/Firefighter/Animations/cin_groggygetup.hkt",
	"Resources/Animation/Firefighter/Animations/hkDive.hkt",
	"Resources/Animation/Firefighter/Animations/hkGetupBack1.hkt",
	"Resources/Animation/Firefighter/Animations/hkGetupFront1.hkt",
	"Resources/Animation/Firefighter/Animations/hkHardLand.hkt",
	"Resources/Animation/Firefighter/Animations/hkIdle1.hkt",
	"Resources/Animation/Firefighter/Animations/hkInAir.hkt",
	"Resources/Animation/Firefighter/Animations/hkRun.hkt",
	"Resources/Animation/Firefighter/Animations/hkRunJump.hkt",
	"Resources/Animation/Firefighter/Animations/hkWalk.hkt",
	"Resources/Animation/Firefighter/Animations/idl_cough1.hkt",
	"Resources/Animation/Firefighter/Animations/idl_crouchidle.hkt",
	"Resources/Animation/Firefighter/Animations/idl_flinch01_additive.hkt",
	"Resources/Animation/Firefighter/Animations/idl_plantC4.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet01.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet02.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet03.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet04.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet05.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet06.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet07.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet09.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet10.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet11.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet12.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet14.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet15.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet16.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet20.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet21.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet22.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet23.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet24.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet25.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet26.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet31.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet32.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet33.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet39.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet45.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet46.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet47.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet48.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet49.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet50.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet51.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet60.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet61.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet62.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet63.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet64.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet65.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet66.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet67.hkt",
	"Resources/Animation/Firefighter/Animations/idl_snippet68.hkt",
	"Resources/Animation/Firefighter/Animations/mov_crouchwalk.hkt",
};
static const char* RIG_FILE_NAME = "Resources/Animation/Firefighter/Internal/CharacterAssets/Firefighter_Rig.hkt";
static const char* ANIM_PATH = HK_NULL;
*/

// Number of entries in ANIMATION_FILE_NAMES, derived from the table itself
// (total size divided by the size of one element).
const int NUM_ANIMATIONS = sizeof(ANIMATION_FILE_NAMES) / sizeof(ANIMATION_FILE_NAMES[0]);

// Size handed to the WindowedAverage helper that tracks the sampling wall-clock time.
const int WINDOWED_AVERAGE_SIZE = 256;

// Tolerance used for every compression parameter set built in the constructor
// (spline, quantized and predictive).
const hkReal ERROR_TOLERANCE = 1.0e-3f;

// Animation representation a demo variant runs with. The constructor switches on this value to
// decide how the loaded (uncompressed) animations are converted.
enum AnimationType
{
	Uncompressed,		// animations are used as loaded, no conversion
	Spline,				// converted to hkaSplineCompressedAnimation
	QuantizedSlerp,		// converted to hkaQuantizedAnimation; sampling is asked to use slerp
	QuantizedQLerp,		// converted to hkaQuantizedAnimation; sampling is asked not to use slerp
	Predictive,			// converted to hkaPredictiveCompressedAnimation
	Reference,			// replaced by hkaReferencePoseAnimation built from the skeleton
};

// Kind of job a demo variant is declared to use. In the code visible here only
// JobTypeSampleAndBlend is tested explicitly (multi-threaded stepDemo() routes it to
// doSamplingUsingNewSampleBlendJob()); the other two values fall through to the remaining paths.
enum JobType
{
	JobTypeSampleAndCombine,
	JobTypeQuantizedSampleAndCombine,
	JobTypeSampleAndBlend,
};

// Whether a variant wraps its animations in mirrored animations. The constructor passes
// (m_mirrorType == Mirrored) as the 'mirror' argument of addAnimation().
enum MirrorType
{
	Mirrored,
	NotMirrored,
};

// Threading mode of a variant. The constructor sets m_useMt to true for MultiThreaded variants.
enum ThreadingType
{
	MultiThreaded,
	SingleThreaded
};

// Description of one selectable configuration of the demo; s_variants holds one entry per
// configuration and the demo indexes it with m_variantId.
struct DemoVariant
{
	const char* m_name;				// variant name string (presumably shown in the demo menu - confirm against the registration code)
	const char* m_details;			// short description string
	AnimationType m_animationType;	// animation representation to convert to
	JobType m_jobType;				// job kind declared for this variant
	ThreadingType m_threadingType;	// multi- or single-threaded sampling
	MirrorType m_mirrorType;		// whether the animations are mirrored
};


// String fragments concatenated (at compile time) into the names and descriptions in s_variants.
#	define ST_STRING "Single-threaded"
#	define MT_STRING "Multi-threaded"

// Table of all demo variants, indexed by m_variantId. Each entry is
// { name, details, animation type, job type, threading type, mirror type }.
// The multi-threaded entries are only compiled when HK_CONFIG_THREAD == HK_CONFIG_MULTI_THREADED;
// the single-threaded entries are always present.
const struct DemoVariant s_variants[] =
{
#if HK_CONFIG_THREAD==HK_CONFIG_MULTI_THREADED
	{
		MT_STRING " - SampleAndCombine - Uncompressed",
		"Sampling on " MT_STRING,
		Uncompressed,
		JobTypeSampleAndCombine,
		MultiThreaded,
		NotMirrored
	},
	{
		MT_STRING " - SampleAndCombine - Spline",
		"Spline compression on " MT_STRING,
		Spline,
		JobTypeSampleAndCombine,
		MultiThreaded,
		NotMirrored
	},
	{
		MT_STRING " - SampleAndCombine - Spline - Mirrored",
		"Mirrored Spline on " MT_STRING,
		Spline,
		JobTypeSampleAndCombine,
		MultiThreaded,
		Mirrored
	},
	{
		MT_STRING " - QuantizedSampleAndCombine - Quantized (Slerp)",
		"Quantized compression on " MT_STRING,
		QuantizedSlerp,
		JobTypeQuantizedSampleAndCombine,
		MultiThreaded,
		NotMirrored
	},
	{
		MT_STRING " - QuantizedSampleAndCombine - Quantized (QLerp)",
		"Quantized compression on " MT_STRING,
		QuantizedQLerp,
		JobTypeQuantizedSampleAndCombine,
		MultiThreaded,
		NotMirrored
	},
	// Reference pose decompression isn't supported on SPU for deprecated SampleAndCombineJob.
	// It's only supported with SampleAndBlendJob.
	// NOTE(review): the two entries below nevertheless pair ReferencePose with the multi-threaded
	// SampleAndCombine job and are not excluded by any platform check here - confirm whether they
	// should be guarded on SPU platforms.
	{
		MT_STRING " - SampleAndCombine - ReferencePose",
		"Sampling on " MT_STRING,
		Reference,
		JobTypeSampleAndCombine,
		MultiThreaded,
		NotMirrored
	},
	{
		MT_STRING " - SampleAndCombine - ReferencePose - Mirrored",
		"Mirrored sampling on " MT_STRING,
		Reference,
		JobTypeSampleAndCombine,
		MultiThreaded,
		Mirrored
	},
	{
		MT_STRING " - SampleAndBlend - Spline",
		"Spline compression on " MT_STRING,
		Spline,
		JobTypeSampleAndBlend,
		MultiThreaded,
		NotMirrored
	},
	{
		MT_STRING " - SampleAndBlend - Spline - Mirrored",
		"Spline compression on " MT_STRING,
		Spline,
		JobTypeSampleAndBlend,
		MultiThreaded,
		Mirrored
	},
	{
		MT_STRING " - SampleAndBlend - Quantized (Slerp)",
		"Quantized compression on " MT_STRING,
		QuantizedSlerp,
		JobTypeSampleAndBlend,
		MultiThreaded,
		NotMirrored
	},
	{
		MT_STRING " - SampleAndBlend - Quantized (QLerp)",
		"Quantized compression on " MT_STRING,
		QuantizedQLerp,
		JobTypeSampleAndBlend,
		MultiThreaded,
		NotMirrored
	},
	{
		MT_STRING " - SampleAndBlend - Predictive",
		"Predictive compression on " MT_STRING,
		Predictive,
		JobTypeSampleAndBlend,
		MultiThreaded,
		NotMirrored
	},
	{
		MT_STRING " - SampleAndBlend - Predictive - Mirrored",
		"Predictive compression on " MT_STRING,
		Predictive,
		JobTypeSampleAndBlend,
		MultiThreaded,
		Mirrored
	},
	{
		MT_STRING " - SampleAndBlend - ReferencePose",
		"Sampling on " MT_STRING,
		Reference,
		JobTypeSampleAndBlend,
		MultiThreaded,
		NotMirrored
	},
	{
		MT_STRING " - SampleAndBlend - ReferencePose - Mirrored",
		"Mirrored sampling on " MT_STRING,
		Reference,
		JobTypeSampleAndBlend,
		MultiThreaded,
		Mirrored
	},
#endif // HK_CONFIG_THREAD==HK_CONFIG_MULTI_THREADED
	// Single-threaded counterparts of the variants above.
	{
		ST_STRING " - SampleAndCombine - Uncompressed",
		"Sampling on " ST_STRING,
		Uncompressed,
		JobTypeSampleAndCombine,
		SingleThreaded,
		NotMirrored
	},
	{
		ST_STRING " - SampleAndCombine - Spline",
		"Spline compression on " ST_STRING,
		Spline,
		JobTypeSampleAndCombine,
		SingleThreaded,
		NotMirrored
	},
	{
		ST_STRING " - SampleAndCombine - Spline - Mirrored",
		"Mirrored Spline on " ST_STRING,
		Spline,
		JobTypeSampleAndCombine,
		SingleThreaded,
		Mirrored
	},
	{
		ST_STRING " - QuantizedSampleAndCombine - Quantized (Slerp)",
		"Quantized compression on " ST_STRING,
		QuantizedSlerp,
		JobTypeQuantizedSampleAndCombine,
		SingleThreaded,
		NotMirrored
	},
	{
		ST_STRING " - QuantizedSampleAndCombine - Quantized (QLerp)",
		"Quantized compression on " ST_STRING,
		QuantizedQLerp,
		JobTypeQuantizedSampleAndCombine,
		SingleThreaded,
		NotMirrored
	},
	{
		ST_STRING " - SampleAndCombine - ReferencePose",
		"Sampling on " ST_STRING,
		Reference,
		JobTypeSampleAndCombine,
		SingleThreaded,
		NotMirrored
	},
	{
		ST_STRING " - SampleAndCombine - ReferencePose - Mirrored",
		"Mirrored sampling on " ST_STRING,
		Reference,
		JobTypeSampleAndCombine,
		SingleThreaded,
		Mirrored
	},
	{
		ST_STRING " - SampleAndBlend - Spline",
		"Spline compression on " ST_STRING,
		Spline,
		JobTypeSampleAndBlend,
		SingleThreaded,
		NotMirrored
	},
	{
		ST_STRING " - SampleAndBlend - Spline - Mirrored",
		"Spline compression on " ST_STRING,
		Spline,
		JobTypeSampleAndBlend,
		SingleThreaded,
		Mirrored
	},
	{
		ST_STRING " - SampleAndBlend - Quantized (Slerp)",
		"Quantized compression on " ST_STRING,
		QuantizedSlerp,
		JobTypeSampleAndBlend,
		SingleThreaded,
		NotMirrored
	},
	{
		ST_STRING " - SampleAndBlend - Quantized (QLerp)",
		"Quantized compression on " ST_STRING,
		QuantizedQLerp,
		JobTypeSampleAndBlend,
		SingleThreaded,
		NotMirrored
	},
	{
		ST_STRING " - SampleAndBlend - Predictive",
		"Predictive compression on " ST_STRING,
		Predictive,
		JobTypeSampleAndBlend,
		SingleThreaded,
		NotMirrored
	},
	{
		ST_STRING " - SampleAndBlend - Predictive - Mirrored",
		"Predictive compression on " ST_STRING,
		Predictive,
		JobTypeSampleAndBlend,
		SingleThreaded,
		Mirrored
	},
	{
		ST_STRING " - SampleAndBlend - ReferencePose",
		"Sampling on " ST_STRING,
		Reference,
		JobTypeSampleAndBlend,
		SingleThreaded,
		NotMirrored
	},
	{
		ST_STRING " - SampleAndBlend - ReferencePose - Mirrored",
		"Mirrored sampling on " ST_STRING,
		Reference,
		JobTypeSampleAndBlend,
		SingleThreaded,
		Mirrored
	},
};

// Builds the demo for the variant selected by m_variantId:
//  1. loads the rig and every file in ANIMATION_FILE_NAMES (forcing each animation to the
//     interleaved uncompressed representation),
//  2. turns the "hkHeadMovement" clip into an additive animation,
//  3. converts all animations to the variant's representation, optionally mirrored,
//  4. creates NUM_SKELETONS animated skeletons with one control per animation,
//  5. allocates one output pose per skeleton and registers the job handlers.
// 'env' is forwarded to the hkDefaultAnimationDemo base class.
SampleAndBlendMultithreadingDemo::SampleAndBlendMultithreadingDemo( hkDemoEnvironment* env )
:	hkDefaultAnimationDemo(env),
	m_useMt(s_variants[m_variantId].m_threadingType == MultiThreaded),
	m_convertLocalToModelInAnimationJob(true),
	m_asyncSampling(false),
	m_simpleStatistics(true),
	m_windowedAverage(HK_NULL),
	m_time(0.0f)
{
	// Disable reports:
	if(m_env->m_reportingLevel < hkDemoEnvironment::REPORT_INFO )
	{
		setErrorEnabled(0x36118e94, false);
	}

	//
	// Setup the camera
	//
	{
		hkVector4 from(  -1.0f, -7.0f,  1.0f);
		hkVector4 to  (  0.0f,  0.0f,   0.0f);
		hkVector4 up  (  0.0f,  0.0f,   1.0f);
		setupDefaultCameras( env, from, to, up );
	}

	m_loader = new hkLoader();

	//
	// Get the rig
	//
	{
		hkStringBuf assetFile(RIG_FILE_NAME); hkAssetManagementUtil::getFilePath(assetFile);
		hkRootLevelContainer* container = m_loader->load( HK_GET_DEMOS_ASSET_FILENAME(assetFile.cString()) );
		HK_ASSERT2(0x27343437, container != HK_NULL , "Could not load asset");
		hkaAnimationContainer* ac = reinterpret_cast<hkaAnimationContainer*>( container->findObjectByType( hkaAnimationContainerClass.getName() ));

		HK_ASSERT2(0x27343435, ac && (ac->m_skeletons.getSize() > 0), "No skeleton loaded");
		m_skeleton = ac->m_skeletons[0];
	}

	// Total size of all animations in their uncompressed form; numerator of m_compressionRatio
	int uncompressedSize = 0;

	//
	// Get the animations and the bindings
	//
	for ( int i = 0; i < NUM_ANIMATIONS; i++ )
	{
		hkStringBuf assetFile( ANIMATION_FILE_NAMES[ i ] );

		if ( ANIM_PATH != HK_NULL )
		{
			hkStringBuf buf( ANIM_PATH, ANIMATION_FILE_NAMES[ i ] );
			assetFile = buf;
		}

		hkAssetManagementUtil::getFilePath(assetFile);
		hkRootLevelContainer* container = m_loader->load( HK_GET_DEMOS_ASSET_FILENAME(assetFile.cString()) );
		HK_ASSERT2(0x27343437, container != HK_NULL , "Could not load asset");
		hkaAnimationContainer* ac = reinterpret_cast<hkaAnimationContainer*>( container->findObjectByType( hkaAnimationContainerClass.getName() ));

		// Validate both arrays up front: the conversion below already touches m_bindings[0],
		// so the binding check has to happen before it rather than after.
		HK_ASSERT2(0x27343435, ac && (ac->m_animations.getSize() > 0), "No animation loaded");
		HK_ASSERT2(0x27343435, ac && (ac->m_bindings.getSize() > 0), "No binding loaded");

		// if the incoming animation is already compressed then uncompress it so we can compress it using any type of compression
		if ( ac->m_animations[0]->getType() != hkaAnimation::HK_INTERLEAVED_ANIMATION )
		{
			ac->m_animations[0] = new hkaInterleavedUncompressedAnimation( *ac->m_animations[0] );
			ac->m_bindings[0]->m_animation = ac->m_animations[0];
			// Drop the reference held from 'new'; the container and the binding keep theirs
			ac->m_animations[0]->removeReference();
		}
		m_animations.pushBack( ac->m_animations[0] );
		m_bindings.pushBack( ac->m_bindings[0] );

		hkaInterleavedUncompressedAnimation* uncompressedAnimation = static_cast<hkaInterleavedUncompressedAnimation*>(ac->m_animations[0].val());
		uncompressedSize += uncompressedAnimation->getSizeInBytes(); // due to alignment sizeof(hkQsTransform) == 48 which is a bit high
	}

	// Create the additive animations
	// This can also be done offline in the toolchain using the CreateAdditiveAnimation filter
	// See the Additive configuration of hkHeadMovement.max
	for ( int i = 0; i < NUM_ANIMATIONS; i++ )
	{
		// Test for animation file names that should be made additive
		if ( ! ( hkString::strStr( ANIMATION_FILE_NAMES[ i ], "hkHeadMovement" ) ) )
		{
			continue;
		}

		hkaInterleavedUncompressedAnimation* interleavedAnim = static_cast< hkaInterleavedUncompressedAnimation* >( m_animations[ i ] );

		hkaAdditiveAnimationUtility::Input input;
		input.m_originalData = interleavedAnim->m_transforms.begin();
		input.m_numberOfPoses = interleavedAnim->m_transforms.getSize() / interleavedAnim->m_numberOfTransformTracks;
		input.m_numberOfTransformTracks = interleavedAnim->m_numberOfTransformTracks;
		input.m_baseData = interleavedAnim->m_transforms.begin();


		// We create an additive animation by subtracting off the initial pose for the first frame of the animation
		// This is done by passing the same animation for both the originalData and the baseData
		// Note that only the first frame of the basedata is used so this initial frame is subtracted
		// from each of the frames in the animation
		hkaAdditiveAnimationUtility::createAdditiveFromPose( input, interleavedAnim->m_transforms.begin() );

		// Switch the binding to additive so this animation will be blended differently in sample and combine.
		m_bindings[ i ]->m_blendHint = hkaAnimationBinding::ADDITIVE;
	}

	//
	// Make new (compressed or mirrored) versions depending on the variant.
	//
	{
		int compressedSize = 0;
		m_maxDmaSize = 0;

		// Whether each converted animation is additionally wrapped in a mirrored animation
		const bool mirror = ( s_variants[m_variantId].m_mirrorType == Mirrored );

		switch( s_variants[m_variantId].m_animationType )
		{
			case Uncompressed: // Uncompressed
			{
				compressedSize = uncompressedSize;
				// No need to do anything
				break;
			}
			case Spline: // Spline
			{
				hkaSplineCompressedAnimation::TrackCompressionParams p;
				hkaSplineCompressedAnimation::AnimationCompressionParams a;

				a.m_maxFramesPerBlock = 32;

				p.m_floatingTolerance = ERROR_TOLERANCE;
				p.m_rotationTolerance = ERROR_TOLERANCE;
				p.m_scaleTolerance = ERROR_TOLERANCE;
				p.m_translationTolerance = ERROR_TOLERANCE;

				for (int i=0; i < m_animations.getSize(); i++)
				{
					hkaInterleavedUncompressedAnimation* uncompressedAnimation = static_cast<hkaInterleavedUncompressedAnimation*>(m_animations[i]);
					hkaSplineCompressedAnimation* compressedAnim = new hkaSplineCompressedAnimation( *uncompressedAnimation, p, a );
					addAnimation( i, compressedAnim, mirror );
					compressedSize += compressedAnim->getSizeInBytes();
					m_maxDmaSize = hkMath::max2( m_maxDmaSize, compressedAnim->getSizeInBytes() );
				}

				break;
			}
			case QuantizedSlerp: // Quantized
			case QuantizedQLerp: // Quantized
			{
				hkaQuantizedAnimation::TrackCompressionParams tp;
				tp.m_rotationTolerance = ERROR_TOLERANCE;
				tp.m_translationTolerance = ERROR_TOLERANCE;
				tp.m_scaleTolerance = ERROR_TOLERANCE;
				tp.m_floatingTolerance = ERROR_TOLERANCE;

				for ( int i = 0; i < m_animations.getSize(); i++ )
				{
					hkaQuantizedAnimation* compressedAnim = new hkaQuantizedAnimation( *m_bindings[ i ], *m_skeleton, tp );
					addAnimation( i, compressedAnim, mirror );
					compressedSize += compressedAnim->getSizeInBytes();
					m_maxDmaSize = hkMath::max2( m_maxDmaSize, compressedAnim->getSizeInBytes() );
				}
				break;
			}
			case Predictive: // Predictive
			{
				hkaPredictiveCompressedAnimation::CompressionParams p(ERROR_TOLERANCE,ERROR_TOLERANCE,ERROR_TOLERANCE,ERROR_TOLERANCE,ERROR_TOLERANCE,ERROR_TOLERANCE,ERROR_TOLERANCE,ERROR_TOLERANCE);
//				p.m_parameterPalette[0].m_dynamicRotationTolerance = 1.0f;
//				p.m_parameterPalette[0].m_dynamicScaleTolerance = 1.0f;
//				p.m_parameterPalette[0].m_dynamicTranslationTolerance = 1.0f;
//				p.m_parameterPalette[0].m_staticRotationTolerance = 0.0f;
//				p.m_parameterPalette[0].m_staticScaleTolerance = 0.0f;
//				p.m_parameterPalette[0].m_staticTranslationTolerance = 0.0f;

				for ( int i = 0; i < m_animations.getSize(); i++ )
				{
					hkaPredictiveCompressedAnimation* compressedAnim = new hkaPredictiveCompressedAnimation(*m_bindings[ i ], *m_skeleton, p);
					addAnimation( i, compressedAnim, mirror );
					compressedSize += compressedAnim->getSizeInBytes();
					m_maxDmaSize = hkMath::max2( m_maxDmaSize, compressedAnim->getSizeInBytes() );
				}
				break;
			}
			case Reference: // ReferencePose
			{
				// Reference pose animations are not measured; report a ratio of 1
				compressedSize = uncompressedSize;

				for ( int i = 0; i < m_animations.getSize(); i++ )
				{
					hkaReferencePoseAnimation* refposeAnim = new hkaReferencePoseAnimation( *m_skeleton, m_bindings[i]->m_animation->m_numberOfTransformTracks, m_bindings[i]->m_animation->m_numberOfFloatTracks );
					// refPose = true: addAnimation() resets the binding to an identity mapping with normal blending
					addAnimation( i, refposeAnim, mirror, true );
				}

				break;
			}
		}

		m_compressionRatio = hkReal(uncompressedSize) / hkReal(compressedSize);
	}

	//
	// Create animated skeletons
	//
	for ( int i = 0; i < NUM_SKELETONS; ++i )
	{
		// Create a new animated skeleton
		hkaAnimatedSkeleton* skeleton = new hkaAnimatedSkeleton( m_skeleton );
		m_activeSkeletons.pushBack( skeleton );

		for (int j=0; j < m_animations.getSize(); j++)
		{
			// Create an animation control; each skeleton starts at a different local time
			hkaDefaultAnimationControl* ac = new hkaDefaultAnimationControl(m_bindings[j]);
			ac->setLocalTime( i * .2f );

			// Bind the control to the skeleton
			skeleton->addAnimationControl( ac );

			// Scale the playback speed of every control but the first by the ratio of its
			// animation duration to the duration of the first control's animation
			if ( SYNCHRONIZE_ANIMATIONS && j )
			{
				const hkReal baseTime = skeleton->getAnimationControl( 0 )->getAnimationBinding()->m_animation->m_duration;
				const hkReal curTime = ac->getAnimationBinding()->m_animation->m_duration;
				ac->setPlaybackSpeed( curTime / baseTime );
			}

			// The animated skeleton now owns the control
			ac->removeReference();
		}
	}

	setupGraphics( );

	//
	// Allocate output buffers for pose sampling
	//
	m_poses.setSize( NUM_SKELETONS );
	for (int p=0; p < NUM_SKELETONS; p++)
	{
		m_poses[p] = new hkaPose(m_activeSkeletons[p]->getSkeleton());
		m_poses[p]->setToReferencePose();
	}

#if HK_CONFIG_THREAD==HK_CONFIG_MULTI_THREADED
	hkaSampleBlendJobQueueUtils::registerWithJobQueue( m_jobQueue );
	hkaAnimationSampleAndCombineJobQueueUtils::registerWithJobQueue( m_jobQueue );
#endif

	//
	// Make the helper which lets us average the sampling time over a window of frames
	//
	m_windowedAverage = new WindowedAverage( WINDOWED_AVERAGE_SIZE );
}

// Replaces the animation and binding stored at 'index' with mirrored versions of 'original'.
// A mirrored skeleton is built from m_skeleton, its bone pairs are found by name, and the
// resulting hkaMirroredAnimation wraps 'original' together with the current binding.
void SampleAndBlendMultithreadingDemo::makeMirroredAnimation(hkaAnimation* original, int index )
{
	// Name fragments used to pair left-hand bones with right-hand bones
	hkArray<hkStringPtr> leftNameTags;
	leftNameTags.pushBack( " L " );
	leftNameTags.pushBack( "EyeL" );

	hkArray<hkStringPtr> rightNameTags;
	rightNameTags.pushBack( " R " );
	rightNameTags.pushBack( "EyeR" );

	// Build the mirroring description for the rig
	hkaMirroredSkeleton* mirrorRig = new hkaMirroredSkeleton( m_skeleton );
	mirrorRig->computeBonePairingFromNames( leftNameTags, rightNameTags );

	hkQuaternion mirrorAxis( 1.0f, 0.0f, 0.0f, 0.0f );
	mirrorRig->setAllBoneInvariantsFromReferencePose( mirrorAxis, 0.0f );

	// Wrap the original animation; the binding passed in is the one currently stored at 'index',
	// which is then replaced by the mirrored binding
	hkaMirroredAnimation* mirrored = new hkaMirroredAnimation( original, m_bindings[index], mirrorRig );
	m_bindings[index] = mirrored->createMirroredBinding();
	m_animations[index] = mirrored;

	// Give up our local reference to the mirrored skeleton
	mirrorRig->removeReference();
}

void SampleAndBlendMultithreadingDemo::addAnimation( int i, hkaAnimation* animation, bool mirror, bool refPose )
{
	if (refPose)
	{
		// alter the binding to be an identity mapping and normal blend
		for (hkInt16 t=0; t < m_bindings[i]->m_transformTrackToBoneIndices.getSize(); t++)
		{
			m_bindings[i]->m_transformTrackToBoneIndices[t] = t;
		}
		for (hkInt16 f=0; f < m_bindings[i]->m_floatTrackToFloatSlotIndices.getSize(); f++)
		{
			m_bindings[i]->m_floatTrackToFloatSlotIndices[f] = f;
		}
		m_bindings[i]->m_blendHint = hkaAnimationBinding::NORMAL;
	}

	if ( mirror )
	{
		makeMirroredAnimation( animation, i );
		animation->removeReference();
	}
	else
	{
		m_animations[i] = animation;
		m_bindings[i]->m_animation = animation;
	}
}

// Releases everything the constructor created: the animated skeletons and their poses, the
// mirrored bindings and replacement animations (when the variant made any), the windowed
// averager, and finally the loader.
SampleAndBlendMultithreadingDemo::~SampleAndBlendMultithreadingDemo()
{
	// Release each animated skeleton together with the pose allocated for it
	const int numSkeletons = m_activeSkeletons.getSize();
	for ( int idx = 0; idx < numSkeletons; ++idx )
	{
		m_activeSkeletons[idx]->removeReference();
		delete m_poses[idx];
	}

	// The first animation tells us what the constructor did to all of them.
	// Mirrored variants own bindings created by createMirroredBinding().
	const bool usesMirroredBindings = ( m_animations[0]->getType() == hkaAnimation::HK_MIRRORED_ANIMATION );
	if ( usesMirroredBindings )
	{
		const int numBindings = m_bindings.getSize();
		for ( int b = 0; b < numBindings; ++b )
		{
			hkaMirroredAnimation::destroyMirroredBinding( m_bindings[b] );
		}
	}

	// Anything other than the interleaved type means we created replacement animations
	const bool ownsReplacementAnimations = ( m_animations[0]->getType() != hkaAnimation::HK_INTERLEAVED_ANIMATION );
	if ( ownsReplacementAnimations )
	{
		const int numAnimations = m_animations.getSize();
		for ( int a = 0; a < numAnimations; ++a )
		{
			m_animations[a]->removeReference();
		}
	}

	// Timing helper
	delete m_windowedAverage;

	// The loader goes last (all serialized data)
	delete m_loader;
}

// Advances the demo by one frame:
//  1. toggles the runtime options from gamepad buttons,
//  2. sets the control weights and steps every animated skeleton by m_timestep,
//  3. samples and blends all skeletons into m_poses using the path selected by the variant,
//     timing the whole pass with a stopwatch,
//  4. feeds the measured time into the windowed average and the monitor stream,
//  5. draws the poses and statistics unless the renderer option starts with "n".
// Always returns DEMO_OK.
hkDemo::Result SampleAndBlendMultithreadingDemo::stepDemo()
{
	//
	// Handle input
	//
	{
		// Local-to-model conversion inside the job can only be toggled for multi-threaded variants
		if ( m_useMt && m_env->m_gamePad->wasButtonPressed(HKG_PAD_BUTTON_3) )
		{
			m_convertLocalToModelInAnimationJob = !m_convertLocalToModelInAnimationJob;
		}

		if ( m_env->m_gamePad->wasButtonPressed(HKG_PAD_BUTTON_2) )
		{
			m_asyncSampling = !m_asyncSampling;
		}

		if ( m_env->m_gamePad->wasButtonPressed(HKG_PAD_BUTTON_1) )
		{
			m_simpleStatistics = !m_simpleStatistics;
		}
	}

	HK_TIMER_BEGIN("StepAnimations", HK_NULL);

	const int numSkeletons = m_activeSkeletons.getSize();

	//
	// Step the skeletons - this happens in single threaded mode
	//
	{
		for (int i = 0; i < numSkeletons; ++i )
		{
			hkaAnimatedSkeleton* inst = m_activeSkeletons[i];

			const int numControls = inst->getNumAnimationControls();

			for ( int j = 0; j < numControls; j++ )
			{
				// Distribute the animation weights based on the current time, skeleton and control indices
				// (a sine remapped to the range [0, 1], with a different frequency per skeleton/control pair)
				hkReal weight = .5f*(1.0f + hkMath::sin( m_time * hkReal( i + 1 ) / hkReal( numSkeletons ) * hkReal( j + 1 ) / hkReal( numControls ) ) );
				hkaDefaultAnimationControl* ac = static_cast< hkaDefaultAnimationControl* >( inst->getAnimationControl( j ) ); // We know these are Default controls
				ac->setMasterWeight( weight );
			}

			// Advance the animation
			inst->stepDeltaTime( m_timestep );
		}
	}

	HK_TIMER_END();

	HK_TIMER_BEGIN( "sample and blend", HK_NULL );

	//
	// Do the sampling and blending
	//
	hkStopwatch timer;
	{
		timer.start();

		if(m_useMt)
		{
			// Multi-threaded: SampleAndBlend variants use the sample-blend job; all other variants use
			// the asynchronous or synchronous multithreaded path depending on the m_asyncSampling toggle
			if ( s_variants[m_variantId].m_jobType == JobTypeSampleAndBlend )
			{
				doSamplingUsingNewSampleBlendJob();
			}
			else if(m_asyncSampling)
			{
				doMultithreadedSamplingAsynchronously();
			}
			else
			{
				doMultithreadedSamplingSynchronously();
			}
		}
		else
		{
			// Single-threaded: the choice below depends only on the animation type, so quantized
			// variants take the first branch whatever their m_jobType is
			if ( s_variants[m_variantId].m_animationType == QuantizedSlerp || s_variants[m_variantId].m_animationType == QuantizedQLerp )
			{
				// Run animation on the PPU in single threaded mode
				for (int i = 0; i < numSkeletons; ++i )
				{
					hkaQuantizedSampleAndCombineJob job;

					// We gather data for each active control and place it in a buffer to be used by the SPU
					// The buffer must persist in memory as long as the job is active.
					// Allocate space for the buffer (typically 50 bytes per active control)
					hkaMultithreadedAnimationUtils::allocateQuantizedSampleAndCombineJob( m_activeSkeletons[i], job );

					{
						// Get output - local or model
						hkQsTransform* outputTransforms = HK_NULL;
						const hkInt16* parentIndices = HK_NULL;	// Used as flag for job to do local-to-model conversion
						if ( m_convertLocalToModelInAnimationJob )
						{
							outputTransforms = m_poses[i]->accessUnsyncedPoseModelSpace().begin();
							parentIndices = m_skeleton->m_parentIndices.begin();
						}
						else
						{
							outputTransforms = m_poses[i]->accessUnsyncedPoseLocalSpace().begin();
							parentIndices = HK_NULL;
						}

						const bool useSlerp = ( s_variants[m_variantId].m_animationType == QuantizedSlerp );


						// Populate the buffer with data from the controls.
						// Warning - if you are *not* using a hkaPose class to allocate your output array for
						// float slots you must ensure that your float slots array is a multiple of 16 bytes in size.
						// See hkaMultithreadedAnimationUtils::allocateFloatSlotsArrayRoundedUpToMultipleOf16() for details.
						hkaMultithreadedAnimationUtils::createQuantizedSampleAndCombineJob(
							m_activeSkeletons[i],
							outputTransforms,
							m_poses[i]->getFloatSlotValues().begin(),
							HK_NULL, HK_NULL,
							parentIndices,
							job,
							useSlerp, // Slerp
							!useSlerp, // Normalize lerps, do not necessarily need to normalize slerp
							1.0e-3f );
					}

					// Run the job immediately on this thread, then free the buffer allocated above
					executeQuantizedSampleAndCombineJob( job );

					hkaMultithreadedAnimationUtils::deallocateQuantizedSampleAndCombineJob( job );

				}
			}
			else
			{
				// Run animation on the PPU in single threaded mode
				// (this path always writes the local space pose; m_convertLocalToModelInAnimationJob is not consulted)
				for (int i = 0; i < numSkeletons; ++i )
				{
					m_activeSkeletons[i]->sampleAndCombineAnimations(	m_poses[i]->accessUnsyncedPoseLocalSpace().begin(),
						m_poses[i]->getFloatSlotValues().begin() );
				}
			}
		}

		timer.stop();
	}

	HK_TIMER_END();

	// Stopwatch reading scaled by 1,000,000 (seconds to microseconds, going by the getter's name)
	hkReal sampleTime = ( timer.getSplitSeconds() * 1000000 );
	m_windowedAverage->pushNewValue( sampleTime );

	// The monitor values are the averager's readings divided by 1000
	HK_MONITOR_ADD_VALUE( "WallClockSampleAndBlend", float( m_windowedAverage->getLastTime() / 1000.0f ), HK_MONITOR_TYPE_SINGLE );
	HK_MONITOR_ADD_VALUE( "WallClockSmpAndBlndMean", float( m_windowedAverage->getWindowedMean() / 1000.0f ), HK_MONITOR_TYPE_SINGLE );

	//
	// Display only if not being run with NULL renderer (e.g. bootstrapping for stats generation)
	// (detected by the renderer option starting with "n")
	//
	if( hkString::strNcmp( m_env->m_options->m_renderer, "n", 1) != 0)
	{
		if (!m_convertLocalToModelInAnimationJob)		// To compare timings, make sure that we explicitly do conversion if not already done on SPU
		{
			HK_TIMER_BEGIN("syncModelSpace", HK_NULL);
			for (int i = 0; i < numSkeletons; ++i )
			{
				m_poses[i]->syncModelSpace();
			}
			HK_TIMER_END();
		}

		// Lay the skeletons out along the X axis, one unit apart; the skeleton with index
		// 1 + NUM_SKELETONS/4 is drawn at x = 0
		hkQsTransform worldFromModel (hkQsTransform::IDENTITY);
		for (int i = 0; i < numSkeletons; ++i )
		{
			worldFromModel.m_translation.set( hkReal(i - 1 - (NUM_SKELETONS>>2)), 0, 0);
			AnimationUtils::drawPose( *m_poses[i], worldFromModel );
		}

		// On-screen hints for the toggles, shown for multi-threaded variants only
		const int h = getWindowHeight();
		if ( m_useMt )
		{
			m_env->m_textDisplay->outputText( (m_asyncSampling?"Asynchronous processing (Hit \x12 to change)":"Synchronous processing (Hit \x12 to change)"), 20, h-60 );
			m_env->m_textDisplay->outputText( (m_convertLocalToModelInAnimationJob?"LocalToModel in Animation Job (Hit \x13 to change)":"LocalToModel Single Threaded On CPU  (Hit \x13 to change)"), 20, h-40 );
		}

		printTimingStatistics();

	}

	// Demo time drives the weight distribution computed at the top of the next step
	m_time += m_timestep;

	return DEMO_OK;
}

void SampleAndBlendMultithreadingDemo::doSamplingUsingNewSampleBlendJob()
{
	const int numSkeletons = m_activeSkeletons.getSize();

	// Set up the jobs
	hkLocalArray<hkaSampleBlendJob> sampleBlendJobs(numSkeletons);
	sampleBlendJobs.setSize(numSkeletons);

	HK_TIMER_BEGIN("JobSetup new", HK_NULL);

	// Make jobs
	for (int i = 0; i < numSkeletons; ++i )
	{
		HK_TIMER_BEGIN( "build job", HK_NULL );

		sampleBlendJobs[i].build(
			m_activeSkeletons[i],
			m_poses[i]->accessUnsyncedPoseLocalSpace().begin(),
			m_poses[i]->getFloatSlotValues().begin() ); //, false, numBones );

		sampleBlendJobs[i].m_useSlerpForQuantized = ( s_variants[m_variantId].m_animationType == QuantizedSlerp );

		HK_TIMER_END();
	}

	// Add jobs as a batch
	{
		HK_TIMER_BEGIN( "AddJobBatch", HK_NULL );
		hkLocalArray<hkJob*> jobPointers( numSkeletons );
		jobPointers.setSize( numSkeletons );
		for ( int i = 0; i < numSkeletons; ++i )
		{
			jobPointers[i] = &( sampleBlendJobs[i] );
		}

		m_jobQueue->addJobBatch( jobPointers, hkJobQueue::JOB_HIGH_PRIORITY );
		HK_TIMER_END();
	}

	HK_TIMER_END();

	m_threadPool->processJobQueue( m_jobQueue );
	m_jobQueue->processAllJobs();

	// Wait for all threads to finish

	// There's no need to wait on the hkaAnimationSampleAndCombineJob's semaphore here, since we're going to end up waiting for all the jobs to finish.
	// However, if each job had its own semaphore and we wanted to wait on an specific job, this would be the place to do it.

	// Wait for the actual tasks to finish. This makes sure all timer information will have finished DMAing to main memory
	m_threadPool->waitForCompletion();

	// Clean up the control buffers
	for (int i = 0; i < numSkeletons; ++i )
	{
		sampleBlendJobs[i].destroy();
	}
}

void SampleAndBlendMultithreadingDemo::doQuantizedSamplingSynchronously()
{
	const int numSkeletons = m_activeSkeletons.getSize();

	// Set up the jobs
	hkLocalArray< hkaQuantizedSampleAndCombineJob > sampleAndCombineJobs( numSkeletons );
	sampleAndCombineJobs.setSize( numSkeletons );

	HK_TIMER_BEGIN("JobSetup quantized old", HK_NULL);

	// Make jobs
	for (int i = 0; i < numSkeletons; ++i )
	{
		HK_TIMER_BEGIN( "build job", HK_NULL );

		// We gather data for each active control and place it in a buffer to be used by the SPU
		// The buffer must persist in memory as long as the job is active.
		// Allocate space for the buffer (typically 50 bytes per active control)
		hkaMultithreadedAnimationUtils::allocateQuantizedSampleAndCombineJob( m_activeSkeletons[i], sampleAndCombineJobs[i] );

		{
			// Get output - local or model
			hkQsTransform* outputTransforms = HK_NULL;
			const hkInt16* parentIndices = HK_NULL;	// Used as flag for job to do local-to-model conversion
			if ( m_convertLocalToModelInAnimationJob )
			{
				outputTransforms = m_poses[i]->accessUnsyncedPoseModelSpace().begin();
				parentIndices = m_skeleton->m_parentIndices.begin();
			}
			else
			{
				outputTransforms = m_poses[i]->accessUnsyncedPoseLocalSpace().begin();
				parentIndices = HK_NULL;
			}

			const bool useSlerp = ( s_variants[m_variantId].m_animationType == QuantizedSlerp );

			// Populate the buffer with data from the controls.
			// Warning - if you are *not* using a hkaPose class to allocate your output array for
			// float slots you must ensure that your float slots array is a multiple of 16 bytes in size.
			// See hkaMultithreadedAnimationUtils::allocateFloatSlotsArrayRoundedUpToMultipleOf16() for details.
			hkaMultithreadedAnimationUtils::createQuantizedSampleAndCombineJob(
				m_activeSkeletons[i],
				outputTransforms,
				m_poses[i]->getFloatSlotValues().begin(),
				HK_NULL, HK_NULL,
				parentIndices,
				sampleAndCombineJobs[ i ],
				useSlerp,
				!useSlerp, // Normalize lerps, do not necessarily need to normalize slerp
				1.0e-3f );
		}

		HK_TIMER_END();

		// Add the job - execution begins immediately on the SPU.
		// Since however this uses a critical section, and the job set up code above is relatively fast,
		// we defer adding until all jobs are set up, and then use an addJobBatch - this will be faster.
		// HK_TIMER_BEGIN("AddJob", HK_NULL);
		// m_jobQueue->addJob( sampleAndCombineJobs[i], hkJobQueue::JOB_HIGH_PRIORITY);
		// HK_TIMER_END();
	}

	// Add jobs as a batch
	{
		HK_TIMER_BEGIN( "AddJobBatch", HK_NULL );
		hkLocalArray<hkJob*> jobPointers(  numSkeletons );
		jobPointers.setSize( numSkeletons );
		for ( int i = 0; i < numSkeletons; ++i )
		{
			jobPointers[i] = &( sampleAndCombineJobs[i] );
		}

		// Add the job - execution begins immediately on the SPU
		m_jobQueue->addJobBatch( jobPointers, hkJobQueue::JOB_HIGH_PRIORITY );
		HK_TIMER_END();
	}

	HK_TIMER_END();

	m_threadPool->processJobQueue( m_jobQueue );
	m_jobQueue->processAllJobs();

	// Wait for all threads to finish

	// There's no need to wait on the hkaAnimationSampleAndCombineJob's semaphore here, since we're going to end up waiting for all the jobs to finish.
	// However, if each job had its own semaphore and we wanted to wait on an specific job, this would be the place to do it.

	// Wait for the actual tasks to finish. This makes sure all timer information will have finished DMAing to main memory
	m_threadPool->waitForCompletion();

	// Clean up the control buffers
	for (int i = 0; i < numSkeletons; ++i )
	{
		hkaMultithreadedAnimationUtils::deallocateQuantizedSampleAndCombineJob( sampleAndCombineJobs[i] );
	}
}

void SampleAndBlendMultithreadingDemo::doMultithreadedSamplingSynchronously()
{
	if (	( s_variants[m_variantId].m_jobType == JobTypeQuantizedSampleAndCombine ) &&
			( s_variants[m_variantId].m_animationType == QuantizedSlerp || s_variants[m_variantId].m_animationType == QuantizedQLerp ) )
	{
		doQuantizedSamplingSynchronously();
		return;
	}

	const int numSkeletons = m_activeSkeletons.getSize();

	// Set up the jobs
	hkLocalArray<hkaAnimationSampleAndCombineJob> sampleAndCombineJobs(numSkeletons);
	sampleAndCombineJobs.setSize(numSkeletons);

	HK_TIMER_BEGIN("JobSetup old", HK_NULL);

	// Make jobs
	for (int i = 0; i < numSkeletons; ++i )
	{
		// We gather data for each active control and place it in a buffer to be used by the SPU
		// The buffer must persist in memory as long as the job is active.
		// Allocate space for the buffer (typically 50 bytes per active control)
		hkaMultithreadedAnimationUtils::allocateSampleAndCombineJob( m_activeSkeletons[i], sampleAndCombineJobs[i] );

		{
			// Get output - local or model
			hkQsTransform* outputTransforms = HK_NULL;
			const hkInt16* parentIndices = HK_NULL;	// Used as flag for job to do local-to-model conversion
			if (m_convertLocalToModelInAnimationJob)
			{
				outputTransforms = m_poses[i]->accessUnsyncedPoseModelSpace().begin();
				parentIndices = m_skeleton->m_parentIndices.begin();
			}
			else
			{
				outputTransforms = m_poses[i]->accessUnsyncedPoseLocalSpace().begin();
				parentIndices = HK_NULL;
			}

			// Populate the buffer with data from the controls.
			// Warning - if you are *not* using a hkaPose class to allocate your output array for
			// float slots you must ensure that your float slots array is a multiple of 16 bytes in size.
			// See hkaMultithreadedAnimationUtils::allocateFloatSlotsArrayRoundedUpToMultipleOf16() for details.
			hkaMultithreadedAnimationUtils::createSampleAndCombineJob( m_activeSkeletons[i],
				m_activeSkeletons[i]->getSkeleton()->m_bones.getSize(), outputTransforms,
				m_activeSkeletons[i]->getSkeleton()->m_floatSlots.getSize(), m_poses[i]->getFloatSlotValues().begin(),
				const_cast<hkInt16*>(parentIndices),
				sampleAndCombineJobs[i] );
		}


		// Add the job - execution begins immediately on the SPU.
		// Since however this uses a critical section, and the job set up code above is relatively fast,
		// we defer adding until all jobs are set up, and then use an addJobBatch - this will be faster.
		// HK_TIMER_BEGIN("AddJob", HK_NULL);
		// m_jobQueue->addJob( sampleAndCombineJobs[i], hkJobQueue::JOB_HIGH_PRIORITY);
		// HK_TIMER_END();
	}

	// Add jobs as a batch
	{
		HK_TIMER_BEGIN("AddJobBatch", HK_NULL);
		hkLocalArray<hkJob*> jobPointers( numSkeletons );
		jobPointers.setSize(numSkeletons);
		for (int i = 0; i < numSkeletons; ++i )
		{
			jobPointers[i] = &( sampleAndCombineJobs[i] );
		}

		// Add the job - execution begins immediately on the SPU
		m_jobQueue->addJobBatch( jobPointers, hkJobQueue::JOB_HIGH_PRIORITY );
		HK_TIMER_END();
	}

	HK_TIMER_END();

	m_threadPool->processJobQueue( m_jobQueue );
	m_jobQueue->processAllJobs();

	// Wait for all threads to finish

	// There's no need to wait on the hkaAnimationSampleAndCombineJob's semaphore here, since we're going to end up waiting for all the jobs to finish.
	// However, if each job had its own semaphore and we wanted to wait on an specific job, this would be the place to do it.

	// Wait for the actual tasks to finish. This makes sure all timer information will have finished DMAing to main memory
	m_threadPool->waitForCompletion();

	// Clean up the control buffers
	for (int i = 0; i < numSkeletons; ++i )
	{
		hkaMultithreadedAnimationUtils::deallocateSampleAndCombineJob( sampleAndCombineJobs[i] );
	}
}


void SampleAndBlendMultithreadingDemo::doMultithreadedSamplingAsynchronously()
{
	const int numSkeletons = m_activeSkeletons.getSize();

	// Set up the jobs
	hkArray<hkaAnimationSampleAndCombineJob> sampleAndCombineJobs;
	sampleAndCombineJobs.setSize(numSkeletons);
	int numJobsAdded = 0;

	// Allocate an array that will contain the "job finished" flags for each  animation job.  This needs to be allocated on the heap since these
	// addresses are written to from the SPU.  The array should be as large as the number of jobs you'll ever process in one frame.
	hkLocalArray<hkUint32> animationJobDoneFlags( sampleAndCombineJobs.getSize() );
	animationJobDoneFlags.setSize( sampleAndCombineJobs.getSize(), 0);

	m_jobQueue->setWaitPolicy(hkJobQueue::WAIT_INDEFINITELY);

	HK_TIMER_BEGIN("JobSetup", HK_NULL);

	// Make jobs
	for (int i = 0; i < numSkeletons; ++i )
	{
		// We gather data for each active control and place it in a buffer to be used by the SPU
		// The buffer must persist in memory as long as the job is active.

		// Allocate space for the buffer (typically 50 bytes per active control)
		hkaMultithreadedAnimationUtils::allocateSampleAndCombineJob( m_activeSkeletons[i], sampleAndCombineJobs[i] );

		{
			// Get output - local or model
			hkQsTransform* outputTransforms = HK_NULL;
			const hkInt16* parentIndices = HK_NULL;	// Used as flag for job to do local-to-model conversion
			if (m_convertLocalToModelInAnimationJob)
			{
				outputTransforms = m_poses[i]->accessUnsyncedPoseModelSpace().begin();
				parentIndices = m_skeleton->m_parentIndices.begin();
			}
			else
			{
				outputTransforms = m_poses[i]->accessUnsyncedPoseLocalSpace().begin();
				parentIndices = HK_NULL;
			}

			// Populate the buffer with data from the controls.
			// Warning - if you are *not* using a hkaPose class to allocate your output array for
			// float slots you must ensure that your float slots array is a multiple of 16 bytes in size.
			// See hkaMultithreadedAnimationUtils::allocateFloatSlotsArrayRoundedUpToMultipleOf16() for details.
			hkaMultithreadedAnimationUtils::createSampleAndCombineJob( m_activeSkeletons[i],
				m_activeSkeletons[i]->getSkeleton()->m_bones.getSize(), outputTransforms,
				m_activeSkeletons[i]->getSkeleton()->m_floatSlots.getSize(), m_poses[i]->getFloatSlotValues().begin(),
				const_cast<hkInt16*>(parentIndices),
				sampleAndCombineJobs[i] );
		}


		// Here we're going to use a flag instead of a semaphore to detect job completion
		sampleAndCombineJobs[i].m_jobDoneNotifier.m_flag = &(animationJobDoneFlags[i]);
	}

	HK_TIMER_END();

	// Start processing
	m_threadPool->processJobQueue( m_jobQueue );
	m_jobQueue->processAllJobs();

	const int maxToAdd = 7;
	hkLocalArray<hkJob*> jobPointersForBatch( maxToAdd );

	// Next block on the PPU until all jobs are marked as done
	hkBool allJobsComplete = false;
	while( !allJobsComplete )
	{

		// To fake the asynchronous nature of job creation, just for this demo, every so often add in some more jobs
		if( hkUnitTest::rand01() < 0.01f)
		{
			int numToAdd = (int)(hkUnitTest::rand01() * maxToAdd);
			int maxCanAdd = numSkeletons - numJobsAdded;
			numToAdd = (numToAdd <= maxCanAdd)? numToAdd: 0;

			jobPointersForBatch.setSize(numToAdd);

			for(int i = 0; i < numToAdd; i++)
			{
				jobPointersForBatch[i] = &( sampleAndCombineJobs[numJobsAdded] );
				numJobsAdded++;
			}

			// Add the jobs - execution begins immediately on the SPU
			if(numToAdd > 0)
			{
				m_jobQueue->addJobBatch( jobPointersForBatch, hkJobQueue::JOB_HIGH_PRIORITY );
			}
		}

		// Deal with jobs which are done here
		//
		// If animationJobDoneFlags[i] == 1, then animation job 'i' is complete
		//

		// Check for job completion
		allJobsComplete = true;
		for( hkInt32 i = 0; i < numSkeletons; ++i )
		{
			if( animationJobDoneFlags[i] == 0 )
			{
				// One job not yet finished so can't exit
				allJobsComplete = false;
			}
		}

	}

	// Let job queue know that we are are done.
	m_jobQueue->setWaitPolicy(hkJobQueue::WAIT_UNTIL_ALL_WORK_COMPLETE);

	// Wait for the actual tasks to finish. This makes sure all timer information will have finished DMAing to main memory
	m_threadPool->waitForCompletion();

	// Clean up the control buffers
	for (int i = 0; i < numSkeletons; ++i )
	{
		hkaMultithreadedAnimationUtils::deallocateSampleAndCombineJob( sampleAndCombineJobs[i] );
	}
}

// Writes the timing statistics overlay through the demo's text display.
// When m_simpleStatistics is set a short summary is shown, otherwise the full set of
// windowed statistics. Both modes end with the workload description, a spacer line and a
// label naming the current mode. Each line is placed one step (d) further from the window
// height than the previous one, starting at offset y + d.
void SampleAndBlendMultithreadingDemo::printTimingStatistics()
{
	char buf[1024];

	const int h = getWindowHeight();

	const int x = 20;	// Horizontal text position
	const int d = 20;	// Vertical step between lines
	int y = 80;			// Running vertical offset, advanced before each line is drawn

	if ( m_simpleStatistics )
	{
		hkString::sprintf(buf, "Max animation bytes to DMA: %d", m_maxDmaSize );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

		hkString::sprintf(buf, "Compression ratio over all animations: %.1f", m_compressionRatio );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

		hkString::sprintf(buf, "Average sample Time (last %3d Samples): %8.0f (uSecs)", m_windowedAverage->getWindowSize(), m_windowedAverage->getWindowedMean() );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

		hkString::sprintf(buf, "Current Sample Time:                    %8.0f (uSecs)", m_windowedAverage->getLastTime() );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );
	}
	else
	{
		hkString::sprintf(buf, "     Total Samples: %8d", m_windowedAverage->getNumSamples() );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

		hkString::sprintf(buf, "       Window Size: %8d", m_windowedAverage->getWindowSize() );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

		// Label previously read "Widowed"; the padding keeps the colons aligned
		hkString::sprintf(buf, "  Windowed Std Dev: %8.0f (uSecs)", m_windowedAverage->getWindowedStandardDeviation() );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

		hkString::sprintf(buf, "  Windowed Maximum: %8.0f (uSecs)", m_windowedAverage->getWindowedMax() );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

		hkString::sprintf(buf, "  Windowed Minimum: %8.0f (uSecs)", m_windowedAverage->getWindowedMin() );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

		hkString::sprintf(buf, "   Windowed Median: %8.0f (uSecs)", m_windowedAverage->getWindowedMedian() );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

		hkString::sprintf(buf, "     Windowed Mean: %8.0f (uSecs)", m_windowedAverage->getWindowedMean() );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

		hkString::sprintf(buf, "        Total Mean: %8.0f (uSecs)", m_windowedAverage->getTotalMean() );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

		hkString::sprintf(buf, "           Current: %8.0f (uSecs)", m_windowedAverage->getLastTime() );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );
	}

	// Lines shared by both modes: workload description, spacer, then the mode label
	hkString::sprintf(buf, "Sample time for %d characters, %d blends, %d bones", NUM_SKELETONS, NUM_ANIMATIONS, m_skeleton->m_bones.getSize() );
	m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

	hkString::sprintf(buf, " " );
	m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

	if ( m_simpleStatistics )
	{
		hkString::sprintf(buf, "Simple Statistics (Hit \x11 to change)" );
	}
	else
	{
		hkString::sprintf(buf, "Detailed Statistics (Hit \x11 to change)" );
	}
	m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );
}

// Injects synthetic game pad input: forces pad button 3 to register as pressed on each call.
void SampleAndBlendMultithreadingDemo::makeFakeInput()
{
	// The on-screen help pairs the "\x13" prompt with the local-to-model conversion toggle
	// (m_convertLocalToModelInAnimationJob); presumably that prompt is this button, so the
	// toggle flips whenever this runs - confirm against the demo's input handling.
	m_env->m_gamePad->forceButtonPressed( HKG_PAD_BUTTON_3 );
}



// Demo type flags passed to the variant registration below
static const int demoType = HK_DEMO_TYPE_ANIMATION | HK_DEMO_TYPE_CRITICAL | HK_DEMO_TYPE_STATS;

// Two-level stringification: going through TO_STRING2 lets the argument be macro-expanded
// before TO_STRING1 applies '#', so a macro argument is stringified as its expansion
#define TO_STRING1( x ) # x
#define TO_STRING2( x ) TO_STRING1( x )

// Help text assembled by string literal concatenation. The animation and skeleton counts
// appear as numbers only if NUM_ANIMATIONS / NUM_SKELETONS are macros; otherwise their names
// would be embedded verbatim.
static const char* helpString = "Multithreaded sampling and blending of " TO_STRING2( NUM_ANIMATIONS ) " animations X " TO_STRING2( NUM_SKELETONS ) " skeletons.";

// Registers this demo using the s_variants table of DemoVariant entries, together with the
// type flags and help text above (the macro is defined elsewhere - presumably it creates one
// demo entry per variant; confirm against its definition)
HK_DECLARE_DEMO_VARIANT_USING_STRUCT( SampleAndBlendMultithreadingDemo, demoType, DemoVariant, s_variants, helpString );

/*
 * Havok SDK - NO SOURCE PC DOWNLOAD, BUILD(#20140907)
 * 
 * Confidential Information of Havok.  (C) Copyright 1999-2014
 * Telekinesys Research Limited t/a Havok. All Rights Reserved. The Havok
 * Logo, and the Havok buzzsaw logo are trademarks of Havok.  Title, ownership
 * rights, and intellectual property rights in the Havok software remain in
 * Havok and/or its suppliers.
 * 
 * Use of this software for evaluation purposes is subject to and indicates
 * acceptance of the End User licence Agreement for this product. A copy of
 * the license is included with this software and is also available at www.havok.com/tryhavok.
 * 
 */
