/*
 *
 * Confidential Information of Telekinesys Research Limited (t/a Havok). Not for disclosure or distribution without Havok's
 * prior written consent. This software contains code, techniques and know-how which is confidential and proprietary to Havok.
 * Product and Trade Secret source code contains trade secrets of Havok. Havok Software (C) Copyright 1999-2014 Telekinesys Research Limited t/a Havok. All Rights Reserved. Use of this software is subject to the terms of an end user license agreement.
 *
 */

#include <Demos/demos.h>
#include <Demos/Animation/Api/LayeringAndPartitions/Multithreading/SampleOnly/SampleOnlyPartitionsMultithreadingDemo.h>
#include <Animation/Animation/Animation/Mirrored/hkaMirroredAnimation.h>
#include <Animation/Animation/Animation/Mirrored/hkaMirroredSkeleton.h>
#include <Animation/Animation/Animation/SplineCompressed/hkaSplineCompressedAnimation.h>
#include <Animation/Animation/Animation/Quantized/hkaQuantizedAnimation.h>
#include <Animation/Animation/Animation/PredictiveCompressed/hkaPredictiveCompressedAnimation.h>
#include <Animation/Animation/Animation/ReferencePose/hkaReferencePoseAnimation.h>
#include <Animation/Animation/Animation/Util/hkaPartitionedAnimationUtility.h>
#include <Animation/Animation/hkaAnimationContainer.h>
#include <Animation/Animation/Playback/Control/Default/hkaDefaultAnimationControl.h>
#include <Animation/Animation/Playback/Multithreaded/SampleAndCombine/hkaAnimationSampleAndCombineJobQueueUtils.h>
#include <Animation/Animation/Playback/Multithreaded/SampleAndCombine/hkaAnimationSampleAndCombineJobs.h>
#include <Animation/Animation/Playback/SampleAndBlend/hkaSampleBlendJob.h>
#include <Animation/Animation/Playback/SampleAndBlend/hkaSampleBlendJobQueueUtils.h>
#include <Animation/Animation/Rig/hkaPose.h>
#include <Common/Serialize/Util/hkLoader.h>
#include <Common/Serialize/Util/hkRootLevelContainer.h>
#include <Demos/DemoCommon/Utilities/Animation/AnimationUtils.h>
#include <Demos/DemoCommon/Utilities/Asset/hkAssetManagementUtil.h>
#include <Demos/DemoCommon/Utilities/WindowedAverage/WindowedAverage.h>

// Asset paths for the rig and the animation played by this demo.
// The "_DP" assets are selected when HK_REAL_IS_DOUBLE is defined.
#if defined(HK_REAL_IS_DOUBLE)
static const char* RIG_FILE_NAME = "Resources/Animation/HavokGirl/hkRig_DP.hkt";
static const char* ANIMATION_FILE_NAME = "Resources/Animation/HavokGirl/hkRunTurnLLoop_DP.hkt";
#else
static const char* RIG_FILE_NAME = "Resources/Animation/HavokGirl/hkRig.hkt";
static const char* ANIMATION_FILE_NAME = "Resources/Animation/HavokGirl/hkRunTurnLLoop.hkt";
#endif

// Bone count the loaded skeleton is asserted to have. Kept as a macro because it is
// stringified into the demo help text.
#define NUM_BONES 50
// Number of animation controls stepped and poses sampled every frame. Kept as a macro
// because it is stringified into the demo help text.
#define NUM_ANIMATIONS 50
// Window length passed to the WindowedAverage helper that tracks sampling times.
#define WINDOWED_AVERAGE_SIZE 256

// Animation representation used by a demo variant.
enum AnimationType
{
	Uncompressed   = 0,	// sample the loaded interleaved animation directly
	Spline         = 1,	// hkaSplineCompressedAnimation
	QuantizedSlerp = 2,	// hkaQuantizedAnimation; sample-blend jobs are told to use slerp
	QuantizedQLerp = 3,	// hkaQuantizedAnimation; sample-blend jobs are told not to use slerp
	Predictive     = 4	// hkaPredictiveCompressedAnimation
};

// Kind of job used to do the sampling when a variant runs multithreaded.
enum JobType
{
	JobTypeSampleOnly     = 0,	// hkaAnimationSampleOnlyJob (synchronous or asynchronous submission)
	JobTypeSampleAndBlend = 1	// hkaSampleBlendJob initialised in sample-only mode
};

// Whether the variant wraps its animation in a hkaMirroredAnimation.
enum MirrorType
{
	Mirrored    = 0,
	NotMirrored = 1
};

// Whether the variant samples through the job queue or on the calling thread.
enum ThreadingType
{
	MultiThreaded  = 0,
	SingleThreaded = 1
};

// One row of the variant table below. The table uses positional aggregate
// initialisation, so the member order must not change.
struct DemoVariant
{
	const char* m_name;				// variant name
	const char* m_details;			// short description of the variant
	AnimationType m_animationType;	// which animation representation to build
	JobType m_jobType;				// which job type to use when multithreaded
	ThreadingType m_threadingType;	// job queue vs. calling thread
	MirrorType m_mirrorType;		// whether to mirror the animation
};

// Threading labels that are pasted into the variant names and descriptions.
#define ST_STRING "Single-threaded"
#define MT_STRING "Multi-threaded"

// Variant table, one row per variant:
//   name, details, animation type, job type, threading type, mirror type.
// The multithreaded rows only exist when HK_PLATFORM_MULTI_THREAD is defined.
const struct DemoVariant s_variants[] =
{
#ifdef HK_PLATFORM_MULTI_THREAD
	{ MT_STRING " - SampleOnly - Uncompressed",             "Sampling on " MT_STRING,               Uncompressed,   JobTypeSampleOnly,     MultiThreaded,  NotMirrored },
	{ MT_STRING " - SampleOnly - Spline",                   "Spline compression on " MT_STRING,     Spline,         JobTypeSampleOnly,     MultiThreaded,  NotMirrored },
	{ MT_STRING " - SampleOnly - Spline - Mirrored",        "Mirrored Spline on " MT_STRING,        Spline,         JobTypeSampleOnly,     MultiThreaded,  Mirrored    },
	{ MT_STRING " - SampleAndBlend - Uncompressed",         "Sampling on " MT_STRING,               Uncompressed,   JobTypeSampleAndBlend, MultiThreaded,  NotMirrored },
	{ MT_STRING " - SampleAndBlend - Spline",               "Spline compression on " MT_STRING,     Spline,         JobTypeSampleAndBlend, MultiThreaded,  NotMirrored },
	{ MT_STRING " - SampleAndBlend - Spline - Mirrored",    "Spline compression on " MT_STRING,     Spline,         JobTypeSampleAndBlend, MultiThreaded,  Mirrored    },
	{ MT_STRING " - SampleAndBlend - Quantized (Slerp)",    "Quantized compression on " MT_STRING,  QuantizedSlerp, JobTypeSampleAndBlend, MultiThreaded,  NotMirrored },
	{ MT_STRING " - SampleAndBlend - Quantized (QLerp)",    "Quantized compression on " MT_STRING,  QuantizedQLerp, JobTypeSampleAndBlend, MultiThreaded,  NotMirrored },
	{ MT_STRING " - SampleAndBlend - Predictive",           "Predictive compression on " MT_STRING, Predictive,     JobTypeSampleAndBlend, MultiThreaded,  NotMirrored },
	{ MT_STRING " - SampleAndBlend - Predictive - Mirrored", "Predictive compression on " MT_STRING, Predictive,    JobTypeSampleAndBlend, MultiThreaded,  Mirrored    },
#endif // HK_PLATFORM_MULTI_THREAD
	{ ST_STRING " - SampleOnly - Uncompressed",             "Sampling on " ST_STRING,               Uncompressed,   JobTypeSampleOnly,     SingleThreaded, NotMirrored },
	{ ST_STRING " - SampleOnly - Spline",                   "Spline compression on " ST_STRING,     Spline,         JobTypeSampleOnly,     SingleThreaded, NotMirrored },
	{ ST_STRING " - SampleOnly - Spline - Mirrored",        "Mirrored Spline on " ST_STRING,        Spline,         JobTypeSampleOnly,     SingleThreaded, Mirrored    },
	{ ST_STRING " - SampleAndBlend - Uncompressed",         "Sampling on " ST_STRING,               Uncompressed,   JobTypeSampleAndBlend, SingleThreaded, NotMirrored },
	{ ST_STRING " - SampleAndBlend - Spline",               "Spline compression on " ST_STRING,     Spline,         JobTypeSampleAndBlend, SingleThreaded, NotMirrored },
	{ ST_STRING " - SampleAndBlend - Spline - Mirrored",    "Spline compression on " ST_STRING,     Spline,         JobTypeSampleAndBlend, SingleThreaded, Mirrored    },
	{ ST_STRING " - SampleAndBlend - Quantized (Slerp)",    "Quantized compression on " ST_STRING,  QuantizedSlerp, JobTypeSampleAndBlend, SingleThreaded, NotMirrored },
	{ ST_STRING " - SampleAndBlend - Quantized (QLerp)",    "Quantized compression on " ST_STRING,  QuantizedQLerp, JobTypeSampleAndBlend, SingleThreaded, NotMirrored },
	{ ST_STRING " - SampleAndBlend - Predictive",           "Predictive compression on " ST_STRING, Predictive,     JobTypeSampleAndBlend, SingleThreaded, NotMirrored },
	{ ST_STRING " - SampleAndBlend - Predictive - Mirrored", "Predictive compression on " ST_STRING, Predictive,    JobTypeSampleAndBlend, SingleThreaded, Mirrored    },
};

// Builds the demo for the variant selected by m_variantId:
//  - loads the rig and adds five named partitions to its skeleton,
//  - loads the animation binding and turns its animation into a partitioned animation,
//  - builds the compressed and/or mirrored animation requested by the variant,
//  - creates NUM_ANIMATIONS playback controls with staggered local times,
//  - registers the sampling job handlers (multithreaded platforms only),
//  - creates the windowed-average helper used for timing statistics.
SampleOnlyPartitionsMultithreadingDemo::SampleOnlyPartitionsMultithreadingDemo( hkDemoEnvironment* env )
:	hkDefaultAnimationDemo(env),
	m_useMt(s_variants[m_variantId].m_threadingType == MultiThreaded),
	m_asyncSampling(false),
	m_simpleStatistics(true),
	m_windowedAverage(HK_NULL),
	m_time(0.0f)
{
	// Disable reports:
	if(m_env->m_reportingLevel < hkDemoEnvironment::REPORT_INFO )
	{
		setErrorEnabled(0x36118e94, false);
	}

	//
	// Setup the camera
	//
	{
		hkVector4 from(  -1.0f, -25.0f,  1.0f);
		hkVector4 to  (  0.0f,  0.0f,   0.0f);
		hkVector4 up  (  0.0f,  0.0f,   1.0f);
		setupDefaultCameras( env, from, to, up );
	}

	m_loader = new hkLoader();


	//
	// Get the rig
	//
	{
		hkStringBuf assetFile(RIG_FILE_NAME); hkAssetManagementUtil::getFilePath(assetFile);
		hkRootLevelContainer* container = m_loader->load( assetFile.cString() );
		HK_ASSERT2(0x27343437, container != HK_NULL , "Could not load asset");
		hkaAnimationContainer* ac = reinterpret_cast<hkaAnimationContainer*>( container->findObjectByType( hkaAnimationContainerClass.getName() ));

		HK_ASSERT2(0x27343435, ac && (ac->m_skeletons.getSize() > 0), "No skeleton loaded");
		m_skeleton = ac->m_skeletons[0];

		// The partition layout below is hard-coded for a NUM_BONES-bone skeleton.
		HK_ASSERT3( 0x0, m_skeleton->m_bones.getSize() == NUM_BONES, "Loaded skeleton has " << m_skeleton->m_bones.getSize() << " bones. Please #define NUM_BONES " << m_skeleton->m_bones.getSize() );

		// Create partitions in the skeleton. Together they cover bones [0, 50) without gaps.
		hkaSkeleton::Partition partition;
		{
			partition.m_name = "Root";
			partition.m_startBoneIndex = 0;
			partition.m_numBones = 3;
			m_skeleton->m_partitions.pushBack(partition);

			partition.m_name = "Chest-Head";
			partition.m_startBoneIndex = 3;
			partition.m_numBones = 7;
			m_skeleton->m_partitions.pushBack(partition);

			partition.m_name = "LeftArm";
			partition.m_startBoneIndex = 10;
			partition.m_numBones = 15;
			m_skeleton->m_partitions.pushBack(partition);

			partition.m_name = "RightArm";
			partition.m_startBoneIndex = 25;
			partition.m_numBones = 15;
			m_skeleton->m_partitions.pushBack(partition);

			partition.m_name = "Legs";
			partition.m_startBoneIndex = 40;
			partition.m_numBones = 10;
			m_skeleton->m_partitions.pushBack(partition);
		}
	}

	//
	// Get the animation and the binding
	//
	{
		hkStringBuf assetFile(ANIMATION_FILE_NAME); hkAssetManagementUtil::getFilePath(assetFile);
		hkRootLevelContainer* container = m_loader->load( assetFile.cString() );
		HK_ASSERT2(0x27343437, container != HK_NULL , "Could not load asset");
		hkaAnimationContainer* ac = reinterpret_cast<hkaAnimationContainer*>( container->findObjectByType( hkaAnimationContainerClass.getName() ));

		HK_ASSERT2(0x27343435, ac && (ac->m_animations.getSize() > 0), "No animation loaded");

		HK_ASSERT2(0x27343435, ac && (ac->m_bindings.getSize() > 0), "No binding loaded");
		m_binding = ac->m_bindings[0];
	}

	// Turn the animation into a partitioned animation
	createPartitionedAnimaton(*m_binding, *m_skeleton);
	// Unchecked downcast: assumes the loaded binding refers to an interleaved uncompressed animation.
	hkaInterleavedUncompressedAnimation*  uncompressedAnimation = static_cast<hkaInterleavedUncompressedAnimation*>( m_binding->m_animation.val() );

	//
	// Make new (compressed or mirrored) versions depending on the variant.
	//
	{
		const bool mirror = ( s_variants[m_variantId].m_mirrorType == Mirrored );

		switch(s_variants[m_variantId].m_animationType)
		{
			case Uncompressed:
			{
				m_animation = uncompressedAnimation;
				break;
			}
			case Spline:
			{
				hkaSplineCompressedAnimation::TrackCompressionParams p;
				hkaSplineCompressedAnimation::AnimationCompressionParams a;

#ifdef USE_INDIVIDUAL_TRACK_SAMPLING
				a.m_enableSampleSingleTracks = true;
#endif

				addAnimation( new hkaSplineCompressedAnimation( *uncompressedAnimation, p, a ), mirror );

				break;
			}
			// Both quantized variants are built from the same data; the slerp/qlerp choice
			// is only passed to the sample-blend jobs at sampling time.
			case QuantizedSlerp:
			case QuantizedQLerp:
			{
				addAnimation( new hkaQuantizedAnimation( *m_binding, *m_skeleton ), mirror );
				break;
			}
			case Predictive:
			{
				addAnimation( new hkaPredictiveCompressedAnimation( *m_binding, *m_skeleton ), mirror );
				break;
			}
			default:
			{
				// Without this, m_animation would silently stay uninitialised for a new enum value.
				HK_ASSERT2(0x0, false, "Unhandled animation type");
				m_animation = uncompressedAnimation;
				break;
			}
		}
	}

	//
	// Create controls (to help step the animations, handling looping, speed etc)
	// Each control starts 0.05 further into the animation than the previous one.
	//
	for (int i=0; i< NUM_ANIMATIONS; i++)
	{
		hkaDefaultAnimationControl* control = new hkaDefaultAnimationControl(m_binding);
		control->setLocalTime(i * 0.05f);
		m_controls.pushBack( control );
	}

	setupGraphics( );

#ifdef HK_PLATFORM_MULTI_THREAD
	hkaAnimationSampleAndCombineJobQueueUtils::registerWithJobQueue( m_jobQueue );
	hkaSampleBlendJobQueueUtils::registerWithJobQueue( m_jobQueue );
#endif


	//
	// Make helper which will allow us to average time, see windowed average etc.
	//
	m_windowedAverage = new WindowedAverage( WINDOWED_AVERAGE_SIZE );
}

// Wraps 'original' in a hkaMirroredAnimation driven by a mirrored version of m_skeleton.
// On return m_animation points at the mirrored animation and m_binding at a mirrored
// binding created from it. The temporary mirrored skeleton reference is released here.
void SampleOnlyPartitionsMultithreadingDemo::makeMirroredAnimation(hkaAnimation* original)
{
	// Left/right name fragments used to pair up bones and partitions
	static const char* const tagPairs[][2] =
	{
		{ " L ",     " R "      },
		{ "EyeL",    "EyeR"     },
		{ "LeftArm", "RightArm" },
	};
	const int numTagPairs = int( sizeof(tagPairs) / sizeof(tagPairs[0]) );

	hkArray<hkStringPtr> leftTags;
	hkArray<hkStringPtr> rightTags;
	for (int t = 0; t < numTagPairs; ++t)
	{
		leftTags.pushBack( tagPairs[t][0] );
		rightTags.pushBack( tagPairs[t][1] );
	}

	hkaMirroredSkeleton* skeletonMirror = new hkaMirroredSkeleton( m_skeleton );
	skeletonMirror->computeBonePairingFromNames( leftTags, rightTags );
	skeletonMirror->computePartitionPairingFromNames( leftTags, rightTags );

	hkQuaternion mirrorAxis( 1.0f, 0.0f, 0.0f, 0.0f );
	skeletonMirror->setAllBoneInvariantsFromReferencePose( mirrorAxis, 0.0f );

	// The mirrored animation is built against the current binding, which is replaced right after
	hkaMirroredAnimation* mirrored = new hkaMirroredAnimation( original, m_binding, skeletonMirror );
	m_binding = mirrored->createMirroredBinding();
	m_animation = mirrored;

	skeletonMirror->removeReference();
}

// Installs 'animation' as the animation played by the demo.
// Without mirroring it becomes both m_animation and the binding's animation.
// With mirroring it is wrapped via makeMirroredAnimation() and the reference held
// on 'animation' by the caller is released afterwards.
void SampleOnlyPartitionsMultithreadingDemo::addAnimation( hkaAnimation* animation, bool mirror )
{
	if ( !mirror )
	{
		m_binding->m_animation = animation;
		m_animation = animation;
		return;
	}

	makeMirroredAnimation( animation );
	animation->removeReference();
}

// Rewrites the binding's interleaved animation in place so that it only holds the
// "Root" and "Legs" partitions of 'skeleton', fills binding.m_partitionIndices
// accordingly and clears the track-to-bone mapping.
void SampleOnlyPartitionsMultithreadingDemo::createPartitionedAnimaton( hkaAnimationBinding& binding, const hkaSkeleton& skeleton)
{
	// Unchecked downcast: the binding is expected to refer to an interleaved uncompressed animation
	hkaInterleavedUncompressedAnimation* interleaved = static_cast<hkaInterleavedUncompressedAnimation*>( binding.m_animation.val() );

	// Partitions that stay animated; the remaining skeleton partitions are left out
	hkArray<hkStringPtr> keptPartitions;
	keptPartitions.pushBack("Root");
	keptPartitions.pushBack("Legs");
	const int numKept = keptPartitions.getSize();

	// The index array must be sized before its storage is handed to the utility
	binding.m_partitionIndices.setSize(numKept);

	hkaPartitionedAnimationUtility::Input utilIn;
	utilIn.m_animationData = interleaved->m_transforms.begin();
	utilIn.m_numberOfPoses = interleaved->getNumOriginalFrames();
	utilIn.m_numberOfTransformTracks = interleaved->m_numberOfTransformTracks;
	utilIn.m_skeleton = &skeleton;
	utilIn.m_numberOfPartitions = numKept;
	utilIn.m_partitionNames = keptPartitions.begin();

	// Output goes back into the same transform buffer that serves as input
	hkaPartitionedAnimationUtility::Output utilOut;
	utilOut.m_transformsOut = interleaved->m_transforms.begin();
	utilOut.m_partitionIndicesOut = binding.m_partitionIndices.begin();

	hkaPartitionedAnimationUtility::createPartitionedAnimation( utilIn, utilOut );

	// Adopt the transform and track counts reported by the utility
	interleaved->m_transforms.setSize(utilOut.m_numTotalTranformsOut);
	interleaved->m_numberOfTransformTracks = utilOut.m_numTransformTracksOut;

	// Partitioned animations require an identity mapping
	binding.m_transformTrackToBoneIndices.clear();
}

// Releases everything the constructor created: the controls, the mirrored binding
// (if any), the compressed/mirrored animation (if any), the averaging helper and,
// last, the loader that owns the serialized data.
SampleOnlyPartitionsMultithreadingDemo::~SampleOnlyPartitionsMultithreadingDemo()
{
	for (int c = 0; c < NUM_ANIMATIONS; ++c)
	{
		m_controls[c]->removeReference();
	}

	// A mirrored animation comes with a binding created for it; destroy that binding
	const bool wasMirrored = ( m_animation->getType() == hkaAnimation::HK_MIRRORED_ANIMATION );
	if ( wasMirrored )
	{
		hkaMirroredAnimation::destroyMirroredBinding( m_binding );
	}

	// Anything other than the loaded interleaved animation was created by this demo
	const bool createdByDemo = ( m_animation->getType() != hkaAnimation::HK_INTERLEAVED_ANIMATION );
	if ( createdByDemo )
	{
		m_animation->removeReference();
	}

	delete m_windowedAverage;

	// The loader goes last because it holds all serialized data
	delete m_loader;
}

// Runs one demo frame: handles the two toggle buttons, advances every control,
// samples all animations into freshly allocated poses (through jobs or on the calling
// thread), records the sampling time in microseconds and, unless the renderer option
// starts with "n", draws the poses and the statistics. Always returns DEMO_OK.
hkDemo::Result SampleOnlyPartitionsMultithreadingDemo::stepDemo()
{
	// Button 2 toggles synchronous/asynchronous job submission
	if ( m_env->m_gamePad->wasButtonPressed(HKG_PAD_BUTTON_2) )
	{
		m_asyncSampling = !m_asyncSampling;
	}

	// Button 1 toggles simple/detailed statistics
	if ( m_env->m_gamePad->wasButtonPressed(HKG_PAD_BUTTON_1) )
	{
		m_simpleStatistics = !m_simpleStatistics;
	}

	HK_TIMER_BEGIN("StepAnimations", HK_NULL);

	// Stepping the controls is always done on the calling thread
	for (int c = 0; c < NUM_ANIMATIONS; ++c)
	{
		m_controls[c]->update( m_timestep );
	}

	// Output poses, one per animation; they have to stay alive while jobs write to them
	hkLocalArray<hkaPose*> pose( NUM_ANIMATIONS );
	pose.setSize( NUM_ANIMATIONS );
	for (int p = 0; p < NUM_ANIMATIONS; ++p)
	{
		hkaPose* freshPose = new hkaPose(m_skeleton);

		// Bones outside the sampled partitions keep their reference pose (demo only:
		// the partitions are not combined into a full pose)
		freshPose->setToReferencePose();
		pose[p] = freshPose;
	}

	HK_TIMER_END();


	// Time the sampling itself
	hkStopwatch timer;
	timer.start();

	if ( !m_useMt )
	{
		// Single-threaded: sample every animation on the calling thread
		for (int a = 0; a < NUM_ANIMATIONS; ++a)
		{
			// N.B. Track data is written straight into a pose, which assumes an identity binding.
			// That is not true in general, but for this demo it is simpler than sampling into a
			// temporary track array and copying into the bone (pose) data.

#ifdef USE_INDIVIDUAL_TRACK_SAMPLING
			const int numTracks = m_controls[a]->getAnimationBinding()->m_animation->m_numberOfTransformTracks;
			const hkInt16* const tracks = m_controls[a]->getAnimationBinding()->m_transformTrackToBoneIndices;
			m_animation->sampleIndividualTransformTracks( m_controls[a]->getLocalTime(), tracks, numTracks, pose[a]->accessUnsyncedPoseLocalSpace().begin() );
#else
			// Scratch buffer that receives only the transforms of the animated partitions
			int numPartitionTransforms = m_animation->m_numberOfTransformTracks;
			hkLocalArray<hkQsTransform> partitionTransforms(numPartitionTransforms);
			partitionTransforms.setSize(numPartitionTransforms);
			m_animation->sampleTracks( m_controls[a]->getLocalTime(), partitionTransforms.begin(), HK_NULL );

			// Scatter the partition transforms into the full pose
			hkaPartitionedAnimationUtility::mapPartitionPoseToFullPose(	m_skeleton->m_partitions, m_binding->m_partitionIndices,
																		partitionTransforms.begin(), partitionTransforms.getSize(),
																		pose[a]->accessUnsyncedPoseLocalSpace().begin());
#endif
		}
	}
	else if ( s_variants[m_variantId].m_jobType == JobTypeSampleAndBlend )
	{
		doMultithreadedSamplingUsingSampleBlendJob(pose);
	}
	else if ( m_asyncSampling )
	{
		doMultithreadedSamplingAsynchronously(pose);
	}
	else
	{
		doMultithreadedSamplingSynchronously(pose);
	}

	timer.stop();


	// Seconds -> microseconds
	const hkReal sampleTime = ( timer.getSplitSeconds() * 1000000 );
	m_windowedAverage->pushNewValue( sampleTime );

	// Skip all drawing when the renderer option starts with "n"
	// (NULL renderer, e.g. bootstrapping for stats generation)
	const bool drawingEnabled = ( hkString::strNcmp( m_env->m_options->m_renderer, "n", 1) != 0 );
	if ( drawingEnabled )
	{
		HK_TIMER_BEGIN("syncModelSpace", HK_NULL);
		for (int s = 0; s < NUM_ANIMATIONS; ++s)
		{
			pose[s]->syncModelSpace();
		}
		HK_TIMER_END();

		// Lay the characters out along the x axis, one unit apart
		hkQsTransform worldFromModel (hkQsTransform::IDENTITY);
		for (int k = 0; k < NUM_ANIMATIONS; ++k)
		{
			worldFromModel.m_translation.set( hkReal(k - 1 - (NUM_ANIMATIONS>>2)), 0, 0);
			AnimationUtils::drawPose( *pose[k], worldFromModel );
		}

		const int h = getWindowHeight();
		if ( m_useMt )
		{
			const char* modeText;
			if ( m_asyncSampling )
			{
				modeText = "Asynchronous processing (Hit \x12 to change)";
			}
			else
			{
				modeText = "Synchronous processing (Hit \x12 to change)";
			}
			m_env->m_textDisplay->outputText( modeText, 20, h - 40 );
		}

		printTimingStatistics();

		m_time += m_timestep;
	}

	// The poses only live for one frame
	for (int d = 0; d < NUM_ANIMATIONS; ++d)
	{
		delete pose[d];
	}

	return DEMO_OK;
}

// Samples all NUM_ANIMATIONS animations with hkaAnimationSampleOnlyJob jobs that are
// added to the queue as one batch, waits for them to finish and then maps the sampled
// partition transforms into the full poses in 'pose'.
void SampleOnlyPartitionsMultithreadingDemo::doMultithreadedSamplingSynchronously(hkLocalArray<hkaPose*>& pose)
{
	HK_TIMER_BEGIN("JobSetup", HK_NULL);

	// A sample-only job returns just the transforms of the animated partitions,
	// so every animation gets a scratch buffer of that size
	const int numPartitionTransforms = m_animation->m_numberOfTransformTracks;
	hkArray < hkArray<hkQsTransform> > partitionPoses;
	partitionPoses.setSize(NUM_ANIMATIONS);
	for (int a = 0; a < NUM_ANIMATIONS; ++a)
	{
		partitionPoses[a].setSize(numPartitionTransforms);
	}

	// Per-animation job input; has to stay alive until the jobs have run
	hkArray<hkaAnimationSampleOnlyJob::AnimationData> sampleData;
	sampleData.setSize(NUM_ANIMATIONS);

	// Left at 0: this is where the buffer size could (optionally) be minimised
	// to avoid stack overflow for large animations
	int maxBufferSize = 0;
	for (int a = 0; a < NUM_ANIMATIONS; ++a)
	{
		hkaAnimationSampleOnlyJob::AnimationData& data = sampleData[a];

		const hkReal localTime = m_controls[a]->getLocalTime();
		m_animation->getFrameAndDelta( localTime, data.m_frameIndex, data.m_frameDelta );
		data.m_maxTransformTrack = m_animation->m_numberOfTransformTracks;
		data.m_maxFloatTrack = m_animation->m_numberOfFloatTracks;
		data.m_animationOnCPU = m_animation;
		data.m_poseOut = partitionPoses[a].begin();

		// Warning - if you are *not* using a hkaPose class to allocate your output array for
		// float slots you must ensure that your float slots array is a multiple of 16 bytes in size.
		// See hkaMultithreadedAnimationUtils::allocateFloatSlotsArrayRoundedUpToMultipleOf16() for details.
		data.m_floatSlotsOut = pose[a]->getFloatSlotValues().begin();
	}

	HK_TIMER_END();

	// One job per pool thread, plus one for the calling thread
	const int numJobs = m_threadPool->getNumThreads() + 1;

	hkLocalArray<hkaAnimationSampleOnlyJob> jobs( numJobs );
	jobs.setSize( numJobs );

	// Spread the animations evenly; the first 'leftover' jobs take one extra
	const int baseShare = NUM_ANIMATIONS / numJobs;
	const int leftover = NUM_ANIMATIONS % numJobs;

	hkaAnimationSampleOnlyJob::AnimationData* nextData = sampleData.begin();
	for (int j = 0; j < numJobs; ++j)
	{
		const int share = baseShare + ( j < leftover ? 1 : 0 );

		hkaAnimationSampleOnlyJob& job = jobs[j];
		job.m_animData = nextData;
		job.m_numAnims = share;
		job.m_bufferSize = maxBufferSize;

		// Jobs are not added one by one here: adding takes a critical section, and the set up
		// above is fast, so everything is queued with a single addJobBatch below.

		HK_ASSERT2(0x0, job.isValid(), "Job is invalid");

		nextData += share;
	}

	// Queue all jobs in one go
	{
		HK_TIMER_BEGIN("AddJobBatch", HK_NULL);
		hkLocalArray<hkJob*> batch(numJobs);
		batch.setSize(numJobs);
		for (int j = 0; j < numJobs; ++j)
		{
			batch[j] = &( jobs[j] );
		}
		m_jobQueue->addJobBatch( batch, hkJobQueue::JOB_HIGH_PRIORITY );
		HK_TIMER_END();
	}

	// Let the pool threads and the calling thread work through the queue
	m_threadPool->processJobQueue( m_jobQueue );
	m_jobQueue->processAllJobs();

	// No per-job semaphore is waited on: we wait for the whole pool instead.
	// This also makes sure all timer information has been written back.
	m_threadPool->waitForCompletion();

	// Expand each partition result into the matching full pose
	const int numPoses = pose.getSize();
	for (int p = 0; p < numPoses; ++p)
	{
		hkaPartitionedAnimationUtility::mapPartitionPoseToFullPose(	m_skeleton->m_partitions,
																m_binding->m_partitionIndices,
																partitionPoses[p].begin(),
																numPartitionTransforms,
																pose[p]->accessUnsyncedPoseLocalSpace().begin());
	}
}

// Samples all NUM_ANIMATIONS animations with hkaAnimationSampleOnlyJob jobs that are
// added to the queue one at a time at random moments while the calling thread polls
// per-job "done" flags. Once every flag is set, the sampled partition transforms are
// mapped into the full poses in 'pose'.
// The statement order (wait policy, queue processing, polling, wait policy reset) is
// significant; do not reorder without checking the job queue semantics.
void SampleOnlyPartitionsMultithreadingDemo::doMultithreadedSamplingAsynchronously(hkLocalArray<hkaPose*>& pose)
{
	HK_TIMER_BEGIN("JobSetup", HK_NULL);
	// For Sample Only we get back just a partial animation with the partition's transforms,
	// so each animation gets a scratch buffer of that size
	int numTransformsInPartitions = m_animation->m_numberOfTransformTracks;
	hkArray < hkArray<hkQsTransform> > tempPoses;
	tempPoses.setSize(NUM_ANIMATIONS);
	for(int i = 0; i < NUM_ANIMATIONS; i++)
	{
		tempPoses[i].setSize(numTransformsInPartitions);
	}

	// This data must persist while the job is executing
	hkArray<hkaAnimationSampleOnlyJob::AnimationData> animData; animData.setSize( NUM_ANIMATIONS );

	// Switch the queue's wait policy for the duration of the staggered job submission below;
	// it is switched back to WAIT_UNTIL_ALL_WORK_COMPLETE once all jobs are done
	m_jobQueue->setWaitPolicy(hkJobQueue::WAIT_INDEFINITELY);

	// Never changed from 0 in this function; passed to every job as its buffer size
	int maxBufferSize = 0;
	for (int i=0 ; i < NUM_ANIMATIONS; i++)
	{
		hkReal localTime = m_controls[i]->getLocalTime();
		m_animation->getFrameAndDelta( localTime, animData[i].m_frameIndex, animData[i].m_frameDelta );
		animData[i].m_maxTransformTrack = m_animation->m_numberOfTransformTracks;
		animData[i].m_maxFloatTrack = m_animation->m_numberOfFloatTracks;
		animData[i].m_animationOnCPU = m_animation;
		animData[i].m_poseOut = tempPoses[i].begin();

		// Populate the buffer with data from the controls.
		// Warning - if you are *not* using a hkaPose class to allocate your output array for
		// float slots you must ensure that your float slots array is a multiple of 16 bytes in size.
		// See hkaMultithreadedAnimationUtils::allocateFloatSlotsArrayRoundedUpToMultipleOf16() for details.
		animData[i].m_floatSlotsOut = pose[i]->getFloatSlotValues().begin();

	}

	HK_TIMER_END();

	// We'll distribute the work evenly: one job per pool thread plus one for the calling thread.
	int numThreads = m_threadPool->getNumThreads();
	numThreads++; // count the main thread too

	const int numJobs = numThreads;
	hkArray<hkaAnimationSampleOnlyJob> sampleJobs( numJobs );
	sampleJobs.setSize( numJobs );

	// Array of "job finished" flags, one per animation job; each job is given the address of its flag.
	// NOTE(review): the original comment said this must be allocated on the heap because the
	// addresses are written by the job, yet an hkLocalArray is used - confirm where hkLocalArray
	// memory lives and whether that is acceptable on every target platform.
	hkLocalArray<hkUint32> animationJobDoneFlags(numJobs);
	animationJobDoneFlags.setSize(numJobs, 0);

	hkaAnimationSampleOnlyJob::AnimationData* currentAnimData = animData.begin();
	for (int s=0; s < numJobs; s++)
	{
		// The first (NUM_ANIMATIONS % numThreads) jobs take one extra animation
		const int animsForJob = NUM_ANIMATIONS / numThreads + int(s < NUM_ANIMATIONS % numThreads);	// handles any remainders

		// Initialize the job to sample its contiguous slice of the animations
		sampleJobs[s].m_animData = currentAnimData;
		sampleJobs[s].m_numAnims = animsForJob;
		sampleJobs[s].m_jobDoneNotifier.m_flag = &(animationJobDoneFlags[s]);

		// Here we can (optionally) try and minimise the buffer size to avoid stack overflow for large animations.
		sampleJobs[s].m_bufferSize = maxBufferSize;

		HK_ASSERT2(0x19e27e5b, sampleJobs[s].isValid(), "Data is invalid!");	// Sanity check before we hand off the jobs

		currentAnimData += animsForJob;
	}


	int numJobsAdded = 0;

	// Start processing before any job has been added; jobs are fed in by the loop below.
	// NOTE(review): the queue is still empty when processAllJobs() is called on this thread -
	// confirm how it behaves under WAIT_INDEFINITELY with no queued work.
	m_threadPool->processJobQueue( m_jobQueue );
	m_jobQueue->processAllJobs();

	// Next block on the calling thread until all jobs are marked as done
	hkBool allJobsComplete = false;
	while( !allJobsComplete )
	{
		// To fake the asynchronous nature of job creation, just for this demo, every so often add in some more jobs
		// (about 1% of the loop iterations add the next not-yet-added job).
		if( hkUnitTest::rand01() < 0.01f)
		{
			if(numJobsAdded < numJobs)
			{
				m_jobQueue->addJob( *reinterpret_cast<hkJobQueue::JobQueueEntry*>(&sampleJobs[numJobsAdded]), hkJobQueue::JOB_HIGH_PRIORITY);
				numJobsAdded++;
			}
		}

		// Deal with jobs which are done here
		//
		// If animationJobDoneFlags[i] == 1, then animation job 'i' is complete
		//

		// Check for completion: a single flag that is still 0 keeps the loop running.
		// NOTE(review): the flags are plain hkUint32 values polled in a busy loop while being
		// written by the jobs - confirm that this is visible to this thread without extra synchronisation.
		allJobsComplete = true;
		for( hkInt32 i = 0; i < numJobs; ++i )
		{
			if( animationJobDoneFlags[i] == 0 )
			{
				allJobsComplete = false;
			}
		}

	}

	// Let job queue know that we are done.
	m_jobQueue->setWaitPolicy(hkJobQueue::WAIT_UNTIL_ALL_WORK_COMPLETE);

	// Wait for the actual tasks to finish. This makes sure all timer information will have finished DMAing to main memory
	m_threadPool->waitForCompletion();

	// Expand each partition result into the matching full pose
	for(int i = 0; i < pose.getSize(); i++)
	{
		hkaPartitionedAnimationUtility::mapPartitionPoseToFullPose(	m_skeleton->m_partitions,
																m_binding->m_partitionIndices,
																tempPoses[i].begin(),
																numTransformsInPartitions,
																pose[i]->accessUnsyncedPoseLocalSpace().begin());
	}
}

// Samples all NUM_ANIMATIONS animations with hkaSampleBlendJob jobs initialised in
// sample-only mode, queued as one batch. After the jobs have finished, the sampled
// partition transforms are mapped into the full poses in 'pose'.
void SampleOnlyPartitionsMultithreadingDemo::doMultithreadedSamplingUsingSampleBlendJob(hkLocalArray<class hkaPose*>& pose)
{
	// One job per pool thread, plus one for the calling thread
	const int numJobs = m_threadPool->getNumThreads() + 1;

	// Sample-only output is just the transforms of the animated partitions,
	// so every animation gets a scratch buffer of that size
	const int numPartitionTransforms = m_animation->m_numberOfTransformTracks;
	hkArray < hkArray<hkQsTransform> > partitionPoses;
	partitionPoses.setSize(NUM_ANIMATIONS);
	for (int a = 0; a < NUM_ANIMATIONS; ++a)
	{
		partitionPoses[a].setSize(numPartitionTransforms);
	}

	hkLocalArray<hkaSampleBlendJob> blendJobs( numJobs );
	blendJobs.setSize( numJobs );

	// Spread the animations evenly; the first 'leftover' jobs take one extra
	const int baseShare = NUM_ANIMATIONS / numJobs;
	const int leftover = NUM_ANIMATIONS % numJobs;

	int nextAnim = 0;
	for (int j = 0; j < numJobs; ++j)
	{
		hkaSampleBlendJob& job = blendJobs[j];
		const int share = baseShare + ( j < leftover ? 1 : 0 );

		job.initSampleOnly( share, *m_skeleton, false, numPartitionTransforms );

		for (int k = 0; k < share; ++k)
		{
			hkReal sampleTime = m_controls[nextAnim]->getLocalTime();
			hkQsTransform* transformsOut = partitionPoses[nextAnim].begin();
			hkReal* floatSlotsOut = pose[nextAnim]->getFloatSlotValues().begin();
			job.addAnimation( *m_skeleton, *m_animation, sampleTime, transformsOut, floatSlotsOut, m_binding );
			++nextAnim;
		}

		// Only the QuantizedSlerp variant asks for slerp
		job.m_useSlerpForQuantized = ( s_variants[m_variantId].m_animationType == QuantizedSlerp );
	}

	// Queue all jobs in one go
	{
		HK_TIMER_BEGIN("AddJobBatch", HK_NULL);
		hkLocalArray<hkJob*> batch(numJobs);
		batch.setSize(numJobs);
		for (int j = 0; j < numJobs; ++j)
		{
			batch[j] = &( blendJobs[j] );
		}
		m_jobQueue->addJobBatch( batch, hkJobQueue::JOB_HIGH_PRIORITY );
		HK_TIMER_END();
	}

	// Let the pool threads and the calling thread work through the queue
	m_threadPool->processJobQueue( m_jobQueue );
	m_jobQueue->processAllJobs();

	// No per-job semaphore is waited on: we wait for the whole pool instead.
	// This also makes sure all timer information has been written back.
	m_threadPool->waitForCompletion();

	// Expand each partition result into the matching full pose
	const int numPoses = pose.getSize();
	for (int p = 0; p < numPoses; ++p)
	{
		hkaPartitionedAnimationUtility::mapPartitionPoseToFullPose(	m_skeleton->m_partitions,
																m_binding->m_partitionIndices,
																partitionPoses[p].begin(),
																numPartitionTransforms,
																pose[p]->accessUnsyncedPoseLocalSpace().begin());
	}
}

// Prints the sampling-time statistics gathered in m_windowedAverage, either as a short
// summary (m_simpleStatistics) or as the full list. Lines are stacked 20 pixels apart,
// the first one 100 pixels up from the value returned by getWindowHeight().
void SampleOnlyPartitionsMultithreadingDemo::printTimingStatistics()
{
	char buf[1024];

	const int h = getWindowHeight();

	int x = 20;
	int y = 80;
	int d = 20;	// line spacing; y is advanced by d before every line is printed

	if ( m_simpleStatistics )
	{
		hkString::sprintf(buf, "Average sample Time (last %3d Samples): %8.0f (uSecs)", m_windowedAverage->getWindowSize(), m_windowedAverage->getWindowedMean() );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

		hkString::sprintf(buf, "Current Sample Time:                    %8.0f (uSecs)", m_windowedAverage->getLastTime() );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );
	}
	else
	{
		hkString::sprintf(buf, "     Total Samples: %8d", m_windowedAverage->getNumSamples() );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

		hkString::sprintf(buf, "       Window Size: %8d", m_windowedAverage->getWindowSize() );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

		// Label fixed: it used to read "Widowed Std Dev"
		hkString::sprintf(buf, "  Windowed Std Dev: %8.0f (uSecs)", m_windowedAverage->getWindowedStandardDeviation() );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

		hkString::sprintf(buf, "  Windowed Maximum: %8.0f (uSecs)", m_windowedAverage->getWindowedMax() );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

		hkString::sprintf(buf, "  Windowed Minimum: %8.0f (uSecs)", m_windowedAverage->getWindowedMin() );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

		hkString::sprintf(buf, "   Windowed Median: %8.0f (uSecs)", m_windowedAverage->getWindowedMedian() );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

		hkString::sprintf(buf, "     Windowed Mean: %8.0f (uSecs)", m_windowedAverage->getWindowedMean() );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

		hkString::sprintf(buf, "        Total Mean: %8.0f (uSecs)", m_windowedAverage->getTotalMean() );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

		hkString::sprintf(buf, "           Current: %8.0f (uSecs)", m_windowedAverage->getLastTime() );
		m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );
	}

	// Both views end with the same three lines: workload, spacer, and the mode hint
	hkString::sprintf(buf, "Sample time for %d animations, %d bones", NUM_ANIMATIONS, NUM_BONES );
	m_env->m_textDisplay->outputText( buf, x, h - (y+=d) );

	m_env->m_textDisplay->outputText( " ", x, h - (y+=d) );

	const char* modeHint = m_simpleStatistics
		? "Simple Statistics (Hit \x11 to change)"
		: "Detailed Statistics (Hit \x11 to change)";
	m_env->m_textDisplay->outputText( modeHint, x, h - (y+=d) );
}



// Demo registration.
// ST_STRING / MT_STRING are already defined next to the variant table and are not
// used below, so they are not redefined here.

// Demo type flags; the double-precision build registers as a plain animation demo.
#if defined(HK_REAL_IS_DOUBLE)
static const int demoType = HK_DEMO_TYPE_ANIMATION;
#else
static const int demoType = HK_DEMO_TYPE_ANIMATION | HK_DEMO_TYPE_CRITICAL | HK_DEMO_TYPE_STATS;
#endif

// Two-step stringification so that NUM_ANIMATIONS / NUM_BONES are expanded to their
// values before being turned into string literals.
#define TO_STRING1( x ) # x
#define TO_STRING2( x ) TO_STRING1( x )

static const char* helpString = "Multithreaded sampling of " TO_STRING2( NUM_ANIMATIONS ) " animations X " TO_STRING2( NUM_BONES ) " bones each.";

HK_DECLARE_DEMO_VARIANT_USING_STRUCT( SampleOnlyPartitionsMultithreadingDemo, demoType, DemoVariant, s_variants, helpString );

/*
 * Havok SDK - NO SOURCE PC DOWNLOAD, BUILD(#20140907)
 * 
 * Confidential Information of Havok.  (C) Copyright 1999-2014
 * Telekinesys Research Limited t/a Havok. All Rights Reserved. The Havok
 * Logo, and the Havok buzzsaw logo are trademarks of Havok.  Title, ownership
 * rights, and intellectual property rights in the Havok software remain in
 * Havok and/or its suppliers.
 * 
 * Use of this software for evaluation purposes is subject to and indicates
 * acceptance of the End User licence Agreement for this product. A copy of
 * the license is included with this software and is also available at www.havok.com/tryhavok.
 * 
 */
