#include <Physics2012/Collide/hkpCollide.h>
#include <Common/Base/Types/Geometry/Aabb/hkAabbUtil.h>
#include <Physics2012/Collide/Shape/Convex/Triangle/hkpTriangleShape.h>
#include <Physics2012/Collide/Shape/Compound/Collection/physics_mesh_shape/physics_mesh_shape.h>

#if defined(HK_PLATFORM_SPU)
	#include <Common/Base/Memory/PlatformUtils/Spu/SpuDmaCache/hkSpu4WayCache.h>
	#include <Physics2012/Collide/Util/Spu/havok_spu_util.h>
	#include <Physics2012/Dynamics/World/Simulation/Multithreaded/Spu/hkpSpuConfig.h>
#endif

// --------------------
//
// Defines/Macros
//
// --------------------

// DSFL jfk: Added reflection data by hand. Seems to work...
// struct hk_character_shape_phantom::_Auto
// {
// };
//#ifdef HK_DETAIL_REFLECT_DEFINITIONS
// havok_rigid_body::havok_rigid_body(hkReflect::BypassCtorFlag f)
// {
// }
// Reflection hook (hand-written, DSFL jfk): pairs this object's address with the
// reflected type record obtained from HK_REFLECT_GET_TYPE(physics_mesh_shape).
hkReflect::Detail::AddrAndType physics_mesh_shape::getExactType() const
{
	return hkReflect::Detail::AddrAndType(this, HK_REFLECT_GET_TYPE(physics_mesh_shape));
}
//hkReflect::Detail::TypeRegNode hk_character_shape_phantom_typeRegNode(HK_REFLECT_GET_NON_CONST_TYPE(hk_character_shape_phantom));

// Hand-written reflection type data for physics_mesh_shape.
// Entries, in order: the mask of optional entries, the parent type, then the
// optional values supplied here (record format, type name, version).
// NOTE(review): the mask names more options than the three values listed below;
// presumably HK_FILTER_REFLECT_OPT reduces the mask to what is actually provided --
// confirm against Havok-generated reflection code.
hkReflect::Detail::TypeData hkReflect::ReflectionOf< physics_mesh_shape >::Holder::typeData =
{
	HK_FILTER_REFLECT_OPT(hkReflect::Opt::FORMAT | hkReflect::Opt::NAME | hkReflect::Opt::VERSION | hkReflect::Opt::COPY_CONSTRUCTOR | hkReflect::Opt::DESTRUCTOR | hkReflect::Opt::COPY_ASSIGNMENT | hkReflect::Opt::INHERITANCE | hkReflect::Opt::SIZE_ALIGN | hkReflect::Opt::REFLECT_CONSTRUCTOR | hkReflect::Opt::DECLS | hkReflect::Opt::AFTER_REFLECT_NEW | hkReflect::Opt::VALIDATE), // mask of optionals
	(hkUlong)HK_REFLECT_GET_TYPE(hkpShapeCollection), // parent type
	HK_REFLECT_TYPE_OPTIONAL(hkReflect::Opt::FORMAT, hkReflect::Format::OfRecord::Value), // reflected as a record
	HK_REFLECT_TYPE_OPTIONAL(hkReflect::Opt::NAME, "physics_mesh_shape"), // reflected type name
	HK_REFLECT_TYPE_OPTIONAL(hkReflect::Opt::VERSION, 1), // reflected version
};


// Returns the number of leading (most significant) zero bits in an integer.
#if defined(HK_PLATFORM_XBOX360)

  #define BIT_UTIL_NLZ(u) _CountLeadingZeros(u)

#elif defined(HK_PLATFORM_PS3)

  #if defined(HK_PLATFORM_PS3_SPU)

    #define BIT_UTIL_NLZ(u) spu_extract(spu_cntlz(spu_promote(u,0)),0)

  #else

    #define BIT_UTIL_NLZ(u) __cntlzw(u)

  #endif

#elif defined(HK_PLATFORM_WIN32)

	#include <intrin.h>

	#define BIT_UTIL_NLZ(u) bit_util_nlz_pc(u)
	// Win32 implementation of BIT_UTIL_NLZ, built on _BitScanReverse.
	//
	// u:			The value to inspect.
	//
	// returns:	The number of zero bits above the highest set bit, or 32 when no
	//				bit is set at all.
	static inline int bit_util_nlz_pc(unsigned int u)
	{
		unsigned long highest_set_bit;
		if (!_BitScanReverse(&highest_set_bit, u)) {
			// The scan found no set bit, so every one of the 32 bits is a leading zero.
			return 32;
		}

		// Bit 31 is the most significant bit, so the leading-zero count is its distance from 31.
		return 31 - static_cast<int>(highest_set_bit);
	}

#elif defined(HK_PLATFORM_PS4)  // HVS_EC  Using the gcc/CLANG equivalent to __cntlzw

    #define BIT_UTIL_NLZ(u) __builtin_clz(u)

#elif defined(HK_PLATFORM_NX)  // HVS_EC  Using the gcc/CLANG equivalent to __cntlzw

#define BIT_UTIL_NLZ(u) __builtin_clz(u)

#else

  #error "Unsupported platform!"

#endif

// --------------------
//
// Enumerated Types
//
// --------------------


// --------------------
//
// Structures/Classes
//
// --------------------


// --------------------
//
// Global Variables
//
// --------------------


// --------------------
//
// Local Variables
//
// --------------------

#if !defined(HK_PLATFORM_SPU)

	// List of collision filters specified by the game.
	// Filled in by physics_mesh_initialize() and copied into every physics_mesh_shape
	// by its constructor, so each shape carries its own snapshot of the table.
	static hkUint32 Physics_mesh_collision_filter_table[ PMCF_NUM_COLLISION_FILTERS ] = { 0 };

#endif // #if !defined(HK_PLATFORM_SPU)

// --------------------
//
// Console Functions
//
// --------------------


// --------------------
//
// Internal Functions
//
// --------------------

#if defined(HK_PLATFORM_SPU)

extern class hkSpu4WayCache* g_SpuCollideUntypedCache;

// SPU variant: fetch one element of a main-memory array through the untyped collide cache.
//
// base:				Main-memory address of the first element.
// index:				Element index (asserted non-negative).
// elemsize:			Byte stride between consecutive elements.
// dmaGroup:			Forwarded to the cache fetch.
// waitForCompletion:	Forwarded to the cache fetch.
//
// returns:	Local address of the requested element inside the fetched cache line.
//
HK_ALWAYS_INLINE static const void* HK_CALL hkGetArrayElemWithByteStridingHalfCacheSize( const void* base, int index, int elemsize, int dmaGroup = HK_SPU_DMA_GROUP_STALL, bool waitForCompletion = true  )
{
	// HAVOK 2011.3 MODIFIED CODE - STL 2010/02/08 - asserts copied from the other GetArrayElem functions.
	HK_ASSERT2(0xdbcf8890, base != HK_NULL, "null array base pointer passed to hkGetArrayElem");
	HK_ASSERT2(0xdbcf8891, index >= 0,		"Negative array index passed to hkGetArrayElem");

	// Addresses are aligned down to HALF a cache line while a FULL line is still fetched.
	// The upper half of the fetched line therefore acts as a spill-over buffer for reads
	// that would otherwise run past the end of the line.
	const int half_line_mask = ~((HK_SPU_AGENT_SECTOR_JOB_MAX_UNTYPED_CACHE_LINE_SIZE/2)-1);

	const hkUlong element_addr_ppu = hkUlong(base) + ( index * elemsize );
	const hkUlong line_addr_ppu    = element_addr_ppu & half_line_mask;
	const hkUlong offset_in_line   = element_addr_ppu & ~half_line_mask;

	const hkUlong line_addr_spu = (hkUlong)g_SpuCollideUntypedCache->getFromMainMemoryInlined( (const void*)line_addr_ppu , HK_SPU_AGENT_SECTOR_JOB_MAX_UNTYPED_CACHE_LINE_SIZE, dmaGroup, waitForCompletion );

	return reinterpret_cast<const void*> ( line_addr_spu + offset_in_line );
}

#else

// Non-SPU variant: plain pointer arithmetic with a byte stride.
// dmaGroup and waitForCompletion are unused here; they keep the call sites identical
// to the SPU variant of this function.
template <typename TYPE>
HK_ALWAYS_INLINE static const TYPE* HK_CALL hkGetArrayElemWithByteStridingHalfCacheSize( const TYPE* base, int index, int striding, int dmaGroup = 0, bool waitForCompletion = true )
{
	const char* const first_byte = reinterpret_cast<const char*>(base);
	const int byte_offset = index * striding;

	return reinterpret_cast<const TYPE*>( first_byte + byte_offset );
}

#endif

#if !defined(HK_PLATFORM_SPU)

// Normalize a Havok vector by its 3-component length without producing NaNs.
// A vector whose length is below FLT_EPSILON is replaced by the zero vector instead of
// being divided by its (near) zero length.
//
// hk_vector:	(input/output) The vector to normalize.
//
// returns:	The length of the original vector, or 0.0f if it was zeroed.
//
// NOTE(review): this uses hkSimdFloat32 while other code in this file uses hkSimdReal --
// confirm that is intended.
//
static float havok_vector_normalize3_safe_ems(hkVector4& hk_vector)
{
	const hkSimdFloat32 original_length = hk_vector.length<3>();
	const bool too_short_to_normalize = original_length.getReal() < FLT_EPSILON;

	if (!too_short_to_normalize)
	{
		// Scale by 1/length.
		hk_vector.mul(original_length.reciprocal());
		return original_length.getReal();
	}

	hk_vector.setZero();
	return 0.0f;
}

#endif // #if !defined(HK_PLATFORM_SPU)

// --------------------
//
// External Functions
//
// --------------------

#if !defined(HK_PLATFORM_SPU)

// Constructor.
//
// vert_buf:				Vertex positions; the pointer is stored, the data is not copied.
//							getChildShape() reads it as packed triples of hkReal.
// num_vertices:			Number of vertices in 'vert_buf'.
// aabb_min / aabb_max:		Corners of the mesh's axis-aligned bounding box.
// num_triangle_aux_data:	Number of entries in 'p_triangle_aux_data'.
// p_triangle_aux_data:		Per-triangle auxiliary data; the pointer is stored, not copied.
//
// NOTE(review): since only the pointers are kept, the caller presumably has to keep both
// buffers alive for the lifetime of the shape -- confirm.
//
physics_mesh_shape::physics_mesh_shape(hkReal const* vert_buf, hkUint32 num_vertices, 
													hkVector4 const& aabb_min, hkVector4 const& aabb_max,
													hkUint32 num_triangle_aux_data, physics_mesh_triangle_aux_data const* p_triangle_aux_data)
	: hkpShapeCollection(hkcdShapeType::V_PHYSICS_MESH, COLLECTION_USER)
	, m_num_vertices(num_vertices)
	, m_vertices(vert_buf)
	, m_num_triangle_aux_data(num_triangle_aux_data)
	, m_triangle_aux_data(p_triangle_aux_data)
	, m_triangle_radius(hkConvexShapeDefaultRadius)
	, m_flags(0)
{
	// Tag the hkpShapeCollection so it can be recognised as a physics_mesh_shape.
	m_userData = PHYSICS_MESH_USER_DATA_ID;

	// Store the bounding box as center + half extents:
	//   center       = (min + max) / 2
	//   half extents = (max - min) / 2
	hkSimdReal one_half;
	one_half.setFromFloat(0.5f);

	m_aabb_center.setAdd(aabb_min, aabb_max);
	m_aabb_center.mul(one_half);

	m_aabb_half_extents.setSub(aabb_max, aabb_min);
	m_aabb_half_extents.mul(one_half);

	// Take a private snapshot of the global collision filter table.
	hkString::memCpy(m_collision_filter_table, Physics_mesh_collision_filter_table, sizeof(m_collision_filter_table));
}

#endif // #if !defined(HK_PLATFORM_SPU)

//
// hkpShape interface.
//

void physics_mesh_shape::getAabb( const hkTransform& local_to_world, hkReal tolerance, hkAabb& out ) const
{
#if 0 && defined(HK_DEBUG)//HVS_DMK // DSFL jfk
	hkAabbUtil::calcAabb(local_to_world, m_aabb_half_extents, m_aabb_center, tolerance, out);
#else
	hkSimdReal tolerance_local;//HVS_DMK
	tolerance_local.setFromFloat(tolerance);//HVS_DMK
	hkAabbUtil::calcAabb(local_to_world, m_aabb_half_extents, m_aabb_center, tolerance_local, out);//HVS_DMK
#endif//#ifdef HK_DEBUG//HVS_DMK
}

#if !defined(HK_PLATFORM_SPU)

// Report how many bytes this shape needs on the SPU, or -1 if it cannot run there.
//
// input:				Description of the owning body and the registered agents.
// spuBufferSizeLeft:	Unused by this implementation.
//
// returns:	sizeof(physics_mesh_shape), or -1 on SPU-capable platforms when the midphase
//			agent is registered and the body is not fixed/keyframed or has a saved
//			dynamic motion.
//
int physics_mesh_shape::calcSizeForSpu(const CalcSizeForSpuInput& input, int spuBufferSizeLeft) const
{
#if defined(HK_PLATFORM_HAS_SPU)
	// no dynamic extended mesh shapes on the spu
	const bool may_be_dynamic = !input.m_isFixedOrKeyframed || input.m_hasDynamicMotionSaved;

	if ( may_be_dynamic && input.m_midphaseAgent3Registered )
	{
		HK_WARN(0xdbc1ffbc, "This physics_mesh_shape cannot run on SPU - midphase agent is registered and rigid body is not fixed or keyframed or has a dynamic motion saved.");

		// A fixed/keyframed body only reaches this point because of its saved dynamic
		// motion, so tell the user how to get back onto the SPU.
		if (input.m_isFixedOrKeyframed)
		{
			HK_WARN(0xad906291, "This shape can be run on SPU only for fixed and keyframed bodies. Now it defaults to be run on PPU. You can make it run on SPU by removing the saved dynamic motion.");
		}

		return -1;
	}
#endif

	return sizeof(physics_mesh_shape);
}

#endif // #if !defined(HK_PLATFORM_SPU)

//
// hkpShapeContainer interface.
//

#if !defined(HK_PLATFORM_SPU)

/// The number of child shapes.
/// Not implemented for physics_mesh_shape: calling it raises an HK_ERROR. The trailing
/// 'return 0' only matters if HK_ERROR returns control to the caller.
int physics_mesh_shape::getNumChildShapes() const
{
	HK_ERROR(0x0, "physics_mesh_shape::getNumChildShapes() not implemented!");
	return 0;
}

/// Get the first child shape key.
/// physics_mesh_shape cannot enumerate its keys in order, so this always returns
/// HK_INVALID_SHAPE_KEY.
hkpShapeKey physics_mesh_shape::getFirstKey() const
{
	// We can't access the keys in order.
	return HK_INVALID_SHAPE_KEY;
}

/// Get the next child shape key.
/// Not implemented for physics_mesh_shape: calling it raises an HK_ERROR, and
/// 'oldKey' is ignored. HK_INVALID_SHAPE_KEY is returned if HK_ERROR returns control
/// to the caller.
hkpShapeKey physics_mesh_shape::getNextKey(hkpShapeKey oldKey) const
{
	HK_ERROR(0x0, "physics_mesh_shape::getNextKey() not implemented!");
	return HK_INVALID_SHAPE_KEY;
}

#endif // #if !defined(HK_PLATFORM_SPU)

/// Return the collision filter info for a given child shape.
///
/// The triangle's auxiliary flags select an entry of m_collision_filter_table:
///   - no flag set:  the PMCF_WORLD entry;
///   - otherwise:    the entry whose index is the position of the highest set flag bit.
/// PMCF_PLAYER_COLLIDE_ONLY_DEMO is resolved to PMCF_PLAYER_COLLIDE_ONLY or
/// PMCF_NO_COLLISION depending on the mesh's demo-collision flag.
///
/// key:      Shape key of the triangle.
///
/// returns:  The collision filter info, or 0 if the auxiliary data cannot be fetched or
///           the flag bit does not map to a table entry (both cases also assert).
hkUint32 physics_mesh_shape::getCollisionFilterInfo( hkpShapeKey key ) const
{
	physics_mesh_triangle_aux_data aux_data;
	if (get_auxiliary_data(aux_data, key) == false) {

		// DSFL jfk: HK_ASSERT2 replaced by HK_ASSERT.
		HK_ASSERT(0x0, 0, "Auxillary data not found.");
		return 0;
	}

	hkUint32 const triangle_flags = aux_data.m_flags;
	if (triangle_flags == 0) {

		return m_collision_filter_table[ PMCF_WORLD ];
	}

	// Calculate the index into the collision filter table: the position of the highest
	// set bit. 'triangle_flags' is non-zero here, which matters because some
	// BIT_UTIL_NLZ back ends are undefined for a zero argument.
	hkUint32 const table_index = 31 - BIT_UTIL_NLZ(triangle_flags);
	HK_ASSERT(0x0, table_index < PMCF_NUM_COLLISION_FILTERS, "FAIL"); // DSFL jfk

	// The assert alone does not protect builds where asserts are compiled out; never
	// index past the end of the table. Mirrors the failure path above.
	if (table_index >= PMCF_NUM_COLLISION_FILTERS) {

		return 0;
	}

	// Only collide with the player?
	if (table_index == PMCF_PLAYER_COLLIDE_ONLY_DEMO) {

		// Determine if demo collision is enabled.
		bool const demo_collision_enabled = (m_flags & (1 << MESH_FLAG_DEMO_COLLISION_ENABLED)) != 0;

		if (demo_collision_enabled) {
			return m_collision_filter_table[ PMCF_PLAYER_COLLIDE_ONLY ];
		} else {
			return m_collision_filter_table[ PMCF_NO_COLLISION ];
		}
	}

	return m_collision_filter_table[ table_index ];
}

/// Gets a child shape using a shape key.
///
/// The key encodes one triangle: a first vertex index plus two offsets, each offset
/// being relative to the previous vertex index. The three vertices are read from
/// m_vertices as packed triples of hkReal and an hkpTriangleShape is built in place in
/// 'buffer'. The triangle uses get_triangle_radius(), no welding and a zero extrusion,
/// and its user data points back at this physics_mesh_shape.
///
/// key:     Shape key identifying the triangle.
/// buffer:  Storage the triangle is constructed in; the returned pointer points into it.
///          No destructor is run on it by this function.
///
/// returns: The triangle shape living in 'buffer'.
///
/// Note: the vertex indices are range-checked by asserts only.
const hkpShape* physics_mesh_shape::getChildShape( hkpShapeKey key, hkpShapeBuffer& buffer ) const
{
	// Decompose the shape key into vertex indices.
	// v0 is stored directly; v1 and v2 are stored as offsets from the preceding index.
	const hkUint32 index_v0 = key & FIRST_VERTEX_INDEX_MASK;
	const hkUint32 offset_1 = (key >> SECOND_VERTEX_INDEX_OFFSET_SHIFT) & SECOND_VERTEX_INDEX_OFFSET_MASK;
	const hkUint32 index_v1 = index_v0 + offset_1;
	const hkUint32 offset_2 = (key >> THIRD_VERTEX_INDEX_OFFSET_SHIFT) & THIRD_VERTEX_INDEX_OFFSET_MASK;
	const hkUint32 index_v2 = index_v1 + offset_2;
	// DSFL jfk: these were HK_ASSERT2 before:
// 	HK_ASSERT2(0x0, index_v0 < m_num_vertices, "Vector index 1 out of bounds");
// 	HK_ASSERT2(0x0, index_v1 < m_num_vertices, "Vector index 2 out of bounds");
// 	HK_ASSERT2(0x0, index_v2 < m_num_vertices, "Vector index 3 out of bounds");
	HK_ASSERT(0x0, index_v0 < m_num_vertices, "Vector index 1 out of bounds");
	HK_ASSERT(0x0, index_v1 < m_num_vertices, "Vector index 2 out of bounds");
	HK_ASSERT(0x0, index_v2 < m_num_vertices, "Vector index 3 out of bounds");

	// Fetch the vertices. Each vertex is three consecutive hkReals (stride sizeof(hkReal) * 3).
	// The first branch is disabled ('#if 0') and uses different load calls.
#if 0 && defined(HK_DEBUG)//HVS_DMK // DSFL jfk
	hkVector4 hk_v0;
	const hkReal* const addr_v0 = static_cast< const hkReal* >(hkGetArrayElemWithByteStridingHalfCacheSize(m_vertices, index_v0, sizeof(hkReal) * 3));
	hk_v0.load3(addr_v0);

	hkVector4 hk_v1;
	const hkReal* const addr_v1 = static_cast< const hkReal* >(hkGetArrayElemWithByteStridingHalfCacheSize(m_vertices, index_v1, sizeof(hkReal) * 3));
	hk_v1.load3(addr_v1);

	hkVector4 hk_v2;
	const hkReal* const addr_v2 = static_cast< const hkReal* >(hkGetArrayElemWithByteStridingHalfCacheSize(m_vertices, index_v2, sizeof(hkReal) * 3));
	hk_v2.load3(addr_v2);
#else
	// NOTE(review): HK_ALIGN_REAL wraps the pointer declarations for v1 and v2 but not
	// for v0 -- presumably it affects the pointer variable rather than the vertex data
	// and is not needed; confirm before changing.
	hkVector4 hk_v0;
	const hkReal* const addr_v0 = static_cast< const hkReal* >(hkGetArrayElemWithByteStridingHalfCacheSize(m_vertices, index_v0, sizeof(hkReal) * 3));//HVS_DMK
	hk_v0.load<3>(addr_v0);//HVS_DMK

	hkVector4 hk_v1;
	HK_ALIGN_REAL(const hkReal* const addr_v1) = static_cast< const hkReal* >(hkGetArrayElemWithByteStridingHalfCacheSize(m_vertices, index_v1, sizeof(hkReal) * 3));//HVS_DMK
	hk_v1.load<3>(addr_v1);//HVS_DMK

	hkVector4 hk_v2;
	HK_ALIGN_REAL(const hkReal* const addr_v2) = static_cast< const hkReal* >(hkGetArrayElemWithByteStridingHalfCacheSize(m_vertices, index_v2, sizeof(hkReal) * 3));//HVS_DMK
	hk_v2.load<3>(addr_v2);//HVS_DMK
#endif//#ifdef HK_DEBUG//HVS_DMK

	// Create the triangle in the supplied buffer.
	// Off the SPU the triangle is placement-constructed; on the SPU the buffer is only
	// reinterpreted, so every field the triangle needs is set explicitly below.
#if !defined ( HK_PLATFORM_SPU )
	hkpTriangleShape* HK_RESTRICT p_triangle = new (&buffer) hkpTriangleShape();
#else
	hkpTriangleShape* HK_RESTRICT p_triangle = (hkpTriangleShape*)&buffer;	
#endif

	p_triangle->setType( HKCD_SHAPE_TYPE_FROM_CLASS(hkpTriangleShape) );
	// Cleared here; overwritten with the owning mesh pointer at the end of this function.
	p_triangle->setUserData( 0 );
	p_triangle->setRadius( get_triangle_radius() );
	p_triangle->setWeldingType( hkpWeldingUtility::WELDING_TYPE_NONE );
	HKCD_PATCH_SHAPE_VTABLE( p_triangle );

	// Set the extrusion (always zero for these triangles).
	hkVector4 zerovec;
#if 0 && defined HK_DEBUG//HVS_DMK // DSFL jfk
	zerovec.setZero4();//HVS_DMK
#else
	zerovec.setZero();//HVS_DMK
#endif//#ifdef HK_DEBUG//HVS_DMK
	p_triangle->setExtrusion(zerovec);

	// No welding information is stored for these triangles.
	p_triangle->setWeldingInfo(0);

	p_triangle->setVertex(0, hk_v0);
	p_triangle->setVertex(1, hk_v1);
	p_triangle->setVertex(2, hk_v2);

	// Store a pointer to this physics_mesh_shape in the triangle so collision filtering
	// can check the flags.  We might want to store the flags in the triangle directly but
	// that would increase the cost of getting a triangle.
	p_triangle->setUserData(reinterpret_cast< hkUlong >(this));

	return p_triangle;
}

//
// physics_mesh_shape interface.
//

// Get auxiliary data associated with a triangle.
//
// aux_data_out:	(output) Receives a copy of the triangle's auxiliary data. Left
//					untouched when the lookup fails.
// shape_key:		Shape key of the triangle.
//
// returns:	true if the data was copied, false (after asserting) if the index decoded
//			from the key is outside the auxiliary data array.
//
bool physics_mesh_shape::get_auxiliary_data(physics_mesh_triangle_aux_data& aux_data_out, hkpShapeKey shape_key) const
{
	// The auxiliary index lives in the low bits of the key.
	// NOTE(review): the original comment says this is the same as the first vertex
	// index -- confirm AUX_DATA_INDEX_MASK matches FIRST_VERTEX_INDEX_MASK.
	hkUint32 const aux_index = shape_key & AUX_DATA_INDEX_MASK;

	if (aux_index < m_num_triangle_aux_data) {

#if defined(HK_PLATFORM_SPU)
		// The SPU build copies the element through the havok_spu helper rather than
		// indexing the array directly.
		havok_spu_get_array_elem_no_cache(aux_data_out, m_triangle_aux_data, aux_index);
#else
		aux_data_out = m_triangle_aux_data[ aux_index ];
#endif // #if defined(HK_PLATFORM_SPU)

		return true;
	}

	// DSFL jfk: HK_ASSERT2 replaced by HK_ASSERT.
	HK_ASSERT(0x0, false, "havok_city_chunk_collision_shape::get_auxiliary_data: 'aux_index' is too large!");
	return false;
}

#if !defined(HK_PLATFORM_SPU)

// Calculate the triangle normal.
//
// hk_normal:	(output) The normal of the triangle, normalized by
//				havok_vector_normalize3_safe_ems. Left untouched when false is returned.
// shape_key:	Shape key of the triangle. Its flag bits decide whether the normal is
//				negated.
//
// returns:	true if a normal was written, false if the child shape could not be
//			obtained or is not a triangle.
//
// NOTE(review): the negation uses hkSimdFloat32 while other code in this file uses
// hkSimdReal -- confirm that is intended.
//
bool physics_mesh_shape::calculate_normal(hkVector4& hk_normal, hkpShapeKey shape_key) const
{
	// Build the triangle in a local buffer.
	hkpShapeBuffer triangle_storage;
	const hkpShape* const p_child = getChildShape(shape_key, triangle_storage);

	if (p_child == HK_NULL || p_child->getType() != hkcdShapeType::TRIANGLE) {

		return false;
	}

	const hkVector4* const p_corners = static_cast< const hkpTriangleShape* >(p_child)->getVertices();

	// normal = (v1 - v0) x (v2 - v1), then normalized.
	// NOTE: This must match vec_perp.
	hkVector4 edge_01;
	edge_01.setSub(p_corners[1], p_corners[0]);

	hkVector4 edge_12;
	edge_12.setSub(p_corners[2], p_corners[1]);

	hk_normal.setCross(edge_01, edge_12);
	havok_vector_normalize3_safe_ems(hk_normal);

	// Any flag bit set in the key means the normal has to be flipped.
	hkUint32 const key_flags = (shape_key >> SHAPE_KEY_FLAGS_SHIFT) & SHAPE_KEY_FLAGS_MASK;
	if (key_flags != 0)
	{
		hkSimdFloat32 minus_one;
		minus_one.setFromFloat(-1.0f);
		hk_normal.mul(minus_one);
	}

	return true;
}

// Enable/disable demo collision.
//
// enable:	true sets the MESH_FLAG_DEMO_COLLISION_ENABLED bit in m_flags, false clears
//			it. getCollisionFilterInfo() reads this bit when it resolves
//			PMCF_PLAYER_COLLIDE_ONLY_DEMO triangles.
//
void physics_mesh_shape::enable_demo_collision(bool enable)
{
	const int demo_collision_bit = 1 << MESH_FLAG_DEMO_COLLISION_ENABLED;

	if (enable) {
		m_flags |= demo_collision_bit;
	} else {
		m_flags &= ~demo_collision_bit;
	}
}

#endif // #if !defined(HK_PLATFORM_SPU)

//
// System interface.
//

#if !defined(HK_PLATFORM_SPU)

// Initialize the physics mesh system.
//
// collision_filter_table: List of collision filters specified by the game.
//
void physics_mesh_initialize(const hkUint32 collision_filter_table[ PMCF_NUM_COLLISION_FILTERS ])
{
	// Copy over the table.
	hkString::memCpy(Physics_mesh_collision_filter_table, collision_filter_table, sizeof(Physics_mesh_collision_filter_table));
}

#endif // #if !defined(HK_PLATFORM_SPU)