// Horizon-based ambient occlusion with approximation
// DX11 only (for performance reasons); could work on DX10 too
//
// TODO: Add jittering and optimize a bit!!
//

// Clear values. ClearColor0 and ClearColor1 are referenced by name from the
// HBAO technique's Script annotation ("ClearColor0=ClearColor0;" etc.).
float4 ClearColor0 : DIFFUSE = { 0.0f, 0.0f, 0.0f, 1.0f};
float4 ClearColor1= { 1000000.0f, 0.0f, 0.0f, 0.0f};
// NOTE(review): ClearDepth and DepthClip are not read by any shader code in
// this file; presumably they are consumed by the host application - confirm.
float ClearDepth = 0.0f;
float DepthClip = 3000.0f;

// Effect-level script annotation. The strings below tag the effect as a
// "scene" class, "postprocess" order effect with "color" output and select
// the HBAO technique.
// NOTE(review): the meaning of the 0.8 value is not visible in this file (it
// looks like a script/annotation version number) - confirm against the host.
float Script : STANDARDSGLOBAL
<
	string UIWidget = "none";
	string ScriptClass = "scene";
	string ScriptOrder = "postprocess";
	string ScriptOutput = "color";
	
	string Script = "Technique=HBAO;";
> = 0.8;

// Matrices supplied by the host; both are used as mul(vector, matrix).
//  g_mProjInv - applied in VS_Hbao to the quad's clip-space corner (z = w = 1)
//               to produce the per-pixel eye ray.
//  g_mProj    - applied in getEyeSpacePos to an eye-space position to get back
//               to clip space and from there to texture coordinates.
float4x4 g_mProjInv; // presumably the inverse of g_mProj (name suggests it) - confirm with host
float4x4 g_mProj;

// Fog settings, read by computeFog and PS_Blur7AndCombine.
// g_cFogColor: only .xyz is used; computeFog forces the fog alpha to 1.
float4 g_cFogColor = { 0,0,0,1 };
// g_iFogParams:
//  x    - fog mode: > 2 EXP2, > 1 EXP, > 0 LINEAR, otherwise fog is skipped
//  y, z - linear fog range; the unfogged fraction is (z - depth) / (z - y)
//  w    - density for the EXP and EXP2 modes
float4 g_iFogParams = { 0,0,0,0 };

// Blends a colour towards the fog colour according to depth.
//
//   viewZ - depth of the shaded point (the value compared against the fog
//           range / multiplied by the fog density)
//   c     - colour to be fogged
//
// Returns (1 - f) * fogColour + f * c, where f is the unfogged fraction
// selected by the fog mode in g_iFogParams.x and clamped to [0,1].
//
// NOTE(review): when the fog mode is <= 0 the fraction stays at 0, so the
// result is the pure fog colour rather than 'c'. The caller in this file
// only invokes computeFog when g_iFogParams.x > 0.
float4 computeFog( in float viewZ, in float4 c )
{
	const float fogMode = g_iFogParams.x;
	const float densityDepth = viewZ * g_iFogParams.w;

	// Fraction of the input colour that survives the fog.
	float unfogged = 0;
	if (fogMode > 2)
	{
		// EXP2: 1/(e^((d*density)^2))
		unfogged = 1.0 / exp( densityDepth*densityDepth );
	}
	else if (fogMode > 1)
	{
		// EXP: 1/(e^(d*density))
		unfogged = 1.0 / exp( densityDepth );
	}
	else if (fogMode > 0)
	{
		// LINEAR: 1 at the start of the range (y), 0 at its end (z)
		const float fogStart = g_iFogParams.y;
		const float fogEnd = g_iFogParams.z;
		unfogged = (fogEnd - viewZ) / (fogEnd - fogStart);
	}
	unfogged = clamp(unfogged, 0, 1);

	const float4 fogColor = float4(g_cFogColor.xyz,1);
	return ( (1 - unfogged) * fogColor ) + ( unfogged * c);
}

// Render-target size in pixels; the shaders use 1.0 / WindowSize as the
// size of one texel in texture-coordinate units.
float2 WindowSize : VIEWPORTPIXELSIZE < string UIWidget = "none"; >;

// Scene colour input, sampled by the final combine pass.
// NOTE(review): 'source = "SCENE"' is interpreted by the host - presumably the
// already rendered frame; confirm.
texture SceneMap : RENDERCOLORTARGET
<
	string source = "SCENE";
>;

// Single-channel float target that receives the raw occlusion term
// (written by the first pass, read by the horizontal blur).
texture SSAOMap : RENDERCOLORTARGET
< 
    float2 ViewportRatio = { 1.0, 1.0 };
    int MIPLEVELS = 1;
    string format = "R32F";
>;

// Intermediate target holding the horizontally blurred occlusion
// (written by the horizontal blur, read by the vertical blur/combine).
texture SSAOMap_Temp: RENDERCOLORTARGET
< 
    float2 ViewportRatio = { 1.0, 1.0 };
    int MIPLEVELS = 1;
    string format = "R32F";
>;

// Depth input; the shaders read its .r/.x channel as the depth of each pixel.
// NOTE(review): 'source = "DEPTH"' is interpreted by the host; the exact depth
// encoding (units, linear vs. non-linear) is not visible here - confirm.
texture ViewDepthMap : RENDERCOLORTARGET
< 
	string source = "DEPTH";
>;

// Per-pixel random values used by PS_Hbao to rotate the sample directions
// (.xy) and to jitter the step offset (.z).
// NOTE(review): 'source = "DITHER"' is interpreted by the host - confirm contents.
texture DitherMap
< 
	string source = "DITHER";
>;

// Scene colour: clamped addressing, point filtering.
sampler SceneMapSampler = sampler_state 
{
    texture = <SceneMap>;
    AddressU  = CLAMP;        
    AddressV  = CLAMP;
    AddressW  = CLAMP;
	FILTER = MIN_MAG_MIP_POINT;
};

// Raw occlusion: clamped addressing, bilinear min/mag filtering so the blur
// taps can fall between texels.
sampler SSAOMapSampler = sampler_state 
{
    texture = <SSAOMap>;
    AddressU  = CLAMP;        
    AddressV  = CLAMP;
    AddressW  = CLAMP;
	FILTER = MIN_MAG_LINEAR_MIP_POINT;
};


// Horizontally blurred occlusion: same sampling setup as SSAOMapSampler.
sampler SSAOMap_TempSampler = sampler_state 
{
    texture = <SSAOMap_Temp>;
    AddressU  = CLAMP;        
    AddressV  = CLAMP;
    AddressW  = CLAMP;
   	FILTER = MIN_MAG_LINEAR_MIP_POINT;
};

// Depth: clamped addressing, point filtering (depth values are never
// interpolated between texels; the linear variant is left commented out).
sampler ViewDepthMapSampler = sampler_state 
{
    texture = <ViewDepthMap>;
    AddressU  = CLAMP;        
    AddressV  = CLAMP;
    AddressW  = CLAMP;
 	FILTER = MIN_MAG_MIP_POINT; //MIN_MAG_LINEAR_MIP_POINT;
};

// Dither/noise texture: WRAP addressing so it tiles across the screen when
// the lookup coordinate is scaled by g_ditherScale; point filtering.
sampler DitherSampler = sampler_state 
{
    texture = <DitherMap>;
    AddressU  = WRAP;        
    AddressV  = WRAP;
    AddressW  = WRAP;
 	FILTER = MIN_MAG_MIP_POINT;
};


// Vertex-to-pixel interface of the plain full-screen quad (VS_Quad / PS_Display).
struct VS_OUTPUT
{
   	float4 Position   : POSITION;   // clip-space position of the quad vertex
    float2 TexCoord0  : TEXCOORD0;  // texture coordinate passed through from the vertex
    float2 TexCoord1  : TEXCOORD1;  // VS_Quad writes the same value as TexCoord0
};

// Pass-through vertex shader for a full-screen quad.
//
//   Position - clip-space vertex position
//   TexCoord - texture coordinate, copied to both output coordinate sets
//
// On non-DX10 builds the position is nudged by half a texel so that texels
// line up with pixels.
VS_OUTPUT VS_Quad(float4 Position : POSITION, 
				  float2 TexCoord : TEXCOORD0)
{
	VS_OUTPUT result;

	result.Position = Position;

#ifndef HKG_DX10
	// Clip space spans [-1,1] across WindowSize pixels, i.e. one pixel is
	// 2/WindowSize, so half a texel is 1/WindowSize.
	const float2 texelSize = 1.0 / WindowSize;
	const float2 halfTexelShift = float2( -texelSize.x, texelSize.y );
	result.Position.xy += (halfTexelShift * Position.w);
#endif

	result.TexCoord0 = TexCoord;
	result.TexCoord1 = TexCoord;

	return result;
}

// Debug-style display: shows the red channel of 'tex' as an opaque grey value.
//
//   IN  - interpolated quad output; only TexCoord0 is used
//   tex - texture whose .r channel is visualised
float4 PS_Display(VS_OUTPUT IN,
			  uniform sampler2D tex) : COLOR
{
	const float grey = tex2D(tex, IN.TexCoord0).r;
	return float4(grey.xxx, 1);
}


// Vertex-to-pixel interface of the occlusion pass (VS_Hbao / PS_Hbao).
struct VSS_OUTPUT
{
   	float4 Position   : POSITION;   // clip-space position of the quad vertex
    float2 TexCoord0  : TEXCOORD0;  // texture coordinate of the pixel being shaded
    float4 EyeDir     : TEXCOORD1;  // quad corner (z = w = 1) transformed by g_mProjInv
};

// Vertex shader of the occlusion pass: a full-screen quad that also emits
// the un-projected corner so the pixel shader can rebuild eye-space rays.
//
//   Position - clip-space vertex position
//   TexCoord - texture coordinate, passed through unchanged
VSS_OUTPUT VS_Hbao(float4 Position : POSITION, 
				  float2 TexCoord : TEXCOORD0)
{
	VSS_OUTPUT result;

	// The eye ray is derived from the original corner position, i.e. it is
	// independent of the half-texel shift applied to the output position.
	result.EyeDir = mul( float4(Position.xy,1,1), g_mProjInv );

	result.Position = Position;

#ifndef HKG_DX10
	// Clip space spans [-1,1] across WindowSize pixels, i.e. one pixel is
	// 2/WindowSize, so half a texel is 1/WindowSize.
	const float2 texelSize = 1.0 / WindowSize;
	const float2 halfTexelShift = float2( -texelSize.x, texelSize.y );
	result.Position.xy += (halfTexelShift * Position.w);
#endif

	result.TexCoord0 = TexCoord;

	return result;
}

// Tunable parameters.
//
// g_rad is the base length unit of the effect: PS_Hbao steps along each ray
// in multiples of g_rad, computeOcclusion fades occlusion out over
// g_rad * 100, and the blur passes use g_rad * g_blurDepthLimit as their
// depth-discontinuity threshold.
float g_rad = 0.01;
// NOTE(review): g_rangeMin, g_rangeMax and g_sphericalSampling are not read by
// any shader code in this file - possibly left over from an earlier
// implementation or set by the host for other effects; confirm before removing.
float g_rangeMin = 0.1;
float g_rangeMax = 0.8;
float4 g_sphericalSampling[32];
// Spacing of the blur taps, in texels (VS_Blur).
float BlurWidth = 1.5;
// Blend factor in the combine pass:
// (1 - SceneScale) * occlusion + SceneScale * (scene * occlusion).
float SceneScale = 0.8;
float g_blurDepthLimit = 100; // times g_rad
// NOTE(review): g_ssaoDepthDiffLimit and g_ssaoLightenLimit are not read in this file.
float g_ssaoDepthDiffLimit = 8; // times g_rad
float g_ssaoLightenLimit = 0.5;

// NOTE(review): g_totalSsaoTop and g_totalSsaoScale are not read in this file.
float g_totalSsaoTop = 1.0;
float g_totalSsaoScale = 1.0f;

// Multiplier applied to the texture coordinate for the dither lookup in PS_Hbao.
float g_ditherScale = 4.f;	// rt res / 4
	
	
// Blur filter weights for the 7-tap separable blur, passed to PS_Blur7 and
// PS_Blur7AndCombine by the technique. Symmetric around the centre tap
// (index 3); the seven values add up to 1.
const half weights7[7] = {
	0.05,
	0.1,
	0.2,
	0.3,
	0.2,
	0.1,
	0.05,
};	

// Number of screen-space ray directions marched per pixel in PS_Hbao.
#define NUM_DIRS 12

// The ray directions: twelve (approximately) unit vectors in the xy plane,
// 30 degrees apart, covering the full circle. z is always 0.
static const float3 g_dirs[NUM_DIRS] =
{
	// right half of the circle, plus straight up
	float3(  0.5f,   -0.866f, 0 ),
	float3(  0.866f, -0.5f,   0 ),
	float3(  1,       0,      0 ),
	float3(  0.866f,  0.5f,   0 ),
	float3(  0.5f,    0.866f, 0 ),
	float3(  0,       1,      0 ),

	// left half of the circle, plus straight down
	float3( -0.5f,    0.866f, 0 ),
	float3( -0.866f,  0.5f,   0 ),
	float3( -1,       0,      0 ),
	float3( -0.866f, -0.5f,   0 ),
	float3( -0.5f,   -0.866f, 0 ),
	float3(  0,      -1,      0 )
};

// Returns the position of the surface that is visible along the ray
// through 'eyeXY'.
//
//   eyeXY - eye-space point whose screen location is looked up
//   texZ  - depth texture; .r is read and clamped to [0,1000]
//
// The point is projected with g_mProj to find its texel, and the result is
// the normalized direction towards 'eyeXY' scaled by the depth stored there.
float3 getEyeSpacePos( float3 eyeXY, sampler2D texZ )
{
	// Eye space -> clip space -> texture space ([0,1], v flipped).
	const float4 clipPos = mul( float4( eyeXY, 1 ), g_mProj );
	const float2 ndc = clipPos.xy / clipPos.w;
	const float2 uv = ndc * float2( 0.5f, -0.5f ) + 0.5f;

	const float storedDepth = tex2D(texZ, uv).r;
	const float3 rayDir = normalize(eyeXY.xyz);
	return rayDir*clamp(storedDepth,0,1000);
}

// Reads the depth two texels away from 'centTexCoord' along 'dir'.
//
//   centTexCoord - texture coordinate of the centre pixel
//   dir          - screen-space direction; only .xy is used
//   texZ         - depth texture; .r is read and clamped to [0,1000]
//
// The depth is returned replicated into all three components.
float3 getNextTexelDepth( float2 centTexCoord, float3 dir, sampler2D texZ )
{
	const float2 twoTexels = 2.0 / WindowSize; // 2 texels to be safe
	const float2 uvOffset = dir.xy * twoTexels;
	const float neighbourDepth = clamp( tex2D(texZ, centTexCoord.xy + uvOffset).r, 0, 1000 );
	return float3( neighbourDepth, neighbourDepth, neighbourDepth );
}

// Builds an (un-normalized) surface tangent for one ray direction: the
// screen-space direction in xy and the depth change towards the
// neighbouring sample in z.
//
//   centTexCoord - texture coordinate of the centre pixel
//   dir          - screen-space ray direction; only .xy is used
//   depthCenter  - depth at the centre pixel
//   texZ         - depth texture
float3  getEyeSpaceTangent( float2 centTexCoord, float3 dir, float depthCenter, sampler2D texZ  )
{
	const float neighbourDepth = getNextTexelDepth(centTexCoord, dir, texZ).x;
	const float depthSlope = neighbourDepth - depthCenter;
	return float3( dir.xy, depthSlope );
}

// Occlusion contribution of a single sample along a ray.
//
//   eyeCenter - eye-space position of the pixel being shaded
//   eyePos    - eye-space sample point; replaced by the visible surface
//               position along its ray (see getEyeSpacePos)
//   normal    - surface normal at eyeCenter
//   tangent   - surface tangent for the current ray direction
//   texZ      - depth texture
//
// Samples that lie behind the tangent contribute a flat 0.5; otherwise the
// contribution is the cosine between the normal and the direction to the
// sample. The result is attenuated quadratically with distance and reaches
// zero at g_rad * 100.
float computeOcclusion( float3 eyeCenter, float3 eyePos, float3 normal, float3 tangent, sampler2D texZ )
{
	const float3 surfacePos = getEyeSpacePos( eyePos, texZ );
	const float3 toSample = surfacePos - eyeCenter;
	const float sampleDist = length( toSample );

	const bool behindTangent = dot(tangent, toSample) < 0.0;
	const float rawOcclusion = behindTangent ? 0.5 : ( dot( normal, toSample ) / sampleDist );

	const float fallOff = g_rad * 100;
	const float normDist = saturate(sampleDist / fallOff);
	const float attenuation = 1.0 - normDist * normDist;

	return attenuation*rawOcclusion;
}

// Applies a 2D rotation, given as two rows, to 'vec'.
//
//   vec       - vector to rotate; the rows are expanded with .xyy, so vec.z
//               is weighted by each row's y component as well
//   rotationX - row producing the x component of the result
//   rotationY - row producing the y component of the result
float2 rotate(float3 vec, float2 rotationX, float2 rotationY)
{
	return float2( dot(vec.xyz, rotationX.xyy),
	               dot(vec.xyz, rotationY.xyy) );
}


// Occlusion pass: marches NUM_DIRS rays (5 steps each) around the pixel in
// the depth buffer and outputs 1 - average of the strongest occlusion found
// along each ray, replicated into all four channels.
//
//   IN   - interpolated quad output (texture coordinate and eye ray)
//   texZ - depth texture; .r is read and clamped to [0,1000]
float4 PS_Hbao(VSS_OUTPUT IN,
			  uniform sampler2D texZ) : COLOR
{
	// Eye-space position of this pixel: clamped depth along the normalized eye ray.
	const float centerDepth = clamp(tex2D(texZ, IN.TexCoord0).r,0,1000);
	const float3 viewRay = IN.EyeDir.xyz/(IN.EyeDir.z*IN.EyeDir.w);
	const float3 centerPos = centerDepth * normalize(viewRay);

	// Surface normal from the screen-space derivatives of the position.
	const float3 dPosDx = normalize( ddx_fine( centerPos ) );
	const float3 dPosDy = normalize( ddy_fine( centerPos ) );
	const float3 surfaceNormal = normalize( cross( dPosDx, dPosDy ) );

	// Per-pixel dither: .xy gives a rotation for the direction set,
	// .z shifts the step positions along each ray.
	const float3 dither = tex2D(DitherSampler, IN.TexCoord0 * g_ditherScale).rgb;
	const float2 rotRowX = normalize(dither.xy - .5);
	const float2 rotRowY = rotRowX.yx * float2(-1.0f, 1.0f);

	float occlusionSum = 0;
	for ( int dirIdx=0; dirIdx<NUM_DIRS; dirIdx++ )
	{
		const float3 rayDir = normalize( float3( rotate(g_dirs[dirIdx].xyz, rotRowX, rotRowY), 0) );

		// Tangent for this direction, made perpendicular to the normal.
		float3 rayTangent = getEyeSpaceTangent( IN.TexCoord0, rayDir, centerDepth, texZ );
		rayTangent -= dot(surfaceNormal, rayTangent) * surfaceNormal;

		const float3 stepVec = rayDir * g_rad;

		// Keep only the strongest occluder along the ray.
		float rayOcclusion = 0;
		for ( int stepIdx=1; stepIdx<=5; stepIdx++ )		// steps per ray
		{
			const float3 offset = (stepIdx+dither.z) * stepVec;
			const float stepOcclusion = computeOcclusion( centerPos, centerPos + offset, surfaceNormal, rayTangent, texZ );
			rayOcclusion = max( rayOcclusion, stepOcclusion );
		}
		occlusionSum += rayOcclusion;
	}
	occlusionSum /= NUM_DIRS;

	const float visibility = saturate( 1 - occlusionSum );
	return float4( visibility, visibility, visibility, visibility );
}

// Vertex-to-pixel interface of the 7-tap blur. The seven tap coordinates are
// precomputed in VS_Blur and packed two per float4 (.xy and .zw).
struct VS_OUTPUT_BLUR
{
    float4 Position   : POSITION;   // clip-space position of the quad vertex
    float2 TexCoord0  : TEXCOORD0;  // tap 0 (first tap of the kernel)
    float4 TexCoord12 : TEXCOORD1;  // taps 1 (.xy) and 2 (.zw)
    float4 TexCoord34 : TEXCOORD2;  // taps 3 (.xy, the centre tap) and 4 (.zw)
    float4 TexCoord56 : TEXCOORD3;  // taps 5 (.xy) and 6 (.zw)
};

// Vertex shader of the separable blur: precomputes the seven tap
// coordinates, spaced BlurWidth texels apart along 'direction' and centred
// on the vertex's texture coordinate (for nsamples == 7).
//
//   Position  - clip-space vertex position
//   TexCoord  - texture coordinate of the vertex
//   nsamples  - kernel size used to centre the taps; seven taps are always written
//   direction - blur axis, (1,0) for horizontal or (0,1) for vertical
VS_OUTPUT_BLUR VS_Blur(float4 Position : POSITION, 
					   float2 TexCoord : TEXCOORD0,
					   uniform int nsamples, // always 7 now
					   uniform float2 direction
					   )
{
	VS_OUTPUT_BLUR result = (VS_OUTPUT_BLUR)0;
	const float2 texelSize = 1.0 / WindowSize;

	result.Position = Position;

#ifdef HKG_DX10
	// DX10 samples at texel centres, so shift the lookups by half a texel
	// along the blur axis to get the same in-between (linearly filtered)
	// sampling that the DX9 half-texel offset provides for free.
	TexCoord.xy += direction*float2( -texelSize.x, -texelSize.y)*0.5 ;
#else
	// Clip space spans [-1,1] across WindowSize pixels, i.e. one pixel is
	// 2/WindowSize, so half a texel is 1/WindowSize.
	const float2 halfTexelShift = float2( -texelSize.x, texelSize.y );
	result.Position.xy += (halfTexelShift * Position.w);
#endif

	const float2 tapStep = BlurWidth * texelSize * direction;
	const float2 firstTap = TexCoord - (nsamples-1)*0.5*tapStep;

	result.TexCoord0  = firstTap;
	result.TexCoord12 = float4( firstTap + tapStep*1, firstTap + tapStep*2 );
	result.TexCoord34 = float4( firstTap + tapStep*3, firstTap + tapStep*4 );
	result.TexCoord56 = float4( firstTap + tapStep*5, firstTap + tapStep*6 );

	return result;
}

// Depth-aware 7-tap blur of the occlusion term (used for the horizontal pass).
//
//   IN       - tap coordinates prepared by VS_Blur
//   ssaotex  - occlusion texture to blur (.r)
//   depthtex - depth texture used to detect discontinuities (.x)
//   weight   - the seven filter weights
//
// A tap whose depth differs from the centre depth by g_rad * g_blurDepthLimit
// or more is replaced by the centre occlusion value, so the blur does not
// bleed across depth edges. The result is written to the red channel.
float4 PS_Blur7(VS_OUTPUT_BLUR IN,
			   uniform sampler2D ssaotex,
			   uniform sampler2D depthtex,
			   uniform half weight[7]
			   ) : COLOR
{
	// Tap coordinates in kernel order; index 3 is the centre tap.
	float2 tapCoord[7] =
	{
		IN.TexCoord0,
		IN.TexCoord12.xy,
		IN.TexCoord12.zw,
		IN.TexCoord34.xy,
		IN.TexCoord34.zw,
		IN.TexCoord56.xy,
		IN.TexCoord56.zw
	};

	float2 centerSampleCoord = tapCoord[3];

#ifdef HKG_DX10
	// undo the half-texel blur offset added by VS_Blur (horizontal axis)
	float2 texelSize = 1.0 / WindowSize; 
	centerSampleCoord += float2( texelSize.x, 0 )*0.5 ;
#endif

	const float ourDepth = tex2D(depthtex, centerSampleCoord ).x;
	const float centreSsao = tex2D(ssaotex, centerSampleCoord).r;
	const float blurLimit = g_rad*g_blurDepthLimit;

	float blurred = 0;
	[unroll]
	for ( int tap=0; tap<7; tap++ )
	{
		const float tapDepth = tex2D(depthtex, tapCoord[tap]).x;
		const float tapSsao = tex2D(ssaotex, tapCoord[tap]).r;
		const bool sameSurface = abs(ourDepth - tapDepth) < blurLimit;
		blurred += (sameSurface ? tapSsao : centreSsao) * weight[tap];
	}

	return float4(blurred,0,0,1);
} 

// Depth-aware 7-tap blur of the occlusion term (used for the vertical pass),
// followed by the combination with the scene colour and optional fog.
//
//   IN       - tap coordinates prepared by VS_Blur
//   ssaotex  - occlusion texture to blur (.r)
//   depthtex - depth texture used to detect discontinuities (.x)
//   scene    - rendered scene colour
//   weight   - the seven filter weights
//
// A tap whose depth differs from the centre depth by g_rad * g_blurDepthLimit
// or more is replaced by the centre occlusion value. The final colour is
// (1 - SceneScale) * occlusion + SceneScale * (scene * occlusion), fogged
// with computeFog when a fog mode is active (g_iFogParams.x > 0).
float4 PS_Blur7AndCombine(VS_OUTPUT_BLUR IN,
			   uniform sampler2D ssaotex,
			   uniform sampler2D depthtex,
			   uniform sampler2D scene,
			   uniform half weight[7]
			   ) : COLOR
{
	// Tap coordinates in kernel order; index 3 is the centre tap.
	float2 tapCoord[7] =
	{
		IN.TexCoord0,
		IN.TexCoord12.xy,
		IN.TexCoord12.zw,
		IN.TexCoord34.xy,
		IN.TexCoord34.zw,
		IN.TexCoord56.xy,
		IN.TexCoord56.zw
	};

	float2 centerSampleCoord = tapCoord[3];

#ifdef HKG_DX10
	// undo the half-texel blur offset added by VS_Blur (vertical axis)
	float2 texelSize = 1.0 / WindowSize; 
	centerSampleCoord += float2( 0, texelSize.y)*0.5 ;
#endif

	const float ourDepth = tex2D(depthtex, centerSampleCoord ).x;
	const float centreSsao = tex2D(ssaotex, centerSampleCoord ).r;
	const float4 sceneColor = tex2D(scene, centerSampleCoord );
	const float blurLimit = g_rad*g_blurDepthLimit;

	float blurred = 0;
	[unroll]
	for ( int tap=0; tap<7; tap++ )
	{
		const float tapDepth = tex2D(depthtex, tapCoord[tap]).x;
		const float tapSsao = tex2D(ssaotex, tapCoord[tap]).r;
		const bool sameSurface = abs(ourDepth - tapDepth) < blurLimit;
		blurred += (sameSurface ? tapSsao : centreSsao) * weight[tap];
	}

	// Blend between plain occlusion and occlusion-modulated scene colour.
	const float4 occlusion = float4(blurred,blurred,blurred,1);
	float4 color = (1-SceneScale)*occlusion + SceneScale*(sceneColor * occlusion);

	// computeFog yields the pure fog colour when no fog mode is set, so it
	// must only be called when fog is actually enabled.
	if ( g_iFogParams.x > 0)
	{
		color = computeFog( ourDepth, color );
	}

	return color;
} 

#ifdef HKG_DX10

// Rasterizer state for the full-screen passes: no culling, no multisampling.
RasterizerState DisableCulling
{
    CullMode = NONE;
	MultisampleEnable = FALSE;
};

// Depth state for the full-screen passes. Despite its name this state turns
// depth testing OFF and masks out depth writes.
DepthStencilState DepthEnabling
{
	DepthEnable = FALSE;
	DepthWriteMask = ZERO;
};

// Blending off: the pass output replaces the render-target contents.
BlendState DisableBlend
{
	BlendEnable[0] = FALSE;
};

// Multiplicative blending (result = destination * source colour). Only
// referenced from a commented-out line in the HBAO technique.
BlendState EnableBlend
{
	BlendEnable[0] = TRUE;
	SrcBlend[0] = Zero;
	DestBlend[0] = Src_Color;
};

#ifdef HKG_DX11

// Three-pass technique:
//   1. SSOA                  - PS_Hbao writes the raw occlusion term into SSAOMap.
//   2. BlurSSOA_H            - horizontal depth-aware blur, SSAOMap -> SSAOMap_Temp.
//   3. BlurSSOA_V_AndCombine - vertical depth-aware blur of SSAOMap_Temp, combined
//                              with the scene colour (and fog) into the target
//                              selected by the empty "RenderColorTarget0=;".
// NOTE(review): the empty render-target name presumably selects the host's
// default output target - confirm against the host's script handling.
// All passes draw a full-screen buffer with culling, depth and blending disabled.
technique11 HBAO
<
	string Script = "ClearColor0=ClearColor0;"
					"ClearColor1=ClearColor1;"
					"Clear=WipeAll;";
>
{
//		string Script = "RenderColorTarget0=;"
//						"Draw=Buffer;";

   // Pass 1: occlusion term (needs shader model 5 for ddx_fine/ddy_fine).
   pass SSOA
    <
		string Script = "RenderColorTarget0=SSAOMap;"
						"Draw=Buffer;";
	>
    {
		SetVertexShader( CompileShader( vs_5_0, VS_Hbao() ) );
        SetGeometryShader( NULL );
        SetPixelShader( CompileShader( ps_5_0, PS_Hbao(ViewDepthMapSampler) ) );
	
		SetRasterizerState(DisableCulling);       
		SetDepthStencilState(DepthEnabling, 0);
		SetBlendState(DisableBlend, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF);
//		SetBlendState(EnableBlend, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF);
    }

    // Pass 2: horizontal blur, direction (1, 0).
    pass BlurSSOA_H
    <
    	string Script = "RenderColorTarget0=SSAOMap_Temp;"
						"Draw=Buffer;";
	>
	{
		SetVertexShader( CompileShader( vs_4_0, VS_Blur(7,float2(1, 0) ) ) );
        SetGeometryShader( NULL );
        SetPixelShader( CompileShader( ps_4_0, PS_Blur7(SSAOMapSampler, ViewDepthMapSampler, weights7) ) );
	
		SetRasterizerState(DisableCulling);       
		SetDepthStencilState(DepthEnabling, 0);
		SetBlendState(DisableBlend, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF);
    }
   
	// Pass 3: vertical blur, direction (0, 1), plus scene combine.
	pass BlurSSOA_V_AndCombine
    <
    	string Script = "RenderColorTarget0=;"		
						"Draw=Buffer;";
	>
	{
		SetVertexShader( CompileShader( vs_4_0, VS_Blur(7,float2(0, 1)) ) );
        SetGeometryShader( NULL );
        SetPixelShader( CompileShader( ps_4_0, PS_Blur7AndCombine(SSAOMap_TempSampler, ViewDepthMapSampler, SceneMapSampler, weights7) ) );
	
		SetRasterizerState(DisableCulling);       
		SetDepthStencilState(DepthEnabling, 0);
		SetBlendState(DisableBlend, float4( 0.0f, 0.0f, 0.0f, 0.0f ), 0xFFFFFFFF);
    }
}

#endif
#endif
