This repository has been archived on 2024-06-20. You can view files and clone it, but you cannot make any changes to its state, such as pushing and creating new issues, pull requests or comments.
coffee.pygments/tests/examplefiles/hlsl/example.hlsl

181 lines
5.3 KiB
HLSL

// A few random snippets of HLSL shader code I gathered...
// Macro inside a single-line comment: #define COMMENT_MACRO 1
/* Macro inside a block comment: #define COMMENT_MACRO 2 */
// Object-like macro written with whitespace between '#' and 'define'.
# define INDENTED_MACRO 5.0
// Object-like macro in the usual compact form.
#define SINGLELINE_MACRO 10.0
// Function-like macro continued over several lines with trailing backslashes;
// it expands to float2(a, b).
#define MULTILINE_MACRO(a, b) float2( \
a, \
b \
)
[numthreads(256, 1, 1)]
void cs_main(uint3 threadId : SV_DispatchThreadID)
{
    // The generator state starts from this thread's X dispatch index.
    rng_state = threadId.x;

    // Draw two raw uint values from the generator.
    uint firstDraw = rand_xorshift();
    uint secondDraw = rand_xorshift();

    // (Placeholder: the raw draws would be consumed here.)

    // Scale a third draw by 1/2^32 to turn it into a float in [0, 1).
    // NOTE(review): if the uint-to-float conversion rounds the largest draws
    // up to 2^32, the product can reach 1.0 -- confirm if exclusivity matters.
    float unitSample = float(rand_xorshift()) * (1.0 / 4294967296.0);

    // (Placeholder: further work would follow.)
}
// Constant buffer of parameters, bound to register b0.
// specPow is read by the NDF functions in this file; xyOutput is the o_data
// coordinate written by SumAcrossThreadsAndStore.
cbuffer IntegratorParams : register(b0)
{
float2 specPow; // Spec powers in XY directions (equal for isotropic BRDFs)
float3 L; // Unit vector toward light
int2 cThread; // Total threads launched in XY dimensions
int2 xyOutput; // Where in the output buffer to store the result
}
// Constant pi; the NDF functions in this file divide by it in their normalization factors.
static const float pi = 3.141592654;
// Evaluates an Ashikhmin-Shirley-style normal distribution term for H using
// the anisotropic spec powers in specPow.
//   H: half-vector. H.z is used as N.H and H.xy as the in-plane direction
//      (presumably a unit vector in a frame with +Z along the surface
//      normal -- confirm with the caller).
// Returns normFactor * (N.H)^e, where the exponent e weights specPow.x and
// specPow.y by the squared components of the normalized H.xy.
float AshikhminShirleyNDF(float3 H)
{
    float normFactor = sqrt((specPow.x + 2.0f) * (specPow.y + 2.0)) * (0.5f / pi);
    float NdotH = H.z;

    // normalize() is indefinite for a zero-length vector, which is what H.xy
    // is when H points exactly along Z. No in-plane direction exists there, so
    // fall back to specPow.x instead of feeding an undefined value into pow().
    // For a unit-length H this case has NdotH == 1, so the particular fallback
    // exponent does not affect the result.
    float exponent = specPow.x;
    float2 Hxy = H.xy;
    if (dot(Hxy, Hxy) > 0.0f)
    {
        float2 dirXY = normalize(Hxy);
        exponent = dot(specPow, dirXY * dirXY);
    }

    return normFactor * pow(NdotH, exponent);
}
// Evaluates a Beckmann-style normal distribution term for the half-vector H.
// H.z is used as N.H; of the spec powers only specPow.x is read.
float BeckmannNDF(float3 H)
{
    // Corresponds to 1/m^2 in the usual statement of the Beckmann formula.
    float invRoughnessSq = specPow.x * 0.5f + 1.0f;

    float cosSq = H.z * H.z;
    float cosFourth = cosSq * cosSq;

    float scale = invRoughnessSq * (1.0f / pi);
    float falloff = exp(invRoughnessSq * (1.0f - 1.0f / cosSq));

    return scale / cosFourth * falloff;
}
// Output buffer for compute shader (actually float, but must be declared as uint
// for atomic operations to work). Bound to register u0; values are converted
// with asfloat/asuint where SumAcrossThreadsAndStore accumulates into it.
globallycoherent RWTexture2D<uint> o_data : register(u0);
// Sum up the outputs of all threads and store to the output location
// threadGroupSize1D is threadGroupSize2D squared (16 * 16 = 256).
static const uint threadGroupSize2D = 16;
static const uint threadGroupSize1D = threadGroupSize2D * threadGroupSize2D;
// Group-shared scratch array of threadGroupSize1D floats, indexed by
// iThreadInGroup in SumAcrossThreadsAndStore.
groupshared float g_partialSums[threadGroupSize1D];
// Sums `value` over the threads of a group via g_partialSums, then has the
// thread with iThreadInGroup == 0 add the group total to o_data[xyOutput].
//   value:          this thread's contribution to the sum
//   iThreadInGroup: index used into g_partialSums (presumably the flattened
//                   thread index within the group, below threadGroupSize1D --
//                   confirm with the caller)
void SumAcrossThreadsAndStore(float value, uint iThreadInGroup)
{
// First reduce within the threadgroup: partial sums of 2, 4, 8... elements
// are calculated by 1/2, 1/4, 1/8... of the threads, always keeping the
// active threads at the front of the group to minimize divergence.
// NOTE: there are faster ways of doing this...but this is simple to code
// and good enough.
g_partialSums[iThreadInGroup] = value;
// Barrier after the initial stores, before any thread reads another slot.
GroupMemoryBarrierWithGroupSync();
// i halves every pass; a thread with index below i folds slot (index + i) into its own slot.
[unroll] for (uint i = threadGroupSize1D / 2; i > 0; i /= 2)
{
if (iThreadInGroup < i)
{
g_partialSums[iThreadInGroup] += g_partialSums[iThreadInGroup + i];
}
// The barrier sits outside the `if`, so every thread executes it on every pass.
GroupMemoryBarrierWithGroupSync();
}
// Then reduce across threadgroups: one thread from each group adds the group
// total to the final output location, using a software transactional memory
// style since D3D11 doesn't support atomic add on floats.
// (Assumes the output value has been cleared to zero beforehand.)
if (iThreadInGroup == 0)
{
// After the loop, slot 0 holds the group total.
float threadGroupSum = g_partialSums[0];
uint outputValueRead = o_data[xyOutput];
// Retry until the compare-exchange reports that the stored value was still
// the one the new sum was computed from.
while (true)
{
// Reinterpret the stored bits as float, add, and reinterpret back to uint.
uint newOutputValue = asuint(asfloat(outputValueRead) + threadGroupSum);
uint previousOutputValue;
// Stores newOutputValue only if the current contents equal outputValueRead;
// previousOutputValue receives the contents found at the time of the call.
InterlockedCompareExchange(
o_data[xyOutput], outputValueRead, newOutputValue, previousOutputValue);
if (previousOutputValue == outputValueRead)
break;
// The stored value changed in the meantime; recompute from what was found.
outputValueRead = previousOutputValue;
}
}
}
// Entry point that passes the input vertex through unchanged and derives
// three extra outputs from its position: the offset from the vertex to
// g_posCamera, the position transformed by g_matWorldToUvzwShadow, and the
// position transformed by g_matWorldToClip.
void main(
    in Vertex i_vtx,
    out Vertex o_vtx,
    out float3 o_vecCamera : CAMERA,
    out float4 o_uvzwShadow : UVZW_SHADOW,
    out float4 o_posClip : SV_Position)
{
    // Position extended with w = 1; both matrix transforms start from it.
    float4 posHomogeneous = float4(i_vtx.m_pos, 1.0);

    o_posClip = mul(posHomogeneous, g_matWorldToClip);
    o_uvzwShadow = mul(posHomogeneous, g_matWorldToUvzwShadow);

    o_vecCamera = g_posCamera - i_vtx.m_pos;
    o_vtx = i_vtx;
}
// Request row-major matrix packing for the declarations that follow.
#pragma pack_matrix(row_major)
// Per-vertex data used as both input and output by the main() entry points
// in this file. Each member carries the semantic named after the colon.
struct Vertex
{
float3 m_pos : POSITION; // Position; multiplied by the g_matWorldTo* matrices in main()
float3 m_normal : NORMAL; // Normal; re-normalized before lighting in main()
float2 m_uv : UV; // Texture coordinate used to sample g_texDiffuse
};
// Constant buffer of values shared by the main() entry points in this file.
// NOTE(review): CB_FRAME is not defined in the visible source -- presumably a
// macro supplying the register binding; confirm.
cbuffer CBFrame : CB_FRAME // matches struct CBFrame in test.cpp
{
float4x4 g_matWorldToClip; // Applied to float4(position, 1) to produce SV_Position
float4x4 g_matWorldToUvzwShadow; // Applied to float4(position, 1) to produce UVZW_SHADOW
float3x3 g_matWorldToUvzShadowNormal;
float3 g_posCamera; // Vertex position is subtracted from this to produce CAMERA
float3 g_vecDirectionalLight; // Dotted with the surface normal for diffuse lighting
float3 g_rgbDirectionalLight; // Scaled by the shadow and N.L terms for diffuse lighting
float2 g_dimsShadowMap;
float g_normalOffsetShadow;
float g_shadowSharpening;
float g_exposure; // Exposure multiplier
}
// Diffuse color texture (register t0) and the sampler state (register s0)
// that main() passes to g_texDiffuse.Sample.
Texture2D<float3> g_texDiffuse : register(t0);
SamplerState g_ss : register(s0);
// Entry point that writes a lit diffuse color to SV_Target: the sampled
// diffuse texture multiplied by the sum of the shadowed directional light
// and the SimpleAmbient term.
void main(
    in Vertex i_vtx,
    in float3 i_vecCamera : CAMERA,
    in float4 i_uvzwShadow : UVZW_SHADOW,
    out float3 o_rgb : SV_Target)
{
    // Re-normalize the incoming normal before using it.
    float3 N = normalize(i_vtx.m_normal);

    // Shadow term from the shadow-map lookup helper.
    float shadowTerm = EvaluateShadow(i_uvzwShadow, N);

    // Surface color from the diffuse texture.
    float3 albedo = g_texDiffuse.Sample(g_ss, i_vtx.m_uv);

    // Directional light scaled by the shadow term and clamped N.L, plus ambient.
    float NdotL = saturate(dot(N, g_vecDirectionalLight));
    float3 lighting = g_rgbDirectionalLight * (shadowTerm * NdotL);
    lighting += SimpleAmbient(N);

    o_rgb = albedo * lighting;
}
// Domain-stage entry point for the "quad" domain: bilinearly interpolates the
// pos members of the four patch control points at the domain location uv.
[domain("quad")]
void ds(
    in float edgeFactors[4] : SV_TessFactor,
    in float insideFactors[2] : SV_InsideTessFactor,
    in OutputPatch<VData, 4> inp,
    in float2 uv : SV_DomainLocation,
    out float4 o_pos : SV_Position)
{
    // Interpolate along uv.x between points 0-1 and between points 2-3...
    float4 alongEdge01 = lerp(inp[0].pos, inp[1].pos, uv.x);
    float4 alongEdge23 = lerp(inp[2].pos, inp[3].pos, uv.x);

    // ...then along uv.y between the two intermediate results.
    o_pos = lerp(alongEdge01, alongEdge23, uv.y);
}