Jump to content
  • Advertisement

Recommended Posts

Hi, 

 

I am working on a project where I'm trying to use Forward Plus rendering on point lights. I have a simple reflective scene with many point lights moving around it. I am using an effects file (.fx) to keep my shaders in one place. I am having a problem with my compute shader code: I cannot get it to calculate the tiles and the lighting correctly.

 

Is anyone willing to help me set up my compute shader?

Thank you in advance for any replies and interest!

Share this post


Link to post
Share on other sites
Advertisement
2 minutes ago, Karol Plewa said:

I am having a problem with Compute Shader code. I cannot get it to work properly and calculate the tiles and lighting properly. 

Is there anyone that is wishing to help me set up my compute shader?

How about you show us what you have managed so far, and where specifically you are stuck?

Share this post


Link to post
Share on other sites

 

1 hour ago, swiftcoder said:

How about you show us what you have managed so far, and where specifically you are stuck?

All this is in my .fx file

//--------------------------------------------------------------------------------------
// File: Deferred.fx
// Forward+ (tiled forward) light-culling compute shaders plus standard
// forward-rendering shaders.
// NOTE(review): the attached file is named ForwardPlus.fx but this header still
// says "Deferred" - consider updating it to match.
//--------------------------------------------------------------------------------------


//--------------------------------------------------------------------------------------
// Global Variables
//--------------------------------------------------------------------------------------

// Tile size in pixels AND the compute thread-group dimension - one thread group
// processes one BLOCK_SIZE x BLOCK_SIZE screen tile.
#ifndef BLOCK_SIZE
#pragma message( "BLOCK_SIZE undefined. Default to 8.")
#define BLOCK_SIZE 8 // should be defined by the application.
#endif
// The matrices (4x4 matrix of floats) for transforming from 3D model to 2D projection (used in vertex shader)
float4x4 WorldMatrix;
float4x4 ViewMatrix;
float4x4 ProjMatrix;
float4x4 ViewProjMatrix;
float4x4 InvViewMatrix;

// Viewport Dimensions
float ViewportWidth;
float ViewportHeight;

// Lights are stored in a structure so we can pass lists of them
struct SPointLight
{
    float3 LightPosition;
    float  LightRadius;   // float3 + float packs into a single 16-byte constant register
    float4 LightColour;
};

// Point lights for forward-rendering. The deferred implementation passes the lights in as a vertex buffer (although that is
// not a requirement of deferred rendering - could use these variables instead)
static const int  MaxPointLights = 256;  // Maximum number of point lights the shader supports (this is for forward-rendering only)
int         NumPointLights;              // Actual number of point lights currently in use (this is for forward-rendering only)
SPointLight PointLights[MaxPointLights]; // List of point lights (for forward-rendering only)

// Other light data
float3 AmbientColour;
float3 DiffuseColour;
float3 SpecularColour;
float  SpecularPower;
float3 CameraPos;
float  CameraNearClip;

// Textures
Texture2D DiffuseMap; // Diffuse texture map (with optional specular map in alpha)
Texture2D NormalMap;  // Normal map (with optional height map in alpha)

// G-Buffer when used as textures for lighting pass
Texture2D GBuff_DiffuseSpecular; // Diffuse colour in rgb, specular strength in a
Texture2D GBuff_WorldPosition;   // World position at pixel in rgb (xyz)
Texture2D GBuff_WorldNormal;     // World normal at pixel in rgb (xyz)


// Samplers to use with the above textures
SamplerState TrilinearWrap
{
    Filter = MIN_MAG_MIP_LINEAR;
    AddressU = Wrap;
    AddressV = Wrap;
};
// Linear filtering, clamped at the edges - used for the light-count heat-map lookup.
SamplerState LinearClamp
{
    Filter = MIN_MAG_MIP_LINEAR;
    AddressU = Clamp;
    AddressV = Clamp;
};
// Nearly always sample the g-buffer with point sampling (i.e. no bilinear, trilinear etc.) because we don't want to introduce blur
SamplerState PointClamp
{
    Filter = MIN_MAG_MIP_POINT;
    AddressU = Clamp;
    AddressV = Clamp;
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

// Data needed to reconstruct view-space positions from screen-space coordinates.
// NOTE(review): ScreenDimensions must match the render target sampled by the
// culling shader - confirm it is updated on resize.
cbuffer ScreenToViewParams : register(b3)
{
    float4x4 InverseProjection;
    float2 ScreenDimensions;
}
// Convert clip space coordinates to view space
// NOTE(review): this uses mul(matrix, vector) (column-vector convention) while the
// vertex shaders below use mul(vector, matrix) (row-vector convention). That is only
// correct if InverseProjection is uploaded transposed relative to the other matrices
// - confirm on the application side, as a mismatch here would break all tile frustums.
float4 ClipToView(float4 clip)
{
    // View space position.
    float4 view = mul(InverseProjection, clip);
    // Perspective projection: undo the projective divide so xyz are true view-space values.
    view = view / view.w;

    return view;
}

// Convert screen-space (pixel) coordinates to view space.
float4 ScreenToView(float4 screen)
{
    // Normalised [0,1] texture coordinates across the viewport.
    const float2 uv = screen.xy / ScreenDimensions;

    // Map to clip space: x right, y up (flip the y axis), both in [-1,1].
    const float2 clipXY = float2(uv.x, 1.0f - uv.y) * 2.0f - 1.0f;

    // Depth (z) and w are passed through unchanged.
    return ClipToView(float4(clipXY, screen.z, screen.w));
}

// A plane in Hessian normal form: points p on the plane satisfy dot(N, p) == d.
struct Plane
{
    float3 N;   // Plane normal.
    float  d;   // Signed distance from the origin to the plane along N.
};

// Build a plane from three non-collinear points. Winding order of p1/p2 around
// p0 determines which way the normal faces.
Plane ComputePlane(float3 p0, float3 p1, float3 p2)
{
    const float3 edgeA = p1 - p0;
    const float3 edgeB = p2 - p0;

    Plane plane;
    plane.N = normalize(cross(edgeA, edgeB));
    // Signed distance from the origin, measured at p0.
    plane.d = dot(plane.N, p0);
    return plane;
}
// The four side planes of a tile's culling frustum (near/far are tested separately).
struct Frustum
{
    Plane planes[4];   // left, right, top, bottom frustum planes.
};

// A bounding sphere - used to cull point lights against tile frustums.
struct Sphere
{
    float3 c;   // Center point.
    float  r;   // Radius.
};

// True when the sphere lies entirely in the negative half-space of the plane.
bool SphereInsidePlane(Sphere sphere, Plane plane)
{
    const float signedDistance = dot(plane.N, sphere.c) - plane.d;
    return signedDistance < -sphere.r;
}

// True when the sphere is (at least partially) inside the tile frustum between
// zNear and zFar. View space here looks down -Z, so zFar is numerically less
// than zNear. Conservative: may keep spheres just outside a frustum corner.
bool SphereInsideFrustum(Sphere sphere, Frustum frustum, float zNear, float zFar)
{
    bool result = true;

    // First check depth (cheapest rejection).
    // Note: Here, the view vector points in the -Z axis so the
    // far depth value will be approaching -infinity.
    if (sphere.c.z - sphere.r > zNear || sphere.c.z + sphere.r < zFar)
    {
        result = false;
    }

    // Then check the four side planes of the tile frustum.
    for (int i = 0; i < 4 && result; i++)
    {
        // BUGFIX: was 'frustum.planes' (missing the [i] index), so every
        // iteration tested the same/ill-formed argument instead of each side
        // plane - light culling could never work correctly.
        if (SphereInsidePlane(sphere, frustum.planes[i]))
        {
            result = false;
        }
    }

    return result;
}

// A bounding cone (for spot-light culling; the point-light path in this file
// only uses spheres).
struct Cone
{
    float3 T;   // Cone tip.
    float  h;   // Height of the cone.
    float3 d;   // Direction of the cone.
    float  r;   // bottom radius of the cone.
};

// True when the point is in the negative half-space of the plane.
bool PointInsidePlane(float3 p, Plane plane)
{
    const float signedDistance = dot(plane.N, p) - plane.d;
    return signedDistance < 0;
}

// True when the cone lies entirely in the negative half-space of the plane.
bool ConeInsidePlane(Cone cone, Plane plane)
{
    // Direction from the cone axis towards the plane, perpendicular to the axis.
    const float3 m = cross(cross(plane.N, cone.d), cone.d);
    // Farthest point on the cone's base circle towards the plane's positive side.
    const float3 farthestBasePoint = cone.T + cone.d * cone.h - m * cone.r;

    // The whole cone is behind the plane iff both the tip and that farthest
    // base point are behind it.
    return PointInsidePlane(cone.T, plane) && PointInsidePlane(farthestBasePoint, plane);
}

// True when the cone is (at least partially) inside the tile frustum between
// zNear and zFar (view space looks down -Z, matching SphereInsideFrustum).
bool ConeInsideFrustum(Cone cone, Frustum frustum, float zNear, float zFar)
{
    bool result = true;

    Plane nearPlane = { float3(0, 0, -1), -zNear };
    Plane farPlane = { float3(0, 0, 1), zFar };

    // First check the near and far clipping planes.
    if (ConeInsidePlane(cone, nearPlane) || ConeInsidePlane(cone, farPlane))
    {
        result = false;
    }

    // Then check the four side planes.
    for (int i = 0; i < 4 && result; i++)
    {
        // BUGFIX: was 'frustum.planes' (missing the [i] index) - each
        // iteration must test a different side plane.
        if (ConeInsidePlane(cone, frustum.planes[i]))
        {
            result = false;
        }
    }

    return result;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////


//--------------------------------------------------------------------------------------
// Forward Rendering and Common Structures
//--------------------------------------------------------------------------------------

// This structure describes generic vertex data to be sent into the vertex shader
// Used in forward rendering and when creating the g-buffer in deferred rendering
struct VS_INPUT
{
    float3 Pos    : POSITION;
    float3 Normal : NORMAL;
    float2 UV     : TEXCOORD0;
};

// This structure contains the vertex data transformed into projection space & world space, i.e. the result of the usual vertex processing
// Used in forward rendering for the standard pixel lighting stage, but also used when building the g-buffer - the main geometry processing
// doesn't change much for deferred rendering - it's all about how this data is used next.
struct PS_TRANSFORMED_INPUT
{
    float4 ProjPos       : SV_Position;
    float3 WorldPosition : POSITION;
    float3 WorldNormal   : NORMAL;
    float2 UV            : TEXCOORD0;
};

// For both forward and deferred rendering, the light flares (sprites showing the position of the lights) are rendered as
// a particle system (this is not really to do with deferred rendering, just a visual nicety). Because the particles are
// transparent (additive blending), they must be rendered last, and they can't use deferred rendering.
// This is the input to the particle pixel shader.
struct PS_LIGHTPARTICLE_INPUT
{
    float4 ProjPos                     : SV_Position;
    float2 UV                          : TEXCOORD0;
    nointerpolation float3 LightColour : COLOR0; // Light colour for tinting the flare; "nointerpolation" as it is constant per particle
};
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

// Standard compute-shader system-value inputs, bundled into one struct.
struct COMPUTE_SHADER_INPUT
{
    uint3 groupID           : SV_GroupID;           // 3D index of the thread group in the dispatch.
    uint3 groupThreadID     : SV_GroupThreadID;     // 3D index of local thread ID in a thread group.
    uint3 dispatchThreadID  : SV_DispatchThreadID;  // 3D index of global thread ID in the dispatch.
    uint  groupIndex        : SV_GroupIndex;        // Flattened local index of the thread within a thread group.
};

// Dispatch dimensions supplied by the application for the CURRENT dispatch.
// NOTE(review): must be refreshed per dispatch - CS_ComputeFR reads numThreads
// as the tile-grid size, while CS_main reads numThreadGroups to index the
// per-tile frustum buffer. Stale values break the tile indexing.
cbuffer DispatchParams : register(b4)
{
    uint3   numThreadGroups;
    uint3   numThreads;
}

// Output buffer for the per-tile frustums written by CS_ComputeFR.
RWStructuredBuffer<Frustum> out_Frustums : register(u0);

// Computes the view-space culling frustum for one screen tile (BLOCK_SIZE x
// BLOCK_SIZE pixels). Dispatch this with ONE THREAD PER TILE (not per pixel),
// and only when the projection or screen size changes - the frustums are then
// reused every frame by CS_main via in_Frustums.
[numthreads(BLOCK_SIZE, BLOCK_SIZE, 1)]
void CS_ComputeFR(COMPUTE_SHADER_INPUT IN)
{
    // View space eye position is always at the origin.
    const float3 eyePos = float3(0, 0, 0);

    // The 4 corner points of this tile on the far clipping plane, in screen space.
    float4 screenSpace[4];
    // Top left point
    screenSpace[0] = float4(IN.dispatchThreadID.xy * BLOCK_SIZE, -1.0f, 1.0f);
    // Top right point
    screenSpace[1] = float4(float2(IN.dispatchThreadID.x + 1, IN.dispatchThreadID.y) * BLOCK_SIZE, -1.0f, 1.0f);
    // Bottom left point
    screenSpace[2] = float4(float2(IN.dispatchThreadID.x, IN.dispatchThreadID.y + 1) * BLOCK_SIZE, -1.0f, 1.0f);
    // Bottom right point
    screenSpace[3] = float4(float2(IN.dispatchThreadID.x + 1, IN.dispatchThreadID.y + 1) * BLOCK_SIZE, -1.0f, 1.0f);

    // Convert the screen-space corner points to view space.
    float3 viewSpace[4];
    for (int i = 0; i < 4; i++)
    {
        // BUGFIX: was 'viewSpace = ScreenToView(screenSpace).xyz;' - the [i]
        // index was missing on both sides, so the corners were never converted
        // individually (this does not even compile as written).
        viewSpace[i] = ScreenToView(screenSpace[i]).xyz;
    }

    Frustum frustum;
    // Winding is chosen so each plane normal points INTO the frustum interior.
    // Left plane
    frustum.planes[0] = ComputePlane(eyePos, viewSpace[2], viewSpace[0]);
    // Right plane
    frustum.planes[1] = ComputePlane(eyePos, viewSpace[1], viewSpace[3]);
    // Top plane
    frustum.planes[2] = ComputePlane(eyePos, viewSpace[0], viewSpace[1]);
    // Bottom plane
    frustum.planes[3] = ComputePlane(eyePos, viewSpace[3], viewSpace[2]);

    // numThreads must describe the TILE grid for this dispatch; threads past
    // the edge of the grid write nothing.
    if (IN.dispatchThreadID.x < numThreads.x && IN.dispatchThreadID.y < numThreads.y)
    {
        uint index = IN.dispatchThreadID.x + (IN.dispatchThreadID.y * numThreads.x);
        out_Frustums[index] = frustum;
    }
}

// View-space depth buffer from the depth pre-pass.
Texture2D DepthTextureVS : register(t3);
// Precomputed per-tile frustums (written by a separate CS_ComputeFR dispatch).
StructuredBuffer<Frustum> in_Frustums : register(t9);
// Global counters for the current index into each light index list.
RWStructuredBuffer<uint> o_LightIndexCounter : register(u2);
RWStructuredBuffer<uint> t_LightIndexCounter : register(u3);
// Light index lists and light grids.
RWStructuredBuffer<uint> o_LightIndexList : register(u4);
RWStructuredBuffer<uint> t_LightIndexList : register(u5);
RWTexture2D<uint2> o_LightGrid : register(u6);
RWTexture2D<uint2> t_LightGrid : register(u7);

// BUGFIX: the variables below were declared 'static', which in HLSL is
// PER-THREAD private storage. The Interlocked* operations and
// GroupMemoryBarrierWithGroupSync() calls in CS_main then synchronise nothing,
// because every thread has its own copy - this is very likely why the tile
// culling "never calculated properly". They must be 'groupshared' so all
// threads in one tile's thread group share them.
groupshared uint uMinDepth;
groupshared uint uMaxDepth;
groupshared Frustum GroupFrustum;
// Opaque geometry light list for the current tile.
groupshared uint o_LightCount;
groupshared uint o_LightIndexStartOffset;
groupshared uint o_LightList[BLOCK_SIZE*BLOCK_SIZE*1];

// Transparent geometry light list for the current tile.
groupshared uint t_LightCount;
groupshared uint t_LightIndexStartOffset;
groupshared uint t_LightList[BLOCK_SIZE*BLOCK_SIZE*1]; // One slot per thread in the group; enlarge if tiles can see more lights.
Texture2D LightCountHeatMap : register(t10);
RWTexture2D<float4> DebugTexture : register(u1);

// Add a light to this tile's visible-light list for opaque geometry.
void o_AppendLight(uint lightIndex)
{
    uint index; // Slot reserved for this thread in the visible lights array.
    InterlockedAdd(o_LightCount, 1, index);
    // BUGFIX: the bound was a hard-coded 256, but o_LightList only holds
    // BLOCK_SIZE*BLOCK_SIZE entries (64 with the default BLOCK_SIZE of 8) -
    // indices 64..255 wrote out of bounds.
    if (index < BLOCK_SIZE * BLOCK_SIZE)
    {
        o_LightList[index] = lightIndex;
    }
}

// Add a light to this tile's visible-light list for transparent geometry.
void t_AppendLight(uint lightIndex)
{
    uint index; // Slot reserved for this thread in the visible lights array.
    InterlockedAdd(t_LightCount, 1, index);
    // BUGFIX: the bound was a hard-coded 256, but t_LightList only holds
    // BLOCK_SIZE*BLOCK_SIZE entries - larger indices wrote out of bounds.
    if (index < BLOCK_SIZE * BLOCK_SIZE)
    {
        t_LightList[index] = lightIndex;
    }
}

// Forward+ light-culling compute shader: one thread per screen PIXEL, one
// thread group per BLOCK_SIZE x BLOCK_SIZE tile. For each tile it finds the
// tile's min/max depth, then builds lists of the point lights that can affect
// opaque and transparent geometry in that tile.
[numthreads(BLOCK_SIZE, BLOCK_SIZE, 1)]
void CS_main(COMPUTE_SHADER_INPUT IN)
{
    // BUGFIX: the original called CS_ComputeFR(IN) here. CS_ComputeFR expects
    // one thread per TILE, but this dispatch runs one thread per PIXEL, so it
    // wrote out_Frustums at wildly wrong indices every frame. Run CS_ComputeFR
    // as its own dispatch (when the projection/screen size changes); this
    // shader only READS the precomputed frustums via in_Frustums below.

    // Calculate min & max depth in threadgroup / tile.
    int2 texCoord = IN.dispatchThreadID.xy;
    float fDepth = DepthTextureVS.Load(int3(texCoord, 0)).r;

    // Reinterpret as uint so InterlockedMin/Max can be used (for non-negative
    // floats the IEEE-754 bit pattern preserves ordering).
    uint uDepth = asuint(fDepth);

    if (IN.groupIndex == 0) // One thread initialises the groupshared tile data.
    {
        uMinDepth = 0xffffffff;
        uMaxDepth = 0;
        o_LightCount = 0;
        t_LightCount = 0;
        GroupFrustum = in_Frustums[IN.groupID.x + (IN.groupID.y * numThreadGroups.x)];
    }

    GroupMemoryBarrierWithGroupSync();

    InterlockedMin(uMinDepth, uDepth);
    InterlockedMax(uMaxDepth, uDepth);

    GroupMemoryBarrierWithGroupSync();

    float fMinDepth = asfloat(uMinDepth);
    float fMaxDepth = asfloat(uMaxDepth);

    // Convert depth values to view space.
    float minDepthVS = ScreenToView(float4(0, 0, fMinDepth, 1)).z;
    float maxDepthVS = ScreenToView(float4(0, 0, fMaxDepth, 1)).z;
    float nearClipVS = ScreenToView(float4(0, 0, 0, 1)).z;

    // Clipping plane at the tile's minimum depth: lights entirely in front of
    // all opaque geometry in the tile can be skipped for opaque shading.
    Plane minPlane = { float3(0, 0, -1), -minDepthVS };

    // Cull lights: each thread tests one light at a time, striding by the
    // thread-group size, until all lights have been tested.
    // BUGFIX: 'i' is declared here because it is reused by the copy loops
    // below - the original declared it inside this for-statement, leaving the
    // later 'for (i = ...)' loops with an undeclared variable.
    uint i;
    for (i = IN.groupIndex; i < MaxPointLights; i += BLOCK_SIZE * BLOCK_SIZE)
    {
        // BUGFIX: was 'PointLights.LightPosition' / '.LightRadius' (missing
        // the [i] index) - the per-light data was never actually read.
        Sphere sphere = { PointLights[i].LightPosition, PointLights[i].LightRadius };
        if (SphereInsideFrustum(sphere, GroupFrustum, nearClipVS, maxDepthVS))
        {
            // Visible to transparent geometry in this tile.
            t_AppendLight(i);

            if (!SphereInsidePlane(sphere, minPlane))
            {
                // Also visible to opaque geometry in this tile.
                o_AppendLight(i);
            }
        }
    }

    // Wait till all threads in group have finished culling.
    GroupMemoryBarrierWithGroupSync();

    // Update global memory with this tile's visible light lists.
    // First the light grid (only thread 0 in the group needs to do this).
    if (IN.groupIndex == 0)
    {
        // Reserve a contiguous range in the global index list for opaque geometry.
        InterlockedAdd(o_LightIndexCounter[0], o_LightCount, o_LightIndexStartOffset);
        o_LightGrid[IN.groupID.xy] = uint2(o_LightIndexStartOffset, o_LightCount);

        // And the same for transparent geometry.
        InterlockedAdd(t_LightIndexCounter[0], t_LightCount, t_LightIndexStartOffset);
        t_LightGrid[IN.groupID.xy] = uint2(t_LightIndexStartOffset, t_LightCount);
    }

    GroupMemoryBarrierWithGroupSync();

    // Now copy the per-tile lists into the global light index lists (all threads).
    // For opaque geometry.
    for (i = IN.groupIndex; i < o_LightCount; i += BLOCK_SIZE * BLOCK_SIZE)
    {
        // BUGFIX: was 'o_LightList' without the [i] index.
        o_LightIndexList[o_LightIndexStartOffset + i] = o_LightList[i];
    }
    // For transparent geometry.
    for (i = IN.groupIndex; i < t_LightCount; i += BLOCK_SIZE * BLOCK_SIZE)
    {
        // BUGFIX: was 't_LightList' without the [i] index.
        t_LightIndexList[t_LightIndexStartOffset + i] = t_LightList[i];
    }

    // Debug visualisation: dark tile borders plus a heat map of lights per tile.
    if (IN.groupThreadID.x == 0 || IN.groupThreadID.y == 0)
    {
        DebugTexture[texCoord] = float4(0, 0, 0, 0.9f);
    }
    else if (IN.groupThreadID.x == 1 || IN.groupThreadID.y == 1)
    {
        DebugTexture[texCoord] = float4(1, 1, 1, 0.5f);
    }
    else if (o_LightCount > 0)
    {
        float normalizedLightCount = o_LightCount / 50.0f; // 50+ lights maps to the hottest colour
        float4 lightCountHeatMapColor = LightCountHeatMap.SampleLevel(LinearClamp, float2(normalizedLightCount, 0), 0);
        DebugTexture[texCoord] = lightCountHeatMapColor;
    }
    else
    {
        DebugTexture[texCoord] = float4(0, 0, 0, 1);
    }
}

//--------------------------------------------------------------------------------------
// Forward rendering shaders - nothing particularly new here
//--------------------------------------------------------------------------------------

// Standard vertex processing: model -> world -> view -> projection, passing
// the world-space position/normal and UVs on for per-pixel lighting.
PS_TRANSFORMED_INPUT VS_TransformTex(VS_INPUT vIn)
{
    PS_TRANSFORMED_INPUT vOut;

    // Model space -> world space (w = 1 for positions).
    const float4 worldPos = mul(float4(vIn.Pos, 1.0f), WorldMatrix);
    vOut.WorldPosition = worldPos.xyz;

    // World space -> view space -> 2D projection space for rasterisation.
    vOut.ProjPos = mul(mul(worldPos, ViewMatrix), ProjMatrix);

    // Normals use w = 0 so the world matrix's translation is ignored.
    // NOTE(review): assumes WorldMatrix has no non-uniform scale - confirm.
    vOut.WorldNormal = mul(float4(vIn.Normal, 0.0f), WorldMatrix).xyz;

    // UVs are only needed by the pixel shader - pass them straight through.
    vOut.UV = vIn.UV;

    return vOut;
}

// Pixel shader that calculates per-pixel lighting and combines it with the diffuse map.
// Sums the effect of every active point light per pixel - O(pixels * lights),
// which is exactly the cost Forward+ tile culling is meant to avoid.
float4 PS_PixelLitDiffuseMap(PS_TRANSFORMED_INPUT pIn) : SV_Target
{
    ////////////////////
    // Sample texture

    // Extract diffuse material colour for this pixel from a texture
    float4 DiffuseMaterial = DiffuseMap.Sample(TrilinearWrap, pIn.UV);
    //    clip( DiffuseMaterial.a - 0.5f ); // Discard pixels with alpha < 0.5, the model in this lab uses a lot of alpha transparency, but this impacts performance

    // Renormalise normals that have been interpolated from the vertex shader
    float3 worldNormal = normalize(pIn.WorldNormal);

    ///////////////////////
    // Calculate lighting

    // Direction from this pixel to the camera (world space).
    float3 CameraDir = normalize(CameraPos - pIn.WorldPosition);

    // Sum the effects of each light.
    float3 TotalDiffuse = AmbientColour;
    float3 TotalSpecular = 0;
    for (int i = 0; i < NumPointLights; i++)
    {
        // BUGFIX: was 'PointLights.LightPosition', '.LightRadius' and
        // '.LightColour' (missing the [i] index) - every light read the
        // unindexed array instead of light i's data.
        float3 LightVec = PointLights[i].LightPosition - pIn.WorldPosition;
        float  LightIntensity = saturate(1.0f - length(LightVec) / PointLights[i].LightRadius); // Linear distance attenuation, clamped to [0,1]
        float3 LightDir = normalize(LightVec);

        float3 Diffuse = LightIntensity * PointLights[i].LightColour.rgb * max(dot(worldNormal, LightDir), 0);
        TotalDiffuse += Diffuse;
        float3 halfway = normalize(LightDir + CameraDir); // Blinn-Phong half vector
        TotalSpecular += Diffuse * pow(max(dot(worldNormal, halfway), 0), SpecularPower);
    }

    ////////////////////
    // Combine colours

    // Combine maps and lighting for final pixel colour
    float4 combinedColour;
    combinedColour.rgb = DiffuseMaterial.rgb * TotalDiffuse + SpecularColour * TotalSpecular; // The models in this lab have no specular in texture alpha, so use specular colour from X-file
    combinedColour.a = 1.0f;

    return combinedColour;
}


// Dummy vertex shader for the light particle system geometry shader below. The geometry shader does all the work.
// NOTE(review): VS_POINTLIGHT_INPUT is not declared anywhere in this file, and no
// geometry shader is present either - this will not compile as-is. Confirm the
// particle structs/GS were meant to be included (or removed along with this).
VS_POINTLIGHT_INPUT VS_LightParticles(VS_POINTLIGHT_INPUT vIn)
{
    return vIn;
}


// Pixel shader for the light flare sprites: sample the flare texture and tint
// it with the light's colour. Alpha 0 relies on additive blending when drawn.
float4 PS_LightParticles(PS_LIGHTPARTICLE_INPUT pIn) : SV_Target
{
    const float3 tinted = DiffuseMap.Sample(TrilinearWrap, pIn.UV) * pIn.LightColour;
    return float4(tinted, 0.0f);
}


//--------------------------------------------------------------------------------------
// States
//--------------------------------------------------------------------------------------

// States are needed to switch between additive blending for the lights and no blending for other models

RasterizerState CullNone  // Cull none of the polygons, i.e. show both sides
{
    CullMode = None;
    FillMode = SOLID;
};

DepthStencilState DepthWritesOn  // Write to the depth buffer - normal behaviour for opaque geometry
{
    DepthFunc = LESS;
    DepthWriteMask = ALL;
};

BlendState NoBlending // Switch off blending - pixels will be opaque
{
    BlendEnable[0] = FALSE;
};

//--------------------------------------------------------------------------------------
// Techniques
//--------------------------------------------------------------------------------------

// Forward+ technique: standard vertex/pixel shaders plus the light-culling compute shader.
// NOTE(review): the compute shader is set in the same pass as the graphics
// pipeline. The application must still Dispatch() the culling shader (and the
// separate CS_ComputeFR frustum pass) BEFORE drawing geometry each frame -
// setting it in the pass does not run it. Confirm the dispatch order on the CPU side.
technique11 ForwardPlus
{
    pass P0
    {
        SetVertexShader(CompileShader(vs_5_0, VS_TransformTex()));
        SetGeometryShader(NULL);
        SetPixelShader(CompileShader(ps_5_0, PS_PixelLitDiffuseMap()));
        SetComputeShader(CompileShader(cs_5_0, CS_main()));

        SetBlendState(NoBlending, float4(0.0f, 0.0f, 0.0f, 0.0f), 0xFFFFFFFF);
        SetRasterizerState(CullNone);
        SetDepthStencilState(DepthWritesOn, 0);
    }
    
}

 

ForwardPlus.fx

Edited by swiftcoder
added code tags

Share this post


Link to post
Share on other sites

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!

Register a new account

Sign in

Already have an account? Sign in here.

Sign In Now

  • Advertisement
×

Important Information

By using GameDev.net, you agree to our community Guidelines, Terms of Use, and Privacy Policy.

We are the game development community.

Whether you are an indie, hobbyist, AAA developer, or just trying to learn, GameDev.net is the place for you to learn, share, and connect with the games industry. Learn more About Us or sign up!

Sign me up!