Tiled Deferred Frustum Culling issues

Started by
5 comments, last by Niruz 10 years ago

I'm having some issues getting culling to work for the near/far plane per tile when doing tiled deferred shading. The way to do it in DirectX based on http://malegebi.wordpress.com/2012/01/30/a-new-era-of-forward-shading-is-coming/ is to do it like this.


    // Work out scale/bias from [0, 1]
    float2 tileScale = float2(DisplaySize.xy) * rcp(2.0f * float2(LightTileSize, LightTileSize));
    float2 tileBias = tileScale - float2(GroupID.xy);

    // Now work out composite projection matrix
    // Relevant matrix columns for this tile frusta
    float4 c1 = float4(Projection._11 * tileScale.x, 0.0f, tileBias.x, 0.0f);
    float4 c2 = float4(0.0f, -Projection._22 * tileScale.y, tileBias.y, 0.0f);
    float4 c4 = float4(0.0f, 0.0f, 1.0f, 0.0f);

    // Derive frustum planes
    float4 frustumPlanes[6];

    // Sides
    frustumPlanes[0] = c4 - c1;
    frustumPlanes[1] = c4 + c1;
    frustumPlanes[2] = c4 - c2;
    frustumPlanes[3] = c4 + c2;

    // Near/far
    frustumPlanes[4] = float4(0.0f, 0.0f,  1.0f, -minTileZ);
    frustumPlanes[5] = float4(0.0f, 0.0f, -1.0f,  maxTileZ);

I've tried just doing an opengl version of that but it doesn't seem to work, here's what I have so far, notice that near/far aren't correct:


vec4 frustumPlanes[6];

	vec2 tileScale = vec2(SCREEN_WIDTH,SCREEN_HEIGHT) * (1.0f / float( 2 * MAX_WORK_GROUP_SIZE));

	vec2 tileBias = tileScale - vec2(gl_WorkGroupID.xy);

	vec4 col1 = vec4(-projectionMatrix[0][0]  * tileScale.x, projectionMatrix[0][1], tileBias.x, projectionMatrix[0][3]); 

    vec4 col2 = vec4(projectionMatrix[1][0], -projectionMatrix[1][1] * tileScale.y, tileBias.y, projectionMatrix[1][3]);

    vec4 col4 = vec4(projectionMatrix[3][0], projectionMatrix[3][1],  -1.0f, projectionMatrix[3][3]); 

	//Left plane
    frustumPlanes[0] = col4 + col1;

    //right plane
    frustumPlanes[1] = col4 - col1;

    //top plane
    frustumPlanes[2] = col4 - col2;

    //bottom plane
    frustumPlanes[3] = col4 + col2;

    //near
    frustumPlanes[4] = vec4(0.0f, 0.0f, -1.0f,  -minDepthZ);

    //far
    frustumPlanes[5] = vec4(0.0f, 0.0f, -1.0f,  maxDepthZ);

	for(int i = 0; i < 4; i++)
    {
        frustumPlanes[i] *= 1.0f / length(frustumPlanes[i].xyz);
    }

I want to do near = vec4(0.0f, 0,0f, -1,0f, -minDepthZ) and for the far I want to do vec4(0.0f,0.0f,1.0f,maxDepthZ) which is like the directX version only reversed, I've been basing things on this link too http://www.lighthouse3d.com/tutorials/view-frustum-culling/clip-space-approach-extracting-the-planes/

But I cannot seem to get it to work, does anyone have any idea on how to correctly implement the near and far plane for the frustum?

Advertisement

I've tried reworking the code based on the AMD implementation, this is what I have now instead, only now the frustum for the tiles seems to have "flipped" or become reversed, the old implementation was based on directX which may have lead to some troubles, here's what I have now:


	vec4 frustumEqn[4];
    uint pxm = MAX_WORK_GROUP_SIZE * gl_WorkGroupID.x;
    uint pym = MAX_WORK_GROUP_SIZE * gl_WorkGroupID.y;
    uint pxp = MAX_WORK_GROUP_SIZE * (gl_WorkGroupID.x + 1);
    uint pyp = MAX_WORK_GROUP_SIZE * (gl_WorkGroupID.y + 1);

    uint uWindowWidthEvenlyDivisibleByTileRes = MAX_WORK_GROUP_SIZE * GetNumTilesX();
    uint uWindowHeightEvenlyDivisibleByTileRes = MAX_WORK_GROUP_SIZE * GetNumTilesY();

    vec4 frustum[4];
    frustum[0] = ConvertProjToView( vec4( pxm / float(uWindowWidthEvenlyDivisibleByTileRes) * 2.0f - 1.0f, (uWindowHeightEvenlyDivisibleByTileRes - pym) / float(uWindowHeightEvenlyDivisibleByTileRes) * 2.0f - 1.0f, 1.0f, 1.0f) );
    frustum[1] = ConvertProjToView( vec4( pxp / float(uWindowWidthEvenlyDivisibleByTileRes) * 2.0f - 1.0f, (uWindowHeightEvenlyDivisibleByTileRes - pym) / float(uWindowHeightEvenlyDivisibleByTileRes) * 2.0f - 1.0f, 1.0f, 1.0f) );
    frustum[2] = ConvertProjToView( vec4( pxp / float(uWindowWidthEvenlyDivisibleByTileRes) * 2.0f - 1.0f, (uWindowHeightEvenlyDivisibleByTileRes - pyp) / float(uWindowHeightEvenlyDivisibleByTileRes) * 2.0f - 1.0f, 1.0f ,1.0f) );
    frustum[3] = ConvertProjToView( vec4( pxm / float(uWindowWidthEvenlyDivisibleByTileRes) * 2.0f - 1.0f, (uWindowHeightEvenlyDivisibleByTileRes - pyp) / float(uWindowHeightEvenlyDivisibleByTileRes) * 2.0f - 1.0f, 1.0f, 1.0f) );

    for (int i = 0; i < 4; i++)
        frustumEqn[i] = CreatePlaneEquation(frustum[i],frustum[(i+1) & 3]);

    barrier();

    int threadsPerTile = MAX_WORK_GROUP_SIZE * MAX_WORK_GROUP_SIZE;

    for (uint i = 0; i < NUM_OF_LIGHTS; i+= threadsPerTile)
    {
        uint il = gl_LocalInvocationIndex + i;

        if (il < NUM_OF_LIGHTS)
        {
            PointLight p = pointLights[il];

            vec4 viewPos = viewMatrix * vec4(p.posX,p.posY,p.posZ, 1.0f);
            float r = p.radius;

            if (viewPos.z + minDepthZ < r && viewPos.z - maxDepthZ < r)
            {

            if( ( GetSignedDistanceFromPlane( viewPos, frustumEqn[0] ) < r ) &&
                ( GetSignedDistanceFromPlane( viewPos, frustumEqn[1] ) < r ) &&
                ( GetSignedDistanceFromPlane( viewPos, frustumEqn[2] ) < r ) &&
                ( GetSignedDistanceFromPlane( viewPos, frustumEqn[3] ) < r) )

                {
                    uint id = atomicAdd(pointLightCount, 1);
                    pointLightIndex[id] = il;
                }
            }

        }
    }

And the functions used


vec3 ReconstructWP(float z, vec2 uv_f)
{
    vec4 sPos = vec4(uv_f * 2.0 - 1.0, z, 1.0);
    sPos = inverseViewProjectionMatrix * sPos;

    return (sPos.xyz / sPos.w);
}

vec4 ConvertProjToView( vec4 p )
{
    p = inverseProjectionMatrix * p;
    p /= p.w;
    return p;
}

// calculate the number of tiles in the horizontal direction
uint GetNumTilesX()
{
    return uint(( ( 1280 + MAX_WORK_GROUP_SIZE - 1 ) / float(MAX_WORK_GROUP_SIZE) ));
}

// calculate the number of tiles in the vertical direction
uint GetNumTilesY()
{
    return uint(( ( 720 + MAX_WORK_GROUP_SIZE - 1 ) / float(MAX_WORK_GROUP_SIZE) ));
}


vec4 CreatePlaneEquation( vec4 b, vec4 c )
{
    vec4 n;

    // normalize(cross( b.xyz-a.xyz, c.xyz-a.xyz )), except we know "a" is the origin
     n.xyz = normalize(cross( b.xyz, c.xyz ));

    // -(n dot a), except we know "a" is the origin
    n.w = 0;

    return n;
}

float GetSignedDistanceFromPlane( vec4 p, vec4 eqn )
{
    // dot( eqn.xyz, p.xyz ) + eqn.w, , except we know eqn.w is zero 
    // (see CreatePlaneEquation above)
    return dot( eqn.xyz, p.xyz );
}

Now, here's the funny part, in the picture below all tiles affected by a light are rendered in red, and if I move around it looks exactly like the world has somehow flipped everything, now I'm not sure how that could happen, but maybe someone has some ideas?

where is your 2d origin for the tile map? top left corner?

It is clockwise from the top left

I should probably add this maybe, this is the complete DirectX version from AMD used for tiled forward shading


//--------------------------------------------------------------------------------------
// File: ForwardPlus11.hlsl
//
// HLSL file for the ForwardPlus11 sample. Tiled light culling.
// 
// Author: Jason Stewart
// 
// Copyright © AMD Corporation. All rights reserved.
//--------------------------------------------------------------------------------------


#include "ForwardPlus11Common.hlsl"

#define FLT_MAX         3.402823466e+38F

//-----------------------------------------------------------------------------------------
// Textures and Buffers
//-----------------------------------------------------------------------------------------
Buffer<float4> g_PointLightBufferCenterAndRadius : register( t0 );
Buffer<float4> g_SpotLightBufferCenterAndRadius : register( t1 );

#if ( USE_DEPTH_CULLING == 1 )   // non-MSAA
Texture2D<float> g_DepthTexture : register( t2 );
#elif ( USE_DEPTH_CULLING == 2 ) // MSAA
Texture2DMS<float> g_DepthTexture : register( t2 );
#endif

RWBuffer<uint> g_PerTileLightIndexBufferOut : register( u0 );

//-----------------------------------------------------------------------------------------
// Group Shared Memory (aka local data share, or LDS)
//-----------------------------------------------------------------------------------------
#if ( USE_DEPTH_CULLING == 1 || USE_DEPTH_CULLING == 2 )
groupshared uint ldsZMax;
groupshared uint ldsZMin;
#endif

groupshared uint ldsLightIdxCounter;
groupshared uint ldsLightIdx[MAX_NUM_LIGHTS_PER_TILE];

//-----------------------------------------------------------------------------------------
// Helper functions
//-----------------------------------------------------------------------------------------

// this creates the standard Hessian-normal-form plane equation from three points, 
// except it is simplified for the case where the first point is the origin
float4 CreatePlaneEquation( float4 b, float4 c )
{
    float4 n;

    // normalize(cross( b.xyz-a.xyz, c.xyz-a.xyz )), except we know "a" is the origin
    n.xyz = normalize(cross( b.xyz, c.xyz ));

    // -(n dot a), except we know "a" is the origin
    n.w = 0;

    return n;
}

// point-plane distance, simplified for the case where 
// the plane passes through the origin
float GetSignedDistanceFromPlane( float4 p, float4 eqn )
{
    // dot( eqn.xyz, p.xyz ) + eqn.w, , except we know eqn.w is zero 
    // (see CreatePlaneEquation above)
    return dot( eqn.xyz, p.xyz );
}

// calculate the number of tiles in the horizontal direction
uint GetNumTilesX()
{
    return (uint)( ( g_uWindowWidth + TILE_RES - 1 ) / (float)TILE_RES );
}

// calculate the number of tiles in the vertical direction
uint GetNumTilesY()
{
    return (uint)( ( g_uWindowHeight + TILE_RES - 1 ) / (float)TILE_RES );
}

// convert a point from post-projection space into view space
float4 ConvertProjToView( float4 p )
{
    p = mul( p, g_mProjectionInv );
    p /= p.w;
    return p;
}

// convert a depth value from post-projection space into view space
float ConvertProjDepthToView( float z )
{
    z = 1.f / (z*g_mProjectionInv._34 + g_mProjectionInv._44);
    return z;
}

#if ( USE_DEPTH_CULLING == 1 || USE_DEPTH_CULLING == 2 )
void CalculateMinMaxDepthInLds( uint3 globalThreadIdx, uint depthBufferSampleIdx )
{
#if ( USE_DEPTH_CULLING == 1 )   // non-MSAA
    float depth = g_DepthTexture.Load( uint3(globalThreadIdx.x,globalThreadIdx.y,0) ).x;
#elif ( USE_DEPTH_CULLING == 2 ) // MSAA
    float depth = g_DepthTexture.Load( uint2(globalThreadIdx.x,globalThreadIdx.y), depthBufferSampleIdx ).x;
#endif
    float viewPosZ = ConvertProjDepthToView( depth );
    uint z = asuint( viewPosZ );
    if( depth != 0.f )
    {
        InterlockedMax( ldsZMax, z );
        InterlockedMin( ldsZMin, z );
    }
}
#endif

//-----------------------------------------------------------------------------------------
// Parameters for the light culling shader
//-----------------------------------------------------------------------------------------
#define NUM_THREADS_X TILE_RES
#define NUM_THREADS_Y TILE_RES
#define NUM_THREADS_PER_TILE (NUM_THREADS_X*NUM_THREADS_Y)

//-----------------------------------------------------------------------------------------
// Light culling shader
//-----------------------------------------------------------------------------------------
[numthreads(NUM_THREADS_X, NUM_THREADS_Y, 1)]
void CullLightsCS( uint3 globalIdx : SV_DispatchThreadID, uint3 localIdx : SV_GroupThreadID, uint3 groupIdx : SV_GroupID )
{
    uint localIdxFlattened = localIdx.x + localIdx.y*NUM_THREADS_X;
    uint tileIdxFlattened = groupIdx.x + groupIdx.y*GetNumTilesX();

    if( localIdxFlattened == 0 )
    {
#if ( USE_DEPTH_CULLING == 1 || USE_DEPTH_CULLING == 2 )
        ldsZMin = 0xffffffff;
        ldsZMax = 0;
#endif
        ldsLightIdxCounter = 0;
    }

    float4 frustumEqn[4];
    {   // construct frustum for this tile
        uint pxm = TILE_RES*groupIdx.x;
        uint pym = TILE_RES*groupIdx.y;
        uint pxp = TILE_RES*(groupIdx.x+1);
        uint pyp = TILE_RES*(groupIdx.y+1);

        uint uWindowWidthEvenlyDivisibleByTileRes = TILE_RES*GetNumTilesX();
        uint uWindowHeightEvenlyDivisibleByTileRes = TILE_RES*GetNumTilesY();

        // four corners of the tile, clockwise from top-left
        float4 frustum[4];
        frustum[0] = ConvertProjToView( float4( pxm/(float)uWindowWidthEvenlyDivisibleByTileRes*2.f-1.f, (uWindowHeightEvenlyDivisibleByTileRes-pym)/(float)uWindowHeightEvenlyDivisibleByTileRes*2.f-1.f,1.f,1.f) );
        frustum[1] = ConvertProjToView( float4( pxp/(float)uWindowWidthEvenlyDivisibleByTileRes*2.f-1.f, (uWindowHeightEvenlyDivisibleByTileRes-pym)/(float)uWindowHeightEvenlyDivisibleByTileRes*2.f-1.f,1.f,1.f) );
        frustum[2] = ConvertProjToView( float4( pxp/(float)uWindowWidthEvenlyDivisibleByTileRes*2.f-1.f, (uWindowHeightEvenlyDivisibleByTileRes-pyp)/(float)uWindowHeightEvenlyDivisibleByTileRes*2.f-1.f,1.f,1.f) );
        frustum[3] = ConvertProjToView( float4( pxm/(float)uWindowWidthEvenlyDivisibleByTileRes*2.f-1.f, (uWindowHeightEvenlyDivisibleByTileRes-pyp)/(float)uWindowHeightEvenlyDivisibleByTileRes*2.f-1.f,1.f,1.f) );

        // create plane equations for the four sides of the frustum, 
        // with the positive half-space outside the frustum (and remember, 
        // view space is left handed, so use the left-hand rule to determine 
        // cross product direction)
        for(uint i=0; i<4; i++)
            frustumEqn[i] = CreatePlaneEquation( frustum[i], frustum[(i+1)&3] );
    }

    GroupMemoryBarrierWithGroupSync();

    // calculate the min and max depth for this tile, 
    // to form the front and back of the frustum

#if ( USE_DEPTH_CULLING == 1 || USE_DEPTH_CULLING == 2 )
    float minZ = FLT_MAX;
    float maxZ = 0.f;

#if ( USE_DEPTH_CULLING == 1 )   // non-MSAA
    CalculateMinMaxDepthInLds( globalIdx, 0 );
#elif ( USE_DEPTH_CULLING == 2 ) // MSAA
    uint depthBufferWidth, depthBufferHeight, depthBufferNumSamples;
    g_DepthTexture.GetDimensions( depthBufferWidth, depthBufferHeight, depthBufferNumSamples );
    for( uint sampleIdx=0; sampleIdx<depthBufferNumSamples; sampleIdx++ )
    {
        CalculateMinMaxDepthInLds( globalIdx, sampleIdx );
    }
#endif

    GroupMemoryBarrierWithGroupSync();
    maxZ = asfloat( ldsZMax );
    minZ = asfloat( ldsZMin );
#endif

    // loop over the lights and do a sphere vs. frustum intersection test
    uint uNumPointLights = g_uNumLights & 0xFFFFu;
    for(uint i=0; i<uNumPointLights; i+=NUM_THREADS_PER_TILE)
    {
        uint il = localIdxFlattened + i;
        if( il < uNumPointLights )
        {
            float4 center = g_PointLightBufferCenterAndRadius[il];
            float r = center.w;
            center.xyz = mul( float4(center.xyz, 1), g_mWorldView ).xyz;

            // test if sphere is intersecting or inside frustum
#if ( USE_DEPTH_CULLING != 0 )
            if( -center.z + minZ < r && center.z - maxZ < r )
#else
            if( -center.z < r )
#endif
            {
                if( ( GetSignedDistanceFromPlane( center, frustumEqn[0] ) < r ) &&
                    ( GetSignedDistanceFromPlane( center, frustumEqn[1] ) < r ) &&
                    ( GetSignedDistanceFromPlane( center, frustumEqn[2] ) < r ) &&
                    ( GetSignedDistanceFromPlane( center, frustumEqn[3] ) < r ) )
                {
                    // do a thread-safe increment of the list counter 
                    // and put the index of this light into the list
                    uint dstIdx = 0;
                    InterlockedAdd( ldsLightIdxCounter, 1, dstIdx );
                    ldsLightIdx[dstIdx] = il;
                }
            }
        }
    }

    GroupMemoryBarrierWithGroupSync();

    // and again for spot lights
    uint uNumPointLightsInThisTile = ldsLightIdxCounter;
    uint uNumSpotLights = (g_uNumLights & 0xFFFF0000u) >> 16;
    for(uint j=0; j<uNumSpotLights; j+=NUM_THREADS_PER_TILE)
    {
        uint jl = localIdxFlattened + j;
        if( jl < uNumSpotLights )
        {
            float4 center = g_SpotLightBufferCenterAndRadius[jl];
            float r = center.w;
            center.xyz = mul( float4(center.xyz, 1), g_mWorldView ).xyz;

            // test if sphere is intersecting or inside frustum
#if ( USE_DEPTH_CULLING != 0 )
            if( -center.z + minZ < r && center.z - maxZ < r )
#else
            if( -center.z < r )
#endif
            {
                if( ( GetSignedDistanceFromPlane( center, frustumEqn[0] ) < r ) &&
                    ( GetSignedDistanceFromPlane( center, frustumEqn[1] ) < r ) &&
                    ( GetSignedDistanceFromPlane( center, frustumEqn[2] ) < r ) &&
                    ( GetSignedDistanceFromPlane( center, frustumEqn[3] ) < r ) )
                {
                    // do a thread-safe increment of the list counter 
                    // and put the index of this light into the list
                    uint dstIdx = 0;
                    InterlockedAdd( ldsLightIdxCounter, 1, dstIdx );
                    ldsLightIdx[dstIdx] = jl;
                }
            }
        }
    }

    GroupMemoryBarrierWithGroupSync();

    {   // write back
        uint startOffset = g_uMaxNumLightsPerTile*tileIdxFlattened;

        for(uint i=localIdxFlattened; i<uNumPointLightsInThisTile; i+=NUM_THREADS_PER_TILE)
        {
            // per-tile list of light indices
            g_PerTileLightIndexBufferOut[startOffset+i] = ldsLightIdx[i];
        }

        for(uint j=(localIdxFlattened+uNumPointLightsInThisTile); j<ldsLightIdxCounter; j+=NUM_THREADS_PER_TILE)
        {
            // per-tile list of light indices
            g_PerTileLightIndexBufferOut[startOffset+j+1] = ldsLightIdx[j];
        }

        if( localIdxFlattened == 0 )
        {
            // mark the end of each per-tile list with a sentinel (point lights)
            g_PerTileLightIndexBufferOut[startOffset+uNumPointLightsInThisTile] = LIGHT_INDEX_BUFFER_SENTINEL;

            // mark the end of each per-tile list with a sentinel (spot lights)
            g_PerTileLightIndexBufferOut[startOffset+ldsLightIdxCounter+1] = LIGHT_INDEX_BUFFER_SENTINEL;
        }
    }
}

looks conceptually correctly to me, that's doing the flip from 2d top-down to 3d bottom-up coordinates:

 
frustum[0] = ConvertProjToView( float4( pxm/(float)uWindowWidthEvenlyDivisibleByTileRes*2.f-1.f, (uWindowHeightEvenlyDivisibleByTileRes-pym)/(float)uWindowHeightEvenlyDivisibleByTileRes*2.f-1.f,1.f,1.f) );
frustum[1] = ConvertProjToView( float4( pxp/(float)uWindowWidthEvenlyDivisibleByTileRes*2.f-1.f, (uWindowHeightEvenlyDivisibleByTileRes-pym)/(float)uWindowHeightEvenlyDivisibleByTileRes*2.f-1.f,1.f,1.f) );
frustum[2] = ConvertProjToView( float4( pxp/(float)uWindowWidthEvenlyDivisibleByTileRes*2.f-1.f, (uWindowHeightEvenlyDivisibleByTileRes-pyp)/(float)uWindowHeightEvenlyDivisibleByTileRes*2.f-1.f,1.f,1.f) );
frustum[3] = ConvertProjToView( float4( pxm/(float)uWindowWidthEvenlyDivisibleByTileRes*2.f-1.f, (uWindowHeightEvenlyDivisibleByTileRes-pyp)/(float)uWindowHeightEvenlyDivisibleByTileRes*2.f-1.f,1.f,1.f) );

maybe your debug output is buggy? :)

Hmm, the way I output the tiles affected by a light is just with


if(pointLightCount >= 1)
{
	imageStore(finalImage, pixelPos, vec4(1.0f, 0.0f, 0.0f, 1.0f));
}

the pointLightCount variable being per tile, also I tried multiplying the y component of the frustums by -1 which puts the light in the correct position but that solution feels a bit hacky...


 frustum[0] = ConvertProjToView( vec4( pxm / float(uWindowWidthEvenlyDivisibleByTileRes) * 2.0f - 1.0f, -1.0f*((uWindowHeightEvenlyDivisibleByTileRes - pym) / float(uWindowHeightEvenlyDivisibleByTileRes) * 2.0f - 1.0f), 1.0f, 1.0f) );
    frustum[1] = ConvertProjToView( vec4( pxp / float(uWindowWidthEvenlyDivisibleByTileRes) * 2.0f - 1.0f, -1.0f*((uWindowHeightEvenlyDivisibleByTileRes - pym) / float(uWindowHeightEvenlyDivisibleByTileRes) * 2.0f - 1.0f), 1.0f, 1.0f) );
    frustum[2] = ConvertProjToView( vec4( pxp / float(uWindowWidthEvenlyDivisibleByTileRes) * 2.0f - 1.0f, -1.0f*((uWindowHeightEvenlyDivisibleByTileRes - pyp) / float(uWindowHeightEvenlyDivisibleByTileRes) * 2.0f - 1.0f), 1.0f ,1.0f) );
    frustum[3] = ConvertProjToView( vec4( pxm / float(uWindowWidthEvenlyDivisibleByTileRes) * 2.0f - 1.0f, -1.0f*((uWindowHeightEvenlyDivisibleByTileRes - pyp) / float(uWindowHeightEvenlyDivisibleByTileRes) * 2.0f - 1.0f), 1.0f, 1.0f) );

I mean it works as intended but I'm starting to think maybe my projection matrix is wrong, but then normal rendering shouldn't be working either

This topic is closed to new replies.

Advertisement