Jump to content
  • Advertisement
Sign in to follow this  

DX11 [DirectX11] Instancing

This topic is 2522 days old which is more than the 365 day threshold we allow for new replies. Please post a new topic.

If you intended to correct an error in the post then please contact us.

Recommended Posts

I've managed to get basic instancing working - what I mean is that I can easily add instances of a given model and they all render correctly. For now I can only change the positions, but the buffers already include data for rotations and scales. The problem starts when I make the instance buffer a dynamic one, allow the CPU to write to it, and try to change the data (the position of the model) each frame.

Shader Code:

// Texture and sampler used by the pixel shader, bound to slots t0 and s0.
Texture2D color_map : register( t0 );
SamplerState sample_type : register( s0 );

// Transform matrices consumed by VS, bound to constant-buffer slot b0.
cbuffer world_view_proj : register( b0 )
matrix world;
matrix view;
matrix projection;

// Vertex shader input. The first five members are the mesh attributes; the
// instance_* members arrive through the TEXCOORD1..TEXCOORD3 semantics.
struct Vertex_Input_Type
float4 position : POSITION;
float2 tex : TEXCOORD0;
float3 normal : NORMAL;
float3 tangent : TANGENT;
float3 binormal : BINORMAL;
float3 instance_pos : TEXCOORD1;
float3 instance_rot : TEXCOORD2;
float3 instance_scale : TEXCOORD3;

// Data handed from the vertex shader to the pixel shader.
struct Pixel_Input_Type
float4 position : SV_POSITION;
float2 tex : TEXCOORD0;

// Vertex shader: offsets the vertex by the instance position, then applies the
// world, view and projection matrices in that order. The texture coordinate is
// passed through unchanged. instance_rot and instance_scale are not read here.
Pixel_Input_Type VS( Vertex_Input_Type input )
Pixel_Input_Type output;

// Force w to 1 before the matrix multiplications.
input.position.w = 1.0f;

// Translate by the instance position, component by component.
input.position.x += input.instance_pos.x;
input.position.y += input.instance_pos.y;
input.position.z += input.instance_pos.z;

output.position = mul( input.position, world );
output.position = mul( output.position, view );
output.position = mul( output.position, projection );
output.tex = input.tex;

return output;

// Pixel shader: returns the color_map sample at the interpolated texture coordinate.
float4 PS( Pixel_Input_Type input ) : SV_TARGET
return color_map.Sample( sample_type, input.tex );

// Technique "Render" with a single pass P0: VS compiled as vs_4_0, PS compiled
// as ps_4_0, and the geometry shader set to 0.
technique11 Render
pass P0
SetVertexShader( CompileShader( vs_4_0, VS() ) );
SetGeometryShader( 0 );
SetPixelShader( CompileShader( ps_4_0, PS() ) );

Creation of the layout:

// Input layout with eight elements: [0..4] are per-vertex attributes read from
// input slot 0, [5..7] are per-instance attributes read from input slot 1.
// The semantic name/index pairs match the members of the shader's Vertex_Input_Type.
D3D11_INPUT_ELEMENT_DESC polygon_layout[8];
// [0] POSITION0: four floats at offset 0 of slot 0.
polygon_layout[0].SemanticName = "POSITION";
polygon_layout[0].SemanticIndex = 0;
polygon_layout[0].Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
polygon_layout[0].InputSlot = 0;
polygon_layout[0].AlignedByteOffset = 0;
polygon_layout[0].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
polygon_layout[0].InstanceDataStepRate = 0;

// [1] TEXCOORD0: two floats, appended after the previous element.
polygon_layout[1].SemanticName = "TEXCOORD";
polygon_layout[1].SemanticIndex = 0;
polygon_layout[1].Format = DXGI_FORMAT_R32G32_FLOAT;
polygon_layout[1].InputSlot = 0;
polygon_layout[1].AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
polygon_layout[1].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
polygon_layout[1].InstanceDataStepRate = 0;

// [2] NORMAL0: three floats.
polygon_layout[2].SemanticName = "NORMAL";
polygon_layout[2].SemanticIndex = 0;
polygon_layout[2].Format = DXGI_FORMAT_R32G32B32_FLOAT;
polygon_layout[2].InputSlot = 0;
polygon_layout[2].AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
polygon_layout[2].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
polygon_layout[2].InstanceDataStepRate = 0;

// [3] TANGENT0: three floats.
polygon_layout[3].SemanticName = "TANGENT";
polygon_layout[3].SemanticIndex = 0;
polygon_layout[3].Format = DXGI_FORMAT_R32G32B32_FLOAT;
polygon_layout[3].InputSlot = 0;
polygon_layout[3].AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
polygon_layout[3].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
polygon_layout[3].InstanceDataStepRate = 0;

// [4] BINORMAL0: three floats; last per-vertex element.
polygon_layout[4].SemanticName = "BINORMAL";
polygon_layout[4].SemanticIndex = 0;
polygon_layout[4].Format = DXGI_FORMAT_R32G32B32_FLOAT;
polygon_layout[4].InputSlot = 0;
polygon_layout[4].AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
polygon_layout[4].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
polygon_layout[4].InstanceDataStepRate = 0;

// [5] Instance position -> TEXCOORD1. First element of slot 1, so its offset is 0.
polygon_layout[5].SemanticName = "TEXCOORD";
polygon_layout[5].SemanticIndex = 1;
polygon_layout[5].Format = DXGI_FORMAT_R32G32B32_FLOAT;
polygon_layout[5].InputSlot = 1;
polygon_layout[5].AlignedByteOffset = 0;
polygon_layout[5].InputSlotClass = D3D11_INPUT_PER_INSTANCE_DATA;
polygon_layout[5].InstanceDataStepRate = 1;

// [6] Instance rotation -> TEXCOORD2.
polygon_layout[6].SemanticName = "TEXCOORD";
polygon_layout[6].SemanticIndex = 2;
polygon_layout[6].Format = DXGI_FORMAT_R32G32B32_FLOAT;
polygon_layout[6].InputSlot = 1;
polygon_layout[6].AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
polygon_layout[6].InputSlotClass = D3D11_INPUT_PER_INSTANCE_DATA;
polygon_layout[6].InstanceDataStepRate = 1;

// [7] Instance scale -> TEXCOORD3.
polygon_layout[7].SemanticName = "TEXCOORD";
polygon_layout[7].SemanticIndex = 3;
polygon_layout[7].Format = DXGI_FORMAT_R32G32B32_FLOAT;
polygon_layout[7].InputSlot = 1;
polygon_layout[7].AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
polygon_layout[7].InputSlotClass = D3D11_INPUT_PER_INSTANCE_DATA;
polygon_layout[7].InstanceDataStepRate = 1;

unsigned int num_elements = ARRAYSIZE( polygon_layout );

// Look up pass 0 of the "Render" technique to obtain the vertex shader bytecode.
ID3DX11EffectTechnique* technique;
technique = m_effect->GetTechniqueByName( "Render" );
ID3DX11EffectPass* pass = technique->GetPassByIndex( 0U );

D3DX11_PASS_SHADER_DESC pass_desc;
D3DX11_EFFECT_SHADER_DESC shader_desc;

pass->GetVertexShaderDesc( &pass_desc );
pass_desc.pShaderVariable->GetShaderDesc( pass_desc.ShaderIndex, &shader_desc );

// Create the layout against that bytecode; the result is stored in m_layout.
if( FAILED( device->CreateInputLayout( polygon_layout, num_elements, shader_desc.pBytecode, shader_desc.BytecodeLength, &m_layout ) ) )
return false;

The effect file loads properly and the textures show up as expected, so I've left that code out. Creating the vertex buffer:
vertices - table that contains the vertex data of type Vertex_Type

// Creates m_vertex_buf from the `vertices` array: default usage, no CPU access
// flags, sized for vertex_count elements of Vertex_Type.
D3D11_BUFFER_DESC vertex_buf_desc;
D3D11_SUBRESOURCE_DATA vertex_data;

vertex_buf_desc.Usage = D3D11_USAGE_DEFAULT;
vertex_buf_desc.ByteWidth = sizeof( Vertex_Type ) * vertex_count;
vertex_buf_desc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
vertex_buf_desc.CPUAccessFlags = 0;
vertex_buf_desc.MiscFlags = 0;
vertex_buf_desc.StructureByteStride = 0;

// Initial contents come from `vertices`.
vertex_data.pSysMem = vertices;
vertex_data.SysMemPitch = 0;
vertex_data.SysMemSlicePitch = 0;

// NOTE(review): the braces of this snippet were lost in the post; presumably the
// failure branch frees `vertices` and returns false - confirm against the real file.
if( FAILED( device->CreateBuffer( &vertex_buf_desc, &vertex_data, &m_vertex_buf ) ) )
delete[] vertices;

return false;

// The CPU-side array is freed once the buffer has been created.
delete[] vertices;

And here the instance buffer:
instances - table that contains the instance data of type Model_Instance_Type

// Creates m_instance_buf from the `instances` array: dynamic usage with CPU write
// access, sized for one Model_Instance_Type per entry of m_model_instance_list.
D3D11_BUFFER_DESC instance_buf_desc;
instance_buf_desc.Usage = D3D11_USAGE_DYNAMIC;
instance_buf_desc.ByteWidth = sizeof( Model_Instance_Type ) * m_model_instance_list.size();
// There is no separate instance-buffer bind flag in use here; the buffer is bound as a vertex buffer.
instance_buf_desc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
instance_buf_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
instance_buf_desc.MiscFlags = 0;
instance_buf_desc.StructureByteStride = 0;

// Initial contents come from `instances`.
D3D11_SUBRESOURCE_DATA instance_data;
instance_data.pSysMem = instances;
instance_data.SysMemPitch = 0;
instance_data.SysMemSlicePitch = 0;

// NOTE(review): the braces of this snippet were lost in the post; presumably the
// failure branch frees `instances` and returns false - confirm against the real file.
if( FAILED( device->CreateBuffer( &instance_buf_desc, &instance_data, &m_instance_buf ) ) )
delete[] instances;

return false;

// The CPU-side array is freed once the buffer has been created.
delete[] instances;

Everything works perfectly until I try to access the data in the instance buffer and change it (on a per-frame basis). Here is the function that does that, along with the structs I'm using (in case there's an error).

// Per-frame update of a single instance in m_instance_buf.
// As explained in the replies further down this thread, mapping with
// D3D11_MAP_WRITE_DISCARD invalidates the entire buffer, so writing only the
// element at `index` leaves every other instance with undefined contents.
D3D11_MAPPED_SUBRESOURCE mapped_subresource;

if( FAILED( device_context->Map( m_instance_buf, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped_subresource ) ) )
return false;

// Treat the mapped memory as an array of Model_Instance_Type.
Model_Instance_Type* instance_data = static_cast<Model_Instance_Type*>( mapped_subresource.pData );

// Only the element at `index` is written.
instance_data[index].pos = XMFLOAT3( posX, posY, posZ );
instance_data[index].rot = XMFLOAT3( rotX, rotY, rotZ );
instance_data[index].scale = XMFLOAT3( scaleX, scaleY, scaleZ );

device_context->Unmap( m_instance_buf, 0 );

I think that the functions that actually render the model and set up the shader variables will be needed:

// Binds the model's buffers, fetches the world/view/projection matrices and
// forwards them to the shader together with the vertex and instance counts.
void IceModel::Render( ID3D11DeviceContext* device_context )
RenderBuffers( device_context );

XMFLOAT4X4 world, view, projection;
XMMATRIX xna_world, xna_view, xna_projection;

// Each matrix is fetched as an XMMATRIX and then stored into an XMFLOAT4X4,
// which is the type shader->Render is called with.
GetWorldMatrix( xna_world );
XMStoreFloat4x4( &world, xna_world );

GetViewMatrix( xna_view );
XMStoreFloat4x4( &view, xna_view );

GetProjectionMatrix( xna_projection );
XMStoreFloat4x4( &projection, xna_projection );

// m_model.size() is passed as the vertex count, m_model_instance_list.size() as the instance count.
shader->Render( device_context, m_model.size(), m_model_instance_list.size(), world, view, projection, m_tex );

// Binds the vertex buffer to input slot 0 and the instance buffer to input
// slot 1, then selects triangle-list topology.
void Model::RenderBuffers( ID3D11DeviceContext* device_context )
// Parallel arrays: entry 0 describes m_vertex_buf, entry 1 describes m_instance_buf.
unsigned int strides[] = { sizeof( Vertex_Type ), sizeof( Model_Instance_Type ) };
unsigned int offsets[] = { 0, 0 };
ID3D11Buffer* buf_ptrs[] = { m_vertex_buf, m_instance_buf };

device_context->IASetVertexBuffers( 0, 2, buf_ptrs, strides, offsets );
device_context->IASetPrimitiveTopology( D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST );

// Uploads the texture, sampler and the three matrices to the effect variables
// of the same names, then issues the instanced draw via RenderShader.
// Returns false as soon as setting any effect variable fails, true otherwise.
bool Shader2D::Render( ID3D11DeviceContext* device_context, const int& vertex_count, const int& instance_count, XMFLOAT4X4 world,
XMFLOAT4X4 view, XMFLOAT4X4 projection, const std::vector<Texture*>& tex )
// Convert the stored XMFLOAT4X4 values back into XMMATRIX.
XMMATRIX xna_world = XMLoadFloat4x4( &world );
XMMATRIX xna_view = XMLoadFloat4x4( &view );
XMMATRIX xna_projection = XMLoadFloat4x4( &projection );

// The texture at index 1 of `tex` is used as the color map.
ID3DX11EffectShaderResourceVariable* color_map = m_effect->GetVariableByName( "color_map" )->AsShaderResource();
if( FAILED( color_map->SetResource( tex[1]->GetTexture() ) ) )
return false;

ID3DX11EffectSamplerVariable* sample_type = m_effect->GetVariableByName( "sample_type" )->AsSampler();
if( FAILED( sample_type->SetSampler( 0, m_sample_state ) ) )
return false;

// Each XMMATRIX is passed to SetMatrix as a raw float pointer.
ID3DX11EffectMatrixVariable* world_matrix = m_effect->GetVariableByName( "world" )->AsMatrix();
if( FAILED( world_matrix->SetMatrix( reinterpret_cast<float*>( &xna_world ) ) ) )
return false;

ID3DX11EffectMatrixVariable* view_matrix = m_effect->GetVariableByName( "view" )->AsMatrix();
if( FAILED( view_matrix->SetMatrix( reinterpret_cast<float*>( &xna_view ) ) ) )
return false;

ID3DX11EffectMatrixVariable* projection_matrix = m_effect->GetVariableByName( "projection" )->AsMatrix();
if( FAILED( projection_matrix->SetMatrix( reinterpret_cast<float*>( &xna_projection ) ) ) )
return false;

RenderShader( device_context, vertex_count, instance_count );

return true;

// Sets the input layout and, for every pass of the "Render" technique, applies
// the pass and draws `instance_count` instances of `vertex_count` vertices.
void Shader::RenderShader( ID3D11DeviceContext* device_context, const int& vertex_count, const int& instance_count )
device_context->IASetInputLayout( m_layout );

// Query the technique description to learn how many passes it has.
ID3DX11EffectTechnique* technique = m_effect->GetTechniqueByName( "Render" );
D3DX11_TECHNIQUE_DESC tech_desc;
technique->GetDesc( &tech_desc );

ID3DX11EffectPass* pass;
for( unsigned int i = 0; i < tech_desc.Passes; ++i )
pass = technique->GetPassByIndex( i );

// NOTE(review): braces were lost in the post; presumably both the Apply and the
// draw call sit inside the null check within the loop - confirm.
if( pass )
pass->Apply( 0, device_context );
device_context->DrawInstanced( vertex_count, instance_count, 0, 0 );

// Element type of the per-vertex buffer (its size is used as the slot 0 stride).
// NOTE(review): only three members are visible here, while the input layout also
// declares POSITION and TEXCOORD0 in slot 0 - presumably those members were lost
// from the post; confirm against the real header.
struct Vertex_Type
XMFLOAT3 normal;
XMFLOAT3 tangent;
XMFLOAT3 binormal;

// Element type of the per-instance buffer (its size is used as the slot 1 stride).
// NOTE(review): only `scale` is visible here, but the update code also writes
// `pos` and `rot` - presumably those members were lost from the post; confirm.
struct Model_Instance_Type
XMFLOAT3 scale;

Now about how it's not working. The model I want to move (the one I'm updating with a new position) renders perfectly: it moves, with no artifacts. However, all the other instanced objects are blinking, as if they were rendered only every second frame, so it's clear that they're not rendered properly. As if that weren't enough, the instance I'm moving not only renders in the proper spot, but also keeps rendering at its original position with the same kind of blinking. I'm completely lost on this, since, if I understand it correctly, when you update the instance buffer data the old content gets overwritten. So how come the object still renders at its original position?

I know that it's a lot of code, but I thought I understood instancing, since it works for static objects (I can set as many instances of each model as I wish, with any coordinates), and this just ruins my day. If there's anything more you need to know, please ask, as I'd really like to get this going.

Thanks in advance.

Share this post

Link to post
Share on other sites

// Quoted from the original post: maps the instance buffer with
// D3D11_MAP_WRITE_DISCARD and writes only the element at `index`.
D3D11_MAPPED_SUBRESOURCE mapped_subresource;

if( FAILED( device_context->Map( m_instance_buf, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped_subresource ) ) )
return false;

Model_Instance_Type* instance_data = static_cast<Model_Instance_Type*>( mapped_subresource.pData );

// `index` is not declared in this excerpt, which is what the question below is about.
instance_data[index].pos = XMFLOAT3( posX, posY, posZ );
instance_data[index].rot = XMFLOAT3( rotX, rotY, rotZ );
instance_data[index].scale = XMFLOAT3( scaleX, scaleY, scaleZ );

device_context->Unmap( m_instance_buf, 0 );

Could you post all of the code for this part? You have an index which suggests you're using a loop, but there's no loop here.

Share this post

Link to post
Share on other sites
That's not really a loop index, but rather the index of the instance in the vector that contains the instances (the raw data, not the instance buffer). Here is the whole function:

// Updates one instance's position, rotation and scale, both in the CPU-side
// list (m_model_instance_list) and in the GPU instance buffer (m_instance_buf).
// `index` selects the instance. Returns false if mapping the buffer fails,
// true otherwise.
// As explained in the replies further down this thread, mapping with
// D3D11_MAP_WRITE_DISCARD invalidates the entire buffer, so writing only one
// element here leaves every other instance with undefined contents.
bool Model::UpdateInstance( const int& index, const float& posX, const float& posY, const float& posZ, const float& rotX, const float& rotY,
const float& rotZ, const float& scaleX, const float& scaleY, const float& scaleZ )
// Keep the CPU-side copy in sync.
m_model_instance_list[index]->pos = XMFLOAT3( posX, posY, posZ );
m_model_instance_list[index]->rot = XMFLOAT3( rotX, rotY, rotZ );
m_model_instance_list[index]->scale = XMFLOAT3( scaleX, scaleY, scaleZ );

D3D11_MAPPED_SUBRESOURCE mapped_subresource;

if( FAILED( device_context->Map( m_instance_buf, 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped_subresource ) ) )
return false;

// Treat the mapped memory as an array of Model_Instance_Type.
Model_Instance_Type* instance_data = static_cast<Model_Instance_Type*>( mapped_subresource.pData );

// Only the element at `index` is written to the mapped buffer.
instance_data[index].pos = XMFLOAT3( posX, posY, posZ );
instance_data[index].rot = XMFLOAT3( rotX, rotY, rotZ );
instance_data[index].scale = XMFLOAT3( scaleX, scaleY, scaleZ );

device_context->Unmap( m_instance_buf, 0 );

return true;

m_model_instance_list :
// CPU-side list of instances, stored as pointers; its size() is used as the instance count.
std::vector<Model_Instance_Type*> m_model_instance_list;

The idea is to not update the whole buffer when I want to update one given instance. As the data is a table, I assumed the [] operator should work fine. The data gets changed when I debug that part, and the instance moves. What happens is the blinking and the "copying itself" I described earlier.

EDIT: No idea if this helps, but I'm unsure about the memory alignment. While I did manage to get around the XMMATRIX requirements (by storing the matrices as XMFLOAT4X4 and using the Store and Load functions), I might have made an error when describing the layout. As I understand it, the per-instance part of a layout does not need to be (and does not have to be) aligned with the per-vertex data - thus the first element of the instance data, the position, has a byte offset of 0. I also set InputSlot to 0 for per-vertex data and 1 for per-instance data, as there are 2 vertex buffers (I haven't seen a D3D11_BIND_INSTANCE_BUFFER flag, so I used the vertex buffer one).

Share this post

Link to post
Share on other sites

The idea is to not update the whole buffer when I want to update one given instance. As the data is a table, I assumed the [] operator should work fine. The data gets changed when I debug that part, and the instance moves. What happens is the blinking and the "copying itself" I described earlier.

Ah, well that's your problem. When you map with D3D11_MAP_WRITE_DISCARD (which is what you should be doing for a dynamic VB) the entire contents of the vertex buffer are invalidated. So you can't just copy in the data for one instance at a time, you have to copy in the data for all instances.

Share this post

Link to post
Share on other sites

[quote name='vipeout' timestamp='1311356682' post='4839043']
The idea is to not update the whole buffer when I want to update one given instance. As the data is a table, I assumed the [] operator should work fine. The data gets changed when I debug that part, and the instance moves. What happens is the blinking and the "copying itself" I described earlier.

Ah, well that's your problem. When you map with D3D11_MAP_WRITE_DISCARD (which is what you should be doing for a dynamic VB) the entire contents of the vertex buffer are invalidated. So you can't just copy in the data for one instance at a time, you have to copy in the data for all instances.

Aw, my bad - I had been reading up on the meaning of the flags and just forgot that :/. Thanks for pointing this out. I'll try fixing this ASAP, but I've got one question: what do you do if you have thousands of instances? The buffer can get quite big. Isn't there a way to avoid writing the whole buffer each frame when just 1% of it has changed?

EDIT: Worked perfectly. Thank you very much, been trying to solve it even trying to add padding values to the buffer structs (I've been thinking it could be reading "dirty" data, not the one I've assigned).

Share this post

Link to post
Share on other sites
If you don't want to update the whole buffer, then you can't use DISCARD. For dynamic buffers, the driver will create multiple buffers behind the scenes and cycle through them whenever you update them, so that you avoid any synchronization issues with the GPU (since you don't want to write to an area of memory while the GPU is accessing it). This fits in nicely with the semantics of DISCARD: the driver can cycle to the next buffer because the contents are undefined by the spec. If you don't want to update the entire buffer you can use NO_OVERWRITE, but when you do that you can only update a portion of the buffer that the GPU isn't currently using. This can work for adding new instances, but not for updating existing instances.

Share this post

Link to post
Share on other sites
Sign in to follow this  

  • Advertisement
  • Advertisement
  • Popular Tags

  • Similar Content

    • By MikhailGorobets
      I have a problem with SSAO. On left hand black area.
      Code shader:
      Texture2D<uint> texGBufferNormal : register(t0); Texture2D<float> texGBufferDepth : register(t1); Texture2D<float4> texSSAONoise : register(t2); float3 GetUV(float3 position) { float4 vp = mul(float4(position, 1.0), ViewProject); vp.xy = float2(0.5, 0.5) + float2(0.5, -0.5) * vp.xy / vp.w; return float3(vp.xy, vp.z / vp.w); } float3 GetNormal(in Texture2D<uint> texNormal, in int3 coord) { return normalize(2.0 * UnpackNormalSphermap(texNormal.Load(coord)) - 1.0); } float3 GetPosition(in Texture2D<float> texDepth, in int3 coord) { float4 position = 1.0; float2 size; texDepth.GetDimensions(size.x, size.y); position.x = 2.0 * (coord.x / size.x) - 1.0; position.y = -(2.0 * (coord.y / size.y) - 1.0); position.z = texDepth.Load(coord); position = mul(position, ViewProjectInverse); position /= position.w; return position.xyz; } float3 GetPosition(in float2 coord, float depth) { float4 position = 1.0; position.x = 2.0 * coord.x - 1.0; position.y = -(2.0 * coord.y - 1.0); position.z = depth; position = mul(position, ViewProjectInverse); position /= position.w; return position.xyz; } float DepthInvSqrt(float nonLinearDepth) { return 1 / sqrt(1.0 - nonLinearDepth); } float GetDepth(in Texture2D<float> texDepth, float2 uv) { return texGBufferDepth.Sample(samplerPoint, uv); } float GetDepth(in Texture2D<float> texDepth, int3 screenPos) { return texGBufferDepth.Load(screenPos); } float CalculateOcclusion(in float3 position, in float3 direction, in float radius, in float pixelDepth) { float3 uv = GetUV(position + radius * direction); float d1 = DepthInvSqrt(GetDepth(texGBufferDepth, uv.xy)); float d2 = DepthInvSqrt(uv.z); return step(d1 - d2, 0) * min(1.0, radius / abs(d2 - pixelDepth)); } float GetRNDTexFactor(float2 texSize) { float width; float height; texGBufferDepth.GetDimensions(width, height); return float2(width, height) / texSize; } float main(FullScreenPSIn input) : SV_TARGET0 { int3 screenPos = int3(input.Position.xy, 0); float depth = 
DepthInvSqrt(GetDepth(texGBufferDepth, screenPos)); float3 normal = GetNormal(texGBufferNormal, screenPos); float3 position = GetPosition(texGBufferDepth, screenPos) + normal * SSAO_NORMAL_BIAS; float3 random = normalize(2.0 * texSSAONoise.Sample(samplerNoise, input.Texcoord * GetRNDTexFactor(SSAO_RND_TEX_SIZE)).rgb - 1.0); float SSAO = 0; [unroll] for (int index = 0; index < SSAO_KERNEL_SIZE; index++) { float3 dir = reflect(SamplesKernel[index].xyz, random); SSAO += CalculateOcclusion(position, dir * sign(dot(dir, normal)), SSAO_RADIUS, depth); } return 1.0 - SSAO / SSAO_KERNEL_SIZE; }  

    • By MarcusAseth
      I've been following this tutorial -> https://www.3dgep.com/introduction-to-directx-11/#The_Main_Function , did all the steps,and I ended up with the main.cpp you can see below.
      The problem is the call at line 516 
      g_d3dDeviceContext->UpdateSubresource(g_d3dConstantBuffers[CB_Frame], 0, nullptr, &g_ViewMatrix, 0, 0); which is crashing the program, and the very odd thing is that the first time trough it works fine, it crash the app the second time it is called...
      Can someone help me understand why? 😕    I have no idea...
      #include <Direct3D_11PCH.h>
      //Shaders
      using namespace DirectX;

      // Globals
      //Window
      const unsigned g_WindowWidth = 1024;
      const unsigned g_WindowHeight = 768;
      const char* g_WindowClassName = "DirectXWindowClass";
      const char* g_WindowName = "DirectX 11";
      HWND g_WinHnd = nullptr;
      const bool g_EnableVSync = true;

      //Device and SwapChain
      ID3D11Device* g_d3dDevice = nullptr;
      ID3D11DeviceContext* g_d3dDeviceContext = nullptr;
      IDXGISwapChain* g_d3dSwapChain = nullptr;
      //RenderTarget view
      ID3D11RenderTargetView* g_d3dRenderTargerView = nullptr;
      //DepthStencil view
      ID3D11DepthStencilView* g_d3dDepthStencilView = nullptr;
      //Depth Buffer Texture
      ID3D11Texture2D* g_d3dDepthStencilBuffer = nullptr;
      // Define the functionality of the depth/stencil stages
      ID3D11DepthStencilState* g_d3dDepthStencilState = nullptr;
      // Define the functionality of the rasterizer stage
      ID3D11RasterizerState* g_d3dRasterizerState = nullptr;
      D3D11_VIEWPORT g_Viewport{};

      //Vertex Buffer data
      ID3D11InputLayout* g_d3dInputLayout = nullptr;
      ID3D11Buffer* g_d3dVertexBuffer = nullptr;
      ID3D11Buffer* g_d3dIndexBuffer = nullptr;

      //Shader Data
      ID3D11VertexShader* g_d3dVertexShader = nullptr;
      ID3D11PixelShader* g_d3dPixelShader = nullptr;

      //Shader Resources
      // One constant buffer per update frequency: once at load (projection),
      // once per frame (view) and once per object (world).
      enum ConstantBuffer
      {
          CB_Application,
          CB_Frame,
          CB_Object,
          NumConstantBuffers
      };
      ID3D11Buffer* g_d3dConstantBuffers[ConstantBuffer::NumConstantBuffers];

      //Demo parameter
      XMMATRIX g_WorldMatrix;
      XMMATRIX g_ViewMatrix;
      XMMATRIX g_ProjectionMatrix;

      // Vertex data for a colored cube.
      struct VertexPosColor
      {
          XMFLOAT3 Position;
          XMFLOAT3 Color;
      };

      VertexPosColor g_Vertices[8] =
      {
          { XMFLOAT3(-1.0f, -1.0f, -1.0f), XMFLOAT3(0.0f, 0.0f, 0.0f) }, // 0
          { XMFLOAT3(-1.0f,  1.0f, -1.0f), XMFLOAT3(0.0f, 1.0f, 0.0f) }, // 1
          { XMFLOAT3( 1.0f,  1.0f, -1.0f), XMFLOAT3(1.0f, 1.0f, 0.0f) }, // 2
          { XMFLOAT3( 1.0f, -1.0f, -1.0f), XMFLOAT3(1.0f, 0.0f, 0.0f) }, // 3
          { XMFLOAT3(-1.0f, -1.0f,  1.0f), XMFLOAT3(0.0f, 0.0f, 1.0f) }, // 4
          { XMFLOAT3(-1.0f,  1.0f,  1.0f), XMFLOAT3(0.0f, 1.0f, 1.0f) }, // 5
          { XMFLOAT3( 1.0f,  1.0f,  1.0f), XMFLOAT3(1.0f, 1.0f, 1.0f) }, // 6
          { XMFLOAT3( 1.0f, -1.0f,  1.0f), XMFLOAT3(1.0f, 0.0f, 1.0f) }  // 7
      };

      // 12 triangles (2 per cube face), 16-bit indices into g_Vertices.
      WORD g_Indicies[36] =
      {
          0, 1, 2, 0, 2, 3,
          4, 6, 5, 4, 7, 6,
          4, 5, 1, 4, 1, 0,
          3, 2, 6, 3, 6, 7,
          1, 5, 6, 1, 6, 2,
          4, 0, 3, 4, 3, 7
      };

      //Forward Declaration
      LRESULT CALLBACK WindowProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam);
      bool LoadContent();
      int Run();
      void Update(float deltaTime);
      void Clear(const FLOAT clearColor[4], FLOAT clearDepth, UINT8 clearStencil);
      void Present(bool vSync);
      void Render();
      void CleanUp();
      int InitApplication(HINSTANCE hInstance, int cmdShow);
      int InitDirectX(HINSTANCE hInstance, BOOL vsync);

      // Program entry point: creates the window, initializes Direct3D, loads the
      // demo content and runs the message/render loop. Returns -1 on any
      // initialization failure, otherwise the exit code produced by Run().
      int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR cmd, int cmdShow)
      {
          UNREFERENCED_PARAMETER(hPrevInstance);
          UNREFERENCED_PARAMETER(cmd);

          // Check for DirectX Math library support.
          if (!XMVerifyCPUSupport())
          {
              MessageBox(nullptr, TEXT("Failed to verify DirectX Math library support."), nullptr, MB_OK);
              return -1;
          }
          if (InitApplication(hInstance, cmdShow) != 0)
          {
              MessageBox(nullptr, TEXT("Failed to create applicaiton window."), nullptr, MB_OK);
              return -1;
          }
          if (InitDirectX(hInstance, g_EnableVSync) != 0)
          {
              MessageBox(nullptr, TEXT("Failed to initialize DirectX."), nullptr, MB_OK);
              CleanUp();
              return -1;
          }
          if (!LoadContent())
          {
              MessageBox(nullptr, TEXT("Failed to load content."), nullptr, MB_OK);
              CleanUp();
              return -1;
          }

          int returnCode = Run();
          CleanUp();
          return returnCode;
      }

      // Message pump: dispatches pending window messages and, when the queue is
      // empty, advances and renders one frame. Returns the WM_QUIT wParam.
      int Run()
      {
          MSG msg{};
          static DWORD previousTime = timeGetTime();
          static const float targetFramerate = 30.0f;
          static const float maxTimeStep = 1.0f / targetFramerate;

          while (msg.message != WM_QUIT)
          {
              if (PeekMessage(&msg, 0, 0, 0, PM_REMOVE))
              {
                  TranslateMessage(&msg);
                  DispatchMessage(&msg);
              }
              else
              {
                  DWORD currentTime = timeGetTime();
                  float deltaTime = (currentTime - previousTime) / 1000.0f;
                  previousTime = currentTime;
                  // Clamp the step so a long stall does not produce a huge jump.
                  deltaTime = std::min<float>(deltaTime, maxTimeStep);

                  Update(deltaTime);
                  Render();
              }
          }
          return static_cast<int>(msg.wParam);
      }

      // Window procedure: validates the window on WM_PAINT, posts WM_QUIT on
      // WM_DESTROY and forwards everything else to DefWindowProc.
      LRESULT CALLBACK WindowProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam)
      {
          PAINTSTRUCT paintstruct;
          HDC hDC;

          switch (msg)
          {
          case WM_PAINT:
          {
              hDC = BeginPaint(hwnd, &paintstruct);
              EndPaint(hwnd, &paintstruct);
          }break;
          case WM_DESTROY:
          {
              PostQuitMessage(0);
          }break;
          default:
              return DefWindowProc(hwnd, msg, wParam, lParam);
              break;
          }
          return 0;
      }

      // Registers the window class and creates the main window with a client
      // area of g_WindowWidth x g_WindowHeight. Returns 0 on success, -1 on failure.
      int InitApplication(HINSTANCE hInstance, int cmdShow)
      {
          //Register Window class
          WNDCLASSEX mainWindow{};
          mainWindow.cbSize = sizeof(WNDCLASSEX);
          mainWindow.style = CS_HREDRAW | CS_VREDRAW;
          mainWindow.lpfnWndProc = &WindowProc;
          mainWindow.hInstance = hInstance;
          mainWindow.hCursor = LoadCursor(NULL, IDC_ARROW);
          mainWindow.hbrBackground = (HBRUSH)(COLOR_WINDOW + 1);
          mainWindow.lpszMenuName = nullptr;
          mainWindow.lpszClassName = g_WindowClassName;
          if (!RegisterClassEx(&mainWindow))
          {
              return -1;
          }

          // Grow the rectangle so that the *client* area has the requested size.
          RECT client{ 0,0,g_WindowWidth,g_WindowHeight };
          AdjustWindowRect(&client, WS_OVERLAPPEDWINDOW, false);

          // Create Window
          g_WinHnd = CreateWindowEx(NULL, g_WindowClassName, g_WindowName,
              WS_OVERLAPPEDWINDOW | WS_VISIBLE,
              CW_USEDEFAULT, CW_USEDEFAULT,
              client.right - client.left, client.bottom - client.top,
              nullptr, nullptr, hInstance, nullptr);
          if (!g_WinHnd)
          {
              return -1;
          }

          UpdateWindow(g_WinHnd);
          return 0;
      }

      // Creates the device, swap chain, render-target view, depth/stencil buffer
      // and view, the depth/stencil and rasterizer states, and fills g_Viewport.
      // Returns 0 on success, -1 on the first failing call.
      int InitDirectX(HINSTANCE hInstance, BOOL vsync)
      {
          assert(g_WinHnd != nullptr);

          RECT client{};
          GetClientRect(g_WinHnd, &client);
          unsigned int clientWidth = client.right - client.left;
          unsigned int clientHeight = client.bottom - client.top;

          //Direct3D Initialization
          HRESULT hr{};

          //SwapChainDesc
          DXGI_RATIONAL refreshRate = vsync ? DXGI_RATIONAL{ 1, 60 } : DXGI_RATIONAL{ 0, 1 };

          DXGI_SWAP_CHAIN_DESC swapChainDesc{};
          swapChainDesc.BufferDesc.Width = clientWidth;
          swapChainDesc.BufferDesc.Height = clientHeight;
          swapChainDesc.BufferDesc.RefreshRate = refreshRate;
          swapChainDesc.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
          swapChainDesc.BufferDesc.Scaling = DXGI_MODE_SCALING_CENTERED;
          swapChainDesc.SampleDesc.Count = 1;
          swapChainDesc.SampleDesc.Quality = 0;
          swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
          swapChainDesc.BufferCount = 1;
          swapChainDesc.OutputWindow = g_WinHnd;
          swapChainDesc.Windowed = true;
          swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD;

          UINT createDeviceFlags{};
      #if _DEBUG
          createDeviceFlags = D3D11_CREATE_DEVICE_DEBUG;
      #endif

          //Feature levels
          const D3D_FEATURE_LEVEL features[]{ D3D_FEATURE_LEVEL_11_0 };
          D3D_FEATURE_LEVEL featureLevel;

          hr = D3D11CreateDeviceAndSwapChain(
              nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, createDeviceFlags,
              features, _countof(features), D3D11_SDK_VERSION,
              &swapChainDesc, &g_d3dSwapChain, &g_d3dDevice, &featureLevel, &g_d3dDeviceContext
          );
          if (FAILED(hr))
          {
              return -1;
          }

          //Render Target View
          ID3D11Texture2D* backBuffer = nullptr;
          hr = g_d3dSwapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), reinterpret_cast<void**>(&backBuffer));
          if (FAILED(hr))
          {
              return -1;
          }
          hr = g_d3dDevice->CreateRenderTargetView(backBuffer, nullptr, &g_d3dRenderTargerView);
          // Release our back-buffer reference before checking hr so that it is not
          // leaked when view creation fails.
          SafeRelease(backBuffer);
          if (FAILED(hr))
          {
              return -1;
          }

          //Depth Stencil View
          D3D11_TEXTURE2D_DESC depthStencilBufferDesc{};
          depthStencilBufferDesc.Width = clientWidth;
          depthStencilBufferDesc.Height = clientHeight;
          depthStencilBufferDesc.MipLevels = 1;
          depthStencilBufferDesc.ArraySize = 1;
          depthStencilBufferDesc.Format = DXGI_FORMAT_D24_UNORM_S8_UINT;
          depthStencilBufferDesc.SampleDesc.Count = 1;
          depthStencilBufferDesc.SampleDesc.Quality = 0;
          depthStencilBufferDesc.Usage = D3D11_USAGE_DEFAULT;
          depthStencilBufferDesc.BindFlags = D3D11_BIND_DEPTH_STENCIL;

          hr = g_d3dDevice->CreateTexture2D(&depthStencilBufferDesc, nullptr, &g_d3dDepthStencilBuffer);
          if (FAILED(hr))
          {
              return -1;
          }
          hr = g_d3dDevice->CreateDepthStencilView(g_d3dDepthStencilBuffer, nullptr, &g_d3dDepthStencilView);
          if (FAILED(hr))
          {
              return -1;
          }

          //Set States
          D3D11_DEPTH_STENCIL_DESC depthStencilStateDesc{};
          depthStencilStateDesc.DepthEnable = true;
          depthStencilStateDesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL;
          depthStencilStateDesc.DepthFunc = D3D11_COMPARISON_LESS;
          depthStencilStateDesc.StencilEnable = false;

          hr = g_d3dDevice->CreateDepthStencilState(&depthStencilStateDesc, &g_d3dDepthStencilState);
          if (FAILED(hr))
          {
              return -1;
          }

          D3D11_RASTERIZER_DESC rasterizerStateDesc{};
          rasterizerStateDesc.FillMode = D3D11_FILL_SOLID;
          rasterizerStateDesc.CullMode = D3D11_CULL_BACK;
          rasterizerStateDesc.FrontCounterClockwise = FALSE;
          rasterizerStateDesc.DepthClipEnable = TRUE;
          rasterizerStateDesc.ScissorEnable = FALSE;
          rasterizerStateDesc.MultisampleEnable = FALSE;

          hr = g_d3dDevice->CreateRasterizerState(&rasterizerStateDesc, &g_d3dRasterizerState);
          if (FAILED(hr))
          {
              return -1;
          }

          //Set Viewport
          g_Viewport.Width = static_cast<float>(clientWidth);
          g_Viewport.Height = static_cast<float>(clientHeight);
          g_Viewport.TopLeftX = 0.0f;
          g_Viewport.TopLeftY = 0.0f;
          g_Viewport.MinDepth = 0.0f;
          g_Viewport.MaxDepth = 1.0f;

          return 0;
      }

      // Loads the precompiled shaders, creates the input layout, the cube's
      // vertex/index buffers and the three constant buffers, then uploads the
      // projection matrix. Returns false on the first failing call.
      bool LoadContent()
      {
          //Load Shaders
          HRESULT hr;
          assert(g_d3dDevice);

          //VS
          ID3DBlob* vsBlob = nullptr;
          hr = D3DReadFileToBlob(L"../Shaders/SimpleVertexShader.cso", &vsBlob);
          // Checked at run time (not only with assert) so that a missing .cso file
          // is reported as a load failure instead of dereferencing a null blob.
          if (FAILED(hr) || !vsBlob)
          {
              return false;
          }

          hr = g_d3dDevice->CreateVertexShader(vsBlob->GetBufferPointer(), vsBlob->GetBufferSize(), nullptr, &g_d3dVertexShader);
          if (FAILED(hr))
          {
              SafeRelease(vsBlob);
              return false;
          }

          //Create VS Input Layout
          // Both members of VertexPosColor are XMFLOAT3 (three 32-bit floats), so the
          // element format must be R32G32B32_FLOAT. A four-component format would
          // make the input assembler read 16 bytes from each 12-byte member.
          D3D11_INPUT_ELEMENT_DESC vertexLayoutDesc[] =
          {
              { "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, offsetof(VertexPosColor, Position), D3D11_INPUT_PER_VERTEX_DATA ,0 },
              { "COLOR", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, offsetof(VertexPosColor, Color), D3D11_INPUT_PER_VERTEX_DATA ,0 }
          };

          hr = g_d3dDevice->CreateInputLayout(vertexLayoutDesc, _countof(vertexLayoutDesc), vsBlob->GetBufferPointer(), vsBlob->GetBufferSize(), &g_d3dInputLayout);
          SafeRelease(vsBlob);
          if (FAILED(hr))
          {
              return false;
          }

          //PS
          ID3DBlob* psBlob = nullptr;
          hr = D3DReadFileToBlob(L"../Shaders/SimplePixelShader.cso", &psBlob);
          if (FAILED(hr) || !psBlob)
          {
              return false;
          }

          hr = g_d3dDevice->CreatePixelShader(psBlob->GetBufferPointer(), psBlob->GetBufferSize(), nullptr, &g_d3dPixelShader);
          SafeRelease(psBlob);
          if (FAILED(hr))
          {
              return false;
          }

          //Load Vertex Buffer
          D3D11_BUFFER_DESC vertexBufferDesc{};
          vertexBufferDesc.ByteWidth = sizeof(VertexPosColor) * _countof(g_Vertices);
          vertexBufferDesc.Usage = D3D11_USAGE_DEFAULT;
          vertexBufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;

          D3D11_SUBRESOURCE_DATA resourceData{};
          resourceData.pSysMem = g_Vertices;

          hr = g_d3dDevice->CreateBuffer(&vertexBufferDesc, &resourceData, &g_d3dVertexBuffer);
          if (FAILED(hr))
          {
              return false;
          }

          //Load Index Buffer
          D3D11_BUFFER_DESC indexBufferDesc{};
          indexBufferDesc.ByteWidth = sizeof(WORD) * _countof(g_Indicies);
          indexBufferDesc.Usage = D3D11_USAGE_DEFAULT;
          indexBufferDesc.BindFlags = D3D11_BIND_INDEX_BUFFER;

          resourceData.pSysMem = g_Indicies;

          hr = g_d3dDevice->CreateBuffer(&indexBufferDesc, &resourceData, &g_d3dIndexBuffer);
          if (FAILED(hr))
          {
              return false;
          }

          //Load Constant Buffers
          // Every constant buffer holds exactly one matrix.
          D3D11_BUFFER_DESC cBufferDesc{};
          cBufferDesc.ByteWidth = sizeof(XMMATRIX);
          cBufferDesc.Usage = D3D11_USAGE_DEFAULT;
          cBufferDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;

          for (size_t bufferID = 0; bufferID < NumConstantBuffers; bufferID++)
          {
              hr = g_d3dDevice->CreateBuffer(&cBufferDesc, nullptr, &g_d3dConstantBuffers[bufferID]);
              if (FAILED(hr))
              {
                  return false;
              }
          }

          //Setup Projection Matrix
          RECT client{};
          GetClientRect(g_WinHnd, &client);
          float clientWidth = static_cast<float>(client.right - client.left);
          float clientHeight = static_cast<float>(client.bottom - client.top);

          g_ProjectionMatrix = DirectX::XMMatrixPerspectiveFovLH(XMConvertToRadians(45.0f), clientWidth / clientHeight, 0.1f, 100.0f);
          g_d3dDeviceContext->UpdateSubresource(g_d3dConstantBuffers[CB_Application], 0, nullptr, &g_ProjectionMatrix, 0, 0);

          return true;
      }

      // Per-frame update: rebuilds the view matrix and advances the cube's
      // rotation by 90 degrees per second, uploading both to their constant buffers.
      void Update(float deltaTime)
      {
          XMVECTOR eyePosition = XMVectorSet(0, 0, -10, 1);
          XMVECTOR focusPoint = XMVectorSet(0, 0, 0, 1);
          XMVECTOR upDirection = XMVectorSet(0, 1, 0, 0);
          g_ViewMatrix = DirectX::XMMatrixLookAtLH(eyePosition, focusPoint, upDirection);
          g_d3dDeviceContext->UpdateSubresource(g_d3dConstantBuffers[CB_Frame], 0, nullptr, &g_ViewMatrix, 0, 0);

          static float angle = 0.0f;
          angle += 90.0f * deltaTime;
          XMVECTOR rotationAxis = XMVectorSet(0, 1, 1, 0);

          g_WorldMatrix = DirectX::XMMatrixRotationAxis(rotationAxis, XMConvertToRadians(angle));
          g_d3dDeviceContext->UpdateSubresource(g_d3dConstantBuffers[CB_Object], 0, nullptr, &g_WorldMatrix, 0, 0);
      }

      // Clears the back buffer to clearColor and the depth/stencil buffer to
      // clearDepth / clearStencil.
      void Clear(const FLOAT clearColor[4], FLOAT clearDepth, UINT8 clearStencil)
      {
          g_d3dDeviceContext->ClearRenderTargetView(g_d3dRenderTargerView, clearColor);
          g_d3dDeviceContext->ClearDepthStencilView(g_d3dDepthStencilView, D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL, clearDepth, clearStencil);
      }

      // Presents the back buffer, with a sync interval of 1 when vSync is set.
      void Present(bool vSync)
      {
          if (vSync)
          {
              g_d3dSwapChain->Present(1, 0);
          }
          else
          {
              g_d3dSwapChain->Present(0, 0);
          }
      }

      // Binds the pipeline state for the cube, draws it and presents the frame.
      void Render()
      {
          assert(g_d3dDevice);
          assert(g_d3dDeviceContext);

          Clear(Colors::CornflowerBlue, 1.0f, 0);

          //IA
          const UINT vertexStride = sizeof(VertexPosColor);
          const UINT offset = 0;
          g_d3dDeviceContext->IASetVertexBuffers(0, 1, &g_d3dVertexBuffer, &vertexStride, &offset);
          g_d3dDeviceContext->IASetInputLayout(g_d3dInputLayout);
          g_d3dDeviceContext->IASetIndexBuffer(g_d3dIndexBuffer, DXGI_FORMAT_R16_UINT, 0);
          g_d3dDeviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);

          //VS
          g_d3dDeviceContext->VSSetShader(g_d3dVertexShader, nullptr, 0);
          // Must be VSSetConstantBuffers. VSGetConstantBuffers *writes* the currently
          // bound buffers into the array it is given; nothing was bound yet, so it
          // replaced every entry of g_d3dConstantBuffers with null and the next
          // Update() passed a null resource to UpdateSubresource.
          g_d3dDeviceContext->VSSetConstantBuffers(0, NumConstantBuffers, g_d3dConstantBuffers);

          //RS
          g_d3dDeviceContext->RSSetState(g_d3dRasterizerState);
          g_d3dDeviceContext->RSSetViewports(1, &g_Viewport);

          //PS
          g_d3dDeviceContext->PSSetShader(g_d3dPixelShader, nullptr, 0);

          //OM
          g_d3dDeviceContext->OMSetRenderTargets(1, &g_d3dRenderTargerView, g_d3dDepthStencilView);
          g_d3dDeviceContext->OMSetDepthStencilState(g_d3dDepthStencilState, 1);

          //draw
          g_d3dDeviceContext->DrawIndexed(_countof(g_Indicies), 0, 0);

          Present(g_EnableVSync);
      }

      // Releases every Direct3D object created by InitDirectX and LoadContent.
      void CleanUp()
      {
          SafeRelease(g_d3dVertexShader);
          SafeRelease(g_d3dPixelShader);

          SafeRelease(g_d3dVertexBuffer);
          SafeRelease(g_d3dIndexBuffer);
          SafeRelease(g_d3dInputLayout);
          SafeRelease(g_d3dDepthStencilBuffer);
          for (size_t bufferID = 0; bufferID < NumConstantBuffers; bufferID++)
          {
              SafeRelease(g_d3dConstantBuffers[bufferID]);
          }

          SafeRelease(g_d3dDepthStencilState);
          SafeRelease(g_d3dRasterizerState);

          SafeRelease(g_d3dRenderTargerView);
          SafeRelease(g_d3dDepthStencilView);

          SafeRelease(g_d3dSwapChain);
          SafeRelease(g_d3dDeviceContext);
          SafeRelease(g_d3dDevice);
      }
    • By MarcusAseth
      Hi guys, I'm trying to learn this stuff but running into some problems 😕
      I've compiled my .hlsl into a header file which contains the global variable with the precompiled shader data:
      //... // Approximately 83 instruction slots used #endif const BYTE g_vs[] = { 68, 88, 66, 67, 143, 82, 13, 236, 152, 133, 219, 113, 173, 135, 18, 87, 122, 208, 124, 76, 1, 0, 0, 0, 16, 76, 0, 0, 6, 0, //.... And now following the "Compiling at build time to header files" example at this msdn link , I've included the header files in my main.cpp and I'm trying to create the vertex shader like this:
      hr = g_d3dDevice->CreateVertexShader(g_vs, sizeof(g_vs), nullptr, &g_d3dVertexShader); if (FAILED(hr)) { return -1; } and this is failing, entering the if and returning -1.
      Can someone point out what I'm doing wrong? 😕 
    • By Toastmastern
      Hello everyone,
      After a break of a few years from coding and my planet render game, I'm giving it a go again from a different angle. What I'm struggling with now is that I have created a Frustum that works fine, for now at least; it does what it's supposed to do, although not perfectly. But with the frustum came very low FPS, since what I'm doing right now, just to see if the Frustum worked, is to recreate the vertex buffer every frame in which the camera detected movement. This is of course very costly and not the way to do it. That's why I'm now trying to learn how to create a dynamic vertex buffer instead and to map and unmap the vertices; in the end my goal is to update only the part of the vertex buffer that is needed, but one step at a time ^^

      So below is my code which I use to create the Dynamic buffer. The issue is that I want the size of the vertex buffer to be big enough to handle bigger vertex buffers than just mPlanetMesh.vertices.size() due to more vertices being added later when I start to do LOD and stuff; the first render isn't the biggest one I will need.
      vertexBufferDesc.Usage = D3D11_USAGE_DYNAMIC; vertexBufferDesc.ByteWidth = mPlanetMesh.vertices.size(); vertexBufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER; vertexBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; vertexBufferDesc.MiscFlags = 0; vertexBufferDesc.StructureByteStride = 0; vertexData.pSysMem = &mPlanetMesh.vertices[0]; vertexData.SysMemPitch = 0; vertexData.SysMemSlicePitch = 0; result = device->CreateBuffer(&vertexBufferDesc, &vertexData, &mVertexBuffer); if (FAILED(result)) { return false; } What happens is that the 
      result = device->CreateBuffer(&vertexBufferDesc, &vertexData, &mVertexBuffer); Makes it crash due to Access Violation. When I put the vertices.size() in it works without issues, but when I try to set it to like vertices.size() * 2 it crashes.
      I googled my eyes dry tonight but can't seem to find people with the same kind of issue. I've read that the vertex buffer can be bigger if needed. What am I doing wrong here?
      Best Regards and Thanks in advance
    • By yonisi
      I have a terrain engine where the terrain and water are on different grids. So I'm trying to render planar reflections of the terrain into the water grid. After reading some web pages and docs and also trying to learn from the RasterTek reflections demo and the small water bodies demo as well. What I do is as follows:
      1. Create a Reflection view matrix  - Technically I ONLY flip the camera position in the Y direction (Positive Y is up) and add to it 2 * waterLevel. Then I update the View matrix and I save that matrix for later. The code:
      void Camera::UpdateReflectionViewMatrix( float waterLevel ) { mBackupPosition = mPosition; mBackupLook = mLook; mPosition.y = -mPosition.y + 2.0f * waterLevel; //mLook.y = -mLook.y + 2.0f * waterLevel; UpdateViewMatrix(); mReflectionView = View(); } 2. I render the Terrain geometry to a 512x512 sized Render target by using the Reflection view matrix and an opposite culling (My Terrain is using front culling by nature so I'm using back culling for the Reflection render pass). Let me say that I checked with the Graphics debugger and the Reflection Render target looks "OK" at this stage (Picture attached). I don't know if the fact that the terrain is shown only at the top area of the texture is expected or not, but it seems OK.

      3. Render the Reflection texture into the water using projective texturing - I hope this step is OK code wise. Basically I'm sending to the shader the WorldReflectionViewProj matrix that was created at step 1 in order to use it for the projective texture coordinates, I then convert the position in the DS (Water and terrain are drawn with Tessellation) to the projective tex coords using that WorldReflectionViewProj matrix, then I sample the reflection texture after setting up the coordinates in the PS. Here is the code:
      //Send the ReflectionWorldViewProj matrix to the shader: XMStoreFloat4x4(&mPerFrameCB.Data.ReflectionWorldViewProj, XMMatrixTranspose( ( mWorld * pCam->GetReflectedView() ) * mProj )); //Setting up the Projective tex coords in the DS: Output.projTexPosition = mul(float4(worldPos.xyz, 1), g_ReflectionWorldViewProj); //Setting up the coords in the PS and sampling the reflection texture: float2 projTexCoords; projTexCoords.x = input.projTexPosition.x / input.projTexPosition.w / 2.0 + 0.5; projTexCoords.y = -input.projTexPosition.y / input.projTexPosition.w / 2.0 + 0.5; projTexCoords += normal.xz * 0.025; float4 reflectionColor = gReflectionMap.SampleLevel(SamplerClampLinear, projTexCoords, 0); texColor += reflectionColor * 0.25; I'll add that when compiling the PS I'm getting a warning on those dividing by input.projTexPosition.w for a possible float division by 0, I tried to add some offset or some minimum to the dividing term but that still not solved my issue.
      Here is the problem itself. At relatively flat view angles I'm seeing correct reflections (Or at least so it seems), but as I pitch the camera down, I'm seeing those artifacts which I have no idea where are coming from. I'm culling the terrain in the reflection render pass when it's lower than water height (I have heightmaps for that).
      Any help will be appreciated because I don't know what is wrong or where else to look.
    • By thmfrnk
      I am looking for a usefull commandline based texture compression tool with the rights to be able to ship with my application. It should have following caps:
      Supports all major image format as source files (jpeg, png, tga, bmp) Export as DDS Compression Formats BC1, BC2, BC3, BC4, BC7 I am actually using the nvdxt tool from Nvidia, but it does not support BC4 (which I need for one-channel 8bit textures). Everything else which I found wasn't really useful.
      Any suggestions?
    • By trojanfoe
      I have been trying to create a BlendState for my UI text sprites so that they are both alpha-blended (so you can see them) and invert the pixel they are rendered over (again, so you can see them).
      In order to get alpha blending you would need:
      SrcBlend = SRC_ALPHA DestBlend = INV_SRC_ALPHA and in order to have inverted colours you would need something like:
      SrcBlend = INV_DEST_COLOR DestBlend = INV_SRC_COLOR and you can't have both.
      So I have come to the conclusion that it's not possible; am I right?
    • By Royma
      In traditional way, it needs 6 passes for a point light and many passes for cascaded shadow mapping to generate shadow maps. Recently I learnt a method that using a geometry shader to generate all the shadow maps in one pass.I specify a render target and a depth-stencil buffer which are both Texture2dArray in DirectX11.It looks much better than the traditional way I think.But after I implemented it, I found cascaded shadow mapping runs much slower than the traditional way.The fps slow down from 60 to 35.I don't know why.I guess may be I should do some culling or maybe the geometry shader is not efficient.
      I want to know the reason that I reduced the drawcalls from 8 to 1, but it runs slow down.Should I abandon this method or is there any way to optimize this method to run more efficiently than multi-pass rendering?
      Here is the gs code:

      void main(
          triangle DepthGsIn input[3] : SV_POSITION,
          inout TriangleStream< DepthPsIn > output
          for (uint k = 0; k < 8; ++k)
              DepthPsIn element;
              element.RTIndex = k;
              for (uint i = 0; i < 3; ++i)
                  float2 shadowSlopeBias = calculateShadowSlopeBias(input.normal, -g_cameras[k].world[1]);
                  float shadowBias = shadowSlopeBias.y * g_cameras[k].shadowMapParameters.x + g_cameras[k].shadowMapParameters.y;
                  element.position = input.position + shadowBias * g_cameras[k].world[1];
                  element.position = mul(element.position, g_cameras[k].viewProjection);
                  element.depth = element.position.z / element.position.w;
    • By savail
      There are a few things which confuse me regarding DirectX 11 and HLSL shaders in general. I would be very grateful for your advice!
      1. Let's take for example a scene which invokes 2 totally separate pipeline render passes interchangeably. I understand I need to bind correct shaders for each of the render pass and potentially blend/depth or rasterizer state but what about resources such as Constant Buffers, Shader Resource Views and Unordered Access Views? Assuming that the second render pass uses none of the resources used by the first pass, do I still need to unbind the resources and clean pipeline state after first pass? Or is it ok to leave pipeline with unbound garbage since anything I'd need to bind for second pass would overwrite contents in the appropriate register slots anyway?
      2. Is it a good practice to assign register slots manually to all resources in HLSL?
      3. I thought about assigning manually register slots for every distinct render pass up to the maximum slot limit if neccessary. For example in 1 render pass I invoke 3 CS's, 2 VS's and 2 PS's and for all resources used by those shaders I try to fill as many register slots as neccessary and potentially reuse many times the same slot in shaders sharing the same resource. I was wondering if there is any performance penalty or gain when I bind all of my needed resources at the start of render pass and never gonna have to do it again until next render pass? - this means potentially binding a lot of registers and having excessive number of bound resources for every shader that is run.
      4. Is it a good practice to create a separate include file for every resource that occurs in >= 2 shader files or is it better to duplicate the declarations? In first case, the code is imo easier to maintain and edit but might be harder to read if there's too many includes. I've come up with a compromise between these 2 like this: create a separate include file for every CB that occurs in >= 2 shader files and a separate include file for every sampler I ever need to use. All other resources like srvs and uavs I prefer to duplicate in multiple shaders because they take much less space than CB for example... I'm not sure however if that's a good practice
    • By Kris1992
      I want to implement a particle system based on a stream-out structure in my bigger project. I saw a few articles about that method and I built a single particle. It works almost correctly, but in the geometry shader with stream out I can't get the value of InitVel.z and Age because they are always 0. If I change the order of Age (for example, Age before Position) it works fine for Age, but the 6th float in the order is still 0. It looks like it pushes only the first 5 floats. I have no idea what I am doing wrong, because I tried changing almost everything (creating an input layout for the vertex that matches the SO declaration entries, changing the stride to a static 28, changing it to 32 - but in that case it draws chaotically, so the stride size is probably good). I think it is a problem with the limit on NumEntries in the declaration entries, but on the MSDN site I saw that the limit for DirectX is D3D11_SO_STREAM_COUNT(4)*D3D11_SO_OUTPUT_COMPONENT_COUNT(128), not 5. Please, can you look at this code and show me the way to implement it correctly? Thanks a lot for the help.
      Structure of particle struct Particle{ Particle() {} Particle(float x, float y, float z,float vx, float vy, float vz,float l /*UINT typ*/) :InitPos(x, y, z), InitVel(vx, vy, vz), Age(l) /*, Type(typ)*/{} XMFLOAT3 InitPos; XMFLOAT3 InitVel; float Age; //UINT Type; }; SO Entry D3D11_SO_DECLARATION_ENTRY PartlayoutSO[] = { { 0,"POSITION", 0, 0 , 3, 0 }, // output all components of position { 0,"VELOCITY", 0, 0, 3, 0 }, { 0,"AGE", 0, 0, 1, 0 } //{ 0,"TYPE", 0, 0, 1, 0 } }; Global Variables //streamout shaders ID3D11VertexShader* Part_VSSO; ID3D11GeometryShader* Part_GSSO; ID3DBlob *Part_GSSO_Buffer; ID3DBlob *Part_VSSO_Buffer; //normal shaders ID3D11VertexShader* Part_VS; ID3D11GeometryShader* Part_GS; ID3DBlob *Part_GS_Buffer; ID3D11PixelShader* Part_PS; ID3DBlob *Part_VS_Buffer; ID3DBlob *Part_PS_Buffer; ID3D11Buffer* PartVertBufferInit; //ID3D11Buffer* Popy; ID3D11Buffer* mDrawVB; ID3D11Buffer* mStreamOutVB; ID3D11InputLayout* PartVertLayout;// I try to set input layout too void ParticleSystem::InitParticles() { mFirstRun = true; srand(time(NULL)); hr = D3DCompileFromFile(L"ParticleVertexShaderSO4.hlsl", NULL, D3D_COMPILE_STANDARD_FILE_INCLUDE, "main", "vs_5_0", NULL, NULL, &Part_VSSO_Buffer, NULL); hr = D3DCompileFromFile(L"ParticleGeometryShaderSO4.hlsl", NULL, D3D_COMPILE_STANDARD_FILE_INCLUDE, "main", "gs_5_0", NULL, NULL, &Part_GSSO_Buffer, NULL); UINT StrideArray[1] = { sizeof(Particle) };//I try to set static 28 bits-7*4 per float hr = device->CreateVertexShader(Part_VSSO_Buffer->GetBufferPointer(), Part_VSSO_Buffer->GetBufferSize(), NULL, &Part_VSSO); hr = device->CreateGeometryShaderWithStreamOutput(Part_GSSO_Buffer- >GetBufferPointer(), Part_GSSO_Buffer->GetBufferSize(), PartlayoutSO ,3/* sizeof(PartlayoutSO)*/ , StrideArray, 1,D3D11_SO_NO_RASTERIZED_STREAM, NULL,&Part_GSSO); //Draw Shaders hr = D3DCompileFromFile(L"ParticleVertexShaderDRAW4.hlsl", NULL, D3D_COMPILE_STANDARD_FILE_INCLUDE, "main", "vs_5_0", NULL, NULL, &Part_VS_Buffer, NULL); hr = 
D3DCompileFromFile(L"ParticleGeometryShaderDRAW4.hlsl", NULL, D3D_COMPILE_STANDARD_FILE_INCLUDE, "main", "gs_5_0", NULL, NULL, &Part_GS_Buffer, NULL); hr = D3DCompileFromFile(L"ParticlePixelShaderDRAW4.hlsl", NULL, D3D_COMPILE_STANDARD_FILE_INCLUDE, "main", "ps_5_0", NULL, NULL, &Part_PS_Buffer, NULL); hr = device->CreateVertexShader(Part_VS_Buffer->GetBufferPointer(), Part_VS_Buffer->GetBufferSize(), NULL, &Part_VS); hr = device->CreateGeometryShader(Part_GS_Buffer->GetBufferPointer(), Part_GS_Buffer->GetBufferSize(), NULL, &Part_GS); hr = device->CreatePixelShader(Part_PS_Buffer->GetBufferPointer(), Part_PS_Buffer->GetBufferSize(), NULL, &Part_PS); BuildVertBuffer(); } void ParticleSystem::BuildVertBuffer() { D3D11_BUFFER_DESC vertexBufferDesc1; ZeroMemory(&vertexBufferDesc1, sizeof(vertexBufferDesc1)); vertexBufferDesc1.Usage = D3D11_USAGE_DEFAULT; vertexBufferDesc1.ByteWidth = sizeof(Particle)*1; //*numParticles; vertexBufferDesc1.BindFlags = D3D11_BIND_VERTEX_BUFFER;// | D3D11_BIND_STREAM_OUTPUT; vertexBufferDesc1.CPUAccessFlags = 0; vertexBufferDesc1.MiscFlags = 0; vertexBufferDesc1.StructureByteStride = 0;// I tried to comment this too Particle p; ZeroMemory(&p, sizeof(Particle)); p.InitPos = XMFLOAT3(0.0f, 0.0f, 0.0f); p.InitVel = XMFLOAT3(0.0f, 0.0f, 0.0f); p.Age = 0.0f; //p.Type = 100.0f; D3D11_SUBRESOURCE_DATA vertexBufferData1; ZeroMemory(&vertexBufferData1, sizeof(vertexBufferData1)); vertexBufferData1.pSysMem = &p;//było &p vertexBufferData1.SysMemPitch = 0; vertexBufferData1.SysMemSlicePitch = 0; hr = device->CreateBuffer(&vertexBufferDesc1, &vertexBufferData1, &PartVertBufferInit); ZeroMemory(&vertexBufferDesc1, sizeof(vertexBufferDesc1)); vertexBufferDesc1.ByteWidth = sizeof(Particle) * numParticles; vertexBufferDesc1.BindFlags = D3D11_BIND_VERTEX_BUFFER | D3D11_BIND_STREAM_OUTPUT; hr = device->CreateBuffer(&vertexBufferDesc1, 0, &mDrawVB); hr = device->CreateBuffer(&vertexBufferDesc1, 0, &mStreamOutVB); } void ParticleSystem::LoadDataParticles() { 
UINT stride = sizeof(Particle); UINT offset = 0; //Create the Input Layout //device->CreateInputLayout(Partlayout, numElementsPart, Part_VSSO_Buffer- //>GetBufferPointer(), // Part_VSSO_Buffer->GetBufferSize(), &PartVertLayout); //Set the Input Layout //context->IASetInputLayout(PartVertLayout); //Set Primitive Topology context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST); if (mFirstRun) { // context->CopyResource(Popy, PartVertBufferInit); context->IASetVertexBuffers(0, 1, &PartVertBufferInit, &stride, &offset); } else { context->IASetVertexBuffers(0, 1, &mDrawVB, &stride, &offset); } context->SOSetTargets(1, &mStreamOutVB, &offset); context->VSSetShader(Part_VSSO, NULL, 0); context->GSSetShader(Part_GSSO, NULL, 0); context->PSSetShader(NULL, NULL, 0); //context->PSSetShader(Part_PS, NULL, 0); ID3D11DepthStencilState* depthState;//disable depth D3D11_DEPTH_STENCIL_DESC depthStateDesc; depthStateDesc.DepthEnable = false; depthStateDesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; device->CreateDepthStencilState(&depthStateDesc, &depthState); context->OMSetDepthStencilState(depthState, 0); if (mFirstRun) { //mFirstRun; context->Draw(1, 0); mFirstRun = false; } else { context->DrawAuto(); } //} // done streaming-out--unbind the vertex buffer ID3D11Buffer* bufferArray[1] = { 0 }; context->SOSetTargets(1, bufferArray, &offset); // ping-pong the vertex buffers std::swap(mStreamOutVB, mDrawVB); // Draw the updated particle system we just streamed-out. 
//Create the Input Layout //device->CreateInputLayout(Partlayout, numElementsPart, Part_VS_Buffer- //>GetBufferPointer(), // Part_VS_Buffer->GetBufferSize(), &PartVertLayout); //Set the normal Input Layout //context->IASetInputLayout(PartVertLayout); context->IASetVertexBuffers(0, 1, &mDrawVB, &stride, &offset); ZeroMemory(&depthStateDesc, sizeof(depthStateDesc)); depthStateDesc.DepthEnable = true; depthStateDesc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ZERO; device->CreateDepthStencilState(&depthStateDesc, &depthState); context->OMSetDepthStencilState(depthState, 0); //I tried add normal layout here the same like Entry SO but no changes //Set Primitive Topology //context->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST); context->VSSetShader(Part_VS, NULL, 0); context->GSSetShader(Part_GS, NULL, 0); context->PSSetShader(Part_PS, NULL, 0); context->DrawAuto(); //mFirstRun = true; context->GSSetShader(NULL, NULL, 0); } void ParticleSystem::RenderParticles() { //mFirstRun = true; LoadDataParticles(); } And the code of shaders: VertexShader to stream out struct Particle { float3 InitPos : POSITION; float3 InitVel : VELOCITY; float Age : AGE; //uint Type : TYPE; }; Particle main(Particle vin) { return vin;// just push data into geomtrywithso } GeometrywithSo struct Particle { float3 InitPos : POSITION; float3 InitVel : VELOCITY; float Age : AGE; //uint Type : TYPE; }; float RandomPosition(float offset) { float u = Time + offset;// (Time + offset); float v = ObjTexture13.SampleLevel(ObjSamplerState, u, 0).r; return (v); } [maxvertexcount(6)] void main( point Particle gin[1], inout PointStream< Particle > Output ) { //gin[0].Age = Time; if ( StartPart == 1.0f ) { //if (gin[0].Age < 100.0f) //{ for (int i = 0; i < 6; i++) { float3 VelRandom; //= 5.0f * RandomPosition((float)i / 5.0f); VelRandom.y = 10.0f+i; VelRandom.x = 35 * i* RandomPosition((float)i / 5.0f);//+ offse; VelRandom.z = 10.0f;//35*i * RandomPosition((float)i / 5.0f); Particle p; p.InitPos = 
VelRandom;//float3(0.0f, 5.0f, 0.0f); //+ VelRandom; p.InitVel = float3(10.0f, 10.0f, 10.0f); p.Age = 0.0f;//VelRandom.y; //p.Type = PT_FLARE; Output.Append(p); } Output.Append(gin[0]); } else if (StartPart == 0.0f) { if (gin[0].Age >= 0) { Output.Append(gin[0]); } } } If I change Age in geometry with so: for example Age += Time from const buffer In geometry shader its fine once but in draw shader it is 0 and next time if it is reading in geometry with so it is 0 too. Vertex shader to draw struct VertexOut { float3 Pos : POSITION; float4 Colour : COLOR; //uint Type : TYPE; }; struct Particle { float3 InitPos : POSITION; float3 InitVel : VELOCITY; float Age : AGE; // uint Type : TYPE; }; VertexOut main(Particle vin) { VertexOut vout; float3 gAccelW = float3(0.0f, -0.98f, 0.0f); float t = vin.Age; //float b = Time/10000; // constant Acceleration equation vout.Pos = vin.InitVel+ (0.7f * gAccelW)*Time/100; //vout.Pos.x = t; vout.Colour = float4(1.0f, 0.0f, 0.0f, 1.0f); //vout.Age = vout.Pos.y; //vout.Type = vin.Type; return vout; } Geometry shader to change point into line struct VertexOut { float3 Pos : POSITION; float4 Colour : COLOR; //uint Type : TYPE; }; struct GSOutput { float4 Pos : SV_POSITION; float4 Colour : COLOR; //float2 Tex : TEXCOORD; }; [maxvertexcount(2)] void main( point VertexOut gin[1], inout LineStream< GSOutput > Output ) { float3 gAccelW = float3(0.0f, -0.98f, 0.0f); //if (gin[0].Type != PT_EMITTER) { float4 v[2]; v[0] = float4(gin[0].Pos, 1.0f); v[1] = float4((gin[0].Pos + gAccelW), 1.0f); GSOutput gout; [unroll] for (int i = 0; i < 2; ++i) { gout.Pos = mul(v[i], WVP);// mul(v[i], gViewProj); gout.Colour = gin[0].Colour; Output.Append(gout); } } } And pixel Shader struct GSOutput { float4 Pos : SV_POSITION; float4 Colour : COLOR; }; float4 main(GSOutput pin) : SV_TARGET { return pin.Colour; }  
  • Advertisement
  • Popular Now

  • Forum Statistics

    • Total Topics
    • Total Posts

Important Information

By using GameDev.net, you agree to our community Guidelines, Terms of Use, and Privacy Policy.

Participate in the game development conversation and more when you create an account on GameDev.net!

Sign me up!