



// Angle of the vector (In.x, In.y), returned through a float2.
float2 output = atan2(In.y, In.x);
return output;
#define FOURCC_INTZ ((D3DFORMAT)(MAKEFOURCC('I','T','N','Z'))) // Determine if INTZ is supported HRESULT hr; hr = pd3d>CheckDeviceFormat(AdapterOrdinal, DeviceType, AdapterFormat, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, FOURCC_INTZ); BOOL bINTZDepthStencilTexturesSupported = (hr == D3D_OK); // Create an INTZ depth stencil texture IDirect3DTexture9 *pINTZDST; pd3dDevice>CreateTexture(dwWidth, dwHeight, 1, D3DUSAGE_DEPTHSTENCIL, FOURCC_INTZ, D3DPOOL_DEFAULT, &pINTZDST, NULL); // Retrieve depth buffer surface from texture interface IDirect3DSurface9 *pINTZDSTSurface; pINTZDST>GetSurfaceLevel(0, &pINTZDSTSurface); // Bind depth buffer pd3dDevice>SetDepthStencilSurface(pINTZDSTSurface); // Bind depth buffer texture pd3dDevice>SetTexture(0, pINTZDST);
// Reconstructs a position from a depth texture sample.
//   depthTexture            - texture whose red channel holds the stored depth
//   texCoord                - texture coordinate of the sample, in [0, 1]
//   matrixProjectionInverted - matrix applied to the clip-space coordinate
// Returns the transformed coordinate after the perspective divide.
float3 reconstructPos(Texture2D depthTexture, float2 texCoord, float4x4 matrixProjectionInverted)
{
    // The stored value is inverted before use.
    float depth = 1 - depthTexture.Sample(samplerDefault, texCoord).r;

    // Map [0, 1] texture coordinates to [-1, 1]; the vertical axis is flipped.
    float2 cspos = float2(texCoord.x * 2 - 1, (1 - texCoord.y) * 2 - 1);

    float4 depthCoord = float4(cspos, depth, 1);
    depthCoord = mul(matrixProjectionInverted, depthCoord);
    return depthCoord.xyz / depthCoord.w;
}
Projection performed trivially.
static const int sBBIndexList[36] = { // index for top 4, 8, 7, 4, 7, 3, // index for bottom 5, 1, 2, 5, 2, 6, // index for left 5, 8, 4, 5, 4, 1, // index for right 2, 3, 7, 2, 7, 6, // index for back 6, 7, 8, 6, 8, 5, // index for front 1, 4, 3, 1, 3, 2, }; __m128 SSETransformCoords(__m128 *v, __m128 *m) { __m128 vResult = _mm_shuffle_ps(*v, *v, _MM_SHUFFLE(0,0,0,0)); vResult = _mm_mul_ps(vResult, m[0]); __m128 vTemp = _mm_shuffle_ps(*v, *v, _MM_SHUFFLE(1,1,1,1)); vTemp = _mm_mul_ps(vTemp, m[1]); vResult = _mm_add_ps(vResult, vTemp); vTemp = _mm_shuffle_ps(*v, *v, _MM_SHUFFLE(2,2,2,2)); vTemp = _mm_mul_ps(vTemp, m[2]); vResult = _mm_add_ps(vResult, vTemp); vResult = _mm_add_ps(vResult, m[3]); return vResult; } __forceinline __m128i Min(const __m128i &v0, const __m128i &v1) { __m128i tmp; tmp = _mm_min_epi32(v0, v1); return tmp; } __forceinline __m128i Max(const __m128i &v0, const __m128i &v1) { __m128i tmp; tmp = _mm_max_epi32(v0, v1); return tmp; } struct SSEVFloat4 { __m128 X; __m128 Y; __m128 Z; __m128 W; }; // get 4 triangles from vertices void SSEGather(SSEVFloat4 pOut[3], int triId, const __m128 xformedPos[]) { for(int i = 0; i < 3; i++) { int ind0 = sBBIndexList[triId*3 + i + 0]1; int ind1 = sBBIndexList[triId*3 + i + 3]1; int ind2 = sBBIndexList[triId*3 + i + 6]1; int ind3 = sBBIndexList[triId*3 + i + 9]1; __m128 v0 = xformedPos[ind0]; __m128 v1 = xformedPos[ind1]; __m128 v2 = xformedPos[ind2]; __m128 v3 = xformedPos[ind3]; _MM_TRANSPOSE4_PS(v0, v1, v2, v3); pOut[i].X = v0; pOut[i].Y = v1; pOut[i].Z = v2; pOut[i].W = v3; //now X contains X0 x1 x2 x3, Y  Y0 Y1 Y2 Y3 and so on... 
} } bool RasterizeTestBBoxSSE(Box3F box, __m128* matrix, float* buffer, Point4I res) { //TODO: performance LARGE_INTEGER frequency; // ticks per second LARGE_INTEGER t1, t2; // ticks double elapsedTime; // get ticks per second QueryPerformanceFrequency(&frequency); // start timer QueryPerformanceCounter(&t1); //verts and flags __m128 verticesSSE[8]; int flags[8]; static Point4F vertices[8]; static Point4F xformedPos[3]; static int flagsLoc[3]; // Set DAZ and FZ MXCSR bits to flush denormals to zero (i.e., make it faster) // Denormal are zero (DAZ) is bit 6 and Flush to zero (FZ) is bit 15. // so to enable the two to have to set bits 6 and 15 which 1000 0000 0100 0000 = 0x8040 _mm_setcsr( _mm_getcsr()  0x8040 ); // init vertices Point3F center = box.getCenter(); Point3F extent = box.getExtents(); Point4F vCenter = Point4F(center.x, center.y, center.z, 1.0); Point4F vHalf = Point4F(extent.x*0.5, extent.y*0.5, extent.z*0.5, 1.0); Point4F vMin = vCenter  vHalf; Point4F vMax = vCenter + vHalf; // fill vertices vertices[0] = Point4F(vMin.x, vMin.y, vMin.z, 1); vertices[1] = Point4F(vMax.x, vMin.y, vMin.z, 1); vertices[2] = Point4F(vMax.x, vMax.y, vMin.z, 1); vertices[3] = Point4F(vMin.x, vMax.y, vMin.z, 1); vertices[4] = Point4F(vMin.x, vMin.y, vMax.z, 1); vertices[5] = Point4F(vMax.x, vMin.y, vMax.z, 1); vertices[6] = Point4F(vMax.x, vMax.y, vMax.z, 1); vertices[7] = Point4F(vMin.x, vMax.y, vMax.z, 1); // transforms for(int i = 0; i < 8; i++) { verticesSSE[i] = _mm_loadu_ps(vertices[i]); verticesSSE[i] = SSETransformCoords(&verticesSSE[i], matrix); __m128 vertX = _mm_shuffle_ps(verticesSSE[i], verticesSSE[i], _MM_SHUFFLE(0,0,0,0)); // xxxx __m128 vertY = _mm_shuffle_ps(verticesSSE[i], verticesSSE[i], _MM_SHUFFLE(1,1,1,1)); // yyyy __m128 vertZ = _mm_shuffle_ps(verticesSSE[i], verticesSSE[i], _MM_SHUFFLE(2,2,2,2)); // zzzz __m128 vertW = _mm_shuffle_ps(verticesSSE[i], verticesSSE[i], _MM_SHUFFLE(3,3,3,3)); // wwww static const __m128 sign_mask = _mm_set1_ps(0.f); // 0.f 
= 1 << 31 vertW = _mm_andnot_ps(sign_mask, vertW); // abs vertW = _mm_shuffle_ps(vertW, _mm_set1_ps(1.0f), _MM_SHUFFLE(0,0,0,0)); //w,w,1,1 vertW = _mm_shuffle_ps(vertW, vertW, _MM_SHUFFLE(3,0,0,0)); //w,w,w,1 // project verticesSSE[i] = _mm_div_ps(verticesSSE[i], vertW); // now vertices are between 1 and 1 const __m128 sadd = _mm_setr_ps(res.x*0.5, res.y*0.5, 0, 0); const __m128 smult = _mm_setr_ps(res.x*0.5, res.y*(0.5), 1, 1); verticesSSE[i] = _mm_add_ps( sadd, _mm_mul_ps(verticesSSE[i],smult) ); } // Rasterize the AABB triangles 4 at a time for(int i = 0; i < 12; i += 4) { SSEVFloat4 xformedPos[3]; SSEGather(xformedPos, i, verticesSSE); // by 3 vertices // fxPtX[0] = X0 X1 X2 X3 of 1st vert in 4 triangles // fxPtX[1] = X0 X1 X2 X3 of 2nd vert in 4 triangles // and so on __m128i fxPtX[3], fxPtY[3]; for(int m = 0; m < 3; m++) { fxPtX[m] = _mm_cvtps_epi32(xformedPos[m].X); fxPtY[m] = _mm_cvtps_epi32(xformedPos[m].Y); } // Fab(x, y) = Ax + By + C = 0 // Fab(x, y) = (ya  yb)x + (xb  xa)y + (xa * yb  xb * ya) = 0 // Compute A = (ya  yb) for the 3 line segments that make up each triangle __m128i A0 = _mm_sub_epi32(fxPtY[1], fxPtY[2]); __m128i A1 = _mm_sub_epi32(fxPtY[2], fxPtY[0]); __m128i A2 = _mm_sub_epi32(fxPtY[0], fxPtY[1]); // Compute B = (xb  xa) for the 3 line segments that make up each triangle __m128i B0 = _mm_sub_epi32(fxPtX[2], fxPtX[1]); __m128i B1 = _mm_sub_epi32(fxPtX[0], fxPtX[2]); __m128i B2 = _mm_sub_epi32(fxPtX[1], fxPtX[0]); // Compute C = (xa * yb  xb * ya) for the 3 line segments that make up each triangle __m128i C0 = _mm_sub_epi32(_mm_mullo_epi32(fxPtX[1], fxPtY[2]), _mm_mullo_epi32(fxPtX[2], fxPtY[1])); __m128i C1 = _mm_sub_epi32(_mm_mullo_epi32(fxPtX[2], fxPtY[0]), _mm_mullo_epi32(fxPtX[0], fxPtY[2])); __m128i C2 = _mm_sub_epi32(_mm_mullo_epi32(fxPtX[0], fxPtY[1]), _mm_mullo_epi32(fxPtX[1], fxPtY[0])); // Compute triangle area __m128i triArea = _mm_mullo_epi32(B2, A1); triArea = _mm_sub_epi32(triArea, _mm_mullo_epi32(B1, A2)); __m128 
oneOverTriArea = _mm_div_ps(_mm_set1_ps(1.0f), _mm_cvtepi32_ps(triArea)); __m128 Z[3]; Z[0] = xformedPos[0].W; Z[1] = _mm_mul_ps(_mm_sub_ps(xformedPos[1].W, Z[0]), oneOverTriArea); Z[2] = _mm_mul_ps(_mm_sub_ps(xformedPos[2].W, Z[0]), oneOverTriArea); // Use bounding box traversal strategy to determine which pixels to rasterize __m128i startX = _mm_and_si128(Max(Min(Min(fxPtX[0], fxPtX[1]), fxPtX[2]), _mm_set1_epi32(0)), _mm_set1_epi32(~1)); __m128i endX = Min(Max(Max(fxPtX[0], fxPtX[1]), fxPtX[2]), _mm_set1_epi32(res.x  1)); __m128i startY = _mm_and_si128(Max(Min(Min(fxPtY[0], fxPtY[1]), fxPtY[2]), _mm_set1_epi32(0)), _mm_set1_epi32(~1)); __m128i endY = Min(Max(Max(fxPtY[0], fxPtY[1]), fxPtY[2]), _mm_set1_epi32(res.y  1)); // Now we have 4 triangles set up. Rasterize them each individually. for(int lane=0; lane < 4; lane++) { // Skip triangle if area is zero if(triArea.m128i_i32[lane] <= 0) { continue; } // Extract this triangle's properties from the SIMD versions __m128 zz[3]; for(int vv = 0; vv < 3; vv++) { zz[vv] = _mm_set1_ps(Z[vv].m128_f32[lane]); } //drop culled triangle int startXx = startX.m128i_i32[lane]; int endXx = endX.m128i_i32[lane]; int startYy = startY.m128i_i32[lane]; int endYy = endY.m128i_i32[lane]; __m128i aa0 = _mm_set1_epi32(A0.m128i_i32[lane]); __m128i aa1 = _mm_set1_epi32(A1.m128i_i32[lane]); __m128i aa2 = _mm_set1_epi32(A2.m128i_i32[lane]); __m128i bb0 = _mm_set1_epi32(B0.m128i_i32[lane]); __m128i bb1 = _mm_set1_epi32(B1.m128i_i32[lane]); __m128i bb2 = _mm_set1_epi32(B2.m128i_i32[lane]); __m128i cc0 = _mm_set1_epi32(C0.m128i_i32[lane]); __m128i cc1 = _mm_set1_epi32(C1.m128i_i32[lane]); __m128i cc2 = _mm_set1_epi32(C2.m128i_i32[lane]); __m128i aa0Inc = _mm_mul_epi32(aa0, _mm_setr_epi32(1,2,3,4)); __m128i aa1Inc = _mm_mul_epi32(aa1, _mm_setr_epi32(1,2,3,4)); __m128i aa2Inc = _mm_mul_epi32(aa2, _mm_setr_epi32(1,2,3,4)); __m128i alpha0 = _mm_add_epi32(_mm_mul_epi32(aa0, _mm_set1_epi32(startXx)), _mm_mul_epi32(bb0, _mm_set1_epi32(startYy))); 
alpha0 = _mm_add_epi32(cc0, alpha0); __m128i beta0 = _mm_add_epi32(_mm_mul_epi32(aa1, _mm_set1_epi32(startXx)), _mm_mul_epi32(bb1, _mm_set1_epi32(startYy))); beta0 = _mm_add_epi32(cc1, beta0); __m128i gama0 = _mm_add_epi32(_mm_mul_epi32(aa2, _mm_set1_epi32(startXx)), _mm_mul_epi32(bb2, _mm_set1_epi32(startYy))); gama0 = _mm_add_epi32(cc2, gama0); int rowIdx = (startYy * res.x + startXx); __m128 zx = _mm_mul_ps(_mm_cvtepi32_ps(aa1), zz[1]); zx = _mm_add_ps(zx, _mm_mul_ps(_mm_cvtepi32_ps(aa2), zz[2])); zx = _mm_mul_ps(zx, _mm_setr_ps(1.f, 2.f, 3.f, 4.f)); // Texels traverse for(int r = startYy; r < endYy; r++, rowIdx += res.x, alpha0 = _mm_add_epi32(alpha0, bb0), beta0 = _mm_add_epi32(beta0, bb1), gama0 = _mm_add_epi32(gama0, bb2)) { // Compute barycentric coordinates // Z0 as an origin int index = rowIdx; __m128i alpha = alpha0; __m128i beta = beta0; __m128i gama = gama0; //Compute barycentricinterpolated depth __m128 depth = zz[0]; depth = _mm_add_ps(depth, _mm_mul_ps(_mm_cvtepi32_ps(beta), zz[1])); depth = _mm_add_ps(depth, _mm_mul_ps(_mm_cvtepi32_ps(gama), zz[2])); __m128i anyOut = _mm_setzero_si128(); __m128i mask; __m128 previousDepth; __m128 depthMask; __m128i finalMask; for(int c = startXx; c < endXx; c+=4, index+=4, alpha = _mm_add_epi32(alpha, aa0Inc), beta = _mm_add_epi32(beta, aa1Inc), gama = _mm_add_epi32(gama, aa2Inc), depth = _mm_add_ps(depth, zx)) { mask = _mm_or_si128(_mm_or_si128(alpha, beta), gama); previousDepth = _mm_loadu_ps(&(buffer[index])); //calculate current depth //(log(depth)  6.907755375) * 0.048254941; __m128 curdepth = _mm_mul_ps(_mm_sub_ps(log_ps(depth),_mm_set1_ps(6.907755375)),_mm_set1_ps(0.048254941)); curdepth = _mm_sub_ps(curdepth, _mm_set1_ps(0.05)); depthMask = _mm_cmplt_ps(curdepth, previousDepth); finalMask = _mm_andnot_si128(mask, _mm_castps_si128(depthMask)); anyOut = _mm_or_si128(anyOut, finalMask); }//for each column if(!_mm_testz_si128(anyOut, _mm_set1_epi32(0x80000000))) { // stop timer QueryPerformanceCounter(&t2); // 
compute and print the elapsed time in millisec elapsedTime = (t2.QuadPart  t1.QuadPart) * 1000.0 / frequency.QuadPart; RasterizationStats::RasterizeSSETimeSpent += elapsedTime; return true; //early exit } }// for each row }// for each triangle }// for each set of SIMD# triangles return false; }
/// One logical mesh buffer: a block of raw bytes plus its layout information.
struct DXLogicalMeshBuffer final
{
    /// Start of the raw bytes (null until assigned).
    uint8_t* data{nullptr};
    /// Number of bytes at `data`.
    size_t dataSize{0};
    /// Stride, in bytes, of one element of the data format.
    size_t dataFormatStride{0};
    /// Address assigned to this buffer inside a physical buffer.
    size_t physicalAddress{0};
};
struct DXPhysicalMeshBuffer final { ID3D11Buffer* physicalBuffer = nullptr; ID3D11ShaderResourceView* physicalBufferView = nullptr; size_t physicalDataSize = 0; bool isDirty = false; typedef DynamicArray<DXLogicalMeshBuffer*> PageArray; PageArray allPages; DXPhysicalMeshBuffer() = default; inline ~DXPhysicalMeshBuffer() { if (physicalBuffer != nullptr) physicalBuffer>Release(); if (physicalBufferView != nullptr) physicalBufferView>Release(); } void allocate(DXLogicalMeshBuffer* logicalBuffer); void release(DXLogicalMeshBuffer* logicalBuffer); void rebuildPages(); // very expensive operation }
void DXPhysicalBuffer::allocate(DXLogicalMeshBuffer* logicalBuffer) { allPages.Add(logicalBuffer); isDirty = true; } void DXPhysicalBuffer::release(DXLogicalMeshBuffer* logicalBuffer) { allPages.Remove(logicalBuffer); isDirty = true; }
size_t vfStride = allPages[0]>dataFormatStride; // TODO: right now will not work with different strides size_t numElements = physicalDataSize / vfStride; if (physicalBuffer != nullptr) physicalBuffer>Release(); if (physicalBufferView != nullptr) physicalBufferView>Release(); D3D11_BUFFER_DESC bufferDesc; bufferDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; bufferDesc.ByteWidth = physicalDataSize; bufferDesc.Usage = D3D11_USAGE_DYNAMIC; bufferDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; bufferDesc.StructureByteStride = vfStride; bufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; if (FAILED(g_pd3dDevice>CreateBuffer(&bufferDesc, nullptr, &physicalBuffer))) { handleError(...); // handle your error here return; }
// Describe a buffer view over the whole physical buffer. The format is
// DXGI_FORMAT_UNKNOWN and the element count is numElements.
D3D11_SHADER_RESOURCE_VIEW_DESC viewDesc;
std::memset(&viewDesc, 0, sizeof(viewDesc));
viewDesc.Format = DXGI_FORMAT_UNKNOWN;
viewDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
viewDesc.Buffer.ElementWidth = numElements;

if (FAILED(g_pd3dDevice->CreateShaderResourceView(physicalBuffer, &viewDesc, &physicalBufferView)))
{
    // TODO: error handling
    return;
}
// fill the physical buffer D3D11_MAPPED_SUBRESOURCE mappedData; std::memset(&mappedData, 0, sizeof(mappedData)); if (FAILED(g_pImmediateContext>Map(physicalBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedData))) { handleError(...); // insert error handling here return; } uint8_t* dataPtr = reinterpret_cast<uint8_t*>(mappedData.pData); size_t pageOffset = 0; for (size_t i = 0; i < allPages.GetSize(); ++i) { DXLogicalMeshBuffer* logicalBuffer = allPages[i]; // copy logical data to the mapped physical data std::memcpy(dataPtr + pageOffset, logicalBuffer>data, logicalBuffer>dataSize); // calculate physical address logicalBuffer>physicalAddress = pageOffset / logicalBuffer>dataFormatStride; // calculate offset pageOffset += logicalBuffer>dataSize; } g_pImmediateContext>Unmap(physicalBuffer, 0);
std::memset(&bufferDesc, 0, sizeof(bufferDesc)); bufferDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; bufferDesc.ByteWidth = dataBufferSize; bufferDesc.Usage = D3D11_USAGE_DYNAMIC; bufferDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; bufferDesc.StructureByteStride = stride; bufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; if (FAILED(g_pd3dDevice>CreateBuffer(&bufferDesc, nullptr, &dataBuffer))) { handleError(...); // handle your error here return; } D3D11_SHADER_RESOURCE_VIEW_DESC viewDesc; std::memset(&viewDesc, 0, sizeof(viewDesc)); viewDesc.Format = DXGI_FORMAT_UNKNOWN; viewDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; viewDesc.Buffer.ElementWidth = numInstances; if (FAILED(g_pd3dDevice>CreateShaderResourceView(dataBuffer, &viewDesc, &dataView))) { handleError(...); // handle your error here return; }
/// Four 32-bit words describing one draw call.
struct InternalData
{
    uint32_t vb;            ///< vertex buffer value
    uint32_t ib;            ///< index buffer value
    uint32_t drawCallType;  ///< draw call type
    uint32_t count;         ///< element count of the draw call
};
struct DrawCall final { enum Type : uint32_t { Draw = 0, DrawIndexed = 1 }; enum { ConstantBufferSize = 2048 // TODO: remove hardcode }; enum { MaxTextures = 8 }; uint8_t constantBufferData[ConstantBufferSize]; DXLogicalMeshBuffer* vertexBuffer; DXLogicalMeshBuffer* indexBuffer; uint32_t count; uint32_t startVertex; uint32_t startIndex; Type type; };
// update constants { D3D11_MAPPED_SUBRESOURCE mappedData; if (FAILED(g_pImmediateContext>Map(psimpl>constantBuffer.dataBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedData))) { // TODO: error handling return; } uint8_t* dataPtr = reinterpret_cast<uint8_t*>(mappedData.pData); for (size_t i = 0; i < numInstances; ++i) { size_t offset = i * internal::DrawCall::ConstantBufferSize; const internal::DrawCall& call = queue>getDrawCalls()[i]; std::memcpy(dataPtr + offset, call.constantBufferData, internal::DrawCall::ConstantBufferSize); // fill internal data structure InternalData* idata = reinterpret_cast<InternalData*>(dataPtr + offset); DXLogicalMeshBuffer* vertexBuffer = static_cast<DXLogicalMeshBuffer*>(call.vertexBuffer.value); if (vertexBuffer != nullptr) idata>vb = vertexBuffer>physicalAddress; DXLogicalMeshBuffer* indexBuffer = static_cast<DXLogicalMeshBuffer*>(call.indexBuffer.value); if (indexBuffer != nullptr) idata>ib = indexBuffer>physicalAddress; idata>drawCallType = call.type; idata>count = call.count; } g_pImmediateContext>Unmap(psimpl>constantBuffer.dataBuffer, 0); }
// Slots 0 and 1 of the vertex stage: physical vertex and index buffer views.
ID3D11ShaderResourceView* vbibViews[2] = {
    g_physicalVertexBuffer->physicalBufferView,
    g_physicalIndexBuffer->physicalBufferView
};
g_pImmediateContext->VSSetShaderResources(0, 2, vbibViews);

// Slot 2 of every stage: the per-instance constant data view.
g_pImmediateContext->VSSetShaderResources(0 + 2, 1, &psimpl->constantBuffer.dataView);
g_pImmediateContext->HSSetShaderResources(0 + 2, 1, &psimpl->constantBuffer.dataView);
g_pImmediateContext->DSSetShaderResources(0 + 2, 1, &psimpl->constantBuffer.dataView);
g_pImmediateContext->GSSetShaderResources(0 + 2, 1, &psimpl->constantBuffer.dataView);
g_pImmediateContext->PSSetShaderResources(0 + 2, 1, &psimpl->constantBuffer.dataView);

// One instance per draw call, each with maxDrawCallVertexCount vertices.
g_pImmediateContext->DrawInstanced(maxDrawCallVertexCount, numInstances, 0, 0);
// vertex
struct VertexData
{
    float3 position;
    float2 texcoord0;
    float2 texcoord1;
    float3 normal;
};

StructuredBuffer<VertexData> g_VertexBuffer;
StructuredBuffer<uint> g_IndexBuffer;

// pipeline state
#define DRAW 0
#define DRAW_INDEXED 1

// Per-instance record; internalData carries four uints ahead of the matrices.
struct ConstantData
{
    uint4 internalData;
    float4x4 World;
    float4x4 View;
    float4x4 Projection;
};

StructuredBuffer<ConstantData> g_ConstantBuffer;
uint instanceID = input.instanceID;
uint vertexID = input.vertexID;

// Unpack the four words stored in internalData for this instance.
uint vbID = g_ConstantBuffer[instanceID].internalData[0];
uint ibID = g_ConstantBuffer[instanceID].internalData[1];
uint drawType = g_ConstantBuffer[instanceID].internalData[2];
uint drawCount = g_ConstantBuffer[instanceID].internalData[3];

VertexData vdata;
[branch] if (drawType == DRAW_INDEXED)
    vdata = g_VertexBuffer[vbID + g_IndexBuffer[ibID + vertexID]];
else if (drawType == DRAW)
    vdata = g_VertexBuffer[vbID + vertexID];

// Valid vertex IDs are 0 .. drawCount - 1, so the ID equal to drawCount must
// be discarded as well (the previous '>' let one extra vertex through).
[flatten] if (vertexID >= drawCount)
    vdata = g_VertexOutsideClipPlane; // discard vertex by moving it outside of the clip plane
16384 unique cubes, 1.2ms per frame on Intel HD 4400:
4096 unique instances of grass, 200k triangles:
Figure1: A smoothed animation curve
Figure2: Two linear consecutive keyframes
(eq. 1)
(eq. 2)
(eq. 3)
// Returns knot i of an open uniform knot vector for n control points and
// degree D: (i - D) / (n - D), clamped to [0, 1].
float GetKnot(int i, int n)
{
    // Calculate a knot from an open uniform knot vector
    return saturate((float)(i - D) / (float)(n - D));
}
Figure 2. Recursive dependencies of the basis functions (n=4, d=2).
For a given u, the only non-zero values are those in the rectangles.
// Returns the knot span index for parameter u with n control points.
// u is scaled by 0.9999 before flooring, so u == 1 stays in the last span.
int GetKey(float u, int n)
{
    return D + (int)floor((n - D) * u * 0.9999f);
}
#define MAX_N 10          // maximum number of control points in either direction (U or V)
#define D 2               // degree of the curve
#define EPSILON 0.00002f  // used for normal and tangent calculation

cbuffer cbPerObject
{
    // BSpline
    int gNU;                    // gNU actual number of control points in U direction
    int gNV;                    // gNV actual number of control points in V direction
    float4 gCP[MAX_N * MAX_N];  // control points
    float3 gCenter;             // arithmetic mean of control points
    // ... other variables
};
(eq. 4)
// Evaluates the B-spline surface at texCoord and derives a tangent frame by
// finite differences (offset EPSILON in u and in v).
//   texCoord - (u, v) surface parameter
//   pos      - surface position (gCenter is added back at the end)
//   normal   - normalized cross(tan, bitangent)
//   tan      - normalized direction from pos towards the u + EPSILON sample
void ComputePosNormalTangent(in float2 texCoord, out float3 pos, out float3 normal, out float3 tan)
{
    float u = texCoord.x;
    float v = texCoord.y;
    float u_pdu = texCoord.x + EPSILON;
    float v_pdv = texCoord.y + EPSILON;

    int iU = GetKey(u, gNU);
    int iV = GetKey(v, gNV);

    // create and set basis
    float basisU[D + 1][MAX_N + D];
    float basisV[D + 1][MAX_N + D];
    float basisU_pdu[D + 1][MAX_N + D];
    float basisV_pdv[D + 1][MAX_N + D];
    basisU[0][iU] = basisV[0][iV] = basisU_pdu[0][iU] = basisV_pdv[0][iV] = 1.0f;

    // ... the rest of the function code

    // evaluate triangle edges
    [unroll]
    for (int j = 1; j <= D; ++j)
    {
        float gKI;
        float gKI1;
        float gKIJ;
        float gKIJ1;

        // U
        gKI = GetKnot(iU, gNU);
        gKI1 = GetKnot(iU + 1, gNU);
        gKIJ = GetKnot(iU + j, gNU);
        gKIJ1 = GetKnot(iU - j + 1, gNU);

        float c0U = (u - gKI) / (gKIJ - gKI);
        float c1U = (gKI1 - u) / (gKI1 - gKIJ1);
        basisU[j][iU] = c0U * basisU[j - 1][iU];
        basisU[j][iU - j] = c1U * basisU[j - 1][iU - j + 1];

        float c0U_pdu = (u_pdu - gKI) / (gKIJ - gKI);
        float c1U_pdu = (gKI1 - u_pdu) / (gKI1 - gKIJ1);
        basisU_pdu[j][iU] = c0U_pdu * basisU_pdu[j - 1][iU];
        basisU_pdu[j][iU - j] = c1U_pdu * basisU_pdu[j - 1][iU - j + 1];

        // V
        gKI = GetKnot(iV, gNV);
        gKI1 = GetKnot(iV + 1, gNV);
        gKIJ = GetKnot(iV + j, gNV);
        gKIJ1 = GetKnot(iV - j + 1, gNV);

        float c0V = (v - gKI) / (gKIJ - gKI);
        float c1V = (gKI1 - v) / (gKI1 - gKIJ1);
        basisV[j][iV] = c0V * basisV[j - 1][iV];
        basisV[j][iV - j] = c1V * basisV[j - 1][iV - j + 1];

        float c0V_pdv = (v_pdv - gKI) / (gKIJ - gKI);
        float c1V_pdv = (gKI1 - v_pdv) / (gKI1 - gKIJ1);
        basisV_pdv[j][iV] = c0V_pdv * basisV_pdv[j - 1][iV];
        basisV_pdv[j][iV - j] = c1V_pdv * basisV_pdv[j - 1][iV - j + 1];
    }

    // evaluate triangle interior
    [unroll]
    for (j = 2; j <= D; ++j)
    {
        // U
        [unroll(j - 1)]
        for (int k = iU - j + 1; k < iU; ++k)
        {
            float gKK = GetKnot(k, gNU);
            float gKK1 = GetKnot(k + 1, gNU);
            float gKKJ = GetKnot(k + j, gNU);
            float gKKJ1 = GetKnot(k + j + 1, gNU);

            float c0U = (u - gKK) / (gKKJ - gKK);
            float c1U = (gKKJ1 - u) / (gKKJ1 - gKK1);
            basisU[j][k] = c0U * basisU[j - 1][k] + c1U * basisU[j - 1][k + 1];

            float c0U_pdu = (u_pdu - gKK) / (gKKJ - gKK);
            float c1U_pdu = (gKKJ1 - u_pdu) / (gKKJ1 - gKK1);
            basisU_pdu[j][k] = c0U_pdu * basisU_pdu[j - 1][k] + c1U_pdu * basisU_pdu[j - 1][k + 1];
        }

        // V
        [unroll(j - 1)]
        for (k = iV - j + 1; k < iV; ++k)
        {
            float gKK = GetKnot(k, gNV);
            float gKK1 = GetKnot(k + 1, gNV);
            float gKKJ = GetKnot(k + j, gNV);
            float gKKJ1 = GetKnot(k + j + 1, gNV);

            float c0V = (v - gKK) / (gKKJ - gKK);
            float c1V = (gKKJ1 - v) / (gKKJ1 - gKK1);
            basisV[j][k] = c0V * basisV[j - 1][k] + c1V * basisV[j - 1][k + 1];

            float c0V_pdv = (v_pdv - gKK) / (gKKJ - gKK);
            float c1V_pdv = (gKKJ1 - v_pdv) / (gKKJ1 - gKK1);
            basisV_pdv[j][k] = c0V_pdv * basisV_pdv[j - 1][k] + c1V_pdv * basisV_pdv[j - 1][k + 1];
        }
    }

    // ... the rest of the function code

    float3 pos_pdu, pos_pdv;
    pos.x = pos_pdu.x = pos_pdv.x = 0.0f;
    pos.y = pos_pdu.y = pos_pdv.y = 0.0f;
    pos.z = pos_pdu.z = pos_pdv.z = 0.0f;

    // Accumulate the (D + 1) x (D + 1) control points that influence this span,
    // relative to gCenter.
    [unroll(D + 1)]
    for (int jU = iU - D; jU <= iU; ++jU)
    {
        [unroll(D + 1)]
        for (int jV = iV - D; jV <= iV; ++jV)
        {
            pos += basisU[D][jU] * basisV[D][jV] * (gCP[jU + jV * gNU].xyz - gCenter);
            pos_pdu += basisU_pdu[D][jU] * basisV[D][jV] * (gCP[jU + jV * gNU].xyz - gCenter);
            pos_pdv += basisU[D][jU] * basisV_pdv[D][jV] * (gCP[jU + jV * gNU].xyz - gCenter);
        }
    }

    tan = normalize(pos_pdu - pos);
    float3 bTan = normalize(pos_pdv - pos);
    normal = normalize(cross(tan, bTan));
    pos += gCenter;
}
// The only per-vertex attribute: the surface parameter.
struct V_TexCoord
{
    float2 TexCoord : TEXCOORD;
};

// Vertex shader: forwards the texture coordinate unchanged.
V_TexCoord VS(V_TexCoord vin)
{
    // Just a pass through shader
    V_TexCoord vout;
    vout.TexCoord = vin.TexCoord;
    return vout;
}
// Tessellation factors of one triangle patch.
struct PatchTess
{
    float EdgeTess[3] : SV_TessFactor;
    float InsideTess : SV_InsideTessFactor;
};

// Patch-constant function: one factor, computed from gCenter, for all three
// edges and the interior.
PatchTess ConstantHS(InputPatch<V_TexCoord, 3> patch, uint patchID : SV_PrimitiveID)
{
    PatchTess pt;

    // Uniformly tessellate the patch.
    float tess = CalcTessFactor(gCenter);
    pt.EdgeTess[0] = tess;
    pt.EdgeTess[1] = tess;
    pt.EdgeTess[2] = tess;
    pt.InsideTess = tess;

    return pt;
}

// Hull shader: forwards control point i unchanged.
[domain("tri")]
[partitioning("fractional_odd")]
[outputtopology("triangle_cw")]
[outputcontrolpoints(3)]
[patchconstantfunc("ConstantHS")]
[maxtessfactor(64.0f)]
V_TexCoord HS(InputPatch<V_TexCoord, 3> p, uint i : SV_OutputControlPointID, uint patchId : SV_PrimitiveID)
{
    // Just a pass through shader
    V_TexCoord hout;
    hout.TexCoord = p[i].TexCoord;
    return hout;
}
cbuffer cbPerObject
{
    // ... other variables

    // Camera
    float4x4 gViewProj;
    float3 gEyePosW;

    // Tessellation
    float gMaxTessDistance;
    float gMinTessDistance;
    float gMinTessFactor;
    float gMaxTessFactor;
};

// Maps the distance between p and the eye to a tessellation factor:
// gMinTessFactor at gMinTessDistance (or nearer), gMaxTessFactor at
// gMaxTessDistance (or farther), blended with a pow(s, 1.5) curve in between.
float CalcTessFactor(float3 p)
{
    float d = distance(p, gEyePosW);
    float s = saturate((d - gMinTessDistance) / (gMaxTessDistance - gMinTessDistance));
    return lerp(gMinTessFactor, gMaxTessFactor, pow(s, 1.5f));
}
// Domain shader output / geometry shader input.
struct V_PosW_NormalW_TanW_TexCoord
{
    float3 PosW : POSITION; // was misspelled "POSTION"
    float3 NormalW : NORMAL;
    float3 TanW : TANGENT;
    float2 TexCoord : TEXCOORD;
};

// Domain shader: interpolates the patch corners' texture coordinates with the
// barycentric location and evaluates the surface at that parameter.
[domain("tri")]
V_PosW_NormalW_TanW_TexCoord DS(PatchTess patchTess, float3 bary : SV_DomainLocation, const OutputPatch<V_TexCoord, 3> tri)
{
    float2 texCoord = bary.x * tri[0].TexCoord
                    + bary.y * tri[1].TexCoord
                    + bary.z * tri[2].TexCoord;

    V_PosW_NormalW_TanW_TexCoord dout;
    ComputePosNormalTangent(texCoord, dout.PosW, dout.NormalW, dout.TanW);
    dout.TexCoord = texCoord;
    return dout;
}
// Geometry shader output / pixel shader input.
struct V_PosH_NormalW_TanW_TexCoord
{
    float4 PosH : SV_POSITION;
    float3 NormalW : NORMAL;
    float3 TanW : TANGENT;
    float2 TexCoord : TEXCOORD;
};

// Geometry shader: emits every input triangle twice - once as is and once
// with the last two vertices swapped - after projecting with gViewProj.
[maxvertexcount(6)]
void GS(triangle V_PosW_NormalW_TanW_TexCoord gin[3], inout TriangleStream<V_PosH_NormalW_TanW_TexCoord> triStream)
{
    V_PosH_NormalW_TanW_TexCoord gout[6];

    [unroll] // just copy pasti'n
    for (int i = 0; i < 3; ++i)
    {
        float3 posW = gin[i].PosW;
        gout[i].PosH = mul(float4(posW, 1.0f), gViewProj);
        gout[i].NormalW = gin[i].NormalW;
        gout[i].TanW = gin[i].TanW;
        gout[i].TexCoord = gin[i].TexCoord;
    }

    [unroll] // create the other side
    for (i = 3; i < 6; ++i)
    {
        // Output vertices 3..5 mirror input vertices 0..2.
        float3 posW = gin[i - 3].PosW;
        gout[i].PosH = mul(float4(posW, 1.0f), gViewProj);
        gout[i].NormalW = gin[i - 3].NormalW;
        gout[i].TanW = gin[i - 3].TanW;
        gout[i].TexCoord = gin[i - 3].TexCoord;
    }

    triStream.Append(gout[0]);
    triStream.Append(gout[1]);
    triStream.Append(gout[2]);
    triStream.RestartStrip();

    // Swapped order for the second copy.
    triStream.Append(gout[3]);
    triStream.Append(gout[5]);
    triStream.Append(gout[4]);
}
// Pixel shader placeholder; the body is intentionally left open.
float4 PS(V_PosH_NormalW_TanW_TexCoord pin) : SV_Target
{
    // ... now what?! XD
}

// Single pass that wires all five shader stages together.
technique11 BSplineDraw
{
    pass P0
    {
        SetVertexShader(CompileShader(vs_5_0, VS()));
        SetHullShader(CompileShader(hs_5_0, HS()));
        SetDomainShader(CompileShader(ds_5_0, DS()));
        SetGeometryShader(CompileShader(gs_5_0, GS()));
        SetPixelShader(CompileShader(ps_5_0, PS()));
    }
}
/// A single particle with its own update and render entry points.
class Particle
{
public:
    bool m_alive;
    Vec4d m_pos;
    Vec4d m_col;
    float time;
    // ... other fields

public:
    // ctors...

    /// Advances the particle by deltaTime.
    void update(float deltaTime);

    /// Draws the particle.
    void render();
};
std::vector<particle> particles; // update function: for (auto &p : particles) p.update(dt); // rendering code: for (auto &p : particles) p.render();
void kill(particleID) { ?? } void wake(particleID) { ?? }
// Forward-base diffuse shader: textured, tinted, lit by one light plus ambient.
Shader "Sample/Diffuse"
{
    Properties
    {
        _DiffuseTexture ("Diffuse Texture", 2D) = "white" {}
        _DiffuseTint ( "Diffuse Tint", Color) = (1, 1, 1, 1)
    }

    SubShader
    {
        Tags { "RenderType"="Opaque" }

        pass
        {
            Tags { "LightMode"="ForwardBase"}

            CGPROGRAM
            #pragma target 3.0
            #pragma fragmentoption ARB_precision_hint_fastest
            #pragma vertex vertShadow
            #pragma fragment fragShadow
            #pragma multi_compile_fwdbase

            #include "UnityCG.cginc"
            #include "AutoLight.cginc"

            sampler2D _DiffuseTexture;
            float4 _DiffuseTint;
            float4 _LightColor0;

            // Vertex-to-fragment payload.
            struct v2f
            {
                float4 pos : SV_POSITION;
                float3 lightDir : TEXCOORD0;
                float3 normal : TEXCOORD1;
                float2 uv : TEXCOORD2;
                LIGHTING_COORDS(3, 4)
            };

            // Vertex stage: projects the vertex and forwards lighting inputs.
            v2f vertShadow(appdata_base v)
            {
                v2f o;
                o.pos = mul(UNITY_MATRIX_MVP, v.vertex);
                o.uv = v.texcoord;
                o.lightDir = normalize(ObjSpaceLightDir(v.vertex));
                o.normal = normalize(v.normal).xyz;
                TRANSFER_VERTEX_TO_FRAGMENT(o);
                return o;
            }

            // Fragment stage: (ambient + attenuated N.L diffuse) * texture.
            float4 fragShadow(v2f i) : COLOR
            {
                float3 L = normalize(i.lightDir);
                float3 N = normalize(i.normal);

                float attenuation = LIGHT_ATTENUATION(i) * 2;
                float4 ambient = UNITY_LIGHTMODEL_AMBIENT * 2;

                float NdotL = saturate(dot(N, L));
                float4 diffuseTerm = NdotL * _LightColor0 * _DiffuseTint * attenuation;

                float4 diffuse = tex2D(_DiffuseTexture, i.uv);
                float4 finalColor = (ambient + diffuseTerm) * diffuse;
                return finalColor;
            }
            ENDCG
        }
    }

    FallBack "Diffuse"
}
Tags { "LightMode"="ForwardBase"}
#pragma multi_compile_fwdbase
#include "AutoLight.cginc"
// Vertex-to-fragment payload; LIGHTING_COORDS takes indices 3 and 4.
struct v2f
{
    float4 pos : SV_POSITION;
    float3 lightDir : TEXCOORD0;
    float3 normal : TEXCOORD1;
    float2 uv : TEXCOORD2;
    LIGHTING_COORDS(3, 4)
};
// Variant with an extra viewDir on TEXCOORD3; LIGHTING_COORDS moves to 4 and 5.
struct v2f
{
    float4 pos : SV_POSITION;
    float3 lightDir : TEXCOORD0;
    float3 normal : TEXCOORD1;
    float2 uv : TEXCOORD2;
    float3 viewDir : TEXCOORD3;
    LIGHTING_COORDS(4, 5)
};
// Vertex stage: projects the vertex and forwards uv, light direction and normal.
v2f vertShadow(appdata_base v)
{
    v2f o;
    o.pos = mul(UNITY_MATRIX_MVP, v.vertex);
    o.uv = v.texcoord;
    o.lightDir = normalize(ObjSpaceLightDir(v.vertex));
    o.normal = normalize(v.normal).xyz;
    TRANSFER_VERTEX_TO_FRAGMENT(o);
    return o;
}
// Fragment stage: (ambient + attenuated N.L diffuse) * texture color.
float4 fragShadow(v2f i) : COLOR
{
    float3 L = normalize(i.lightDir);
    float3 N = normalize(i.normal);

    float attenuation = LIGHT_ATTENUATION(i) * 2;
    float4 ambient = UNITY_LIGHTMODEL_AMBIENT * 2;

    float NdotL = saturate(dot(N, L));
    float4 diffuseTerm = NdotL * _LightColor0 * _DiffuseTint * attenuation;

    float4 diffuse = tex2D(_DiffuseTexture, i.uv);
    float4 finalColor = (ambient + diffuseTerm) * diffuse;
    return finalColor;
}
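|       | LH                | RH                |
|-------|-------------------|-------------------|
| look  | wLook - eye       | eye - wLook       |
| right | wUp x look        | wUp x look        |
| up    | look x right      | look x right      |
| A     | -dot(right, eye)  | -dot(right, eye)  |
| B     | -dot(up, eye)     | -dot(up, eye)     |
| C     | -dot(look, eye)   | -dot(look, eye)   |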
Figure 2: OpenGL (Left) and DirectX (Right) NDC
deviceContext>ClearDepthStencilView(depthStencilView, D3D11_CLEAR_DEPTH, 1.0f, 0); .... depthStencilDesc.DepthFunc = D3D11_COMPARISON_LESS_EQUAL; device>CreateDepthStencilState(&depthStencilDesc,&depthStencilState); deviceContext>OMSetDepthStencilState(depthStencilState, 1);Code 1: Code snippet settings for LH DirectX rendering
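|                            | LH                          | RH                             |
|----------------------------|-----------------------------|--------------------------------|
| D3D11_CLEAR_DEPTH          | 1.0                         | 0.0                            |
| depthStencilDesc.DepthFunc | D3D11_COMPARISON_LESS_EQUAL | D3D11_COMPARISON_GREATER_EQUAL |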
| LH                     | RH                     |
|------------------------|------------------------|
| glClearDepth(0)        | glClearDepth(1)        |
| glDepthFunc(GL_GEQUAL) | glDepthFunc(GL_LEQUAL) |
Figure 3: Incorrectly set depth function and clear for current projection
Figure 4: Correctly set depth function and clear for current projection
Figure 5: Projected depth with DirectX and OpenGL LH matrices (values used for calculation: near = 0.1, far = 1.0)
Figure 6: Projected depth with DirectX and OpenGL RH matrices (values used for calculation: near = 0.1, far = 1.0)
| LH | gl_Position.z = (-2.0) * log((gl_Position.z) * C + 1.0) / log(far * C + 1.0) + 1.0 |
| RH | gl_Position.z = (2.0) * log((gl_Position.z) * C + 1.0) / log(far * C + 1.0) - 1.0 |
Figure 7: Projected depth with classic perspective and with logarithmic one in LH (values used for calculation: near = 0.1, far = 1.0, C = 1.0)
Figure 8: Projected depth with classic perspective and with logarithmic one in RH (values used for calculation: near = 0.1, far = 1.0, C = 1.0)
Matrix4x4 tmp = Matrix4x4::Invert(viewMatrix); tmp.Transpose(); Vector4 clipPlane = Vector4::Transform(clipPlane, tmp);
// sign(x): +1 for positive, -1 for negative, 0 for zero.
float xSign = (clipPlane.X > 0) ? 1.0f : ((clipPlane.X < 0) ? -1.0f : 0.0f);
float ySign = (clipPlane.Y > 0) ? 1.0f : ((clipPlane.Y < 0) ? -1.0f : 0.0f);
// Construct the vector directly. "= (a, b, c, d)" is a comma expression that
// evaluates to its last operand only.
Vector4 q(xSign, ySign, 1, 1);
// Rescale q component-wise using the projection matrix entries.
q.X = q.X / projection[0][0];
q.Y = q.Y / projection[1][1];
q.Z = 1.0f;
q.W = (1.0f - projection[2][2]) / projection[3][2];
// m3 is the clip plane scaled by a = q.Z / dot(clipPlane, q).
float a = q.Z / Vector4::Dot(clipPlane, q); Vector4 m3 = a * clipPlane;
// Variant of the q setup that reads projection[2][3] instead of assuming its value.
q.X = q.X / projection[0][0];
q.Y = q.Y / projection[1][1];
q.Z = 1.0 / projection[2][3];
q.W = (1.0 / projection[3][2]) - (projection[2][2] / (projection[2][3] * projection[3][2]));
// m3 is the clip plane scaled by a = 2 * projection[2][3] * q.Z / dot(clipPlane, q),
// then its Z component is offset by 1.
float a = (2.0f * projection[2][3] * q.Z) / Vector4::Dot(clipPlane, q); Vector4 m3 = clipPlane * a; m3.Z = m3.Z + 1.0f;
LH: m3.Z = m3.Z + projection[2][3]; //([2][3] = +1)
RH: m3.Z = m3.Z - projection[2][3]; //([2][3] = -1)
// Start from the projection matrix and overwrite elements [0..3][2] with m3.
Matrix4x4 res = projection; res[0][2] = m3.X; res[1][2] = m3.Y; res[2][2] = m3.Z; res[3][2] = m3.W;
childFrame.ToRoot = childFrame.TransformationMatrix * parentFrame.ToRoot;
childFrame.AnimToRoot = childFrame.AnimTransform * parentFrame.AnimToRoot;
Arm: time 0: rotation 0 time 1: rotation about some axis (x,y,z) by 90 degrees time 2: rotation 0
Spine: time 0.0: rot 1.6 (about axis) x,y,z time 0.1: rot 1.65 x,y,z time 0.2: rot 1.69 x,y,z ... time 1.9: rot 1.65 x,y,z time 2.0: rot 1.6 x,y,z Arm: time 0.0: rot 0.7 x,y,z time 0.1: rot 0.71 x,y,z time 0.2: rot 0.72 x,y,z ... time 1.9: rot 0.71 x,y,z time 2.0: rot 0.7 x,y,z Head: time 0.0: rot 0.6 x,y,z time 0.1: rot 0.6 x,y,z ...
// for the Arm bone MATRIX4X4 animMat = MatrixFromQuaternion( key[0].quaternion ) ); Frame* frame = FindFrameWithName( keyFrame.BoneName ); frame>TransformationMatrix = animMat;
// The key frames have been search and returned: // keyFrameBefore = 0 // keyFrameAfter = 1 Quaternion quatBefore = key[keyFrameBefore].quaternion; Quaternion quatAfter = key[keyFrameAfter].quaternion;
// Determine the fraction of the time between the two frames represented by t. float ratio = ( t  key[keyFrameBefore].time )/( key[keyFrameAfter].time  key[keyFrameBefore].time );
// The SLERP function (a math utility) returns a quaternion based on // ratio * quatBefore and (1.0  ratio) * quatAfter. // HOWEVER: know your math libraries! Some SLERP functions // may calculate the result using (1.0  ratio)*quatBefore and ratio*quatAfter // Quaternion quatAtTimeT = SLERP( quatBefore, quatAfter, ratio ); MATRIX4X4 animMat = MatrixFromQuaternion( quatAtTimeT ) ); Frame* frame = FindFrameWithName( keyFrame.BoneName ); frame>TransformationMatrix = animMat;
struct AnimationKey { float time; Quaterion quat; Vector scale, trans; }; struct Animation { String frameName; // bonename, nodename // trackKeyIndex is the index in AnimTrack.trackKeys for interpolated timedKeys // for this particular frame/bone/node int trackKeyIndex; float maxTime; // used for checking conistency among Animations in an animationset std::vector<AnimationKey> animKeys; }; struct AnimationSet { bool checkedAgainstRootFrame; // used to verfiy the compatibility with the frame hierarchy String animationSetName; // maybe "Walk," "Run," "Idle," or other convenient description. std::vector<Animation> animations; double currentTime, period; // the current time ("t" in the above discussions) and the period }; struct RootFrame { bool checkedAgainstAnimationSets; Frame* _hierarchyRootFrame; }; // parameters that may be useful for a particular animation set struct AnimTrackDesc { float speed; // allows for displaying animations faster or slower than asmodeled. Not discussed. bool enable; // determines whether the animationset key frames are to be processed. }; // when 2 animationsets are to be blended, a BlendEvent is initiated // describing which track buffers are to be used for the blend struct BlendEvent { int fromTrack, toTrack; // which buffers float transitionTime; // how long the transition takes float curTime; // how far along in the transition time }; struct AnimTrack { int animSet; // animationSet currently assigned to this track std::vector<AnimationKey> trackKeys; // the track buffer AnimTrackDesc desc; };
class AnimController { public: AnimController() { initialized = false; } // no explicit destructor is needed. However, if a user should need to add a destructor, // follow the Rule of Three when doing so. See Disclaimer below for further information. bool Init(); bool AdvanceTime(double deltaTime); bool SetHierarchy(Frame* newRootFrame); bool SetTicksPerSecond(int newTicks, int whichAnimationSet); size_t GetNumAnimationSets() { return _animSets.size(); } bool GetAnimationSetName(int animSetNum, std::string& animName); size_t GetNumTracks() { return _animTracks.size(); } bool TrackSetAnimationSet(int animSetNum, int trackNum); bool TrackSetAnimationSet(std::string animSetName, int trackNum); int CreateTrack(AnimTrackDesc* trackDesc); // returns the index for the new track bool DeleteTrack( int trackNum ); bool GetTrackDescription( int trackNum, AnimTrackDesc* trackDesc); bool SetTrackDescription( int trackNum, AnimTrackDesc* trackDesc); bool GetTrackEnable(int trackNum); bool SetTrackEnable(int trackNum, bool enable); bool TransitionTracks(int fromTrack, int toTrack, float transitionTime); // start a blend event size_t GetNumBlendEvents() { return _trackEvents.size(); } bool GetBlendEvent(int eventNum, BlendEvent* event); bool initialized; // methods // CheckCompatibility  ensures the root frame hierarchy frameNames // match the frameNames in the animation sets. Specific implementation // of this method is not discussed in this article. bool CheckCompatibility(); // are the animation sets applicable to the frame hierarchy? bool BuildFrameNameIndex(); // sets Animation::trackKeyIndex bool SetCurTicks(DWORD animSetNum); bool InterpolateAnimations(Animation& anim, double fTime, std::vector<AnimTrackKey>& trackKeys); bool InterpolateKeyFrame(AnimationKey& animKey, double fTime); // attributes RootFrame _rootFrame; // frame hierarchy access (for storing matrices, finding names, etc. 
std::vector<AnimationSet> _animSets; // all the animation sets available std::vector<AnimTrack> _animTracks; // all the tracks std::vector<BlendEvent> _trackEvents; // information for blending animation sets std::vector<std::string> frameNames; // this of hierarchy frame names used to index into track buffers };
// // advance the time. // calculate animation matrices and store matrices in hierarchy TransformationMatrix // deltaTime is NOT the elapsed game time, but the change in time since the last render cycle time // For many applications, this is the same deltatime used to update other scene objects. // bool AnimController::AdvanceTime(double deltaTime) { if (!initialized) return false; // If an animation controller is intended to process just one track, or just one blend event // this section of code can be revised to enforce that assumption. // The code presented here allows for generalizing "track events" to do more // than just blending two animation sets for (size_t track = 0; track < _animTracks.size(); track++) // check the status of all tracks { // animation sets are rendered only when the associated track is enabled // Also check that the animation set associated with the track is "valid" if (_animTracks[track].desc.enable && _animTracks[track].animSet < (int)_animSets.size()) { UINT animSetNum = _animTracks[track].animSet; // variable convenient for looping // advance the local time for the animation set. _animSets[animSetNum].currentTime += deltaTime; // adjust the time if necessary. 
See SetCurTicks code below if (!SetCurTicks(animSetNum)) return false; // loop through animations for (size_t i = 0; i < _animSets[animSetNum].animations.size(); i++) { if( !InterpolateAnimations(_animSets[animSetNum].animations[i], _animSets[animSetNum].currentTime, _animTracks[track].trackKeys) ) return false; // something went wrong } } } MATRIX rot, scale, translate; // parameters used for interpolating // The concept for this animation controller is to: // Process A Blend Event // OR // Process a single track // // Though _trackEvents allows for other types of blending // and events, for the purpose of this article it is assumed // that there will be either 0 or just 1 blend occurring at a time if (_trackEvents.size()) { _trackEvents[0].curTime += deltaTime; // bump the progression of the blend if (_trackEvents[0].curTime > _trackEvents[0].transitionTime) // done with this event { SetTrackEnable(_trackEvents[0].fromTrack, false); // disable the "from" animation set // delete the event _trackEvents.clear(); } else { // to reduce the clutter of the calcuations, an iterator is used ONLY // for clarity. iter is, in fact, just _trackEvents[0]. 
std::vector<BlendEvent>::iterator iter = _trackEvents.begin(); float blendFactor = float(iter>curTime / iter>transitionTime); // get the buffers for both the "from" track and the "to" track std::vector<AnimationKey>& vFrom = _animTracks[iter>fromTrack].trackKeys; std::vector<AnimationKey>& vTo = _animTracks[iter>toTrack].trackKeys; // declare variables to use in the blending Quaternion quatFinal, quatFrom, quatTo; Vector scaleFinal, scaleFrom, scaleTo, translateFinal, translateFrom, translateTo; // loop through every animation, blend the results of the two animation sets // and send the animation matrix off to the frame hierarchy for (size_t tk = 0; tk < vFrom.size(); tk++) // trackKeys.size() are all the same size { // grab values from the track buffers quatFrom = vFrom[tk].quat; quatTo = vTo[tk].quat; scaleFrom = vFrom[tk].scale; scaleTo = vTo[tk].scale; translateFrom = vFrom[tk].trans; translateTo = vTo[tk].trans; // blend the quats, scales, and translations. Calculate the animation matrices. // The following line demonstrates possible concatenations IF the function // forms allow it. MatrixFromQuaternion(&rot, QuaternionSlerp(&quatFinal, &quatFrom, &quatTo, blendFactor)); // a bit more formally, calculate the blended scale scaleFinal = (1.0f  blendFactor)*scaleFrom + blendFactor * scaleTo; // calulate the blended translation translateTo = (1.0f  blendFactor)*translateFrom + blendFactor * translateTo; // create the scale and translation matrices MatrixScaling(&scale, scaleFinal.x, scaleFinal.y, scaleFinal.z); MatrixTranslation(&translate, translateFinal.x, translateFinal.y, translateFinal.z); // find the frame in the hierarchy with the name equivalent to the animation // The array "frameNames" is assumed to be an array of frame names in indexed order Frame* frame = FrameWithName(frameNames[tk], _rootFrame._hierarchyRootFrame); if (frame == NULL) return false; // GLOBALMSG // calculate and store the animation matrix. 
frame>TransformationMatrix = rot * scale * translate; } } } // if a blend is not progress, just update animations from the (hopefully) only enabled track else { // set Transformation matrix with track results for (size_t track = 0; track < _animTracks.size(); track++) { if (_animTracks[track].desc.enable) { std::vector<AnimTrackKey>& v = _animTracks[track].trackKeys; for (size_t tk = 0; tk < v.size(); tk++) { MatrixFromQuaternion(&rot, &v[tk].quat); MatrixScaling(&scale, v[tk].scale.x, v[tk].scale.y, v[tk].scale.z); MatrixTranslation(&translate, v[tk].trans.x, v[tk].trans.y, v[tk].trans.z); Frame* frame = FrameWithName(frameNames[tk], _rootFrame._hierarchyRootFrame); if (frame == NULL) return false; // GLOBALMSG? frame)>TransformationMatrix = rot * scale * translate; } } } } return true; }
// // the function name is a carry over from earlier implementations // when unsigned integers for key frame times were used. // bool AnimController::SetCurTicks(size_t animSetNum) { if (animSetNum >= _animSets.size()) return false; // error condition // convenient variables for clarity. // Also easier than coding "_animSets[animSetNum].xxx" multiple times float curTime = _animSets[animSetNum].currentTime; // was just bumped in AdvanceTime float period = _animSets[animSetNum].period; // NOTE: the following will cause the animation to LOOP from the end of the animation // back to the beginning. // Other actions which could be taken: //  pingpong: at the end of an action, reverse back through the keyframes to the beginning, etc. //  terminate the animation: perhaps provide a callback to report same while ( curTime >= period ) curTime = period; // loop within the animation // the result of this routine should be that // currentTime is >= 0 and less than the period. _animSets[animSetNum].currentTime = curTime; return true; }
// this routine finds a pair of key frames which bracket the animation time. // Interpolated values are calculated and stored in the the track buffer (trackKeys) bool AnimController::InterpolateAnimations(Animation& anim, float fTime, std::vector<AnimKey>& trackKeys); { Quaternion quat; if (anim.animKeys.size() > 1) // more than just a time==0 key { // find timedkey with time >= fTime size_t i = 0; // find a pair of key frames to interpolate while ( i < animKeys.size() && animKeys[i].time < fTime ) i++; if ( i >= animKeys.size() ) // should not happen, but handle it { i = animKeys.size()1; // use the last keyframe fTime = animKeys[i].time; } // animKeys[i].time >= fTime. That's the keyframe after the desired time // so animKeys[i1] is the keyframe before the desired time if ( i > 0 ) { float ratio = (fTime  animKey[i1].time) / (animKey[i].time  animKey[i1].time); Slerp or NLerp(&quat, &animKey[i1].quat, &animKey[i].quat, ratio); trackKeys[anim.trackKeyIndex.quat = quat; trackKeys[anim.trackKeyIndex].scale = (1.0fratio)*animKey[i1].scale + ratio * animKey[i].scale; trackKeys[anim.trackKeyIndex].trans = (1.0fratio)*animKey[i1].trans + ratio * animKey[i].trans; } else // use the time=0 keyframe { trackKeys[anim.trackKeyIndex].quat = animKey[0].quat; trackKeys[anim.trackKeyIndex].scale = animKey[0].scale; trackKeys[anim.trackKeyIndex].trans = animKey[0].trans; } } return true; }
// set up the blending of two tracks bool AnimController::TransitionTracks(size_t fromTrack, size_t toTrack, float transitionTime) { if (fromTrack >= _animTracks.size()  toTrack >= _animTracks.size()  fromTrack == toTrack  transitionTime < 0.0f) return false; // error condition BlendEvent blendEvent; blendEvent.fromTrack = fromTrack; blendEvent.toTrack = toTrack; blendEvent.transitionTime = transitionTime; blendEvent.curTime = 0.0f; _trackEvents.push_back(blendEvent); SetTrackEnable(fromTrack, true); SetTrackEnable(toTrack, true); return true; }
// This routine should be used when _rootFrame._hierarchyRootFrame has been set and animation sets // have been loaded. Must be done before any use of AdvanceTime. // You may want to tie this routine to the requirements to be considered "initialized." bool AnimationController::BuildFrameNameIndex() { frameNames.clear(); // start clean // work through the frame hierarchy, storing the name of each frame name AddFrameName( _rootFrame._hierarchyRootFrame ); // now that all names in the hierarchy have been found, // loop through the animations, checking names and setting the track buffer index for( size_t animSet = 0; animSet < _animSets.size(); animSet++) { for( size_t anim = 0; anim < _animSets[animSet].animations.size(); anim++ ) { if( (_animSets[animSet].animations[anim].trackKeyIndex = IndexForFrameName( _animSets[animSet].animations[anim].frameName )) < 0 ) return false; } } return true; } // doesn't appear in class methods above, but requires access to frameNames. void AnimationController::AddFrameName( Frame* frame ) { frameNames.pushback( std::string(frame>frameName) ); for each Child in frame: AddFrameName( Child ); } // doesn't appear in class methods above, but requires access to frameNames. int AnimationController::IndexForFrameName( std::string& frameName ) { for( size_t i=0; i < frameNames.size(); i++ ) { if( frameNames[i] == frameName ) return i; } return 1; // name not found }
// Expands the mesh of inNode into one vertex per polygon corner (position, normal, UV and
// blending info), records one Triangle per polygon, then frees mControlPoints.
// Reads exactly three corners per polygon, so the mesh is expected to be triangulated.
void FBXExporter::ProcessMesh(FbxNode* inNode)
{
    FbxMesh* currMesh = inNode->GetMesh();
    mTriangleCount = currMesh->GetPolygonCount();
    int vertexCounter = 0;
    mTriangles.reserve(mTriangleCount);

    for (unsigned int i = 0; i < mTriangleCount; ++i)
    {
        XMFLOAT3 normal[3];
        XMFLOAT2 UV[3][2];
        Triangle currTriangle;
        mTriangles.push_back(currTriangle);

        for (unsigned int j = 0; j < 3; ++j)
        {
            int ctrlPointIndex = currMesh->GetPolygonVertex(i, j);
            CtrlPoint* currCtrlPoint = mControlPoints[ctrlPointIndex];

            ReadNormal(currMesh, ctrlPointIndex, vertexCounter, normal[j]);
            // We only have diffuse texture
            for (int k = 0; k < 1; ++k)
            {
                ReadUV(currMesh, ctrlPointIndex, currMesh->GetTextureUVIndex(i, j), k, UV[j][k]);
            }

            PNTIWVertex temp;
            temp.mPosition = currCtrlPoint->mPosition;
            temp.mNormal = normal[j];
            temp.mUV = UV[j][0];
            // Copy the blending info from each control point.
            // A dedicated index is used so the polygon index `i` is not shadowed.
            for (unsigned int blendIndex = 0; blendIndex < currCtrlPoint->mBlendingInfo.size(); ++blendIndex)
            {
                VertexBlendingInfo currBlendingInfo;
                currBlendingInfo.mBlendingIndex = currCtrlPoint->mBlendingInfo[blendIndex].mBlendingIndex;
                currBlendingInfo.mBlendingWeight = currCtrlPoint->mBlendingInfo[blendIndex].mBlendingWeight;
                temp.mVertexBlendingInfos.push_back(currBlendingInfo);
            }
            // Sort the blending info so that later we can remove
            // duplicated vertices
            temp.SortBlendingInfoByWeight();

            mVertices.push_back(temp);
            mTriangles.back().mIndices.push_back(vertexCounter);
            ++vertexCounter;
        }
    }

    // Now mControlPoints has served its purpose
    // We can free its memory
    for (auto itr = mControlPoints.begin(); itr != mControlPoints.end(); ++itr)
    {
        delete itr->second;
    }
    mControlPoints.clear();
}
// inNode is the Node in this FBX Scene that contains the mesh // this is why I can use inNode>GetMesh() on it to get the mesh void FBXExporter::ProcessControlPoints(FbxNode* inNode) { FbxMesh* currMesh = inNode>GetMesh(); unsigned int ctrlPointCount = currMesh>GetControlPointsCount(); for(unsigned int i = 0; i < ctrlPointCount; ++i) { CtrlPoint* currCtrlPoint = new CtrlPoint(); XMFLOAT3 currPosition; currPosition.x = static_cast<float>(currMesh>GetControlPointAt(i).mData[0]); currPosition.y = static_cast<float>(currMesh>GetControlPointAt(i).mData[1]); currPosition.z = static_cast<float>(currMesh>GetControlPointAt(i).mData[2]); currCtrlPoint>mPosition = currPosition; mControlPoints[i] = currCtrlPoint; } }
void FBXExporter::ReadNormal(FbxMesh* inMesh, int inCtrlPointIndex, int inVertexCounter, XMFLOAT3& outNormal) { if(inMesh>GetElementNormalCount() < 1) { throw std::exception("Invalid Normal Number"); } FbxGeometryElementNormal* vertexNormal = inMesh>GetElementNormal(0); switch(vertexNormal>GetMappingMode()) { case FbxGeometryElement::eByControlPoint: switch(vertexNormal>GetReferenceMode()) { case FbxGeometryElement::eDirect: { outNormal.x = static_cast<float>(vertexNormal>GetDirectArray().GetAt(inCtrlPointIndex).mData[0]); outNormal.y = static_cast<float>(vertexNormal>GetDirectArray().GetAt(inCtrlPointIndex).mData[1]); outNormal.z = static_cast<float>(vertexNormal>GetDirectArray().GetAt(inCtrlPointIndex).mData[2]); } break; case FbxGeometryElement::eIndexToDirect: { int index = vertexNormal>GetIndexArray().GetAt(inCtrlPointIndex); outNormal.x = static_cast<float>(vertexNormal>GetDirectArray().GetAt(index).mData[0]); outNormal.y = static_cast<float>(vertexNormal>GetDirectArray().GetAt(index).mData[1]); outNormal.z = static_cast<float>(vertexNormal>GetDirectArray().GetAt(index).mData[2]); } break; default: throw std::exception("Invalid Reference"); } break; case FbxGeometryElement::eByPolygonVertex: switch(vertexNormal>GetReferenceMode()) { case FbxGeometryElement::eDirect: { outNormal.x = static_cast<float>(vertexNormal>GetDirectArray().GetAt(inVertexCounter).mData[0]); outNormal.y = static_cast<float>(vertexNormal>GetDirectArray().GetAt(inVertexCounter).mData[1]); outNormal.z = static_cast<float>(vertexNormal>GetDirectArray().GetAt(inVertexCounter).mData[2]); } break; case FbxGeometryElement::eIndexToDirect: { int index = vertexNormal>GetIndexArray().GetAt(inVertexCounter); outNormal.x = static_cast<float>(vertexNormal>GetDirectArray().GetAt(index).mData[0]); outNormal.y = static_cast<float>(vertexNormal>GetDirectArray().GetAt(index).mData[1]); outNormal.z = static_cast<float>(vertexNormal>GetDirectArray().GetAt(index).mData[2]); } break; default: throw 
std::exception("Invalid Reference"); } break; } }
// Fetch the first normal element of the mesh.
FbxGeometryElementNormal* vertexNormal = inMesh->GetElementNormal(0);
// Look up the direct-array index for this polygon vertex through the index array.
int index = vertexNormal->GetIndexArray().GetAt(inVertexCounter);
void FBXExporter::ProcessSkeletonHierarchy(FbxNode* inRootNode) { for (int childIndex = 0; childIndex < inRootNode>GetChildCount(); ++childIndex) { FbxNode* currNode = inRootNode>GetChild(childIndex); ProcessSkeletonHierarchyRecursively(currNode, 0, 0, 1); } } // inDepth is not needed here, I used it for debug but forgot to remove it void FBXExporter::ProcessSkeletonHierarchyRecursively(FbxNode* inNode, int inDepth, int myIndex, int inParentIndex) { if(inNode>GetNodeAttribute() && inNode>GetNodeAttribute()>GetAttributeType() && inNode>GetNodeAttribute()>GetAttributeType() == FbxNodeAttribute::eSkeleton) { Joint currJoint; currJoint.mParentIndex = inParentIndex; currJoint.mName = inNode>GetName(); mSkeleton.mJoints.push_back(currJoint); } for (int i = 0; i < inNode>GetChildCount(); i++) { ProcessSkeletonHierarchyRecursively(inNode>GetChild(i), inDepth + 1, mSkeleton.mJoints.size(), myIndex); } }
// For every cluster of every skin deformer on the mesh of inNode: stores the joint's
// inverse bind pose, attaches the joint/weight pair to each control point the cluster
// affects, and samples one Keyframe per frame (24 fps) of the first animation stack.
// Afterwards every control point is padded to at least 4 blending entries.
void FBXExporter::ProcessJointsAndAnimations(FbxNode* inNode)
{
    FbxMesh* currMesh = inNode->GetMesh();
    unsigned int numOfDeformers = currMesh->GetDeformerCount();
    // This geometry transform is something I cannot understand
    // I think it is from MotionBuilder
    // If you are using Maya for your models, 99% this is just an
    // identity matrix
    // But I am taking it into account anyways......
    FbxAMatrix geometryTransform = Utilities::GetGeometryTransformation(inNode);

    // A deformer is a FBX thing, which contains some clusters
    // A cluster contains a link, which is basically a joint
    // Normally, there is only one deformer in a mesh
    for (unsigned int deformerIndex = 0; deformerIndex < numOfDeformers; ++deformerIndex)
    {
        // There are many types of deformers in Maya,
        // We are using only skins, so we see if this is a skin
        FbxSkin* currSkin = static_cast<FbxSkin*>(currMesh->GetDeformer(deformerIndex, FbxDeformer::eSkin));
        if (!currSkin)
        {
            continue;
        }

        unsigned int numOfClusters = currSkin->GetClusterCount();
        for (unsigned int clusterIndex = 0; clusterIndex < numOfClusters; ++clusterIndex)
        {
            FbxCluster* currCluster = currSkin->GetCluster(clusterIndex);
            std::string currJointName = currCluster->GetLink()->GetName();
            unsigned int currJointIndex = FindJointIndexUsingName(currJointName);
            FbxAMatrix transformMatrix;
            FbxAMatrix transformLinkMatrix;
            FbxAMatrix globalBindposeInverseMatrix;

            currCluster->GetTransformMatrix(transformMatrix); // The transformation of the mesh at binding time
            currCluster->GetTransformLinkMatrix(transformLinkMatrix); // The transformation of the cluster(joint) at binding time from joint space to world space
            globalBindposeInverseMatrix = transformLinkMatrix.Inverse() * transformMatrix * geometryTransform;

            // Update the information in mSkeleton
            mSkeleton.mJoints[currJointIndex].mGlobalBindposeInverse = globalBindposeInverseMatrix;
            mSkeleton.mJoints[currJointIndex].mNode = currCluster->GetLink();

            // Associate each joint with the control points it affects
            unsigned int numOfIndices = currCluster->GetControlPointIndicesCount();
            for (unsigned int i = 0; i < numOfIndices; ++i)
            {
                BlendingIndexWeightPair currBlendingIndexWeightPair;
                currBlendingIndexWeightPair.mBlendingIndex = currJointIndex;
                currBlendingIndexWeightPair.mBlendingWeight = currCluster->GetControlPointWeights()[i];
                mControlPoints[currCluster->GetControlPointIndices()[i]]->mBlendingInfo.push_back(currBlendingIndexWeightPair);
            }

            // Get animation information
            // Now only supports one take
            FbxAnimStack* currAnimStack = mFBXScene->GetSrcObject<FbxAnimStack>(0);
            FbxString animStackName = currAnimStack->GetName();
            mAnimationName = animStackName.Buffer();
            FbxTakeInfo* takeInfo = mFBXScene->GetTakeInfo(animStackName);
            FbxTime start = takeInfo->mLocalTimeSpan.GetStart();
            FbxTime end = takeInfo->mLocalTimeSpan.GetStop();
            const FbxLongLong startFrame = start.GetFrameCount(FbxTime::eFrames24);
            const FbxLongLong endFrame = end.GetFrameCount(FbxTime::eFrames24);
            // Both end frames are included, hence the + 1.
            mAnimationLength = endFrame - startFrame + 1;

            // Build a singly linked list of key frames, one per frame of the take.
            Keyframe** currAnim = &mSkeleton.mJoints[currJointIndex].mAnimation;
            for (FbxLongLong i = startFrame; i <= endFrame; ++i)
            {
                FbxTime currTime;
                currTime.SetFrame(i, FbxTime::eFrames24);
                *currAnim = new Keyframe();
                (*currAnim)->mFrameNum = i;
                FbxAMatrix currentTransformOffset = inNode->EvaluateGlobalTransform(currTime) * geometryTransform;
                (*currAnim)->mGlobalTransform = currentTransformOffset.Inverse() * currCluster->GetLink()->EvaluateGlobalTransform(currTime);
                currAnim = &((*currAnim)->mNext);
            }
        }
    }

    // Some of the control points only have less than 4 joints
    // affecting them.
    // For a normal renderer, there are usually 4 joints
    // I am adding more dummy joints if there isn't enough
    BlendingIndexWeightPair currBlendingIndexWeightPair;
    currBlendingIndexWeightPair.mBlendingIndex = 0;
    currBlendingIndexWeightPair.mBlendingWeight = 0;
    for (auto itr = mControlPoints.begin(); itr != mControlPoints.end(); ++itr)
    {
        // Pad up to exactly 4 entries; `<= 4` would leave 5.
        for (unsigned int i = itr->second->mBlendingInfo.size(); i < 4; ++i)
        {
            itr->second->mBlendingInfo.push_back(currBlendingIndexWeightPair);
        }
    }
}
// This geometry transform is something I cannot understand // I think it is from MotionBuilder // If you are using Maya for your models, 99% this is just an // identity matrix // But I am taking it into account anyways...... FbxAMatrix geometryTransform = Utilities::GetGeometryTransformation(inNode);
// Builds a matrix from the geometric translation, rotation and scaling of inNode
// (all read with FbxNode::eSourcePivot). Throws std::exception if inNode is null.
FbxAMatrix Utilities::GetGeometryTransformation(FbxNode* inNode)
{
    if (!inNode)
    {
        throw std::exception("Null for mesh geometry");
    }

    const FbxVector4 lT = inNode->GetGeometricTranslation(FbxNode::eSourcePivot);
    const FbxVector4 lR = inNode->GetGeometricRotation(FbxNode::eSourcePivot);
    const FbxVector4 lS = inNode->GetGeometricScaling(FbxNode::eSourcePivot);

    return FbxAMatrix(lT, lR, lS);
}
FbxAMatrix transformMatrix; FbxAMatrix transformLinkMatrix; FbxAMatrix globalBindposeInverseMatrix; currCluster>GetTransformMatrix(transformMatrix); // The transformation of the mesh at binding time currCluster>GetTransformLinkMatrix(transformLinkMatrix); // The transformation of the cluster(joint) at binding time from joint space to world space globalBindposeInverseMatrix = transformLinkMatrix.Inverse() * transformMatrix * geometryTransform; // Update the information in mSkeleton mSkeleton.mJoints[currJointIndex].mGlobalBindposeInverse = globalBindposeInverseMatrix;
// Associate each joint with the control points it affects unsigned int numOfIndices = currCluster>GetControlPointIndicesCount(); for (unsigned int i = 0; i < numOfIndices; ++i) { BlendingIndexWeightPair currBlendingIndexWeightPair; currBlendingIndexWeightPair.mBlendingIndex = currJointIndex; currBlendingIndexWeightPair.mBlendingWeight = currCluster>GetControlPointWeights()[i]; mControlPoints[currCluster>GetControlPointIndices()[i]]>mBlendingInfo.push_back(currBlendingIndexWeightPair); }
// Get animation information // Now only supports one take FbxAnimStack* currAnimStack = mFBXScene>GetSrcObject<FbxAnimStack>(0); FbxString animStackName = currAnimStack>GetName(); mAnimationName = animStackName.Buffer(); FbxTakeInfo* takeInfo = mFBXScene>GetTakeInfo(animStackName); FbxTime start = takeInfo>mLocalTimeSpan.GetStart(); FbxTime end = takeInfo>mLocalTimeSpan.GetStop(); mAnimationLength = end.GetFrameCount(FbxTime::eFrames24)  start.GetFrameCount(FbxTime::eFrames24) + 1; Keyframe** currAnim = &mSkeleton.mJoints[currJointIndex].mAnimation; for (FbxLongLong i = start.GetFrameCount(FbxTime::eFrames24); i <= end.GetFrameCount(FbxTime::eFrames24); ++i) { FbxTime currTime; currTime.SetFrame(i, FbxTime::eFrames24); *currAnim = new Keyframe(); (*currAnim)>mFrameNum = i; FbxAMatrix currentTransformOffset = inNode>EvaluateGlobalTransform(currTime) * geometryTransform; (*currAnim)>mGlobalTransform = currentTransformOffset.Inverse() * currCluster>GetLink()>EvaluateGlobalTransform(currTime); currAnim = &((*currAnim)>mNext); }
FbxAMatrix input; //Assume this matrix is the one to be converted. FbxVector4 translation = input.GetT(); FbxVector4 rotation = input.GetR(); translation.Set(translation.mData[0], translation.mData[1], translation.mData[2]); // This negate Z of Translation Component of the matrix rotation.Set(rotation.mData[0], rotation.mData[1], rotation.mData[2]); // This negate X,Y of Rotation Component of the matrix // These 2 lines finally set "input" to the eventual converted result input.SetT(translation); input.SetR(rotation);
// during render (or maybe update, but I have never seen that)
// the button is also drawn by this call, so if we don't call this
// function the button does not exist anymore
do_button("my label", x, y, function() print("I got clicked") end)
// during some kind of init of a scene, for example
local button = create_button("my label", x, y)
button:set_callback(function() print("I got clicked") end)

// later in render (this will draw all gui elements we have created)
draw_gui()
// here are two versions of the immediate mode that do require an id
// but the id just needs to be unique per scene
ui_button("buttonid", "my label", onclick):draw(x, y)
ui_button({id = "buttonid", label = "my label", onclick = print}):draw(x, y)

// this is what you do if you want to create the button beforehand
// this becomes useful when dealing with listboxes and more advanced controls
local button = ui_button({label = "my_label", onclick = print})

// in both cases the control comes to life when calling the draw function
button:draw(x, y)
// the push and pop is a stack system of render states and in this case
// it keeps the translation local to the code between them
push()
for i = 1, 10, 1 do
    // this is the only place that knows about this input box
    // it is not created in some init function, but we need the id
    // so it can keep track of itself the next time it gets drawn
    // after the first call the ui_inputbox function will return the same
    // object
    ui_inputbox({id = i, value = string.format("input #%i", i)}):draw()
    // this will adjust each element 40 units down from the last one
    add_translation(0, 40)
end
pop()

// the ui_inputbox draw function would then look something like this
function draw(self)
    local width = self.width
    local height = self.height
    set_blend_color(1, 1, 1, 1)
    // set texture for complete gui texture sheet
    set_texture(gui_texture_id)
    draw_rect(...) // here the uv data would go in to grab the right part
    // set font, and this will trigger another set_texture internally
    set_text_font("arial.ttf")
    set_text_size(16)
    set_text_align(0, 0.5)
    // this function is essentially just calling multiple
    // draw rects internally for each character to be drawn
    draw_text_area(text, 0, 0, width, height)
end
// Makes `texture` the texture used by the active material state, or switches texturing
// off (texture_alpha = 0) when `texture` is null.
void IntermediateRenderer::bind_texture(Texture * texture)
{
    // this is a texture pool that contains several arrays of similar sized textures
    // lets say we want to bind texture A and that texture already exists in the pool
    // then if we have a different array bound we must flush but otherwise we just use
    // another index for the next operations since texture A was already in the
    // current active array texture
    auto mat = materials.get_active_state();
    if (texture == nullptr)
    {
        // we dont need to unbind anything just reduce the impact of the texture to 0
        mat->texture_alpha = 0.0f;
    }
    else
    {
        unsigned int texture_index = 0;
        if (texture_pool.bind(texture, &texture_index, std::bind(&IntermediateRenderer::flush, this)))
        {
            // this means we flushed
            // this will start a new draw call
            // refresh the state, usually means we grab the first
            // material index again (0)
            mat = materials.get_active_state();
        }
        // just set the constant buffer values
        // and unless we flushed nothing will change
        // we will just continue to build our vertex buffer
        // The index is converted by value; reinterpret_cast cannot convert an
        // integer to a float.
        mat->texture_index = static_cast<float>(texture_index);
        mat->texture_alpha = 1.0f;
    }
}
void IntermediateRenderer::draw_rect(const RECT_DESC & desc) { // this will switch what buffers we are pushing data to // so even if we switch from trianglelist to linelist // we dont need to flush but the rendering order will be wrong set_draw_topology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); // here we just get the currently active material and model states // model contains transformation data auto mat_id = materials.use_active_state(); auto model_id = models.use_active_state(); push_stream_ids(6, model_id, mat_id); // currently I am not using any index list, but might do in the future if I feel // I could benefit from it // its important to keep these sizes known at compile time // so we dont need to allocate temporary space on the heap somewhere Vector3 position_data[6] = { Vector3(desc.x, desc.y, 0), Vector3(desc.x + desc.width, desc.y, 0), Vector3(desc.x, desc.y + desc.height, 0), Vector3(desc.x, desc.y + desc.height, 0), Vector3(desc.x + desc.width, desc.y, 0), Vector3(desc.x + desc.width, desc.y + desc.height, 0) }; Vector2 texcoord_data[6] = { Vector2(desc.u1, desc.v1), Vector2(desc.u2, desc.v1), Vector2(desc.u1, desc.v2), Vector2(desc.u1, desc.v2), Vector2(desc.u2, desc.v1), Vector2(desc.u2, desc.v2) }; // i will switch this from float4 to an unsigned int // in the future so each vertex becomes much smaller // the desc.color_top and desc.color_bottom are already // uint32 formats Vector4 ctop(desc.color_top); Vector4 cbottom(desc.color_bottom); Vector4 color_data[6] = { ctop, ctop, cbottom, cbottom, ctop, cbottom, }; // this will just copy in our stack data to the vertex buffers position_stream>push(position_data); texcoord_stream>push(texcoord_data); color_stream>push(color_data); }
// instead of a normal array, we use an array of textures
Texture2DArray Texture : register(t0);

// each material is 8 floats
struct Material
{
    float4 color;
    float texture_index;
    float texture_alpha;
    float a; // padding
    float b; // padding
};

// by having 256 different materials at the same time
// we can draw 256 different entities in only one draw call
cbuffer MaterialBuffer : register(b0)
{
    Material material[256];
};

struct Vertex
{
    float4 position : SV_Position;
    float3 vposition : Position0;
    float3 normal : Normal0;
    float2 uv : Texcoord0;
    float4 color : Color0;
    // this is how we control what material
    // to use for what vertex, its only 1 byte in size
    // for a value range of 0-255
    uint material_id : Color1;
};

Result main(Vertex input)
{
    // lookup material
    Material mat = material[input.material_id];
    // read from the right texture
    float4 texel = Texture.Sample(Sampler, float3(input.uv, mat.texture_index));
    //... rest of shader
}
// order of multiplication in DirectX FinalVector = vector * ScaleMat * RotationMat * TranslationMat
// order of multiplication in OpenGL FinalVector = TranslationMat * RotationMat * ScaleMat * vector
finalmatrix = SRTmatrix1(rot1 followed by trans1) * SRTmatrix2( rot2 followed by trans2).
// One node of the frame ("bone") hierarchy.
struct Frame
{
    string Name;                    // the frame or "bone" name
    Matrix TransformationMatrix;    // to be used for local animation matrix
    MeshContainer MeshData;         // perhaps only one or two frames will have mesh data
    FrameArray Children;            // pointers or references to each child frame of this frame
    Matrix ToParent;                // the local transform from bone-space to bone's parent-space
    Matrix ToRoot;                  // from bone-space to root-frame space
};
// given this function ...
// Recursively fills in frame.ToRoot for `frame` and every frame below it.
// parentMatrix is the ToRoot matrix of the frame's parent.
function CalcToRootMatrix( Frame frame, Matrix parentMatrix )
{
    // transform from frame-space to root-frame-space through the parent's ToRoot matrix
    frame.ToRoot = frame.ToParent * parentMatrix;
    for each Child in frame:
        CalcToRootMatrix( Child, frame.ToRoot );
}

// ... calculate all the Frame ToRoot matrices
CalcToRootMatrix( RootFrame, IdentityMatrix ); // the root frame has no parent
frame.ToRoot = frame.ToParent * frameparent.ToParent * frameparentparent.ToParent * ... * RootFrame.ToRoot
// A function to search the hierarchy for a frame named "frameName" and return a reference to that frame
// Returns NULL when neither `frame` nor any frame below it has that name.
Frame FindFrame( Frame frame, string frameName )
{
    Frame tmpFrame;
    if ( frame.Name == frameName )
        return frame;
    for each Child in frame
    {
        if ( (tmpFrame = FindFrame( Child, frameName )) != NULL )
            return tmpFrame;
    }
    return NULL;
}

// Note: MeshFrame.ToRoot is the transform for moving the mesh into root-frame space.
function CalculateOffsetMatrix( Index boneIndex )
{
    string boneName = SkinInfo.GetBoneName( boneIndex );
    Frame boneFrame = FindFrame( root_frame, boneName );
    // error check for boneFrame == NULL if desired
    offsetMatrix[ boneIndex ] = MeshFrame.ToRoot * MatrixInverse( boneFrame.ToRoot );
}

// generate all the offset matrices
for( int i = 0; i < SkinInfo.NumBones(); i++ )
    CalculateOffsetMatrix( i );
offsetMatrix = MeshFrame.ToRoot * Inverse( bone.ToParent * parent.ToParent * ... * root.ToParent )
offsetMatrix = MeshFrame.ToRoot * root.ToSomeChild * Child.ToAnotherChild * ... * boneParent.ToInfluenceBone
The mesh and bone hierarchy in pose position
// A named collection of per-frame animations.
struct AnimationSet
{
    string animSetName;         // for multiple sets, allows selection of actions
    AnimationArray animations;
};

// The keyframes that drive one frame of the hierarchy.
struct Animation
{
    string frameName;           // look familiar?
    AnimationKeysArray keyFrames;
};

// Scale, rotation and translation at one point in time.
struct AnimationKey
{
    TimeCode keyTime;
    Vector Scale, Translation;
    Quaternion Rotation;
};
// For every animation in the animation set: evaluate its keys at
// startTime + deltaTime, look the target frame up by name, and store the
// resulting matrix in that frame's TransformationMatrix.
// NOTE(review): FindFrame's result is used unchecked here - confirm a missing frame cannot occur.
function CalulateTransformationMatrices( TimeCode deltaTime ) { TimeCode keyFrameTime = startTime + deltaTime; for each animation in AnimationSet: { Matrix frameTransform = CalculateFromAnimationKeys( keyFrameTime, animation.frameName ); Frame frame = FindFrame( rootFrame, animation.frameName ); frame.TransformationMatrix = frameTransform; } }
// given this function ...
// Recursively replaces each frame's TransformationMatrix with its product
// with parentMatrix, then passes the result down to the frame's children.
function CalcCombinedMatrix( Frame frame, Matrix parentMatrix )
{
    // transform from frame-space to root-frame-space through the parent's ToRoot matrix
    frame.TransformationMatrix = frame.TransformationMatrix * parentMatrix;
    for each Child in frame:
        CalcCombinedMatrix( Child, frame.TransformationMatrix );
}

// ... calculate all the Frame to-root animation matrices
CalcCombinedMatrix( RootFrame, IdentityMatrix );
// Given a FinalMatrix array..
// Stores OffsetMatrix[boneIndex] * (the bone frame's TransformationMatrix)
// into FinalMatrix[boneIndex].
function CalculateFinalMatrix( int boneIndex )
{
    string boneName = SkinInfo.GetBoneName( boneIndex );
    Frame boneFrame = FindFrame( root_frame, boneName );
    // error check for boneFrame == NULL if desired
    FinalMatrix[ boneIndex ] = OffsetMatrix[ boneIndex ] * boneFrame.TransformationMatrix;
}

// generate all the final matrices
for( int i = 0; i < SkinInfo.NumBones(); i++ )
    CalculateFinalMatrix( i );
// numInfluenceBones is the number of bones which influence the vertex
// Depending on the vertex structure passed to the shader, it may be passed in the vertex structure
// or be set as a shader constant
float fLastWeight = 1;
float fWeight;
vector vertexPos( 0 ); // start empty
for (int i = 0; i < numInfluenceBones - 1; i++) // N.B., the last bone weight is not needed!
{
    fWeight = boneWeight[ i ];
    vertexPos += inputVertexPos * final_transform[ i ] * fWeight;
    // whatever is left of 1 after the explicit weights belongs to the last bone
    fLastWeight -= fWeight;
}
vertexPos += inputVertexPos * final_transform[ numInfluenceBones - 1 ] * fLastWeight;
A voxel-based scene with complex geometry
Minecraft unleashed the creativity of users
Figure 1: Functional Graphics Pipeline
Figure 2: Pure "mathematics" view on the left versus a "graphics programming" view on the right
Figure 3: Clarification of shaders
Figure 4: Rasterization of a primitive into 58 fragments
Figure 5: Functional Graphics Pipeline with swapped front and backbuffer
create a stack with all of the vertices in CW/CCW order; pop the top vertex off the stack and store in p0; pop the top vertex off the stack and store in pHelper; while the stack is not empty pop the top vertex off the stack and store in pTemp; create a triangle with vertices p0, pHelper, pTemp; let pHelper = pTemp
create a list of the vertices (preferably in CCW order, starting anywhere) while true for every vertex let pPrev = the previous vertex in the list let pCur = the current vertex; let pNext = the next vertex in the list if the vertex is not an interior vertex (the wedge product of (pPrev - pCur) and (pNext - pCur) <= 0, for CCW winding); continue; if there are any vertices in the polygon inside the triangle made by the current vertex and the two adjacent ones continue; create the triangle with the points pPrev, pCur, pNext, for a CCW triangle; remove pCur from the list; if no triangles were made in the above for loop break;
Figure 1: An example heightmap taken from Wikipedia
Figure 2: An example 3D heightfield taken from Wikipedia
Figure 4: A series of 2D line segments
“The vertex normal in a 2D coordinate system is the average of the normals of the attached line segments.”
ComponentNormal = Σ (lineNormals) / N; where N is the number of normals
Normal.x = Σ(xsegments) / Nx;
Normal.y = 1.0
Normal.z = Σ(zsegments) / Nz;
Figure 5: An overhead view of a heightfield
Normal.x = [(A - P) + (P - B)] / 2.0
Normal.y = 1.0
Normal.z = [(C - P) + (P - D)] / 2.0
// Vertex layout for the filter method: only the grid coordinates are stored.
struct FilterVertex // 8 Bytes per Vertex
{
    float x, z;
};
index = z * numVertsWide + x
// Height data for the filter method.
ID3D10Buffer*                       m_pHeightBuffer;    // buffer holding the height values
ID3D10ShaderResourceView*           m_pHeightBufferRV;  // shader resource view over m_pHeightBuffer
ID3D10EffectShaderResourceVariable* m_pHeightsRV;       // effect variable the view is bound to - TODO confirm name in the .fx file
void Heightfield::CreateShaderResources(int numSurfaces) { // Create the nonstreamed Shader Resources D3D10_BUFFER_DESC desc; D3D10_SHADER_RESOURCE_VIEW_DESC SRVDesc; // Create the height buffer for the filter method ZeroMemory(&desc, sizeof(D3D10_BUFFER_DESC)); ZeroMemory(&SRVDesc, sizeof(SRVDesc)); desc.ByteWidth = m_NumVertsDeep * m_NumVertsWide * sizeof(float); desc.Usage = D3D10_USAGE_DYNAMIC; desc.BindFlags = D3D10_BIND_SHADER_RESOURCE; desc.CPUAccessFlags = D3D10_CPU_ACCESS_WRITE; SRVDesc.Format = DXGI_FORMAT_R32_FLOAT; SRVDesc.ViewDimension = D3D10_SRV_DIMENSION_BUFFER; SRVDesc.Buffer.ElementWidth = m_NumVertsDeep * m_NumVertsWide; m_pDevice>CreateBuffer(&desc, NULL, &m_pHeightBuffer); m_pDevice>CreateShaderResourceView(m_pHeightBuffer, &SRVDesc, &m_pHeightBufferRV); }
void Heightfield::Draw() { // Init some locals int numRows = m_NumVertsDeep  1; int numIndices = 2 * m_NumVertsWide; UINT offset = 0; UINT stride = sizeof(FilterVertex);
m_pNumVertsDeep>SetInt(m_NumVertsDeep); m_pNumVertsWide>SetInt(m_NumVertsWide); m_pMetersPerVertex>SetFloat(m_MetersPerVertex);
m_pDevice>IASetPrimitiveTopology (D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); m_pDevice>IASetIndexBuffer(m_pIndexBuffer,DXGI_FORMAT_R32_UINT,0); m_pDevice>IASetInputLayout(m_pHeightfieldIL); m_pDevice>IASetVertexBuffers(0, 1, &m_pHeightfieldVB, &stride, &offset);
m_pHeightsRV>SetResource(m_pHeightBufferRV);
m_pFilterSimpleTech>GetPassByIndex(0)>Apply(0); for (int j = 0; j < numRows; j++) m_pDevice>DrawIndexed( numIndices, j * numIndices, 0 ); }
// Returns the value loaded from g_Heights at element `index`.
float Height(int index)
{
    int2 location = int2(index, 0);
    return g_Heights.Load(location);
}
// Computes a vertex normal from height differences with the neighbouring
// vertices along z (previous/next row) and x (previous/next column).
//
// pos   - grid coordinates of the vertex (x = column, y = row)
// index - index of the vertex in the height buffer
//
// On a border only the one available neighbour is used; elsewhere the two
// one-sided differences are averaged. The y component is fixed at 1 before
// normalisation.
float3 FilterNormal( float2 pos, int index )
{
    float3 normal = float3(0, 1, 0);

    if(pos.y == 0)
        normal.z = Height(index) - Height(index + g_NumVertsWide);
    else if(pos.y == g_NumVertsDeep - 1)
        normal.z = Height(index - g_NumVertsWide) - Height(index);
    else
        normal.z = ((Height(index) - Height(index + g_NumVertsWide)) +
                    (Height(index - g_NumVertsWide) - Height(index))) * 0.5;

    if(pos.x == 0)
        normal.x = Height(index) - Height(index + 1);
    else if(pos.x == g_NumVertsWide - 1)
        normal.x = Height(index - 1) - Height(index);
    else
        normal.x = ((Height(index) - Height(index + 1)) +
                    (Height(index - 1) - Height(index))) * 0.5;

    return normalize(normal);
}
// Vertex shader for the filter method. Builds the vertex position from its
// grid coordinates plus the height buffer, computes the normal with
// FilterNormal, and outputs a simple directional diffuse colour.
//
// vPos - grid coordinates of the vertex (x = column, y = row)
VS_OUTPUT FilterHeightfieldVS( float2 vPos : POSITION )
{
    VS_OUTPUT Output = (VS_OUTPUT)0;
    float4 position = 1.0f;
    position.xz = vPos * g_MetersPerVertex;

    // Pull the height from the buffer
    int index = (vPos.y * g_NumVertsWide) + vPos.x;
    position.y = g_Heights.Load(int2(index, 0)) * g_MetersPerVertex;
    Output.Position = mul(position, g_ViewProjectionMatrix);

    // Compute the normal using a filter kernel
    float3 vNormalWorldSpace = FilterNormal(vPos, index);

    // Compute simple directional lighting equation
    float3 vTotalLightDiffuse = g_LightDiffuse * max(0, dot(vNormalWorldSpace, g_LightDir));
    Output.Diffuse.rgb = g_MaterialDiffuseColor * vTotalLightDiffuse;
    Output.Diffuse.a = 1.0f;

    return Output;
}
Surface Normal: N_{s} = A × B
Vertex Normal: N = Norm( Σ (N_{si}) )
Figure 6: A simple grid-based mesh with triangles
// Vertex layout for the mesh method.
struct MeshVertex
{
    D3DXVECTOR3  pos;  // vertex position
    unsigned int i;    // presumably the vertex's index in the grid - TODO confirm
};
// Surface-normal data for the stream-output method.
ID3D10Buffer*                       m_pNormalBufferSO;    // buffer holding the surface normals
ID3D10ShaderResourceView*           m_pNormalBufferRVSO;  // shader resource view over m_pNormalBufferSO
ID3D10EffectShaderResourceVariable* m_pSurfaceNormalsRV;  // effect variable the view is bound to - TODO confirm name in the .fx file
void Heightfield::CreateShaderResources( int numSurfaces ) { // Create the nonstreamed Shader Resources D3D10_BUFFER_DESC desc; D3D10_SHADER_RESOURCE_VIEW_DESC SRVDesc; // Create output normal buffer for the Stream Output ZeroMemory(&desc, sizeof(D3D10_BUFFER_DESC)); ZeroMemory(&SRVDesc, sizeof(SRVDesc)); desc.ByteWidth = numSurfaces * sizeof(D3DXVECTOR4); desc.Usage = D3D10_USAGE_DEFAULT; desc.BindFlags = D3D10_BIND_SHADER_RESOURCE  D3D10_BIND_STREAM_OUTPUT;
SRVDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; SRVDesc.ViewDimension = D3D10_SRV_DIMENSION_BUFFER; SRVDesc.Buffer.ElementWidth = numSurfaces; m_pDevice>CreateBuffer(&desc, NULL, &m_pNormalBufferSO); m_pDevice>CreateShaderResourceView(m_pNormalBufferSO, &SRVDesc, &m_pNormalBufferRVSO); }
void Heightfield::Draw() { int numRows = m_NumVertsDeep  1; int numIndices = 2 * m_NumVertsWide; m_pNumVertsDeep>SetInt(m_NumVertsDeep); m_pNumVertsWide>SetInt(m_NumVertsWide); m_pMetersPerVertex>SetFloat(m_MetersPerVertex); m_pDevice>IASetPrimitiveTopology (D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); m_pDevice>IASetIndexBuffer(m_pIndexBuffer, DXGI_FORMAT_R32_UINT,0); UINT offset = 0; UINT stride = sizeof(MeshVertex); m_pDevice>IASetInputLayout(m_pMeshIL); m_pDevice>IASetVertexBuffers(0, 1, &m_pMeshVB, &stride, &offset);
ID3D10ShaderResourceView* pViews[] = {NULL}; m_pDevice>VSSetShaderResources(0, 1, pViews);
m_pDevice>SOSetTargets(1, &m_pNormalBufferSO, &offset); m_pSurfaceNormalsRV>SetResource(m_pNormalBufferRVSO);
D3D10_TECHNIQUE_DESC desc; m_pMeshWithNormalMapSOTech>GetDesc(&desc); for(unsigned i = 0; i < desc.Passes; i++) { m_pMeshWithNormalMapSOTech>GetPassByIndex(i)>Apply(0); for (int j = 0; j < numRows; j++) m_pDevice>DrawIndexed(numIndices, j * numIndices, 0 );
m_pDevice>SOSetTargets(0, NULL, &offset); } }
// gsNormalBuffer: SurfaceNormalGS compiled with a stream-output declaration of "POSITION.xyzw".
// MeshWithNormalMapSOTech has two passes:
//   P0 - PassThroughVS + gsNormalBuffer, no pixel shader.
//   P1 - RenderNormalMapScene + RenderScenePS, no geometry shader, EnableDepth depth-stencil state.
GeometryShader gsNormalBuffer = ConstructGSWithSO( CompileShader( gs_4_0, SurfaceNormalGS() ), "POSITION.xyzw" ); technique10 MeshWithNormalMapSOTech { pass P0 { SetVertexShader( CompileShader( vs_4_0, PassThroughVS() ) ); SetGeometryShader( gsNormalBuffer ); SetPixelShader( NULL ); }
pass P1 { SetVertexShader( CompileShader( vs_4_0, RenderNormalMapScene() ) ); SetGeometryShader( NULL ); SetPixelShader( CompileShader( ps_4_0, RenderScenePS() ) ); SetDepthStencilState( EnableDepth, 0 ); } }
// Geometry shader that turns each input triangle into a single point whose
// Position.xyz is the triangle's normalised surface normal.
[maxvertexcount(1)]
void SurfaceNormalGS( triangle GS_INPUT input[3], inout PointStream<GS_INPUT> PStream )
{
    GS_INPUT Output = (GS_INPUT)0;

    // two edges of the triangle sharing vertex 0
    float3 edge1 = input[1].Position - input[0].Position;
    float3 edge2 = input[2].Position - input[0].Position;

    Output.Position.xyz = normalize( cross( edge2, edge1 ) );
    PStream.Append(Output);
}
// Sums the surface normals stored in g_SurfaceNormals for the triangles
// around one grid vertex.
//
// index - index of the vertex in the grid (row-major, g_NumVertsWide per row)
//
// The buffer holds normalsPerRow = 2 * (g_NumVertsWide - 1) surface normals
// per row of quads. Which entries are summed depends on whether the vertex
// lies on the bottom row, the top row or in between, and on the left edge,
// the right edge or in between. The returned sum is NOT normalised.
float3 ComputeNormal(uint index)
{
    float3 normal = 0.0;

    int topVertex = g_NumVertsDeep - 1;
    int rightVertex = g_NumVertsWide - 1;
    int normalsPerRow = rightVertex * 2;
    int numRows = topVertex;
    float top = normalsPerRow * (numRows - 1);  // first surface normal of the last row

    int x = index % g_NumVertsWide;
    int z = index / g_NumVertsWide;

    // Bottom
    if(z == 0)
    {
        if(x == 0)
        {
            float3 normal0 = g_SurfaceNormals.Load(int2( 0, 0 ));
            float3 normal1 = g_SurfaceNormals.Load(int2( 1, 0 ));
            normal = normal0 + normal1;
        }
        else if(x == rightVertex)
        {
            index = (normalsPerRow - 1);
            normal = g_SurfaceNormals.Load(int2( index, 0 ));
        }
        else
        {
            index = (2 * x);
            normal = g_SurfaceNormals.Load(int2( index - 1, 0 )) +
                     g_SurfaceNormals.Load(int2( index, 0 )) +
                     g_SurfaceNormals.Load(int2( index + 1, 0 ));
        }
    }
    // Top
    else if(z == topVertex)
    {
        if(x == 0)
        {
            normal = g_SurfaceNormals.Load(int2( top, 0 ));
        }
        else if(x == rightVertex)
        {
            index = (normalsPerRow * numRows) - 1;
            normal = g_SurfaceNormals.Load(int2( index, 0 )) +
                     g_SurfaceNormals.Load(int2( index - 1, 0 ));
        }
        else
        {
            index = top + (2 * x);
            normal = g_SurfaceNormals.Load(int2( index - 2, 0 )) +
                     g_SurfaceNormals.Load(int2( index, 0 )) +
                     g_SurfaceNormals.Load(int2( index - 1, 0 ));
        }
    }
    // Middle
    else
    {
        if(x == 0)
        {
            int index1 = z * normalsPerRow;         // row of quads above the vertex
            int index2 = index1 - normalsPerRow;    // row of quads below the vertex
            normal = g_SurfaceNormals.Load(int2( index1, 0 )) +
                     g_SurfaceNormals.Load(int2( index1 + 1, 0 )) +
                     g_SurfaceNormals.Load(int2( index2, 0 ));
        }
        else if(x == rightVertex)
        {
            int index1 = (z + 1) * normalsPerRow - 1;
            int index2 = index1 - normalsPerRow;
            normal = g_SurfaceNormals.Load(int2( index1, 0 )) +
                     g_SurfaceNormals.Load(int2( index2, 0 )) +
                     g_SurfaceNormals.Load(int2( index2 - 1, 0 ));
        }
        else
        {
            int index1 = (z * normalsPerRow) + (2 * x);
            int index2 = index1 - normalsPerRow;
            normal = g_SurfaceNormals.Load(int2( index1 - 1, 0 )) +
                     g_SurfaceNormals.Load(int2( index1, 0 )) +
                     g_SurfaceNormals.Load(int2( index1 + 1, 0 )) +
                     g_SurfaceNormals.Load(int2( index2 - 2, 0 )) +
                     g_SurfaceNormals.Load(int2( index2 - 1, 0 )) +
                     g_SurfaceNormals.Load(int2( index2, 0 ));
        }
    }

    return normal;
}
Figure 7: A screenshot of the demo program for this article
// Linear blend: each texture's colour is scaled by its own factor and the
// two results are added.
float3 blend(float4 texture1, float a1, float4 texture2, float a2)
{
    float3 first = texture1.rgb * a1;
    float3 second = texture2.rgb * a2;
    return first + second;
}
// Hard selection: returns the colour of whichever texture has the larger
// alpha-channel value; a1 and a2 are not used.
float3 blend(float4 texture1, float a1, float4 texture2, float a2)
{
    if (texture1.a > texture2.a)
        return texture1.rgb;
    return texture2.rgb;
}
// Hard selection with bias: each texture's alpha-channel value is offset by
// its factor, and the colour of the texture with the larger sum is returned.
float3 blend(float4 texture1, float a1, float4 texture2, float a2)
{
    float level1 = texture1.a + a1;
    float level2 = texture2.a + a2;
    if (level1 > level2)
        return texture1.rgb;
    return texture2.rgb;
}
// Depth-based blend: like the biased hard selection, but the two textures
// are mixed wherever their (alpha + factor) values lie within `depth` of
// the larger one.
//
// texture1, texture2 - colours to blend; the alpha channel is added to the factor
// a1, a2             - per-texture blend factors
float3 blend(float4 texture1, float a1, float4 texture2, float a2)
{
    float depth = 0.2;

    // threshold `depth` below the larger of the two values
    float ma = max(texture1.a + a1, texture2.a + a2) - depth;

    // how far each texture rises above the threshold (0 when below it)
    float b1 = max(texture1.a + a1 - ma, 0);
    float b2 = max(texture2.a + a2 - ma, 0);

    // the larger value always exceeds the threshold by `depth`, so b1 + b2 >= depth > 0
    return (texture1.rgb * b1 + texture2.rgb * b2) / (b1 + b2);
}