



// Angle of the 2D vector (In.x, In.y); the scalar atan2 result is assigned
// to a float2, so both components receive the same value.
float2 output = atan2(In.y, In.x);
return output;
#define FOURCC_INTZ ((D3DFORMAT)(MAKEFOURCC('I','T','N','Z'))) // Determine if INTZ is supported HRESULT hr; hr = pd3d>CheckDeviceFormat(AdapterOrdinal, DeviceType, AdapterFormat, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, FOURCC_INTZ); BOOL bINTZDepthStencilTexturesSupported = (hr == D3D_OK); // Create an INTZ depth stencil texture IDirect3DTexture9 *pINTZDST; pd3dDevice>CreateTexture(dwWidth, dwHeight, 1, D3DUSAGE_DEPTHSTENCIL, FOURCC_INTZ, D3DPOOL_DEFAULT, &pINTZDST, NULL); // Retrieve depth buffer surface from texture interface IDirect3DSurface9 *pINTZDSTSurface; pINTZDST>GetSurfaceLevel(0, &pINTZDSTSurface); // Bind depth buffer pd3dDevice>SetDepthStencilSurface(pINTZDSTSurface); // Bind depth buffer texture pd3dDevice>SetTexture(0, pINTZDST);
// Reconstructs a position from a depth-buffer sample by un-projecting the
// clip-space coordinate with the inverted projection matrix.
//   depthTexture             - depth texture to sample
//   texCoord                 - texture coordinate of the pixel, in [0, 1]
//   matrixProjectionInverted - inverse of the projection matrix
// Returns the un-projected position after the perspective divide.
float3 reconstructPos(Texture2D depthTexture, float2 texCoord, float4x4 matrixProjectionInverted)
{
    // The sampled value is inverted (1 - d) before being used as clip-space z.
    float depth = 1 - depthTexture.Sample(samplerDefault, texCoord).r;

    // Map [0, 1] texture coordinates to [-1, 1] clip space; y is flipped.
    float2 cspos = float2(texCoord.x * 2 - 1, (1 - texCoord.y) * 2 - 1);

    float4 depthCoord = float4(cspos, depth, 1);
    depthCoord = mul(matrixProjectionInverted, depthCoord);

    return depthCoord.xyz / depthCoord.w;
}
// Projection performed trivially.
static const int sBBIndexList[36] = { // index for top 4, 8, 7, 4, 7, 3, // index for bottom 5, 1, 2, 5, 2, 6, // index for left 5, 8, 4, 5, 4, 1, // index for right 2, 3, 7, 2, 7, 6, // index for back 6, 7, 8, 6, 8, 5, // index for front 1, 4, 3, 1, 3, 2, }; __m128 SSETransformCoords(__m128 *v, __m128 *m) { __m128 vResult = _mm_shuffle_ps(*v, *v, _MM_SHUFFLE(0,0,0,0)); vResult = _mm_mul_ps(vResult, m[0]); __m128 vTemp = _mm_shuffle_ps(*v, *v, _MM_SHUFFLE(1,1,1,1)); vTemp = _mm_mul_ps(vTemp, m[1]); vResult = _mm_add_ps(vResult, vTemp); vTemp = _mm_shuffle_ps(*v, *v, _MM_SHUFFLE(2,2,2,2)); vTemp = _mm_mul_ps(vTemp, m[2]); vResult = _mm_add_ps(vResult, vTemp); vResult = _mm_add_ps(vResult, m[3]); return vResult; } __forceinline __m128i Min(const __m128i &v0, const __m128i &v1) { __m128i tmp; tmp = _mm_min_epi32(v0, v1); return tmp; } __forceinline __m128i Max(const __m128i &v0, const __m128i &v1) { __m128i tmp; tmp = _mm_max_epi32(v0, v1); return tmp; } struct SSEVFloat4 { __m128 X; __m128 Y; __m128 Z; __m128 W; }; // get 4 triangles from vertices void SSEGather(SSEVFloat4 pOut[3], int triId, const __m128 xformedPos[]) { for(int i = 0; i < 3; i++) { int ind0 = sBBIndexList[triId*3 + i + 0]1; int ind1 = sBBIndexList[triId*3 + i + 3]1; int ind2 = sBBIndexList[triId*3 + i + 6]1; int ind3 = sBBIndexList[triId*3 + i + 9]1; __m128 v0 = xformedPos[ind0]; __m128 v1 = xformedPos[ind1]; __m128 v2 = xformedPos[ind2]; __m128 v3 = xformedPos[ind3]; _MM_TRANSPOSE4_PS(v0, v1, v2, v3); pOut[i].X = v0; pOut[i].Y = v1; pOut[i].Z = v2; pOut[i].W = v3; //now X contains X0 x1 x2 x3, Y  Y0 Y1 Y2 Y3 and so on... 
} } bool RasterizeTestBBoxSSE(Box3F box, __m128* matrix, float* buffer, Point4I res) { //TODO: performance LARGE_INTEGER frequency; // ticks per second LARGE_INTEGER t1, t2; // ticks double elapsedTime; // get ticks per second QueryPerformanceFrequency(&frequency); // start timer QueryPerformanceCounter(&t1); //verts and flags __m128 verticesSSE[8]; int flags[8]; static Point4F vertices[8]; static Point4F xformedPos[3]; static int flagsLoc[3]; // Set DAZ and FZ MXCSR bits to flush denormals to zero (i.e., make it faster) // Denormal are zero (DAZ) is bit 6 and Flush to zero (FZ) is bit 15. // so to enable the two to have to set bits 6 and 15 which 1000 0000 0100 0000 = 0x8040 _mm_setcsr( _mm_getcsr()  0x8040 ); // init vertices Point3F center = box.getCenter(); Point3F extent = box.getExtents(); Point4F vCenter = Point4F(center.x, center.y, center.z, 1.0); Point4F vHalf = Point4F(extent.x*0.5, extent.y*0.5, extent.z*0.5, 1.0); Point4F vMin = vCenter  vHalf; Point4F vMax = vCenter + vHalf; // fill vertices vertices[0] = Point4F(vMin.x, vMin.y, vMin.z, 1); vertices[1] = Point4F(vMax.x, vMin.y, vMin.z, 1); vertices[2] = Point4F(vMax.x, vMax.y, vMin.z, 1); vertices[3] = Point4F(vMin.x, vMax.y, vMin.z, 1); vertices[4] = Point4F(vMin.x, vMin.y, vMax.z, 1); vertices[5] = Point4F(vMax.x, vMin.y, vMax.z, 1); vertices[6] = Point4F(vMax.x, vMax.y, vMax.z, 1); vertices[7] = Point4F(vMin.x, vMax.y, vMax.z, 1); // transforms for(int i = 0; i < 8; i++) { verticesSSE[i] = _mm_loadu_ps(vertices[i]); verticesSSE[i] = SSETransformCoords(&verticesSSE[i], matrix); __m128 vertX = _mm_shuffle_ps(verticesSSE[i], verticesSSE[i], _MM_SHUFFLE(0,0,0,0)); // xxxx __m128 vertY = _mm_shuffle_ps(verticesSSE[i], verticesSSE[i], _MM_SHUFFLE(1,1,1,1)); // yyyy __m128 vertZ = _mm_shuffle_ps(verticesSSE[i], verticesSSE[i], _MM_SHUFFLE(2,2,2,2)); // zzzz __m128 vertW = _mm_shuffle_ps(verticesSSE[i], verticesSSE[i], _MM_SHUFFLE(3,3,3,3)); // wwww static const __m128 sign_mask = _mm_set1_ps(0.f); // 0.f 
= 1 << 31 vertW = _mm_andnot_ps(sign_mask, vertW); // abs vertW = _mm_shuffle_ps(vertW, _mm_set1_ps(1.0f), _MM_SHUFFLE(0,0,0,0)); //w,w,1,1 vertW = _mm_shuffle_ps(vertW, vertW, _MM_SHUFFLE(3,0,0,0)); //w,w,w,1 // project verticesSSE[i] = _mm_div_ps(verticesSSE[i], vertW); // now vertices are between 1 and 1 const __m128 sadd = _mm_setr_ps(res.x*0.5, res.y*0.5, 0, 0); const __m128 smult = _mm_setr_ps(res.x*0.5, res.y*(0.5), 1, 1); verticesSSE[i] = _mm_add_ps( sadd, _mm_mul_ps(verticesSSE[i],smult) ); } // Rasterize the AABB triangles 4 at a time for(int i = 0; i < 12; i += 4) { SSEVFloat4 xformedPos[3]; SSEGather(xformedPos, i, verticesSSE); // by 3 vertices // fxPtX[0] = X0 X1 X2 X3 of 1st vert in 4 triangles // fxPtX[1] = X0 X1 X2 X3 of 2nd vert in 4 triangles // and so on __m128i fxPtX[3], fxPtY[3]; for(int m = 0; m < 3; m++) { fxPtX[m] = _mm_cvtps_epi32(xformedPos[m].X); fxPtY[m] = _mm_cvtps_epi32(xformedPos[m].Y); } // Fab(x, y) = Ax + By + C = 0 // Fab(x, y) = (ya  yb)x + (xb  xa)y + (xa * yb  xb * ya) = 0 // Compute A = (ya  yb) for the 3 line segments that make up each triangle __m128i A0 = _mm_sub_epi32(fxPtY[1], fxPtY[2]); __m128i A1 = _mm_sub_epi32(fxPtY[2], fxPtY[0]); __m128i A2 = _mm_sub_epi32(fxPtY[0], fxPtY[1]); // Compute B = (xb  xa) for the 3 line segments that make up each triangle __m128i B0 = _mm_sub_epi32(fxPtX[2], fxPtX[1]); __m128i B1 = _mm_sub_epi32(fxPtX[0], fxPtX[2]); __m128i B2 = _mm_sub_epi32(fxPtX[1], fxPtX[0]); // Compute C = (xa * yb  xb * ya) for the 3 line segments that make up each triangle __m128i C0 = _mm_sub_epi32(_mm_mullo_epi32(fxPtX[1], fxPtY[2]), _mm_mullo_epi32(fxPtX[2], fxPtY[1])); __m128i C1 = _mm_sub_epi32(_mm_mullo_epi32(fxPtX[2], fxPtY[0]), _mm_mullo_epi32(fxPtX[0], fxPtY[2])); __m128i C2 = _mm_sub_epi32(_mm_mullo_epi32(fxPtX[0], fxPtY[1]), _mm_mullo_epi32(fxPtX[1], fxPtY[0])); // Compute triangle area __m128i triArea = _mm_mullo_epi32(B2, A1); triArea = _mm_sub_epi32(triArea, _mm_mullo_epi32(B1, A2)); __m128 
oneOverTriArea = _mm_div_ps(_mm_set1_ps(1.0f), _mm_cvtepi32_ps(triArea)); __m128 Z[3]; Z[0] = xformedPos[0].W; Z[1] = _mm_mul_ps(_mm_sub_ps(xformedPos[1].W, Z[0]), oneOverTriArea); Z[2] = _mm_mul_ps(_mm_sub_ps(xformedPos[2].W, Z[0]), oneOverTriArea); // Use bounding box traversal strategy to determine which pixels to rasterize __m128i startX = _mm_and_si128(Max(Min(Min(fxPtX[0], fxPtX[1]), fxPtX[2]), _mm_set1_epi32(0)), _mm_set1_epi32(~1)); __m128i endX = Min(Max(Max(fxPtX[0], fxPtX[1]), fxPtX[2]), _mm_set1_epi32(res.x  1)); __m128i startY = _mm_and_si128(Max(Min(Min(fxPtY[0], fxPtY[1]), fxPtY[2]), _mm_set1_epi32(0)), _mm_set1_epi32(~1)); __m128i endY = Min(Max(Max(fxPtY[0], fxPtY[1]), fxPtY[2]), _mm_set1_epi32(res.y  1)); // Now we have 4 triangles set up. Rasterize them each individually. for(int lane=0; lane < 4; lane++) { // Skip triangle if area is zero if(triArea.m128i_i32[lane] <= 0) { continue; } // Extract this triangle's properties from the SIMD versions __m128 zz[3]; for(int vv = 0; vv < 3; vv++) { zz[vv] = _mm_set1_ps(Z[vv].m128_f32[lane]); } //drop culled triangle int startXx = startX.m128i_i32[lane]; int endXx = endX.m128i_i32[lane]; int startYy = startY.m128i_i32[lane]; int endYy = endY.m128i_i32[lane]; __m128i aa0 = _mm_set1_epi32(A0.m128i_i32[lane]); __m128i aa1 = _mm_set1_epi32(A1.m128i_i32[lane]); __m128i aa2 = _mm_set1_epi32(A2.m128i_i32[lane]); __m128i bb0 = _mm_set1_epi32(B0.m128i_i32[lane]); __m128i bb1 = _mm_set1_epi32(B1.m128i_i32[lane]); __m128i bb2 = _mm_set1_epi32(B2.m128i_i32[lane]); __m128i cc0 = _mm_set1_epi32(C0.m128i_i32[lane]); __m128i cc1 = _mm_set1_epi32(C1.m128i_i32[lane]); __m128i cc2 = _mm_set1_epi32(C2.m128i_i32[lane]); __m128i aa0Inc = _mm_mul_epi32(aa0, _mm_setr_epi32(1,2,3,4)); __m128i aa1Inc = _mm_mul_epi32(aa1, _mm_setr_epi32(1,2,3,4)); __m128i aa2Inc = _mm_mul_epi32(aa2, _mm_setr_epi32(1,2,3,4)); __m128i alpha0 = _mm_add_epi32(_mm_mul_epi32(aa0, _mm_set1_epi32(startXx)), _mm_mul_epi32(bb0, _mm_set1_epi32(startYy))); 
alpha0 = _mm_add_epi32(cc0, alpha0); __m128i beta0 = _mm_add_epi32(_mm_mul_epi32(aa1, _mm_set1_epi32(startXx)), _mm_mul_epi32(bb1, _mm_set1_epi32(startYy))); beta0 = _mm_add_epi32(cc1, beta0); __m128i gama0 = _mm_add_epi32(_mm_mul_epi32(aa2, _mm_set1_epi32(startXx)), _mm_mul_epi32(bb2, _mm_set1_epi32(startYy))); gama0 = _mm_add_epi32(cc2, gama0); int rowIdx = (startYy * res.x + startXx); __m128 zx = _mm_mul_ps(_mm_cvtepi32_ps(aa1), zz[1]); zx = _mm_add_ps(zx, _mm_mul_ps(_mm_cvtepi32_ps(aa2), zz[2])); zx = _mm_mul_ps(zx, _mm_setr_ps(1.f, 2.f, 3.f, 4.f)); // Texels traverse for(int r = startYy; r < endYy; r++, rowIdx += res.x, alpha0 = _mm_add_epi32(alpha0, bb0), beta0 = _mm_add_epi32(beta0, bb1), gama0 = _mm_add_epi32(gama0, bb2)) { // Compute barycentric coordinates // Z0 as an origin int index = rowIdx; __m128i alpha = alpha0; __m128i beta = beta0; __m128i gama = gama0; //Compute barycentricinterpolated depth __m128 depth = zz[0]; depth = _mm_add_ps(depth, _mm_mul_ps(_mm_cvtepi32_ps(beta), zz[1])); depth = _mm_add_ps(depth, _mm_mul_ps(_mm_cvtepi32_ps(gama), zz[2])); __m128i anyOut = _mm_setzero_si128(); __m128i mask; __m128 previousDepth; __m128 depthMask; __m128i finalMask; for(int c = startXx; c < endXx; c+=4, index+=4, alpha = _mm_add_epi32(alpha, aa0Inc), beta = _mm_add_epi32(beta, aa1Inc), gama = _mm_add_epi32(gama, aa2Inc), depth = _mm_add_ps(depth, zx)) { mask = _mm_or_si128(_mm_or_si128(alpha, beta), gama); previousDepth = _mm_loadu_ps(&(buffer[index])); //calculate current depth //(log(depth)  6.907755375) * 0.048254941; __m128 curdepth = _mm_mul_ps(_mm_sub_ps(log_ps(depth),_mm_set1_ps(6.907755375)),_mm_set1_ps(0.048254941)); curdepth = _mm_sub_ps(curdepth, _mm_set1_ps(0.05)); depthMask = _mm_cmplt_ps(curdepth, previousDepth); finalMask = _mm_andnot_si128(mask, _mm_castps_si128(depthMask)); anyOut = _mm_or_si128(anyOut, finalMask); }//for each column if(!_mm_testz_si128(anyOut, _mm_set1_epi32(0x80000000))) { // stop timer QueryPerformanceCounter(&t2); // 
compute and print the elapsed time in millisec elapsedTime = (t2.QuadPart  t1.QuadPart) * 1000.0 / frequency.QuadPart; RasterizationStats::RasterizeSSETimeSpent += elapsedTime; return true; //early exit } }// for each row }// for each triangle }// for each set of SIMD# triangles return false; }
// A logical mesh buffer: a CPU-side chunk of data plus the bookkeeping
// fields used to place it inside a larger physical buffer.
struct DXLogicalMeshBuffer final
{
    uint8_t* data{nullptr};       // pointer to the raw bytes
    size_t   dataSize{0};         // size of `data` in bytes
    size_t   dataFormatStride{0}; // size of one element in bytes
    size_t   physicalAddress{0};  // element offset inside the physical buffer
};
struct DXPhysicalMeshBuffer final { ID3D11Buffer* physicalBuffer = nullptr; ID3D11ShaderResourceView* physicalBufferView = nullptr; size_t physicalDataSize = 0; bool isDirty = false; typedef DynamicArray<DXLogicalMeshBuffer*> PageArray; PageArray allPages; DXPhysicalMeshBuffer() = default; inline ~DXPhysicalMeshBuffer() { if (physicalBuffer != nullptr) physicalBuffer>Release(); if (physicalBufferView != nullptr) physicalBufferView>Release(); } void allocate(DXLogicalMeshBuffer* logicalBuffer); void release(DXLogicalMeshBuffer* logicalBuffer); void rebuildPages(); // very expensive operation }
void DXPhysicalBuffer::allocate(DXLogicalMeshBuffer* logicalBuffer) { allPages.Add(logicalBuffer); isDirty = true; } void DXPhysicalBuffer::release(DXLogicalMeshBuffer* logicalBuffer) { allPages.Remove(logicalBuffer); isDirty = true; }
size_t vfStride = allPages[0]>dataFormatStride; // TODO: right now will not work with different strides size_t numElements = physicalDataSize / vfStride; if (physicalBuffer != nullptr) physicalBuffer>Release(); if (physicalBufferView != nullptr) physicalBufferView>Release(); D3D11_BUFFER_DESC bufferDesc; bufferDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; bufferDesc.ByteWidth = physicalDataSize; bufferDesc.Usage = D3D11_USAGE_DYNAMIC; bufferDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; bufferDesc.StructureByteStride = vfStride; bufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; if (FAILED(g_pd3dDevice>CreateBuffer(&bufferDesc, nullptr, &physicalBuffer))) { handleError(...); // handle your error here return; }
// Describe the buffer as a structured-buffer shader resource view.
D3D11_SHADER_RESOURCE_VIEW_DESC viewDesc;
std::memset(&viewDesc, 0, sizeof(viewDesc));
viewDesc.Format = DXGI_FORMAT_UNKNOWN;
viewDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
// ElementWidth shares a union with NumElements in D3D11_BUFFER_SRV; the
// value written here is the element count.
viewDesc.Buffer.ElementWidth = numElements;

if (FAILED(g_pd3dDevice->CreateShaderResourceView(physicalBuffer, &viewDesc, &physicalBufferView)))
{
    // TODO: error handling
    return;
}
// fill the physical buffer D3D11_MAPPED_SUBRESOURCE mappedData; std::memset(&mappedData, 0, sizeof(mappedData)); if (FAILED(g_pImmediateContext>Map(physicalBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedData))) { handleError(...); // insert error handling here return; } uint8_t* dataPtr = reinterpret_cast<uint8_t*>(mappedData.pData); size_t pageOffset = 0; for (size_t i = 0; i < allPages.GetSize(); ++i) { DXLogicalMeshBuffer* logicalBuffer = allPages[i]; // copy logical data to the mapped physical data std::memcpy(dataPtr + pageOffset, logicalBuffer>data, logicalBuffer>dataSize); // calculate physical address logicalBuffer>physicalAddress = pageOffset / logicalBuffer>dataFormatStride; // calculate offset pageOffset += logicalBuffer>dataSize; } g_pImmediateContext>Unmap(physicalBuffer, 0);
std::memset(&bufferDesc, 0, sizeof(bufferDesc)); bufferDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE; bufferDesc.ByteWidth = dataBufferSize; bufferDesc.Usage = D3D11_USAGE_DYNAMIC; bufferDesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; bufferDesc.StructureByteStride = stride; bufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; if (FAILED(g_pd3dDevice>CreateBuffer(&bufferDesc, nullptr, &dataBuffer))) { handleError(...); // handle your error here return; } D3D11_SHADER_RESOURCE_VIEW_DESC viewDesc; std::memset(&viewDesc, 0, sizeof(viewDesc)); viewDesc.Format = DXGI_FORMAT_UNKNOWN; viewDesc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; viewDesc.Buffer.ElementWidth = numInstances; if (FAILED(g_pd3dDevice>CreateShaderResourceView(dataBuffer, &viewDesc, &dataView))) { handleError(...); // handle your error here return; }
// Per-draw-call header written at the start of each constant-data slot.
struct InternalData
{
    uint32_t vb;           // physical address of the vertex data
    uint32_t ib;           // physical address of the index data
    uint32_t drawCallType; // draw call type value
    uint32_t count;        // element count of the draw call
};
struct DrawCall final { enum Type : uint32_t { Draw = 0, DrawIndexed = 1 }; enum { ConstantBufferSize = 2048 // TODO: remove hardcode }; enum { MaxTextures = 8 }; uint8_t constantBufferData[ConstantBufferSize]; DXLogicalMeshBuffer* vertexBuffer; DXLogicalMeshBuffer* indexBuffer; uint32_t count; uint32_t startVertex; uint32_t startIndex; Type type; };
// update constants { D3D11_MAPPED_SUBRESOURCE mappedData; if (FAILED(g_pImmediateContext>Map(psimpl>constantBuffer.dataBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &mappedData))) { // TODO: error handling return; } uint8_t* dataPtr = reinterpret_cast<uint8_t*>(mappedData.pData); for (size_t i = 0; i < numInstances; ++i) { size_t offset = i * internal::DrawCall::ConstantBufferSize; const internal::DrawCall& call = queue>getDrawCalls()[i]; std::memcpy(dataPtr + offset, call.constantBufferData, internal::DrawCall::ConstantBufferSize); // fill internal data structure InternalData* idata = reinterpret_cast<InternalData*>(dataPtr + offset); DXLogicalMeshBuffer* vertexBuffer = static_cast<DXLogicalMeshBuffer*>(call.vertexBuffer.value); if (vertexBuffer != nullptr) idata>vb = vertexBuffer>physicalAddress; DXLogicalMeshBuffer* indexBuffer = static_cast<DXLogicalMeshBuffer*>(call.indexBuffer.value); if (indexBuffer != nullptr) idata>ib = indexBuffer>physicalAddress; idata>drawCallType = call.type; idata>count = call.count; } g_pImmediateContext>Unmap(psimpl>constantBuffer.dataBuffer, 0); }
// Slots 0 and 1: the physical vertex and index buffers (vertex shader only).
ID3D11ShaderResourceView* vbibViews[2] = {
    g_physicalVertexBuffer->physicalBufferView,
    g_physicalIndexBuffer->physicalBufferView
};

g_pImmediateContext->VSSetShaderResources(0, 2, vbibViews);

// Slot 2: the per-draw-call constant data, bound to every shader stage.
g_pImmediateContext->VSSetShaderResources(0 + 2, 1, &psimpl->constantBuffer.dataView);
g_pImmediateContext->HSSetShaderResources(0 + 2, 1, &psimpl->constantBuffer.dataView);
g_pImmediateContext->DSSetShaderResources(0 + 2, 1, &psimpl->constantBuffer.dataView);
g_pImmediateContext->GSSetShaderResources(0 + 2, 1, &psimpl->constantBuffer.dataView);
g_pImmediateContext->PSSetShaderResources(0 + 2, 1, &psimpl->constantBuffer.dataView);

// One instance per queued draw call.
g_pImmediateContext->DrawInstanced(maxDrawCallVertexCount, numInstances, 0, 0);
// vertex
struct VertexData
{
    float3 position;
    float2 texcoord0;
    float2 texcoord1;
    float3 normal;
};
StructuredBuffer<VertexData> g_VertexBuffer;
StructuredBuffer<uint>       g_IndexBuffer;

// pipeline state
#define DRAW 0
#define DRAW_INDEXED 1
struct ConstantData
{
    uint4    internalData; // [0] vb address, [1] ib address, [2] draw type, [3] count
    float4x4 World;
    float4x4 View;
    float4x4 Projection;
};
StructuredBuffer<ConstantData> g_ConstantBuffer;
uint instanceID = input.instanceID;
uint vertexID   = input.vertexID;

// Per-draw-call header stored in the first uint4 of the constant data.
uint vbID      = g_ConstantBuffer[instanceID].internalData[0];
uint ibID      = g_ConstantBuffer[instanceID].internalData[1];
uint drawType  = g_ConstantBuffer[instanceID].internalData[2];
uint drawCount = g_ConstantBuffer[instanceID].internalData[3];

VertexData vdata;
[branch] if (drawType == DRAW_INDEXED) vdata = g_VertexBuffer[vbID + g_IndexBuffer[ibID + vertexID]];
else     if (drawType == DRAW)         vdata = g_VertexBuffer[vbID + vertexID];

// discard vertex by moving it outside of the clip plane
// Valid vertex IDs are 0 .. drawCount - 1, so ID == drawCount is already
// out of range and must be discarded too.
[flatten] if (vertexID >= drawCount) vdata = g_VertexOutsideClipPlane;
16384 unique cubes, 1.2ms per frame on Intel HD 4400:
4096 unique instances of grass, 200k triangles:
Figure1: A smoothed animation curve
Figure2: Two linear consecutive keyframes
(eq. 1)
(eq. 2)
(eq. 3)
// Returns knot i of an open uniform knot vector for n control points and
// degree D: (i - D) / (n - D), clamped to [0, 1].
float GetKnot(int i, int n)
{
    // Calculate a knot from an open uniform knot vector
    return saturate((float)(i - D) / (float)(n - D));
}
Figure 2. Recursive dependencies of the basis functions (n=4, d=2).
For a certain u the only non zero values are in the rectangles.
// Returns the index of the knot span that contains parameter u for n control
// points. u is scaled by 0.9999 so that u == 1 still maps to the last span.
int GetKey(float u, int n)
{
    return D + (int)floor((n - D) * u * 0.9999f);
}
#define MAX_N 10         // maximum number of control points in either direction (U or V)
#define D 2              // degree of the curve
#define EPSILON 0.00002f // used for normal and tangent calculation

cbuffer cbPerObject
{
    // BSpline
    int gNU;                    // gNU actual number of control points in U direction
    int gNV;                    // gNV actual number of control points in V direction
    float4 gCP[MAX_N * MAX_N];  // control points
    float3 gCenter;             // arithmetic mean of control points

    // ... other variables
};
(eq. 4)
// Evaluates the B-spline surface at texCoord = (u, v).
//   pos    - surface position
//   normal - surface normal, from the cross product of the two tangents
//   tan    - tangent along U
// Tangents are approximated by finite differences: the surface is evaluated
// again at u + EPSILON and at v + EPSILON.
void ComputePosNormalTangent(in float2 texCoord, out float3 pos, out float3 normal, out float3 tan)
{
    float u = texCoord.x;
    float v = texCoord.y;
    float u_pdu = texCoord.x + EPSILON;
    float v_pdv = texCoord.y + EPSILON;
    int iU = GetKey(u, gNU);
    int iV = GetKey(v, gNV);

    // create and set basis
    float basisU[D + 1][MAX_N + D];
    float basisV[D + 1][MAX_N + D];
    float basisU_pdu[D + 1][MAX_N + D];
    float basisV_pdv[D + 1][MAX_N + D];
    basisU[0][iU] = basisV[0][iV] = basisU_pdu[0][iU] = basisV_pdv[0][iV] = 1.0f;

    // evaluate triangle edges
    [unroll]
    for (int j = 1; j <= D; ++j)
    {
        float gKI;
        float gKI1;
        float gKIJ;
        float gKIJ1;

        // U
        gKI   = GetKnot(iU, gNU);
        gKI1  = GetKnot(iU + 1, gNU);
        gKIJ  = GetKnot(iU + j, gNU);
        gKIJ1 = GetKnot(iU - j + 1, gNU);
        float c0U = (u - gKI) / (gKIJ - gKI);
        float c1U = (gKI1 - u) / (gKI1 - gKIJ1);
        basisU[j][iU] = c0U * basisU[j - 1][iU];
        basisU[j][iU - j] = c1U * basisU[j - 1][iU - j + 1];

        float c0U_pdu = (u_pdu - gKI) / (gKIJ - gKI);
        float c1U_pdu = (gKI1 - u_pdu) / (gKI1 - gKIJ1);
        basisU_pdu[j][iU] = c0U_pdu * basisU_pdu[j - 1][iU];
        basisU_pdu[j][iU - j] = c1U_pdu * basisU_pdu[j - 1][iU - j + 1];

        // V
        gKI   = GetKnot(iV, gNV);
        gKI1  = GetKnot(iV + 1, gNV);
        gKIJ  = GetKnot(iV + j, gNV);
        gKIJ1 = GetKnot(iV - j + 1, gNV);
        float c0V = (v - gKI) / (gKIJ - gKI);
        float c1V = (gKI1 - v) / (gKI1 - gKIJ1);
        basisV[j][iV] = c0V * basisV[j - 1][iV];
        basisV[j][iV - j] = c1V * basisV[j - 1][iV - j + 1];

        float c0V_pdv = (v_pdv - gKI) / (gKIJ - gKI);
        float c1V_pdv = (gKI1 - v_pdv) / (gKI1 - gKIJ1);
        basisV_pdv[j][iV] = c0V_pdv * basisV_pdv[j - 1][iV];
        basisV_pdv[j][iV - j] = c1V_pdv * basisV_pdv[j - 1][iV - j + 1];
    }

    // evaluate triangle interior
    [unroll]
    for (j = 2; j <= D; ++j)
    {
        // U
        [unroll(j - 1)]
        for (int k = iU - j + 1; k < iU; ++k)
        {
            float gKK   = GetKnot(k, gNU);
            float gKK1  = GetKnot(k + 1, gNU);
            float gKKJ  = GetKnot(k + j, gNU);
            float gKKJ1 = GetKnot(k + j + 1, gNU);

            float c0U = (u - gKK) / (gKKJ - gKK);
            float c1U = (gKKJ1 - u) / (gKKJ1 - gKK1);
            basisU[j][k] = c0U * basisU[j - 1][k] + c1U * basisU[j - 1][k + 1];

            float c0U_pdu = (u_pdu - gKK) / (gKKJ - gKK);
            float c1U_pdu = (gKKJ1 - u_pdu) / (gKKJ1 - gKK1);
            basisU_pdu[j][k] = c0U_pdu * basisU_pdu[j - 1][k] + c1U_pdu * basisU_pdu[j - 1][k + 1];
        }

        // V
        [unroll(j - 1)]
        for (k = iV - j + 1; k < iV; ++k)
        {
            float gKK   = GetKnot(k, gNV);
            float gKK1  = GetKnot(k + 1, gNV);
            float gKKJ  = GetKnot(k + j, gNV);
            float gKKJ1 = GetKnot(k + j + 1, gNV);

            float c0V = (v - gKK) / (gKKJ - gKK);
            float c1V = (gKKJ1 - v) / (gKKJ1 - gKK1);
            basisV[j][k] = c0V * basisV[j - 1][k] + c1V * basisV[j - 1][k + 1];

            float c0V_pdv = (v_pdv - gKK) / (gKKJ - gKK);
            float c1V_pdv = (gKKJ1 - v_pdv) / (gKKJ1 - gKK1);
            basisV_pdv[j][k] = c0V_pdv * basisV_pdv[j - 1][k] + c1V_pdv * basisV_pdv[j - 1][k + 1];
        }
    }

    // Accumulate the control points weighted by the degree-D basis values.
    // Positions are summed relative to gCenter, which is added back at the end.
    float3 pos_pdu, pos_pdv;
    pos.x = pos_pdu.x = pos_pdv.x = 0.0f;
    pos.y = pos_pdu.y = pos_pdv.y = 0.0f;
    pos.z = pos_pdu.z = pos_pdv.z = 0.0f;

    [unroll(D + 1)]
    for (int jU = iU - D; jU <= iU; ++jU)
    {
        [unroll(D + 1)]
        for (int jV = iV - D; jV <= iV; ++jV)
        {
            pos     += basisU[D][jU]     * basisV[D][jV]     * (gCP[jU + jV * gNU].xyz - gCenter);
            pos_pdu += basisU_pdu[D][jU] * basisV[D][jV]     * (gCP[jU + jV * gNU].xyz - gCenter);
            pos_pdv += basisU[D][jU]     * basisV_pdv[D][jV] * (gCP[jU + jV * gNU].xyz - gCenter);
        }
    }

    tan = normalize(pos_pdu - pos);
    float3 bTan = normalize(pos_pdv - pos);
    normal = normalize(cross(tan, bTan));
    pos += gCenter;
}
struct V_TexCoord
{
    float2 TexCoord : TEXCOORD;
};

// Vertex shader: forwards the texture coordinate unchanged.
V_TexCoord VS(V_TexCoord vin)
{
    // Just a pass through shader
    V_TexCoord vout;
    vout.TexCoord = vin.TexCoord;
    return vout;
}
struct PatchTess
{
    float EdgeTess[3] : SV_TessFactor;
    float InsideTess  : SV_InsideTessFactor;
};

// Patch-constant function: one tessellation factor for all edges and the
// interior, derived from gCenter.
PatchTess ConstantHS(InputPatch<V_TexCoord, 3> patch, uint patchID : SV_PrimitiveID)
{
    PatchTess pt;

    // Uniformly tessellate the patch.
    float tess = CalcTessFactor(gCenter);
    pt.EdgeTess[0] = tess;
    pt.EdgeTess[1] = tess;
    pt.EdgeTess[2] = tess;
    pt.InsideTess = tess;

    return pt;
}

// Hull shader: forwards each control point unchanged.
[domain("tri")]
[partitioning("fractional_odd")]
[outputtopology("triangle_cw")]
[outputcontrolpoints(3)]
[patchconstantfunc("ConstantHS")]
[maxtessfactor(64.0f)]
V_TexCoord HS(InputPatch<V_TexCoord, 3> p, uint i : SV_OutputControlPointID, uint patchId : SV_PrimitiveID)
{
    // Just a pass through shader
    V_TexCoord hout;
    hout.TexCoord = p[i].TexCoord;
    return hout;
}
cbuffer cbPerObject
{
    // ... other variables

    // Camera
    float4x4 gViewProj;
    float3 gEyePosW;

    // Tessellation
    float gMaxTessDistance;
    float gMinTessDistance;
    float gMinTessFactor;
    float gMaxTessFactor;
};

// Maps the distance between p and the eye to a tessellation factor.
// s is 0 at gMinTessDistance and 1 at gMaxTessDistance (clamped), and the
// result interpolates gMinTessFactor -> gMaxTessFactor by s^1.5.
float CalcTessFactor(float3 p)
{
    float d = distance(p, gEyePosW);
    float s = saturate((d - gMinTessDistance) / (gMaxTessDistance - gMinTessDistance));
    return lerp(gMinTessFactor, gMaxTessFactor, pow(s, 1.5f));
}
struct V_PosW_NormalW_TanW_TexCoord
{
    float3 PosW     : POSITION; // was misspelled "POSTION"
    float3 NormalW  : NORMAL;
    float3 TanW     : TANGENT;
    float2 TexCoord : TEXCOORD;
};

// Domain shader: interpolates the patch texture coordinates with the
// barycentric location and evaluates the surface at that coordinate.
[domain("tri")]
V_PosW_NormalW_TanW_TexCoord DS(PatchTess patchTess, float3 bary : SV_DomainLocation, const OutputPatch<V_TexCoord, 3> tri)
{
    float2 texCoord = bary.x*tri[0].TexCoord + bary.y*tri[1].TexCoord + bary.z*tri[2].TexCoord;

    V_PosW_NormalW_TanW_TexCoord dout;
    ComputePosNormalTangent(texCoord, dout.PosW, dout.NormalW, dout.TanW);
    dout.TexCoord = texCoord;

    return dout;
}
struct V_PosH_NormalW_TanW_TexCoord
{
    float4 PosH     : SV_POSITION;
    float3 NormalW  : NORMAL;
    float3 TanW     : TANGENT;
    float2 TexCoord : TEXCOORD;
};

// Geometry shader: emits every input triangle twice, the second time with
// two vertices swapped so that the opposite side is drawn as well.
[maxvertexcount(6)]
void GS(triangle V_PosW_NormalW_TanW_TexCoord gin[3], inout TriangleStream<V_PosH_NormalW_TanW_TexCoord> triStream)
{
    V_PosH_NormalW_TanW_TexCoord gout[6];

    [unroll] // just copy pasti'n
    for (int i = 0; i < 3; ++i)
    {
        float3 posW = gin[i].PosW;
        gout[i].PosH     = mul(float4(posW, 1.0f), gViewProj);
        gout[i].NormalW  = gin[i].NormalW;
        gout[i].TanW     = gin[i].TanW;
        gout[i].TexCoord = gin[i].TexCoord;
    }

    [unroll] // create the other side
    for (i = 3; i < 6; ++i)
    {
        float3 posW = gin[i - 3].PosW;
        gout[i].PosH     = mul(float4(posW, 1.0f), gViewProj);
        gout[i].NormalW  = gin[i - 3].NormalW;
        gout[i].TanW     = gin[i - 3].TanW;
        gout[i].TexCoord = gin[i - 3].TexCoord;
    }

    triStream.Append(gout[0]);
    triStream.Append(gout[1]);
    triStream.Append(gout[2]);
    triStream.RestartStrip();

    // reversed winding for the second copy
    triStream.Append(gout[3]);
    triStream.Append(gout[5]);
    triStream.Append(gout[4]);
}
// Pixel shader placeholder: the body is intentionally left to the reader and
// must return a float4 color before this compiles.
float4 PS(V_PosH_NormalW_TanW_TexCoord pin) : SV_Target
{
    // ... now what?! XD
}

technique11 BSplineDraw
{
    pass P0
    {
        SetVertexShader(CompileShader(vs_5_0, VS()));
        SetHullShader(CompileShader(hs_5_0, HS()));
        SetDomainShader(CompileShader(ds_5_0, DS()));
        SetGeometryShader(CompileShader(gs_5_0, GS()));
        SetPixelShader(CompileShader(ps_5_0, PS()));
    }
}
// A single particle that stores its own state and knows how to update and
// render itself.
class Particle
{
public:
    bool m_alive;
    Vec4d m_pos;
    Vec4d m_col;
    float time;
    // ... other fields

public:
    // ctors...

    void update(float deltaTime);
    void render();
};
std::vector<particle> particles; // update function: for (auto &p : particles) p.update(dt); // rendering code: for (auto &p : particles) p.render();
void kill(particleID) { ?? } void wake(particleID) { ?? }
Shader "Sample/Diffuse"
{
    Properties
    {
        _DiffuseTexture ("Diffuse Texture", 2D) = "white" {}
        _DiffuseTint ( "Diffuse Tint", Color) = (1, 1, 1, 1)
    }

    SubShader
    {
        Tags { "RenderType"="Opaque" }

        pass
        {
            Tags { "LightMode"="ForwardBase"}

            CGPROGRAM

            #pragma target 3.0
            #pragma fragmentoption ARB_precision_hint_fastest

            #pragma vertex vertShadow
            #pragma fragment fragShadow
            #pragma multi_compile_fwdbase

            #include "UnityCG.cginc"
            #include "AutoLight.cginc"

            sampler2D _DiffuseTexture;
            float4 _DiffuseTint;
            float4 _LightColor0;

            struct v2f
            {
                float4 pos : SV_POSITION;
                float3 lightDir : TEXCOORD0;
                float3 normal : TEXCOORD1;
                float2 uv : TEXCOORD2;
                LIGHTING_COORDS(3, 4)
            };

            // Vertex stage: outputs the position transformed by
            // UNITY_MATRIX_MVP, the texture coordinate, the light direction
            // from ObjSpaceLightDir, the normal, and the lighting coordinates.
            v2f vertShadow(appdata_base v)
            {
                v2f o;

                o.pos = mul(UNITY_MATRIX_MVP, v.vertex);
                o.uv = v.texcoord;
                o.lightDir = normalize(ObjSpaceLightDir(v.vertex));
                o.normal = normalize(v.normal).xyz;

                TRANSFER_VERTEX_TO_FRAGMENT(o);

                return o;
            }

            // Fragment stage: (ambient + attenuated N.L diffuse) * texture.
            float4 fragShadow(v2f i) : COLOR
            {
                float3 L = normalize(i.lightDir);
                float3 N = normalize(i.normal);

                float attenuation = LIGHT_ATTENUATION(i) * 2;
                float4 ambient = UNITY_LIGHTMODEL_AMBIENT * 2;

                float NdotL = saturate(dot(N, L));
                float4 diffuseTerm = NdotL * _LightColor0 * _DiffuseTint * attenuation;

                float4 diffuse = tex2D(_DiffuseTexture, i.uv);

                float4 finalColor = (ambient + diffuseTerm) * diffuse;

                return finalColor;
            }

            ENDCG
        }
    }
    FallBack "Diffuse"
}
Tags { "LightMode"="ForwardBase"}
#pragma multi_compile_fwdbase
#include "AutoLight.cginc"
// Vertex-to-fragment interpolators; LIGHTING_COORDS uses TEXCOORD3 and 4.
struct v2f
{
    float4 pos      : SV_POSITION;
    float3 lightDir : TEXCOORD0;
    float3 normal   : TEXCOORD1;
    float2 uv       : TEXCOORD2;
    LIGHTING_COORDS(3, 4)
};
// Vertex-to-fragment interpolators with an extra viewDir in TEXCOORD3;
// LIGHTING_COORDS therefore uses TEXCOORD4 and 5.
struct v2f
{
    float4 pos      : SV_POSITION;
    float3 lightDir : TEXCOORD0;
    float3 normal   : TEXCOORD1;
    float2 uv       : TEXCOORD2;
    float3 viewDir  : TEXCOORD3;
    LIGHTING_COORDS(4, 5)
};
// Vertex stage: fills the interpolators (position, uv, light direction from
// ObjSpaceLightDir, normal) and the lighting coordinates.
v2f vertShadow(appdata_base v)
{
    v2f o;

    o.pos      = mul(UNITY_MATRIX_MVP, v.vertex);
    o.uv       = v.texcoord;
    o.lightDir = normalize(ObjSpaceLightDir(v.vertex));
    o.normal   = normalize(v.normal).xyz;

    TRANSFER_VERTEX_TO_FRAGMENT(o);

    return o;
}
// Fragment stage: (ambient + attenuated N.L diffuse term) * texture color.
float4 fragShadow(v2f i) : COLOR
{
    float3 lightVec  = normalize(i.lightDir);
    float3 normalVec = normalize(i.normal);

    float  attenuation = LIGHT_ATTENUATION(i) * 2;
    float4 ambient     = UNITY_LIGHTMODEL_AMBIENT * 2;

    float  lambert     = saturate(dot(normalVec, lightVec));
    float4 diffuseTerm = lambert * _LightColor0 * _DiffuseTint * attenuation;

    float4 texel = tex2D(_DiffuseTexture, i.uv);

    return (ambient + diffuseTerm) * texel;
}
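|       | LH                     | RH                     |
|-------|------------------------|------------------------|
| look  | normalize(wLook - eye) | normalize(eye - wLook) |
| right | normalize(wUp x look)  | normalize(wUp x look)  |
| up    | normalize(look x right)| normalize(look x right)|
| A     | -dot(right, eye)       | -dot(right, eye)       |
| B     | -dot(up, eye)          | -dot(up, eye)          |
| C     | -dot(look, eye)        | -dot(look, eye)        |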
Figure 2: OpenGL (Left) and DirectX (Right) NDC
deviceContext>ClearDepthStencilView(depthStencilView, D3D11_CLEAR_DEPTH, 1.0f, 0); .... depthStencilDesc.DepthFunc = D3D11_COMPARISON_LESS_EQUAL; device>CreateDepthStencilState(&depthStencilDesc,&depthStencilState); deviceContext>OMSetDepthStencilState(depthStencilState, 1);Code 1: Code snippet settings for LH DirectX rendering
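|                            | LH                          | RH                             |
|----------------------------|-----------------------------|--------------------------------|
| D3D11_CLEAR_DEPTH          | 1.0                         | 0.0                            |
| depthStencilDesc.DepthFunc | D3D11_COMPARISON_LESS_EQUAL | D3D11_COMPARISON_GREATER_EQUAL |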
| LH                     | RH                     |
|------------------------|------------------------|
| glClearDepth(0)        | glClearDepth(1)        |
| glDepthFunc(GL_GEQUAL) | glDepthFunc(GL_LEQUAL) |
Figure 3: Incorrectly set depth function and clear for current projection
Figure 4: Correctly set depth function and clear for current projection
Figure 5: Projected depth with DirectX and OpenGL LH matrices (values used for calculation: near = 0.1, far = 1.0)
Figure 6: Projected depth with DirectX and OpenGL RH matrices (values used for calculation: near = 0.1, far = 1.0)
| LH | gl_Position.z = (-2.0) * log((-gl_Position.z) * C + 1.0) / log(far * C + 1.0) + 1.0 |
| RH | gl_Position.z = (2.0) * log((gl_Position.z) * C + 1.0) / log(far * C + 1.0) - 1.0 |
Figure 7: Projected depth with classic perspective and with logarithmic one in LH (values used for calculation: near = 0.1, far = 1.0, C = 1.0)
Figure 8: Projected depth with classic perspective and with logarithmic one in RH (values used for calculation: near = 0.1, far = 1.0, C = 1.0)
Matrix4x4 tmp = Matrix4x4::Invert(viewMatrix); tmp.Transpose(); Vector4 clipPlane = Vector4::Transform(clipPlane, tmp);
// sgn() of the plane's x and y components: 1, -1 or 0.
float xSign = (clipPlane.X > 0) ? 1.0f : ((clipPlane.X < 0) ? -1.0f : 0.0f);
float ySign = (clipPlane.Y > 0) ? 1.0f : ((clipPlane.Y < 0) ? -1.0f : 0.0f);

// Construct the vector directly; "Vector4 q = (a, b, 1, 1)" would apply the
// comma operator and only pass the last value.
Vector4 q(xSign, ySign, 1, 1);
// Scale q by the projection matrix entries.
q.X = q.X / projection[0][0];
q.Y = q.Y / projection[1][1];
q.Z = 1.0f;
q.W = (1.0f - projection[2][2]) / projection[3][2];
// Scale the clip plane by a = q.Z / dot(clipPlane, q).
const float planeDotQ = Vector4::Dot(clipPlane, q);
float a = q.Z / planeDotQ;

Vector4 m3 = a * clipPlane;
// General form of q using the projection matrix entries.
q.X = q.X / projection[0][0];
q.Y = q.Y / projection[1][1];
q.Z = 1.0 / projection[2][3];
// NOTE(review): the original mixed "Matrix.M[3][2]" with projection[..];
// it is taken to mean the same projection matrix - confirm.
q.W = (1.0 / projection[3][2]) - (projection[2][2] / (projection[2][3] * projection[3][2]));
// Scale the clip plane by a = (2 * projection[2][3] * q.Z) / dot(clipPlane, q),
// then offset its Z component by one.
const float planeDotQ = Vector4::Dot(clipPlane, q);
float a = (2.0f * projection[2][3] * q.Z) / planeDotQ;

Vector4 m3 = clipPlane * a;
m3.Z = m3.Z + 1.0f;
LH: m3.Z = m3.Z + projection[2][3]; //([2][3] = +1)
RH: m3.Z = m3.Z - projection[2][3]; //([2][3] = -1)
// Start from the original projection and overwrite its [i][2] entries with
// the components of m3.
Matrix4x4 res = projection;

res[0][2] = m3.X;
res[1][2] = m3.Y;
res[2][2] = m3.Z;
res[3][2] = m3.W;
childFrame.ToRoot = childFrame.TransformationMatrix * parentFrame.ToRoot;
childFrame.AnimToRoot = childFrame.AnimTransform * parentFrame.AnimToRoot;
Arm: time 0: rotation 0 time 1: rotation about some axis (x,y,z) by 90 degrees time 2: rotation 0
Spine: time 0.0: rot 1.6 (about axis) x,y,z time 0.1: rot 1.65 x,y,z time 0.2: rot 1.69 x,y,z ... time 1.9: rot 1.65 x,y,z time 2.0: rot 1.6 x,y,z Arm: time 0.0: rot 0.7 x,y,z time 0.1: rot 0.71 x,y,z time 0.2: rot 0.72 x,y,z ... time 1.9: rot 0.71 x,y,z time 2.0: rot 0.7 x,y,z Head: time 0.0: rot 0.6 x,y,z time 0.1: rot 0.6 x,y,z ...
// for the Arm bone MATRIX4X4 animMat = MatrixFromQuaternion( key[0].quaternion ) ); Frame* frame = FindFrameWithName( keyFrame.BoneName ); frame>TransformationMatrix = animMat;
// The key frames have been search and returned: // keyFrameBefore = 0 // keyFrameAfter = 1 Quaternion quatBefore = key[keyFrameBefore].quaternion; Quaternion quatAfter = key[keyFrameAfter].quaternion;
// Determine the fraction of the time between the two frames represented by t. float ratio = ( t  key[keyFrameBefore].time )/( key[keyFrameAfter].time  key[keyFrameBefore].time );
// The SLERP function (a math utility) returns a quaternion based on // ratio * quatBefore and (1.0  ratio) * quatAfter. // HOWEVER: know your math libraries! Some SLERP functions // may calculate the result using (1.0  ratio)*quatBefore and ratio*quatAfter // Quaternion quatAtTimeT = SLERP( quatBefore, quatAfter, ratio ); MATRIX4X4 animMat = MatrixFromQuaternion( quatAtTimeT ) ); Frame* frame = FindFrameWithName( keyFrame.BoneName ); frame>TransformationMatrix = animMat;
struct AnimationKey { float time; Quaterion quat; Vector scale, trans; }; struct Animation { String frameName; // bonename, nodename // trackKeyIndex is the index in AnimTrack.trackKeys for interpolated timedKeys // for this particular frame/bone/node int trackKeyIndex; float maxTime; // used for checking conistency among Animations in an animationset std::vector<AnimationKey> animKeys; }; struct AnimationSet { bool checkedAgainstRootFrame; // used to verfiy the compatibility with the frame hierarchy String animationSetName; // maybe "Walk," "Run," "Idle," or other convenient description. std::vector<Animation> animations; double currentTime, period; // the current time ("t" in the above discussions) and the period }; struct RootFrame { bool checkedAgainstAnimationSets; Frame* _hierarchyRootFrame; }; // parameters that may be useful for a particular animation set struct AnimTrackDesc { float speed; // allows for displaying animations faster or slower than asmodeled. Not discussed. bool enable; // determines whether the animationset key frames are to be processed. }; // when 2 animationsets are to be blended, a BlendEvent is initiated // describing which track buffers are to be used for the blend struct BlendEvent { int fromTrack, toTrack; // which buffers float transitionTime; // how long the transition takes float curTime; // how far along in the transition time }; struct AnimTrack { int animSet; // animationSet currently assigned to this track std::vector<AnimationKey> trackKeys; // the track buffer AnimTrackDesc desc; };
class AnimController { public: AnimController() { initialized = false; } // no explicit destructor is needed. However, if a user should need to add a destructor, // follow the Rule of Three when doing so. See Disclaimer below for further information. bool Init(); bool AdvanceTime(double deltaTime); bool SetHierarchy(Frame* newRootFrame); bool SetTicksPerSecond(int newTicks, int whichAnimationSet); size_t GetNumAnimationSets() { return _animSets.size(); } bool GetAnimationSetName(int animSetNum, std::string& animName); size_t GetNumTracks() { return _animTracks.size(); } bool TrackSetAnimationSet(int animSetNum, int trackNum); bool TrackSetAnimationSet(std::string animSetName, int trackNum); int CreateTrack(AnimTrackDesc* trackDesc); // returns the index for the new track bool DeleteTrack( int trackNum ); bool GetTrackDescription( int trackNum, AnimTrackDesc* trackDesc); bool SetTrackDescription( int trackNum, AnimTrackDesc* trackDesc); bool GetTrackEnable(int trackNum); bool SetTrackEnable(int trackNum, bool enable); bool TransitionTracks(int fromTrack, int toTrack, float transitionTime); // start a blend event size_t GetNumBlendEvents() { return _trackEvents.size(); } bool GetBlendEvent(int eventNum, BlendEvent* event); bool initialized; // methods // CheckCompatibility  ensures the root frame hierarchy frameNames // match the frameNames in the animation sets. Specific implementation // of this method is not discussed in this article. bool CheckCompatibility(); // are the animation sets applicable to the frame hierarchy? bool BuildFrameNameIndex(); // sets Animation::trackKeyIndex bool SetCurTicks(DWORD animSetNum); bool InterpolateAnimations(Animation& anim, double fTime, std::vector<AnimTrackKey>& trackKeys); bool InterpolateKeyFrame(AnimationKey& animKey, double fTime); // attributes RootFrame _rootFrame; // frame hierarchy access (for storing matrices, finding names, etc. 
std::vector<AnimationSet> _animSets; // all the animation sets available std::vector<AnimTrack> _animTracks; // all the tracks std::vector<BlendEvent> _trackEvents; // information for blending animation sets std::vector<std::string> frameNames; // this of hierarchy frame names used to index into track buffers };
// // advance the time. // calculate animation matrices and store matrices in hierarchy TransformationMatrix // deltaTime is NOT the elapsed game time, but the change in time since the last render cycle time // For many applications, this is the same deltatime used to update other scene objects. // bool AnimController::AdvanceTime(double deltaTime) { if (!initialized) return false; // If an animation controller is intended to process just one track, or just one blend event // this section of code can be revised to enforce that assumption. // The code presented here allows for generalizing "track events" to do more // than just blending two animation sets for (size_t track = 0; track < _animTracks.size(); track++) // check the status of all tracks { // animation sets are rendered only when the associated track is enabled // Also check that the animation set associated with the track is "valid" if (_animTracks[track].desc.enable && _animTracks[track].animSet < (int)_animSets.size()) { UINT animSetNum = _animTracks[track].animSet; // variable convenient for looping // advance the local time for the animation set. _animSets[animSetNum].currentTime += deltaTime; // adjust the time if necessary. 
See SetCurTicks code below if (!SetCurTicks(animSetNum)) return false; // loop through animations for (size_t i = 0; i < _animSets[animSetNum].animations.size(); i++) { if( !InterpolateAnimations(_animSets[animSetNum].animations[i], _animSets[animSetNum].currentTime, _animTracks[track].trackKeys) ) return false; // something went wrong } } } MATRIX rot, scale, translate; // parameters used for interpolating // The concept for this animation controller is to: // Process A Blend Event // OR // Process a single track // // Though _trackEvents allows for other types of blending // and events, for the purpose of this article it is assumed // that there will be either 0 or just 1 blend occurring at a time if (_trackEvents.size()) { _trackEvents[0].curTime += deltaTime; // bump the progression of the blend if (_trackEvents[0].curTime > _trackEvents[0].transitionTime) // done with this event { SetTrackEnable(_trackEvents[0].fromTrack, false); // disable the "from" animation set // delete the event _trackEvents.clear(); } else { // to reduce the clutter of the calcuations, an iterator is used ONLY // for clarity. iter is, in fact, just _trackEvents[0]. 
std::vector<BlendEvent>::iterator iter = _trackEvents.begin(); float blendFactor = float(iter>curTime / iter>transitionTime); // get the buffers for both the "from" track and the "to" track std::vector<AnimationKey>& vFrom = _animTracks[iter>fromTrack].trackKeys; std::vector<AnimationKey>& vTo = _animTracks[iter>toTrack].trackKeys; // declare variables to use in the blending Quaternion quatFinal, quatFrom, quatTo; Vector scaleFinal, scaleFrom, scaleTo, translateFinal, translateFrom, translateTo; // loop through every animation, blend the results of the two animation sets // and send the animation matrix off to the frame hierarchy for (size_t tk = 0; tk < vFrom.size(); tk++) // trackKeys.size() are all the same size { // grab values from the track buffers quatFrom = vFrom[tk].quat; quatTo = vTo[tk].quat; scaleFrom = vFrom[tk].scale; scaleTo = vTo[tk].scale; translateFrom = vFrom[tk].trans; translateTo = vTo[tk].trans; // blend the quats, scales, and translations. Calculate the animation matrices. // The following line demonstrates possible concatenations IF the function // forms allow it. MatrixFromQuaternion(&rot, QuaternionSlerp(&quatFinal, &quatFrom, &quatTo, blendFactor)); // a bit more formally, calculate the blended scale scaleFinal = (1.0f  blendFactor)*scaleFrom + blendFactor * scaleTo; // calulate the blended translation translateTo = (1.0f  blendFactor)*translateFrom + blendFactor * translateTo; // create the scale and translation matrices MatrixScaling(&scale, scaleFinal.x, scaleFinal.y, scaleFinal.z); MatrixTranslation(&translate, translateFinal.x, translateFinal.y, translateFinal.z); // find the frame in the hierarchy with the name equivalent to the animation // The array "frameNames" is assumed to be an array of frame names in indexed order Frame* frame = FrameWithName(frameNames[tk], _rootFrame._hierarchyRootFrame); if (frame == NULL) return false; // GLOBALMSG // calculate and store the animation matrix. 
frame>TransformationMatrix = rot * scale * translate; } } } // if a blend is not progress, just update animations from the (hopefully) only enabled track else { // set Transformation matrix with track results for (size_t track = 0; track < _animTracks.size(); track++) { if (_animTracks[track].desc.enable) { std::vector<AnimTrackKey>& v = _animTracks[track].trackKeys; for (size_t tk = 0; tk < v.size(); tk++) { MatrixFromQuaternion(&rot, &v[tk].quat); MatrixScaling(&scale, v[tk].scale.x, v[tk].scale.y, v[tk].scale.z); MatrixTranslation(&translate, v[tk].trans.x, v[tk].trans.y, v[tk].trans.z); Frame* frame = FrameWithName(frameNames[tk], _rootFrame._hierarchyRootFrame); if (frame == NULL) return false; // GLOBALMSG? frame)>TransformationMatrix = rot * scale * translate; } } } } return true; }
// // the function name is a carry over from earlier implementations // when unsigned integers for key frame times were used. // bool AnimController::SetCurTicks(size_t animSetNum) { if (animSetNum >= _animSets.size()) return false; // error condition // convenient variables for clarity. // Also easier than coding "_animSets[animSetNum].xxx" multiple times float curTime = _animSets[animSetNum].currentTime; // was just bumped in AdvanceTime float period = _animSets[animSetNum].period; // NOTE: the following will cause the animation to LOOP from the end of the animation // back to the beginning. // Other actions which could be taken: //  pingpong: at the end of an action, reverse back through the keyframes to the beginning, etc. //  terminate the animation: perhaps provide a callback to report same while ( curTime >= period ) curTime = period; // loop within the animation // the result of this routine should be that // currentTime is >= 0 and less than the period. _animSets[animSetNum].currentTime = curTime; return true; }
// this routine finds a pair of key frames which bracket the animation time. // Interpolated values are calculated and stored in the the track buffer (trackKeys) bool AnimController::InterpolateAnimations(Animation& anim, float fTime, std::vector<AnimKey>& trackKeys); { Quaternion quat; if (anim.animKeys.size() > 1) // more than just a time==0 key { // find timedkey with time >= fTime size_t i = 0; // find a pair of key frames to interpolate while ( i < animKeys.size() && animKeys[i].time < fTime ) i++; if ( i >= animKeys.size() ) // should not happen, but handle it { i = animKeys.size()1; // use the last keyframe fTime = animKeys[i].time; } // animKeys[i].time >= fTime. That's the keyframe after the desired time // so animKeys[i1] is the keyframe before the desired time if ( i > 0 ) { float ratio = (fTime  animKey[i1].time) / (animKey[i].time  animKey[i1].time); Slerp or NLerp(&quat, &animKey[i1].quat, &animKey[i].quat, ratio); trackKeys[anim.trackKeyIndex.quat = quat; trackKeys[anim.trackKeyIndex].scale = (1.0fratio)*animKey[i1].scale + ratio * animKey[i].scale; trackKeys[anim.trackKeyIndex].trans = (1.0fratio)*animKey[i1].trans + ratio * animKey[i].trans; } else // use the time=0 keyframe { trackKeys[anim.trackKeyIndex].quat = animKey[0].quat; trackKeys[anim.trackKeyIndex].scale = animKey[0].scale; trackKeys[anim.trackKeyIndex].trans = animKey[0].trans; } } return true; }
// set up the blending of two tracks bool AnimController::TransitionTracks(size_t fromTrack, size_t toTrack, float transitionTime) { if (fromTrack >= _animTracks.size()  toTrack >= _animTracks.size()  fromTrack == toTrack  transitionTime < 0.0f) return false; // error condition BlendEvent blendEvent; blendEvent.fromTrack = fromTrack; blendEvent.toTrack = toTrack; blendEvent.transitionTime = transitionTime; blendEvent.curTime = 0.0f; _trackEvents.push_back(blendEvent); SetTrackEnable(fromTrack, true); SetTrackEnable(toTrack, true); return true; }
// This routine should be used when _rootFrame._hierarchyRootFrame has been set and animation sets // have been loaded. Must be done before any use of AdvanceTime. // You may want to tie this routine to the requirements to be considered "initialized." bool AnimationController::BuildFrameNameIndex() { frameNames.clear(); // start clean // work through the frame hierarchy, storing the name of each frame name AddFrameName( _rootFrame._hierarchyRootFrame ); // now that all names in the hierarchy have been found, // loop through the animations, checking names and setting the track buffer index for( size_t animSet = 0; animSet < _animSets.size(); animSet++) { for( size_t anim = 0; anim < _animSets[animSet].animations.size(); anim++ ) { if( (_animSets[animSet].animations[anim].trackKeyIndex = IndexForFrameName( _animSets[animSet].animations[anim].frameName )) < 0 ) return false; } } return true; } // doesn't appear in class methods above, but requires access to frameNames. void AnimationController::AddFrameName( Frame* frame ) { frameNames.pushback( std::string(frame>frameName) ); for each Child in frame: AddFrameName( Child ); } // doesn't appear in class methods above, but requires access to frameNames. int AnimationController::IndexForFrameName( std::string& frameName ) { for( size_t i=0; i < frameNames.size(); i++ ) { if( frameNames[i] == frameName ) return i; } return 1; // name not found }
What to cut away to make this rig simpler?
Have fun
Andreas N. Grontved
// Builds mTriangles and mVertices from the mesh attached to inNode: one vertex per
// polygon corner, carrying position, normal, first UV set and the blending info of
// its control point. Afterwards mControlPoints is deleted and cleared.
void FBXExporter::ProcessMesh(FbxNode* inNode)
{
    FbxMesh* currMesh = inNode->GetMesh();

    mTriangleCount = currMesh->GetPolygonCount();
    int vertexCounter = 0;
    mTriangles.reserve(mTriangleCount);

    for (unsigned int i = 0; i < mTriangleCount; ++i)
    {
        XMFLOAT3 normal[3];
        XMFLOAT2 UV[3][2];
        Triangle currTriangle;
        mTriangles.push_back(currTriangle);

        for (unsigned int j = 0; j < 3; ++j)
        {
            int ctrlPointIndex = currMesh->GetPolygonVertex(i, j);
            CtrlPoint* currCtrlPoint = mControlPoints[ctrlPointIndex];

            ReadNormal(currMesh, ctrlPointIndex, vertexCounter, normal[j]);
            // We only have diffuse texture
            for (int k = 0; k < 1; ++k)
            {
                ReadUV(currMesh, ctrlPointIndex, currMesh->GetTextureUVIndex(i, j), k, UV[j][k]);
            }

            PNTIWVertex temp;
            temp.mPosition = currCtrlPoint->mPosition;
            temp.mNormal = normal[j];
            temp.mUV = UV[j][0];
            // Copy the blending info from each control point.
            // The index has its own name so it no longer shadows the triangle index i.
            for (unsigned int b = 0; b < currCtrlPoint->mBlendingInfo.size(); ++b)
            {
                VertexBlendingInfo currBlendingInfo;
                currBlendingInfo.mBlendingIndex = currCtrlPoint->mBlendingInfo[b].mBlendingIndex;
                currBlendingInfo.mBlendingWeight = currCtrlPoint->mBlendingInfo[b].mBlendingWeight;
                temp.mVertexBlendingInfos.push_back(currBlendingInfo);
            }
            // Sort the blending info so that later we can remove
            // duplicated vertices
            temp.SortBlendingInfoByWeight();

            mVertices.push_back(temp);
            mTriangles.back().mIndices.push_back(vertexCounter);
            ++vertexCounter;
        }
    }

    // Now mControlPoints has served its purpose
    // We can free its memory
    for (auto itr = mControlPoints.begin(); itr != mControlPoints.end(); ++itr)
    {
        delete itr->second;
    }
    mControlPoints.clear();
}
// inNode is the Node in this FBX Scene that contains the mesh // this is why I can use inNode>GetMesh() on it to get the mesh void FBXExporter::ProcessControlPoints(FbxNode* inNode) { FbxMesh* currMesh = inNode>GetMesh(); unsigned int ctrlPointCount = currMesh>GetControlPointsCount(); for(unsigned int i = 0; i < ctrlPointCount; ++i) { CtrlPoint* currCtrlPoint = new CtrlPoint(); XMFLOAT3 currPosition; currPosition.x = static_cast<float>(currMesh>GetControlPointAt(i).mData[0]); currPosition.y = static_cast<float>(currMesh>GetControlPointAt(i).mData[1]); currPosition.z = static_cast<float>(currMesh>GetControlPointAt(i).mData[2]); currCtrlPoint>mPosition = currPosition; mControlPoints[i] = currCtrlPoint; } }
void FBXExporter::ReadNormal(FbxMesh* inMesh, int inCtrlPointIndex, int inVertexCounter, XMFLOAT3& outNormal) { if(inMesh>GetElementNormalCount() < 1) { throw std::exception("Invalid Normal Number"); } FbxGeometryElementNormal* vertexNormal = inMesh>GetElementNormal(0); switch(vertexNormal>GetMappingMode()) { case FbxGeometryElement::eByControlPoint: switch(vertexNormal>GetReferenceMode()) { case FbxGeometryElement::eDirect: { outNormal.x = static_cast<float>(vertexNormal>GetDirectArray().GetAt(inCtrlPointIndex).mData[0]); outNormal.y = static_cast<float>(vertexNormal>GetDirectArray().GetAt(inCtrlPointIndex).mData[1]); outNormal.z = static_cast<float>(vertexNormal>GetDirectArray().GetAt(inCtrlPointIndex).mData[2]); } break; case FbxGeometryElement::eIndexToDirect: { int index = vertexNormal>GetIndexArray().GetAt(inCtrlPointIndex); outNormal.x = static_cast<float>(vertexNormal>GetDirectArray().GetAt(index).mData[0]); outNormal.y = static_cast<float>(vertexNormal>GetDirectArray().GetAt(index).mData[1]); outNormal.z = static_cast<float>(vertexNormal>GetDirectArray().GetAt(index).mData[2]); } break; default: throw std::exception("Invalid Reference"); } break; case FbxGeometryElement::eByPolygonVertex: switch(vertexNormal>GetReferenceMode()) { case FbxGeometryElement::eDirect: { outNormal.x = static_cast<float>(vertexNormal>GetDirectArray().GetAt(inVertexCounter).mData[0]); outNormal.y = static_cast<float>(vertexNormal>GetDirectArray().GetAt(inVertexCounter).mData[1]); outNormal.z = static_cast<float>(vertexNormal>GetDirectArray().GetAt(inVertexCounter).mData[2]); } break; case FbxGeometryElement::eIndexToDirect: { int index = vertexNormal>GetIndexArray().GetAt(inVertexCounter); outNormal.x = static_cast<float>(vertexNormal>GetDirectArray().GetAt(index).mData[0]); outNormal.y = static_cast<float>(vertexNormal>GetDirectArray().GetAt(index).mData[1]); outNormal.z = static_cast<float>(vertexNormal>GetDirectArray().GetAt(index).mData[2]); } break; default: throw 
std::exception("Invalid Reference"); } break; } }
// Fetch the mesh's normal element at index 0.
FbxGeometryElementNormal* vertexNormal = inMesh->GetElementNormal(0);
// Translate the polygon-vertex counter into an index through the element's index array.
int index = vertexNormal->GetIndexArray().GetAt(inVertexCounter);
void FBXExporter::ProcessSkeletonHierarchy(FbxNode* inRootNode) { for (int childIndex = 0; childIndex < inRootNode>GetChildCount(); ++childIndex) { FbxNode* currNode = inRootNode>GetChild(childIndex); ProcessSkeletonHierarchyRecursively(currNode, 0, 0, 1); } } // inDepth is not needed here, I used it for debug but forgot to remove it void FBXExporter::ProcessSkeletonHierarchyRecursively(FbxNode* inNode, int inDepth, int myIndex, int inParentIndex) { if(inNode>GetNodeAttribute() && inNode>GetNodeAttribute()>GetAttributeType() && inNode>GetNodeAttribute()>GetAttributeType() == FbxNodeAttribute::eSkeleton) { Joint currJoint; currJoint.mParentIndex = inParentIndex; currJoint.mName = inNode>GetName(); mSkeleton.mJoints.push_back(currJoint); } for (int i = 0; i < inNode>GetChildCount(); i++) { ProcessSkeletonHierarchyRecursively(inNode>GetChild(i), inDepth + 1, mSkeleton.mJoints.size(), myIndex); } }
// For every skin cluster of the mesh on inNode: stores the joint's inverse bind pose,
// records the joint's weight on each control point it affects, and samples the
// joint's animation at 24 frames per second into a linked list of Keyframes.
// Finally pads every control point's blending info up to four entries.
void FBXExporter::ProcessJointsAndAnimations(FbxNode* inNode)
{
    FbxMesh* currMesh = inNode->GetMesh();
    unsigned int numOfDeformers = currMesh->GetDeformerCount();
    // This geometry transform is something I cannot understand
    // I think it is from MotionBuilder
    // If you are using Maya for your models, 99% this is just an
    // identity matrix
    // But I am taking it into account anyways......
    FbxAMatrix geometryTransform = Utilities::GetGeometryTransformation(inNode);

    // A deformer is a FBX thing, which contains some clusters
    // A cluster contains a link, which is basically a joint
    // Normally, there is only one deformer in a mesh
    for (unsigned int deformerIndex = 0; deformerIndex < numOfDeformers; ++deformerIndex)
    {
        // There are many types of deformers in Maya,
        // We are using only skins, so we see if this is a skin.
        // static_cast: this is a downcast within one class hierarchy.
        FbxSkin* currSkin = static_cast<FbxSkin*>(currMesh->GetDeformer(deformerIndex, FbxDeformer::eSkin));
        if (!currSkin)
        {
            continue;
        }

        unsigned int numOfClusters = currSkin->GetClusterCount();
        for (unsigned int clusterIndex = 0; clusterIndex < numOfClusters; ++clusterIndex)
        {
            FbxCluster* currCluster = currSkin->GetCluster(clusterIndex);
            std::string currJointName = currCluster->GetLink()->GetName();
            unsigned int currJointIndex = FindJointIndexUsingName(currJointName);
            FbxAMatrix transformMatrix;
            FbxAMatrix transformLinkMatrix;
            FbxAMatrix globalBindposeInverseMatrix;

            currCluster->GetTransformMatrix(transformMatrix); // The transformation of the mesh at binding time
            currCluster->GetTransformLinkMatrix(transformLinkMatrix); // The transformation of the cluster(joint) at binding time from joint space to world space
            globalBindposeInverseMatrix = transformLinkMatrix.Inverse() * transformMatrix * geometryTransform;

            // Update the information in mSkeleton
            mSkeleton.mJoints[currJointIndex].mGlobalBindposeInverse = globalBindposeInverseMatrix;
            mSkeleton.mJoints[currJointIndex].mNode = currCluster->GetLink();

            // Associate each joint with the control points it affects
            unsigned int numOfIndices = currCluster->GetControlPointIndicesCount();
            for (unsigned int i = 0; i < numOfIndices; ++i)
            {
                BlendingIndexWeightPair currBlendingIndexWeightPair;
                currBlendingIndexWeightPair.mBlendingIndex = currJointIndex;
                currBlendingIndexWeightPair.mBlendingWeight = currCluster->GetControlPointWeights()[i];
                mControlPoints[currCluster->GetControlPointIndices()[i]]->mBlendingInfo.push_back(currBlendingIndexWeightPair);
            }

            // Get animation information
            // Now only supports one take
            FbxAnimStack* currAnimStack = mFBXScene->GetSrcObject<FbxAnimStack>(0);
            FbxString animStackName = currAnimStack->GetName();
            mAnimationName = animStackName.Buffer();
            FbxTakeInfo* takeInfo = mFBXScene->GetTakeInfo(animStackName);
            FbxTime start = takeInfo->mLocalTimeSpan.GetStart();
            FbxTime end = takeInfo->mLocalTimeSpan.GetStop();
            mAnimationLength = end.GetFrameCount(FbxTime::eFrames24) - start.GetFrameCount(FbxTime::eFrames24) + 1;
            // currAnim always points at the link that receives the next Keyframe
            Keyframe** currAnim = &mSkeleton.mJoints[currJointIndex].mAnimation;

            for (FbxLongLong i = start.GetFrameCount(FbxTime::eFrames24); i <= end.GetFrameCount(FbxTime::eFrames24); ++i)
            {
                FbxTime currTime;
                currTime.SetFrame(i, FbxTime::eFrames24);
                *currAnim = new Keyframe();
                (*currAnim)->mFrameNum = i;
                FbxAMatrix currentTransformOffset = inNode->EvaluateGlobalTransform(currTime) * geometryTransform;
                (*currAnim)->mGlobalTransform = currentTransformOffset.Inverse() * currCluster->GetLink()->EvaluateGlobalTransform(currTime);
                currAnim = &((*currAnim)->mNext);
            }
        }
    }

    // Some of the control points only have less than 4 joints
    // affecting them.
    // For a normal renderer, there are usually 4 joints
    // I am adding more dummy joints if there isn't enough
    BlendingIndexWeightPair currBlendingIndexWeightPair;
    currBlendingIndexWeightPair.mBlendingIndex = 0;
    currBlendingIndexWeightPair.mBlendingWeight = 0;
    for (auto itr = mControlPoints.begin(); itr != mControlPoints.end(); ++itr)
    {
        // `< 4` stops at exactly four entries; the former `<= 4` produced five.
        for (unsigned int i = itr->second->mBlendingInfo.size(); i < 4; ++i)
        {
            itr->second->mBlendingInfo.push_back(currBlendingIndexWeightPair);
        }
    }
}
// This geometry transform is something I cannot understand // I think it is from MotionBuilder // If you are using Maya for your models, 99% this is just an // identity matrix // But I am taking it into account anyways...... FbxAMatrix geometryTransform = Utilities::GetGeometryTransformation(inNode);
// Builds a matrix from the node's geometric translation, rotation and scaling
// (all read with FbxNode::eSourcePivot).
// Throws std::exception when inNode is null.
FbxAMatrix Utilities::GetGeometryTransformation(FbxNode* inNode)
{
    if (!inNode)
    {
        throw std::exception("Null for mesh geometry");
    }

    const FbxVector4 lT = inNode->GetGeometricTranslation(FbxNode::eSourcePivot);
    const FbxVector4 lR = inNode->GetGeometricRotation(FbxNode::eSourcePivot);
    const FbxVector4 lS = inNode->GetGeometricScaling(FbxNode::eSourcePivot);

    return FbxAMatrix(lT, lR, lS);
}
FbxAMatrix transformMatrix; FbxAMatrix transformLinkMatrix; FbxAMatrix globalBindposeInverseMatrix; currCluster>GetTransformMatrix(transformMatrix); // The transformation of the mesh at binding time currCluster>GetTransformLinkMatrix(transformLinkMatrix); // The transformation of the cluster(joint) at binding time from joint space to world space globalBindposeInverseMatrix = transformLinkMatrix.Inverse() * transformMatrix * geometryTransform; // Update the information in mSkeleton mSkeleton.mJoints[currJointIndex].mGlobalBindposeInverse = globalBindposeInverseMatrix;
// Associate each joint with the control points it affects unsigned int numOfIndices = currCluster>GetControlPointIndicesCount(); for (unsigned int i = 0; i < numOfIndices; ++i) { BlendingIndexWeightPair currBlendingIndexWeightPair; currBlendingIndexWeightPair.mBlendingIndex = currJointIndex; currBlendingIndexWeightPair.mBlendingWeight = currCluster>GetControlPointWeights()[i]; mControlPoints[currCluster>GetControlPointIndices()[i]]>mBlendingInfo.push_back(currBlendingIndexWeightPair); }
// Get animation information // Now only supports one take FbxAnimStack* currAnimStack = mFBXScene>GetSrcObject<FbxAnimStack>(0); FbxString animStackName = currAnimStack>GetName(); mAnimationName = animStackName.Buffer(); FbxTakeInfo* takeInfo = mFBXScene>GetTakeInfo(animStackName); FbxTime start = takeInfo>mLocalTimeSpan.GetStart(); FbxTime end = takeInfo>mLocalTimeSpan.GetStop(); mAnimationLength = end.GetFrameCount(FbxTime::eFrames24)  start.GetFrameCount(FbxTime::eFrames24) + 1; Keyframe** currAnim = &mSkeleton.mJoints[currJointIndex].mAnimation; for (FbxLongLong i = start.GetFrameCount(FbxTime::eFrames24); i <= end.GetFrameCount(FbxTime::eFrames24); ++i) { FbxTime currTime; currTime.SetFrame(i, FbxTime::eFrames24); *currAnim = new Keyframe(); (*currAnim)>mFrameNum = i; FbxAMatrix currentTransformOffset = inNode>EvaluateGlobalTransform(currTime) * geometryTransform; (*currAnim)>mGlobalTransform = currentTransformOffset.Inverse() * currCluster>GetLink()>EvaluateGlobalTransform(currTime); currAnim = &((*currAnim)>mNext); }
FbxAMatrix input; //Assume this matrix is the one to be converted. FbxVector4 translation = input.GetT(); FbxVector4 rotation = input.GetR(); translation.Set(translation.mData[0], translation.mData[1], translation.mData[2]); // This negate Z of Translation Component of the matrix rotation.Set(rotation.mData[0], rotation.mData[1], rotation.mData[2]); // This negate X,Y of Rotation Component of the matrix // These 2 lines finally set "input" to the eventual converted result input.SetT(translation); input.SetR(rotation);
-- during render (or maybe update, but I have never seen that)
-- the button is also drawn by this call, so if we do not call this
-- function the button does not exist anymore
do_button("my label", x, y, function() print("I got clicked") end)
-- during some kind of init of a scene, for example
local button = create_button("my label", x, y)
button:set_callback(function() print("I got clicked") end)

-- later in render (this will draw all gui elements we have created)
draw_gui()
-- here are two versions of the immediate mode that do require an id,
-- but the id just needs to be unique per scene
ui_button("buttonid", "my label", onclick):draw(x, y)
ui_button({id = "buttonid", label = "my label", onclick = print}):draw(x, y)

-- this is what you do if you want to create the button beforehand
-- this becomes useful when dealing with listboxes and more advanced controls
local button = ui_button({label = "my_label", onclick = print})

-- in both cases the control comes to life when calling the draw function
button:draw(x, y)
-- the push and pop is a stack system of render states and in this case
-- it keeps the translation local to the code between them
push()
for i = 1, 10, 1 do
    -- this is the only place that knows about this input box
    -- it is not created in some init function, but we need the id
    -- so it can keep track of itself the next time it gets drawn
    -- after the first call the ui_inputbox function will return the same
    -- object
    ui_inputbox({id = i, value = string.format("input #%i", i)}):draw()
    -- this will adjust each element 40 units down from the last one
    add_translation(0, 40)
end
pop()

-- the input box draw function would then look something like this
function draw(self)
    local width = self.width
    local height = self.height

    set_blend_color(1, 1, 1, 1)
    -- set texture for complete gui texture sheet
    set_texture(gui_texture_id)
    draw_rect(...) -- here the uv data would go in to grab the right part

    -- set font, and this will trigger another set_texture internally
    set_text_font("arial.ttf")
    set_text_size(16)
    set_text_align(0, 0.5)
    -- this function is essentially just calling multiple
    -- draw rects internally for each character to be drawn
    draw_text_area(text, 0, 0, width, height)
end
// Selects the texture used by subsequent draw operations.
// A null texture leaves the binding alone and sets the material's texture_alpha to 0;
// otherwise the texture's pool index and a texture_alpha of 1 are written to the
// active material state.
void IntermediateRenderer::bind_texture(Texture * texture)
{
    // this is a texture pool that contains several arrays of similar sized textures
    // lets say we want to bind texture A and that texture already exists in the pool
    // then if we have a different array bound we must flush but otherwise we just use
    // another index for the next operations since texture A was already in the
    // current active array texture
    auto mat = materials.get_active_state();

    if (texture == NULL)
    {
        // we dont need to unbind anything just reduce the impact of the texture to 0
        mat->texture_alpha = 0.0f;
    }
    else
    {
        unsigned int texture_index = 0;
        if (texture_pool.bind(texture, &texture_index, std::bind(&IntermediateRenderer::flush, this)))
        {
            // this means we flushed
            // this will start a new draw call
            // refresh the state, usually means we grab the first
            // material index again (0)
            mat = materials.get_active_state();
        }
        // just set the constant buffer values
        // and unless we flushed nothing will change
        // we will just continue to build our vertex buffer.
        // static_cast converts the integer index to its float value; a
        // reinterpret_cast from unsigned int to float is not a valid conversion.
        mat->texture_index = static_cast<float>(texture_index);
        mat->texture_alpha = 1.0f;
    }
}
void IntermediateRenderer::draw_rect(const RECT_DESC & desc) { // this will switch what buffers we are pushing data to // so even if we switch from trianglelist to linelist // we dont need to flush but the rendering order will be wrong set_draw_topology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); // here we just get the currently active material and model states // model contains transformation data auto mat_id = materials.use_active_state(); auto model_id = models.use_active_state(); push_stream_ids(6, model_id, mat_id); // currently I am not using any index list, but might do in the future if I feel // I could benefit from it // its important to keep these sizes known at compile time // so we dont need to allocate temporary space on the heap somewhere Vector3 position_data[6] = { Vector3(desc.x, desc.y, 0), Vector3(desc.x + desc.width, desc.y, 0), Vector3(desc.x, desc.y + desc.height, 0), Vector3(desc.x, desc.y + desc.height, 0), Vector3(desc.x + desc.width, desc.y, 0), Vector3(desc.x + desc.width, desc.y + desc.height, 0) }; Vector2 texcoord_data[6] = { Vector2(desc.u1, desc.v1), Vector2(desc.u2, desc.v1), Vector2(desc.u1, desc.v2), Vector2(desc.u1, desc.v2), Vector2(desc.u2, desc.v1), Vector2(desc.u2, desc.v2) }; // i will switch this from float4 to an unsigned int // in the future so each vertex becomes much smaller // the desc.color_top and desc.color_bottom are already // uint32 formats Vector4 ctop(desc.color_top); Vector4 cbottom(desc.color_bottom); Vector4 color_data[6] = { ctop, ctop, cbottom, cbottom, ctop, cbottom, }; // this will just copy in our stack data to the vertex buffers position_stream>push(position_data); texcoord_stream>push(texcoord_data); color_stream>push(color_data); }
// instead of a normal array, we use an array of textures
Texture2DArray Texture : register(t0);

// each material is 8 floats
struct Material
{
    float4 color;
    float texture_index;
    float texture_alpha;
    float a; // padding
    float b; // padding
};

// by having 256 different materials at the same time
// we can draw 256 different entities in only one draw call
cbuffer MaterialBuffer : register(b0)
{
    Material material[256];
};

struct Vertex
{
    float4 position : SV_Position;
    float3 vposition : Position0;
    float3 normal : Normal0;
    float2 uv : Texcoord0;
    float4 color : Color0;
    // this is how we control what material
    // to use for what vertex, its only 1 byte in size
    // for a value range of 0-255
    uint material_id : Color1;
};

Result main(Vertex input)
{
    // lookup material
    Material mat = material[input.material_id];
    // read from the right texture: the material's texture_index selects the array slice
    float4 texel = Texture.Sample(Sampler, float3(input.uv, mat.texture_index));
    //... rest of shader
}
// order of multiplication in DirectX
FinalVector = vector * ScaleMat * RotationMat * TranslationMat
// order of multiplication in OpenGL
FinalVector = TranslationMat * RotationMat * ScaleMat * vector
finalmatrix = SRTmatrix1(rot1 followed by trans1) * SRTmatrix2( rot2 followed by trans2).
Hip
Left Thigh
Left Shin
Left Foot
Right Thigh
Right Shin
Right Foot
Spine
Neck
Head
Left Clavicle
Left Upper Arm
Left Lower Arm
Left Hand
Right Clavicle
Right Upper Arm
Right Lower Arm
Right Hand
// One node of the frame ("bone") hierarchy.
struct Frame {
    string Name;                 // the frame or "bone" name
    Matrix TransformationMatrix; // to be used for local animation matrix
    MeshContainer MeshData;      // perhaps only one or two frames will have mesh data
    FrameArray Children;         // pointers or references to each child frame of this frame
    Matrix ToParent;             // the local transform from bone-space to bone's parent-space
    Matrix ToRoot;               // from bone-space to root-frame space
};
 a root frame
 for each frame, a list or array of pointers to that frame's children's frames.
 for each frame, an indication if the frame contains a mesh
 for each frame, one or more matrices for storing frame-related SRTs
 for each influence bone, an offset matrix
 for each influence bone, an array of vertex indices and weights for the vertices which the bone "influences."
 an indication of which bone the animation applies to
 an array of "keys." Each key is comprised of:
 a tick count indicating the "time" in the animation sequence the key applies to
 a matrix (or a set of individual SRTs) for that tick count.
// given this function ...
function CalcToRootMatrix( Frame frame, Matrix parentMatrix )
{
    // transform from frame-space to root-frame-space through the parent's ToRoot matrix
    frame.ToRoot = frame.ToParent * parentMatrix;

    // each child receives this frame's freshly computed ToRoot as its parent matrix
    for each Child in frame:
        CalcToRootMatrix( Child, frame.ToRoot );
}

// ... calculate all the Frame ToRoot matrices
CalcToRootMatrix( RootFrame, IdentityMatrix ); // the root frame has no parent
frame.ToRoot = frame.ToParent * frame-parent.ToParent * frame-parent-parent.ToParent * ... * RootFrame.ToRoot
// A function to search the hierarchy for a frame named "frameName" and return a reference to that frame
Frame FindFrame( Frame frame, string frameName )
{
    Frame tmpFrame;

    if ( frame.Name == frameName )
        return frame;

    // depth-first: the first match found in any child subtree wins
    for each Child in frame
    {
        if ( (tmpFrame = FindFrame( Child, frameName )) != NULL )
            return tmpFrame;
    }

    return NULL;
}

// Note: MeshFrame.ToRoot is the transform for moving the mesh into root-frame space.
function CalculateOffsetMatrix( Index boneIndex )
{
    string boneName = SkinInfo.GetBoneName( boneIndex );
    Frame boneFrame = FindFrame( root_frame, boneName );
    // error check for boneFrame == NULL if desired
    offsetMatrix[ boneIndex ] = MeshFrame.ToRoot * MatrixInverse( boneFrame.ToRoot );
}

// generate all the offset matrices
for( int i = 0; i < SkinInfo.NumBones(); i++ )
    CalculateOffsetMatrix( i );
offsetMatrix = MeshFrame.ToRoot * Inverse( bone.ToParent * parent.ToParent * ... * root.ToParent )
offsetMatrix = MeshFrame.ToRoot * root.ToSomeChild * Child.ToAnotherChild * ... * boneParent.ToInfluenceBone
The mesh and bone hierarchy in pose position
// A named collection of per-frame animations.
struct AnimationSet {
    string animSetName; // for multiple sets, allows selection of actions
    AnimationArray animations;
}

// The key sequence that drives one frame, identified by name.
struct Animation {
    string frameName; // look familiar?
    AnimationKeysArray keyFrames;
}

// Scale, rotation and translation at a single time code.
struct AnimationKey {
    TimeCode keyTime;
    Vector Scale, Translation;
    Quaternion Rotation;
}
// Stores, for every animation in the set, the matrix computed from its keys
// at (startTime + deltaTime) into the TransformationMatrix of the frame the
// animation names.
function CalulateTransformationMatrices( TimeCode deltaTime )
{
    TimeCode sampleTime = startTime + deltaTime;

    for each animation in AnimationSet:
    {
        // matrix for this frame at the sampled time
        Matrix localTransform = CalculateFromAnimationKeys( sampleTime, animation.frameName );

        // look up the frame the animation targets and overwrite its local matrix
        Frame target = FindFrame( rootFrame, animation.frameName );
        target.TransformationMatrix = localTransform;
    }
}
// given this function ...
function CalcCombinedMatrix( Frame frame, Matrix parentMatrix )
{
    // concatenate this frame's TransformationMatrix with the matrix handed down
    // by the parent; the result overwrites TransformationMatrix in place
    frame.TransformationMatrix = frame.TransformationMatrix * parentMatrix;

    // children are combined against this frame's updated matrix
    for each Child in frame:
        CalcCombinedMatrix( Child, frame.TransformationMatrix );
}

// ... calculate all the Frame to-root animation matrices
CalcCombinedMatrix( RootFrame, IdentityMatrix );
// Given a FinalMatrix array..
function CalculateFinalMatrix( int boneIndex )
{
    string boneName = SkinInfo.GetBoneName( boneIndex );
    Frame boneFrame = FindFrame( root_frame, boneName );
    // error check for boneFrame == NULL if desired
    FinalMatrix[ boneIndex ] = OffsetMatrix[ boneIndex ] * boneFrame.TransformationMatrix;
}

// generate all the final matrices
for( int i = 0; i < SkinInfo.NumBones(); i++ )
    CalculateFinalMatrix( i );
// numInfluenceBones is the number of bones which influence the vertex
// Depending on the vertex structure passed to the shader, it may be passed in the vertex structure
// or be set as a shader constant
float fLastWeight = 1;
float fWeight;
vector vertexPos( 0 ); // start empty

for (int i=0; i < numInfluenceBones-1; i++) // N.B., the last boneweight is not needed!
{
    fWeight = boneWeight[ i ];
    vertexPos += inputVertexPos * final_transform[ i ] * fWeight;
    // what remains after subtracting every explicit weight is the last bone's weight
    fLastWeight -= fWeight;
}

// the last bone uses the remainder, so all the weights sum to 1
vertexPos += inputVertexPos * final_transform [ numInfluenceBones - 1 ] * fLastWeight;
Cube polygonized with an MC-based algorithm – notice the loss of detail on the edge
Horse movie image sequence courtesy of the US Library of Congress.
Painting by the infamous Bob Ross courtesy of deshow.net.
Picture courtesy of Silicon Graphics, Inc.
A voxel-based scene with complex geometry
Minecraft unleashed the creativity of users
Figure 1: Functional Graphics Pipeline
Figure 2: Pure "mathematics" view on the left versus a "graphics programming" view on the right
Figure 3: Clarification of shaders
Figure 4: Rasterization of a primitive into 58 fragments
Figure 5: Functional Graphics Pipeline with swapped front and backbuffer
create a stack with all of the vertices in CW/CCW order;
pop the top vertex off the stack and store in p0;
pop the top vertex off the stack and store in pHelper;
while the stack is not empty
    pop the top vertex off the stack and store in pTemp;
    create a triangle with vertices p0, pHelper, pTemp;
    let pHelper = pTemp
create a list of the vertices (preferably in CCW order, starting anywhere)
while true
    for every vertex
        let pPrev = the previous vertex in the list
        let pCur = the current vertex;
        let pNext = the next vertex in the list
        if the vertex is not an interior vertex (the wedge product of (pPrev - pCur) and (pNext - pCur) <= 0, for CCW winding);
            continue;
        if there are any vertices in the polygon inside the triangle made by the current vertex and the two adjacent ones
            continue;
        create the triangle with the points pPrev, pCur, pNext, for a CCW triangle;
        remove pCur from the list;
    if no triangles were made in the above for loop
        break;
Figure 1: An example heightmap taken from Wikipedia
Figure 2: An example 3D heightfield taken from Wikipedia
Figure 4: A series of 2D line segments
“The vertex normal in a 2D coordinate system is the average of the normals of the attached line segments.”
ComponentNormal = Σ (lineNormals) / N; where N is the number of normals
Normal.x = Σ(x-segments) / Nx;
Normal.y = 1.0
Normal.z = Σ(z-segments) / Nz;
Figure 5: An overhead view of a heightfield
Normal.x = [(A - P) + (P - B)] / 2.0
Normal.y = 1.0
Normal.z = [(C - P) + (P - D)] / 2.0
// Vertex for the filter method: two floats, x and z.
struct FilterVertex // 8 Bytes per Vertex
{
    float x, z;
};
index = z * numVertsWide + x
// Buffer of one float per heightfield vertex, created in CreateShaderResources.
ID3D10Buffer* m_pHeightBuffer;
// Shader resource view over m_pHeightBuffer.
ID3D10ShaderResourceView* m_pHeightBufferRV;
// Effect variable that m_pHeightBufferRV is bound to in Draw.
ID3D10EffectShaderResourceVariable* m_pHeightsRV;
void Heightfield::CreateShaderResources(int numSurfaces) { // Create the nonstreamed Shader Resources D3D10_BUFFER_DESC desc; D3D10_SHADER_RESOURCE_VIEW_DESC SRVDesc; // Create the height buffer for the filter method ZeroMemory(&desc, sizeof(D3D10_BUFFER_DESC)); ZeroMemory(&SRVDesc, sizeof(SRVDesc)); desc.ByteWidth = m_NumVertsDeep * m_NumVertsWide * sizeof(float); desc.Usage = D3D10_USAGE_DYNAMIC; desc.BindFlags = D3D10_BIND_SHADER_RESOURCE; desc.CPUAccessFlags = D3D10_CPU_ACCESS_WRITE; SRVDesc.Format = DXGI_FORMAT_R32_FLOAT; SRVDesc.ViewDimension = D3D10_SRV_DIMENSION_BUFFER; SRVDesc.Buffer.ElementWidth = m_NumVertsDeep * m_NumVertsWide; m_pDevice>CreateBuffer(&desc, NULL, &m_pHeightBuffer); m_pDevice>CreateShaderResourceView(m_pHeightBuffer, &SRVDesc, &m_pHeightBufferRV); }
void Heightfield::Draw() { // Init some locals int numRows = m_NumVertsDeep  1; int numIndices = 2 * m_NumVertsWide; UINT offset = 0; UINT stride = sizeof(FilterVertex);
m_pNumVertsDeep>SetInt(m_NumVertsDeep); m_pNumVertsWide>SetInt(m_NumVertsWide); m_pMetersPerVertex>SetFloat(m_MetersPerVertex);
m_pDevice>IASetPrimitiveTopology (D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); m_pDevice>IASetIndexBuffer(m_pIndexBuffer,DXGI_FORMAT_R32_UINT,0); m_pDevice>IASetInputLayout(m_pHeightfieldIL); m_pDevice>IASetVertexBuffers(0, 1, &m_pHeightfieldVB, &stride, &offset);
m_pHeightsRV>SetResource(m_pHeightBufferRV);
m_pFilterSimpleTech>GetPassByIndex(0)>Apply(0); for (int j = 0; j < numRows; j++) m_pDevice>DrawIndexed( numIndices, j * numIndices, 0 ); }
// Loads the value stored at the given linear index of g_Heights.
float Height(int index)
{
    int2 address = int2(index, 0);
    return g_Heights.Load(address);
}
// Builds a normal for one heightfield vertex from height differences with
// its neighbours along x and z, then normalizes it.
//   pos   - grid coordinates of the vertex (x = column, y = row)
//   index - linear index of the vertex passed to Height()
// Interior vertices average the two one-sided differences; vertices on a
// border use the single difference that stays inside the grid.
float3 FilterNormal( float2 pos, int index )
{
    float3 normal = float3(0, 1, 0);

    // z component: neighbours one row away (index +/- g_NumVertsWide)
    if(pos.y == 0)
        normal.z = Height(index) - Height(index + g_NumVertsWide);
    else if(pos.y == g_NumVertsDeep - 1)
        normal.z = Height(index - g_NumVertsWide) - Height(index);
    else
        normal.z = ((Height(index) - Height(index + g_NumVertsWide)) +
                    (Height(index - g_NumVertsWide) - Height(index))) * 0.5;

    // x component: neighbours one column away (index +/- 1)
    if(pos.x == 0)
        normal.x = Height(index) - Height(index + 1);
    else if(pos.x == g_NumVertsWide - 1)
        normal.x = Height(index - 1) - Height(index);
    else
        normal.x = ((Height(index) - Height(index + 1)) +
                    (Height(index - 1) - Height(index))) * 0.5;

    return normalize(normal);
}
// Vertex shader for the filter method.
//   vPos - grid coordinates of the vertex (x = column, y = row)
// Loads the height from g_Heights, transforms the position by
// g_ViewProjectionMatrix and computes a diffuse colour from one directional light.
VS_OUTPUT FilterHeightfieldVS( float2 vPos : POSITION )
{
    VS_OUTPUT Output = (VS_OUTPUT)0;

    float4 position = 1.0f;
    position.xz = vPos * g_MetersPerVertex;

    // Pull the height from the buffer
    int index = (vPos.y * g_NumVertsWide) + vPos.x;
    position.y = g_Heights.Load(int2(index, 0)) * g_MetersPerVertex;
    Output.Position = mul(position, g_ViewProjectionMatrix);

    // Compute the normal using a filter kernel
    float3 vNormalWorldSpace = FilterNormal(vPos, index);

    // Compute simple directional lighting equation
    float3 vTotalLightDiffuse = g_LightDiffuse * max(0,dot(vNormalWorldSpace, g_LightDir));
    Output.Diffuse.rgb = g_MaterialDiffuseColor * vTotalLightDiffuse;
    Output.Diffuse.a = 1.0f;

    return Output;
}
Surface Normal: N_{s} = A × B
Vertex Normal: N = Norm( Σ (N_{si}) )
Figure 6: A simple grid-based mesh with triangles
// Vertex layout for the mesh path; Draw uses sizeof(MeshVertex) as the vertex-buffer stride.
struct MeshVertex
{
    // vertex position
    D3DXVECTOR3 pos;
    // NOTE(review): presumably the vertex's linear index into the grid; confirm against the shader
    unsigned i;
};
// Buffer of surface normals; created in CreateShaderResources and set as the stream-output target in Draw.
ID3D10Buffer* m_pNormalBufferSO;
// Shader resource view over m_pNormalBufferSO.
ID3D10ShaderResourceView* m_pNormalBufferRVSO;
// Effect variable that m_pNormalBufferRVSO is bound to in Draw.
ID3D10EffectShaderResourceVariable* m_pSurfaceNormalsRV;
void Heightfield::CreateShaderResources( int numSurfaces ) { // Create the nonstreamed Shader Resources D3D10_BUFFER_DESC desc; D3D10_SHADER_RESOURCE_VIEW_DESC SRVDesc; // Create output normal buffer for the Stream Output ZeroMemory(&desc, sizeof(D3D10_BUFFER_DESC)); ZeroMemory(&SRVDesc, sizeof(SRVDesc)); desc.ByteWidth = numSurfaces * sizeof(D3DXVECTOR4); desc.Usage = D3D10_USAGE_DEFAULT; desc.BindFlags = D3D10_BIND_SHADER_RESOURCE  D3D10_BIND_STREAM_OUTPUT;
SRVDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; SRVDesc.ViewDimension = D3D10_SRV_DIMENSION_BUFFER; SRVDesc.Buffer.ElementWidth = numSurfaces; m_pDevice>CreateBuffer(&desc, NULL, &m_pNormalBufferSO); m_pDevice>CreateShaderResourceView(m_pNormalBufferSO, &SRVDesc, &m_pNormalBufferRVSO); }
void Heightfield::Draw() { int numRows = m_NumVertsDeep  1; int numIndices = 2 * m_NumVertsWide; m_pNumVertsDeep>SetInt(m_NumVertsDeep); m_pNumVertsWide>SetInt(m_NumVertsWide); m_pMetersPerVertex>SetFloat(m_MetersPerVertex); m_pDevice>IASetPrimitiveTopology (D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); m_pDevice>IASetIndexBuffer(m_pIndexBuffer, DXGI_FORMAT_R32_UINT,0); UINT offset = 0; UINT stride = sizeof(MeshVertex); m_pDevice>IASetInputLayout(m_pMeshIL); m_pDevice>IASetVertexBuffers(0, 1, &m_pMeshVB, &stride, &offset);
ID3D10ShaderResourceView* pViews[] = {NULL}; m_pDevice>VSSetShaderResources(0, 1, pViews);
m_pDevice>SOSetTargets(1, &m_pNormalBufferSO, &offset); m_pSurfaceNormalsRV>SetResource(m_pNormalBufferRVSO);
D3D10_TECHNIQUE_DESC desc; m_pMeshWithNormalMapSOTech>GetDesc(&desc); for(unsigned i = 0; i < desc.Passes; i++) { m_pMeshWithNormalMapSOTech>GetPassByIndex(i)>Apply(0); for (int j = 0; j < numRows; j++) m_pDevice>DrawIndexed(numIndices, j * numIndices, 0 );
m_pDevice>SOSetTargets(0, NULL, &offset); } }
// Geometry shader built from SurfaceNormalGS with a stream-output declaration of "POSITION.xyzw".
GeometryShader gsNormalBuffer = ConstructGSWithSO( CompileShader( gs_4_0, SurfaceNormalGS() ), "POSITION.xyzw" );

// Two-pass technique: P0 runs the stream-output geometry shader with no pixel shader,
// P1 renders the scene with depth enabled.
technique10 MeshWithNormalMapSOTech
{
    // P0: pass-through vertex shader feeding gsNormalBuffer; no pixel shader is set.
    pass P0
    {
        SetVertexShader( CompileShader( vs_4_0, PassThroughVS() ) );
        SetGeometryShader( gsNormalBuffer );
        SetPixelShader( NULL );
    }

    // P1: RenderNormalMapScene vertex shader and RenderScenePS pixel shader; no geometry shader.
    pass P1
    {
        SetVertexShader( CompileShader( vs_4_0, RenderNormalMapScene() ) );
        SetGeometryShader( NULL );
        SetPixelShader( CompileShader( ps_4_0, RenderScenePS() ) );
        SetDepthStencilState( EnableDepth, 0 );
    }
}
// Emits one point per input triangle whose Position.xyz holds the triangle's
// normalized surface normal, cross(edge2, edge1).
[maxvertexcount(1)]
void SurfaceNormalGS( triangle GS_INPUT input[3], inout PointStream<GS_INPUT> PStream )
{
    GS_INPUT Output = (GS_INPUT)0;

    // the two edges that leave vertex 0
    float3 edge1 = input[1].Position - input[0].Position;
    float3 edge2 = input[2].Position - input[0].Position;

    Output.Position.xyz = normalize( cross( edge2, edge1 ) );
    PStream.Append(Output);
}
// Sums the surface normals stored in g_SurfaceNormals for the triangles
// selected by the vertex's position in the grid.
//   index - linear vertex index (row-major, g_NumVertsWide vertices per row)
// Each row of grid cells holds normalsPerRow = 2 * (g_NumVertsWide - 1)
// entries. The result is NOT normalized here.
float3 ComputeNormal(uint index)
{
    float3 normal = 0.0;

    int topVertex = g_NumVertsDeep - 1;
    int rightVertex = g_NumVertsWide - 1;
    int normalsPerRow = rightVertex * 2;
    int numRows = topVertex;

    // index of the first surface normal in the last row of cells
    float top = normalsPerRow * (numRows - 1);

    int x = index % g_NumVertsWide;
    int z = index / g_NumVertsWide;

    // Bottom
    if(z == 0)
    {
        if(x == 0)
        {
            float3 normal0 = g_SurfaceNormals.Load(int2( 0, 0 ));
            float3 normal1 = g_SurfaceNormals.Load(int2( 1, 0 ));
            normal = normal0 + normal1;
        }
        else if(x == rightVertex)
        {
            index = (normalsPerRow - 1);
            normal = g_SurfaceNormals.Load(int2( index, 0 ));
        }
        else
        {
            index = (2 * x);
            normal = g_SurfaceNormals.Load(int2( index-1, 0 )) +
                     g_SurfaceNormals.Load(int2( index, 0 )) +
                     g_SurfaceNormals.Load(int2( index+1, 0 ));
        }
    }
    // Top
    else if(z == topVertex)
    {
        if(x == 0)
        {
            normal = g_SurfaceNormals.Load(int2( top, 0 ));
        }
        else if(x == rightVertex)
        {
            index = (normalsPerRow * numRows) - 1;
            normal = g_SurfaceNormals.Load(int2( index, 0 )) +
                     g_SurfaceNormals.Load(int2( index-1, 0 ));
        }
        else
        {
            index = top + (2 * x);
            normal = g_SurfaceNormals.Load(int2( index-2, 0)) +
                     g_SurfaceNormals.Load(int2( index, 0)) +
                     g_SurfaceNormals.Load(int2( index-1, 0));
        }
    }
    // Middle
    else
    {
        // index1 addresses the row of cells starting at this vertex's row,
        // index2 the row of cells below it
        if(x == 0)
        {
            int index1 = z * normalsPerRow;
            int index2 = index1 - normalsPerRow;
            normal = g_SurfaceNormals.Load(int2( index1, 0 )) +
                     g_SurfaceNormals.Load(int2( index1+1, 0 )) +
                     g_SurfaceNormals.Load(int2( index2, 0 ));
        }
        else if(x == rightVertex)
        {
            int index1 = (z + 1) * normalsPerRow - 1;
            int index2 = index1 - normalsPerRow;
            normal = g_SurfaceNormals.Load(int2( index1, 0 )) +
                     g_SurfaceNormals.Load(int2( index2, 0 )) +
                     g_SurfaceNormals.Load(int2( index2-1, 0 ));
        }
        else
        {
            int index1 = (z * normalsPerRow) + (2 * x);
            int index2 = index1 - normalsPerRow;
            normal = g_SurfaceNormals.Load(int2( index1-1, 0 )) +
                     g_SurfaceNormals.Load(int2( index1, 0 )) +
                     g_SurfaceNormals.Load(int2( index1+1, 0 )) +
                     g_SurfaceNormals.Load(int2( index2-2, 0 )) +
                     g_SurfaceNormals.Load(int2( index2-1, 0 )) +
                     g_SurfaceNormals.Load(int2( index2, 0 ));
        }
    }

    return normal;
}
Figure 7: A screenshot of the demo program for this article
// Linear blend: each texture's colour is scaled by its own weight and the
// two contributions are added.
float3 blend(float4 texture1, float a1, float4 texture2, float a2)
{
    float3 first = texture1.rgb * a1;
    float3 second = texture2.rgb * a2;
    return first + second;
}
// Hard selection on the alpha channels alone: the texture with the strictly
// larger alpha supplies the colour; a1 and a2 are ignored.
float3 blend(float4 texture1, float a1, float4 texture2, float a2)
{
    if (texture1.a > texture2.a)
        return texture1.rgb;

    return texture2.rgb;
}
// Hard selection on alpha plus weight: the texture whose (alpha + weight) is
// strictly larger supplies the colour.
float3 blend(float4 texture1, float a1, float4 texture2, float a2)
{
    float level1 = texture1.a + a1;
    float level2 = texture2.a + a2;

    if (level1 > level2)
        return texture1.rgb;

    return texture2.rgb;
}
// Depth-filtered blend: only the part of each texture's (alpha + weight) that
// rises above a threshold `depth` below the larger of the two contributes.
// The colours are mixed in proportion to those parts.
float3 blend(float4 texture1, float a1, float4 texture2, float a2)
{
    float depth = 0.2;

    // threshold: `depth` below the larger of the two levels
    float ma = max(texture1.a + a1, texture2.a + a2) - depth;

    // how far each level rises above the threshold, clamped at zero
    float b1 = max(texture1.a + a1 - ma, 0);
    float b2 = max(texture2.a + a2 - ma, 0);

    // the larger level always exceeds ma by `depth`, so b1 + b2 >= depth > 0
    return (texture1.rgb * b1 + texture2.rgb * b2) / (b1 + b2);
}