glDrawArraysInstanced performance on Intel HD 2000/3000



#1 Hyunkel (Members, Reputation: 368)


Posted 31 March 2014 - 01:32 PM

Hi,

 

I'm currently working on a game project that uses OpenTK, targeting OpenGL 3.1.

After implementing instanced rendering (instanced vertex attributes via glDrawArraysInstanced), I noticed a horrible performance drop on our Intel test machine (Intel HD 3000).

The frame time went from 2 ms to over 2,000 ms.

All other machines (with AMD and Nvidia cards) perform better with instancing.

I've checked Intel's site to make sure that the HD 3000 supports OpenGL 3.1 and that the machine has the latest drivers.
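To rule out the context silently falling back to Microsoft's software renderer, a quick check like this (a minimal sketch using OpenTK's GL.GetString) shows which driver is actually servicing the context:

// Sketch: if "Renderer" reports something like "GDI Generic",
// the context is running in software rather than on the Intel driver.
Console.WriteLine( "Vendor:   " + GL.GetString( StringName.Vendor ) );
Console.WriteLine( "Renderer: " + GL.GetString( StringName.Renderer ) );
Console.WriteLine( "Version:  " + GL.GetString( StringName.Version ) );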

 

Do you have any ideas what could be causing this issue?

 

Buffer Structures:

[StructLayout( LayoutKind.Sequential )]
struct VertexData
{
    public Vector3 Position;
    public Vector3 Normal;
    public Vector2 TexCoord;

    public static readonly int SizeInBytes = Marshal.SizeOf( new VertexData() );
    public VertexData( Vector3 position, Vector3 normal, Vector2 texcoord )
    {
        Position = position;
        Normal = normal;
        TexCoord = texcoord;
    }
}

[StructLayout( LayoutKind.Sequential )]
public struct InstanceData
{
    public Vector4 SpriteRect;
    public Vector4 DestinationRect;
    public Color4 Color;
    public Vector4 Scissors;

    public static readonly int SizeInBytes = Marshal.SizeOf( new InstanceData() );
}
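With LayoutKind.Sequential and only float-based fields, the computed sizes should come out to 32 bytes (3 + 3 + 2 floats) and 64 bytes (four 16-byte vectors). A quick sanity-check sketch that makes the packing assumption the VAO setup relies on explicit:

// Sketch: the VertexAttribPointer offsets below assume tight packing.
System.Diagnostics.Debug.Assert( VertexData.SizeInBytes == 32 );   // 3 + 3 + 2 floats
System.Diagnostics.Debug.Assert( InstanceData.SizeInBytes == 64 ); // 4 x 16 bytes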

VAO Creation:

float v1 = -1f;
float v2 = 1f;

VertexData[] Vertices = new VertexData[]
{
    new VertexData(
        new Vector3(v1, v2, 0),
        new Vector3(0, 0, 1),
        new Vector2(0, 0)),
    new VertexData(
        new Vector3(v2, v2, 0),
        new Vector3(0, 0, 1),
        new Vector2(1, 0)),
    new VertexData(
        new Vector3(v1, v1, 0),
        new Vector3(0, 0, 1),
        new Vector2(0, 1)),
    new VertexData(
        new Vector3(v1, v1, 0),
        new Vector3(0, 0, 1),
        new Vector2(0, 1)),
    new VertexData(
        new Vector3(v2, v2, 0),
        new Vector3(0, 0, 1),
        new Vector2(1, 0)),
    new VertexData(
        new Vector3(v2, v1, 0),
        new Vector3(0, 0, 1),
        new Vector2(1, 1))
};

Buffer vertexBuffer = Buffer.CreateVertexBuffer( Vertices, VertexData.SizeInBytes );
InstanceBuffer = Buffer.CreateInstanceBuffer( InstanceData.SizeInBytes, 4096 );

GL.GenVertexArrays( 1, out VAOHandle );
GL.BindVertexArray( VAOHandle );

// Vertex Buffer
vertexBuffer.Bind();
GL.VertexAttribPointer( 0, 3, VertexAttribPointerType.Float, false, VertexData.SizeInBytes, 0 );
GL.EnableVertexAttribArray( 0 );

GL.VertexAttribPointer( 1, 3, VertexAttribPointerType.Float, false, VertexData.SizeInBytes, Vector3.SizeInBytes );
GL.EnableVertexAttribArray( 1 );

GL.VertexAttribPointer( 2, 2, VertexAttribPointerType.Float, false, VertexData.SizeInBytes, Vector3.SizeInBytes * 2 );
GL.EnableVertexAttribArray( 2 );

// Instance Buffer
InstanceBuffer.Bind();
GL.VertexAttribPointer( 3, 4, VertexAttribPointerType.Float, false, InstanceData.SizeInBytes, 0 );
GL.EnableVertexAttribArray( 3 );
GL.VertexAttribDivisor( 3, 1 );

GL.VertexAttribPointer( 4, 4, VertexAttribPointerType.Float, false, InstanceData.SizeInBytes, Vector4.SizeInBytes );
GL.EnableVertexAttribArray( 4 );
GL.VertexAttribDivisor( 4, 1 );

GL.VertexAttribPointer( 5, 4, VertexAttribPointerType.Float, false, InstanceData.SizeInBytes, Vector4.SizeInBytes * 2 );
GL.EnableVertexAttribArray( 5 );
GL.VertexAttribDivisor( 5, 1 );

GL.VertexAttribPointer( 6, 4, VertexAttribPointerType.Float, false, InstanceData.SizeInBytes, Vector4.SizeInBytes * 3 );
GL.EnableVertexAttribArray( 6 );
GL.VertexAttribDivisor( 6, 1 );

GL.BindVertexArray( 0 );
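A side note on the attribute indices: #version 140 has no layout(location = ...) qualifiers, so the hardcoded slots 0 through 6 only line up with the shader if the locations are bound before the program is linked. A minimal sketch of what that binding looks like (assuming ProgramHandle has not been linked yet):

// Sketch: must run before GL.LinkProgram( ProgramHandle ).
GL.BindAttribLocation( ProgramHandle, 0, "in_position" );
GL.BindAttribLocation( ProgramHandle, 1, "in_normal" );
GL.BindAttribLocation( ProgramHandle, 2, "in_texcoord" );
GL.BindAttribLocation( ProgramHandle, 3, "in_spriteRect" );
GL.BindAttribLocation( ProgramHandle, 4, "in_destinationRect" );
GL.BindAttribLocation( ProgramHandle, 5, "in_color" );
GL.BindAttribLocation( ProgramHandle, 6, "in_scissors" );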

Vertex Shader:

#version 140

// Vertex Data
in vec3 in_position;
in vec3 in_normal;
in vec2 in_texcoord;
// Instance Data
in vec4 in_spriteRect;
in vec4 in_destinationRect;
in vec4 in_color;
in vec4 in_scissors;

// Output
out vec3 vs_normal;
out vec2 vs_texcoord;
out vec4 vs_color;
out vec4 vs_scissors;

void main()
{
	// Texture Coordinates
	vs_texcoord = in_texcoord;
	vs_texcoord *= in_spriteRect.zw;
	vs_texcoord += in_spriteRect.xy;

	// Position
	vec4 Position = vec4( in_position, 1.0f );

	// Normalize to [0, 1]
	Position.xy = Position.xy * 0.5f + 0.5f;

	// Apply Destination Transform
	Position.xy *= in_destinationRect.zw;
	Position.xy += in_destinationRect.xy;

	// Normalize to [-1, 1]
	Position.xy = Position.xy * 2.0f - 1.0f;

	// In OpenGL -1,-1 is the bottom left screen corner
	// In DirectX -1,-1 is the top left screen corner
	Position.y += 2.0f - in_destinationRect.w * 2.0f;

	vs_normal = in_normal;
	vs_color = in_color;
	vs_scissors = in_scissors;

	gl_Position = Position;
}

Fragment Shader:

#version 140

uniform sampler2D Tex;

// Input
in vec3 vs_normal;
in vec2 vs_texcoord;
in vec4 vs_color;
in vec4 vs_scissors;

// Output
out vec4 out_frag_color;

bool ScissorTest()
{
	return	gl_FragCoord.x > vs_scissors.x &&
			gl_FragCoord.y > vs_scissors.y &&
			gl_FragCoord.x < vs_scissors.x + vs_scissors.z &&
			gl_FragCoord.y < vs_scissors.y + vs_scissors.w;
}

void main()
{
	out_frag_color = vec4(0, 0, 0, 0);

	if( ScissorTest() )
		out_frag_color = texture( Tex, vs_texcoord ) * vs_color;
}

Rendering:

InstanceBuffer.Write( InstanceDataCPU, InstanceData.SizeInBytes, InstanceCount );
GL.BindVertexArray( VAOHandle );
BindTexture( TextureHandle, 0, texture );
GL.UseProgram( ProgramHandle );
GL.DrawArraysInstanced( PrimitiveType.Triangles, 0, 6, InstanceCount );



#2 Fiddler (Members, Reputation: 850)


Posted 01 April 2014 - 04:17 AM

Intel does support GL.DrawArraysInstanced, so it appears that you are hitting a slow path or falling back to software emulation for some reason.

 

I would suggest using Intel's Graphics Performance Analyzer to understand what is happening and why. If the drivers support ARB_debug_output, you could also use that to get more insight into potential issues.
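Hooking it up looks roughly like this (a sketch only; the exact OpenTK delegate and binding names vary a little between versions, so treat the identifiers as assumptions):

// Sketch: route ARB_debug_output messages to the console. Keep the delegate
// in a field so the GC doesn't collect it while the driver holds the pointer.
static DebugProcArb debugProc;

static void HookDebugOutput()
{
    debugProc = ( source, type, id, severity, length, message, userParam ) =>
        Console.WriteLine( Marshal.PtrToStringAnsi( message, length ) );
    GL.Arb.DebugMessageCallback( debugProc, IntPtr.Zero );
}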


[OpenTK: C# OpenGL 4.4, OpenGL ES 3.0 and OpenAL 1.1. Now with Linux/KMS support!]


#3 Hyunkel (Members, Reputation: 368)


Posted 01 April 2014 - 02:50 PM

Thanks for the suggestions!

 

Unfortunately the HD 3000 does not support ARB_debug_output, and as far as I know Intel's Graphics Performance Analyzer doesn't support OpenGL on Windows yet.
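As a fallback that works on any driver, wrapping the suspect call in GL.Finish() plus a Stopwatch should at least narrow down where the time goes (a minimal sketch; GL.Finish drains the pipeline so the CPU timer includes the GPU work):

var sw = System.Diagnostics.Stopwatch.StartNew();
GL.DrawArraysInstanced( PrimitiveType.Triangles, 0, 6, InstanceCount );
GL.Finish(); // block until the GPU has actually finished the draw
sw.Stop();
Console.WriteLine( "Instanced draw: " + sw.Elapsed.TotalMilliseconds + " ms" );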





