Things have turned out all right, though: I lost about 150 FPS, going from 580 to 430, but that's still pretty good.
How it's done:
First I needed a high-precision timer; since one doesn't really exist in .NET, I made my own:
/// <summary>
/// Periodic timer driven by the Win32 QueryPerformanceCounter API.
/// A dedicated background thread polls the counter and raises <see cref="Tick"/>
/// whenever the number of whole intervals elapsed since the last
/// <see cref="Start"/> changes.
/// </summary>
public class Timer
{
    [DllImport("Kernel32.dll")]
    private static extern bool QueryPerformanceCounter( out long Count );

    [DllImport("Kernel32.dll")]
    private static extern bool QueryPerformanceFrequency( out long Frequency );

    /// <summary>Signature of <see cref="Tick"/> handlers.</summary>
    /// <param name="Sender">The timer raising the event.</param>
    /// <param name="Ticks">Number of whole intervals elapsed since the previous notification.</param>
    public delegate void Delegate( object Sender, long Ticks );

    // Performance-counter frequency (counts per second), queried once in the static constructor.
    private static double Frequency = 0.0;

    private long Count = 0;
    private long CurrentTime = 0;
    private double IntervalProperty = 0;
    private long LastCount = 0;
    private long StartTime = 0;
    private Threading.Thread Thread = null;

    // Signalled while the timer is running. The polling thread blocks on it while
    // stopped; this replaces the obsolete Thread.Suspend / Thread.Resume calls.
    private readonly Threading.ManualResetEvent RunSignal = new Threading.ManualResetEvent( false );

    // Guards StartTime / Count / LastCount, which are touched by both the caller of
    // Start() and the polling thread.
    private readonly object StateLock = new object();

    /// <summary>
    /// Raised on the timer's polling thread each time the elapsed-interval count changes.
    /// </summary>
    public event Delegate Tick = null;

    /// <summary>
    /// Tick period in seconds (elapsed counter ticks are divided by the counter
    /// frequency and then by this value). Values less than or equal to zero are replaced by 1.
    /// </summary>
    public double Interval
    {
        get { return IntervalProperty; }
        private set
        {
            if( value <= 0 )
            {
                value = 1;
            }
            IntervalProperty = value;
        }
    }

    static Timer()
    {
        long SystemFrequency = 0;
        if( QueryPerformanceFrequency( out SystemFrequency ) == false )
        {
            throw new System.NotSupportedException( "QueryPerformance functions not supported." );
        }
        Frequency = (double)SystemFrequency;
    }

    /// <summary>Creates a stopped timer with the given interval.</summary>
    /// <param name="Interval">Tick period in seconds; non-positive values become 1.</param>
    public Timer( double Interval )
    {
        this.Interval = Interval;
        Thread = new Threading.Thread( CheckTime );
        Thread.IsBackground = true;
    }

    /// <summary>Creates a stopped timer with the given interval and an initial tick handler.</summary>
    /// <param name="Interval">Tick period in seconds; non-positive values become 1.</param>
    /// <param name="TickEvent">Handler subscribed to <see cref="Tick"/>.</param>
    public Timer( double Interval, Delegate TickEvent )
        : this( Interval )
    {
        Tick += TickEvent;
    }

    // Polling loop executed by the background thread for the lifetime of the process.
    private void CheckTime()
    {
        while( true )
        {
            // Blocks here while the timer is stopped.
            RunSignal.WaitOne();

            long Elapsed = 0;
            Delegate Handler = null;
            lock( StateLock )
            {
                QueryPerformanceCounter( out CurrentTime );
                Count = (long)( ( ((double)( CurrentTime - StartTime )) / Frequency ) / Interval );
                Elapsed = Count - LastCount;
                LastCount = Count;
                // Snapshot the delegate so an unsubscribe between the null check and
                // the invocation cannot cause a NullReferenceException.
                Handler = Tick;
            }

            // Invoked outside the lock so a handler may call Start() or Stop() without deadlocking.
            if( ( Elapsed != 0 ) && ( Handler != null ) )
            {
                Handler( this, Elapsed );
            }

            // Yield the rest of the time slice, as the original loop did.
            Threading.Thread.Sleep( 0 );
        }
    }

    /// <summary>
    /// Starts (or restarts) the timer. The elapsed-interval count begins again at zero,
    /// so the first tick after a restart never reports a negative count.
    /// Calling this on a timer that is already running simply restarts the count.
    /// </summary>
    /// <exception cref="System.NotSupportedException">The performance counter could not be read.</exception>
    public void Start()
    {
        long Now = 0;
        if( QueryPerformanceCounter( out Now ) == false )
        {
            throw new System.NotSupportedException( "QueryPerformance functions not supported." );
        }

        lock( StateLock )
        {
            StartTime = Now;
            Count = 0;
            LastCount = 0;

            if( ( Thread.ThreadState & Threading.ThreadState.Unstarted ) == Threading.ThreadState.Unstarted )
            {
                Thread.Start();
            }
        }

        RunSignal.Set();
    }

    /// <summary>
    /// Pauses tick delivery. Safe to call on a timer that was never started or is
    /// already stopped. A tick whose handler is already being invoked may still complete.
    /// </summary>
    public void Stop()
    {
        // ThreadState.Running is 0, so the original "(state & Running) == Running"
        // test was always true and suspended threads in any state; gating the loop
        // with an event needs no state check at all.
        RunSignal.Reset();
    }
}
This is my first time really using threads; nothing's blown up yet.
Next, a change in the mode 4 code. Each render mode will have three functions (Begin, End and Render), which are chosen (using delegates) when the scan line is at 0, so if the program switches the video mode while rendering, it will not have any effect until the next frame.
/// <summary>
/// Renderer for video mode 4. Render() copies one scan line of colour indices and
/// one copy of the palette into CPU-side arrays; End() uploads both arrays to
/// textures and draws one two-triangle strip per scan line through the Mode4.fx effect.
/// </summary>
private static class Mode4
{
    private const int ScreenWidth = 240;             // bytes read per scan line
    private const int ScreenHeight = 160;            // number of scan-line quads
    private const int TextureSize = 256;             // width and height of both textures
    private const int PaletteRowBytes = 512;         // bytes of palette data read per scan line
    private const int VerticesPerLine = 6;           // two triangles per scan line
    private const int FrameBufferAddress = 0x06000000;
    private const int PaletteAddress = 0x05000000;

    private static byte[] ColorIndexArray = new byte[TextureSize * ScreenHeight];
    private static Direct3D.Texture ColorTexture = null;
    private static Direct3D.Effect Effect = null;
    private static byte[] PaletteArray = new byte[TextureSize * PaletteRowBytes];
    private static Direct3D.Texture PaletteTexture = null;
    private static Direct3D.CustomVertex.PositionTextured[] VertexArray =
        new Direct3D.CustomVertex.PositionTextured[ScreenHeight * VerticesPerLine];

    // Creates the textures, loads the effect and builds the per-scan-line geometry.
    static Mode4()
    {
        ColorTexture = new Direct3D.Texture(
            Graphics.Device, TextureSize, TextureSize, 1,
            Direct3D.Usage.None, Direct3D.Format.L8, Direct3D.Pool.Managed );
        PaletteTexture = new Direct3D.Texture(
            Graphics.Device, TextureSize, TextureSize, 1,
            Direct3D.Usage.None, Direct3D.Format.X1R5G5B5, Direct3D.Pool.Managed );

        Effect = Direct3D.Effect.FromFile( Graphics.Device, "Mode4.fx", null, Direct3D.ShaderFlags.None, null );
        Effect.SetValue(
            Effect.GetParameter( null, "WorldViewProj" ),
            Graphics.Device.Transform.View * Graphics.Device.Transform.Projection );
        Effect.SetValue( Effect.GetParameter( null, "ColorTexture" ), ColorTexture );
        Effect.SetValue( Effect.GetParameter( null, "PaletteTexture" ), PaletteTexture );

        // The four corners of scan line 0; every corner carries Tv = 0, so the whole
        // line samples a single texture row.
        Direct3D.CustomVertex.PositionTextured X240Y0 =
            new Direct3D.CustomVertex.PositionTextured( (float)ScreenWidth, 0.0f, 0.1f, 0.0f, 0.0f );
        Direct3D.CustomVertex.PositionTextured X0Y0 =
            new Direct3D.CustomVertex.PositionTextured( 0.0f, 0.0f, 0.1f, ( 1.0f / (float)TextureSize ) * (float)ScreenWidth, 0.0f );
        Direct3D.CustomVertex.PositionTextured X240Y1 = X240Y0;
        X240Y1.Y = 1.0f;
        Direct3D.CustomVertex.PositionTextured X0Y1 = X0Y0;
        X0Y1.Y = 1.0f;

        VertexArray[0] = X240Y0;
        VertexArray[1] = X0Y0;
        VertexArray[2] = X240Y1;
        VertexArray[3] = X0Y1;
        VertexArray[4] = X240Y1;
        VertexArray[5] = X0Y0;

        // Every further scan line is line 0 shifted down by its index, sampling its own texture row.
        for( int Line = 1; Line < ScreenHeight; Line++ )
        {
            int FirstVertex = Line * VerticesPerLine;
            float RowV = ((float)Line) / (float)TextureSize;

            for( int Corner = 0; Corner < VerticesPerLine; Corner++ )
            {
                Direct3D.CustomVertex.PositionTextured Vertex = VertexArray[Corner];
                Vertex.Y += Line;
                Vertex.Tv = RowV;
                VertexArray[FirstVertex + Corner] = Vertex;
            }
        }
    }

    // Copies a whole byte array into a locked texture stream and closes the stream.
    private static void Fill( DirectX.GraphicsStream Target, byte[] Source )
    {
        Target.Write( Source, 0, Source.Length );
        Target.Close();
    }

    /// <summary>Selects the vertex format used by this mode's geometry.</summary>
    public static void Begin()
    {
        Device.VertexFormat = Direct3D.CustomVertex.PositionTextured.Format;
    }

    /// <summary>
    /// Uploads the collected colour indices and palette rows to their textures,
    /// then draws all scan-line quads in a single effect pass.
    /// </summary>
    public static void End()
    {
        Drawing.Rectangle VisibleRows = new Drawing.Rectangle( 0, 0, TextureSize, ScreenHeight );
        DirectX.GraphicsStream IndexStream = ColorTexture.LockRectangle( 0, VisibleRows, Direct3D.LockFlags.None );
        Fill( IndexStream, ColorIndexArray );
        ColorTexture.UnlockRectangle( 0 );

        DirectX.GraphicsStream PaletteStream = PaletteTexture.LockRectangle( 0, Direct3D.LockFlags.None );
        Fill( PaletteStream, PaletteArray );
        PaletteTexture.UnlockRectangle( 0 );

        Effect.Begin( Direct3D.FX.None );
        Effect.BeginPass( 0 );
        Graphics.Device.DrawUserPrimitives( Direct3D.PrimitiveType.TriangleList, VertexArray.Length / 3, VertexArray );
        Effect.EndPass();
        Effect.End();
    }

    /// <summary>
    /// Captures scan line <paramref name="Index"/>: 240 bytes of colour indices read from
    /// 0x06000000 + 240 * Index, plus 512 bytes read from 0x05000000 into that line's palette row.
    /// </summary>
    /// <param name="Index">Scan line number; selects the destination row in both arrays.</param>
    public static void Render( int Index )
    {
        Memory.Read( FrameBufferAddress + ( ScreenWidth * Index ), ScreenWidth, ColorIndexArray, Index * TextureSize );
        Memory.Read( PaletteAddress, PaletteRowBytes, PaletteArray, Index * PaletteRowBytes );
    }
}
In the GBA, the "frame buffer" (as you PC folks would call it) is at 0x06000000 and the palette is at 0x05000000.
Also, the texture used for the palette lookup is now 256x256 instead of 256x1; this allows for palette changes in between scan lines.
That means I had to change this line in the shader:
OUT.Color = tex1D( PaletteTextureSampler, TextureColor.r );
to this:
OUT.Color = tex2D( PaletteTextureSampler, float2( TextureColor.r, IN.TexCoords.y ) );
I was curious why you might need to use threads. I would have done something like:
But I may be oversimplifying the problem, it's been a while since I did any emulator programming.