Sign in to follow this  
Andreas999

Mandelbrot fractal

Recommended Posts

Hi, I wrote a mandelbrot fractal program with the jMonkeyEgine using GLSL: http://www.lichtundliebe.info/projects/2DFractal/2DFractal.jnlp. Here is the code: http://www.lichtundliebe.info/projects/2DFractal.zip. If you start the program, all is fine. Clicking at the image, you can zoom in. That's also working with Fractal>Julia. If you click 5 times at the image, you are in the precision mode. There, the shader uses a 80 bits fixed point arithmetics for a higher precision. With ati cards >=x1300, the program is working fine. But with shader model 3.0 nvidia cards, the image is corrupted. The relevant shader MandelbrotStep2.frag, you can find in my code in the directory 2DFractal\src\Fractal2D\Shader: //uniform sampler2D myColorTable; uniform sampler2D inputTexture1; uniform sampler2D inputTexture2; uniform sampler2D inputTexture3; uniform sampler2D inputTexture4; uniform sampler2D inputTexture5; uniform float maxIter; //1/1024.0=0.0009765625 // the float mantissa has only 23 bits => number->number1 should be enough void convertToBigSize(in float number,out vec4 number1, out vec4 number2){ number1=vec4(0.0); number2=vec4(0.0); float signum=sign(number); number=abs(number); float temp=floor(number); number1.r=temp; number-=temp; number*=1024.0; temp=floor(number); number1.g=temp; number-=temp; number*=1024.0; temp=floor(number); number1.b=temp; number-=temp; number*=1024.0; temp=floor(number); number1.a=temp; number1*=signum; } //41 Ops void add(inout vec4 a1, inout vec4 a2, in vec4 b1, in vec4 b2){ a1+=b1; a2+=b2; //the values after the point have to be greater than 0.0 a1.r-=2.0;//-2 a2.a+=2048.0; a1.gba+=2046.0; a2.rgb+=2046.0; float temp=floor(a2.a*0.0009765625); a2.b+=temp; a2.a-=temp*1024.0; temp=floor(a2.b*0.0009765625); a2.g+=temp; a2.b-=temp*1024.0; temp=floor(a2.g*0.0009765625); a2.r+=temp; a2.g-=temp*1024.0; temp=floor(a2.r*0.0009765625); a1.a+=temp; a2.r-=temp*1024.0; temp=floor(a1.a*0.0009765625); a1.b+=temp; a1.a-=temp*1024.0; 
temp=floor(a1.b*0.0009765625); a1.g+=temp; a1.b-=temp*1024.0; temp=floor(a1.g*0.0009765625); a1.r+=temp; a1.g-=temp*1024.0; } //44+30+63=137ops void mult(inout vec4 a1, inout vec4 a2, in vec4 b1, in vec4 b2){ vec4 z11=a1*b1.r; vec4 z12=a2*b1.r; vec4 z21=a1*b1.g; vec4 z22=a2*b1.g; vec4 z31=a1*b1.b; vec4 z32=a2*b1.b; vec4 z41=a1*b1.a; vec4 z42=a2*b1.a; vec4 z51=a1*b2.r; vec4 z52=a2*b2.r; vec4 z61=a1*b2.g; vec4 z62=a2*b2.g; vec4 z71=a1*b2.b; vec4 z72=a2*b2.b; vec4 z81=a1*b2.a; vec4 z82=a2*b2.a; vec3 temp2=z82.gba; temp2.rg+=z72.ba; temp2.r+=z62.a; vec4 temp1=z52; temp1.a+=z82.r; temp1.rgb+=z81.gba; temp1.ba+=z72.rg; temp1.rg+=z71.ba; temp1.gba+=z62.rgb; temp1.r+=z61.a; temp1.rgb+=z42.gba; temp1.rg+=z32.ba; temp1.r+=z22.a; a2=z51; a2.a+=z81.r; a2.ba+=z71.rg; a2.gba+=z61.rgb; a2.a+=z42.r; a2.rgb+=z41.gba; a2.ba+=z32.rg; a2.rg+=z31.ba; a2.gba+=z22.rgb; a2.r+=z21.a; a2+=z12; a1=z11; a1.gba+=z21.rgb; a1.ba+=z31.rg; a1.a+=z41.r; float tempSign=sign(temp2.g); float temp=floor(abs(temp2.g)*0.0009765625); temp2.r+=temp*tempSign; tempSign=sign(temp2.r); temp=floor(abs(temp2.r)*0.0009765625); temp1.a+=temp*tempSign; tempSign=sign(temp1.a); temp=floor(abs(temp1.a)*0.0009765625); temp1.b+=temp*tempSign; tempSign=sign(temp1.b); temp=floor(abs(temp1.b)*0.0009765625); temp1.g+=temp*tempSign; tempSign=sign(temp1.g); temp=floor(abs(temp1.g)*0.0009765625); temp1.r+=temp*tempSign; tempSign=sign(temp1.r); temp=floor(abs(temp1.r)*0.0009765625); a2.a+=temp*tempSign; tempSign=sign(a2.a); temp=floor(abs(a2.a)*0.0009765625); a2.b+=temp*tempSign; a2.a-=temp*1024.0*tempSign; tempSign=sign(a2.b); temp=floor(abs(a2.b)*0.0009765625); a2.g+=temp*tempSign; a2.b-=temp*1024.0*tempSign; tempSign=sign(a2.g); temp=floor(abs(a2.g)*0.0009765625); a2.r+=temp*tempSign; a2.g-=temp*1024.0*tempSign; tempSign=sign(a2.r); temp=floor(abs(a2.r)*0.0009765625); a1.a+=temp*tempSign; a2.r-=temp*1024.0*tempSign; tempSign=sign(a1.a); temp=floor(abs(a1.a)*0.0009765625); a1.b+=temp*tempSign; a1.a-=temp*1024.0*tempSign; 
tempSign=sign(a1.b); temp=floor(abs(a1.b)*0.0009765625); a1.g+=temp*tempSign; a1.b-=temp*1024.0*tempSign; tempSign=sign(a1.g); temp=floor(abs(a1.g)*0.0009765625); a1.r+=temp*tempSign; a1.g-=temp*1024.0*tempSign; } /*//only for test purposes float convertToFloat(in vec4 X1, in vec4 X2){ float number=X1.r; float divi=1024.0; number+=X1.g/divi; divi*=1024.0; number+=X1.b/divi; divi*=1024.0; number+=X1.a/divi; divi*=1024.0; number+=X2.r/divi; divi*=1024.0; number+=X2.g/divi; divi*=1024.0; number+=X2.b/divi; divi*=1024.0; number+=X2.a/divi; return number; }*/ //a=a+b-4, 41 Ops void add2(inout vec4 a1, inout vec4 a2, in vec4 b1, in vec4 b2){ a1+=b1; a2+=b2; //the values after the point have to be smaller than 0.0 a1.r-=2.0;//-4+2 a2.a-=2048.0; a1.gba-=2046.0; a2.rgb-=2046.0; float temp=ceil(a2.a*0.0009765625); a2.b+=temp; a2.a-=temp*1024.0; temp=ceil(a2.b*0.0009765625); a2.g+=temp; a2.b-=temp*1024.0; temp=ceil(a2.g*0.0009765625); a2.r+=temp; a2.g-=temp*1024.0; temp=ceil(a2.r*0.0009765625); a1.a+=temp; a2.r-=temp*1024.0; temp=ceil(a1.a*0.0009765625); a1.b+=temp; a1.a-=temp*1024.0; temp=ceil(a1.b*0.0009765625); a1.g+=temp; a1.b-=temp*1024.0; temp=ceil(a1.g*0.0009765625); a1.r+=temp; a1.g-=temp*1024.0; //a1.r=floor(a1.r); //a1.g=floor(a1.g); } void decompress(in vec4 inputValue, out vec4 outputValue1, out vec4 outputValue2){ vec4 tempReader=inputValue; vec4 tempSign=floor(tempReader*0.000000476837158203125); tempReader-=tempSign*2097152.0; tempSign=-2.0*(tempSign-0.5); outputValue2=floor(tempReader*0.00048828125); tempReader-=outputValue2*2048.0; outputValue2*=tempSign; tempSign=floor(tempReader*0.0009765625); tempReader-=tempSign*1024.0; tempSign=-2.0*(tempSign-0.5); outputValue1=tempReader*tempSign; } bool isTrue(vec4 vX1, vec4 vX2, vec4 vY1, vec4 vY2){ add2(vX1, vX2, vY1, vY2); return all(lessThanEqual(vX1,vec4(0.0))) && all(lessThanEqual(vX2,vec4(0.0))); } void main () { //z vec4 zX1,zX2; decompress(texture2D(inputTexture1, gl_TexCoord[0].xy),zX1,zX2);//zx vec4 zY2,zY1; 
decompress(texture2D(inputTexture2, gl_TexCoord[0].xy),zY1,zY2);//zy vec4 positionX2,positionX1; decompress(texture2D(inputTexture4, gl_TexCoord[0].xy),positionX1,positionX2);//cx vec4 positionY2, positionY1; decompress(texture2D(inputTexture5, gl_TexCoord[0].xy),positionY1,positionY2);//cy gl_FragData[0] = vec4(1.0,1.0,1.0,1.0); vec4 tempZX1=zX1; vec4 tempZX2=zX2; vec4 tempZY1=zY1; vec4 tempZY2=zY2; //vec2 position=z; vec4 temp1; vec4 temp2; //tempZ.x+tempZ.y //add2(tempZX1,tempZX2,tempZX1, tempZY2); float i=texture2D(inputTexture3, gl_TexCoord[0].xy).x; //(tempZ.x+tempZ.y)<=4.0 equivalent to tempZ.x+tempZ.y-4<=0.0 while (i<maxIter && isTrue(tempZX1,tempZX2, tempZY1, tempZY2)) { //tempZ=z*z; tempZX1=zX1; tempZX2=zX2; tempZY1=zY1; tempZY2=zY2; mult(tempZX1, tempZX2, zX1, zX2); mult(tempZY1, tempZY2, zY1, zY2); //z = vec2(tempZ.x - tempZ.y, 2.0*z.x*z.y) + position; temp1=tempZX1; temp2=tempZX2; add(temp1, temp2, -tempZY1, -tempZY2); mult(zY1, zY2, zX1, zX2); add(zY1, zY2, zY1, zY2); zX1=temp1; zX2=temp2; add(zX1, zX2, positionX1, positionX2); add(zY1, zY2, positionY1, positionY2); //tempZ.x+tempZ.y //add2(tempZX1,tempZX2, tempZY1, tempZY2); i+=1.0; } //first 10 bits of the mantissa are used for zX1, bit 11 for the sign of zX1 //the next 10 bits are used for zX2, bit 22 for the sign of z vec4 signs=-ceil((sign(zX1)-1.0)*0.5)*1024.0-ceil((sign(zX2)-1.0)*0.5)*2097152.0;//1024*2048 gl_FragData[1]=abs(zX1)+abs(zX2)*2048.0+signs;//zx signs=-ceil((sign(zY1)-1.0)*0.5)*1024.0-ceil((sign(zY2)-1.0)*0.5)*2097152.0;//1024*2048 gl_FragData[2]=abs(zY1)+abs(zY2)*2048.0+signs;//zy //i is stored here gl_FragData[3]=vec4(i,0.0,0.0,0.0);//current iteration if (i <= maxIter) { float color=i/maxIter; gl_FragData[0]=vec4(color,color,1.0,1.0);//display } } This shader reads from 5 rgba 32 bits float textures and writes to 4 rgba 32 bits float textures where 2 of the read textures are rewritten. This shader will be executed again and again until the maximum iteration is reached. 
With older ATI cards, the shader at first had too many ALU instructions, which caused an exception, but I was able to fix that. Do you know why the image is corrupted with NVIDIA cards and how to fix it? Are some limits perhaps being exceeded? There seems to be something wrong with the reading process. Best, Andreas

Share this post


Link to post
Share on other sites
Quote:
Original post by Andreas999
Do you know why the image is corrupted with NVIDIA cards and how to fix it? Are there perhaps some limits exceeded? There seems to be something wrong with the reading process.
It seems to run all right on my Intel-integrated X3100, despite the lousy Mac drivers, so I find it hard to believe that you are overwhelming those NVidia cards.

Share this post


Link to post
Share on other sites
Quote:
Original post by swiftcoder
Quote:
Original post by Andreas999
Do you know why the image is corrupted with NVIDIA cards and how to fix it? Are there perhaps some limits exceeded? There seems to be something wrong with the reading process.
It seems to run all right on my Intel-integrated X3100, despite the lousy Mac drivers, so I find it hard to believe that you are overwhelming those NVidia cards.

Thx. Does it also run in the precision mode? Because, in speed mode, all is ok.

Share this post


Link to post
Share on other sites
Quote:
Original post by Andreas999
where 2 of the read textures are rewritten.


Are you using ping-pong rendering or just writing to the same texture you're reading in the shader? The second is undefined behaviour and might cause your problem.

Share this post


Link to post
Share on other sites
Quote:
Original post by Andreas999
Quote:
Original post by swiftcoder
Quote:
Original post by Andreas999
Do you know why the image is corrupted with NVIDIA cards and how to fix it? Are there perhaps some limits exceeded? There seems to be something wrong with the reading process.
It seems to run all right on my Intel-integrated X3100, despite the lousy Mac drivers, so I find it hard to believe that you are overwhelming those NVidia cards.

Thx. Does it also run in the precision mode? Because, in speed mode, all is ok.
Yes, albeit extremely slowly. Certainly not a usable speed, it takes several seconds to form the image.

Share this post


Link to post
Share on other sites
Hi,
in precision mode, a multiplication, for example, needs a lot of operations to provide 80 bits of precision. That's why it is very slow on older cards.
On another forum, I was told that it now works with an NVIDIA 8400.

Best,
Andreas

Share this post


Link to post
Share on other sites
Hi,
I was told that with an NVIDIA 9600 the image is still corrupted.
Here is how it works:
In the precision mode, I use 9 rgba32f textures: t1, t2, t3, t4, t5, t6, pos1, pos2 and display.
Step 1: calculate the current position, convert it to the 80-bit format and write it to pos1 and pos2 (Shader 1).
Step 2: read pos1 and pos2 and calculate some iterations, display the current step in the display texture and write the current result to the textures t4, t5 and t6 (Shader 2).
Step 3: read pos1, pos2, t4, t5, t6 and calculate some iterations, display the current step in the display texture and write the current result to the textures t1, t2 and t3 (Shader 3).
Step 4: read pos1, pos2, t1, t2, t3 and calculate some iterations, display the current step in the display texture and write the current result to the textures t4, t5 and t6 (Shader 3).
Step 5: repeat Step 3 and Step 4 until the fractal has been calculated with all iterations.

What could cause the corrupted image?

Best,
Andreas

Share this post


Link to post
Share on other sites

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!

Register a new account

Sign in

Already have an account? Sign in here.

Sign In Now

Sign in to follow this