We are developing video processing on PowerVR, and we implement our filters in GLSL. One filter has about 600 lines of shader code. When we enable it, glDrawArrays
seems to fail. So I want to know: is there a limit on shader code size? Thanks!
There aren’t any limitations of shader code size. Have you checked that your shader compilation and linking succeeded?
If you share the code with us on this thread, or email it to devtech@imgtec.com, we can look into the issue for you.
Regards,
Joe
Hi Joe,
Thanks for your kind reply. Below is our shader code. I am sure it runs on the iPad and the iPhone 5; however, it always fails on the iPhone 4. Would you please also share some tips on optimizing this shader code? I am really a newbie at optimizing shaders. Thanks!
// Default precision for every float-based type in this fragment shader.
precision highp float;

// Scale applied to the patch distance inside computeWeight().
// NOTE(review): presumably the inverse of the filter strength h -- confirm
// against the host code that sets this uniform.
uniform float inv_h;
// Factors that turn gl_FragCoord pixel positions into texture coordinates in
// main() (presumably 1/width and 1/height of inputTexture0 -- TODO confirm).
uniform float texelWidthScale;
uniform float texelHeightScale;
// Source image sampled by main().
uniform sampler2D inputTexture0;

// Normalisation factor applied to the patch distance: 255^2 / (1024 * 32).
const float norValue = 255.0 * 255.0 / (1024.0 * 32.0);
// Exponent scale: computeWeight() evaluates e^(distance * norexp).
const float norexp = -1.0 / 32.0;
// Returns the similarity weight between two 3x3 patches.
//
// m1, m2: the patches to compare.
//
// The patch distance is 114 times the sum of the nine squared element
// differences, scaled by inv_h and norValue. The weight is
// 1024 * e^(distance * norexp), or 0 when the distance exceeds 255.
float computeWeight(mat3 m1,mat3 m2)
{
    mat3 diff = m1 - m2;

    // Sum of squared differences over all nine elements, one dot product per
    // matrix column. Dot products keep the instruction count far below that
    // of full 3x3 matrix multiplications.
    float ssd = dot(diff[0], diff[0])
              + dot(diff[1], diff[1])
              + dot(diff[2], diff[2]);

    // Named "dist" so the built-in distance() function is not shadowed.
    float dist = 114.0 * ssd * inv_h * norValue;

    // step(dist, 255.0) is 1.0 while dist <= 255.0 and 0.0 beyond it, which
    // cuts off patches that are too dissimilar.
    return step(dist, 255.0) * exp(dist * norexp) * 1024.0;
}
// Adds one candidate sample to a channel's running weighted sum.
//
// ref:   patch around the sample being filtered.
// other: patch around the candidate sample.
// value: the candidate sample itself.
// sum:   running sum of weight * value for the channel.
// wsum:  running sum of weights for the channel.
void addWeightedSample(mat3 ref, mat3 other, float value, inout float sum, inout float wsum)
{
    float w = computeWeight(ref, other);
    sum += w * value;
    wsum += w;
}

// Fragment entry point: replaces each of the four channels of the current
// texel with a weighted average of nearby samples, where the weight of a
// candidate comes from computeWeight() on the 3x3 patches around the channel
// and around the candidate.
//
// The patch constructors below treat the r, g, b, a channels of horizontally
// adjacent texels as consecutive samples.
// NOTE(review): presumably four single-channel pixels are packed per texel --
// confirm with the code that fills inputTexture0.
//
// For every channel the candidates form a window of 5 samples across and
// 5 rows down (rows -2..+2), centred on that channel.
void main()
{
    vec2 pt = gl_FragCoord.xy;
    vec2 scale = vec2(texelWidthScale, texelHeightScale);

    // Centre column, rows -3..+3.
    vec4 cur = texture2D(inputTexture0, pt * scale);
    vec4 tt1 = texture2D(inputTexture0, (pt + vec2(0.0, -1.0)) * scale);
    vec4 tt2 = texture2D(inputTexture0, (pt + vec2(0.0, -2.0)) * scale);
    vec4 tt3 = texture2D(inputTexture0, (pt + vec2(0.0, -3.0)) * scale);
    vec4 bb1 = texture2D(inputTexture0, (pt + vec2(0.0, 1.0)) * scale);
    vec4 bb2 = texture2D(inputTexture0, (pt + vec2(0.0, 2.0)) * scale);
    vec4 bb3 = texture2D(inputTexture0, (pt + vec2(0.0, 3.0)) * scale);

    // Left column (x - 1), rows -3..+3.
    vec4 ll1 = texture2D(inputTexture0, (pt + vec2(-1.0, 0.0)) * scale);
    vec4 lt11 = texture2D(inputTexture0, (pt + vec2(-1.0, -1.0)) * scale);
    vec4 lt12 = texture2D(inputTexture0, (pt + vec2(-1.0, -2.0)) * scale);
    vec4 lt13 = texture2D(inputTexture0, (pt + vec2(-1.0, -3.0)) * scale);
    vec4 lb11 = texture2D(inputTexture0, (pt + vec2(-1.0, 1.0)) * scale);
    vec4 lb12 = texture2D(inputTexture0, (pt + vec2(-1.0, 2.0)) * scale);
    vec4 lb13 = texture2D(inputTexture0, (pt + vec2(-1.0, 3.0)) * scale);

    // Right column (x + 1), rows -3..+3.
    vec4 rr1 = texture2D(inputTexture0, (pt + vec2(1.0, 0.0)) * scale);
    vec4 rt11 = texture2D(inputTexture0, (pt + vec2(1.0, -1.0)) * scale);
    vec4 rt12 = texture2D(inputTexture0, (pt + vec2(1.0, -2.0)) * scale);
    vec4 rt13 = texture2D(inputTexture0, (pt + vec2(1.0, -3.0)) * scale);
    vec4 rb11 = texture2D(inputTexture0, (pt + vec2(1.0, 1.0)) * scale);
    vec4 rb12 = texture2D(inputTexture0, (pt + vec2(1.0, 2.0)) * scale);
    vec4 rb13 = texture2D(inputTexture0, (pt + vec2(1.0, 3.0)) * scale);

    // Reference patches centred on cur.r, cur.g, cur.b and cur.a.
    mat3 cur3_r = mat3(lt11.a, tt1.r, tt1.g, ll1.a, cur.r, cur.g, lb11.a, bb1.r, bb1.g);
    mat3 cur3_g = mat3(tt1.rgb, cur.rgb, bb1.rgb);
    mat3 cur3_b = mat3(tt1.gba, cur.gba, bb1.gba);
    mat3 cur3_a = mat3(tt1.b, tt1.a, rt11.r, cur.b, cur.a, rr1.r, bb1.b, bb1.a, rb11.r);

    vec4 average = vec4(0.0);
    vec4 weight = vec4(0.0);
    mat3 other;

    ///////////////////////////////
    // First line: candidates from row -2.
    ///////////////////////////////
    // candidate lt12.b
    other = mat3(lt13.gba, lt12.gba, lt11.gba);
    addWeightedSample(cur3_r, other, lt12.b, average.r, weight.r);
    // candidate lt12.a
    other = mat3(lt13.b, lt13.a, tt3.r, lt12.b, lt12.a, tt2.r, lt11.b, lt11.a, tt1.r);
    addWeightedSample(cur3_r, other, lt12.a, average.r, weight.r);
    addWeightedSample(cur3_g, other, lt12.a, average.g, weight.g);
    // candidate tt2.r
    other = mat3(lt13.a, tt3.r, tt3.g, lt12.a, tt2.r, tt2.g, lt11.a, tt1.r, tt1.g);
    addWeightedSample(cur3_r, other, tt2.r, average.r, weight.r);
    addWeightedSample(cur3_g, other, tt2.r, average.g, weight.g);
    addWeightedSample(cur3_b, other, tt2.r, average.b, weight.b);
    // candidate tt2.g
    other = mat3(tt3.rgb, tt2.rgb, tt1.rgb);
    addWeightedSample(cur3_r, other, tt2.g, average.r, weight.r);
    addWeightedSample(cur3_g, other, tt2.g, average.g, weight.g);
    addWeightedSample(cur3_b, other, tt2.g, average.b, weight.b);
    addWeightedSample(cur3_a, other, tt2.g, average.a, weight.a);
    // candidate tt2.b
    other = mat3(tt3.gba, tt2.gba, tt1.gba);
    addWeightedSample(cur3_r, other, tt2.b, average.r, weight.r);
    addWeightedSample(cur3_g, other, tt2.b, average.g, weight.g);
    addWeightedSample(cur3_b, other, tt2.b, average.b, weight.b);
    addWeightedSample(cur3_a, other, tt2.b, average.a, weight.a);
    // candidate tt2.a
    other = mat3(tt3.b, tt3.a, rt13.r, tt2.b, tt2.a, rt12.r, tt1.b, tt1.a, rt11.r);
    addWeightedSample(cur3_g, other, tt2.a, average.g, weight.g);
    addWeightedSample(cur3_b, other, tt2.a, average.b, weight.b);
    addWeightedSample(cur3_a, other, tt2.a, average.a, weight.a);
    // candidate rt12.r
    other = mat3(tt3.a, rt13.r, rt13.g, tt2.a, rt12.r, rt12.g, tt1.a, rt11.r, rt11.g);
    addWeightedSample(cur3_b, other, rt12.r, average.b, weight.b);
    addWeightedSample(cur3_a, other, rt12.r, average.a, weight.a);
    // candidate rt12.g
    other = mat3(rt13.rgb, rt12.rgb, rt11.rgb);
    addWeightedSample(cur3_a, other, rt12.g, average.a, weight.a);

    ///////////////////////////////
    // Second line: candidates from row -1.
    ///////////////////////////////
    // candidate lt11.b
    other = mat3(lt12.gba, lt11.gba, ll1.gba);
    addWeightedSample(cur3_r, other, lt11.b, average.r, weight.r);
    // candidate lt11.a
    other = mat3(lt12.b, lt12.a, tt2.r, lt11.b, lt11.a, tt1.r, ll1.b, ll1.a, cur.r);
    addWeightedSample(cur3_r, other, lt11.a, average.r, weight.r);
    addWeightedSample(cur3_g, other, lt11.a, average.g, weight.g);
    // candidate tt1.r
    other = mat3(lt12.a, tt2.r, tt2.g, lt11.a, tt1.r, tt1.g, ll1.a, cur.r, cur.g);
    addWeightedSample(cur3_r, other, tt1.r, average.r, weight.r);
    addWeightedSample(cur3_g, other, tt1.r, average.g, weight.g);
    addWeightedSample(cur3_b, other, tt1.r, average.b, weight.b);
    // candidate tt1.g
    other = mat3(tt2.rgb, tt1.rgb, cur.rgb);
    addWeightedSample(cur3_r, other, tt1.g, average.r, weight.r);
    addWeightedSample(cur3_g, other, tt1.g, average.g, weight.g);
    addWeightedSample(cur3_b, other, tt1.g, average.b, weight.b);
    addWeightedSample(cur3_a, other, tt1.g, average.a, weight.a);
    // candidate tt1.b
    other = mat3(tt2.gba, tt1.gba, cur.gba);
    addWeightedSample(cur3_r, other, tt1.b, average.r, weight.r);
    addWeightedSample(cur3_g, other, tt1.b, average.g, weight.g);
    addWeightedSample(cur3_b, other, tt1.b, average.b, weight.b);
    addWeightedSample(cur3_a, other, tt1.b, average.a, weight.a);
    // candidate tt1.a
    other = mat3(tt2.b, tt2.a, rt12.r, tt1.b, tt1.a, rt11.r, cur.b, cur.a, rr1.r);
    addWeightedSample(cur3_g, other, tt1.a, average.g, weight.g);
    addWeightedSample(cur3_b, other, tt1.a, average.b, weight.b);
    addWeightedSample(cur3_a, other, tt1.a, average.a, weight.a);
    // candidate rt11.r
    other = mat3(tt2.a, rt12.r, rt12.g, tt1.a, rt11.r, rt11.g, cur.a, rr1.r, rr1.g);
    addWeightedSample(cur3_b, other, rt11.r, average.b, weight.b);
    addWeightedSample(cur3_a, other, rt11.r, average.a, weight.a);
    // candidate rt11.g
    other = mat3(rt12.rgb, rt11.rgb, rr1.rgb);
    addWeightedSample(cur3_a, other, rt11.g, average.a, weight.a);

    ///////////////////////////////
    // Third line: candidates from the current row.
    ///////////////////////////////
    // candidate ll1.b
    other = mat3(lt11.gba, ll1.gba, lb11.gba);
    addWeightedSample(cur3_r, other, ll1.b, average.r, weight.r);
    // candidate ll1.a
    other = mat3(lt11.b, lt11.a, tt1.r, ll1.b, ll1.a, cur.r, lb11.b, lb11.a, bb1.r);
    addWeightedSample(cur3_r, other, ll1.a, average.r, weight.r);
    addWeightedSample(cur3_g, other, ll1.a, average.g, weight.g);
    // candidate cur.r
    other = mat3(lt11.a, tt1.r, tt1.g, ll1.a, cur.r, cur.g, lb11.a, bb1.r, bb1.g);
    addWeightedSample(cur3_r, other, cur.r, average.r, weight.r);
    addWeightedSample(cur3_g, other, cur.r, average.g, weight.g);
    addWeightedSample(cur3_b, other, cur.r, average.b, weight.b);
    // candidate cur.g
    other = mat3(tt1.rgb, cur.rgb, bb1.rgb);
    addWeightedSample(cur3_r, other, cur.g, average.r, weight.r);
    addWeightedSample(cur3_g, other, cur.g, average.g, weight.g);
    addWeightedSample(cur3_b, other, cur.g, average.b, weight.b);
    addWeightedSample(cur3_a, other, cur.g, average.a, weight.a);
    // candidate cur.b
    other = mat3(tt1.gba, cur.gba, bb1.gba);
    addWeightedSample(cur3_r, other, cur.b, average.r, weight.r);
    addWeightedSample(cur3_g, other, cur.b, average.g, weight.g);
    addWeightedSample(cur3_b, other, cur.b, average.b, weight.b);
    addWeightedSample(cur3_a, other, cur.b, average.a, weight.a);
    // candidate cur.a
    other = mat3(tt1.b, tt1.a, rt11.r, cur.b, cur.a, rr1.r, bb1.b, bb1.a, rb11.r);
    addWeightedSample(cur3_g, other, cur.a, average.g, weight.g);
    addWeightedSample(cur3_b, other, cur.a, average.b, weight.b);
    addWeightedSample(cur3_a, other, cur.a, average.a, weight.a);
    // candidate rr1.r
    other = mat3(tt1.a, rt11.r, rt11.g, cur.a, rr1.r, rr1.g, bb1.a, rb11.r, rb11.g);
    addWeightedSample(cur3_b, other, rr1.r, average.b, weight.b);
    addWeightedSample(cur3_a, other, rr1.r, average.a, weight.a);
    // candidate rr1.g
    other = mat3(rt11.rgb, rr1.rgb, rb11.rgb);
    addWeightedSample(cur3_a, other, rr1.g, average.a, weight.a);

    ///////////////////////////////
    // Fourth line: candidates from row +1.
    ///////////////////////////////
    // candidate lb11.b
    other = mat3(ll1.gba, lb11.gba, lb12.gba);
    addWeightedSample(cur3_r, other, lb11.b, average.r, weight.r);
    // candidate lb11.a
    other = mat3(ll1.b, ll1.a, cur.r, lb11.b, lb11.a, bb1.r, lb12.b, lb12.a, bb2.r);
    addWeightedSample(cur3_r, other, lb11.a, average.r, weight.r);
    addWeightedSample(cur3_g, other, lb11.a, average.g, weight.g);
    // candidate bb1.r
    other = mat3(ll1.a, cur.r, cur.g, lb11.a, bb1.r, bb1.g, lb12.a, bb2.r, bb2.g);
    addWeightedSample(cur3_r, other, bb1.r, average.r, weight.r);
    addWeightedSample(cur3_g, other, bb1.r, average.g, weight.g);
    addWeightedSample(cur3_b, other, bb1.r, average.b, weight.b);
    // candidate bb1.g
    other = mat3(cur.rgb, bb1.rgb, bb2.rgb);
    addWeightedSample(cur3_r, other, bb1.g, average.r, weight.r);
    addWeightedSample(cur3_g, other, bb1.g, average.g, weight.g);
    addWeightedSample(cur3_b, other, bb1.g, average.b, weight.b);
    addWeightedSample(cur3_a, other, bb1.g, average.a, weight.a);
    // candidate bb1.b
    other = mat3(cur.gba, bb1.gba, bb2.gba);
    addWeightedSample(cur3_r, other, bb1.b, average.r, weight.r);
    addWeightedSample(cur3_g, other, bb1.b, average.g, weight.g);
    addWeightedSample(cur3_b, other, bb1.b, average.b, weight.b);
    addWeightedSample(cur3_a, other, bb1.b, average.a, weight.a);
    // candidate bb1.a
    other = mat3(cur.b, cur.a, rr1.r, bb1.b, bb1.a, rb11.r, bb2.b, bb2.a, rb12.r);
    addWeightedSample(cur3_g, other, bb1.a, average.g, weight.g);
    addWeightedSample(cur3_b, other, bb1.a, average.b, weight.b);
    addWeightedSample(cur3_a, other, bb1.a, average.a, weight.a);
    // candidate rb11.r
    other = mat3(cur.a, rr1.r, rr1.g, bb1.a, rb11.r, rb11.g, bb2.a, rb12.r, rb12.g);
    addWeightedSample(cur3_b, other, rb11.r, average.b, weight.b);
    addWeightedSample(cur3_a, other, rb11.r, average.a, weight.a);
    // candidate rb11.g
    other = mat3(rr1.rgb, rb11.rgb, rb12.rgb);
    addWeightedSample(cur3_a, other, rb11.g, average.a, weight.a);

    ///////////////////////////////
    // Fifth line: candidates from row +2.
    ///////////////////////////////
    // candidate lb12.b
    other = mat3(lb11.gba, lb12.gba, lb13.gba);
    addWeightedSample(cur3_r, other, lb12.b, average.r, weight.r);
    // candidate lb12.a
    other = mat3(lb11.b, lb11.a, bb1.r, lb12.b, lb12.a, bb2.r, lb13.b, lb13.a, bb3.r);
    addWeightedSample(cur3_r, other, lb12.a, average.r, weight.r);
    addWeightedSample(cur3_g, other, lb12.a, average.g, weight.g);
    // candidate bb2.r
    other = mat3(lb11.a, bb1.r, bb1.g, lb12.a, bb2.r, bb2.g, lb13.a, bb3.r, bb3.g);
    addWeightedSample(cur3_r, other, bb2.r, average.r, weight.r);
    addWeightedSample(cur3_g, other, bb2.r, average.g, weight.g);
    addWeightedSample(cur3_b, other, bb2.r, average.b, weight.b);
    // candidate bb2.g
    other = mat3(bb1.rgb, bb2.rgb, bb3.rgb);
    addWeightedSample(cur3_r, other, bb2.g, average.r, weight.r);
    addWeightedSample(cur3_g, other, bb2.g, average.g, weight.g);
    addWeightedSample(cur3_b, other, bb2.g, average.b, weight.b);
    addWeightedSample(cur3_a, other, bb2.g, average.a, weight.a);
    // candidate bb2.b
    other = mat3(bb1.gba, bb2.gba, bb3.gba);
    addWeightedSample(cur3_r, other, bb2.b, average.r, weight.r);
    addWeightedSample(cur3_g, other, bb2.b, average.g, weight.g);
    addWeightedSample(cur3_b, other, bb2.b, average.b, weight.b);
    addWeightedSample(cur3_a, other, bb2.b, average.a, weight.a);
    // candidate bb2.a
    other = mat3(bb1.b, bb1.a, rb11.r, bb2.b, bb2.a, rb12.r, bb3.b, bb3.a, rb13.r);
    addWeightedSample(cur3_g, other, bb2.a, average.g, weight.g);
    addWeightedSample(cur3_b, other, bb2.a, average.b, weight.b);
    addWeightedSample(cur3_a, other, bb2.a, average.a, weight.a);
    // candidate rb12.r
    other = mat3(bb1.a, rb11.r, rb11.g, bb2.a, rb12.r, rb12.g, bb3.a, rb13.r, rb13.g);
    addWeightedSample(cur3_b, other, rb12.r, average.b, weight.b);
    addWeightedSample(cur3_a, other, rb12.r, average.a, weight.a);
    // candidate rb12.g
    other = mat3(rb11.rgb, rb12.rgb, rb13.rgb);
    addWeightedSample(cur3_a, other, rb12.g, average.a, weight.a);

    // tmp1 is 1.0 for channels whose total weight is <= 0.0; those channels
    // fall back to the unfiltered sample, all others use the clamped
    // weighted mean.
    vec4 tmp1 = step(weight, vec4(0.0));
    vec4 tmp2 = average / weight;
    tmp2 = clamp(tmp2, 0.0, 1.0);
    gl_FragColor = (vec4(1.0) - tmp1) * tmp2 + tmp1 * cur;
}
Hi,
The shader is extremely expensive. When targeting an SGX540 instruction set, our PVRShaderEditor tool reports the best case is 2712 instructions, the worst is 5284. As a comparison, a current high-end mobile game will usually have fragment shaders that are between 5-40 instructions.
Although there isn’t a limit on the size of GLSL ES shader source, there is a limit to the number of instructions that can be processed by a given GPU. This limit will vary depending on the target GPU. If you hit this limit, the compiler should produce an error.
I suspect the problem you’re seeing on the target is purely compilation time. On my desktop machine, the shader takes 2-3 seconds to compile. On a mobile device, this compilation time will be much higher. This high compile time may cause iOS to kill the compiler as it may appear unresponsive for a very long time. You would have to discuss the issue with Apple to understand why compilation doesn’t succeed on the iPhone 4.
My recommendation would be to revisit your algorithm and see if there’s any way it can be simplified. If it’s not possible to find a compromise between speed and quality of your output, you could split the render into multiple passes (i.e. output texture of pass 1 is read in by pass 2, and so on).
Regards,
Joe