Imagination PowerVR SDK Blog

glsl code limit size

pvrshadereditor

#1

we are developing video process in powerVR. and we implement our filters in GLSL. there is one filter have about 600 line shader code. when we enable it, glDrawArray

seems failed. so i want to know if there is a shader code size limitation? thanks!


#2

There aren’t any limitations of shader code size. Have you checked that your shader compilation and linking succeeded?



If you share the code with us on this thread, or email it to devtech@imgtec.com, we can look into the issue for you.



Regards,

Joe


#3

hi, joe



thanks for your kindly reply. below is our shader code, i am sure we can run it on Ipad and Iphone5, however, it always failed at Iphone4. would you please also share with me some tips about optimize this shader code? i am really a newbie about optimizing shader code. thanks!





precision highp float;



uniform float inv_h;

uniform float texelWidthScale;

uniform float texelHeightScale;

uniform sampler2D inputTexture0;



const float norValue = 255.0 * 255.0 / (1024.032.0);

const float norexp = -1.0/32.0;



float computeWeight(mat3 m1,mat3 m2)

{

float distance = 0.0;

mat3 kernel1 = mat3(114.0,114.0,114.0,0.0,0.0,0.0,0.0,0.0,0.0);

mat3 kernel2 = mat3(1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0);

mat3 diff = m1 - m2;

diff = matrixCompMult(diff,diff);

diff = kernel2 * diff * kernel1;

//float d = 114.0 * (diff[0][0]diff[0][0] + diff[0][1]diff[0][1] + diff[0][2]diff[0][2] + diff[1][0]diff[1][0] + diff[1][1]diff[1][1] + diff[1][2]diff[1][2] + diff[0][2]diff[0][2] + diff[1][2]diff[1][2] + diff[2][2]diff[2][2]);

distance = diff[0][0] * inv_h * norValue;

float weight = step(distance,255.0) * pow(2.718281828,distance
norexp) * 1024.0;

return weight;

}



void main()

{

vec2 pt = vec2(gl_FragCoord);

vec2 scale = vec2(texelWidthScale, texelHeightScale);

vec4 cur = texture2D(inputTexture0, pt * scale);



vec4 ll1 = texture2D(inputTexture0, (pt+vec2(-1.0,0.0)) * scale);

vec4 tt1 = texture2D(inputTexture0, (pt+vec2(0.0,-1.0)) * scale);

vec4 tt2 = texture2D(inputTexture0, (pt+vec2(0.0,-2.0)) * scale);

vec4 tt3 = texture2D(inputTexture0, (pt+vec2(0.0,-3.0)) * scale);

vec4 rr1 = texture2D(inputTexture0, (pt+vec2(1.0,0.0)) * scale);

vec4 bb1 = texture2D(inputTexture0, (pt+vec2(0.0,1.0)) * scale);

vec4 bb2 = texture2D(inputTexture0, (pt+vec2(0.0,2.0)) * scale);

vec4 bb3 = texture2D(inputTexture0, (pt+vec2(0.0,3.0)) * scale);



vec4 lt13 = texture2D(inputTexture0, (pt+vec2(-1.0,-3.0)) * scale);

vec4 lt12 = texture2D(inputTexture0, (pt+vec2(-1.0,-2.0)) * scale);

vec4 lt11 = texture2D(inputTexture0, (pt+vec2(-1.0,-1.0)) * scale);



vec4 rt11 = texture2D(inputTexture0, (pt+vec2(1.0,-1.0)) * scale);

vec4 rt12 = texture2D(inputTexture0, (pt+vec2(1.0,-2.0)) * scale);

vec4 rt13 = texture2D(inputTexture0, (pt+vec2(1.0,-3.0)) * scale);



vec4 rb11 = texture2D(inputTexture0, (pt+vec2(1.0,1.0)) * scale);

vec4 rb12 = texture2D(inputTexture0, (pt+vec2(1.0,2.0)) * scale);

vec4 rb13 = texture2D(inputTexture0, (pt+vec2(1.0,3.0)) * scale);



vec4 lb11 = texture2D(inputTexture0, (pt+vec2(-1.0,1.0)) * scale);

vec4 lb12 = texture2D(inputTexture0, (pt+vec2(-1.0,2.0)) * scale);

vec4 lb13 = texture2D(inputTexture0, (pt+vec2(-1.0,3.0)) * scale);



mat3 cur3_r = mat3(lt11.a,tt1.r,tt1.g,ll1.a,cur.r,cur.g,lb11.a,bb1.r,bb1.g);

mat3 cur3_g = mat3(tt1.rgb,cur.rgb,bb1.rgb);

mat3 cur3_b = mat3(tt1.gba,cur.gba,bb1.gba);

mat3 cur3_a = mat3(tt1.b,tt1.a,rt11.r,cur.b,cur.a,rr1.r,bb1.b,bb1.a,rb11.r);



vec4 average = vec4(0.0);

vec4 weight = vec4(0.0);



//first line

///////////////////////////////

//point 1

mat3 other = mat3(lt13.gba,lt12.gba,lt11.gba);

float fweight = computeWeight(cur3_r,other);

average.r += fweight
lt12.b;

weight.r += fweight;



//point 2

other = mat3(lt13.b,lt13.a,tt3.r,lt12.b,lt12.a,tt2.r,lt11.b,lt11.a,tt1.r);

fweight = computeWeight(cur3_r,other);

average.r += fweight
lt12.a;

weight.r += fweight;



fweight = computeWeight(cur3_g,other);

average.g += fweight
lt12.a;

weight.g += fweight;



//point 3

other = mat3(lt13.a,tt3.r,tt3.g,lt12.a,tt2.r,tt2.g,lt11.a,tt1.r,tt1.g);

fweight = computeWeight(cur3_r,other);

average.r += fweight * tt2.r;

weight.r += fweight;



fweight = computeWeight(cur3_g,other);

average.g += fweight
tt2.r;

weight.g += fweight;



fweight = computeWeight(cur3_b,other);

average.b += fweight
tt2.r;

weight.b += fweight;



//point 4

other = mat3(tt3.rgb,tt2.rgb,tt1.rgb);

fweight = computeWeight(cur3_r,other);

average.r += fweight
tt2.g;

weight.r += fweight;



fweight = computeWeight(cur3_g,other);

average.g += fweight
tt2.g;

weight.g += fweight;



fweight = computeWeight(cur3_b,other);

average.b += fweight
tt2.g;

weight.b += fweight;



fweight = computeWeight(cur3_a,other);

average.a += fweight
tt2.g;

weight.a += fweight;



//point 5

other = mat3(tt3.gba,tt2.gba,tt1.gba);

fweight = computeWeight(cur3_r,other);

average.r += fweighttt2.b;

weight.r += fweight;



fweight = computeWeight(cur3_g,other);

average.g += fweight
tt2.b;

weight.g += fweight;



fweight = computeWeight(cur3_b,other);

average.b += fweighttt2.b;

weight.b += fweight;



fweight = computeWeight(cur3_a,other);

average.a += fweight
tt2.b;

weight.a += fweight;



//point

other = mat3(tt3.b,tt3.a,rt13.r,tt2.b,tt2.a,rt12.r,tt1.b,tt1.a,rt11.r);

fweight = computeWeight(cur3_g,other);

average.g += fweighttt2.a;

weight.g += fweight;



fweight = computeWeight(cur3_b,other);

average.b += fweight
tt2.a;

weight.b += fweight;



fweight = computeWeight(cur3_a,other);

average.a += fweighttt2.a;

weight.a += fweight;



//point

other = mat3(tt3.a,rt13.r,rt13.g,tt2.a,rt12.r,rt12.g,tt1.a,rt11.r,rt11.g);

fweight = computeWeight(cur3_b,other);

average.b += fweight
rt12.r;

weight.b += fweight;



fweight = computeWeight(cur3_a,other);

average.a += fweightrt12.r;

weight.a += fweight;



//point

other = mat3(rt13.rgb,rt12.rgb,rt11.rgb);

fweight = computeWeight(cur3_a,other);

average.a += fweight
rt12.g;

weight.a += fweight;



//second line

///////////////////////////////

//point 6

other = mat3(lt12.gba,lt11.gba,ll1.gba);

fweight = computeWeight(cur3_r,other);

average.r += fweightlt11.b;

weight.r += fweight;



//point 7

other = mat3(lt12.b,lt12.a,tt2.r,lt11.b,lt11.a,tt1.r,ll1.b,ll1.a,cur.r);

fweight = computeWeight(cur3_r,other);

average.r += fweight
lt11.a;

weight.r += fweight;



fweight = computeWeight(cur3_g,other);

average.g += fweightlt11.a;

weight.g += fweight;



//point 8

other = mat3(lt12.a,tt2.r,tt2.g,lt11.a,tt1.r,tt1.g,ll1.a,cur.r,cur.g);

fweight = computeWeight(cur3_r,other);

average.r += fweight * tt1.r;

weight.r += fweight;



fweight = computeWeight(cur3_g,other);

average.g += fweight
tt1.r;

weight.g += fweight;



fweight = computeWeight(cur3_b,other);

average.b += fweighttt1.r;

weight.b += fweight;



//point 9

other = mat3(tt2.rgb,tt1.rgb,cur.rgb);

fweight = computeWeight(cur3_r,other);

average.r += fweight
tt1.g;

weight.r += fweight;



fweight = computeWeight(cur3_g,other);

average.g += fweighttt1.g;

weight.g += fweight;



fweight = computeWeight(cur3_b,other);

average.b += fweight
tt1.g;

weight.b += fweight;



fweight = computeWeight(cur3_a,other);

average.a += fweighttt1.g;

weight.a += fweight;



//point 10

other = mat3(tt2.gba,tt1.gba,cur.gba);

fweight = computeWeight(cur3_r,other);

average.r += fweight
tt1.b;

weight.r += fweight;



fweight = computeWeight(cur3_g,other);

average.g += fweighttt1.b;

weight.g += fweight;



fweight = computeWeight(cur3_b,other);

average.b += fweight
tt1.b;

weight.b += fweight;



fweight = computeWeight(cur3_a,other);

average.a += fweighttt1.b;

weight.a += fweight;



//point

other = mat3(tt2.b,tt2.a,rt12.r,tt1.b,tt1.a,rt11.r,cur.b,cur.a,rr1.r);

fweight = computeWeight(cur3_g,other);

average.g += fweight
tt1.a;

weight.g += fweight;



fweight = computeWeight(cur3_b,other);

average.b += fweighttt1.a;

weight.b += fweight;



fweight = computeWeight(cur3_a,other);

average.a += fweight
tt1.a;

weight.a += fweight;



//point

other = mat3(tt2.a,rt12.r,rt12.g,tt1.a,rt11.r,rt11.g,cur.a,rr1.r,rr1.g);

fweight = computeWeight(cur3_b,other);

average.b += fweightrt11.r;

weight.b += fweight;



fweight = computeWeight(cur3_a,other);

average.a += fweight
rt11.r;

weight.a += fweight;



//point

other = mat3(rt12.rgb,rt11.rgb,rr1.rgb);

fweight = computeWeight(cur3_a,other);

average.a += fweightrt11.g;

weight.a += fweight;



//third line

///////////////////////////////

//point 11

other = mat3(lt11.gba,ll1.gba,lb11.gba);

fweight = computeWeight(cur3_r,other);

average.r += fweight
ll1.b;

weight.r += fweight;



//point 12

other = mat3(lt11.b,lt11.a,tt1.r,ll1.b,ll1.a,cur.r,lb11.b,lb11.a,bb1.r);

fweight = computeWeight(cur3_r,other);

average.r += fweightll1.a;

weight.r += fweight;



fweight = computeWeight(cur3_g,other);

average.g += fweight
ll1.a;

weight.g += fweight;



//point 13

other = mat3(lt11.a,tt1.r,tt1.g,ll1.a,cur.r,cur.g,lb11.a,bb1.r,bb1.g);

fweight = computeWeight(cur3_r,other);

average.r += fweight * cur.r;

weight.r += fweight;



fweight = computeWeight(cur3_g,other);

average.g += fweightcur.r;

weight.g += fweight;



fweight = computeWeight(cur3_b,other);

average.b += fweight
cur.r;

weight.b += fweight;



//point 14

other = mat3(tt1.rgb,cur.rgb,bb1.rgb);

fweight = computeWeight(cur3_r,other);

average.r += fweightcur.g;

weight.r += fweight;



fweight = computeWeight(cur3_g,other);

average.g += fweight
cur.g;

weight.g += fweight;



fweight = computeWeight(cur3_b,other);

average.b += fweightcur.g;

weight.b += fweight;



fweight = computeWeight(cur3_a,other);

average.a += fweight
cur.g;

weight.a += fweight;



//point 15

other = mat3(tt1.gba,cur.gba,bb1.gba);

fweight = computeWeight(cur3_r,other);

average.r += fweightcur.b;

weight.r += fweight;



fweight = computeWeight(cur3_g,other);

average.g += fweight
cur.b;

weight.g += fweight;



fweight = computeWeight(cur3_b,other);

average.b += fweightcur.b;

weight.b += fweight;



fweight = computeWeight(cur3_a,other);

average.a += fweight
cur.b;

weight.a += fweight;



//point

other = mat3(tt1.b,tt1.a,rt11.r,cur.b,cur.a,rr1.r,bb1.b,bb1.a,rb11.r);

fweight = computeWeight(cur3_g,other);

average.g += fweightcur.a;

weight.g += fweight;



fweight = computeWeight(cur3_b,other);

average.b += fweight
cur.a;

weight.b += fweight;



fweight = computeWeight(cur3_a,other);

average.a += fweightcur.a;

weight.a += fweight;



//point

other = mat3(tt1.a,rt11.r,rt11.g,cur.a,rr1.r,rr1.g,bb1.a,rb11.r,rb11.g);

fweight = computeWeight(cur3_b,other);

average.b += fweight
rr1.r;

weight.b += fweight;



fweight = computeWeight(cur3_a,other);

average.a += fweightrr1.r;

weight.a += fweight;



//point

other = mat3(rt11.rgb,rr1.rgb,rb11.rgb);

fweight = computeWeight(cur3_a,other);

average.a += fweight
rr1.g;

weight.a += fweight;



//fouth line

///////////////////////////////

//point 16

other = mat3(ll1.gba,lb11.gba,lb12.gba);

fweight = computeWeight(cur3_r,other);

average.r += fweightlb11.b;

weight.r += fweight;



//point 17

other = mat3(ll1.b,ll1.a,cur.r,lb11.b,lb11.a,bb1.r,lb12.b,lb12.a,bb2.r);

fweight = computeWeight(cur3_r,other);

average.r += fweight
lb11.a;

weight.r += fweight;



fweight = computeWeight(cur3_g,other);

average.g += fweightlb11.a;

weight.g += fweight;



//point 18

other = mat3(ll1.a,cur.r,cur.g,lb11.a,bb1.r,bb1.g,lb12.a,bb2.r,bb2.g);

fweight = computeWeight(cur3_r,other);

average.r += fweight * bb1.r;

weight.r += fweight;



fweight = computeWeight(cur3_g,other);

average.g += fweight
bb1.r;

weight.g += fweight;



fweight = computeWeight(cur3_b,other);

average.b += fweightbb1.r;

weight.b += fweight;



//point 19

other = mat3(cur.rgb,bb1.rgb,bb2.rgb);

fweight = computeWeight(cur3_r,other);

average.r += fweight
bb1.g;

weight.r += fweight;



fweight = computeWeight(cur3_g,other);

average.g += fweightbb1.g;

weight.g += fweight;



fweight = computeWeight(cur3_b,other);

average.b += fweight
bb1.g;

weight.b += fweight;



fweight = computeWeight(cur3_a,other);

average.a += fweightbb1.g;

weight.a += fweight;



//point 20

other = mat3(cur.gba,bb1.gba,bb2.gba);

fweight = computeWeight(cur3_r,other);

average.r += fweight
bb1.b;

weight.r += fweight;



fweight = computeWeight(cur3_g,other);

average.g += fweightbb1.b;

weight.g += fweight;



fweight = computeWeight(cur3_b,other);

average.b += fweight
bb1.b;

weight.b += fweight;



fweight = computeWeight(cur3_a,other);

average.a += fweightbb1.b;

weight.a += fweight;



//point

other = mat3(cur.b,cur.a,rr1.r,bb1.b,bb1.a,rb11.r,bb2.b,bb2.a,rb12.r);

fweight = computeWeight(cur3_g,other);

average.g += fweight
bb1.a;

weight.g += fweight;



fweight = computeWeight(cur3_b,other);

average.b += fweightbb1.a;

weight.b += fweight;



fweight = computeWeight(cur3_a,other);

average.a += fweight
bb1.a;

weight.a += fweight;



//point

other = mat3(cur.a,rr1.r,rr1.g,bb1.a,rb11.r,rb11.g,bb2.a,rb12.r,rb12.g);

fweight = computeWeight(cur3_b,other);

average.b += fweightrb11.r;

weight.b += fweight;



fweight = computeWeight(cur3_a,other);

average.a += fweight
rb11.r;

weight.a += fweight;



//point

other = mat3(rr1.rgb,rb11.rgb,rb12.rgb);

fweight = computeWeight(cur3_a,other);

average.a += fweightrb11.g;

weight.a += fweight;



//fifth line

///////////////////////////////

//point 21

other = mat3(lb11.gba,lb12.gba,lb13.gba);

fweight = computeWeight(cur3_r,other);

average.r += fweight
lb12.b;

weight.r += fweight;



//point 22

other = mat3(lb11.b,lb11.a,bb1.r,lb12.b,lb12.a,bb2.r,lb13.b,lb13.a,bb3.r);

fweight = computeWeight(cur3_r,other);

average.r += fweightlb12.a;

weight.r += fweight;



fweight = computeWeight(cur3_g,other);

average.g += fweight
lb12.a;

weight.g += fweight;



//point 23

other = mat3(lb11.a,bb1.r,bb1.g,lb12.a,bb2.r,bb2.g,lb13.a,bb3.r,bb3.g);

fweight = computeWeight(cur3_r,other);

average.r += fweight * bb2.r;

weight.r += fweight;



fweight = computeWeight(cur3_g,other);

average.g += fweightbb2.r;

weight.g += fweight;



fweight = computeWeight(cur3_b,other);

average.b += fweight
bb2.r;

weight.b += fweight;



//point 24

other = mat3(bb1.rgb,bb2.rgb,bb3.rgb);

fweight = computeWeight(cur3_r,other);

average.r += fweightbb2.g;

weight.r += fweight;



fweight = computeWeight(cur3_g,other);

average.g += fweight
bb2.g;

weight.g += fweight;



fweight = computeWeight(cur3_b,other);

average.b += fweightbb2.g;

weight.b += fweight;



fweight = computeWeight(cur3_a,other);

average.a += fweight
bb2.g;

weight.a += fweight;



//point 25

other = mat3(bb1.gba,bb2.gba,bb3.gba);

fweight = computeWeight(cur3_r,other);

average.r += fweightbb2.b;

weight.r += fweight;



fweight = computeWeight(cur3_g,other);

average.g += fweight
bb2.b;

weight.g += fweight;



fweight = computeWeight(cur3_b,other);

average.b += fweightbb2.b;

weight.b += fweight;



fweight = computeWeight(cur3_a,other);

average.a += fweight
bb2.b;

weight.a += fweight;



//point

other = mat3(bb1.b,bb1.a,rb11.r,bb2.b,bb2.a,rb12.r,bb3.b,bb3.a,rb13.r);

fweight = computeWeight(cur3_g,other);

average.g += fweightbb2.a;

weight.g += fweight;



fweight = computeWeight(cur3_b,other);

average.b += fweight
bb2.a;

weight.b += fweight;



fweight = computeWeight(cur3_a,other);

average.a += fweightbb2.a;

weight.a += fweight;



//rt

other = mat3(bb1.a,rb11.r,rb11.g,bb2.a,rb12.r,rb12.g,bb3.a,rb13.r,rb13.g);

fweight = computeWeight(cur3_b,other);

average.b += fweight
rb12.r;

weight.b += fweight;



fweight = computeWeight(cur3_a,other);

average.a += fweightrb12.r;

weight.a += fweight;



//point

other = mat3(rb11.rgb,rb12.rgb,rb13.rgb);

fweight = computeWeight(cur3_a,other);

average.a += fweight
rb12.g;

weight.a += fweight;



vec4 tmp1 = step(weight,vec4(0.0));

vec4 tmp2 = average/weight;

tmp2 = clamp(tmp2,0.0,1.0);

gl_FragColor = (vec4(1.0)-tmp1) * tmp2 + tmp1*cur;

}


#4

Hi,



The shader is extremely expensive. When targeting an SGX540 instruction set, our PVRShaderEditor tool reports the best case is 2712 instructions, the worst is 5284. As a comparison, a current high-end mobile game will usually have fragment shaders that are between 5-40 instructions.



Although there isn’t a limit on the size of GLSL ES shader source, there is a limit to the number of instructions that can be processed by a given GPU. This limit will vary depending on the target GPU. If you hit this limit, the compiler should produce an error.



I suspect the problem you’re seeing on the target is purely compilation time. On my desktop machine, the shader takes 2-3 seconds to compile. On a mobile device, this compilation time will be much higher. This high compile time may cause iOS to kill the compiler as it may appear unresponsive for a very long time. You would have to discuss the issue with Apple to understand why compilation doesn’t succeed on the iPhone 4.



My recommendation would be to revisit your algorithm and see if there’s anyway it can be simplified. If it’s not possible to find a compromise between speed and quality of your output, you could split the render into multiple passes (i.e. output texture of pass 1 is read in by pass 2, and so on).



Regards,

Joe