Imagination PowerVR SDK Blog

GE8100 in HTC Desire 12 (Android 7.1.1): fragment shader fails to compile


#1

Hello there!

I have a shader that compiles and runs fine on Adreno 418 and Mali T880. When I try to compile it on PowerVR GE8100, however, glGetShaderInfoLog() returns only a cryptic ‘Compile failed’. That’s it!

Vertex Shader:

#version 310 es
precision highp float;
precision highp int;
in vec2 a_Position;          // quad corner: one of (-0.5,-0.5), (-0.5,0.5), (0.5,-0.5), (0.5,0.5)
out vec2 v_TexCoordinate;    // a_Position + 0.5
out vec2 v_Pixel;            // (a_Position + 0.5) scaled by u_Size, i.e. the position in pixels
uniform uvec2 u_Size;        // size of the screen, in pixels.

// Vertex stage: shifts the quad corner by 0.5 to get the texture coordinate, scales that by
// the screen size to get a pixel position, and doubles the corner to get the clip-space
// position (z and w are both fixed at 1.0).
void main()
  {
  vec2 unit = a_Position + 0.5;

  v_TexCoordinate = unit;
  v_Pixel         = unit * vec2(u_Size);
  gl_Position     = vec4(a_Position * 2.0, 1.0, 1.0);
  }

Fragment shader:

#version 310 es
precision highp float;
precision highp int;
out vec4 fragColor;                // colour written for fragments that main() treats as opaque
in vec2 v_TexCoordinate;           // coordinate used to sample u_Texture and u_DepthTexture
in vec2 v_Pixel;                   // fragment position; truncated to uint to pick the pixel's list head

uniform sampler2D u_Texture;       // colour input; its alpha selects the opaque / translucent path
uniform sampler2D u_DepthTexture;  // depth input; only the .r channel is read

//////////////////////////////////////////////////////////////////////////////////////////////
// per-pixel linked list. Order Independent Transparency.

uniform uvec2 u_Size;              // u_Size.x is the row stride, u_Size.x*u_Size.y the number of list heads
uniform uint u_numRecords;         // record capacity; a fragment allocated an index >= this is dropped

layout (binding=0, offset=0) uniform atomic_uint u_Counter;   // incremented once per insert() to allocate a record

// Layout of u_Records, as used by insert():
//   first u_Size.x*u_Size.y uints : one head pointer per pixel (0 = empty list)
//   then 3 uints per record       : (next pointer, depth key, packed rgba)
layout (std430,binding=1) buffer linkedlist 
  {                                         
  uint u_Records[];                 
  };                                 
                                            

//////////////////////////////////////////////////////////////////////////////////////////////
// Concurrent insert to a linked list. Tim Harris, 'pragmatic implementation of non-blocking
// linked-lists', 2001.
// This arranges fragments by decreasing 'depth', so one would think - from back to front, but
// in main() below the depth is mapped with S*(1-depth)/2, so it is really front to back.

// Inserts one fragment into the linked list of its pixel, keeping the list ordered by
// decreasing 'depth' key.
//   ij    - pixel coordinates; truncated to uint to locate the pixel's head pointer
//   depth - sort key; a record is linked in front of the first record with a smaller key
//   rgba  - packed colour stored alongside the key
// If the allocated record index is >= u_numRecords the fragment is silently dropped.
void insert( vec2 ij, uint depth, uint rgba )
  {
  uint ptr = atomicCounterIncrement(u_Counter);   // allocate a record index for this fragment

  if( ptr<u_numRecords )
    {
    // Turn the record index into an offset into u_Records: records are 3 uints each and
    // start after the u_Size.x*u_Size.y head pointers.
    ptr = 3u*ptr + u_Size.x*u_Size.y;

    // Fill in the payload before the record becomes reachable from the list.
    u_Records[ptr+1u] = depth;
    u_Records[ptr+2u] = rgba;

    memoryBarrier();               // intended to order the payload writes before the link below

    // 'prev' is the slot holding the pointer we may overwrite (initially the pixel's head
    // pointer, later the 'next' field of a record); 'curr' is the record it points to.
    uint prev = uint(ij.x) + uint(ij.y) * u_Size.x;
    uint curr = u_Records[prev];

    while (true)
      {
      // curr==0u marks the end of the list.
      if ( curr==0u || depth > u_Records[curr+1u] )  // need to insert here
        {
        u_Records[ptr] = curr;     // next of new record is curr
        memoryBarrier();

        // Link the new record in only if u_Records[prev] still holds 'curr'.
        // atomicCompSwap returns the value found in memory before the operation.
        // NOTE: this is the call that, per the report following this listing, makes the
        // GE8100 compiler fail; commenting it out lets the shader compile.
        uint res = atomicCompSwap( u_Records[prev], curr, ptr );

        if (res==curr) break;      // done!
        else           curr = res; // could not insert! retry from same place in list
        }
      else                         // advance in list
        {
        prev = curr;
        curr = u_Records[prev];
        }
      }
    }
  }

//////////////////////////////////////////////////////////////////////////////////////////////

// Packs a colour into a single uint as 0xRRGGBBAA, 8 bits per channel.
// Each channel is clamped to [0,1] before scaling so that an out-of-range value cannot spill
// into the bits of the neighbouring channel, and is rounded to nearest rather than truncated
// so that a value of k/255 encodes to k even when 255.0*c lands just below k.
uint convert(vec4 c)
  {
  uvec4 q = uvec4(clamp(c, 0.0, 1.0) * 255.0 + 0.5);

  return (q.r<<24u) | (q.g<<16u) | (q.b<<8u) | q.a;
  }

//////////////////////////////////////////////////////////////////////////////////////////////
// Pass2 of the OIT algorithm - build the LinkedList phase.

// Samples colour and depth for this fragment. Fragments with alpha above 0.95 are written
// straight to the framebuffer; every other fragment is discarded, after first being inserted
// into the per-pixel list if its alpha is greater than zero.
void main()
  {
  vec4  frag = texture(u_Texture     , v_TexCoordinate);
  float depth= texture(u_DepthTexture, v_TexCoordinate).r;

  if( frag.a > 0.95 )              // treated as opaque: bypass the list
    {
    gl_FragDepth = depth;
    fragColor    = frag;
    }
  else
    {
    if( frag.a > 0.0 )             // fully transparent fragments are not recorded
      {
      const float S= 2147483647.0; // max signed int. Could probably be max unsigned int but this is enough.
      // Sort key S*(1-depth)/2: a smaller depth gives a larger key.
      insert(v_Pixel, uint(S*(1.0-depth)/2.0), convert(frag) );
      }
    discard;                       // nothing is written to the framebuffer on this path
    }
  }

Could anyone offer some advice? If I comment out the ‘atomicCompSwap( u_Records[prev], curr, ptr );’ from the ‘insert()’ function, everything compiles.


#2

Another shader that works on Adreno and Mali but fails to compile on GE8100 with the very same cryptic ‘Compile failed’.

Vertex shader: identical to the one in the previous post.

Fragment shader:

#version 310 es
precision highp float;
precision highp int;

out vec4 fragColor;           // The output color
in vec2 v_TexCoordinate;      // Interpolated texture coordinate per fragment (not read by this shader).
in vec2 v_Pixel;              // location of the current fragment, in pixels

//////////////////////////////////////////////////////////////////////////////////////////////
// per-pixel linked list. Order Independent Transparency.

uniform uvec2 u_Size;         // u_Size.x is the row stride used to index the head pointers

// Layout of u_Records:
//   first (u_Size.x*u_Size.y) uints - head pointers, one for each pixel in the Output rectangle
//                                     (0 = empty list).
//   next 3*u_numRecords uints       - actual linked list, i.e. triplets of (pointer,depth,rgba).
//                                     u_numRecords is not declared in this shader.
layout (std430,binding=1) buffer linkedlist
  {
  uint u_Records[];
  };

//////////////////////////////////////////////////////////////////////////////////////////////

// Unpacks a uint laid out as 0xRRGGBBAA into a vec4, dividing each 8-bit channel by 255.
vec4 convert(uint rgba)
  {
  uint r = (rgba >> 24u) & 255u;
  uint g = (rgba >> 16u) & 255u;
  uint b = (rgba >>  8u) & 255u;
  uint a =  rgba         & 255u;

  return vec4( float(r), float(g), float(b), float(a) ) / 255.0;
  }

//////////////////////////////////////////////////////////////////////////////////////////////
// A over B (https://en.wikipedia.org/wiki/Alpha_compositing)


// Composites A over B. Inputs and result are non-premultiplied (rgb is divided by the
// combined alpha before being returned).
// When both inputs have zero alpha the combined alpha is zero and the colour is undefined;
// transparent black is returned in that case instead of dividing by zero.
vec4 blend(vec4 A,vec4 B)
  {
  float b = B.a * (1.0-A.a);   // B's weight: the part of B not covered by A
  float a = A.a + b;           // combined alpha

  if( a <= 0.0 ) return vec4(0.0);

  return vec4( (A.rgb*A.a + B.rgb*b)/a , a );
  }

//////////////////////////////////////////////////////////////////////////////////////////////
// Pass4 of the OIT algorithm - keep traversing the linked list, build the final color and blend it.

// Walks this pixel's linked list from its head, compositing each record's colour with
// blend(), and writes the result. Pixels whose list is empty are discarded.
void main()
  {
  uint prev = uint(v_Pixel.x) + uint(v_Pixel.y) * u_Size.x;   // index of this pixel's head pointer
  uint curr = u_Records[prev];

  if (curr != 0u)                  // 0 = empty list
    {
    const float S= 2147483647.0;
    // Depth comes from the first record only; 1 - 2*key/S inverts a key built as S*(1-depth)/2.
    gl_FragDepth = 1.0 - 2.0*float(u_Records[curr+1u])/S;
    vec4 color   = convert(u_Records[curr+2u]);
    curr = u_Records[curr];        // follow the 'next' pointer

    while (curr != 0u)
      {
      // The colour accumulated so far goes over the next record in the list.
      color = blend( color , convert(u_Records[curr+2u]) );
      curr = u_Records[curr];
      }

    fragColor = color;
    }
  else discard;
}

#3

Hi,

thank you for reporting this problem. I managed to reproduce it internally and I will forward it to the compiler team. I will get back to you if we find any workarounds.

Please note that I found that the shader works on PowerVR Series6XT GPUs so you might want to give them a shot.
Take a look at them here:
https://community.imgtec.com/platforms/?product_category=powervr-gx6250
https://community.imgtec.com/platforms/?product_category=powervr-g6430

bests,
Marton


#4

Thanks a lot Marton! I am aware the shader works on GX6250 (although, it has to be said, pretty slowly - about 3 times slower than Mali T880 and 2.5 times slower than Adreno 418).

It looks like the PowerVR architecture doesn’t like memory barriers - in addition to the memoryBarrier() calls inside the shader, I also have a

glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_ATOMIC_COUNTER_BARRIER_BIT)

between the passes.


#5

Hi,

you can use PVRTune to make sure glMemoryBarrier doesn’t cause pipeline bubbles/idle times.
You can also use PVRShaderEditor to see the disassembly of your shaders and to optimize them. Please refer to the PowerVR Instruction Set Reference (ISR) for interpreting the disassembly.
https://community.imgtec.com/developers/powervr/graphics-sdk/
http://cdn.imgtec.com/sdk-documentation/PowerVR%20Instruction%20Set%20Reference.pdf

bests,
Marton


#6

Hello Marton,

any news about some workarounds perhaps? :slight_smile:


#7

Hi utumno,

we haven’t found any workarounds yet, but our compiler team received the bug and they’ll be working on fixing it.

bests,
Marton


#8

Thanks a lot Marton, I appreciate it!

It would be really good though to have some sort of workaround for the present compiler - we have to support what is in the field :frowning:


#9

Hi,

can you please tell me what the driver version is on the device in question?
You can use AIDA64 for example.

thanks,
Marton


#10

GL_VERSION says

OpenGL ES GLSL ES 3.20 build 1.8@4490469


#11

Thanks. It seems we already fixed this issue in a later driver version; however, your device doesn’t have it. You can request a newer driver (and Android version) from your device manufacturer (HTC).


#12

I’m getting “compile failed” crash reports for the following fragment shader. It seems to fail on any device with a GE8100 GPU, such as the Huawei Y5 and HTC Desire 12. These devices seem to run Android 8.1.0.

PVRShaderEditor returns success for all compilers including PowerVR Rogue. I don’t have a device with this GPU so I can’t really test which line is causing the problem.

Please, any clues as to what’s going on would be greatly appreciated.

// LOWP expands to 'lowp' on GL ES and to nothing elsewhere; it is not referenced in this shader.
#ifdef GL_ES
#define LOWP lowp
precision mediump float;
#else
#define LOWP 
#endif

varying vec2 v_texCoords;           // texture coordinate of this fragment

uniform sampler2D u_texture;        // image being glitched
uniform sampler2D u_noiseTexture;   // noise whose channels drive the per-block / per-line conditions

uniform float u_time; 		// used to be time
uniform float u_power;              // scales both glitch thresholds and is added to the hue
uniform float u_lsdMix;             // mix factor: 0.0 = hue-shifted colour only, 1.0 = sampled colour only
uniform float u_aspectRatio;        // multiplies the block height relative to its width


// Converts an RGB colour to vec3(hue, saturation, value).
// q.x is the largest channel (the value) and d the spread between the largest and smallest
// channel. Both are used as divisors and both are zero for black (d is also zero for any
// grey), so the denominators are floored at EPS to avoid dividing by zero.
vec3 rgb2hsv(vec3 c)
{
    // Above 2^-14, the smallest magnitude mediump is required to represent, so it does not
    // flush to zero under this shader's default float precision.
    const float EPS = 1.0e-4;

    vec4 K = vec4(0.0, -1.0 / 3.0, 2.0 / 3.0, -1.0);
    vec4 p = mix(vec4(c.bg, K.wz), vec4(c.gb, K.xy), step(c.b, c.g));
    vec4 q = mix(vec4(p.xyw, c.r), vec4(c.r, p.yzx), step(p.x, c.r));

    float d = q.x - min(q.w, q.y);
    return vec3(abs(q.z + (q.w - q.y) / max(6.0 * d, EPS)), d / max(q.x, EPS), q.x);
}

// Converts vec3(hue, saturation, value) to RGB.
// Each channel follows the same triangular ramp over the hue, phase-shifted by a third of a
// turn per channel; saturation blends between white and that ramp, and value scales the result.
vec3 hsv2rgb(vec3 c)
{
    vec3 phase = vec3(1.0, 2.0 / 3.0, 1.0 / 3.0);
    vec3 ramp = abs(fract(vec3(c.x) + phase) * 6.0 - vec3(3.0));
    vec3 saturated = clamp(ramp - vec3(1.0), 0.0, 1.0);
    return c.z * mix(vec3(1.0), saturated, c.y);
}

// Glitch effect: splits the colour channels in noise-selected blocks and lines, applies a
// hue-shift ("LSD") pass, then applies three further noise-gated colour distortions.
void main() {
	// Snap the UVs to a block grid (block height multiplied by u_aspectRatio), shift the grid
	// by an offset that changes at discrete time steps, and wrap the result with fract().
	vec2 blockSize = vec2(0.05, 0.05 * u_aspectRatio);
	vec2 uv_noise = floor(v_texCoords / blockSize) * blockSize;
	uv_noise += floor(vec2(u_time) * vec2(12.0, 35.0)) / vec2(64.0);
	uv_noise = fract(uv_noise);

	// Sawtooth-in-time thresholds, both scaled by u_power.
	float block_thresh = fract(u_time * 8.0) * 0.025 * u_power;
	float line_thresh = fract(u_time * 4.0) * 0.2 * u_power;

	vec2 uv_r = v_texCoords, uv_g = v_texCoords, uv_b = v_texCoords;

    // noise1 varies per block; noise2 is looked up from the block's y coordinate only.
    // NOTE(review): channels of noise1/noise2 are used as divisors below. A texel value of 0.0
    // would divide by zero - confirm the noise texture never contains 0, or guard the divisions.
    vec4 noise1 = texture2D(u_noiseTexture, uv_noise);    
    vec4 noise2 = texture2D(u_noiseTexture, vec2(uv_noise.y, 0.0));

	// glitch some blocks and lines: average the two ratios (each capped at 1.0) and round via floor(x + 0.5)
    float condition = floor(((min(block_thresh / noise1.r, 1.0) + min(line_thresh / noise2.g, 1.0)) / 2.0) + 0.5);

    // Offset the three channel lookups by different amounts; the offset is zero when condition is 0.
    vec2 dist = (uv_noise - 0.5) * 0.3 * condition;
    uv_r += dist * 0.1;
    uv_g += dist * 0.2;
    uv_b += dist * 0.125;

	vec4 fragColor = vec4(1.0, 1.0, 1.0, 1.0);

	// Lookup: each channel is sampled at its own (wrapped) coordinate
	fragColor.r = texture2D(u_texture, fract(uv_r)).r;
	fragColor.g = texture2D(u_texture, fract(uv_g)).g;
	fragColor.b = texture2D(u_texture, fract(uv_b)).b;

	// LSD Effect: hue is replaced by a value derived from the channel sum, u_power and u_time,
	// saturation is forced to 1.0 and the value is inverted; the result is then mixed with the
	// sampled colour by u_lsdMix.
	vec3 color = rgb2hsv(fragColor.rgb);
	color.x = mod(((fragColor.x + fragColor.y + fragColor.z) * 0.7) + u_power + u_time, 1.0);
	color.y = 1.0;
	color.z = 1.0 - color.z;
	color = hsv2rgb(color);
	fragColor.rgb = mix(color, fragColor.rgb, u_lsdMix);


	// in some blocks, replace the colour with its red channel copied to all three channels
    condition = floor(min(block_thresh / noise1.g, 1.0));
    fragColor.rgb = mix(fragColor.rgb, fragColor.rrr, condition);

	// discolor block lines: green-only colour whose green is the sum of the three channels
    condition = floor(min(line_thresh / (noise2.b * 3.5), 1.0));
    fragColor.rgb = mix(fragColor.rgb, vec3(0.0, dot(fragColor.rgb, vec3(1.0)), 0.0), condition);


	// interleave lines in some blocks: keep only the red channel, multiplied by 3
    condition = floor(((min(block_thresh / (noise1.g * 1.5), 1.0) + min(line_thresh / (noise2.g * 2.5), 1.0)) / 2.0) + 0.5);
	vec3 mask = fragColor.rgb * vec3(3.0, 0.0, 0.0);
    fragColor.rgb = mix(fragColor.rgb, mask, condition);

	gl_FragColor = fragColor;
}