You might be able to use the EXT_disjoint_timer_query_webgl2 extension to measure how long a draw call takes on the GPU.
/**
 * Benchmarks a deliberately expensive fragment shader at increasing canvas
 * sizes using GPU timer queries (EXT_disjoint_timer_query_webgl2) and logs
 * the largest size whose draw time stays under a time budget.
 *
 * Logs a message and returns early when WebGL2 or the timer-query extension
 * is not available. The measurement itself runs asynchronously; main() does
 * not wait for it.
 */
function main() {
  const gl = document.createElement('canvas').getContext('webgl2', {
    powerPreference: 'high-performance',
  });
  // getContext returns null when WebGL2 is unavailable, so this check has to
  // come before anything dereferences `gl`.
  if (!gl) {
    log('need WebGL2');
    return;
  }
  log(`powerPreference: ${gl.getContextAttributes().powerPreference}\n\n`);

  const ext = gl.getExtension('EXT_disjoint_timer_query_webgl2');
  if (!ext) {
    log('need EXT_disjoint_timer_query_webgl2');
    return;
  }

  const vs = `#version 300 es
in vec4 position;
void main() {
gl_Position = position;
}
`;

  // Intentionally heavy: 100x100 iterations per pixel, each doing a chain of
  // dependent texture reads so the work cannot be trivially optimized away.
  const fs = `#version 300 es
precision highp float;
uniform sampler2D tex;
out vec4 fragColor;
void main() {
const int across = 100;
const int up = 100;
vec2 size = vec2(textureSize(tex, 0));
vec4 sum = vec4(0);
for (int y = 0; y < up; ++y) {
for (int x = 0; x < across; ++x) {
vec2 start = gl_FragCoord.xy + vec2(x, y);
vec2 uv = (mod(start, size) + 0.5) / size;
uv = texture(tex, uv).xy;
uv = texture(tex, uv).xy;
uv = texture(tex, uv).xy;
uv = texture(tex, uv).xy;
uv = texture(tex, uv).xy;
uv = texture(tex, uv).xy;
uv = texture(tex, uv).xy;
sum += texture(tex, uv);
}
}
fragColor = sum / float(across * up);
}
`;

  const programInfo = twgl.createProgramInfo(gl, [vs, fs]);
  const bufferInfo = twgl.primitives.createXYQuadBufferInfo(gl);

  // Random data so the dependent texture reads jump around the texture.
  const pixels = new Uint8Array(1024 * 1024 * 4);
  for (let i = 0; i < pixels.length; ++i) {
    pixels[i] = Math.random() * 256;
  }
  // creates a 1024x1024 RGBA texture.
  // NOTE(review): `tex` is never passed to the program explicitly; the shader
  // presumably samples it because it is still bound after creation and the
  // sampler uniform defaults to texture unit 0 - confirm against twgl.
  const tex = twgl.createTexture(gl, {src: pixels});

  gl.useProgram(programInfo.program);
  twgl.setBuffersAndAttributes(gl, programInfo, bufferInfo);

  /** Resolves on the next animation frame. */
  const waitFrame = () => new Promise(resolve => requestAnimationFrame(resolve));

  /**
   * Maps a step index to a canvas size that doubles in area each step:
   * 1x1, 2x1, 2x2, 4x2, 4x4, ...
   * @param {number} i - non-negative step index
   * @returns {{width: number, height: number}}
   */
  const widthHeightFromIndex = i => {
    const height = 2 ** (i / 2 | 0);
    const width = height * (i % 2 + 1);
    return { width, height };
  };

  /**
   * Times one draw of the quad at the given canvas size.
   * @param {WebGL2RenderingContext} gl
   * @param {number} width - canvas width in pixels
   * @param {number} height - canvas height in pixels
   * @returns {Promise<number>} elapsed GPU time in milliseconds
   * @throws {Error} if the WebGL context is lost while waiting for the result
   */
  async function getTimeMsForSize(gl, width, height) {
    gl.canvas.width = width;
    gl.canvas.height = height;
    gl.viewport(0, 0, width, height);

    // prime the GPU/driver
    // this is voodoo but if I don't do this
    // all the numbers come out bad. Even with
    // this the first test seems to fail with
    // a large number intermittently
    gl.drawElements(gl.TRIANGLES, 6, gl.UNSIGNED_SHORT, 0);

    // Repeat the measurement until one completes without a disjoint event.
    for (;;) {
      const query = gl.createQuery();
      try {
        gl.beginQuery(ext.TIME_ELAPSED_EXT, query);
        gl.drawElements(gl.TRIANGLES, 6, gl.UNSIGNED_SHORT, 0);
        gl.endQuery(ext.TIME_ELAPSED_EXT);
        gl.flush();

        // Poll once per animation frame until the result is available.
        // A lost context would never report a result, so bail out instead
        // of spinning forever.
        do {
          await waitFrame();
          if (gl.isContextLost()) {
            throw new Error('WebGL context lost while waiting for timer query');
          }
        } while (!gl.getQueryParameter(query, gl.QUERY_RESULT_AVAILABLE));

        // A disjoint event means the timing is unreliable; measure again.
        const disjoint = gl.getParameter(ext.GPU_DISJOINT_EXT);
        if (!disjoint) {
          const timeElapsedNs = gl.getQueryParameter(query, gl.QUERY_RESULT);
          return timeElapsedNs / (10 ** 6); // return milliseconds
        }
      } finally {
        // Release the query on every exit path (result, retry, or error).
        gl.deleteQuery(query);
      }
    }
  }

  /**
   * Times progressively larger canvas sizes, logging each result, and stops
   * at the first size whose draw time exceeds the limit.
   * @param {WebGL2RenderingContext} gl
   * @param {number} limitMs - time budget per draw in milliseconds
   * @returns {Promise<{width: number, height: number}>} the largest measured
   *     size that stayed within the limit; falls back to the smallest size
   *     (1x1) when even that one is over the limit
   */
  async function getSizeThatRunsUnderLimit(gl, limitMs) {
    log('size time in milliseconds');
    log('--------------------------------');
    // Sizes beyond the viewport limit cannot actually be rendered, so timing
    // them would be meaningless.
    const [maxWidth, maxHeight] = gl.getParameter(gl.MAX_VIEWPORT_DIMS);
    let largestUnderLimit = widthHeightFromIndex(0);
    for (let i = 0; i < 32; ++i) {
      const size = widthHeightFromIndex(i);
      if (size.width > maxWidth || size.height > maxHeight) {
        break;
      }
      const timeElapsedMs = await getTimeMsForSize(gl, size.width, size.height);
      const dims = `${size.width}x${size.height}`;
      log(`${dims.padEnd(11)} ${timeElapsedMs.toFixed(1).padStart(6)}`);
      if (timeElapsedMs > limitMs) {
        break;
      }
      largestUnderLimit = size;
    }
    return largestUnderLimit;
  }

  // Run the benchmark in the background. Errors are reported here because
  // nothing awaits this promise.
  (async () => {
    try {
      const limit = 1000 / 20;
      const {width, height} = await getSizeThatRunsUnderLimit(gl, limit);
      log('--------------------------------');
      log(`use ${width}x${height}`);
    } catch (err) {
      log(`error: ${err}`);
    }
  })();
}
// Start the benchmark as soon as this script is evaluated.
main();
/**
 * Writes one line of output to the page by appending a <pre> element to the
 * document body.
 * @param {...*} args - values to print; they are joined with single spaces
 */
function log(...args) {
  const text = args.join(' ');
  const line = Object.assign(document.createElement('pre'), {
    textContent: text,
  });
  document.body.appendChild(line);
}
/* Drop the default <pre> margins so consecutive log() lines stack tightly. */
pre { margin: 0; }
<!-- twgl.js helper library; provides the global `twgl` that main() uses. -->
<script src="https://twgljs.org/dist/4.x/twgl-full.min.js"></script>
On my 2014 MacBook Pro with dual GPUs (Intel/Nvidia), the first problem is that even though I request high-performance, Chrome gives me low-power, meaning it's using the Intel integrated GPU.
The first timing, at 1x1 pixels, intermittently comes out at ~17ms: often, but not always. I don't know how to fix that. One option would be to keep re-timing 1x1 until it reports a more reasonable number, e.g. try up to 5 times until it's < 1ms, and fail if it never is.
powerPreference: low-power
size time in milliseconds
--------------------------------
1x1 16.1
2x1 0.0
2x2 0.0
4x2 0.0
4x4 0.0
8x4 0.1
8x8 0.1
16x8 0.0
16x16 0.0
32x16 0.0
32x32 0.0
64x32 13.6
64x64 35.7
128x64 62.6
--------------------------------
use 64x64
Testing on a late 2018 MacBook Air with an Intel integrated GPU shows a similar issue, except the first timing comes out even worse, at 42ms.
size time in milliseconds
--------------------------------
1x1 42.4
2x1 0.0
2x2 0.0
4x2 0.0
4x4 0.0
8x4 0.0
8x8 0.0
16x8 0.0
16x16 0.0
32x16 0.0
32x32 0.0
64x32 0.0
64x64 51.5
--------------------------------
use 64x32
Further, the timings are kind of bogus. Note that on my 2014 MBP, 32x32 is 0ms and 64x32 is suddenly 13ms. Since 32x32 is half as many pixels, I'd expect it to take about 6.5ms. Same on the MBA above: everything is 0 and then suddenly 51ms!?
Running it on a Windows 10 desktop with Nvidia RTX 2070 everything seems more reasonable. The 1x1 timing is correct and the timings grow as expected.
powerPreference: low-power
size time in milliseconds
--------------------------------
1x1 0.0
2x1 0.0
2x2 0.0
4x2 0.0
4x4 0.0
8x4 0.0
8x8 0.0
16x8 0.0
16x16 0.0
32x16 0.1
32x32 0.1
64x32 2.4
64x64 2.9
128x64 3.1
128x128 6.0
256x128 15.4
256x256 27.8
512x256 58.6
--------------------------------
use 256x256
Also, on all systems, if I don't pre-draw each size before the timing, it fails and all timings come out > 16ms. Adding the pre-draw seems to work, but it's voodoo. I even tried pre-drawing just 1x1 pixel instead of width by height pixels, and that failed!?
Further, Firefox doesn't support EXT_disjoint_timer_query_webgl2. I believe that's because precision timing makes it possible to steal info from other processes. Chrome fixed this with site isolation, but I'm guessing Firefox has yet to do that.
note: WebGL1 has EXT_disjoint_timer_query for similar functionality.
Update: the issues on Intel GPUs might be related to fuzzing the timing to avoid security issues. Intel GPUs use unified memory (meaning they share memory with the CPU). I don't know. The Chrome security article mentions lowering precision on devices with unified memory.
I suppose even without the timing extensions you could check whether you can render at 60Hz by measuring requestAnimationFrame timing. Unfortunately, my experience there is also that it can be flaky. Anything could cause a rAF frame to take longer than 1/60th of a second: maybe the user is running other apps, maybe they are on a 30Hz monitor, etc. Averaging the timings over a certain number of frames, or taking the lowest reading of multiple timings, might help.