@@ -28,6 +28,7 @@ using namespace egl_platform;
2828
2929namespace
3030{
31+ void FramebufferChangeCallback (void *userData, GLenum target, GLuint framebuffer);
3132
3233enum class TracePerfTestID
3334{
@@ -99,9 +100,34 @@ class TracePerfTest : public ANGLERenderTest, public ::testing::WithParamInterfa
99100 void destroyBenchmark () override ;
100101 void drawBenchmark () override ;
101102
103+ void onFramebufferChange (GLenum target, GLuint framebuffer);
104+
102105 uint32_t mStartFrame ;
103106 uint32_t mEndFrame ;
104107 std::function<void (uint32_t )> mReplayFunc ;
108+
109+ double getHostTimeFromGLTime (GLint64 glTime);
110+
111+ private:
112+ struct QueryInfo
113+ {
114+ GLuint beginTimestampQuery;
115+ GLuint endTimestampQuery;
116+ GLuint framebuffer;
117+ };
118+
119+ struct TimeSample
120+ {
121+ GLint64 glTime;
122+ double hostTime;
123+ };
124+
125+ void sampleTime ();
126+
127+ // For tracking RenderPass/FBO change timing.
128+ QueryInfo mCurrentQuery = {};
129+ std::vector<QueryInfo> mRunningQueries ;
130+ std::vector<TimeSample> mTimeline ;
105131};
106132
107133TracePerfTest::TracePerfTest ()
@@ -157,19 +183,153 @@ void TracePerfTest::initializeBenchmark()
157183
158184void TracePerfTest::destroyBenchmark () {}
159185
186+ void TracePerfTest::sampleTime ()
187+ {
188+ if (mIsTimestampQueryAvailable )
189+ {
190+ GLint64 glTime;
191+ // glGetInteger64vEXT is exported by newer versions of the timer query extensions.
192+ // Unfortunately only the core EP is exposed by some desktop drivers (e.g. NVIDIA).
193+ if (glGetInteger64vEXT)
194+ {
195+ glGetInteger64vEXT (GL_TIMESTAMP_EXT, &glTime);
196+ }
197+ else
198+ {
199+ glGetInteger64v (GL_TIMESTAMP_EXT, &glTime);
200+ }
201+ mTimeline .push_back ({glTime, angle::GetHostTimeSeconds ()});
202+ }
203+ }
204+
160205void TracePerfTest::drawBenchmark ()
161206{
207+ // Add a time sample from GL and the host.
208+ sampleTime ();
209+
162210 startGpuTimer ();
163211
164212 for (uint32_t frame = mStartFrame ; frame < mEndFrame ; ++frame)
165213 {
214+ char frameName[32 ];
215+ sprintf (frameName, " Frame %u" , frame);
216+ beginInternalTraceEvent (frameName);
217+
166218 mReplayFunc (frame);
167219 getGLWindow ()->swap ();
220+
221+ endInternalTraceEvent (frameName);
222+ }
223+
224+ // Process any running queries once per iteration.
225+ for (size_t queryIndex = 0 ; queryIndex < mRunningQueries .size ();)
226+ {
227+ const QueryInfo &query = mRunningQueries [queryIndex];
228+
229+ GLuint endResultAvailable = 0 ;
230+ glGetQueryObjectuivEXT (query.endTimestampQuery , GL_QUERY_RESULT_AVAILABLE,
231+ &endResultAvailable);
232+
233+ if (endResultAvailable == GL_TRUE)
234+ {
235+ char fboName[32 ];
236+ sprintf (fboName, " FBO %u" , query.framebuffer );
237+
238+ GLint64 beginTimestamp = 0 ;
239+ glGetQueryObjecti64vEXT (query.beginTimestampQuery , GL_QUERY_RESULT, &beginTimestamp);
240+ glDeleteQueriesEXT (1 , &query.beginTimestampQuery );
241+ double beginHostTime = getHostTimeFromGLTime (beginTimestamp);
242+ beginGLTraceEvent (fboName, beginHostTime);
243+
244+ GLint64 endTimestamp = 0 ;
245+ glGetQueryObjecti64vEXT (query.endTimestampQuery , GL_QUERY_RESULT, &endTimestamp);
246+ glDeleteQueriesEXT (1 , &query.endTimestampQuery );
247+ double endHostTime = getHostTimeFromGLTime (endTimestamp);
248+ endGLTraceEvent (fboName, endHostTime);
249+
250+ mRunningQueries .erase (mRunningQueries .begin () + queryIndex);
251+ }
252+ else
253+ {
254+ queryIndex++;
255+ }
168256 }
169257
170258 stopGpuTimer ();
171259}
172260
261+ // Converts a GL timestamp into a host-side CPU time aligned with "GetHostTimeSeconds".
262+ // This check is necessary to line up sampled trace events in a consistent timeline.
263+ // Uses a linear interpolation from a series of samples. We do a blocking call to sample
264+ // both host and GL time once per swap. We then find the two closest GL timestamps and
265+ // interpolate the host times between them to compute our result. If we are past the last
266+ // GL timestamp we sample a new data point pair.
267+ double TracePerfTest::getHostTimeFromGLTime (GLint64 glTime)
268+ {
269+ // Find two samples to do a lerp.
270+ size_t firstSampleIndex = mTimeline .size () - 1 ;
271+ while (firstSampleIndex > 0 )
272+ {
273+ if (mTimeline [firstSampleIndex].glTime < glTime)
274+ {
275+ break ;
276+ }
277+ firstSampleIndex--;
278+ }
279+
280+ // Add an extra sample if we're missing an ending sample.
281+ if (firstSampleIndex == mTimeline .size () - 1 )
282+ {
283+ sampleTime ();
284+ }
285+
286+ const TimeSample &start = mTimeline [firstSampleIndex];
287+ const TimeSample &end = mTimeline [firstSampleIndex + 1 ];
288+
289+ // Note: we have observed in some odd cases later timestamps producing values that are
290+ // smaller than preceding timestamps. This bears further investigation.
291+
292+ // Compute the scaling factor for the lerp.
293+ double glDelta = static_cast <double >(glTime - start.glTime );
294+ double glRange = static_cast <double >(end.glTime - start.glTime );
295+ double t = glDelta / glRange;
296+
297+ // Lerp(t1, t2, t)
298+ double hostRange = end.hostTime - start.hostTime ;
299+ return mTimeline [firstSampleIndex].hostTime + hostRange * t;
300+ }
301+
302+ // Callback from the perf tests.
303+ void TracePerfTest::onFramebufferChange (GLenum target, GLuint framebuffer)
304+ {
305+ if (!mIsTimestampQueryAvailable )
306+ return ;
307+
308+ if (target != GL_FRAMEBUFFER && target != GL_DRAW_FRAMEBUFFER)
309+ return ;
310+
311+ // We have at most one active timestamp query at a time. This code will end the current query
312+ // and immediately start a new one.
313+ if (mCurrentQuery .beginTimestampQuery != 0 )
314+ {
315+ glGenQueriesEXT (1 , &mCurrentQuery .endTimestampQuery );
316+ glQueryCounterEXT (mCurrentQuery .endTimestampQuery , GL_TIMESTAMP_EXT);
317+ mRunningQueries .push_back (mCurrentQuery );
318+ mCurrentQuery = {};
319+ }
320+
321+ ASSERT (mCurrentQuery .beginTimestampQuery == 0 );
322+
323+ glGenQueriesEXT (1 , &mCurrentQuery .beginTimestampQuery );
324+ glQueryCounterEXT (mCurrentQuery .beginTimestampQuery , GL_TIMESTAMP_EXT);
325+ mCurrentQuery .framebuffer = framebuffer;
326+ }
327+
328+ ANGLE_MAYBE_UNUSED void FramebufferChangeCallback (void *userData, GLenum target, GLuint framebuffer)
329+ {
330+ reinterpret_cast <TracePerfTest *>(userData)->onFramebufferChange (target, framebuffer);
331+ }
332+
173333TEST_P (TracePerfTest, Run)
174334{
175335 run ();
0 commit comments