Profiler - Also use GL timestamps

* Get performance information from the GPU using GL timers
* Avoid using glFinish() for the profiler
* Display max( cpu time , gpu time ) in the profiler window
This commit is contained in:
Emmanuel BENOîT 2017-12-24 16:02:17 +01:00
parent 99bd2c429b
commit 646ec5a423
4 changed files with 96 additions and 36 deletions

View file

@ -84,7 +84,6 @@ void T_Main::mainLoop( )
UI::Shaders( ).update( ); UI::Shaders( ).update( );
// Display // Display
glFinish( );
p.startFrame( ); p.startFrame( );
p.start( "Full frame" ); p.start( "Full frame" );
m.handleEvents( ); m.handleEvents( );
@ -182,7 +181,7 @@ void T_Main::render( )
if ( UI::ODbg( ).isActive( ) ) { if ( UI::ODbg( ).isActive( ) ) {
UI::ODbg( ).debugOutput( ); UI::ODbg( ).debugOutput( );
} }
glFinish( ); UI::Profiler( ).end( "Debug" ); UI::Profiler( ).end( "Debug" );
} else { } else {
T_Rendertarget::MainOutput( ); T_Rendertarget::MainOutput( );

View file

@ -731,7 +731,6 @@ void T_OpContext::run(
} }
case OP_UI_PEXIT: case OP_UI_PEXIT:
glFinish( );
UI::Profiler( ).end( profiling.last( ) ); UI::Profiler( ).end( profiling.last( ) );
profiling.removeLast( ); profiling.removeLast( );
break; break;

View file

@ -10,11 +10,19 @@ constexpr uint32_t T_Profiler::History;
constexpr uint32_t T_Profiler::Invalid; constexpr uint32_t T_Profiler::Invalid;
// Hands the GL query objects held in gpuQueries_ back to the GL
// implementation when the profiler is destroyed.
T_Profiler::~T_Profiler( )
{
	const auto nQueries{ gpuQueries_.size( ) };
	if ( nQueries == 0 ) {
		// The query list is empty: there is nothing to delete, and
		// taking the address of element 0 would not be valid.
		return;
	}
	glDeleteQueries( nQueries , &gpuQueries_[ 0 ] );
}
void T_Profiler::clear( ) void T_Profiler::clear( )
{ {
sections_.clear( ); sections_.clear( );
samples_.clear( ); samples_.clear( );
starts_.clear( ); cpuStarts_.clear( );
} }
void T_Profiler::startFrame( ) void T_Profiler::startFrame( )
@ -25,15 +33,37 @@ void T_Profiler::startFrame( )
void T_Profiler::endFrame( ) void T_Profiler::endFrame( )
{ {
const auto n( sections_.size( ) ); const auto n{ sections_.size( ) };
if ( n ) {
int32_t done{ 0 };
while ( !done ) {
glGetQueryObjectiv( gpuQueries_[ n * 2 - 1 ] ,
GL_QUERY_RESULT_AVAILABLE ,
&done );
}
if ( gpuSamples_.size( ) < n ) {
gpuSamples_.resize( n );
}
for ( auto i = 0u ; i < n ; i ++ ) {
uint64_t a , b;
glGetQueryObjectui64v( gpuQueries_[ i * 2 ] ,
GL_QUERY_RESULT , &a );
glGetQueryObjectui64v( gpuQueries_[ i * 2 + 1 ] ,
GL_QUERY_RESULT , &b );
addSample( gpuSamples_[ i ] , b - a );
}
}
while ( secDurations_.size( ) < n ) { while ( secDurations_.size( ) < n ) {
secDurations_.add( 0 ); secDurations_.add( 0 );
secStarts_.add( 0 ); secStarts_.add( 0 );
} }
for ( auto i = 0u ; i < n ; i ++ ) { for ( auto i = 0u ; i < n ; i ++ ) {
const float d = computeDuration( i ); const float cpuD = computeDuration( samples_[ i ] );
secDurations_[ i ] = d; const float gpuD = computeDuration( gpuSamples_[ i ] );
secDurations_[ i ] = std::max( cpuD , gpuD );
if ( parents_[ i ] != Invalid ) { if ( parents_[ i ] != Invalid ) {
assert( parents_[ i ] < i ); assert( parents_[ i ] < i );
secStarts_[ i ] = secStarts_[ parents_[ i ] ]; secStarts_[ i ] = secStarts_[ parents_[ i ] ];
@ -55,7 +85,7 @@ void T_Profiler::start(
if ( pos == n ) { if ( pos == n ) {
sections_.add( section ); sections_.add( section );
samples_.add( T_SamplesList_{ } ); samples_.add( T_SamplesList_{ } );
starts_.add( 0u ); cpuStarts_.add( 0u );
chain_.add( Invalid ); chain_.add( Invalid );
parents_.add( Invalid ); parents_.add( Invalid );
} }
@ -71,7 +101,12 @@ void T_Profiler::start(
struct timespec ts; struct timespec ts;
clock_gettime( CLOCK_MONOTONIC , &ts ); clock_gettime( CLOCK_MONOTONIC , &ts );
starts_[ pos ] = ts.tv_sec * 1000000000 + ts.tv_nsec; cpuStarts_[ pos ] = ts.tv_sec * 1000000000 + ts.tv_nsec;
if ( gpuQueries_.size( ) <= pos * 2 + 1 ) {
extendGPUQueries( );
}
glQueryCounter( gpuQueries_[ pos * 2 ] , GL_TIMESTAMP );
} }
void T_Profiler::end( void T_Profiler::end(
@ -84,24 +119,11 @@ void T_Profiler::end(
} }
struct timespec ts; struct timespec ts;
glQueryCounter( gpuQueries_[ pos * 2 + 1 ] , GL_TIMESTAMP );
clock_gettime( CLOCK_MONOTONIC , &ts ); clock_gettime( CLOCK_MONOTONIC , &ts );
const uint64_t ended = ts.tv_sec * 1000000000 + ts.tv_nsec; const uint64_t ended = ts.tv_sec * 1000000000 + ts.tv_nsec;
const uint64_t duration = ended - starts_[ pos ]; addSample( samples_[ pos ] , ended - cpuStarts_[ pos ] );
auto& samples( samples_[ pos ] );
if ( samples.size( ) == 0 || ( samples.size( ) < History
&& samples[ 0 ].nSamples == Samples ) ) {
samples.insert( 0 , T_ProfilerSamples{ 0 , 0 } );
} else if ( samples.size( ) == History && samples[ 0 ].nSamples == Samples ) {
for ( auto i = 1u ; i < History ; i ++ ) {
samples[ i ] = samples[ i - 1 ];
}
samples[ 0 ].sum = 0;
samples[ 0 ].nSamples = 0;
}
samples[ 0 ].sum += float( duration ) * 1e-6;
samples[ 0 ].nSamples ++;
previous_ = pos; previous_ = pos;
current_ = Invalid; current_ = Invalid;
@ -135,9 +157,10 @@ void T_Profiler::makeUI( )
ImGui::PushStyleColor( ImGuiCol_Text , color ); ImGui::PushStyleColor( ImGuiCol_Text , color );
ebcl::T_StringBuilder sb; ebcl::T_StringBuilder sb;
char tms[ 12 ];
snprintf( tms , 12 , "%.3f" , secDurations_[ i ] );
sb << sections_[ i ] << " (" sb << sections_[ i ] << " ("
<< int( round( secDurations_[ i ] ) ) << tms << "ms)" << '\0';
<< "ms)" << '\0';
ImGui::Checkbox( sb.data( ) , (bool*) &displayed_[ i ] ); ImGui::Checkbox( sb.data( ) , (bool*) &displayed_[ i ] );
ImGui::PopStyleColor( ); ImGui::PopStyleColor( );
} }
@ -206,15 +229,44 @@ void T_Profiler::makeUI( )
ImGui::End( ); ImGui::End( );
} }
// Enlarges the list of GL query names by one growth increment of the
// underlying array and asks GL to generate names for the new slots.
void T_Profiler::extendGPUQueries( ) noexcept
{
	const auto firstNew{ gpuQueries_.size( ) };
	const auto added{ gpuQueries_.growth( ) };
	// Zero-fill the new slots so that glGenQueries has storage to write
	// the generated names into.
	gpuQueries_.resize( firstNew + added , 0 );
	glGenQueries( added , &gpuQueries_[ firstNew ] );
}
// Accumulates one measured duration into a section's sample history.
//
// The history holds at most `History` buckets, newest first; each bucket
// sums up to `Samples` measurements. When the newest bucket is full, a new
// one is opened: either by inserting at the front (history not yet full)
// or by shifting every bucket one slot towards the back, which drops the
// oldest one.
//
//	list		the sample history to update
//	duration	the measured duration, in nanoseconds; it is stored
//			in milliseconds
void T_Profiler::addSample(
		T_SamplesList_& list ,
		const uint64_t duration
	) noexcept
{
	if ( list.size( ) == 0 || ( list.size( ) < History
				&& list[ 0 ].nSamples == Samples ) ) {
		list.insert( 0 , T_ProfilerSamples{ 0 , 0 } );
	} else if ( list.size( ) == History && list[ 0 ].nSamples == Samples ) {
		// Shift from the back towards the front. Walking forward would
		// overwrite each bucket before it is moved and end up copying
		// bucket 0 into every slot.
		for ( auto i = History - 1 ; i > 0 ; i -- ) {
			list[ i ] = list[ i - 1 ];
		}
		list[ 0 ].sum = 0;
		list[ 0 ].nSamples = 0;
	}

	// Nanoseconds to milliseconds.
	list[ 0 ].sum += float( duration ) * 1e-6;
	list[ 0 ].nSamples ++;
}
// Computes the average duration over a whole sample history.
//
//	section		the sample history of one section
//
// Returns the sum of all buckets' durations divided by the total number
// of samples they contain, or 0 if the history holds no sample at all.
float T_Profiler::computeDuration(
		T_SamplesList_ const& section
	) noexcept
{
	float total = 0;
	float nSamples = 0;
	const auto ns{ section.size( ) };
	for ( auto i = 0u ; i < ns ; i ++ ) {
		total += section[ i ].sum;
		nSamples += section[ i ].nSamples;
	}
	// A section that has been started but never ended has no samples;
	// dividing would yield NaN, which would then go through std::max
	// in endFrame and reach the displayed durations.
	if ( nSamples == 0 ) {
		return 0;
	}
	return total / nSamples;
}

View file

@ -17,6 +17,8 @@ struct T_Profiler
static constexpr uint32_t History = 4; static constexpr uint32_t History = 4;
static constexpr uint32_t Invalid = 0xffffffff; static constexpr uint32_t Invalid = 0xffffffff;
~T_Profiler( );
void clear( ); void clear( );
void startFrame( ); void startFrame( );
@ -40,12 +42,17 @@ struct T_Profiler
bool& uiEnabled( ) { return uiEnabled_; } bool& uiEnabled( ) { return uiEnabled_; }
private: private:
using T_SamplesList_ = T_Array< T_ProfilerSamples >; using T_SamplesList_ = T_StaticArray< T_ProfilerSamples , History >;
using T_Data_ = T_Array< T_SamplesList_ >; using T_Data_ = T_Array< T_SamplesList_ >;
void extendGPUQueries( ) noexcept;
static void addSample(
T_SamplesList_& list ,
uint64_t duration ) noexcept;
uint32_t find( T_String const& section ) const; uint32_t find( T_String const& section ) const;
float computeDuration( static float computeDuration(
const uint32_t section ) const; T_SamplesList_ const& list ) noexcept;
uint32_t previous_; uint32_t previous_;
uint32_t current_; uint32_t current_;
@ -54,12 +61,15 @@ struct T_Profiler
T_Array< uint32_t > chain_; T_Array< uint32_t > chain_;
T_Array< uint32_t > parents_; T_Array< uint32_t > parents_;
T_Data_ samples_; T_Data_ samples_;
T_Array< uint64_t > starts_; T_Array< uint64_t > cpuStarts_;
T_Array< float > secDurations_; T_Array< float > secDurations_;
T_Array< float > secStarts_; T_Array< float > secStarts_;
bool uiEnabled_ = false; bool uiEnabled_ = false;
T_Array< int > displayed_; T_Array< int > displayed_;
T_Array< GLuint > gpuQueries_{ T_Array< GLuint >( 64 ) };
T_Data_ gpuSamples_;
}; };