Profiler - Also use GL timestamps

* Get performance information from the GPU using GL timers
* Avoid using glFinish() for the profiler
* Display max( cpu time , gpu time ) in the profiler window
This commit is contained in:
Emmanuel BENOîT 2017-12-24 16:02:17 +01:00
parent 99bd2c429b
commit 646ec5a423
4 changed files with 96 additions and 36 deletions

View file

@ -84,7 +84,6 @@ void T_Main::mainLoop( )
UI::Shaders( ).update( );
// Display
glFinish( );
p.startFrame( );
p.start( "Full frame" );
m.handleEvents( );
@ -182,7 +181,7 @@ void T_Main::render( )
if ( UI::ODbg( ).isActive( ) ) {
UI::ODbg( ).debugOutput( );
}
glFinish( ); UI::Profiler( ).end( "Debug" );
UI::Profiler( ).end( "Debug" );
} else {
T_Rendertarget::MainOutput( );

View file

@ -731,7 +731,6 @@ void T_OpContext::run(
}
case OP_UI_PEXIT:
glFinish( );
UI::Profiler( ).end( profiling.last( ) );
profiling.removeLast( );
break;

View file

@ -10,11 +10,19 @@ constexpr uint32_t T_Profiler::History;
constexpr uint32_t T_Profiler::Invalid;
// Destructor: hands every GL query name stored in gpuQueries_ back to
// the driver. Nothing is done when no query was ever generated.
T_Profiler::~T_Profiler( )
{
	const auto nQueries = gpuQueries_.size( );
	if ( nQueries == 0 ) {
		return;
	}
	glDeleteQueries( nQueries , &gpuQueries_[ 0 ] );
}
void T_Profiler::clear( )
{
sections_.clear( );
samples_.clear( );
starts_.clear( );
cpuStarts_.clear( );
}
void T_Profiler::startFrame( )
@ -25,15 +33,37 @@ void T_Profiler::startFrame( )
void T_Profiler::endFrame( )
{
const auto n( sections_.size( ) );
const auto n{ sections_.size( ) };
if ( n ) {
int32_t done{ 0 };
while ( !done ) {
glGetQueryObjectiv( gpuQueries_[ n * 2 - 1 ] ,
GL_QUERY_RESULT_AVAILABLE ,
&done );
}
if ( gpuSamples_.size( ) < n ) {
gpuSamples_.resize( n );
}
for ( auto i = 0u ; i < n ; i ++ ) {
uint64_t a , b;
glGetQueryObjectui64v( gpuQueries_[ i * 2 ] ,
GL_QUERY_RESULT , &a );
glGetQueryObjectui64v( gpuQueries_[ i * 2 + 1 ] ,
GL_QUERY_RESULT , &b );
addSample( gpuSamples_[ i ] , b - a );
}
}
while ( secDurations_.size( ) < n ) {
secDurations_.add( 0 );
secStarts_.add( 0 );
}
for ( auto i = 0u ; i < n ; i ++ ) {
const float d = computeDuration( i );
secDurations_[ i ] = d;
const float cpuD = computeDuration( samples_[ i ] );
const float gpuD = computeDuration( gpuSamples_[ i ] );
secDurations_[ i ] = std::max( cpuD , gpuD );
if ( parents_[ i ] != Invalid ) {
assert( parents_[ i ] < i );
secStarts_[ i ] = secStarts_[ parents_[ i ] ];
@ -55,7 +85,7 @@ void T_Profiler::start(
if ( pos == n ) {
sections_.add( section );
samples_.add( T_SamplesList_{ } );
starts_.add( 0u );
cpuStarts_.add( 0u );
chain_.add( Invalid );
parents_.add( Invalid );
}
@ -71,7 +101,12 @@ void T_Profiler::start(
struct timespec ts;
clock_gettime( CLOCK_MONOTONIC , &ts );
starts_[ pos ] = ts.tv_sec * 1000000000 + ts.tv_nsec;
cpuStarts_[ pos ] = ts.tv_sec * 1000000000 + ts.tv_nsec;
if ( gpuQueries_.size( ) <= pos * 2 + 1 ) {
extendGPUQueries( );
}
glQueryCounter( gpuQueries_[ pos * 2 ] , GL_TIMESTAMP );
}
void T_Profiler::end(
@ -84,24 +119,11 @@ void T_Profiler::end(
}
struct timespec ts;
glQueryCounter( gpuQueries_[ pos * 2 + 1 ] , GL_TIMESTAMP );
clock_gettime( CLOCK_MONOTONIC , &ts );
const uint64_t ended = ts.tv_sec * 1000000000 + ts.tv_nsec;
const uint64_t duration = ended - starts_[ pos ];
auto& samples( samples_[ pos ] );
if ( samples.size( ) == 0 || ( samples.size( ) < History
&& samples[ 0 ].nSamples == Samples ) ) {
samples.insert( 0 , T_ProfilerSamples{ 0 , 0 } );
} else if ( samples.size( ) == History && samples[ 0 ].nSamples == Samples ) {
for ( auto i = 1u ; i < History ; i ++ ) {
samples[ i ] = samples[ i - 1 ];
}
samples[ 0 ].sum = 0;
samples[ 0 ].nSamples = 0;
}
samples[ 0 ].sum += float( duration ) * 1e-6;
samples[ 0 ].nSamples ++;
addSample( samples_[ pos ] , ended - cpuStarts_[ pos ] );
previous_ = pos;
current_ = Invalid;
@ -135,9 +157,10 @@ void T_Profiler::makeUI( )
ImGui::PushStyleColor( ImGuiCol_Text , color );
ebcl::T_StringBuilder sb;
char tms[ 12 ];
snprintf( tms , 12 , "%.3f" , secDurations_[ i ] );
sb << sections_[ i ] << " ("
<< int( round( secDurations_[ i ] ) )
<< "ms)" << '\0';
<< tms << "ms)" << '\0';
ImGui::Checkbox( sb.data( ) , (bool*) &displayed_[ i ] );
ImGui::PopStyleColor( );
}
@ -206,15 +229,44 @@ void T_Profiler::makeUI( )
ImGui::End( );
}
float T_Profiler::computeDuration(
const uint32_t section ) const
// Grows gpuQueries_ by one growth step of the array and fills the new
// slots with freshly generated GL query names. The new slots are zeroed
// by the resize before glGenQueries() writes into them.
void T_Profiler::extendGPUQueries( ) noexcept
{
	const auto firstNew = gpuQueries_.size( );
	const auto nNew = gpuQueries_.growth( );

	gpuQueries_.resize( firstNew + nNew , 0 );
	glGenQueries( nNew , &gpuQueries_[ firstNew ] );
}
// Records one measured duration into a section's sample history.
//
// The history is a list of at most History buckets, newest first
// (index 0). Each bucket accumulates a sum and a sample count; once the
// newest bucket holds Samples entries a fresh bucket is opened:
//   - while the list is not full, a new empty bucket is inserted at the
//     front;
//   - once the list is full, every bucket is moved back by one slot (the
//     oldest is dropped) and the front bucket is reset.
//
// list      History to update.
// duration  Measured duration; the visible callers pass nanoseconds
//           (CPU monotonic clock difference or GL timestamp
//           difference). It is scaled by 1e-6 before being added to
//           the bucket sum.
void T_Profiler::addSample(
		T_SamplesList_& list ,
		const uint64_t duration
	) noexcept
{
	const bool newestFull{ list.size( ) != 0
		&& list[ 0 ].nSamples == Samples };

	if ( list.size( ) == 0 || ( newestFull && list.size( ) < History ) ) {
		list.insert( 0 , T_ProfilerSamples{ 0 , 0 } );
	} else if ( newestFull && list.size( ) == History ) {
		// Shift from the back towards the front. Walking forward
		// (list[ i ] = list[ i - 1 ] for increasing i) would copy
		// list[ 0 ] into every slot and destroy the older buckets.
		for ( auto i = History - 1 ; i > 0 ; i -- ) {
			list[ i ] = list[ i - 1 ];
		}
		list[ 0 ].sum = 0;
		list[ 0 ].nSamples = 0;
	}

	list[ 0 ].sum += float( duration ) * 1e-6;
	list[ 0 ].nSamples ++;
}
float T_Profiler::computeDuration(
T_SamplesList_ const& section
) noexcept
{
auto const& samples( samples_[ section ] );
float total = 0;
float nSamples = 0;
for ( auto const& entry : samples ) {
total += entry.sum;
nSamples += entry.nSamples;
const auto ns{ section.size( ) };
for ( auto i = 0u ; i < ns ; i ++ ) {
total += section[ i ].sum;
nSamples += section[ i ].nSamples;
}
return total / nSamples;
}

View file

@ -17,6 +17,8 @@ struct T_Profiler
static constexpr uint32_t History = 4;
static constexpr uint32_t Invalid = 0xffffffff;
~T_Profiler( );
void clear( );
void startFrame( );
@ -40,12 +42,17 @@ struct T_Profiler
bool& uiEnabled( ) { return uiEnabled_; }
private:
using T_SamplesList_ = T_Array< T_ProfilerSamples >;
using T_SamplesList_ = T_StaticArray< T_ProfilerSamples , History >;
using T_Data_ = T_Array< T_SamplesList_ >;
void extendGPUQueries( ) noexcept;
static void addSample(
T_SamplesList_& list ,
uint64_t duration ) noexcept;
uint32_t find( T_String const& section ) const;
float computeDuration(
const uint32_t section ) const;
static float computeDuration(
T_SamplesList_ const& list ) noexcept;
uint32_t previous_;
uint32_t current_;
@ -54,12 +61,15 @@ struct T_Profiler
T_Array< uint32_t > chain_;
T_Array< uint32_t > parents_;
T_Data_ samples_;
T_Array< uint64_t > starts_;
T_Array< uint64_t > cpuStarts_;
T_Array< float > secDurations_;
T_Array< float > secStarts_;
bool uiEnabled_ = false;
T_Array< int > displayed_;
T_Array< GLuint > gpuQueries_{ T_Array< GLuint >( 64 ) };
T_Data_ gpuSamples_;
};