Profiler - Also use GL timestamps
* Get performance information from the GPU using GL timers
* Avoid using glFinish() for the profiler
* Display max( cpu time , gpu time ) in the profiler window
This commit is contained in:
parent
99bd2c429b
commit
646ec5a423
4 changed files with 96 additions and 36 deletions
|
@ -84,7 +84,6 @@ void T_Main::mainLoop( )
|
|||
UI::Shaders( ).update( );
|
||||
|
||||
// Display
|
||||
glFinish( );
|
||||
p.startFrame( );
|
||||
p.start( "Full frame" );
|
||||
m.handleEvents( );
|
||||
|
@ -182,7 +181,7 @@ void T_Main::render( )
|
|||
if ( UI::ODbg( ).isActive( ) ) {
|
||||
UI::ODbg( ).debugOutput( );
|
||||
}
|
||||
glFinish( ); UI::Profiler( ).end( "Debug" );
|
||||
UI::Profiler( ).end( "Debug" );
|
||||
|
||||
} else {
|
||||
T_Rendertarget::MainOutput( );
|
||||
|
|
|
@ -731,7 +731,6 @@ void T_OpContext::run(
|
|||
}
|
||||
|
||||
case OP_UI_PEXIT:
|
||||
glFinish( );
|
||||
UI::Profiler( ).end( profiling.last( ) );
|
||||
profiling.removeLast( );
|
||||
break;
|
||||
|
|
110
ui-profiling.cc
110
ui-profiling.cc
|
@ -10,11 +10,19 @@ constexpr uint32_t T_Profiler::History;
|
|||
constexpr uint32_t T_Profiler::Invalid;
|
||||
|
||||
|
||||
/// Hands every query name stored in gpuQueries_ back to GL.
/// Nothing is done when the array is empty, so no GL call is issued then.
T_Profiler::~T_Profiler( )
{
	const auto nQueries{ gpuQueries_.size( ) };
	if ( nQueries == 0 ) {
		return;
	}
	glDeleteQueries( nQueries , &gpuQueries_[ 0 ] );
}
|
||||
|
||||
// Empties the profiler's per-section containers: section names, CPU sample
// history and start timestamps.
// NOTE(review): this text is a rendered diff - starts_.clear( ) looks like the
// removed line and cpuStarts_.clear( ) like its replacement; confirm against
// the repository.
// NOTE(review): gpuSamples_ is not cleared here, so GPU history would survive
// a clear( ) - confirm whether that is intended.
void T_Profiler::clear( )
|
||||
{
|
||||
sections_.clear( );
|
||||
samples_.clear( );
|
||||
starts_.clear( );
|
||||
cpuStarts_.clear( );
|
||||
}
|
||||
|
||||
void T_Profiler::startFrame( )
|
||||
|
@ -25,15 +33,37 @@ void T_Profiler::startFrame( )
|
|||
|
||||
void T_Profiler::endFrame( )
|
||||
{
|
||||
const auto n( sections_.size( ) );
|
||||
const auto n{ sections_.size( ) };
|
||||
if ( n ) {
|
||||
int32_t done{ 0 };
|
||||
while ( !done ) {
|
||||
glGetQueryObjectiv( gpuQueries_[ n * 2 - 1 ] ,
|
||||
GL_QUERY_RESULT_AVAILABLE ,
|
||||
&done );
|
||||
}
|
||||
|
||||
if ( gpuSamples_.size( ) < n ) {
|
||||
gpuSamples_.resize( n );
|
||||
}
|
||||
for ( auto i = 0u ; i < n ; i ++ ) {
|
||||
uint64_t a , b;
|
||||
glGetQueryObjectui64v( gpuQueries_[ i * 2 ] ,
|
||||
GL_QUERY_RESULT , &a );
|
||||
glGetQueryObjectui64v( gpuQueries_[ i * 2 + 1 ] ,
|
||||
GL_QUERY_RESULT , &b );
|
||||
addSample( gpuSamples_[ i ] , b - a );
|
||||
}
|
||||
}
|
||||
|
||||
while ( secDurations_.size( ) < n ) {
|
||||
secDurations_.add( 0 );
|
||||
secStarts_.add( 0 );
|
||||
}
|
||||
|
||||
for ( auto i = 0u ; i < n ; i ++ ) {
|
||||
const float d = computeDuration( i );
|
||||
secDurations_[ i ] = d;
|
||||
const float cpuD = computeDuration( samples_[ i ] );
|
||||
const float gpuD = computeDuration( gpuSamples_[ i ] );
|
||||
secDurations_[ i ] = std::max( cpuD , gpuD );
|
||||
if ( parents_[ i ] != Invalid ) {
|
||||
assert( parents_[ i ] < i );
|
||||
secStarts_[ i ] = secStarts_[ parents_[ i ] ];
|
||||
|
@ -55,7 +85,7 @@ void T_Profiler::start(
|
|||
if ( pos == n ) {
|
||||
sections_.add( section );
|
||||
samples_.add( T_SamplesList_{ } );
|
||||
starts_.add( 0u );
|
||||
cpuStarts_.add( 0u );
|
||||
chain_.add( Invalid );
|
||||
parents_.add( Invalid );
|
||||
}
|
||||
|
@ -71,7 +101,12 @@ void T_Profiler::start(
|
|||
|
||||
struct timespec ts;
|
||||
clock_gettime( CLOCK_MONOTONIC , &ts );
|
||||
starts_[ pos ] = ts.tv_sec * 1000000000 + ts.tv_nsec;
|
||||
cpuStarts_[ pos ] = ts.tv_sec * 1000000000 + ts.tv_nsec;
|
||||
|
||||
if ( gpuQueries_.size( ) <= pos * 2 + 1 ) {
|
||||
extendGPUQueries( );
|
||||
}
|
||||
glQueryCounter( gpuQueries_[ pos * 2 ] , GL_TIMESTAMP );
|
||||
}
|
||||
|
||||
void T_Profiler::end(
|
||||
|
@ -84,24 +119,11 @@ void T_Profiler::end(
|
|||
}
|
||||
|
||||
struct timespec ts;
|
||||
glQueryCounter( gpuQueries_[ pos * 2 + 1 ] , GL_TIMESTAMP );
|
||||
clock_gettime( CLOCK_MONOTONIC , &ts );
|
||||
|
||||
const uint64_t ended = ts.tv_sec * 1000000000 + ts.tv_nsec;
|
||||
const uint64_t duration = ended - starts_[ pos ];
|
||||
auto& samples( samples_[ pos ] );
|
||||
if ( samples.size( ) == 0 || ( samples.size( ) < History
|
||||
&& samples[ 0 ].nSamples == Samples ) ) {
|
||||
samples.insert( 0 , T_ProfilerSamples{ 0 , 0 } );
|
||||
|
||||
} else if ( samples.size( ) == History && samples[ 0 ].nSamples == Samples ) {
|
||||
for ( auto i = 1u ; i < History ; i ++ ) {
|
||||
samples[ i ] = samples[ i - 1 ];
|
||||
}
|
||||
samples[ 0 ].sum = 0;
|
||||
samples[ 0 ].nSamples = 0;
|
||||
}
|
||||
samples[ 0 ].sum += float( duration ) * 1e-6;
|
||||
samples[ 0 ].nSamples ++;
|
||||
addSample( samples_[ pos ] , ended - cpuStarts_[ pos ] );
|
||||
|
||||
previous_ = pos;
|
||||
current_ = Invalid;
|
||||
|
@ -135,9 +157,10 @@ void T_Profiler::makeUI( )
|
|||
ImGui::PushStyleColor( ImGuiCol_Text , color );
|
||||
|
||||
ebcl::T_StringBuilder sb;
|
||||
char tms[ 12 ];
|
||||
snprintf( tms , 12 , "%.3f" , secDurations_[ i ] );
|
||||
sb << sections_[ i ] << " ("
|
||||
<< int( round( secDurations_[ i ] ) )
|
||||
<< "ms)" << '\0';
|
||||
<< tms << "ms)" << '\0';
|
||||
ImGui::Checkbox( sb.data( ) , (bool*) &displayed_[ i ] );
|
||||
ImGui::PopStyleColor( );
|
||||
}
|
||||
|
@ -206,15 +229,44 @@ void T_Profiler::makeUI( )
|
|||
ImGui::End( );
|
||||
}
|
||||
|
||||
float T_Profiler::computeDuration(
|
||||
const uint32_t section ) const
|
||||
void T_Profiler::extendGPUQueries( ) noexcept
|
||||
{
|
||||
const auto iSize{ gpuQueries_.size( ) };
|
||||
const auto iGrowth{ gpuQueries_.growth( ) };
|
||||
gpuQueries_.resize( iSize + iGrowth , 0 );
|
||||
glGenQueries( iGrowth , &gpuQueries_[ iSize ] );
|
||||
}
|
||||
|
||||
/// Records one duration sample into a section's sample history.
///
/// `list[ 0 ]` is the bucket currently being filled. Once it holds `Samples`
/// entries a new bucket is started: while the list has fewer than `History`
/// buckets, one is inserted at the front; once the list is full, the existing
/// buckets are shifted one slot towards the back (the oldest is dropped) and
/// `list[ 0 ]` is reset.
///
/// @param list     sample history of a single section, newest bucket first
/// @param duration measured duration; scaled by 1e-6 before being accumulated
void T_Profiler::addSample(
		T_SamplesList_& list ,
		const uint64_t duration
	) noexcept
{
	if ( list.size( ) == 0 || ( list.size( ) < History
				&& list[ 0 ].nSamples == Samples ) ) {
		list.insert( 0 , T_ProfilerSamples{ 0 , 0 } );

	} else if ( list.size( ) == History && list[ 0 ].nSamples == Samples ) {
		// Shift from the back: a forward copy would overwrite every
		// bucket with list[ 0 ] and lose the older history.
		for ( auto i = History - 1 ; i > 0 ; i -- ) {
			list[ i ] = list[ i - 1 ];
		}
		list[ 0 ].sum = 0;
		list[ 0 ].nSamples = 0;
	}
	list[ 0 ].sum += float( duration ) * 1e-6;
	list[ 0 ].nSamples ++;
}
|
||||
|
||||
// Returns the mean of a sample list: the sum of every bucket's `sum` divided
// by the total of every bucket's `nSamples`.
// NOTE(review): for an empty list (or all-zero counts) this evaluates 0 / 0 in
// float - confirm callers never pass one, or guard the division.
// NOTE(review): this text is a rendered diff; the lines using
// `samples_[ section ]` / `entry` appear to be the pre-change body - confirm
// against the repository.
float T_Profiler::computeDuration(
|
||||
T_SamplesList_ const& section
|
||||
) noexcept
|
||||
{
|
||||
auto const& samples( samples_[ section ] );
|
||||
float total = 0;
|
||||
float nSamples = 0;
|
||||
for ( auto const& entry : samples ) {
|
||||
total += entry.sum;
|
||||
nSamples += entry.nSamples;
|
||||
const auto ns{ section.size( ) };
|
||||
for ( auto i = 0u ; i < ns ; i ++ ) {
|
||||
total += section[ i ].sum;
|
||||
nSamples += section[ i ].nSamples;
|
||||
}
|
||||
return total / nSamples;
|
||||
}
|
||||
|
|
|
@ -17,6 +17,8 @@ struct T_Profiler
|
|||
static constexpr uint32_t History = 4;
|
||||
static constexpr uint32_t Invalid = 0xffffffff;
|
||||
|
||||
~T_Profiler( );
|
||||
|
||||
void clear( );
|
||||
|
||||
void startFrame( );
|
||||
|
@ -40,12 +42,17 @@ struct T_Profiler
|
|||
bool& uiEnabled( ) { return uiEnabled_; }
|
||||
|
||||
private:
|
||||
using T_SamplesList_ = T_Array< T_ProfilerSamples >;
|
||||
using T_SamplesList_ = T_StaticArray< T_ProfilerSamples , History >;
|
||||
using T_Data_ = T_Array< T_SamplesList_ >;
|
||||
|
||||
void extendGPUQueries( ) noexcept;
|
||||
static void addSample(
|
||||
T_SamplesList_& list ,
|
||||
uint64_t duration ) noexcept;
|
||||
|
||||
uint32_t find( T_String const& section ) const;
|
||||
float computeDuration(
|
||||
const uint32_t section ) const;
|
||||
static float computeDuration(
|
||||
T_SamplesList_ const& list ) noexcept;
|
||||
|
||||
uint32_t previous_;
|
||||
uint32_t current_;
|
||||
|
@ -54,12 +61,15 @@ struct T_Profiler
|
|||
T_Array< uint32_t > chain_;
|
||||
T_Array< uint32_t > parents_;
|
||||
T_Data_ samples_;
|
||||
T_Array< uint64_t > starts_;
|
||||
T_Array< uint64_t > cpuStarts_;
|
||||
|
||||
T_Array< float > secDurations_;
|
||||
T_Array< float > secStarts_;
|
||||
|
||||
bool uiEnabled_ = false;
|
||||
T_Array< int > displayed_;
|
||||
|
||||
T_Array< GLuint > gpuQueries_{ T_Array< GLuint >( 64 ) };
|
||||
T_Data_ gpuSamples_;
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in a new issue