Profiler - Also use GL timestamps
* Get performance information from the GPU using GL timers
* Avoid using glFinish() for the profiler
* Display max( cpu time , gpu time ) in the profiler window
This commit is contained in:
parent
99bd2c429b
commit
646ec5a423
4 changed files with 96 additions and 36 deletions
|
@ -84,7 +84,6 @@ void T_Main::mainLoop( )
|
||||||
UI::Shaders( ).update( );
|
UI::Shaders( ).update( );
|
||||||
|
|
||||||
// Display
|
// Display
|
||||||
glFinish( );
|
|
||||||
p.startFrame( );
|
p.startFrame( );
|
||||||
p.start( "Full frame" );
|
p.start( "Full frame" );
|
||||||
m.handleEvents( );
|
m.handleEvents( );
|
||||||
|
@ -182,7 +181,7 @@ void T_Main::render( )
|
||||||
if ( UI::ODbg( ).isActive( ) ) {
|
if ( UI::ODbg( ).isActive( ) ) {
|
||||||
UI::ODbg( ).debugOutput( );
|
UI::ODbg( ).debugOutput( );
|
||||||
}
|
}
|
||||||
glFinish( ); UI::Profiler( ).end( "Debug" );
|
UI::Profiler( ).end( "Debug" );
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
T_Rendertarget::MainOutput( );
|
T_Rendertarget::MainOutput( );
|
||||||
|
|
|
@ -731,7 +731,6 @@ void T_OpContext::run(
|
||||||
}
|
}
|
||||||
|
|
||||||
case OP_UI_PEXIT:
|
case OP_UI_PEXIT:
|
||||||
glFinish( );
|
|
||||||
UI::Profiler( ).end( profiling.last( ) );
|
UI::Profiler( ).end( profiling.last( ) );
|
||||||
profiling.removeLast( );
|
profiling.removeLast( );
|
||||||
break;
|
break;
|
||||||
|
|
110
ui-profiling.cc
110
ui-profiling.cc
|
@ -10,11 +10,19 @@ constexpr uint32_t T_Profiler::History;
|
||||||
constexpr uint32_t T_Profiler::Invalid;
|
constexpr uint32_t T_Profiler::Invalid;
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Destructor: hands every GL query name stored in gpuQueries_ back to
 * the driver. Nothing is done when no query has been generated.
 */
T_Profiler::~T_Profiler( )
{
	const auto nQueries{ gpuQueries_.size( ) };
	if ( !nQueries ) {
		return;
	}
	// The query names are passed as one contiguous run starting at the
	// first element.
	glDeleteQueries( nQueries , &gpuQueries_[ 0 ] );
}
|
||||||
|
|
||||||
void T_Profiler::clear( )
|
void T_Profiler::clear( )
|
||||||
{
|
{
|
||||||
sections_.clear( );
|
sections_.clear( );
|
||||||
samples_.clear( );
|
samples_.clear( );
|
||||||
starts_.clear( );
|
cpuStarts_.clear( );
|
||||||
}
|
}
|
||||||
|
|
||||||
void T_Profiler::startFrame( )
|
void T_Profiler::startFrame( )
|
||||||
|
@ -25,15 +33,37 @@ void T_Profiler::startFrame( )
|
||||||
|
|
||||||
void T_Profiler::endFrame( )
|
void T_Profiler::endFrame( )
|
||||||
{
|
{
|
||||||
const auto n( sections_.size( ) );
|
const auto n{ sections_.size( ) };
|
||||||
|
if ( n ) {
|
||||||
|
int32_t done{ 0 };
|
||||||
|
while ( !done ) {
|
||||||
|
glGetQueryObjectiv( gpuQueries_[ n * 2 - 1 ] ,
|
||||||
|
GL_QUERY_RESULT_AVAILABLE ,
|
||||||
|
&done );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( gpuSamples_.size( ) < n ) {
|
||||||
|
gpuSamples_.resize( n );
|
||||||
|
}
|
||||||
|
for ( auto i = 0u ; i < n ; i ++ ) {
|
||||||
|
uint64_t a , b;
|
||||||
|
glGetQueryObjectui64v( gpuQueries_[ i * 2 ] ,
|
||||||
|
GL_QUERY_RESULT , &a );
|
||||||
|
glGetQueryObjectui64v( gpuQueries_[ i * 2 + 1 ] ,
|
||||||
|
GL_QUERY_RESULT , &b );
|
||||||
|
addSample( gpuSamples_[ i ] , b - a );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
while ( secDurations_.size( ) < n ) {
|
while ( secDurations_.size( ) < n ) {
|
||||||
secDurations_.add( 0 );
|
secDurations_.add( 0 );
|
||||||
secStarts_.add( 0 );
|
secStarts_.add( 0 );
|
||||||
}
|
}
|
||||||
|
|
||||||
for ( auto i = 0u ; i < n ; i ++ ) {
|
for ( auto i = 0u ; i < n ; i ++ ) {
|
||||||
const float d = computeDuration( i );
|
const float cpuD = computeDuration( samples_[ i ] );
|
||||||
secDurations_[ i ] = d;
|
const float gpuD = computeDuration( gpuSamples_[ i ] );
|
||||||
|
secDurations_[ i ] = std::max( cpuD , gpuD );
|
||||||
if ( parents_[ i ] != Invalid ) {
|
if ( parents_[ i ] != Invalid ) {
|
||||||
assert( parents_[ i ] < i );
|
assert( parents_[ i ] < i );
|
||||||
secStarts_[ i ] = secStarts_[ parents_[ i ] ];
|
secStarts_[ i ] = secStarts_[ parents_[ i ] ];
|
||||||
|
@ -55,7 +85,7 @@ void T_Profiler::start(
|
||||||
if ( pos == n ) {
|
if ( pos == n ) {
|
||||||
sections_.add( section );
|
sections_.add( section );
|
||||||
samples_.add( T_SamplesList_{ } );
|
samples_.add( T_SamplesList_{ } );
|
||||||
starts_.add( 0u );
|
cpuStarts_.add( 0u );
|
||||||
chain_.add( Invalid );
|
chain_.add( Invalid );
|
||||||
parents_.add( Invalid );
|
parents_.add( Invalid );
|
||||||
}
|
}
|
||||||
|
@ -71,7 +101,12 @@ void T_Profiler::start(
|
||||||
|
|
||||||
struct timespec ts;
|
struct timespec ts;
|
||||||
clock_gettime( CLOCK_MONOTONIC , &ts );
|
clock_gettime( CLOCK_MONOTONIC , &ts );
|
||||||
starts_[ pos ] = ts.tv_sec * 1000000000 + ts.tv_nsec;
|
cpuStarts_[ pos ] = ts.tv_sec * 1000000000 + ts.tv_nsec;
|
||||||
|
|
||||||
|
if ( gpuQueries_.size( ) <= pos * 2 + 1 ) {
|
||||||
|
extendGPUQueries( );
|
||||||
|
}
|
||||||
|
glQueryCounter( gpuQueries_[ pos * 2 ] , GL_TIMESTAMP );
|
||||||
}
|
}
|
||||||
|
|
||||||
void T_Profiler::end(
|
void T_Profiler::end(
|
||||||
|
@ -84,24 +119,11 @@ void T_Profiler::end(
|
||||||
}
|
}
|
||||||
|
|
||||||
struct timespec ts;
|
struct timespec ts;
|
||||||
|
glQueryCounter( gpuQueries_[ pos * 2 + 1 ] , GL_TIMESTAMP );
|
||||||
clock_gettime( CLOCK_MONOTONIC , &ts );
|
clock_gettime( CLOCK_MONOTONIC , &ts );
|
||||||
|
|
||||||
const uint64_t ended = ts.tv_sec * 1000000000 + ts.tv_nsec;
|
const uint64_t ended = ts.tv_sec * 1000000000 + ts.tv_nsec;
|
||||||
const uint64_t duration = ended - starts_[ pos ];
|
addSample( samples_[ pos ] , ended - cpuStarts_[ pos ] );
|
||||||
auto& samples( samples_[ pos ] );
|
|
||||||
if ( samples.size( ) == 0 || ( samples.size( ) < History
|
|
||||||
&& samples[ 0 ].nSamples == Samples ) ) {
|
|
||||||
samples.insert( 0 , T_ProfilerSamples{ 0 , 0 } );
|
|
||||||
|
|
||||||
} else if ( samples.size( ) == History && samples[ 0 ].nSamples == Samples ) {
|
|
||||||
for ( auto i = 1u ; i < History ; i ++ ) {
|
|
||||||
samples[ i ] = samples[ i - 1 ];
|
|
||||||
}
|
|
||||||
samples[ 0 ].sum = 0;
|
|
||||||
samples[ 0 ].nSamples = 0;
|
|
||||||
}
|
|
||||||
samples[ 0 ].sum += float( duration ) * 1e-6;
|
|
||||||
samples[ 0 ].nSamples ++;
|
|
||||||
|
|
||||||
previous_ = pos;
|
previous_ = pos;
|
||||||
current_ = Invalid;
|
current_ = Invalid;
|
||||||
|
@ -135,9 +157,10 @@ void T_Profiler::makeUI( )
|
||||||
ImGui::PushStyleColor( ImGuiCol_Text , color );
|
ImGui::PushStyleColor( ImGuiCol_Text , color );
|
||||||
|
|
||||||
ebcl::T_StringBuilder sb;
|
ebcl::T_StringBuilder sb;
|
||||||
|
char tms[ 12 ];
|
||||||
|
snprintf( tms , 12 , "%.3f" , secDurations_[ i ] );
|
||||||
sb << sections_[ i ] << " ("
|
sb << sections_[ i ] << " ("
|
||||||
<< int( round( secDurations_[ i ] ) )
|
<< tms << "ms)" << '\0';
|
||||||
<< "ms)" << '\0';
|
|
||||||
ImGui::Checkbox( sb.data( ) , (bool*) &displayed_[ i ] );
|
ImGui::Checkbox( sb.data( ) , (bool*) &displayed_[ i ] );
|
||||||
ImGui::PopStyleColor( );
|
ImGui::PopStyleColor( );
|
||||||
}
|
}
|
||||||
|
@ -206,15 +229,44 @@ void T_Profiler::makeUI( )
|
||||||
ImGui::End( );
|
ImGui::End( );
|
||||||
}
|
}
|
||||||
|
|
||||||
float T_Profiler::computeDuration(
|
void T_Profiler::extendGPUQueries( ) noexcept
|
||||||
const uint32_t section ) const
|
{
|
||||||
|
const auto iSize{ gpuQueries_.size( ) };
|
||||||
|
const auto iGrowth{ gpuQueries_.growth( ) };
|
||||||
|
gpuQueries_.resize( iSize + iGrowth , 0 );
|
||||||
|
glGenQueries( iGrowth , &gpuQueries_[ iSize ] );
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Record one timing sample into a section's bucketed history.
 *
 * list[ 0 ] is the bucket currently being filled. When it already holds
 * `Samples` entries a new bucket is opened:
 *   - while the list has fewer than `History` buckets, an empty bucket is
 *     inserted at the front;
 *   - once the list holds `History` buckets, the existing buckets are each
 *     moved one slot towards the back (the oldest one is dropped) and the
 *     front bucket is reset.
 *
 * Parameters:
 *   list      bucket history of one section (used for both the CPU and
 *             the GPU sample lists)
 *   duration  measured duration; it is scaled by 1e-6 before being
 *             accumulated (callers pass nanoseconds, so sums are in ms)
 */
void T_Profiler::addSample(
		T_SamplesList_& list ,
		const uint64_t duration
	) noexcept
{
	if ( list.size( ) == 0 || ( list.size( ) < History
				&& list[ 0 ].nSamples == Samples ) ) {
		list.insert( 0 , T_ProfilerSamples{ 0 , 0 } );

	} else if ( list.size( ) == History && list[ 0 ].nSamples == Samples ) {
		// Shift from the back towards the front. Walking forwards would
		// overwrite list[ i ] before it is copied into list[ i + 1 ] and
		// smear list[ 0 ] over the whole history.
		for ( auto i = History - 1 ; i > 0 ; i -- ) {
			list[ i ] = list[ i - 1 ];
		}
		list[ 0 ].sum = 0;
		list[ 0 ].nSamples = 0;
	}

	list[ 0 ].sum += float( duration ) * 1e-6;
	list[ 0 ].nSamples ++;
}
|
||||||
|
|
||||||
|
float T_Profiler::computeDuration(
|
||||||
|
T_SamplesList_ const& section
|
||||||
|
) noexcept
|
||||||
{
|
{
|
||||||
auto const& samples( samples_[ section ] );
|
|
||||||
float total = 0;
|
float total = 0;
|
||||||
float nSamples = 0;
|
float nSamples = 0;
|
||||||
for ( auto const& entry : samples ) {
|
const auto ns{ section.size( ) };
|
||||||
total += entry.sum;
|
for ( auto i = 0u ; i < ns ; i ++ ) {
|
||||||
nSamples += entry.nSamples;
|
total += section[ i ].sum;
|
||||||
|
nSamples += section[ i ].nSamples;
|
||||||
}
|
}
|
||||||
return total / nSamples;
|
return total / nSamples;
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,6 +17,8 @@ struct T_Profiler
|
||||||
static constexpr uint32_t History = 4;
|
static constexpr uint32_t History = 4;
|
||||||
static constexpr uint32_t Invalid = 0xffffffff;
|
static constexpr uint32_t Invalid = 0xffffffff;
|
||||||
|
|
||||||
|
~T_Profiler( );
|
||||||
|
|
||||||
void clear( );
|
void clear( );
|
||||||
|
|
||||||
void startFrame( );
|
void startFrame( );
|
||||||
|
@ -40,12 +42,17 @@ struct T_Profiler
|
||||||
bool& uiEnabled( ) { return uiEnabled_; }
|
bool& uiEnabled( ) { return uiEnabled_; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
using T_SamplesList_ = T_Array< T_ProfilerSamples >;
|
using T_SamplesList_ = T_StaticArray< T_ProfilerSamples , History >;
|
||||||
using T_Data_ = T_Array< T_SamplesList_ >;
|
using T_Data_ = T_Array< T_SamplesList_ >;
|
||||||
|
|
||||||
|
void extendGPUQueries( ) noexcept;
|
||||||
|
static void addSample(
|
||||||
|
T_SamplesList_& list ,
|
||||||
|
uint64_t duration ) noexcept;
|
||||||
|
|
||||||
uint32_t find( T_String const& section ) const;
|
uint32_t find( T_String const& section ) const;
|
||||||
float computeDuration(
|
static float computeDuration(
|
||||||
const uint32_t section ) const;
|
T_SamplesList_ const& list ) noexcept;
|
||||||
|
|
||||||
uint32_t previous_;
|
uint32_t previous_;
|
||||||
uint32_t current_;
|
uint32_t current_;
|
||||||
|
@ -54,12 +61,15 @@ struct T_Profiler
|
||||||
T_Array< uint32_t > chain_;
|
T_Array< uint32_t > chain_;
|
||||||
T_Array< uint32_t > parents_;
|
T_Array< uint32_t > parents_;
|
||||||
T_Data_ samples_;
|
T_Data_ samples_;
|
||||||
T_Array< uint64_t > starts_;
|
T_Array< uint64_t > cpuStarts_;
|
||||||
|
|
||||||
T_Array< float > secDurations_;
|
T_Array< float > secDurations_;
|
||||||
T_Array< float > secStarts_;
|
T_Array< float > secStarts_;
|
||||||
|
|
||||||
bool uiEnabled_ = false;
|
bool uiEnabled_ = false;
|
||||||
T_Array< int > displayed_;
|
T_Array< int > displayed_;
|
||||||
|
|
||||||
|
T_Array< GLuint > gpuQueries_{ T_Array< GLuint >( 64 ) };
|
||||||
|
T_Data_ gpuSamples_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue