# Profiler: replace glFinish()-based timing with GL timestamp queries.
#
# What this patch does:
#   * m-tool.cc, ui-opemu.cc: drop the glFinish() calls that were issued
#     around profiler start/end points.
#   * T_Profiler::start() / end(): issue glQueryCounter( ..., GL_TIMESTAMP )
#     on query objects 2*pos and 2*pos+1 in addition to the existing
#     CLOCK_MONOTONIC CPU timestamps.
#   * T_Profiler::endFrame(): poll GL_QUERY_RESULT_AVAILABLE on the last
#     section's end query, read every section's two timestamps, feed the
#     difference to addSample(), and store max( CPU average, GPU average )
#     as the section duration.
#   * T_Profiler::makeUI(): print durations with "%.3f" instead of rounding
#     to an integer.
#   * T_SamplesList_ becomes T_StaticArray< T_ProfilerSamples , History >;
#     the sample bookkeeping moves into the static helper addSample().
#   * ~T_Profiler() deletes the generated query objects.
#
# Fixes applied to the added code in this revision of the patch:
#   * addSample(): the history shift now runs from the oldest slot down;
#     the forward loop overwrote every slot with slot 0.
#   * computeDuration(): returns 0 for an empty history instead of 0/0.
#   * clear(): also clears gpuSamples_, which is indexed in parallel with
#     samples_.
#
# NOTE(review): this file was recovered from a copy whose line breaks had
# been collapsed. Tab indentation and the position of blank context lines
# were re-derived from the hunk line counts - check them against the target
# tree (or apply with whitespace tolerance). The "index" blob ids are the
# original ones and no longer describe the modified ui-profiling.cc
# post-image.
diff --git a/m-tool.cc b/m-tool.cc
index 26ed129..b28eb54 100644
--- a/m-tool.cc
+++ b/m-tool.cc
@@ -84,7 +84,6 @@ void T_Main::mainLoop( )
 		UI::Shaders( ).update( );
 
 		// Display
-		glFinish( );
 		p.startFrame( );
 		p.start( "Full frame" );
 		m.handleEvents( );
@@ -182,7 +181,7 @@ void T_Main::render( )
 		if ( UI::ODbg( ).isActive( ) ) {
 			UI::ODbg( ).debugOutput( );
 		}
-		glFinish( ); UI::Profiler( ).end( "Debug" );
+		UI::Profiler( ).end( "Debug" );
 
 	} else {
 		T_Rendertarget::MainOutput( );
diff --git a/ui-opemu.cc b/ui-opemu.cc
index fa13f79..45d2597 100644
--- a/ui-opemu.cc
+++ b/ui-opemu.cc
@@ -731,7 +731,6 @@ void T_OpContext::run(
 		}
 
 		case OP_UI_PEXIT:
-			glFinish( );
 			UI::Profiler( ).end( profiling.last( ) );
 			profiling.removeLast( );
 			break;
diff --git a/ui-profiling.cc b/ui-profiling.cc
index 8e22091..2fd9f26 100644
--- a/ui-profiling.cc
+++ b/ui-profiling.cc
@@ -10,11 +10,20 @@
 constexpr uint32_t T_Profiler::History;
 constexpr uint32_t T_Profiler::Invalid;
 
+T_Profiler::~T_Profiler( )
+{
+	const auto iSize{ gpuQueries_.size( ) };
+	if ( iSize ) {
+		glDeleteQueries( iSize , &gpuQueries_[ 0 ] );
+	}
+}
+
 void T_Profiler::clear( )
 {
 	sections_.clear( );
 	samples_.clear( );
-	starts_.clear( );
+	cpuStarts_.clear( );
+	gpuSamples_.clear( );	// indexed like samples_; must not outlive it
 }
 
 void T_Profiler::startFrame( )
@@ -25,15 +34,37 @@ void T_Profiler::startFrame( )
 
 void T_Profiler::endFrame( )
 {
-	const auto n( sections_.size( ) );
+	const auto n{ sections_.size( ) };
+	if ( n ) {
+		int32_t done{ 0 };
+		while ( !done ) {
+			glGetQueryObjectiv( gpuQueries_[ n * 2 - 1 ] ,
+					GL_QUERY_RESULT_AVAILABLE ,
+					&done );
+		}
+
+		if ( gpuSamples_.size( ) < n ) {
+			gpuSamples_.resize( n );
+		}
+		for ( auto i = 0u ; i < n ; i ++ ) {
+			uint64_t a , b;
+			glGetQueryObjectui64v( gpuQueries_[ i * 2 ] ,
+					GL_QUERY_RESULT , &a );
+			glGetQueryObjectui64v( gpuQueries_[ i * 2 + 1 ] ,
+					GL_QUERY_RESULT , &b );
+			addSample( gpuSamples_[ i ] , b - a );
+		}
+	}
+
 	while ( secDurations_.size( ) < n ) {
 		secDurations_.add( 0 );
 		secStarts_.add( 0 );
 	}
 
 	for ( auto i = 0u ; i < n ; i ++ ) {
-		const float d = computeDuration( i );
-		secDurations_[ i ] = d;
+		const float cpuD = computeDuration( samples_[ i ] );
+		const float gpuD = computeDuration( gpuSamples_[ i ] );
+		secDurations_[ i ] = std::max( cpuD , gpuD );
 		if ( parents_[ i ] != Invalid ) {
 			assert( parents_[ i ] < i );
 			secStarts_[ i ] = secStarts_[ parents_[ i ] ];
@@ -55,7 +86,7 @@ void T_Profiler::start(
 	if ( pos == n ) {
 		sections_.add( section );
 		samples_.add( T_SamplesList_{ } );
-		starts_.add( 0u );
+		cpuStarts_.add( 0u );
 		chain_.add( Invalid );
 		parents_.add( Invalid );
 	}
@@ -71,7 +102,12 @@ void T_Profiler::start(
 
 	struct timespec ts;
 	clock_gettime( CLOCK_MONOTONIC , &ts );
-	starts_[ pos ] = ts.tv_sec * 1000000000 + ts.tv_nsec;
+	cpuStarts_[ pos ] = ts.tv_sec * 1000000000 + ts.tv_nsec;
+
+	if ( gpuQueries_.size( ) <= pos * 2 + 1 ) {
+		extendGPUQueries( );
+	}
+	glQueryCounter( gpuQueries_[ pos * 2 ] , GL_TIMESTAMP );
 }
 
 void T_Profiler::end(
@@ -84,24 +120,11 @@ void T_Profiler::end(
 	}
 
 	struct timespec ts;
+	glQueryCounter( gpuQueries_[ pos * 2 + 1 ] , GL_TIMESTAMP );
 	clock_gettime( CLOCK_MONOTONIC , &ts );
 	const uint64_t ended = ts.tv_sec * 1000000000 + ts.tv_nsec;
-	const uint64_t duration = ended - starts_[ pos ];
 
-	auto& samples( samples_[ pos ] );
-	if ( samples.size( ) == 0 || ( samples.size( ) < History
-			&& samples[ 0 ].nSamples == Samples ) ) {
-		samples.insert( 0 , T_ProfilerSamples{ 0 , 0 } );
-
-	} else if ( samples.size( ) == History && samples[ 0 ].nSamples == Samples ) {
-		for ( auto i = 1u ; i < History ; i ++ ) {
-			samples[ i ] = samples[ i - 1 ];
-		}
-		samples[ 0 ].sum = 0;
-		samples[ 0 ].nSamples = 0;
-	}
-	samples[ 0 ].sum += float( duration ) * 1e-6;
-	samples[ 0 ].nSamples ++;
+	addSample( samples_[ pos ] , ended - cpuStarts_[ pos ] );
 
 	previous_ = pos;
 	current_ = Invalid;
@@ -135,9 +158,10 @@ void T_Profiler::makeUI( )
 
 		ImGui::PushStyleColor( ImGuiCol_Text , color );
 		ebcl::T_StringBuilder sb;
+		char tms[ 12 ];
+		snprintf( tms , 12 , "%.3f" , secDurations_[ i ] );
 		sb << sections_[ i ] << " ("
-			<< int( round( secDurations_[ i ] ) )
-			<< "ms)" << '\0';
+			<< tms << "ms)" << '\0';
 		ImGui::Checkbox( sb.data( ) , (bool*) &displayed_[ i ] );
 		ImGui::PopStyleColor( );
 	}
@@ -206,15 +230,44 @@ void T_Profiler::makeUI( )
 	ImGui::End( );
 }
 
-float T_Profiler::computeDuration(
-		const uint32_t section ) const
+void T_Profiler::extendGPUQueries( ) noexcept
+{
+	const auto iSize{ gpuQueries_.size( ) };
+	const auto iGrowth{ gpuQueries_.growth( ) };
+	gpuQueries_.resize( iSize + iGrowth , 0 );
+	glGenQueries( iGrowth , &gpuQueries_[ iSize ] );
+}
+
+void T_Profiler::addSample(
+		T_SamplesList_& list ,
+		const uint64_t duration
+	) noexcept
+{
+	if ( list.size( ) == 0 || ( list.size( ) < History
+			&& list[ 0 ].nSamples == Samples ) ) {
+		list.insert( 0 , T_ProfilerSamples{ 0 , 0 } );
+
+	} else if ( list.size( ) == History && list[ 0 ].nSamples == Samples ) {
+		for ( auto i = History - 1 ; i > 0 ; i -- ) {	// oldest slot first, so no entry is overwritten before it is copied
+			list[ i ] = list[ i - 1 ];
+		}
+		list[ 0 ].sum = 0;
+		list[ 0 ].nSamples = 0;
+	}
+	list[ 0 ].sum += float( duration ) * 1e-6;
+	list[ 0 ].nSamples ++;
+}
+
+float T_Profiler::computeDuration(
+		T_SamplesList_ const& section
+	) noexcept
 {
-	auto const& samples( samples_[ section ] );
 	float total = 0;
 	float nSamples = 0;
-	for ( auto const& entry : samples ) {
-		total += entry.sum;
-		nSamples += entry.nSamples;
+	const auto ns{ section.size( ) };
+	for ( auto i = 0u ; i < ns ; i ++ ) {
+		total += section[ i ].sum;
+		nSamples += section[ i ].nSamples;
 	}
-	return total / nSamples;
+	return nSamples > 0 ? total / nSamples : 0.f;	// empty history: report 0, not 0/0
 }
diff --git a/ui-profiling.hh b/ui-profiling.hh
index f47849b..f6c251c 100644
--- a/ui-profiling.hh
+++ b/ui-profiling.hh
@@ -17,6 +17,8 @@ struct T_Profiler
 	static constexpr uint32_t History = 4;
 	static constexpr uint32_t Invalid = 0xffffffff;
 
+	~T_Profiler( );
+
 	void clear( );
 
 	void startFrame( );
@@ -40,12 +42,17 @@ struct T_Profiler
 	bool& uiEnabled( ) { return uiEnabled_; }
 
     private:
-	using T_SamplesList_ = T_Array< T_ProfilerSamples >;
+	using T_SamplesList_ = T_StaticArray< T_ProfilerSamples , History >;
 	using T_Data_ = T_Array< T_SamplesList_ >;
 
+	void extendGPUQueries( ) noexcept;
+	static void addSample(
+			T_SamplesList_& list ,
+			uint64_t duration ) noexcept;
+
 	uint32_t find( T_String const& section ) const;
-	float computeDuration(
-			const uint32_t section ) const;
+	static float computeDuration(
+			T_SamplesList_ const& list ) noexcept;
 
 	uint32_t previous_;
 	uint32_t current_;
@@ -54,12 +61,15 @@ struct T_Profiler
 	T_Array< uint32_t > chain_;
 	T_Array< uint32_t > parents_;
 	T_Data_ samples_;
-	T_Array< uint64_t > starts_;
+	T_Array< uint64_t > cpuStarts_;
 	T_Array< float > secDurations_;
 	T_Array< float > secStarts_;
 
 	bool uiEnabled_ = false;
 	T_Array< int > displayed_;
+
+	T_Array< GLuint > gpuQueries_{ T_Array< GLuint >( 64 ) };
+	T_Data_ gpuSamples_;
 };
 
 