demotool/c-opopt.cc

#include "externals.hh"

#include "c-opopt.hh"
#include "c-ops.hh"
#include "c-sync.hh"

using namespace ebcl;
using namespace opast;
using namespace opopt;


/*= T_OptData ================================================================*/

// Log the constant string S with level L through the `logger` that is in
// scope where the macro is expanded. The string builder is only constructed
// inside the callback handed to the logger.
#define M_LOGSTR_( S , L ) \
	logger( [](){ return T_StringBuilder{ S }; } , L )

// Compute the hash of a variable identifier.
//
// The hash mixes the variable's type, the hash of its name and, for
// variables that are not global, the hash of the name of the function that
// owns the variable.
uint32_t opopt::ComputeHash(
		T_OptData::T_VarId const& id ) noexcept
{
	const uint32_t nh{ ComputeHash( id.name ) };
	// Locals and arguments of different functions may share a name, so
	// the owner has to contribute to the hash. Globals have no owner
	// contribution.
	const uint32_t oh{ id.type != T_OptData::E_UDVarType::GLOBAL
		? ComputeHash( id.owner )
		: 0
	};
	// The owner's hash is rotated right by 3 bits before being mixed in,
	// so a variable named like its owner does not cancel the name hash.
	return ( uint32_t( id.type ) << 8 )
		^ nh
		^ ( ( oh << 29 ) | ( oh >> 3 ) );
}

// Out-of-class definitions of the constants that identify the special nodes
// of the control flow graph (their values are set in the class declaration).
constexpr uint32_t T_OptData::CFG_ENTER;
constexpr uint32_t T_OptData::CFG_MAINLOOP;
constexpr uint32_t T_OptData::CFG_END;

/*= T_OptData - INPUT DECLARATIONS ===========================================*/

// Build the table of input declarations, if it doesn't exist yet.
//
// The program is visited and, for each input instruction, the location and
// default value of the declaration are appended to the list associated with
// the input's identifier.
void T_OptData::findInputDecls(
		T_OpsParserOutput& program ) noexcept
{
	// Nothing to do if the table was built by a previous call
	if ( inputDecls ) {
		return;
	}

	using T_DeclList_ = T_Array< T_InputDecl >;
	inputDecls = T_KeyValueTable< T_String , T_DeclList_ >{ };

	visitor.visit( program.root , [this]( A_Node& node , const bool exit ) {
		// Only input instructions matter, and only when leaving them
		if ( !exit || node.type( ) != A_Node::OP_INPUT ) {
			return true;
		}

		auto& instr{ (T_InputInstrNode&) node };
		auto* list{ inputDecls->get( instr.id( ) ) };
		if ( list == nullptr ) {
			// First declaration for this input: create its list
			inputDecls->add( instr.id( ) , T_DeclList_{ } );
			list = inputDecls->get( instr.id( ) );
		}
		list->add( T_InputDecl{ instr.location( ) , instr.defValue( ) } );
		return true;
	} );
}


/*= T_OptData - INSTRUCTION NUMBERING ========================================*/

namespace {

// Visitor callback used to number instructions.
//
// Expression nodes are not descended into. When an instruction node is
// entered, it is added to the optimizer's instruction list and hash index;
// the record stores the instruction's index, its address, whether it is the
// last node of the instruction list it belongs to, and the index of the
// function being visited.
bool ODNIVisitor_(
		A_Node& node ,
		const bool exit ,
		T_OptData& oData ,
		const uint32_t fnIndex ) noexcept
{
	if ( dynamic_cast< A_ExpressionNode* >( &node ) != nullptr ) {
		return false;
	}

	auto* const instr{ dynamic_cast< A_InstructionNode* >( &node ) };
	if ( exit || instr == nullptr ) {
		return true;
	}

	// The parent of an instruction is expected to be an instruction list
	auto const& list{ dynamic_cast< T_InstrListNode& >( instr->parent( ) ) };
	const bool isLast{ &list.node( list.size( ) - 1 ) == instr };

	// The index is keyed on the hash of the node's address
	oData.instrIndex.add( ebcl::ComputeHash( (uint64_t)instr ) );
	oData.instructions.add( T_OptData::T_InstrPos{
		oData.instructions.size( ) , instr , isLast , fnIndex } );
	return true;
}

} // namespace <anon>

// Rebuild the list of instructions and its index.
//
// Both structures are emptied, then each function of the program is visited
// in turn so that its instructions are recorded along with the index of the
// function.
void T_OptData::numberInstructions(
		T_OpsParserOutput& program ) noexcept
{
	instrIndex.clear( );
	instructions.clear( );

	auto& root{ program.root };
	const auto count{ root.nFunctions( ) };
	auto fnIndex = 0u;
	while ( fnIndex < count ) {
		visitor.visit( root.function( fnIndex ) ,
			[this , fnIndex]( A_Node& node , const bool exit ) {
				return ODNIVisitor_( node , exit , *this , fnIndex );
			} );
		fnIndex ++;
	}
}

// Find the index of an instruction in the list of numbered instructions.
//
// The lookup goes through the hash index, using the hash of the node's
// address, and follows the chain until the record that refers to the
// instruction is found. An assertion fails if the instruction is unknown;
// when assertions are disabled T_HashIndex::INVALID_INDEX is returned
// instead.
uint32_t T_OptData::indexOf(
		opast::A_InstructionNode const& instr ) noexcept
{
	auto const* const wanted{ &instr };
	uint32_t existing{ instrIndex.first(
			ebcl::ComputeHash( (uint64_t)wanted ) ) };
	for ( ; existing != T_HashIndex::INVALID_INDEX ;
			existing = instrIndex.next( existing ) ) {
		if ( instructions[ existing ].node == wanted ) {
			break;
		}
	}
	assert( existing != T_HashIndex::INVALID_INDEX );
	return existing;
}


/*= T_OptData - CONTROL FLOW GRAPH CONSTRUCTION ==============================*/
namespace {

#warning Remove this later
// Levels passed to the logger by the control flow graph construction code
// below: LL1 for the general messages, LL2 for the detailed ones.
#define LL1 2
#define LL2 2

// CFG type shortcuts
using T_CFN_ = T_OptData::T_CtrlFlowNode;
using P_CFN_ = T_OptData::P_CtrlFlowNode;

// Obtain a control flow graph node, re-using one from the pool if possible.
//
// When the pool has entries, its last node is taken out and its instruction
// range and both edge lists are reset before it is returned. Otherwise a new
// node is allocated.
T_OptData::P_CtrlFlowNode BCFGNewNode_(
		T_Array< P_CFN_ >& pool ) noexcept
{
	if ( !pool.empty( ) ) {
		P_CFN_ recycled{ std::move( pool.last( ) ) };
		pool.removeLast( );

		// Wipe whatever the node contained in the previous graph
		recycled->instructions.clear( );
		recycled->inbound.clear( );
		recycled->outbound.clear( );
		return recycled;
	}
	return NewOwned< T_CFN_ >( );
}

// Shortcuts for the graph construction code. They rely on names that must be
// in scope where they are expanded:
//	M_NEWNODE_ - get a node, re-using one from the `old` array if possible;
//	M_ADDNEW_ - get a node and append it to `ctrlFlowGraph`;
//	M_NODE_ - access the entry of `ctrlFlowGraph` at index i.
#define M_NEWNODE_() BCFGNewNode_( old )
#define M_ADDNEW_() \
	ctrlFlowGraph.add( M_NEWNODE_( ) )
#define M_NODE_(i) \
	ctrlFlowGraph[i]

} // namespace <anon>

/*----------------------------------------------------------------------------*/

// Build the control flow graph of the program.
//
// The nodes of the previous graph, if any, are recycled. Three special nodes
// are created first (presumably ending up at the CFG_ENTER, CFG_MAINLOOP and
// CFG_END indices - the values are not visible from this file), then the
// program is visited in order to split each function into basic blocks:
//	- every instruction extends the current block;
//	- a conditional ends the current block, each of its cases starts a
//	  new block, and a new block is started after the conditional;
//	- a call ends the current block and starts a new one, to which the
//	  callee returns.
// Once all functions have been processed, call sites (including fake ones
// for *init* and *frame*) are connected to the first and last blocks of the
// functions they call, and the graph is dumped to the logger.
void T_OptData::buildControlFlowGraph(
		T_OpsParserOutput& program ) noexcept
{
	// Keep the old array, we'll reuse its contents
	T_Array< P_CtrlFlowNode > old{ std::move( ctrlFlowGraph ) };
	M_LOGSTR_( "Building control flow graph" , LL1 );

	// The function records refer to blocks of the previous graph. Get
	// rid of them so stale records do not remain in the table when the
	// graph is rebuilt.
	// NOTE(review): assumes nothing else resets cfgFunctions before this
	// method is called again - confirm against the callers.
	cfgFunctions.clear( );

	// Create special nodes
	M_ADDNEW_( );
	M_ADDNEW_( );
	M_ADDNEW_( );
	M_NODE_( CFG_MAINLOOP )->outbound.add( CFG_END );
	M_NODE_( CFG_END )->inbound.add( CFG_MAINLOOP );

	// Data structure to handle conditionals
	struct T_StackEntry_ {
		// Block that ends with the conditional instruction
		uint32_t condBlock;
		// Whether a default case was found
		bool hasDefault{ false };
		// First block of each case
		T_AutoArray< uint32_t , 8 > caseBlocks;
		// Last block of each case; differs from the first block if
		// the case contains calls or nested conditionals
		T_AutoArray< uint32_t , 8 > caseExits;
	};

	// Data structure for call sites
	struct T_CallSite_ {
		T_String name;
		uint32_t callBlock;
		uint32_t retBlock;
	};

	// Generate control flow graph for each function
	T_AutoArray< T_StackEntry_ , 8 > stack;
	T_Array< T_CallSite_ > callSites;
	T_Optional< uint32_t > cNode{ };
	visitor.visit( program.root , [&]( A_Node& node , const bool exit ) {
		const auto nt{ node.type( ) };

		// Handle start/end of functions
		if ( nt == A_Node::DECL_FN || nt == A_Node::DECL_INIT
				|| nt == A_Node::DECL_FRAME ) {
			auto& n{ dynamic_cast< A_FuncNode& >( node ) };
			auto const& fn{ n.name( ) };
			if ( exit ) {
				assert( stack.empty( ) );
				logger( [&](){
					T_StringBuilder sb;
					sb << "Function ended; last block had "
						<< ( M_NODE_( *cNode )->instructions
							? M_NODE_( *cNode )->instructions->count
							: 0 )
						<< " instructions";
					return sb;
				} , LL1 );
				// The function covers all blocks created since
				// it started
				auto* frec{ cfgFunctions.get( fn ) };
				assert( frec );
				frec->count = ctrlFlowGraph.size( ) - frec->first;
				cNode.clear( );
			} else {
				logger( [&](){
					T_StringBuilder sb;
					sb << "Starting function '" << fn << "' at "
						<< ctrlFlowGraph.size( );
					return sb;
				} , LL1 );
				cfgFunctions.add( fn , T_BasicBlock{ ctrlFlowGraph.size( ) } );
				cNode = M_ADDNEW_( );
			}
			return true;
		}

		// All instructions: continue the current basic block
		auto* const iptr{ dynamic_cast< A_InstructionNode* >( &node ) };
		if ( iptr && !exit ) {
			assert( cNode );
			auto& n{ *M_NODE_( *cNode ) };
			if ( n.instructions ) {
				n.instructions->count ++;
			} else {
				n.instructions = T_BasicBlock{ indexOf( *iptr ) };
			}
		}

		// Handle conditionals
		if ( nt == A_Node::OP_COND ) {
			if ( exit ) {
				auto& se{ stack.last( ) };
				cNode = M_ADDNEW_( );

				// Connect the condition to the first block of
				// each case, and the last block of each case to
				// the next block. Using the first block for
				// both edges would be wrong whenever a case is
				// made of more than one block.
				const auto ncb{ se.caseBlocks.size( ) };
				assert( se.caseExits.size( ) == ncb );
				for ( auto i = 0u ; i < ncb ; i ++ ) {
					const uint32_t first{ se.caseBlocks[ i ] };
					const uint32_t last{ se.caseExits[ i ] };
					M_NODE_( first )->inbound.add( se.condBlock );
					M_NODE_( se.condBlock )->outbound.add( first );
					M_NODE_( last )->outbound.add( *cNode );
					M_NODE_( *cNode )->inbound.add( last );
				}
				// Without a default case, execution may skip
				// all cases
				if ( !se.hasDefault ) {
					M_NODE_( *cNode )->inbound.add( se.condBlock );
					M_NODE_( se.condBlock )->outbound.add( *cNode );
				}

				stack.removeLast( );
				logger( [&](){
					T_StringBuilder sb;
					sb << "Exiting conditional instruction, stack size "
						<< stack.size( );
					return sb;
				} , LL2 );
			} else {
				auto& se{ stack.addNew( ) };
				se.condBlock = *cNode;
				cNode.clear( );
				logger( [&](){
					T_StringBuilder sb;
					sb << "Entering conditional instruction, stack size "
						<< stack.size( )
						<< ", block had "
						<< ( M_NODE_( se.condBlock )->instructions
							? M_NODE_( se.condBlock )->instructions->count
							: 0 )
						<< " instructions";
					return sb;
				} , LL2 );
			}
			return true;
		}

		// Calls also break the flow
		if ( nt == A_Node::OP_CALL && !exit ) {
			T_CallInstrNode& ci{ *dynamic_cast< T_CallInstrNode* >( iptr ) };
			logger( [&](){
				T_StringBuilder sb;
				sb << "Call to " << ci.id( ) << ", block had "
					<< ( M_NODE_( *cNode )->instructions
						? M_NODE_( *cNode )->instructions->count
						: 0 )
					<< " instructions";
				return sb;
			} , LL2 );

			// The edges are added later, once all functions are
			// known
			auto& cs{ callSites.addNew( ) };
			cs.name = ci.id( );
			cs.callBlock = *cNode;
			cNode = cs.retBlock = M_ADDNEW_( );
			return true;
		}

		// Condition case nodes: create new basic block, add to stack's list
		if ( nt == A_Node::TN_CASE || nt == A_Node::TN_DEFAULT ) {
			if ( exit ) {
				logger( [&](){
					T_StringBuilder sb;
					sb << "Case block added ("
						<< ( M_NODE_( *cNode )->instructions
							? M_NODE_( *cNode )->instructions->count
							: 0 )
						<< " instructions)";
					return sb;
				} , LL2 );
				// The current block is the one the case ends in
				stack.last( ).caseExits.add( *cNode );
				cNode.clear( );
			} else {
				stack.last( ).hasDefault = stack.last( ).hasDefault
					|| ( nt == A_Node::TN_DEFAULT );
				cNode = M_ADDNEW_( );
				stack.last( ).caseBlocks.add( *cNode );
			}
		}

		// Expressions have no effect on the graph, skip them
		return !dynamic_cast< A_ExpressionNode* >( &node );
	} );
	assert( cfgFunctions.contains( "*init*" ) && cfgFunctions.contains( "*frame*" ) );

	// Add fake call sites for *init* and *frame*
	{
		auto& cs{ callSites.addNew( ) };
		cs.callBlock = CFG_ENTER;
		cs.retBlock = CFG_MAINLOOP;
		cs.name = "*init*";
	}
	{
		auto& cs{ callSites.addNew( ) };
		cs.callBlock = CFG_MAINLOOP;
		cs.retBlock = CFG_MAINLOOP;
		cs.name = "*frame*";
	}

	// Handle calls
	for ( auto const& cs : callSites ) {
		auto const* frec{ cfgFunctions.get( cs.name ) };
		assert( frec );
		{
			// Calling block -> first block of the function
			auto& entry{ ctrlFlowGraph[ frec->first ] };
			entry->inbound.add( cs.callBlock );
			M_NODE_( cs.callBlock )->outbound.add( frec->first );
		}
		{
			// Last block of the function -> return block
			const auto n{ frec->first + frec->count - 1 };
			auto& exit{ ctrlFlowGraph[ n ] };
			exit->outbound.add( cs.retBlock );
			M_NODE_( cs.retBlock )->inbound.add( n );
		}
	}

	// Dump whole graph
	logger( [&](){
		T_StringBuilder dump;

		int i{ 0 };
		dump << "Control flow graph dump\n";
		for ( auto const& p : ctrlFlowGraph ) {
			auto const& e{ *p };
			dump << "\nNode " << i++ << "\n\t";
			if ( e.instructions ) {
				dump << e.instructions->count
					<< " instruction(s) at index "
					<< e.instructions->first;
			} else {
				dump << "No instructions";
			}
			dump << "\n\tInbound:";
			{
				const auto ni{ e.inbound.size( ) };
				if ( ni == 0 ) {
					dump << " NONE";
				}
				for ( auto idx = 0u ; idx < ni ; idx ++ ) {
					dump << ' ' << e.inbound[ idx ];
				}
			}
			dump << "\n\tOutbound:";
			{
				const auto no{ e.outbound.size( ) };
				if ( no == 0 ) {
					dump << " NONE";
				}
				for ( auto idx = 0u ; idx < no ; idx ++ ) {
					dump << ' ' << e.outbound[ idx ];
				}
			}
			dump << '\n';
		}
		dump << '\n';

		return dump;
	} , LL2 );
}

#undef M_ADDNEW_
#undef M_NEWNODE_
#undef M_NODE_


/*= T_OptData - USE/DEFINE CHAINS ============================================*/
namespace {

#warning Remove this later
// Redefine the levels passed to the logger for the use/define chain code
// below.
#undef LL1
#undef LL2
#define LL1 1
#define LL2 1

// Add a use or define record for a variable.
//
//	n		node at which the variable is used or defined
//	id		name of the variable
//	use		true for a use, false for a definition
//	od		optimizer data that holds the use/define table
//	root		root node, used to find the index of the function
//	extVarId	if set, identifier to use instead of the one that
//			would be deduced from the name and enclosing function
//
// The instruction and function that contain the node are found by following
// parents. The record is added to the variable's entry in the table, which
// is created if necessary.
void BUDCAddRecord_(
		A_Node& n ,
		T_String const& id ,
		const bool use ,
		T_OptData& od ,
		T_RootNode& root ,
		T_OptData::T_VarId const* extVarId = nullptr ) noexcept
{
	// Follow the parents. The first instruction that is found provides
	// the instruction index; the walk ends at the enclosing function.
	A_FuncNode* func{ nullptr };
	T_Optional< uint32_t > instrId;
	for ( A_Node* cur{ &n } ; cur ; cur = &cur->parent( ) ) {
		func = dynamic_cast< A_FuncNode* >( cur );
		auto* const instr{ dynamic_cast< A_InstructionNode* >( cur ) };
		if ( instr && !instrId ) {
			instrId = od.indexOf( *instr );
			continue;
		}
		if ( func ) {
			break;
		}
	}
	assert( func && instrId );

	// Use the identifier that was provided, if any. Otherwise the name
	// refers to a local or argument of the function if it knows it, or to
	// a global if it doesn't.
	const T_OptData::T_VarId varId{ [&]() -> T_OptData::T_VarId {
		if ( extVarId ) {
			return *extVarId;
		}
		if ( !func->hasLocal( id ) ) {
			return T_OptData::T_VarId{ id };
		}
		return T_OptData::T_VarId{ id , func->name( ) ,
			func->isArgument( id ) };
	} () };

	// Find the variable's entry, adding it if it's missing
	auto* varRec{ od.varUDChains.get( varId ) };
	if ( !varRec ) {
		od.varUDChains.add( T_OptData::T_VarUseDefine{ varId } );
		varRec = od.varUDChains.get( varId );
	}
	assert( varRec );

	// Append to the appropriate list and fill the new record
	auto& udRec{ !use
		? varRec->defines.addNew( )
		: varRec->uses.addNew( ) };
	udRec.node = *instrId;
	udRec.fnIndex = root.functionIndex( func->name( ) );

	od.logger( [&](){
		T_StringBuilder sb;
		if ( use ) {
			sb << "use ";
		} else {
			sb << "def ";
		}
		sb << varId.name << " at " << n.location( ) << " (";
		switch ( varId.type ) {
		    case T_OptData::E_UDVarType::GLOBAL:
			sb << "global";
			break;
		    case T_OptData::E_UDVarType::LOCAL:
			sb << "local" << " of " << varId.owner;
			break;
		    default:
			sb << "argument" << " of " << varId.owner;
			break;
		}
		sb << "), instr #" << *instrId;
		return sb;
	} , LL2 );
}

// Record the variable uses and definitions caused by a node.
//
//	- identifier expressions use the variable they name, except for
//	  "width", "height" and "time";
//	- uniforms, use-* instructions and framebuffer attachments use the
//	  resources they refer to;
//	- pipeline instructions define the pipeline and use its programs;
//	- resource definitions and set instructions define their target;
//	- call instructions define the arguments of the called function.
// Other nodes are ignored.
void BUDCVisitor_(
		T_RootNode& root ,
		T_OptData& od ,
		A_Node& n )
{
	// Shortcuts to add a record for the current node
	const auto addUse = [&]( T_String const& name ) {
		BUDCAddRecord_( n , name , true , od , root );
	};
	const auto addDef = [&]( T_String const& name ) {
		BUDCAddRecord_( n , name , false , od , root );
	};

	switch ( n.type( ) ) {

	    case A_Node::EXPR_ID: {
		auto const& id{ dynamic_cast< T_IdentifierExprNode& >( n ).id( ) };
		const bool special{ id == "width" || id == "height"
			|| id == "time" };
		if ( !special ) {
			addUse( id );
		}
		break;
	    }

	    case A_Node::OP_SET:
		addDef( dynamic_cast< T_SetInstrNode& >( n ).id( ) );
		break;

	    case A_Node::OP_FRAMEBUFFER:
	    case A_Node::OP_TEXTURE:
	    case A_Node::OP_SAMPLER:
	    case A_Node::OP_PROGRAM:
		addDef( dynamic_cast< A_ResourceDefInstrNode& >( n ).id( ) );
		break;

	    case A_Node::OP_PIPELINE: {
		auto& pipeline{ dynamic_cast< T_PipelineInstrNode& >( n ) };
		addDef( pipeline.id( ) );
		const auto nPrograms{ pipeline.size( ) };
		for ( auto p = 0u ; p < nPrograms ; p ++ ) {
			addUse( pipeline.program( p ) );
		}
		break;
	    }

	    case A_Node::TN_FBATT:
		addUse( dynamic_cast< T_FramebufferInstrNode::T_Attachment& >( n ).id( ) );
		break;

	    case A_Node::OP_UNIFORMS:
		addUse( dynamic_cast< T_UniformsInstrNode& >( n ).progId( ) );
		break;

	    case A_Node::OP_USE_TEXTURE:
		// Textures use a sampler in addition to the texture itself
		addUse( dynamic_cast< T_UseTextureInstrNode& >( n ).samplerId( ) );
		addUse( dynamic_cast< T_UseInstrNode& >( n ).id( ) );
		break;

	    case A_Node::OP_USE_PROGRAM:
	    case A_Node::OP_USE_PIPELINE:
	    case A_Node::OP_USE_FRAMEBUFFER:
		addUse( dynamic_cast< T_UseInstrNode& >( n ).id( ) );
		break;

	    case A_Node::OP_CALL: {
		auto& call{ dynamic_cast< T_CallInstrNode& >( n ) };
		auto& callee{ root.function(
				root.functionIndex( call.id( ) ) ) };
		// Among the callee's locals, only arguments are defined by
		// the call
		const auto nLocals{ callee.locals( ) };
		for ( auto l = 0u ; l < nLocals ; l ++ ) {
			auto const& name{ callee.getLocalName( l ) };
			if ( callee.isArgument( name ) ) {
				const T_OptData::T_VarId vid{
					name , callee.name( ) , true };
				BUDCAddRecord_( n , name , false , od ,
						root , &vid );
			}
		}
		break;
	    }

	    default:
		break;

	}
}

/*----------------------------------------------------------------------------*/

// Reference to one use or definition of a variable, as stored in the
// per-instruction table below.
struct T_UDEntry_
{
	// Index of the variable's entry in the use/define table
	uint32_t entry;
	// Whether the record is in the entry's list of uses (true) or in its
	// list of definitions (false)
	bool isUse;
	// Index of the record in that list
	uint32_t index;
};

// Table which associates the index of an instruction to the uses and
// definitions found in that instruction.
using T_UDEPerInstr_ = T_KeyValueTable< uint32_t , T_AutoArray< T_UDEntry_ , 8 > >;

// Add all records from a variable's list of uses or definitions to the
// per-instruction table.
//
//	out		table to update
//	mainEntry	index of the variable's entry in the use/define table
//	isUse		whether the list contains uses or definitions
//	entries		the list of records
template< uint32_t S >
void BUDCAddEntries_(
		T_UDEPerInstr_& out ,
		const uint32_t mainEntry ,
		const bool isUse ,
		T_AutoArray< T_OptData::T_VarUDRecord , S > const& entries ) noexcept
{
	using T_List_ = T_AutoArray< T_UDEntry_ , 8 >;

	const auto count{ entries.size( ) };
	auto idx = 0u;
	while ( idx < count ) {
		// Records are grouped by the instruction they belong to
		auto const& instr{ entries[ idx ].node };
		auto* rec{ out.get( instr ) };
		if ( rec == nullptr ) {
			out.add( instr , T_List_{ } );
			rec = out.get( instr );
		}
		assert( rec );

		auto& added{ rec->addNew( ) };
		added.index = idx;
		added.isUse = isUse;
		added.entry = mainEntry;
		idx ++;
	}
}


} // namespace <anon>

/*----------------------------------------------------------------------------*/

// Build the use/define records for all variables of the program.
//
// The table of variables is emptied, then filled by visiting the program and
// recording uses and definitions whenever a node is entered. A table that
// lists the uses and definitions found in each instruction is then computed;
// it is only used by the code that should link uses to definitions, which is
// currently disabled (see below).
void T_OptData::buildUseDefineChains(
		T_OpsParserOutput& program ) noexcept
{
	M_LOGSTR_( "Building use/define chains" , LL1 );
	varUDChains.clear( );

	// Find all definitions and uses, add them to the table
	visitor.visit( program.root , [&]( auto& n , const bool exit ) {
		if ( !exit ) {
			BUDCVisitor_( program.root , *this , n );
		}
		return true;
	} );

	// Build a table of all variable uses/defines that were identified
	T_UDEPerInstr_ udPerInstr;
	auto const& udcEntries{ varUDChains.values( ) };
	const auto n{ udcEntries.size( ) };
	for ( auto i = 0u ; i < n ; i ++ ) {
		auto const& r{ udcEntries[ i ] };
		BUDCAddEntries_( udPerInstr , i , true , r.uses );
		BUDCAddEntries_( udPerInstr , i , false , r.defines );
	}


	/*
	 * The whole heap of code below is wrong, which is why it is disabled.
	 * It would not work correctly in all cases (it does work with the
	 * current test code, but that is irrelevant).
	 *
	 * A block B from the CFG has instr{B}+1 points (1 point before each
	 * instruction, and 1 point at the end of the block).
	 *
	 * Uses should be assigned to the point preceding the instruction; defs
	 * and kills should be assigned to the point that follows it, before
	 * any uses (it may be simpler to have 2 points / instruction?)
	 *
	 *               defs from I0         defs from I1
	 *              \/                   \/
	 * P00 -> I0 -> P01 -> P10 -> I1 -> P11 -> PBE
	 *  /\                  /\                  /\
	 *   uses from I0        uses from I1        defs/kills from block end
	 *
	 * Entering/exiting functions is a bit of a PITA:
	 *	- call instructions define function arguments;
	 *	- return edges kill locals
	 */
#if 0
	// Walk the graph from the entry point until all reachable nodes
	// have been covered and keeping track of active definitions. When
	// the flow diverges, we need to store the state before the
	// divergence.
	const uint32_t nNodes{ ctrlFlowGraph.size( ) };
	uint32_t nProcessed{ 0 };
	uint32_t node{ CFG_ENTER };

	bool processedNodes[ nNodes ];
	memset( processedNodes , 0 , sizeof( processedNodes ) );

	using T_ActDefs_ = T_Array< uint32_t >;
	using P_ActDefs_ = T_OwnPtr< T_ActDefs_ >;
	P_ActDefs_ activeDefs{ NewOwned< T_ActDefs_ >( ) };
	activeDefs->resize( varUDChains.size( ) , T_HashIndex::INVALID_INDEX );

	struct T_StackEntry_ {
		P_ActDefs_ def;
		uint32_t node;

		T_StackEntry_( P_ActDefs_ const& src , const uint32_t n ) noexcept
			: def{ NewOwned< T_ActDefs_ >( *src ) } , node( n )
		{}
	};
	T_AutoArray< T_StackEntry_ , 32 > stack;

	while ( nProcessed < nNodes ) {
		assert( !processedNodes[ node ] );
		auto const& cn{ *ctrlFlowGraph[ node ] };
		processedNodes[ node ] = true;
		logger( [=]() {
			T_StringBuilder sb;
			sb << "processing node " << node;
			return sb;
		} , LL2 );
		nProcessed ++;

		if ( cn.instructions ) {
			// Check for uses and defines in the instructions
			const auto is{ cn.instructions->first };
			const auto ie{ is + cn.instructions->count };
			for ( auto ii = is ; ii < ie ; ii ++ ) {
				auto const* const irec{ udPerInstr.get( ii ) };
				if ( !irec ) {
					continue;
				}
				const auto nrec{ irec->size( ) };

				// Handle uses first
				for ( auto j = 0u ; j < nrec ; j ++ ) {
					auto const& rec{ (*irec)[ j ] };
					if ( !rec.isUse ) {
						continue;
					}

					auto& resource{ varUDChains[ rec.entry ] };
					const auto defId{ (*activeDefs)[ rec.entry ] };
					// FIXME: must be defined
					assert( defId != T_HashIndex::INVALID_INDEX );
					resource.defines[ defId ].refs.add( rec.index );
					resource.uses[ rec.index ].refs.add( defId );
					logger( [&](){
						T_StringBuilder sb;
						sb << "USE " << resource.var.name
							<< " @ instr #" << ii
							<< ", def " << defId;
						return sb;
					} , LL2 );
				}

				// Handle defines
				for ( auto j = 0u ; j < nrec ; j ++ ) {
					auto const& rec{ (*irec)[ j ] };
					if ( rec.isUse ) {
						continue;
					}
					(*activeDefs)[ rec.entry ] = rec.index;
					logger( [&](){
						T_StringBuilder sb;
						sb << "DEF " << rec.index << ' '
							<< varUDChains[ rec.entry ].var.name
							<< " @ instr #" << ii;
						return sb;
					} , LL2 );
				}
			}
		}

		if ( nProcessed == nNodes ) {
			break;
		}

		// Check for possible next nodes
		do {
			auto const& rcn{ *ctrlFlowGraph[ node ] };
			const uint32_t nSuccs{ [&](){
				const auto no{ rcn.outbound.size( ) };
				uint32_t c{ 0 };
				for ( auto i = 0u ; i < no ; i ++ ) {
					if ( !processedNodes[ rcn.outbound[ i ] ] ) {
						c ++;
					}
				}
				return c;
			}() };
			logger( [&]() {
				T_StringBuilder sb;
				sb << "node " << node << ": " << nSuccs
					<< " successor(s) left (stack depth "
					<< stack.size( ) << ')';
				return sb;
			} , LL2 );

			// -> no output nodes left -> pop stack and keep trying,
			//	unless we've already processed all nodes
			if ( nSuccs == 0 ) {
				assert( !stack.empty( ) );
				node = stack.last( ).node;
				activeDefs = std::move( stack.last( ).def );
				stack.removeLast( );
				logger( [&]() {
					T_StringBuilder sb;
					sb << "pop stack -> next node " << node
						<< " (stack depth "
						<< stack.size( ) << ')';
					return sb;
				} , LL2 );
				continue;
			}

			const auto no{ rcn.outbound.size( ) };
			uint32_t nn{ T_HashIndex::INVALID_INDEX };
			for ( auto i = 0u ; i < no ; i ++ ) {
				if ( !processedNodes[ rcn.outbound[ i ] ] ) {
					nn = rcn.outbound[ i ];
					break;
				}
			}

			// More than one possible successor? Push to stack
			if ( nSuccs > 1 ) {
				M_LOGSTR_( "pushing node" , LL2 );
				stack.addNew( activeDefs , node );
			}

			node = nn;
			break;
		} while ( 1 );

		// 30 if next block is an end of function, kill locals
	}
#endif
}


/*============================================================================*/

// From this point on the code is no longer part of T_OptData, so the string
// logging macro is redefined to go through the logger of the `oData` that is
// in scope where the macro is expanded.
#undef M_LOGSTR_
#define M_LOGSTR_( S , L ) \
	oData.logger( [](){ return T_StringBuilder{ S }; } , L )


/*= CONSTANT FOLDING =========================================================*/

namespace {

// Visitor which replaces expressions that can be computed ahead of time with
// constants. Instances are meant to be called for each node of the program;
// didFold indicates whether anything was replaced.
struct T_ConstantFolder_
{
	T_ConstantFolder_( T_OptData& data ) noexcept
		: oData{ data }
	{}

	// Result: set to true once at least one expression has been replaced
	bool didFold{ false };

	// Node callback. Handles the nodes that contain the root of an
	// expression; the return value tells the visitor whether the node's
	// children should be visited.
	bool operator()( A_Node& node , bool exit ) noexcept;

    private:
	// Optimizer data (logger, errors, curves, input declarations, fixed
	// size)
	T_OptData& oData;

	// Try to fold the expression that is the child of some node. The
	// child is read through get( ); if it can be replaced with a
	// constant, the replacement is installed through set( ).
	template<
		typename T
	> void handleParentNode(
			A_Node& node ,
			std::function< A_ExpressionNode&( T& ) > get ,
			std::function< void( T& , P_ExpressionNode ) > set ) noexcept;

	// Check an expression, folding its operands if it has any. Returns
	// the constant that should replace the expression, or an empty
	// pointer if it must be left alone.
	P_ExpressionNode checkExpression(
			A_ExpressionNode& node ) noexcept;

	// Handle identifiers. If the size is fixed and the identifier is
	// either width or height, replace it with the appropriate value.
	P_ExpressionNode doIdExpr(
			T_IdentifierExprNode& node ) noexcept;

	// Handle reads from inputs. If there's a curve and it is a constant,
	// or if there's no curve and only one default value, then the
	// expression is constant.
	P_ExpressionNode doInputExpr(
			T_InputExprNode& node ) noexcept;

	// Transform an unary operator applied to a constant into a constant.
	P_ExpressionNode doUnaryOp(
			T_UnaryOperatorNode& node ,
			double value ) const noexcept;

	// Transform a binary operator applied to two constants into a
	// constant.
	P_ExpressionNode doBinaryOp(
			T_BinaryOperatorNode& node ,
			double left ,
			double right ) const noexcept;
};

/*----------------------------------------------------------------------------*/

// Node callback.
//
// Arguments, conditions and set instructions own an expression; when one of
// these is entered, folding of its expression is attempted and the visitor
// is told not to go through the node's children. All other nodes are simply
// traversed.
bool T_ConstantFolder_::operator()(
		A_Node& node ,
		const bool exit ) noexcept
{
	// All the work is done when entering nodes
	if ( exit ) {
		return true;
	}

	const auto nt{ node.type( ) };

	if ( nt == A_Node::TN_ARG ) {
		handleParentNode< T_ArgumentNode >( node ,
			[]( T_ArgumentNode& arg ) -> A_ExpressionNode& {
				return arg.expression( );
			} ,
			[]( T_ArgumentNode& arg , P_ExpressionNode repl ) {
				arg.expression( std::move( repl ) );
			} );
		return false;
	}

	if ( nt == A_Node::TN_CONDITION ) {
		using T_Cond_ = T_CondInstrNode::T_Expression;
		handleParentNode< T_Cond_ >( node ,
			[]( T_Cond_& cond ) -> A_ExpressionNode& {
				return cond.expression( );
			} ,
			[]( T_Cond_& cond , P_ExpressionNode repl ) {
				cond.expression( std::move( repl ) );
			} );
		return false;
	}

	if ( nt == A_Node::OP_SET ) {
		handleParentNode< T_SetInstrNode >( node ,
			[]( T_SetInstrNode& instr ) -> A_ExpressionNode& {
				return instr.expression( );
			} ,
			[]( T_SetInstrNode& instr , P_ExpressionNode repl ) {
				instr.setExpression( std::move( repl ) );
			} );
		return false;
	}

	return true;
}

/*----------------------------------------------------------------------------*/

// Attempt to fold the expression held by a node.
//
//	n	the node that holds the expression; it is accessed as a T
//	get	reads the expression from the node
//	set	replaces the node's expression
//
// If the expression can be replaced, the replacement is logged, installed
// into the node and didFold is set.
template<
	typename T
> void T_ConstantFolder_::handleParentNode(
		A_Node& n ,
		std::function< A_ExpressionNode&( T& ) > get ,
		std::function< void( T& , P_ExpressionNode ) > set ) noexcept
{
	T& node{ (T&) n };
	A_ExpressionNode& child{ get( node ) };

	P_ExpressionNode r{ checkExpression( child ) };
	if ( !r ) {
		// The expression can't be replaced
		return;
	}

	// Log before replacing, as the child is presumably destroyed when
	// the replacement is installed
	oData.logger( [&child]() {
		T_StringBuilder sb;
		sb << "substituting node at " << child.location( );
		return sb;
	} , 3 );

	// NOTE(review): the replacement gets the location of the node that
	// holds the expression, not the location of the expression it
	// replaces - confirm that this is intended.
	r->location( ) = node.location( );
	set( node , std::move( r ) );
	didFold = true;
}

// Check whether an expression can be replaced with a constant.
//
// Constants are left alone; identifiers and inputs are handled by their
// dedicated methods. For operators, folding is first attempted on the
// operands; if all operands end up being constants, the result of the
// operation is computed.
//
// Returns the replacement, or an empty pointer if there is none.
P_ExpressionNode T_ConstantFolder_::checkExpression(
		A_ExpressionNode& node ) noexcept
{
	switch ( node.type( ) ) {

	    // Already a constant
	    case A_Node::EXPR_CONST:
		return {};

	    // Replace $width/$height with value if fixedSize
	    case A_Node::EXPR_ID:
		return doIdExpr( (T_IdentifierExprNode&) node );

	    // Replace inputs with value if no curve/constant curve
	    case A_Node::EXPR_INPUT:
		return doInputExpr( (T_InputExprNode&) node );

	    default:
		break;
	}

	// Replace UnOp( Cnst ) with result
	if ( auto* const asUnary = dynamic_cast< T_UnaryOperatorNode* >( &node ) ) {
		handleParentNode< T_UnaryOperatorNode >( *asUnary ,
			[]( T_UnaryOperatorNode& op ) -> A_ExpressionNode& {
				return op.argument( );
			} ,
			[]( T_UnaryOperatorNode& op , P_ExpressionNode repl ) {
				op.setArgument( std::move( repl ) );
			} );

		// The argument may have been replaced, so it is only
		// accessed after the folding attempt
		auto& arg{ asUnary->argument( ) };
		if ( arg.type( ) != A_Node::EXPR_CONST ) {
			return {};
		}
		return doUnaryOp( *asUnary ,
				( (T_ConstantExprNode const&) arg ).floatValue( ) );
	}

	// Replace BinOp( Cnst , Cnst ) with result
	auto* const asBinary{ dynamic_cast< T_BinaryOperatorNode* >( &node ) };
	assert( asBinary && "Missing support for some expr subtype" );
	handleParentNode< T_BinaryOperatorNode >( *asBinary ,
		[]( T_BinaryOperatorNode& op ) -> A_ExpressionNode& {
			return op.left( );
		} ,
		[]( T_BinaryOperatorNode& op , P_ExpressionNode repl ) {
			op.setLeft( std::move( repl ) );
		} );
	handleParentNode< T_BinaryOperatorNode >( *asBinary ,
		[]( T_BinaryOperatorNode& op ) -> A_ExpressionNode& {
			return op.right( );
		} ,
		[]( T_BinaryOperatorNode& op , P_ExpressionNode repl ) {
			op.setRight( std::move( repl ) );
		} );

	if ( asBinary->left( ).type( ) != A_Node::EXPR_CONST
			|| asBinary->right( ).type( ) != A_Node::EXPR_CONST ) {
		return {};
	}
	auto const& lhs{ (T_ConstantExprNode const&) asBinary->left( ) };
	auto const& rhs{ (T_ConstantExprNode const&) asBinary->right( ) };
	return doBinaryOp( *asBinary , lhs.floatValue( ) , rhs.floatValue( ) );
}

/*----------------------------------------------------------------------------*/

// Try to replace a read from an input with a constant.
//
// Nothing is done if no curves are available. If the input has a curve, the
// read is replaced only if the curve is constant. If it doesn't, the read is
// replaced with the input's default value, provided the input has exactly
// one declaration.
P_ExpressionNode T_ConstantFolder_::doInputExpr(
		T_InputExprNode& node ) noexcept
{
	if ( !oData.curves ) {
		return {};
	}

	if ( auto const* const curve = oData.curves->curves.get( node.id( ) ) ) {
		// Curve present, check if it's constant
		const auto cval{ curve->isConstant( ) };
		if ( cval ) {
			return NewOwned< T_ConstantExprNode >(
					node.parent( ) , *cval );
		}
		return {};
	}

	// No curve: rely on the declarations of the input
	assert( oData.inputDecls );
	auto const* const dva{ oData.inputDecls->get( node.id( ) ) };
	assert( dva );
	if ( dva->size( ) != 1 ) {
		// More than one default value, can't pick one
		return {};
	}
	return NewOwned< T_ConstantExprNode >( node.parent( ) ,
			(*dva)[ 0 ].value );
}

// Try to replace an identifier with a constant.
//
// This only applies when the optimizer was given a fixed size: "width" and
// "height" are then replaced with the corresponding dimension. An empty
// pointer is returned in all other cases.
P_ExpressionNode T_ConstantFolder_::doIdExpr(
		T_IdentifierExprNode& node ) noexcept
{
	if ( !oData.fixedSize ) {
		return {};
	}

	if ( node.id( ) == "width" ) {
		M_LOGSTR_( "replacing $width with fixed width" , 3 );
		return NewOwned< T_ConstantExprNode >( node.parent( ) ,
				double( oData.fixedSize->first ) );
	}

	if ( node.id( ) == "height" ) {
		M_LOGSTR_( "replacing $height with fixed height" , 3 );
		// Converted to double, like the width: going through a float
		// would lose precision for large values.
		return NewOwned< T_ConstantExprNode >( node.parent( ) ,
				double( oData.fixedSize->second ) );
	}

	return {};
}

// Compute the result of an unary operator applied to a constant.
//
//	node	the operator's node
//	value	the value of the operator's argument
//
// Returns a new constant node, created with the operator's parent as its
// own parent. Operations that are mathematically invalid (1/0, square root
// of a negative number, logarithm of a number that is not strictly
// positive) add an error to the optimizer's error list and yield 0. A
// tangent computed close to one of the function's poles only causes a
// warning. The program is aborted if the operator is unknown.
P_ExpressionNode T_ConstantFolder_::doUnaryOp(
		T_UnaryOperatorNode& node ,
		const double value ) const noexcept
{
	const double rVal{ [this]( auto& node , const auto value ) {
		switch ( node.op( ) ) {

		    case T_UnaryOperatorNode::NEG:
			return -value;

		    case T_UnaryOperatorNode::NOT:
			return value ? 0. : 1.;

		    case T_UnaryOperatorNode::INV:
			if ( value == 0 ) {
				oData.errors.addNew( "math - 1/x, x=0" , node.location( ) );
				return 0.;
			}
			return 1. / value;

		    case T_UnaryOperatorNode::COS:
			return cos( value );

		    case T_UnaryOperatorNode::SIN:
			return sin( value );

		    case T_UnaryOperatorNode::TAN:
			// The tangent has a pole at every odd multiple of
			// PI/2, i.e. wherever the cosine is 0, not just at
			// PI/2 itself. Close to a pole, the magnitude of the
			// cosine is approximately the distance to the pole.
			if ( fabs( cos( value ) ) <= 1e-6 ) {
				oData.errors.addNew( "math - tan(x), x=~PI/2" ,
						node.location( ) , E_SRDErrorType::WARNING );
			}
			return tan( value );

		    case T_UnaryOperatorNode::SQRT:
			if ( value < 0 ) {
				oData.errors.addNew( "math - sqrt(x), x<0" , node.location( ) );
				return 0.;
			}
			return sqrt( value );

		    case T_UnaryOperatorNode::LN:
			if ( value <= 0 ) {
				oData.errors.addNew( "math - ln(x), x<=0" , node.location( ) );
				return 0.;
			}
			return log( value );

		    case T_UnaryOperatorNode::EXP:
			return exp( value );
		}

		// Only reached if the operator is not one of the above
		fprintf( stderr , "invalid operator %d\n" , int( node.op( ) ) );
		std::abort( );
	}( node , value ) };

	return NewOwned< T_ConstantExprNode >( node.parent( ) , rVal );
}

// Compute the result of a binary operator applied to two constants.
//
//	node	the operator's node
//	left	the value of the left operand
//	right	the value of the right operand
//
// Returns a new constant node, created with the operator's parent as its
// own parent. Invalid divisions and powers add an error to the optimizer's
// error list and yield 0; comparisons yield 1 when they hold and 0 when
// they don't. The program is aborted if the operator is unknown.
P_ExpressionNode T_ConstantFolder_::doBinaryOp(
		T_BinaryOperatorNode& node ,
		const double left ,
		const double right ) const noexcept
{
	// Errors leave the result set to 0
	double rVal{ 0. };

	switch ( node.op( ) ) {

	    case T_BinaryOperatorNode::ADD:
		rVal = left + right;
		break;

	    case T_BinaryOperatorNode::SUB:
		rVal = left - right;
		break;

	    case T_BinaryOperatorNode::MUL:
		rVal = left * right;
		break;

	    case T_BinaryOperatorNode::DIV:
		if ( right == 0 ) {
			oData.errors.addNew( "math - l/r, r=0" , node.location( ) );
		} else {
			rVal = left / right;
		}
		break;

	    case T_BinaryOperatorNode::POW:
		if ( left == 0 && right == 0 ) {
			oData.errors.addNew( "math - l^r, l=r=0" , node.location( ) );
		} else if ( left == 0 && right < 0 ) {
			oData.errors.addNew( "math - l^r, l=0, r<0" , node.location( ) );
		} else if ( left < 0 && fmod( right , 1. ) != 0. ) {
			oData.errors.addNew( "math - l^r, l<0, r not integer" , node.location( ) );
		} else {
			rVal = pow( left , right );
		}
		break;

	    case T_BinaryOperatorNode::CMP_EQ:
		rVal = ( left == right ) ? 1. : 0.;
		break;

	    case T_BinaryOperatorNode::CMP_NE:
		rVal = ( left != right ) ? 1. : 0.;
		break;

	    case T_BinaryOperatorNode::CMP_GT:
		rVal = ( left > right ) ? 1. : 0.;
		break;

	    case T_BinaryOperatorNode::CMP_GE:
		rVal = ( left >= right ) ? 1. : 0.;
		break;

	    case T_BinaryOperatorNode::CMP_LT:
		rVal = ( left < right ) ? 1. : 0.;
		break;

	    case T_BinaryOperatorNode::CMP_LE:
		rVal = ( left <= right ) ? 1. : 0.;
		break;

	    default:
		fprintf( stderr , "invalid operator %d\n" , int( node.op( ) ) );
		std::abort( );
	}

	return NewOwned< T_ConstantExprNode >( node.parent( ) , rVal );
}

} // namespace <anon>

/*----------------------------------------------------------------------------*/


// Run the constant folding pass on a program.
//
// If curves are available, the input declarations are gathered first, as
// they are needed to fold reads from inputs. The whole program is then
// visited using a constant folder.
//
// Returns true if at least one expression was replaced.
bool opopt::FoldConstants(
		T_OpsParserOutput& program ,
		T_OptData& oData ) noexcept
{
	T_ConstantFolder_ folder{ oData };
	oData.logger( [](){
		return T_StringBuilder{ "... Folding constants" };
	} , 2 );

	if ( oData.curves ) {
		oData.findInputDecls( program );
	}

	// The folder is called through a lambda so that its state can be
	// read from the local instance once the visit is over
	oData.visitor.visit( program.root ,
		[&folder]( auto& node , auto exit ) {
			return folder( node , exit );
		} );

	const bool folded{ folder.didFold };
	oData.logger( [folded]() {
		T_StringBuilder sb{ "...... " };
		if ( folded ) {
			sb << "Some constants were folded";
		} else {
			sb << "No constants were folded";
		}
		return sb;
	} , 2 );
	return folded;
}


/*= CONSTANT PROPAGATION =====================================================*/

// Constant propagation pass.
//
// At this point the pass only prepares the data it will need: instructions
// are numbered, then the control flow graph and the use/define records are
// built (in that order, as each step uses the results of the previous ones).
// No propagation is actually performed, and the function always returns
// false.
bool opopt::PropagateConstants(
		T_OpsParserOutput& program ,
		T_OptData& oData ) noexcept
{
	// We need to follow the general execution flow of the program. This is
	// not as straightforward as it seems.
	//	- Handling locals is rather easy as they "die" at the end of
	// the function in which they are defined.
	//	- Handling variables in init and functions that are called only
	// from init is not too hard: once a variable is set to a constant, it
	// can be substituted until the next set instruction.
	//	- Other variables need additional checks before propagating.
	// For example, if a variable is set to a constant during init, but is
	// updated at the end of the frame function, the value cannot be
	// propagated.

	oData.numberInstructions( program );
	oData.buildControlFlowGraph( program );
	oData.buildUseDefineChains( program );
	M_LOGSTR_( "... Propagating constants" , 2 );

	// Nothing was changed
	return false;
}


/*= DEAD CODE REMOVAL ========================================================*/

// Dead code removal pass. Not implemented: both parameters are ignored and
// the function always returns false.
bool opopt::RemoveDeadCode(
		T_OpsParserOutput& program ,
		T_OptData& oData ) noexcept
{
	return false;
}