corelib/src/Strings.cc

/******************************************************************************/
/* STRINGS AND RELATED UTILITIES **********************************************/
/******************************************************************************/


#include <atomic>
#include <cerrno>
#include <cstdlib>
#include <string>

#include <ebcl/Strings.hh>
#include <ebcl/HashIndex.hh>
#include <ebcl/Threading.hh>
#include <ebcl/Types.hh>
#include <ebcl/Alloc.hh>
#include <ebcl/BinaryStreams.hh>


using namespace ebcl;


/*= STRING STORAGE AND POOLING CLASSES =======================================*/

namespace {

// A_StringDataInternal - Actual interface w/ reference counting methods
//
// Adds user-tracking hooks on top of A_StringData. The implementations
// provided for this class later in the file are empty; A_RefCountedString
// overrides them with an actual counter.
class A_StringDataInternal : virtual public A_StringData
{
   public:
	// Set once at construction. T_String::addToPool( ) and
	// T_String::usePool( ) only replace storage whose flag is true.
	const bool poolable;

	explicit A_StringDataInternal( bool poolable );
	virtual ~A_StringDataInternal( );

	// Add/remove user, for e.g. reference counting
	virtual void addUser( );
	virtual void removeUser( );
};


// Pointer helper declarations for A_StringDataInternal. The
// RP_StringDataInternal name used throughout this file presumably comes from
// this macro, which is defined outside this file - confirm.
M_ABSTRACT_POINTERS( StringDataInternal );

/*----------------------------------------------------------------------------*/

// T_EmptyString - Fake storage for the empty string
//
// T_String points at the single shared instance (EmptyString) whenever it is
// constructed without contents. The type can be neither copied nor moved.
class T_EmptyString final : public A_StringDataInternal
{
   public:
	// The one instance, defined further down in this file
	static T_EmptyString EmptyString;

	// Initialises the storage as a valid, zero-length string with no buffer
	T_EmptyString( );

	T_EmptyString( T_EmptyString const& ) = delete;
	T_EmptyString( T_EmptyString&& other ) = delete;
};


// T_StaticString - Storage for read-only strings
//
// Holds a private copy of the bytes it is constructed from. It does not
// override addUser( ) / removeUser( ), so it is not reference-counted; the
// string pool keeps instances alive through owning pointers.
class T_StaticString final : public A_StringDataInternal
{
   public:
	T_StaticString( ) = delete;
	T_StaticString( T_StaticString const& ) = delete;
	T_StaticString( T_StaticString&& other ) noexcept = delete;

	// Copies `size` bytes from `string` into newly allocated storage
	T_StaticString( char const* string , uint32_t size );
	// Releases the copy made by the constructor
	~T_StaticString( ) override;
};


// A_RefCountedString - A reference-counted string. Used for dynamic strings
// and substrings.
//
// The counter starts at 1 (see the constructor definition) and the object
// deletes itself from removeUser( ) once the counter reaches zero.
class A_RefCountedString : public A_StringDataInternal
{
    private:
	// Number of current users of this storage
	std::atomic< uint32_t > users_;

    public:
	A_RefCountedString( );
	A_RefCountedString( A_RefCountedString const& ) = delete;
	A_RefCountedString( A_RefCountedString&& ) = delete;

	// Increments / decrements the user counter
	void addUser( ) override;
	void removeUser( ) override;
};


// T_DynamicString - Storage for dynamically-created strings
//
// Instances are allocated through the class-specific operator new/delete
// defined later in this file.
class T_DynamicString final : public A_RefCountedString
{
    public:
	T_DynamicString( ) = delete;
	T_DynamicString( T_DynamicString const& ) = delete;
	T_DynamicString( T_DynamicString&& other ) noexcept = delete;

	// Pool allocation
	void* operator new( size_t size ) noexcept;
	void operator delete( void* object ) noexcept;

	// When `nodup` is true the buffer at `data` is adopted as-is (and later
	// released by the destructor with ::operator delete); otherwise `size`
	// bytes are copied into a new buffer.
	T_DynamicString( char const* data , uint32_t size , bool nodup );
	~T_DynamicString( ) override;
};


// T_Substring - Storage for a string that is in fact a part of another
//
// Points into the buffer of `source_` instead of owning bytes of its own, and
// registers itself as a user of the source for as long as it exists.
class T_Substring final : public A_RefCountedString
{
   private:
	// Storage whose buffer this substring refers to
	RP_StringDataInternal source_;

   public:
	T_Substring( ) = delete;
	T_Substring( T_Substring const& ) = delete;
	T_Substring( T_Substring&& ) = delete;

	// Pool allocation
	void* operator new( size_t size ) noexcept;
	void operator delete( void* object ) noexcept;

	// `offset` and `size` are byte quantities inside the source's buffer
	T_Substring( RP_StringDataInternal source , uint32_t offset , uint32_t size );
	~T_Substring( ) override;
};


/*----------------------------------------------------------------------------*/

// T_StringPool - Pool of string storage classes
//
// Keeps one T_StaticString per distinct byte sequence, located through a hash
// index. Access goes through the shared instance `Pool`, under `Mutex`.
class T_StringPool final
{
   private:
	// Hash index over the entries of strings_
	T_HashIndex index_;
	// Pooled strings, owned by the pool
	T_Array< T_OwnPtr< T_StaticString > > strings_;

   public:
	static T_StringPool Pool;
	static T_ReadWriteMutex Mutex;

	T_StringPool( );

	// Returns the pooled storage for the bytes, creating it if necessary
	RP_StringDataInternal add( char const* data , uint32_t size );
	// Returns the pooled storage for the bytes, or nullptr if there is none
	RP_StringDataInternal get( char const* data , uint32_t size ) const;

   private:
	// Returns the index of the matching entry, or T_HashIndex::INVALID_INDEX
	uint32_t find( char const* data , uint32_t length , uint32_t hash ) const;
};


} // namespace

// Declares the hash function for A_StringDataInternal inside the ebcl
// namespace; the matching M_DEFINE_HASH body is further down in this file.
namespace ebcl { M_DECLARE_HASH( A_StringDataInternal ); }


/*= UTF-8 UTILITY FUNCTIONS ==================================================*/

// Checks that a NUL-terminated string is well-formed UTF-8.
//
// Every lead byte must be followed by the right number of continuation
// bytes, and overlong encodings are rejected. Returns true for the empty
// string. `string` must not be null.
bool ebcl::UTF8IsValid( char const* string )
{
	assert( string != nullptr );

	// Continuation bytes all look like 10xxxxxx
	const auto isTrail = [ ]( const char byte ) -> bool {
		return ( byte & 0xc0 ) == 0x80;
	};

	char const* cursor = string;
	for ( char lead = *cursor ; lead != '\0' ; lead = *cursor ) {
		if ( ( lead & 0xf8 ) == 0xf0 ) {
			// 4-byte sequence. The checks short-circuit, so a
			// terminator inside the sequence stops the reads.
			const bool trailsOk = isTrail( cursor[ 1 ] )
				&& isTrail( cursor[ 2 ] )
				&& isTrail( cursor[ 3 ] );
			if ( !trailsOk ) {
				return false;
			}
			// Overlong: no payload bits above what 3 bytes can hold
			if ( ( lead & 0x07 ) == 0 && ( cursor[ 1 ] & 0x30 ) == 0 ) {
				return false;
			}
			cursor += 4;

		} else if ( ( lead & 0xf0 ) == 0xe0 ) {
			// 3-byte sequence
			const bool trailsOk = isTrail( cursor[ 1 ] )
				&& isTrail( cursor[ 2 ] );
			if ( !trailsOk ) {
				return false;
			}
			// Overlong: would have fit in 2 bytes
			if ( ( lead & 0x0f ) == 0 && ( cursor[ 1 ] & 0x20 ) == 0 ) {
				return false;
			}
			cursor += 3;

		} else if ( ( lead & 0xe0 ) == 0xc0 ) {
			// 2-byte sequence
			if ( !isTrail( cursor[ 1 ] ) ) {
				return false;
			}
			// Overlong: would have fit in a single byte
			if ( ( lead & 0x1e ) == 0 ) {
				return false;
			}
			cursor += 2;

		} else if ( ( lead & 0x80 ) != 0 ) {
			// Stray continuation byte or unsupported lead byte
			return false;

		} else {
			// Plain ASCII
			cursor ++;
		}
	}

	return true;
}

/*----------------------------------------------------------------------------*/

// Counts the characters of a NUL-terminated UTF-8 string.
//
// Only lead bytes are inspected; the string is not validated, and each lead
// byte is trusted regarding the length of its sequence. `string` must not
// be null.
uint32_t ebcl::UTF8Length( char const* string )
{
	assert( string != nullptr );

	uint32_t characters = 0;
	for ( char const* cursor = string ; *cursor != '\0' ; characters ++ ) {
		const char lead = *cursor;
		uint32_t step = 1;
		if ( ( lead & 0xf8 ) == 0xf0 ) {
			step = 4;
		} else if ( ( lead & 0xf0 ) == 0xe0 ) {
			step = 3;
		} else if ( ( lead & 0xe0 ) == 0xc0 ) {
			step = 2;
		}
		cursor += step;
	}
	return characters;
}

/*----------------------------------------------------------------------------*/

// Computes the size, in bytes, of a NUL-terminated UTF-8 string by hopping
// from lead byte to lead byte.
//
// The string is not validated; each lead byte is trusted regarding the
// length of its sequence. `string` must not be null.
uint32_t ebcl::UTF8Size( char const* string )
{
	assert( string != nullptr );

	char const* cursor = string;
	for ( char lead = *cursor ; lead != '\0' ; lead = *cursor ) {
		uint32_t step = 1;
		if ( ( lead & 0xf8 ) == 0xf0 ) {
			step = 4;
		} else if ( ( lead & 0xf0 ) == 0xe0 ) {
			step = 3;
		} else if ( ( lead & 0xe0 ) == 0xc0 ) {
			step = 2;
		}
		cursor += step;
	}

	return uint32_t( cursor - string );
}

/*----------------------------------------------------------------------------*/

// Validates a NUL-terminated UTF-8 string and measures it in one pass.
//
// Parameters:
//   string - the text to examine; must not be null
//   size   - receives the number of bytes before the terminator
//   length - receives the number of characters found (an ill-formed or
//            truncated sequence counts as one character)
//
// Returns true if the whole string is well-formed UTF-8 without overlong
// encodings, false otherwise. The outputs are written in both cases.
bool ebcl::UTF8Info( char const* string , uint32_t& size , uint32_t& length )
{
	assert( string != nullptr );

	char const* ptr = string;
	uint32_t len = 0;
	bool valid = true;
	char c;
	while ( ( c = *ptr ) != '\0' ) {
		// Number of bytes the lead byte announces
		uint32_t expected;

		// 4 bytes
		if ( ( c & 0xf8 ) == 0xf0 ) {
			valid = valid
			        // 3 following bytes should be part of this
			        // codepoint
				&& ( ptr[ 1 ] & 0xc0 ) == 0x80
				&& ( ptr[ 2 ] & 0xc0 ) == 0x80
				&& ( ptr[ 3 ] & 0xc0 ) == 0x80
			        // Check for overlongs
				&& ( ( c & 0x07 ) != 0 || ( ptr[ 1 ] & 0x30 ) != 0 );
			expected = 4;

			// 3 bytes
		} else if ( ( c & 0xf0 ) == 0xe0 ) {
			valid = valid
			        // 2 following bytes should be part of this
			        // codepoint
				&& ( ptr[ 1 ] & 0xc0 ) == 0x80
				&& ( ptr[ 2 ] & 0xc0 ) == 0x80
			        // Check for overlongs
				&& ( ( c & 0x0f ) != 0 || ( ptr[ 1 ] & 0x20 ) != 0 );
			expected = 3;

			// 2 bytes
		} else if ( ( c & 0xe0 ) == 0xc0 ) {
			valid = valid
			        // Next byte should be part of this codepoint
				&& ( ptr[ 1 ] & 0xc0 ) == 0x80
			        // Check for overlongs: the sequence is only
			        // acceptable if it carries payload bits that
			        // would not fit in a single byte
				&& ( c & 0x1e ) != 0;
			expected = 2;

		} else {
			valid = valid && ( c & 0x80 ) == 0;
			expected = 1;
		}

		// Skip the sequence, but never step over the terminator: a
		// truncated sequence at the end of the string must not send the
		// scan beyond the buffer.
		uint32_t consumed = 1;
		while ( consumed < expected && ptr[ consumed ] != '\0' ) {
			consumed ++;
		}
		ptr += consumed;

		len ++;
	}

	length = len;
	size = ptr - string;
	return valid;
}

/*----------------------------------------------------------------------------*/

// Validates a UTF-8 buffer of known size and counts its characters.
//
// Parameters:
//   data   - start of the buffer; must not be null
//   size   - number of bytes in the buffer
//   length - receives the number of characters; a sequence cut short by the
//            end of the buffer is not counted
//
// Returns true if the buffer only contains complete, well-formed sequences
// without overlong encodings.
bool ebcl::UTF8BufferInfo( char const* data , uint32_t size , uint32_t& length )
{
	assert( data != nullptr );

	char const* const end = data + size;

	// Continuation bytes all look like 10xxxxxx
	const auto isTrail = [ ]( const char byte ) -> bool {
		return ( byte & 0xc0 ) == 0x80;
	};

	uint32_t counted = 0;
	bool wellFormed = true;
	for ( char const* cursor = data ; cursor < end ; ) {
		const char lead = *cursor;
		// Does the whole sequence fit before the end of the buffer?
		bool complete = true;

		if ( ( lead & 0xf8 ) == 0xf0 ) {
			// 4-byte sequence; trailing bytes are only read once
			// the sequence is known to fit
			complete = cursor + 3 < end;
			wellFormed = wellFormed && complete
				&& isTrail( cursor[ 1 ] )
				&& isTrail( cursor[ 2 ] )
				&& isTrail( cursor[ 3 ] )
				// Reject overlongs
				&& ( ( lead & 0x07 ) != 0
				    || ( cursor[ 1 ] & 0x30 ) != 0 );
			cursor += 4;

		} else if ( ( lead & 0xf0 ) == 0xe0 ) {
			// 3-byte sequence
			complete = cursor + 2 < end;
			wellFormed = wellFormed && complete
				&& isTrail( cursor[ 1 ] )
				&& isTrail( cursor[ 2 ] )
				// Reject overlongs
				&& ( ( lead & 0x0f ) != 0
				    || ( cursor[ 1 ] & 0x20 ) != 0 );
			cursor += 3;

		} else if ( ( lead & 0xe0 ) == 0xc0 ) {
			// 2-byte sequence
			complete = cursor + 1 < end;
			wellFormed = wellFormed && complete
				&& isTrail( cursor[ 1 ] )
				// Reject overlongs
				&& ( lead & 0x1e ) != 0;
			cursor += 2;

		} else {
			// Single byte: must be plain ASCII
			wellFormed = wellFormed && ( lead & 0x80 ) == 0;
			cursor ++;
		}

		if ( complete ) {
			counted ++;
		}
	}

	length = counted;
	return wellFormed;
}

/*----------------------------------------------------------------------------*/

// Decodes the UTF-8 sequence starting at `data`.
//
// Parameters:
//   data  - start of the sequence; must not be null
//   bytes - receives the length of the sequence as announced by its first
//           byte (1 to 4)
//
// Returns the decoded codepoint. No validation is performed: continuation
// bytes are read without being checked, and a first byte that is not a
// recognised lead byte is returned as-is, converted from char.
uint32_t ebcl::UTF8GetCodepoint( char const* data , uint32_t& bytes )
{
	assert( data != nullptr );

	// Payload bits carried by the lead byte
	uint32_t leadMask;
	if ( ( data[ 0 ] & 0xf8 ) == 0xf0 ) {
		bytes = 4;
		leadMask = 0x07;
	} else if ( ( data[ 0 ] & 0xf0 ) == 0xe0 ) {
		bytes = 3;
		leadMask = 0x0f;
	} else if ( ( data[ 0 ] & 0xe0 ) == 0xc0 ) {
		bytes = 2;
		leadMask = 0x1f;
	} else {
		bytes = 1;
		return data[ 0 ];
	}

	// Each continuation byte contributes its 6 low bits
	uint32_t codepoint = data[ 0 ] & leadMask;
	for ( uint32_t i = 1 ; i < bytes ; i ++ ) {
		codepoint = ( codepoint << 6 ) | ( data[ i ] & 0x3f );
	}
	return codepoint;
}

/*----------------------------------------------------------------------------*/

uint32_t ebcl::UTF8GetCodepoint( char const* data )
{
	assert( data != nullptr );
	if ( ( data[ 0 ] & 0xf8 ) == 0xf0 ) {
		return ( ( data[ 0 ] & 0x07 ) << 18 )
		       | ( ( data[ 1 ] & 0x3f ) << 12 )
		       | ( ( data[ 2 ] & 0x3f ) << 6 )
		       | ( data[ 3 ] & 0x3f );

	} else if ( ( data[ 0 ] & 0xf0 ) == 0xe0 ) {
		return ( ( data[ 0 ] & 0x0f ) << 12 )
		       | ( ( data[ 1 ] & 0x3f ) << 6 )
		       | ( data[ 2 ] & 0x3f );

	} else if ( ( data[ 0 ] & 0xe0 ) == 0xc0 ) {
		return ( ( data[ 0 ] & 0x1f ) << 6 )
		       | ( data[ 1 ] & 0x3f );

	} else {
		return data[ 0 ];
	}
}

/*----------------------------------------------------------------------------*/

// Encodes a codepoint as UTF-8.
//
// Parameters:
//   output    - destination buffer
//   available - number of bytes that may be written to `output`
//   codepoint - the value to encode
//
// Returns the number of bytes written (1 to 4), or 0 if the codepoint is
// 0x110000 or above or if the encoding does not fit in `available` bytes.
// Nothing is written when 0 is returned.
uint32_t ebcl::UTF8PutCodepoint( char* output , uint32_t available , uint32_t codepoint )
{
	// Work out how many bytes the encoding takes
	uint32_t needed;
	if ( codepoint < 0x80 ) {
		needed = 1;
	} else if ( codepoint < 0x800 ) {
		needed = 2;
	} else if ( codepoint < 0x10000 ) {
		needed = 3;
	} else if ( codepoint < 0x110000 ) {
		needed = 4;
	} else {
		return 0;
	}
	if ( available < needed ) {
		return 0;
	}

	switch ( needed ) {
		case 1:
			output[ 0 ] = char( codepoint );
			break;
		case 2:
			output[ 0 ] = char( ( codepoint >> 6 ) | 0xc0 );
			output[ 1 ] = char( ( codepoint & 0x3f ) | 0x80 );
			break;
		case 3:
			output[ 0 ] = char( ( codepoint >> 12 ) | 0xe0 );
			output[ 1 ] = char( ( ( codepoint >> 6 ) & 0x3f ) | 0x80 );
			output[ 2 ] = char( ( codepoint & 0x3f ) | 0x80 );
			break;
		default:
			output[ 0 ] = char( ( codepoint >> 18 ) | 0xf0 );
			output[ 1 ] = char( ( ( codepoint >> 12 ) & 0x3f ) | 0x80 );
			output[ 2 ] = char( ( ( codepoint >> 6 ) & 0x3f ) | 0x80 );
			output[ 3 ] = char( ( codepoint & 0x3f ) | 0x80 );
			break;
	}
	return needed;
}

/*----------------------------------------------------------------------------*/

// Converts a character index into a byte offset.
//
// Walks `index` characters forward from `input`, trusting each lead byte
// regarding the length of its sequence, and returns the number of bytes
// skipped. The walk does not look for a terminator or an end of buffer: the
// caller must make sure `index` characters are actually available.
uint32_t ebcl::UTF8GetMemoryOffset( char const* input , uint32_t index )
{
	assert( input != nullptr );

	char const* cursor = input;
	for ( uint32_t remaining = index ; remaining != 0 ; remaining -- ) {
		const char lead = *cursor;
		if ( ( lead & 0xf8 ) == 0xf0 ) {
			cursor += 4;
		} else if ( ( lead & 0xf0 ) == 0xe0 ) {
			cursor += 3;
		} else if ( ( lead & 0xe0 ) == 0xc0 ) {
			cursor += 2;
		} else {
			cursor ++;
		}
	}

	return cursor - input;
}

/*----------------------------------------------------------------------------*/

// Parses an unsigned integer from the `size` bytes of UTF-8 text at `input`.
//
// Parameters:
//   input     - start of the text
//   size      - number of bytes available
//   ok        - optional; cleared on entry and set to true only when the
//               conversion succeeded (at least one digit, no overflow, no
//               unexpected character), or when the text is a lone "0" in
//               auto-detect mode
//   base      - numeric base; 0 selects auto-detection: "0x"/"0X" -> 16,
//               "0b"/"0B" -> 2, any other leading "0" -> 8, otherwise 10
//   useSep    - when true, `separator` codepoints are skipped, both before
//               and between the digits
//   separator - the codepoint to skip when `useSep` is set
//
// Leading whitespace is skipped and a single '+' sign is accepted.
//
// Returns the converted value on success. On overflow, UINT64_MAX is
// returned. If an unexpected character or a digit outside the base stops the
// conversion, the value accumulated so far is returned. In those cases, and
// when no digit could be found at all (0 is returned), *ok stays false.
uint64_t ebcl::UTF8ToUnsignedInteger( char const* input , uint32_t size , bool * ok ,
		int base , bool useSep , uint32_t separator )
{
	char const* inputPos( input );
	char const* const inputEnd( input + size );

	if ( ok ) {
		*ok = false;
	}

	// Find start
	bool checkBase( false );
	bool hadSign( false );
	while ( 1 ) {
		if ( inputPos >= inputEnd ) {
			return 0;
		}
		uint32_t nBytes;
		const T_Character c( UTF8GetCodepoint( inputPos , nBytes ) );

		// Whitespace and separators are skipped silently
		if ( !c.isWhitespace( ) && ( !useSep || c != separator ) ) {
			if ( c == '+' ) {
				// Only one sign is allowed
				if ( hadSign ) {
					return 0;
				}
				hadSign = true;
			} else if ( c.isNumeric( ) || c.isAlpha( ) ) {
				// First digit found. In auto-detect mode a
				// leading '0' is consumed here and the base is
				// worked out below.
				checkBase = ( base == 0 && c == '0' );
				if ( checkBase ) {
					inputPos += nBytes;
				}
				break;
			} else {
				return 0;
			}
		}

		inputPos += nBytes;
	}

	// Detect base
	if ( checkBase ) {
		// The text was just "0"
		if ( inputPos >= inputEnd ) {
			if ( ok != nullptr ) {
				*ok = true;
			}
			return 0;
		}
		uint32_t nBytes;
		const T_Character nc( UTF8GetCodepoint( inputPos , nBytes ) );

		// `next` tells whether the character after the '0' is a base
		// marker that must be consumed
		bool next( true );
		if ( nc == 'x' || nc == 'X' ) {
			base = 16;
		} else if ( nc == 'b' || nc == 'B' ) {
			base = 2;
		} else {
			base = 8;
			next = false;
		}
		if ( next ) {
			inputPos += nBytes;
		}
	} else if ( base == 0 ) {
		base = 10;
	}

	// Start converting
	const uint64_t ubase( base );
	// Largest accumulator value that can still be multiplied by the base,
	// and largest digit that can be added when that value is reached
	const uint64_t cutoff( UINT64_MAX / ubase );
	const uint64_t limit( UINT64_MAX % ubase );
	uint64_t accum( 0 );
	// 0: nothing valid yet / error, 1: digits converted, -1: overflow
	int any( 0 );
	while ( inputPos < inputEnd ) {
		uint32_t nBytes;
		const T_Character c( UTF8GetCodepoint( inputPos , nBytes ) );
		inputPos += nBytes;

		if ( useSep && c == separator ) {
			continue;
		}

		uint32_t value;
		if ( c.isNumeric( ) ) {
			value = c - '0';
		} else if ( c.isAlpha( ) ) {
			// 'A' is 65, so this maps 'A' to 10, 'B' to 11, ...
			value = c.toUpper( ) - 55;
		} else {
			any = 0;
			break;
		}
		if ( value >= ubase ) {
			any = 0;
			break;
		}

		// Once an overflow has been detected, remaining digits are
		// consumed without being accumulated
		if ( any < 0 || accum > cutoff || ( accum == cutoff && value > limit ) ) {
			any = -1;
		} else {
			any = 1;
			accum = accum * ubase + value;
		}
	}

	if ( any < 0 ) {
		accum = UINT64_MAX;
	} else if ( any > 0 && ok ) {
		*ok = true;
	}
	return accum;
}

/*----------------------------------------------------------------------------*/

// Parses a signed integer from the `size` bytes of UTF-8 text at `input`.
//
// Parameters:
//   input     - start of the text
//   size      - number of bytes available
//   ok        - optional; cleared on entry and set to true only when the
//               conversion succeeded (at least one digit, no overflow, no
//               unexpected character), or when the text is a lone "0" in
//               auto-detect mode
//   base      - numeric base; 0 selects auto-detection: "0x"/"0X" -> 16,
//               "0b"/"0B" -> 2, any other leading "0" -> 8, otherwise 10
//   useSep    - when true, `separator` codepoints are skipped, both before
//               and between the digits
//   separator - the codepoint to skip when `useSep` is set
//
// Leading whitespace is skipped and a single '+' or '-' sign is accepted.
//
// Returns the converted value on success. On overflow, INT64_MIN or
// INT64_MAX is returned depending on the sign. If an unexpected character or
// a digit outside the base stops the conversion, the magnitude accumulated so
// far is returned without the sign being applied. In those cases, and when no
// digit could be found at all (0 is returned), *ok stays false.
int64_t ebcl::UTF8ToInteger( char const* input , uint32_t size , bool * ok ,
		int base , bool useSep , uint32_t separator )
{
	char const* inputPos( input );
	char const* const inputEnd( input + size );

	if ( ok ) {
		*ok = false;
	}

	// Find start
	bool checkBase( false );
	bool hadSign( false );
	bool neg( false );
	while ( 1 ) {
		if ( inputPos >= inputEnd ) {
			return 0;
		}
		uint32_t nBytes;
		const T_Character c( UTF8GetCodepoint( inputPos , nBytes ) );

		// Whitespace and separators are skipped silently
		if ( !c.isWhitespace( ) && ( !useSep || c != separator ) ) {
			if ( c == '+' || c == '-' ) {
				// Only one sign is allowed
				if ( hadSign ) {
					return 0;
				}
				neg = ( c == '-' );
				hadSign = true;
			} else if ( c.isNumeric( ) || c.isAlpha( ) ) {
				// First digit found. In auto-detect mode a
				// leading '0' is consumed here and the base is
				// worked out below.
				checkBase = ( base == 0 && c == '0' );
				if ( checkBase ) {
					inputPos += nBytes;
				}
				break;
			} else {
				return 0;
			}
		}

		inputPos += nBytes;
	}

	// Detect base
	if ( checkBase ) {
		// The text was just "0" (possibly signed)
		if ( inputPos >= inputEnd ) {
			if ( ok != nullptr ) {
				*ok = true;
			}
			return 0;
		}
		uint32_t nBytes;
		const T_Character nc( UTF8GetCodepoint( inputPos , nBytes ) );

		// `next` tells whether the character after the '0' is a base
		// marker that must be consumed
		bool next( true );
		if ( nc == 'x' || nc == 'X' ) {
			base = 16;
		} else if ( nc == 'b' || nc == 'B' ) {
			base = 2;
		} else {
			base = 8;
			next = false;
		}
		if ( next ) {
			inputPos += nBytes;
		}
	} else if ( base == 0 ) {
		base = 10;
	}

	// Start converting
	const uint64_t ubase( base );
	// Largest magnitude that may be accumulated: INT64_MAX for positive
	// values, INT64_MAX + 1 (the magnitude of INT64_MIN, computed without
	// signed overflow) for negative ones
	const uint64_t max( neg ? ( uint64_t( 0 - ( INT64_MIN + INT64_MAX ) ) + INT64_MAX ) : INT64_MAX );
	const uint64_t cutoff( max / ubase );
	const uint64_t limit( max % ubase );
	uint64_t accum( 0 );
	// 0: nothing valid yet / error, 1: digits converted, -1: overflow
	int any( 0 );
	while ( inputPos < inputEnd ) {
		uint32_t nBytes;
		const T_Character c( UTF8GetCodepoint( inputPos , nBytes ) );
		inputPos += nBytes;

		if ( useSep && c == separator ) {
			continue;
		}

		uint32_t value;
		if ( c.isNumeric( ) ) {
			value = c - '0';
		} else if ( c.isAlpha( ) ) {
			// 'A' is 65, so this maps 'A' to 10, 'B' to 11, ...
			value = c.toUpper( ) - 55;
		} else {
			any = 0;
			break;
		}
		if ( value >= ubase ) {
			any = 0;
			break;
		}

		// Once an overflow has been detected, remaining digits are
		// consumed without being accumulated
		if ( any < 0 || accum > cutoff || ( accum == cutoff && value > limit ) ) {
			any = -1;
		} else {
			any = 1;
			accum = accum * ubase + value;
		}
	}

	if ( any < 0 ) {
		accum = neg ? INT64_MIN : INT64_MAX;
	} else if ( any > 0 ) {
		if ( neg ) {
			// Two's complement negation of the magnitude
			accum = (~accum) + 1;
		}
		if ( ok ) {
			*ok = true;
		}
	}
	return accum;
}

/*----------------------------------------------------------------------------*/

// Parses a floating point number from the `size` bytes of UTF-8 text at
// `input`.
//
// The text is checked by a small state machine which also rewrites it into a
// plain "[sign]digits[.digits][e[sign]digits]" form; the result is then
// converted by strtod.
//
// Parameters:
//   input        - start of the text
//   size         - number of bytes available
//   ok           - optional; cleared on entry, set to true only if the text
//                  was accepted and strtod left errno at 0
//   decimalPoint - codepoint accepted as the decimal point
//   useSep       - when true, a single `separator` is accepted between two
//                  digits of the integer part
//   separator    - the codepoint to accept when `useSep` is set
//
// Leading whitespace is skipped. Returns 0 if the text is rejected, the
// value produced by strtod otherwise.
//
// NOTE(review): strtod honours the current C locale, while the rewritten
// text always uses '.' - confirm the program runs with a locale whose
// decimal point is '.'.
double ebcl::UTF8ToDouble( char const* input , uint32_t size ,
		bool * ok , uint32_t decimalPoint ,
		bool useSep , uint32_t separator )
{
	char const* inputPos( input );
	char const* const inputEnd( input + size );

	// Each input character produces at most one output byte, so size + 1
	// bytes are enough, terminator included. The buffer lives on the heap:
	// a variable-length array is not standard C++ and would put a
	// caller-controlled amount of data on the stack.
	std::string buffer( size_t( size ) + 1 , '\0' );
	char * outputPos( &buffer[ 0 ] );

	if ( ok ) {
		*ok = false;
	}

	enum E_State_ {
		INIT ,			// Nothing but whitespace so far
		HAD_SIGN ,		// Sign of the number was read
		INT_PART ,		// Inside the integer part
		HAD_SEP ,		// Separator read, digit required
		FRACT_PART ,		// After the decimal point
		AFTER_EXP ,		// Exponent marker was read
		AFTER_EXP_SIGN ,	// Sign of the exponent was read
		EXP_PART		// Inside the exponent's digits
	};
	E_State_ state( INIT );

	while ( inputPos < inputEnd ) {
		uint32_t nBytes;
		const T_Character c( UTF8GetCodepoint( inputPos , nBytes ) );
		inputPos += nBytes;

		switch ( state ) {

			case INIT:
				if ( c.isWhitespace( ) ) {
					continue;
				}
				if ( c == '+' || c == '-' ) {
					*( outputPos ++ ) = c;
					state = HAD_SIGN;
				} else if ( c == decimalPoint ) {
					*( outputPos ++ ) = '.';
					state = FRACT_PART;
				} else if ( c.isNumeric( ) ) {
					*( outputPos ++ ) = c;
					state = INT_PART;
				} else {
					return 0;
				}
				break;

			case HAD_SIGN:
				if ( c == decimalPoint ) {
					*( outputPos ++ ) = '.';
					state = FRACT_PART;
				} else if ( c.isNumeric( ) ) {
					*( outputPos ++ ) = c;
					state = INT_PART;
				} else {
					return 0;
				}
				break;

			case INT_PART:
				if ( c == decimalPoint ) {
					*( outputPos ++ ) = '.';
					state = FRACT_PART;
				} else if ( c == 'e' || c == 'E' ) {
					*( outputPos ++ ) = 'e';
					state = AFTER_EXP;
				} else if ( c.isNumeric( ) ) {
					*( outputPos ++ ) = c;
				} else if ( useSep && c == separator ) {
					// Separators are dropped from the output
					state = HAD_SEP;
				} else {
					return 0;
				}
				break;

			case HAD_SEP:
				if ( c.isNumeric( ) ) {
					*( outputPos ++ ) = c;
					state = INT_PART;
				} else {
					return 0;
				}
				break;

			case FRACT_PART:
				if ( c == 'e' || c == 'E' ) {
					*( outputPos ++ ) = 'e';
					state = AFTER_EXP;
				} else if ( c.isNumeric( ) ) {
					*( outputPos ++ ) = c;
					state = FRACT_PART;
				} else {
					return 0;
				}
				break;

			case AFTER_EXP:
				if ( c == '+' || c == '-' ) {
					*( outputPos ++ ) = c;
					state = AFTER_EXP_SIGN;
				} else if ( c.isNumeric( ) ) {
					*( outputPos ++ ) = c;
					state = EXP_PART;
				} else {
					return 0;
				}
				break;

			case AFTER_EXP_SIGN:
				if ( c.isNumeric( ) ) {
					*( outputPos ++ ) = c;
					state = EXP_PART;
				} else {
					return 0;
				}
				break;

			case EXP_PART:
				if ( c.isNumeric( ) ) {
					*( outputPos ++ ) = c;
				} else {
					return 0;
				}
				break;
		}
	}

	// The text must not stop where more input was required
	if ( state == INIT || state == HAD_SEP || state == HAD_SIGN
			|| state == AFTER_EXP || state == AFTER_EXP_SIGN ) {
		return 0;
	}
	*outputPos = '\0';

	errno = 0;
	auto v( strtod( buffer.c_str( ) , nullptr ) );
	if ( errno == 0 && ok ) {
		*ok = true;
	}
	return v;
}


/*= T_Character ==============================================================*/

// Reads one UTF-8 encoded character from a binary stream.
//
// The first byte determines how many continuation bytes follow (0 to 3).
// No validation is performed on the bytes read.
M_DEFINE_OBJECT_READER( T_Character )
{
	const char first( reader.read< char >( ) );
	if ( ( first & 0xf8 ) == 0xf0 ) {
		// Each byte is read in its own statement: the operands of a
		// single '|' expression may be evaluated in any order, which
		// would let the bytes be consumed out of sequence.
		const char second( reader.read< char >( ) );
		const char third( reader.read< char >( ) );
		const char fourth( reader.read< char >( ) );
		return ( ( first & 0x07 ) << 18 )
		       | ( ( second & 0x3f ) << 12 )
		       | ( ( third & 0x3f ) << 6 )
		       | ( fourth & 0x3f );

	} else if ( ( first & 0xf0 ) == 0xe0 ) {
		const char second( reader.read< char >( ) );
		const char third( reader.read< char >( ) );
		return ( ( first & 0x0f ) << 12 )
		       | ( ( second & 0x3f ) << 6 )
		       | ( third & 0x3f );

	} else if ( ( first & 0xe0 ) == 0xc0 ) {
		const char second( reader.read< char >( ) );
		return ( ( first & 0x1f ) << 6 )
		       | ( second & 0x3f );

	} else {
		return first;
	}
}

// Writes a character to a binary stream as 1 to 4 UTF-8 bytes, depending on
// the value of its codepoint.
M_DEFINE_OBJECT_WRITER( T_Character )
{
	const uint32_t cp( item );

	// Continuation byte carrying the 6 bits found at `shift`
	const auto trail = [ cp ]( const uint32_t shift ) -> uint32_t {
		return ( ( cp >> shift ) & 0x3f ) | 0x80;
	};

	if ( cp < 0x80 ) {
		// 1 byte
		writer.write< char >( cp );

	} else if ( cp < 0x800 ) {
		// 2 bytes
		writer.write< char >( ( cp >> 6 ) | 0xc0 );
		writer.write< char >( trail( 0 ) );

	} else if ( cp < 0x10000 ) {
		// 3 bytes
		writer.write< char >( ( cp >> 12 ) | 0xe0 );
		writer.write< char >( trail( 6 ) );
		writer.write< char >( trail( 0 ) );

	} else {
		// 4 bytes
		writer.write< char >( ( cp >> 18 ) | 0xf0 );
		writer.write< char >( trail( 12 ) );
		writer.write< char >( trail( 6 ) );
		writer.write< char >( trail( 0 ) );
	}
}

/*= A_StringData =============================================================*/

// Out-of-line definition of the (empty) A_StringData destructor.
// NOTE(review): the definition is marked `inline` inside a .cc file; if other
// translation units also need this destructor, confirm that they can see a
// definition.
inline A_StringData::~A_StringData( )
{ }


/*= A_StringDataInternal =====================================================*/

// Records whether this storage may be replaced by a pooled copy.
inline A_StringDataInternal::A_StringDataInternal( bool poolable )
	: poolable( poolable )
{ }

// Nothing to release at this level.
A_StringDataInternal::~A_StringDataInternal( )
{ }

// Default hook: storage that is not reference-counted ignores new users.
void A_StringDataInternal::addUser( )
{ }

// Default hook: storage that is not reference-counted ignores departing
// users.
void A_StringDataInternal::removeUser( )
{ }

// Hash of a string storage object: HashData applied to the size( ) bytes
// found at data( ).
inline M_DEFINE_HASH( A_StringDataInternal )
{
	return HashData( reinterpret_cast< uint8_t const* >( item.data( ) ) ,
			item.size( ) );
}


/*= T_EmptyString ============================================================*/

// Sets up the storage as a valid string without any contents: no buffer, no
// bytes, no characters. The empty string is never poolable.
T_EmptyString::T_EmptyString( )
	: A_StringDataInternal( false )
{
	// No buffer at all
	data_ = nullptr;

	// Zero bytes, zero characters
	length_ = 0;
	size_ = 0;

	// An empty string is valid UTF-8
	valid_ = true;
}

// The shared instance T_String points at when it holds no contents
T_EmptyString T_EmptyString::EmptyString;


/*= T_StaticString ===========================================================*/

// Creates read-only storage holding a private copy of `size` bytes taken from
// `data`. The validity flag and character count are computed from the bytes.
// Static strings are never poolable.
T_StaticString::T_StaticString( char const* data , uint32_t size )
	: A_StringDataInternal( false )
{
	size_ = size;

	// Duplicate the bytes into storage owned by this object
	char* const copy( static_cast< char* >( ::operator new ( size_ ) ) );
	memcpy( copy , data , size_ );
	data_ = copy;

	valid_ = UTF8BufferInfo( data , size , length_ );
}

// Releases the copy of the bytes made by the constructor.
T_StaticString::~T_StaticString( )
{
	::operator delete ( data_ );
}


/*= A_RefCountedString =======================================================*/

// Reference-counted storage is poolable, and starts with a single user: the
// code that created it.
inline A_RefCountedString::A_RefCountedString( )
	: A_StringDataInternal( true ) , users_( 1 )
{ }

// Registers one more user of this storage by atomically incrementing the
// counter.
void A_RefCountedString::addUser( )
{
	users_.fetch_add( 1 , std::memory_order_acq_rel );
}

// Unregisters one user of this storage. The object deletes itself when the
// last user goes away, so it must not be touched after this call unless the
// caller holds another reference.
void A_RefCountedString::removeUser( )
{
	// fetch_sub returns the previous value: 1 means the counter just
	// dropped to zero
	if ( users_.fetch_sub( 1 , std::memory_order_acq_rel ) == 1 ) {
		std::atomic_thread_fence( std::memory_order_acq_rel );
		// Check the counter again before destroying the object. A load
		// may not use a release ordering (memory_order_release or
		// memory_order_acq_rel), so acquire is used here.
		if ( users_.load( std::memory_order_acquire ) == 0 ) {
			delete this;
		}
	}
}


/*= T_DynamicString ==========================================================*/

namespace {
// Pool allocator serving T_DynamicString::operator new / delete. Being
// thread_local, each thread works with its own instance.
// NOTE(review): an object released by another thread than the one which
// allocated it goes through that other thread's instance - presumably
// supported by T_ThreadedPoolAllocator, confirm. The meaning of the two
// numeric arguments is defined by that template as well.
static thread_local T_ThreadedPoolAllocator<
		sizeof( T_DynamicString ) , alignof( T_DynamicString ) ,
		32 , 16
	> DynamicStringAllocator_;

}

// Obtains the memory for a T_DynamicString from the calling thread's pool
// allocator.
void* T_DynamicString::operator new(
		const size_t size ) noexcept
{
	return DynamicStringAllocator_.allocate( size );
}

// Hands the memory of a T_DynamicString back to the calling thread's pool
// allocator.
void T_DynamicString::operator delete(
		void* const object ) noexcept
{
	DynamicStringAllocator_.free( object );
}

/*----------------------------------------------------------------------------*/


// Creates reference-counted storage for `size` bytes of text.
//
// With `nodup` set, the buffer at `data` is adopted without being copied and
// will be released by the destructor; otherwise the bytes are duplicated
// into a buffer owned by this object. The validity flag and character count
// are computed from the bytes.
T_DynamicString::T_DynamicString( char const* data , uint32_t size , bool nodup )
	: A_RefCountedString( )
{
	size_ = size;

	if ( !nodup ) {
		// Work on a private copy of the bytes
		char* const copy( static_cast< char* >( ::operator new ( size_ ) ) );
		memcpy( copy , data , size_ );
		data_ = copy;
	} else {
		// Adopt the caller's buffer
		data_ = const_cast< char* >( data );
	}

	valid_ = UTF8BufferInfo( data , size , length_ );
}

// Releases the buffer, whether it was copied or adopted by the constructor.
T_DynamicString::~T_DynamicString( )
{
	::operator delete ( data_ );
}


/*= T_Substring ==============================================================*/

namespace {
// Pool allocator serving T_Substring::operator new / delete. Being
// thread_local, each thread works with its own instance.
// NOTE(review): an object released by another thread than the one which
// allocated it goes through that other thread's instance - presumably
// supported by T_ThreadedPoolAllocator, confirm. The meaning of the two
// numeric arguments is defined by that template as well.
static thread_local T_ThreadedPoolAllocator<
		sizeof( T_Substring ) , alignof( T_Substring ) ,
		32 , 4
	> SubstringAllocator_;

}

// Obtains the memory for a T_Substring from the calling thread's pool
// allocator.
void* T_Substring::operator new(
		const size_t size ) noexcept
{
	return SubstringAllocator_.allocate( size );
}

// Hands the memory of a T_Substring back to the calling thread's pool
// allocator.
void T_Substring::operator delete(
		void* const object ) noexcept
{
	SubstringAllocator_.free( object );
}

/*----------------------------------------------------------------------------*/

// Creates storage referring to `size` bytes located `offset` bytes into the
// buffer of `source`. No bytes are copied; the source is registered as being
// in use until this object is destroyed. The validity flag and character
// count are computed for the selected bytes only.
T_Substring::T_Substring( RP_StringDataInternal source , uint32_t offset , uint32_t size )
	: A_RefCountedString( ) , source_( source )
{
	assert( size + offset <= source_->size( ) );

	// Keep the parent storage alive for as long as we point into it
	source_->addUser( );

	size_ = size;
	data_ = const_cast< char* >( source_->data( ) + offset );
	valid_ = UTF8BufferInfo( data_ , size_ , length_ );
}

// Gives up the reference to the source storage taken by the constructor.
T_Substring::~T_Substring( )
{
	source_->removeUser( );
}


/*= T_StringPool =============================================================*/

// The shared pool instance, and the read/write mutex its methods lock
T_StringPool T_StringPool::Pool;
T_ReadWriteMutex T_StringPool::Mutex;

/*----------------------------------------------------------------------------*/

// Sets up the hash index and the array of strings with their sizing
// arguments (the meaning of each value is defined by T_HashIndex and
// T_Array respectively).
T_StringPool::T_StringPool( )
	: index_( 16384 , 4096 , 4096 ) , strings_( 4096 )
{ }

/*----------------------------------------------------------------------------*/

// Returns the pooled storage matching the `size` bytes at `data`, creating a
// new pool entry if there is none yet.
//
// The lookup runs under a read lock, which is upgraded to a write lock before
// the pool is modified.
// NOTE(review): the lookup is not repeated after the upgrade; this assumes
// that no other writer can get in between - confirm against
// T_ReadLock::upgrade. It also assumes index_ and strings_ hand out matching
// positions.
RP_StringDataInternal T_StringPool::add( char const* data , uint32_t size )
{
	T_ReadLock readLock( T_StringPool::Mutex );
	const auto hash( HashData( reinterpret_cast< uint8_t const* >( data ) , size ) );
	const auto existing( find( data , size , hash ) );

	// Already pooled
	if ( existing != T_HashIndex::INVALID_INDEX ) {
		return strings_[ existing ].get( );
	}

	// Not found: register the hash, then store a copy of the bytes
	const T_WriteLock writeLock( readLock.upgrade( ) );
	index_.add( hash );
	const auto added( strings_.add( NewOwned< T_StaticString >( data , size ) ) );
	return strings_[ added ].get( );
}

/*----------------------------------------------------------------------------*/

// Returns the pooled storage matching the `size` bytes at `data`, or nullptr
// if the pool has no such entry. The pool is only read, under a read lock.
RP_StringDataInternal T_StringPool::get( char const* data , uint32_t size ) const
{
	const T_ReadLock guard( T_StringPool::Mutex );
	const auto bytes( reinterpret_cast< uint8_t const* >( data ) );
	const auto found( find( data , size , HashData( bytes , size ) ) );

	if ( found != T_HashIndex::INVALID_INDEX ) {
		return strings_[ found ].get( );
	}
	return nullptr;
}

/*----------------------------------------------------------------------------*/

// Looks for the pool entry holding exactly the `sz` bytes at `data`.
//
// Walks the chain of entries sharing `hash` and compares sizes, then bytes.
// Returns the position of the entry, or T_HashIndex::INVALID_INDEX if there
// is no match. Locking is left to the caller.
uint32_t T_StringPool::find( char const* data , uint32_t sz , uint32_t hash ) const
{
	for ( uint32_t candidate = index_.first( hash ) ;
			candidate != T_HashIndex::INVALID_INDEX ;
			candidate = index_.next( candidate ) ) {
		auto const& entry( strings_[ candidate ] );
		if ( entry->size( ) == sz && memcmp( entry->data( ) , data , sz ) == 0 ) {
			return candidate;
		}
	}
	return T_HashIndex::INVALID_INDEX;
}


/*= T_StringIterator =========================================================*/

// Creates an iterator over `data`, positioned on the character at `index`.
//
// The storage is registered as being in use for the lifetime of the
// iterator. With a null `data`, the iterator is left without a position or
// current character.
T_StringIterator::T_StringIterator( RP_StringData data , uint32_t index )
	: data_( data ) , index_( index )
{
	if ( data_ == nullptr ) {
		pos_ = codepoint_ = bytes_ = 0;
		return;
	}

	dynamic_cast< RP_StringDataInternal >( data_ )->addUser( );

	// Translate the character index into a byte offset, then decode the
	// character found there
	const auto base( data_->data( ) );
	pos_ = UTF8GetMemoryOffset( base , index );
	codepoint_ = UTF8GetCodepoint( base + pos_ , bytes_ );
}

// Copies the position of another iterator and, if it refers to a string,
// registers the copy as one more user of the underlying storage.
T_StringIterator::T_StringIterator( T_StringIterator const& other )
	: data_( other.data_ ) , pos_( other.pos_ ) , index_( other.index_ ) ,
	codepoint_( other.codepoint_ ) , bytes_( other.bytes_ )
{
	if ( data_ != nullptr ) {
		dynamic_cast< RP_StringDataInternal >( data_ )->addUser( );
	}
}

/*----------------------------------------------------------------------------*/

// Gives up the iterator's reference to the underlying storage, if any.
T_StringIterator::~T_StringIterator( )
{
	if ( data_ != nullptr ) {
		dynamic_cast< RP_StringDataInternal >( data_ )->removeUser( );
	}
}

/*----------------------------------------------------------------------------*/

// Makes this iterator refer to the same string and position as `other`.
//
// The reference on the new storage is taken before the one on the previous
// storage is dropped. Releasing first would destroy the storage when both
// are the same object (self-assignment included) and this iterator holds
// the last reference to it.
T_StringIterator& T_StringIterator::operator= ( T_StringIterator const& other )
{
	if ( other.data_ != nullptr ) {
		dynamic_cast< RP_StringDataInternal >( other.data_ )->addUser( );
	}
	if ( data_ != nullptr ) {
		dynamic_cast< RP_StringDataInternal >( data_ )->removeUser( );
	}

	data_ = other.data_;
	pos_ = other.pos_;
	index_ = other.index_;
	codepoint_ = other.codepoint_;
	bytes_ = other.bytes_;
	return *this;
}

/*----------------------------------------------------------------------------*/

// Exchanges the state of two iterators. The user counts of the underlying
// storages are left alone, since each storage still has the same number of
// iterators referring to it afterwards.
void ebcl::swap( T_StringIterator& lhs , T_StringIterator& rhs ) noexcept
{
	using std::swap;

	// Storage being iterated
	swap( lhs.data_ , rhs.data_ );

	// Current character
	swap( lhs.codepoint_ , rhs.codepoint_ );
	swap( lhs.bytes_ , rhs.bytes_ );

	// Position, in bytes and in characters
	swap( lhs.pos_ , rhs.pos_ );
	swap( lhs.index_ , rhs.index_ );
}

/*----------------------------------------------------------------------------*/

// Moves to the next character.
//
// Returns false, without doing anything, if the iterator was already at the
// end. Otherwise skips the current character and returns true; when this
// reaches the end of the storage, the current character and its size are
// reset to 0 and the character index is left untouched.
bool T_StringIterator::next( )
{
	if ( atEnd( ) ) {
		return false;
	}

	// Skip the bytes of the current character
	pos_ += bytes_;

	const bool reachedEnd( pos_ == data_->size( ) );
	if ( !reachedEnd ) {
		codepoint_ = UTF8GetCodepoint( data_->data( ) + pos_ ,
					      bytes_ );
		index_ ++;
	} else {
		codepoint_ = bytes_ = 0;
	}
	return true;
}


/*= T_String =================================================================*/

// Creates an empty string, which refers to the shared T_EmptyString storage.
T_String::T_String( ) noexcept
	: data_( &T_EmptyString::EmptyString )
{ }

// Creates a string from a NUL-terminated C string.
//
// A null pointer or an empty C string gives the empty string. Otherwise the
// pool is consulted first, and a new dynamic string holding a copy of the
// bytes is only created if the text is not pooled.
T_String::T_String( char const* initial )
{
	if ( initial != nullptr && *initial != 0 ) {
		const uint32_t len( strlen( initial ) );
		const auto pooled( T_StringPool::Pool.get( initial , len ) );
		if ( pooled != nullptr ) {
			data_ = pooled;
		} else {
			data_ = new T_DynamicString( initial , len , false );
		}
	} else {
		data_ = &T_EmptyString::EmptyString;
	}
}

// Creates a string by taking over the buffer of a string builder.
//
// The delegated constructor only adopts the buffer when the builder has
// contents; for an empty builder it falls back to the shared empty string.
// The builder is therefore only reset when its buffer was actually adopted:
// clearing the pointer otherwise would leave a buffer that nothing owns
// (this assumes the builder releases its own buffer - confirm against
// T_StringBuilder).
T_String::T_String( T_StringBuilder&& sb )
	: T_String( sb.data_ , sb.size_ , true )
{
	if ( sb.data_ != nullptr && sb.size_ != 0 ) {
		sb.data_ = nullptr;
		sb.size_ = sb.length_ = sb.capacity_ = 0;
	}
}

// Creates a string from the current contents of a string builder by
// delegating to the buffer constructor; the builder itself is left untouched.
T_String::T_String( T_StringBuilder const& sb )
	: T_String( sb.data_ , sb.size_ )
{ }

// Creates a string from `size` bytes at `data`.
//
// A null pointer or a size of 0 gives the empty string. Otherwise a new
// dynamic string is created, which either copies the bytes or, when `nodup`
// is set, adopts the buffer.
T_String::T_String( char const* data , uint32_t size , bool nodup )
{
	const bool hasContents( data != nullptr && size != 0 );
	if ( hasContents ) {
		data_ = new T_DynamicString( data , size , nodup );
	} else {
		data_ = &T_EmptyString::EmptyString;
	}
}

/*----------------------------------------------------------------------------*/

// Copies a string by sharing its storage, which gets one more user.
T_String::T_String( T_String const& source )
	: data_( source.data_ )
{
	dynamic_cast< RP_StringDataInternal >( data_ )->addUser( );
}

// Moves a string: starts out empty, then trades storage with `source`, which
// ends up referring to the shared empty string.
T_String::T_String( T_String&& source ) noexcept
	: data_( &T_EmptyString::EmptyString )
{
	swap( *this , source );
}

/*----------------------------------------------------------------------------*/

// Gives up this string's reference to its storage.
T_String::~T_String( )
{
	assert( data_ != nullptr );
	dynamic_cast< RP_StringDataInternal >( data_ )->removeUser( );
}

/*----------------------------------------------------------------------------*/

// Move assignment: drops the reference to the current storage, takes over
// the storage of `string` (whose user count is left unchanged) and leaves
// `string` referring to the shared empty string.
T_String& T_String::operator= ( T_String&& string ) noexcept
{
	assert( data_ != nullptr );
	dynamic_cast< RP_StringDataInternal >( data_ )->removeUser( );
	data_ = string.data_;
	string.data_ = &T_EmptyString::EmptyString;
	return *this;
}

// Copy assignment: makes this string share the storage of `string`.
//
// The reference on the new storage is taken before the one on the previous
// storage is dropped. Releasing first would destroy the storage when both
// are the same object (self-assignment included) and this string holds the
// last reference to it.
T_String& T_String::operator= ( T_String const& string )
{
	assert( data_ != nullptr );
	assert( string.data_ != nullptr );

	dynamic_cast< RP_StringDataInternal >( string.data_ )->addUser( );
	dynamic_cast< RP_StringDataInternal >( data_ )->removeUser( );
	data_ = string.data_;
	return *this;
}

// Replaces the contents of this string by taking over the buffer of a string
// builder.
//
// As in the matching constructor, a builder without contents gives the
// shared empty string instead of a dynamic string wrapped around a null or
// zero-sized buffer. The builder is only reset when its buffer was actually
// adopted.
T_String& T_String::operator= ( T_StringBuilder&& sb )
{
	assert( data_ != nullptr );
	dynamic_cast< RP_StringDataInternal >( data_ )->removeUser( );

	if ( sb.data_ == nullptr || sb.size_ == 0 ) {
		data_ = &T_EmptyString::EmptyString;
	} else {
		data_ = new T_DynamicString( sb.data_ , sb.size_ , true );
		sb.data_ = nullptr;
		sb.size_ = sb.length_ = sb.capacity_ = 0;
	}
	return *this;
}

/*----------------------------------------------------------------------------*/

// Replaces the contents of this string with a copy of the current contents
// of a string builder.
//
// As in the matching constructor, a builder without contents gives the
// shared empty string instead of a dynamic string created from a null or
// zero-sized buffer.
T_String& T_String::operator= ( T_StringBuilder const& sb )
{
	assert( data_ != nullptr );
	dynamic_cast< RP_StringDataInternal >( data_ )->removeUser( );

	if ( sb.data_ == nullptr || sb.size_ == 0 ) {
		data_ = &T_EmptyString::EmptyString;
	} else {
		data_ = new T_DynamicString( sb.data_ , sb.size_ , false );
	}
	return *this;
}

/*----------------------------------------------------------------------------*/

// Exchanges the storage pointers of two strings; user counts are unaffected.
void ebcl::swap( T_String& lhs , T_String& rhs ) noexcept
{
	const auto lhsData( lhs.data_ );
	lhs.data_ = rhs.data_;
	rhs.data_ = lhsData;
}

/*----------------------------------------------------------------------------*/

// Creates a string whose storage comes from the string pool.
// `data` must not be null; a zero size yields a default-constructed string.
T_String T_String::Pooled( char const* data , uint32_t size )
{
	assert( data != nullptr );

	T_String pooled;
	if ( size != 0 ) {
		// Only non-empty contents are handed to the pool.
		pooled.data_ = T_StringPool::Pool.add( data , size );
	}
	return pooled;
}

/*----------------------------------------------------------------------------*/

// Replaces this string's storage with the pool's entry for the same bytes
// (adding it to the pool through T_StringPool::Pool.add). Storage that is
// not flagged as poolable is left alone.
T_String& T_String::addToPool( )
{
	const auto current( dynamic_cast< RP_StringDataInternal >( data_ ) );
	if ( !current->poolable ) {
		return *this;
	}

	// Switch to the pooled storage first, then let go of the old one.
	data_ = T_StringPool::Pool.add( current->data( ) , current->size( ) );
	current->removeUser( );
	return *this;
}


// Switches this string to the pool's storage if the pool already has an
// entry for the same bytes. Nothing changes when the storage is not
// poolable or when the pool lookup returns null.
T_String& T_String::usePool( )
{
	const auto current( dynamic_cast< RP_StringDataInternal >( data_ ) );
	if ( !current->poolable ) {
		return *this;
	}

	const auto pooled( T_StringPool::Pool.get( current->data( ) , current->size( ) ) );
	if ( pooled == nullptr ) {
		return *this;
	}

	data_ = pooled;
	current->removeUser( );
	return *this;
}

/*----------------------------------------------------------------------------*/

// Returns the first `count` characters of the string as a substring that
// references this string's storage. If the string has `count` characters
// or fewer, a copy of the whole string is returned.
T_String T_String::left( uint32_t count ) const
{
	if ( length( ) <= count ) {
		return *this;
	}

	// Convert the character count into a byte count.
	const auto bytes( UTF8GetMemoryOffset( data( ) , count ) );
	const auto parent( dynamic_cast< RP_StringDataInternal >( data_ ) );

	T_String result;
	result.data_ = new T_Substring( parent , 0 , bytes );
	return result;
}

// Returns the last `count` characters of the string as a substring that
// references this string's storage. If the string has `count` characters
// or fewer, a copy of the whole string is returned.
T_String T_String::right( uint32_t count ) const
{
	if ( length( ) <= count ) {
		return *this;
	}

	// Byte offset of the first character that is kept.
	const auto firstByte( UTF8GetMemoryOffset( data( ) , length( ) - count ) );
	const auto parent( dynamic_cast< RP_StringDataInternal >( data_ ) );

	T_String result;
	result.data_ = new T_Substring( parent , firstByte ,
				       data_->size( ) - firstByte );
	return result;
}

// Returns `count` characters starting at character index `offset`.
//
// Offsets at or past the end, and a zero count, give an empty string. A
// range that reaches or exceeds the end of the string is clamped to it.
T_String T_String::substr( uint32_t offset , uint32_t count ) const
{
	// A range starting at the beginning is just a prefix.
	if ( offset == 0 ) {
		return left( count );
	}

	const auto total( length( ) );
	if ( count == 0 || offset >= total ) {
		return T_String( );
	}

	// 64-bit sum so that offset + count cannot wrap around.
	const uint64_t end = uint64_t( offset ) + count;
	if ( end >= total ) {
		// The range runs to (or past) the end: whether clamped or exact,
		// what remains is the trailing ( total - offset ) characters.
		return right( total - offset );
	}

	// General case: translate both character positions into byte positions.
	const auto firstByte( UTF8GetMemoryOffset( data( ) , offset ) );
	const auto byteCount( UTF8GetMemoryOffset( data( ) + firstByte , count ) );
	const auto parent( dynamic_cast< RP_StringDataInternal >( data_ ) );

	T_String result;
	result.data_ = new T_Substring( parent , firstByte , byteCount );
	return result;
}

/*----------------------------------------------------------------------------*/

// Returns the part of the string between its first and last non-whitespace
// characters (both included), obtained through range( ). An empty string,
// or one made only of whitespace, gives an empty string.
T_String T_String::trim( ) const noexcept
{
	if ( length( ) == 0 ) {
		return T_String( );
	}

	// Single pass recording the indices of the first and last characters
	// that are not whitespace.
	T_Optional< uint32_t > firstNws;
	uint32_t lastNws = 0;
	for ( T_StringIterator it( *this ) ; !it.atEnd( ) ; it.next( ) ) {
		if ( T_Character( it ).isWhitespace( ) ) {
			continue;
		}
		if ( !firstNws ) {
			firstNws = it.index( );
		}
		lastNws = it.index( );
	}

	if ( firstNws ) {
		return range( *firstNws , lastNws );
	}
	return T_String( );
}

/*----------------------------------------------------------------------------*/

// Character-by-character comparison.
//
// Returns -1, 0 or 1 when this string sorts before, equal to or after
// `other`. Strings sharing the same storage compare equal immediately; 0 is
// also returned when either string is not valid. When one string is a
// prefix of the other, the shorter one sorts first.
int32_t T_String::compare( T_String const& other ) const
{
	if ( this == &other || data_ == other.data_ ) {
		return 0;
	}
	if ( !valid( ) || !other.valid( ) ) {
		return 0;
	}

	T_StringIterator mine( *this );
	T_StringIterator theirs( other );
	for ( ; !mine.atEnd( ) && !theirs.atEnd( ) ; mine.next( ) , theirs.next( ) ) {
		T_Character lhs( mine );
		T_Character rhs( theirs );
		if ( lhs < rhs ) {
			return -1;
		}
		if ( lhs > rhs ) {
			return 1;
		}
	}

	// All common characters matched: the string with characters left over
	// is the greater one.
	if ( !theirs.atEnd( ) ) {
		return -1;
	}
	return mine.atEnd( ) ? 0 : 1;
}

// Character-by-character comparison in which both sides are converted with
// T_Character::toLower( ) before being compared.
//
// Returns -1, 0 or 1; 0 is also returned when either string is not valid.
// When one string is a prefix of the other, the shorter one sorts first.
int32_t T_String::compareIgnoreCase( T_String const& other ) const
{
	if ( !valid( ) || !other.valid( ) ) {
		return 0;
	}

	T_StringIterator mine( *this );
	T_StringIterator theirs( other );
	for ( ; !mine.atEnd( ) && !theirs.atEnd( ) ; mine.next( ) , theirs.next( ) ) {
		auto lhs( T_Character( mine ).toLower( ) );
		auto rhs( T_Character( theirs ).toLower( ) );
		if ( lhs < rhs ) {
			return -1;
		}
		if ( lhs > rhs ) {
			return 1;
		}
	}

	// All common characters matched: the string with characters left over
	// is the greater one.
	if ( !theirs.atEnd( ) ) {
		return -1;
	}
	return mine.atEnd( ) ? 0 : 1;
}

/*----------------------------------------------------------------------------*/

// Checks whether this string begins with `other`.
//
// Returns false if either string is not valid, true if `other` is empty.
//
// The size guard works on byte counts rather than character counts because
// the comparison itself is a byte comparison: a string with fewer characters
// can still occupy more bytes (multi-byte UTF-8 sequences), and comparing
// other.size( ) bytes would then read past the end of this string's data.
bool T_String::startsWith( T_String const& other ) const
{
	if ( !( valid( ) && other.valid( ) ) ) {
		return false;
	}

	const auto otherBytes( other.size( ) );
	if ( otherBytes == 0 ) {
		return true;
	}
	if ( otherBytes > size( ) ) {
		return false;
	}
	return !memcmp( data( ) , other.data( ) , otherBytes );
}

// Checks whether this string ends with `other`.
//
// Returns false if either string is not valid, true if `other` is empty.
//
// The size guard works on byte counts rather than character counts because
// the comparison itself is a byte comparison: a string with fewer characters
// can still occupy more bytes (multi-byte UTF-8 sequences), in which case
// data( ) + size( ) - other.size( ) would point before the start of this
// string's data.
bool T_String::endsWith( T_String const& other ) const
{
	if ( !( valid( ) && other.valid( ) ) ) {
		return false;
	}

	const auto otherBytes( other.size( ) );
	if ( otherBytes == 0 ) {
		return true;
	}
	if ( otherBytes > size( ) ) {
		return false;
	}
	return !memcmp( data( ) + ( size( ) - otherBytes ) ,
		       other.data( ) , otherBytes );
}

/*----------------------------------------------------------------------------*/

// Searches for `other` starting at character index `from`.
//
// Returns the character index of the first match, or -1 if there is none,
// if `from` is past the end, if fewer characters remain than `other`
// contains, or if either string is not valid. An empty `other` matches
// at `from`.
int32_t T_String::find( T_String const& other , uint32_t from ) const
{
	if ( from > length( ) ) {
		return -1;
	}
	if ( length( ) - from < other.length( ) ) {
		return -1;
	}
	if ( !valid( ) || !other.valid( ) ) {
		return -1;
	}
	if ( other.length( ) == 0 ) {
		return from;
	}

	char const* const needle( other.data( ) );
	const auto needleBytes( other.size( ) );
	const auto totalBytes( size( ) );

	// `index` counts characters while `byte` follows the matching position
	// in memory; both advance one character at a time.
	uint32_t index = from;
	uint32_t byte = UTF8GetMemoryOffset( data( ) , from );
	for ( ; byte + needleBytes <= totalBytes ; index ++ ) {
		char const* const here( data( ) + byte );
		if ( memcmp( here , needle , needleBytes ) == 0 ) {
			return index;
		}
		byte += UTF8GetMemoryOffset( here , 1 );
	}
	return -1;
}

/*----------------------------------------------------------------------------*/

// Searches for a character starting at character index `from`.
//
// Returns the index of the first occurrence, or -1 if the character is not
// found, if the string or the character is not valid, or if `from` is not
// inside the string.
int32_t T_String::find( T_Character character , uint32_t from ) const
{
	if ( !valid( ) || !character.isValid( ) || from >= length( ) ) {
		return -1;
	}

	for ( T_StringIterator it( getIterator( from ) ) ; !it.atEnd( ) ; it.next( ) ) {
		if ( T_Character( it ) == character ) {
			return it.index( );
		}
	}
	return -1;
}

/*----------------------------------------------------------------------------*/

// Returns a copy of the string in which every occurrence of `initial` is
// replaced with `replacement`.
//
// An empty string is returned if the string or either character is not
// valid. The string itself is returned when both characters are equal or
// when the string tests false.
T_String T_String::replace( T_Character initial , T_Character replacement ) const
{
	if ( !valid( ) || !initial.isValid( ) || !replacement.isValid( ) ) {
		return T_String( );
	}
	if ( initial == replacement || !*this ) {
		return *this;
	}

	T_StringBuilder output;
	output.ensureCapacity( size( ) );
	for ( T_StringIterator it( *this ) ; !it.atEnd( ) ; it.next( ) ) {
		T_Character current( it );
		if ( current == initial ) {
			output << replacement;
		} else {
			output << current;
		}
	}
	return T_String( std::move( output ) );
}

// Returns a copy of the string in which every occurrence of the `initial`
// string is replaced with `replacement`.
//
// An empty string is returned if any of the three strings is not valid.
// The string itself is returned when there is nothing to do (`initial`
// tests false, equals `replacement` or is longer than this string, or this
// string tests false). When this string is exactly `initial`, `replacement`
// is returned directly.
T_String T_String::replace( T_String const& initial , T_String const& replacement ) const
{
	if ( !valid( ) || !initial.valid( ) || !replacement.valid( ) ) {
		return T_String( );
	}

	if ( !initial || initial == replacement || initial.length( ) > length( ) || !*this ) {
		return *this;
	}
	if ( initial.length( ) == length( ) && *this == initial ) {
		return replacement;
	}

	char const* const pattern( initial.data( ) );
	const auto patternBytes( initial.size( ) );
	char const* const input( data( ) );
	const auto inputBytes( size( ) );

	T_StringBuilder output;
	for ( uint32_t pos = 0 ; pos < inputBytes ; ) {
		const bool matches = ( pos + patternBytes <= inputBytes )
			&& !memcmp( input + pos , pattern , patternBytes );
		if ( matches ) {
			// Skip the whole pattern and emit the replacement instead.
			output << replacement;
			pos += patternBytes;
		} else {
			// Copy one character; the decoder reports the number of
			// bytes it consumed.
			uint32_t consumed;
			output << T_Character( UTF8GetCodepoint( input + pos , consumed ) );
			pos += consumed;
		}
	}
	return T_String( std::move( output ) );
}

/*----------------------------------------------------------------------------*/

// Converts the string to a zero-terminated buffer in the encoding used
// here for the current platform: UTF-16 (stored in a char buffer) on
// Windows, a plain byte copy elsewhere. An empty buffer is returned if
// the string is not valid or, on Windows, if the conversion fails.
T_Buffer< char > T_String::toOSString( ) const
{
	if ( !valid( ) ) {
		return T_Buffer< char >( );
	}

#ifdef _WIN32

	// The number of UTF-16 code units is not the number of code points:
	// characters outside the Basic Multilingual Plane need two units. Ask
	// the API for the required length instead of assuming one unit per
	// character, which made the conversion fail for such strings.
	const int nBytes( int( data_->size( ) ) );
	const int nWide( MultiByteToWideChar( CP_UTF8 , 0 ,
					     data( ) , nBytes ,
					     nullptr , 0 ) );
	if ( nWide <= 0 ) {
		return T_Buffer< char >( );
	}

	const uint32_t n( nWide );
	T_Buffer< char > output( ( n + 1 ) * 2 );
	if ( !MultiByteToWideChar( CP_UTF8 , 0 ,
				  data( ) , nBytes ,
				  ( wchar_t* ) output.data( ) , nWide ) )
	{
		return T_Buffer< char >( );
	}
	// Two zero bytes form the UTF-16 terminator.
	output[ n * 2 ] = output[ n * 2 + 1 ] = 0;
	return output;

#else

	const auto n( data_->size( ) );
	T_Buffer< char > output( n + 1 );
	if ( n != 0 ) {
		// Nothing to copy for an empty string; also avoids handing
		// memcpy a source pointer that may be null in that case.
		memcpy( output.data( ) , data( ) , n );
	}
	output[ n ] = 0;
	return output;

#endif
}

/*----------------------------------------------------------------------------*/

// Reads a string written as a 32-bit byte count followed by that many bytes.
// A zero count gives a default-constructed string. Throws
// X_StreamError( BAD_DATA ) if fewer bytes than announced could be read.
M_DEFINE_OBJECT_READER( T_String )
{
	const uint32_t size( reader.read< uint32_t >( ) );
	if ( size == 0 ) {
		return T_String( );
	}

	// The buffer is handed over to the string (nodup = true) on success;
	// on every failure path it must be released here or it would leak.
	char* const buffer = ( char* )::operator new ( size );
	uint32_t r;
	try {
		r = reader.stream( ).read( buffer , size );
	} catch ( ... ) {
		::operator delete ( buffer );
		throw;
	}
	if ( r != size ) {
		::operator delete ( buffer );
		throw X_StreamError( E_StreamError::BAD_DATA );
	}
	return T_String( buffer , size , true );
}

// Writes a string as a 32-bit byte count followed by its bytes (no bytes
// are written for an empty string). Throws X_StreamError( BAD_DATA ) if the
// stream accepts fewer bytes than requested.
M_DEFINE_OBJECT_WRITER( T_String )
{
	const uint32_t bytes( item.size( ) );
	writer.write( bytes );

	const bool shortWrite = ( bytes != 0 )
		&& ( uint32_t( writer.stream( ).write( item.data( ) , bytes ) ) != bytes );
	if ( shortWrite ) {
		throw X_StreamError( E_StreamError::BAD_DATA );
	}
}


/*= T_StringBuilder ==========================================================*/

// Copy constructor: allocates a buffer large enough for the other builder's
// contents and copies them. An empty source allocates nothing.
T_StringBuilder::T_StringBuilder( T_StringBuilder const& other )
	: data_( nullptr ) , capacity_( 0 ) , size_( other.size_ ) ,
	length_( other.length_ )
{
	if ( size_ == 0 ) {
		// With no capacity requested there is nothing to allocate
		// or copy.
		return;
	}
	ensureCapacity( size_ );
	memcpy( data_ , other.data_ , size_ );
}

// Move constructor: starts from a default-constructed builder, then
// exchanges its state with the source's.
T_StringBuilder::T_StringBuilder( T_StringBuilder&& other ) noexcept
	: T_StringBuilder( )
{
	T_StringBuilder& self( *this );
	swap( self , other );
}

// Builds a string builder holding a copy of `size` bytes from `data`; the
// character count is obtained from UTF8BufferInfo( ).
//
// The copy is skipped for a zero size: no buffer is allocated in that case,
// and calling memcpy with a null pointer is undefined behaviour even when
// the length is zero (the copy constructor guards against this as well).
T_StringBuilder::T_StringBuilder( char const* data , uint32_t size )
	: T_StringBuilder( )
{
	if ( size != 0 ) {
		ensureCapacity( size );
		memcpy( data_ , data , size );
	}
	size_ = size;
	UTF8BufferInfo( data , size , length_ );
}

// Builds a string builder holding a copy of a string's contents.
//
// The copy is skipped for an empty string: no buffer is allocated in that
// case, and calling memcpy with a null pointer is undefined behaviour even
// when the length is zero (the copy constructor guards against this too).
T_StringBuilder::T_StringBuilder( T_String const& string )
	: T_StringBuilder( )
{
	size_ = string.size( );
	length_ = string.length( );
	if ( size_ != 0 ) {
		ensureCapacity( size_ );
		memcpy( data_ , string.data( ) , size_ );
	}
}

/*----------------------------------------------------------------------------*/

// Destructor: releases the buffer; it is obtained with ::operator new in
// ensureCapacity( ), hence the matching ::operator delete.
T_StringBuilder::~T_StringBuilder( )
{
	char* const buffer( data_ );
	::operator delete ( buffer );
}

/*----------------------------------------------------------------------------*/

// Exchanges the complete state (buffer, capacity, byte size and character
// count) of two string builders.
void ebcl::swap( T_StringBuilder& lhs , T_StringBuilder& rhs )
{
	using std::swap;

	// Counters first, then the storage itself; the four exchanges are
	// independent of each other.
	swap( lhs.length_ , rhs.length_ );
	swap( lhs.size_ , rhs.size_ );
	swap( lhs.capacity_ , rhs.capacity_ );
	swap( lhs.data_ , rhs.data_ );
}

/*----------------------------------------------------------------------------*/

// Copy assignment: makes sure the buffer can hold the other builder's
// contents, then copies its counters and bytes.
T_StringBuilder& T_StringBuilder::operator=( T_StringBuilder const& other )
{
	// The capacity must be adjusted before size_ changes, as growing the
	// buffer copies size_ bytes out of the old one.
	ensureCapacity( other.size( ) );

	length_ = other.length_;
	size_ = other.size_;
	if ( size_ == 0 ) {
		return *this;
	}
	memcpy( data_ , other.data_ , size_ );
	return *this;
}

// Move assignment: releases the current buffer, takes over the other
// builder's buffer and counters, and leaves the other builder empty and
// unallocated.
T_StringBuilder& T_StringBuilder::operator=( T_StringBuilder&& other ) noexcept
{
	// Deleting a null pointer is a no-op, so no test is needed.
	::operator delete( data_ );

	data_ = other.data_;
	other.data_ = nullptr;

	capacity_ = other.capacity_;
	size_ = other.size_;
	length_ = other.length_;
	other.capacity_ = 0;
	other.size_ = 0;
	other.length_ = 0;

	return *this;
}

/*----------------------------------------------------------------------------*/

// Makes sure the buffer can hold at least `minCap` bytes.
//
// When the buffer has to grow, the new capacity is `minCap` rounded up to a
// multiple of C_GROWTH and the current contents (size_ bytes) are carried
// over. The buffer never shrinks.
T_StringBuilder& T_StringBuilder::ensureCapacity( uint32_t minCap )
{
	if ( minCap <= capacity_ ) {
		return *this;
	}

	// Round up to the next multiple of the growth step.
	const uint32_t remainder = minCap % C_GROWTH;
	uint32_t rounded = minCap;
	if ( remainder != 0 ) {
		rounded += C_GROWTH - remainder;
	}

	char* const replacement = ( char* )::operator new ( rounded );
	if ( data_ != nullptr ) {
		memcpy( replacement , data_ , size_ );
		::operator delete ( data_ );
	}
	capacity_ = rounded;
	data_ = replacement;
	return *this;
}

// Releases the buffer, resets the capacity to zero and then clears the
// builder through clear( ).
T_StringBuilder& T_StringBuilder::free( )
{
	::operator delete ( data_ );
	data_ = nullptr;
	capacity_ = 0;
	return clear( );
}

/*----------------------------------------------------------------------------*/

// Appends the contents of another string builder; appending an empty
// builder does nothing.
T_StringBuilder& T_StringBuilder::append( T_StringBuilder const& other )
{
	const uint32_t extra = other.size_;
	if ( extra == 0 ) {
		return *this;
	}

	ensureCapacity( size_ + extra );
	// other.data_ is read after the capacity change on purpose: when
	// appending a builder to itself the buffer may just have moved.
	memcpy( data_ + size_ , other.data_ , extra );
	size_ += extra;
	length_ += other.length_;
	return *this;
}

// Appends the contents of a string builder that may be consumed.
//
// If this builder is empty and its buffer is no larger than the other's,
// the two builders simply exchange their state; otherwise the contents are
// copied as with the const-reference overload.
T_StringBuilder& T_StringBuilder::append( T_StringBuilder&& other )
{
	const bool canSteal = ( size_ == 0 && capacity_ <= other.capacity_ );
	if ( !canSteal ) {
		return append( ( T_StringBuilder const& ) other );
	}
	swap( *this , other );
	return *this;
}

// Appends the contents of a string; appending an empty string does nothing.
T_StringBuilder& T_StringBuilder::append( T_String const& string )
{
	const uint32_t extra = string.size( );
	if ( string.size( ) == 0 ) {
		return *this;
	}

	ensureCapacity( size_ + extra );
	memcpy( data_ + size_ , string.data( ) , extra );
	size_ += extra;
	length_ += string.length( );
	return *this;
}

// Appends `size` bytes from a raw buffer; the number of characters added is
// obtained from UTF8BufferInfo( ). A zero size does nothing.
T_StringBuilder& T_StringBuilder::append( char const* string , uint32_t size )
{
	if ( size == 0 ) {
		return *this;
	}

	// Count the characters before touching the buffer.
	uint32_t characters;
	UTF8BufferInfo( string , size , characters );

	ensureCapacity( size_ + size );
	memcpy( data_ + size_ , string , size );
	length_ += characters;
	size_ += size;
	return *this;
}

// Appends a single character given as a `char`. Only values below 128 are
// accepted; anything else is silently ignored.
T_StringBuilder& T_StringBuilder::append( char character )
{
	if ( uint8_t( character ) >= 128 ) {
		return *this;
	}

	ensureCapacity( size_ + 1 );
	data_[ size_ ] = character;
	size_ ++;
	length_ ++;
	return *this;
}

// Appends a single character, encoded through UTF8PutCodepoint( ).
// Characters that are not valid are silently ignored.
T_StringBuilder& T_StringBuilder::append( T_Character character )
{
	if ( character.isValid( ) ) {
		// Reserve room for the 4 bytes offered to the encoder below,
		// relative to the bytes in use. (Requesting capacity_ + 4, as
		// this used to, forced a reallocation and a full copy of the
		// buffer on every single call.)
		ensureCapacity( size_ + 4 );
		uint32_t w = UTF8PutCodepoint( data_ + size_ ,
					      capacity_ - size_ , character );
		assert( w != 0 );
		size_ += w;
		length_ ++;
	}
	return *this;
}

/*----------------------------------------------------------------------------*/

// Appends a signed integer written in the specified base.
//
//  value     the number to append; negative values get a leading '-'
//  base      numeric base, between 2 and 36; digits above 9 use 'A'..'Z'
//  useSep    whether digit groups are separated
//  sep       the separator character
//  sepEvery  number of digits per group, counted from the least
//            significant digit
//
// The digits are produced from a 64-bit unsigned magnitude, so the most
// negative value is handled without negating a signed integer (which would
// overflow). The scratch array has a fixed size covering the worst case,
// instead of relying on a variable-length array.
T_StringBuilder& T_StringBuilder::appendNumeric( int64_t value , int base , bool useSep , T_Character sep ,
						int sepEvery )
{
	assert( base >= 2 && base <= 36 );
	assert( sepEvery > 0 );
	assert( sep.isValid( ) );

	if ( value == 0 ) {
		return append( '0' );
	}

	// Worst case: 64 binary digits, a separator between each pair of
	// consecutive digits (63) and the sign.
	constexpr uint32_t C_MAX_OUTPUT = 64 + 63 + 1;
	uint32_t output[ C_MAX_OUTPUT ];
	uint32_t first = C_MAX_OUTPUT;	// Index of the first used slot
	uint32_t ecap = 0;		// Upper bound on the bytes needed
	int sepl = 0;			// Digits since the last separator

	const bool neg = value < 0;
	const uint64_t ubase = uint64_t( base );
	uint64_t magnitude = neg
		? ( uint64_t( 0 ) - uint64_t( value ) )
		: uint64_t( value );

	// Fill the scratch array backwards, least significant digit first.
	while ( magnitude != 0 ) {
		const uint32_t mod = uint32_t( magnitude % ubase );
		assert( first > 0 );
		output[ -- first ] = mod + ( mod < 10 ? '0' : ( 'A' - 10 ) );
		sepl ++;
		ecap ++;

		magnitude /= ubase;
		if ( useSep && sepl == sepEvery && magnitude != 0 ) {
			assert( first > 0 );
			output[ -- first ] = sep;
			sepl = 0;
			ecap += 4;	// A separator may need up to 4 bytes
		}
	}

	if ( neg ) {
		assert( first > 0 );
		output[ -- first ] = '-';
		ecap ++;
	}

	// Encode the collected code points at the end of the buffer.
	ensureCapacity( size_ + ecap );
	char* ptr = data_ + size_;
	uint32_t written = 0;
	for ( uint32_t i = first ; i < C_MAX_OUTPUT ; i ++ ) {
		const uint32_t wr = UTF8PutCodepoint( ptr , 4 , output[ i ] );
		ptr += wr;
		written += wr;
	}
	size_ += written;
	length_ += C_MAX_OUTPUT - first;

	return *this;
}

/*----------------------------------------------------------------------------*/

// Appends an unsigned integer written in the specified base.
//
//  value     the number to append
//  base      numeric base, between 2 and 36; digits above 9 use 'A'..'Z'
//  useSep    whether digit groups are separated
//  sep       the separator character
//  sepEvery  number of digits per group, counted from the least
//            significant digit
//
// The scratch array has a fixed size covering the worst case instead of
// relying on a variable-length array, and may legitimately be filled
// completely (e.g. a 64-digit binary number), which the previous
// `len < size` assertions rejected.
T_StringBuilder& T_StringBuilder::appendNumeric( uint64_t value , int base , bool useSep , T_Character sep ,
						int sepEvery )
{
	assert( base >= 2 && base <= 36 );
	assert( sepEvery > 0 );
	assert( sep.isValid( ) );

	if ( value == 0 ) {
		return append( '0' );
	}

	// Worst case: 64 binary digits and a separator between each pair of
	// consecutive digits (63).
	constexpr uint32_t C_MAX_OUTPUT = 64 + 63;
	uint32_t output[ C_MAX_OUTPUT ];
	uint32_t first = C_MAX_OUTPUT;	// Index of the first used slot
	uint32_t ecap = 0;		// Upper bound on the bytes needed
	int sepl = 0;			// Digits since the last separator

	const uint64_t ubase = uint64_t( base );

	// Fill the scratch array backwards, least significant digit first.
	while ( value != 0 ) {
		const uint32_t mod = uint32_t( value % ubase );
		assert( first > 0 );
		output[ -- first ] = mod + ( mod < 10 ? '0' : ( 'A' - 10 ) );
		sepl ++;
		ecap ++;

		value /= ubase;
		if ( useSep && sepl == sepEvery && value != 0 ) {
			assert( first > 0 );
			output[ -- first ] = sep;
			sepl = 0;
			ecap += 4;	// A separator may need up to 4 bytes
		}
	}

	// Encode the collected code points at the end of the buffer.
	ensureCapacity( size_ + ecap );
	char* ptr = data_ + size_;
	uint32_t written = 0;
	for ( uint32_t i = first ; i < C_MAX_OUTPUT ; i ++ ) {
		const uint32_t wr = UTF8PutCodepoint( ptr , 4 , output[ i ] );
		ptr += wr;
		written += wr;
	}
	size_ += written;
	length_ += C_MAX_OUTPUT - first;

	return *this;
}

/*----------------------------------------------------------------------------*/

// Appends a floating point value formatted by snprintf( ).
//
//  value          the number to append
//  precision      precision passed to the format's ".*" field
//  trailingZeros  selects the "%.*f" format when true, "%.*g" otherwise
//
// The text is formatted directly into the builder's buffer, which avoids a
// variable-length array on the stack. Nothing is appended if snprintf( )
// reports an error.
T_StringBuilder& T_StringBuilder::appendDouble( double value , uint32_t precision , bool trailingZeros )
{
	char const* const fmt = trailingZeros ? "%.*f" : "%.*g";

	// The ".*" field expects an int; clamp rather than let a huge
	// unsigned value be reinterpreted as a negative precision.
	const int prec = ( precision > 0x7fffffffu )
		? 0x7fffffff : int( precision );

	// First pass: find out how many bytes the text needs.
	const int nchars = snprintf( nullptr , 0 , fmt , prec , value );
	if ( nchars <= 0 ) {
		return *this;
	}
	const uint32_t bytes = uint32_t( nchars );

	// Second pass: format in place. One extra byte is reserved for the
	// terminating zero, which is not counted as part of the contents.
	ensureCapacity( size_ + bytes + 1 );
	snprintf( data_ + size_ , bytes + 1 , fmt , prec , value );

	uint32_t characters;
	UTF8BufferInfo( data_ + size_ , bytes , characters );
	size_ += bytes;
	length_ += characters;
	return *this;
}

/*----------------------------------------------------------------------------*/

// Writes a string builder's contents as a 32-bit byte count followed by the
// bytes themselves, i.e. the same layout as the T_String writer.
//
// As in the T_String writer, nothing is written after the count for empty
// contents, and X_StreamError( BAD_DATA ) is thrown if the stream accepts
// fewer bytes than requested instead of silently ignoring the short write.
M_DEFINE_OBJECT_WRITER( T_StringBuilder )
{
	const uint32_t s( item.size( ) );
	writer.write( s );
	if ( s != 0 ) {
		const uint32_t w( writer.stream( ).write( item.data( ) , s ) );
		if ( w != s ) {
			throw X_StreamError( E_StreamError::BAD_DATA );
		}
	}
}