corelib/src/Strings.cc

2044 lines
46 KiB
C++
Raw Normal View History

/******************************************************************************/
/* STRINGS AND RELATED UTILITIES **********************************************/
/******************************************************************************/
#include <atomic>
#include <string>
#include <ebcl/Strings.hh>
#include <ebcl/HashIndex.hh>
#include <ebcl/Threading.hh>
#include <ebcl/Types.hh>
#include <ebcl/Alloc.hh>
#include <ebcl/BinaryStreams.hh>
using namespace ebcl;
/*= STRING STORAGE AND POOLING CLASSES =======================================*/
namespace {
// A_StringDataInternal - Actual interface w/ reference counting methods
//
// Extends the public A_StringData interface with the user-tracking hooks the
// string classes in this file call. The base implementations of addUser( )
// and removeUser( ) do nothing; A_RefCountedString overrides them.
class A_StringDataInternal : virtual public A_StringData
{
public:
// Checked by T_String::addToPool( ) and T_String::usePool( ) before they
// replace this storage with a pooled one.
const bool poolable;
explicit A_StringDataInternal( bool poolable );
virtual ~A_StringDataInternal( );
// Add/remove user, for e.g. reference counting
virtual void addUser( );
virtual void removeUser( );
};
M_ABSTRACT_POINTERS( StringDataInternal );
/*----------------------------------------------------------------------------*/
// T_EmptyString - Fake storage for the empty string
//
// The EmptyString instance is what T_String points at when it is empty. It
// keeps the no-op addUser( ) / removeUser( ) of the base class and cannot be
// copied or moved.
class T_EmptyString final : public A_StringDataInternal
{
public:
// The shared instance used by the T_String constructors in this file.
static T_EmptyString EmptyString;
T_EmptyString( );
T_EmptyString( T_EmptyString const& ) = delete;
T_EmptyString( T_EmptyString&& other ) = delete;
};
// T_StaticString - Storage for read-only strings
//
// Owns a private copy of the bytes it is constructed from. Instances are
// created by T_StringPool::add( ) and keep the no-op user tracking of the
// base class.
class T_StaticString final : public A_StringDataInternal
{
public:
T_StaticString( ) = delete;
T_StaticString( T_StaticString const& ) = delete;
T_StaticString( T_StaticString&& other ) noexcept = delete;
// Copies `size` bytes from `string`.
T_StaticString( char const* string , uint32_t size );
// Releases the private copy.
~T_StaticString( ) override;
};
// A_RefCountedString - A reference-counted string. Used for dynamic strings
// and substrings.
class A_RefCountedString : public A_StringDataInternal
{
private:
// Number of current users. Starts at 1; the object deletes itself when
// removeUser( ) brings it down to 0.
std::atomic< uint32_t > users_;
public:
A_RefCountedString( );
A_RefCountedString( A_RefCountedString const& ) = delete;
A_RefCountedString( A_RefCountedString&& ) = delete;
// Increments the user count.
void addUser( ) override;
// Decrements the user count, destroying the object with the last user.
void removeUser( ) override;
};
// T_DynamicString - Storage for dynamically-created strings
class T_DynamicString final : public A_RefCountedString
{
public:
T_DynamicString( ) = delete;
T_DynamicString( T_DynamicString const& ) = delete;
T_DynamicString( T_DynamicString&& other ) noexcept = delete;
// Pool allocation
void* operator new( size_t size ) noexcept;
void operator delete( void* object ) noexcept;
// When `nodup` is true the object adopts `data` as its buffer; otherwise it
// copies `size` bytes from it. Either way the buffer is released by the
// destructor.
T_DynamicString( char const* data , uint32_t size , bool nodup );
~T_DynamicString( ) override;
};
// T_Substring - Storage for a string that is in fact a part of another
class T_Substring final : public A_RefCountedString
{
private:
// Storage this substring points into. The constructor registers as a user
// of it and the destructor unregisters.
RP_StringDataInternal source_;
public:
T_Substring( ) = delete;
T_Substring( T_Substring const& ) = delete;
T_Substring( T_Substring&& ) = delete;
// Pool allocation
void* operator new( size_t size ) noexcept;
void operator delete( void* object ) noexcept;
// `offset` and `size` are expressed in bytes of `source`.
T_Substring( RP_StringDataInternal source , uint32_t offset , uint32_t size );
~T_Substring( ) override;
};
/*----------------------------------------------------------------------------*/
// T_StringPool - Pool of string storage classes
class T_StringPool final
{
private:
// Hash index over the pooled strings; the indices it yields are positions
// in strings_.
T_HashIndex index_;
// The pooled strings themselves.
T_Array< T_OwnPtr< T_StaticString > > strings_;
public:
// The pool instance used by T_String.
static T_StringPool Pool;
// Lock taken by add( ) and get( ): read lock for lookups, upgraded to a
// write lock when add( ) inserts.
static T_ReadWriteMutex Mutex;
T_StringPool( );
// Returns the pooled storage matching the bytes, creating it if missing.
RP_StringDataInternal add( char const* data , uint32_t size );
// Returns the pooled storage matching the bytes, or nullptr if there is
// none.
RP_StringDataInternal get( char const* data , uint32_t size ) const;
private:
// Returns the index of the matching pooled string, or
// T_HashIndex::INVALID_INDEX. Does no locking of its own.
uint32_t find( char const* data , uint32_t length , uint32_t hash ) const;
};
} // namespace
namespace ebcl { M_DECLARE_HASH( A_StringDataInternal ); }
/*= UTF-8 UTILITY FUNCTIONS ==================================================*/
// Checks whether a NUL-terminated string is well-formed UTF-8 as far as
// sequence structure and overlong encodings are concerned.
//
// Returns false on the first lead byte that is not followed by the expected
// number of continuation bytes, on an overlong encoding, or on a stray byte
// with its high bit set.
bool ebcl::UTF8IsValid( char const* string )
{
    assert( string != nullptr );

    // Continuation bytes have the form 10xxxxxx.
    const auto isContinuation = []( const char byte ) {
        return ( byte & 0xc0 ) == 0x80;
    };

    for ( char const* cursor = string ; *cursor != '\0' ; ) {
        const char lead = *cursor;

        // 11110xxx: four-byte sequence
        if ( ( lead & 0xf8 ) == 0xf0 ) {
            if ( !( isContinuation( cursor[ 1 ] )
                    && isContinuation( cursor[ 2 ] )
                    && isContinuation( cursor[ 3 ] ) ) ) {
                return false;
            }
            // Overlong: the value would have fit in three bytes
            if ( ( lead & 0x07 ) == 0 && ( cursor[ 1 ] & 0x30 ) == 0 ) {
                return false;
            }
            cursor += 4;
            continue;
        }

        // 1110xxxx: three-byte sequence
        if ( ( lead & 0xf0 ) == 0xe0 ) {
            if ( !( isContinuation( cursor[ 1 ] )
                    && isContinuation( cursor[ 2 ] ) ) ) {
                return false;
            }
            // Overlong: the value would have fit in two bytes
            if ( ( lead & 0x0f ) == 0 && ( cursor[ 1 ] & 0x20 ) == 0 ) {
                return false;
            }
            cursor += 3;
            continue;
        }

        // 110xxxxx: two-byte sequence
        if ( ( lead & 0xe0 ) == 0xc0 ) {
            if ( !isContinuation( cursor[ 1 ] ) ) {
                return false;
            }
            // Overlong: the value would have fit in one byte
            if ( ( lead & 0x1e ) == 0 ) {
                return false;
            }
            cursor += 2;
            continue;
        }

        // Anything else must be plain 7-bit
        if ( ( lead & 0x80 ) != 0 ) {
            return false;
        }
        ++cursor;
    }
    return true;
}
/*----------------------------------------------------------------------------*/
// Counts the code points of a NUL-terminated UTF-8 string.
//
// Only lead bytes are inspected; no validation is performed, so the string
// is expected to be well-formed.
uint32_t ebcl::UTF8Length( char const* string )
{
    assert( string != nullptr );

    uint32_t count = 0;
    for ( char const* cursor = string ; *cursor != '\0' ; ++count ) {
        const char lead = *cursor;
        // Sequence length as announced by the lead byte
        const uint32_t step =
              ( lead & 0xf8 ) == 0xf0 ? 4
            : ( lead & 0xf0 ) == 0xe0 ? 3
            : ( lead & 0xe0 ) == 0xc0 ? 2
            : 1;
        cursor += step;
    }
    return count;
}
/*----------------------------------------------------------------------------*/
// Returns the size, in bytes, of a NUL-terminated UTF-8 string, walking it
// one sequence at a time.
//
// Only lead bytes are inspected; no validation is performed, so the string
// is expected to be well-formed.
uint32_t ebcl::UTF8Size( char const* string )
{
    assert( string != nullptr );

    char const* cursor = string;
    for ( char lead = *cursor ; lead != '\0' ; lead = *cursor ) {
        // The three lead-byte patterns are mutually exclusive
        if ( ( lead & 0xe0 ) == 0xc0 ) {
            cursor += 2;
        } else if ( ( lead & 0xf0 ) == 0xe0 ) {
            cursor += 3;
        } else if ( ( lead & 0xf8 ) == 0xf0 ) {
            cursor += 4;
        } else {
            cursor += 1;
        }
    }
    return uint32_t( cursor - string );
}
/*----------------------------------------------------------------------------*/
// Computes the size (bytes) and length (code points) of a NUL-terminated
// string and reports whether it is well-formed UTF-8.
//
// string: the text to examine; must not be null.
// size:   receives the number of bytes before the terminator.
// length: receives the number of sequences found.
//
// Returns true if every sequence has the expected continuation bytes and no
// overlong encoding was found. The validation rules match UTF8IsValid( ) and
// UTF8BufferInfo( ). A sequence cut short by the terminator marks the string
// invalid and counts as one code point; scanning never moves past the
// terminator.
bool ebcl::UTF8Info( char const* string , uint32_t& size , uint32_t& length )
{
    assert( string != nullptr );
    char const* ptr = string;
    uint32_t len = 0;
    bool valid = true;
    char c;
    while ( ( c = *ptr ) != '\0' ) {
        // Sequence length announced by the lead byte
        uint32_t expected;
        if ( ( c & 0xf8 ) == 0xf0 ) {
            expected = 4;
        } else if ( ( c & 0xf0 ) == 0xe0 ) {
            expected = 3;
        } else if ( ( c & 0xe0 ) == 0xc0 ) {
            expected = 2;
        } else {
            expected = 1;
        }

        // Bytes of that sequence that actually exist before the terminator
        uint32_t present = 1;
        while ( present < expected && ptr[ present ] != '\0' ) {
            present ++;
        }

        if ( present < expected ) {
            // Truncated sequence
            valid = false;
        } else if ( expected == 4 ) {
            valid = valid
                // 3 following bytes should be part of this codepoint
                && ( ptr[ 1 ] & 0xc0 ) == 0x80
                && ( ptr[ 2 ] & 0xc0 ) == 0x80
                && ( ptr[ 3 ] & 0xc0 ) == 0x80
                // Check for overlongs
                && ( ( c & 0x07 ) != 0 || ( ptr[ 1 ] & 0x30 ) != 0 );
        } else if ( expected == 3 ) {
            valid = valid
                // 2 following bytes should be part of this codepoint
                && ( ptr[ 1 ] & 0xc0 ) == 0x80
                && ( ptr[ 2 ] & 0xc0 ) == 0x80
                // Check for overlongs
                && ( ( c & 0x0f ) != 0 || ( ptr[ 1 ] & 0x20 ) != 0 );
        } else if ( expected == 2 ) {
            valid = valid
                // Next byte should be part of this codepoint
                && ( ptr[ 1 ] & 0xc0 ) == 0x80
                // Check for overlongs: at least one of the four upper payload
                // bits must be set, otherwise the value fits in one byte
                && ( c & 0x1e ) != 0;
        } else {
            // Single byte: must be plain 7-bit
            valid = valid && ( c & 0x80 ) == 0;
        }

        ptr += present;
        len ++;
    }
    length = len;
    size = uint32_t( ptr - string );
    return valid;
}
/*----------------------------------------------------------------------------*/
// Validates a sized (not NUL-terminated) UTF-8 buffer and counts its code
// points.
//
// data:   start of the buffer; must not be null.
// size:   number of bytes in the buffer.
// length: receives the number of complete sequences found.
//
// Returns true if every sequence is complete, has valid continuation bytes
// and is not overlong. A sequence cut short by the end of the buffer makes
// the result false and is not counted.
bool ebcl::UTF8BufferInfo( char const* data , uint32_t size , uint32_t& length )
{
    assert( data != nullptr );

    char const* const end = data + size;
    uint32_t count = 0;
    bool wellFormed = true;

    for ( char const* cursor = data ; cursor < end ; ) {
        const char lead = *cursor;
        // Bytes left in the buffer, lead byte included
        const auto remaining = end - cursor;
        bool complete = true;

        if ( ( lead & 0xf8 ) == 0xf0 ) {
            // Four-byte sequence
            complete = remaining > 3;
            wellFormed = wellFormed && complete
                && ( cursor[ 1 ] & 0xc0 ) == 0x80
                && ( cursor[ 2 ] & 0xc0 ) == 0x80
                && ( cursor[ 3 ] & 0xc0 ) == 0x80
                // Overlong check
                && ( ( lead & 0x07 ) != 0 || ( cursor[ 1 ] & 0x30 ) != 0 );
            cursor += 4;
        } else if ( ( lead & 0xf0 ) == 0xe0 ) {
            // Three-byte sequence
            complete = remaining > 2;
            wellFormed = wellFormed && complete
                && ( cursor[ 1 ] & 0xc0 ) == 0x80
                && ( cursor[ 2 ] & 0xc0 ) == 0x80
                // Overlong check
                && ( ( lead & 0x0f ) != 0 || ( cursor[ 1 ] & 0x20 ) != 0 );
            cursor += 3;
        } else if ( ( lead & 0xe0 ) == 0xc0 ) {
            // Two-byte sequence
            complete = remaining > 1;
            wellFormed = wellFormed && complete
                && ( cursor[ 1 ] & 0xc0 ) == 0x80
                // Overlong check
                && ( lead & 0x1e ) != 0;
            cursor += 2;
        } else {
            // Single byte: must be plain 7-bit
            wellFormed = wellFormed && ( lead & 0x80 ) == 0;
            cursor += 1;
        }

        if ( complete ) {
            ++count;
        }
    }

    length = count;
    return wellFormed;
}
/*----------------------------------------------------------------------------*/
// Decodes the UTF-8 sequence starting at `data`.
//
// data:  first byte of the sequence; must not be null.
// bytes: receives the sequence length (1 to 4) announced by the lead byte.
//
// Returns the decoded value. No validation is done: continuation bytes are
// masked and combined as-is, and a byte that matches no multi-byte lead
// pattern is returned unchanged with a length of 1.
uint32_t ebcl::UTF8GetCodepoint( char const* data , uint32_t& bytes )
{
    assert( data != nullptr );
    const char lead = data[ 0 ];

    if ( ( lead & 0xf8 ) == 0xf0 ) {
        bytes = 4;
        const int b1 = data[ 1 ] & 0x3f;
        const int b2 = data[ 2 ] & 0x3f;
        const int b3 = data[ 3 ] & 0x3f;
        return ( ( lead & 0x07 ) << 18 ) | ( b1 << 12 ) | ( b2 << 6 ) | b3;
    }

    if ( ( lead & 0xf0 ) == 0xe0 ) {
        bytes = 3;
        const int b1 = data[ 1 ] & 0x3f;
        const int b2 = data[ 2 ] & 0x3f;
        return ( ( lead & 0x0f ) << 12 ) | ( b1 << 6 ) | b2;
    }

    if ( ( lead & 0xe0 ) == 0xc0 ) {
        bytes = 2;
        const int b1 = data[ 1 ] & 0x3f;
        return ( ( lead & 0x1f ) << 6 ) | b1;
    }

    bytes = 1;
    return lead;
}
/*----------------------------------------------------------------------------*/
uint32_t ebcl::UTF8GetCodepoint( char const* data )
{
assert( data != nullptr );
if ( ( data[ 0 ] & 0xf8 ) == 0xf0 ) {
return ( ( data[ 0 ] & 0x07 ) << 18 )
| ( ( data[ 1 ] & 0x3f ) << 12 )
| ( ( data[ 2 ] & 0x3f ) << 6 )
| ( data[ 3 ] & 0x3f );
} else if ( ( data[ 0 ] & 0xf0 ) == 0xe0 ) {
return ( ( data[ 0 ] & 0x0f ) << 12 )
| ( ( data[ 1 ] & 0x3f ) << 6 )
| ( data[ 2 ] & 0x3f );
} else if ( ( data[ 0 ] & 0xe0 ) == 0xc0 ) {
return ( ( data[ 0 ] & 0x1f ) << 6 )
| ( data[ 1 ] & 0x3f );
} else {
return data[ 0 ];
}
}
/*----------------------------------------------------------------------------*/
// Encodes `codepoint` as UTF-8 into `output`.
//
// output:    destination buffer.
// available: number of bytes that may be written to `output`.
// codepoint: value to encode.
//
// Returns the number of bytes written (1 to 4), or 0 if the value is
// 0x110000 or above or does not fit in `available` bytes. Nothing is written
// when 0 is returned.
uint32_t ebcl::UTF8PutCodepoint( char* output , uint32_t available , uint32_t codepoint )
{
    // Work out how many bytes the encoding requires
    uint32_t needed;
    if ( codepoint < 0x80 ) {
        needed = 1;
    } else if ( codepoint < 0x800 ) {
        needed = 2;
    } else if ( codepoint < 0x10000 ) {
        needed = 3;
    } else if ( codepoint < 0x110000 ) {
        needed = 4;
    } else {
        return 0;
    }
    if ( available < needed ) {
        return 0;
    }

    switch ( needed ) {
        case 1:
            output[ 0 ] = char( codepoint );
            break;
        case 2:
            output[ 0 ] = char( ( codepoint >> 6 ) | 0xc0 );
            output[ 1 ] = char( ( codepoint & 0x3f ) | 0x80 );
            break;
        case 3:
            output[ 0 ] = char( ( codepoint >> 12 ) | 0xe0 );
            output[ 1 ] = char( ( ( codepoint >> 6 ) & 0x3f ) | 0x80 );
            output[ 2 ] = char( ( codepoint & 0x3f ) | 0x80 );
            break;
        default:
            output[ 0 ] = char( ( codepoint >> 18 ) | 0xf0 );
            output[ 1 ] = char( ( ( codepoint >> 12 ) & 0x3f ) | 0x80 );
            output[ 2 ] = char( ( ( codepoint >> 6 ) & 0x3f ) | 0x80 );
            output[ 3 ] = char( ( codepoint & 0x3f ) | 0x80 );
            break;
    }
    return needed;
}
/*----------------------------------------------------------------------------*/
// Returns the byte offset of the code point at position `index` in `input`.
//
// Walks `index` sequences forward using lead bytes only. Nothing stops the
// walk at the end of the data, so `index` must not exceed the number of code
// points available.
uint32_t ebcl::UTF8GetMemoryOffset( char const* input , uint32_t index )
{
    assert( input != nullptr );

    char const* cursor = input;
    for ( ; index != 0 ; --index ) {
        const char lead = *cursor;
        const uint32_t step =
              ( lead & 0xf8 ) == 0xf0 ? 4
            : ( lead & 0xf0 ) == 0xe0 ? 3
            : ( lead & 0xe0 ) == 0xc0 ? 2
            : 1;
        cursor += step;
    }
    return cursor - input;
}
/*----------------------------------------------------------------------------*/
// Parses an unsigned integer from the first `size` bytes of the UTF-8 text
// at `input`.
//
// input, size: text to parse.
// ok:          optional; set to false on entry and to true only when at
//              least one digit was converted without overflow and no invalid
//              character was met.
// base:        numeric base; 0 selects it from the text ("0x"/"0X" -> 16,
//              "0b"/"0B" -> 2, other leading "0" -> 8, anything else -> 10).
// useSep, separator: when useSep is true, occurrences of `separator` are
//              skipped.
//
// Leading whitespace and a single '+' are accepted. On overflow the result
// is UINT64_MAX and *ok stays false. When parsing stops on an invalid
// character, *ok stays false and whatever was accumulated so far is
// returned.
uint64_t ebcl::UTF8ToUnsignedInteger( char const* input , uint32_t size , bool * ok ,
int base , bool useSep , uint32_t separator )
{
char const* inputPos( input );
char const* const inputEnd( input + size );
if ( ok ) {
*ok = false;
}
// Find start
bool checkBase( false );
bool hadSign( false );
while ( 1 ) {
if ( inputPos >= inputEnd ) {
return 0;
}
uint32_t nBytes;
const T_Character c( UTF8GetCodepoint( inputPos , nBytes ) );
if ( !c.isWhitespace( ) && ( !useSep || c != separator ) ) {
if ( c == '+' ) {
// Only one sign is allowed
if ( hadSign ) {
return 0;
}
hadSign = true;
} else if ( c.isNumeric( ) || c.isAlpha( ) ) {
// A leading '0' with base 0 triggers prefix detection; the '0' itself is
// consumed here, any other first digit is left for the conversion loop.
checkBase = ( base == 0 && c == '0' );
if ( checkBase ) {
inputPos += nBytes;
}
break;
} else {
return 0;
}
}
inputPos += nBytes;
}
// Detect base
if ( checkBase ) {
// The text was just "0"
if ( inputPos >= inputEnd ) {
if ( ok != nullptr ) {
*ok = true;
}
return 0;
}
uint32_t nBytes;
const T_Character nc( UTF8GetCodepoint( inputPos , nBytes ) );
// Whether the character after the '0' is a prefix letter to skip
bool next( true );
if ( nc == 'x' || nc == 'X' ) {
base = 16;
} else if ( nc == 'b' || nc == 'B' ) {
base = 2;
} else {
base = 8;
next = false;
}
if ( next ) {
inputPos += nBytes;
}
} else if ( base == 0 ) {
base = 10;
}
// Start converting
const uint64_t ubase( base );
// accum may be multiplied by ubase without overflow while it is below
// cutoff; at exactly cutoff, only digits up to limit still fit.
const uint64_t cutoff( UINT64_MAX / ubase );
const uint64_t limit( UINT64_MAX % ubase );
uint64_t accum( 0 );
// 0: nothing usable, 1: digits converted, -1: overflow
int any( 0 );
while ( inputPos < inputEnd ) {
uint32_t nBytes;
const T_Character c( UTF8GetCodepoint( inputPos , nBytes ) );
inputPos += nBytes;
if ( useSep && c == separator ) {
continue;
}
uint32_t value;
if ( c.isNumeric( ) ) {
value = c - '0';
} else if ( c.isAlpha( ) ) {
// 'A' is 65, so upper-case letters map to 10 and above
value = c.toUpper( ) - 55;
} else {
any = 0;
break;
}
if ( value >= ubase ) {
any = 0;
break;
}
if ( any < 0 || accum > cutoff || ( accum == cutoff && value > limit ) ) {
any = -1;
} else {
any = 1;
accum = accum * ubase + value;
}
}
if ( any < 0 ) {
accum = UINT64_MAX;
} else if ( any > 0 && ok ) {
*ok = true;
}
return accum;
}
/*----------------------------------------------------------------------------*/
// Parses a signed integer from the first `size` bytes of the UTF-8 text at
// `input`.
//
// input, size: text to parse.
// ok:          optional; set to false on entry and to true only when at
//              least one digit was converted without overflow and no invalid
//              character was met.
// base:        numeric base; 0 selects it from the text ("0x"/"0X" -> 16,
//              "0b"/"0B" -> 2, other leading "0" -> 8, anything else -> 10).
// useSep, separator: when useSep is true, occurrences of `separator` are
//              skipped.
//
// Leading whitespace and a single '+' or '-' are accepted. On overflow the
// result is INT64_MIN or INT64_MAX depending on the sign and *ok stays
// false. When parsing stops on an invalid character, *ok stays false and
// whatever was accumulated so far is returned without the sign applied.
int64_t ebcl::UTF8ToInteger( char const* input , uint32_t size , bool * ok ,
int base , bool useSep , uint32_t separator )
{
char const* inputPos( input );
char const* const inputEnd( input + size );
if ( ok ) {
*ok = false;
}
// Find start
bool checkBase( false );
bool hadSign( false );
bool neg( false );
while ( 1 ) {
if ( inputPos >= inputEnd ) {
return 0;
}
uint32_t nBytes;
const T_Character c( UTF8GetCodepoint( inputPos , nBytes ) );
if ( !c.isWhitespace( ) && ( !useSep || c != separator ) ) {
if ( c == '+' || c == '-' ) {
// Only one sign is allowed
if ( hadSign ) {
return 0;
}
neg = ( c == '-' );
hadSign = true;
} else if ( c.isNumeric( ) || c.isAlpha( ) ) {
// A leading '0' with base 0 triggers prefix detection; the '0' itself is
// consumed here, any other first digit is left for the conversion loop.
checkBase = ( base == 0 && c == '0' );
if ( checkBase ) {
inputPos += nBytes;
}
break;
} else {
return 0;
}
}
inputPos += nBytes;
}
// Detect base
if ( checkBase ) {
// The text was just "0" (possibly signed)
if ( inputPos >= inputEnd ) {
if ( ok != nullptr ) {
*ok = true;
}
return 0;
}
uint32_t nBytes;
const T_Character nc( UTF8GetCodepoint( inputPos , nBytes ) );
// Whether the character after the '0' is a prefix letter to skip
bool next( true );
if ( nc == 'x' || nc == 'X' ) {
base = 16;
} else if ( nc == 'b' || nc == 'B' ) {
base = 2;
} else {
base = 8;
next = false;
}
if ( next ) {
inputPos += nBytes;
}
} else if ( base == 0 ) {
base = 10;
}
// Start converting
const uint64_t ubase( base );
// Largest magnitude allowed: INT64_MAX, or the magnitude of INT64_MIN for
// negative numbers, computed without overflowing a signed type.
const uint64_t max( neg ? ( uint64_t( 0 - ( INT64_MIN + INT64_MAX ) ) + INT64_MAX ) : INT64_MAX );
// accum may be multiplied by ubase without exceeding max while it is below
// cutoff; at exactly cutoff, only digits up to limit still fit.
const uint64_t cutoff( max / ubase );
const uint64_t limit( max % ubase );
uint64_t accum( 0 );
// 0: nothing usable, 1: digits converted, -1: overflow
int any( 0 );
while ( inputPos < inputEnd ) {
uint32_t nBytes;
const T_Character c( UTF8GetCodepoint( inputPos , nBytes ) );
inputPos += nBytes;
if ( useSep && c == separator ) {
continue;
}
uint32_t value;
if ( c.isNumeric( ) ) {
value = c - '0';
} else if ( c.isAlpha( ) ) {
// 'A' is 65, so upper-case letters map to 10 and above
value = c.toUpper( ) - 55;
} else {
any = 0;
break;
}
if ( value >= ubase ) {
any = 0;
break;
}
if ( any < 0 || accum > cutoff || ( accum == cutoff && value > limit ) ) {
any = -1;
} else {
any = 1;
accum = accum * ubase + value;
}
}
if ( any < 0 ) {
accum = neg ? INT64_MIN : INT64_MAX;
} else if ( any > 0 ) {
if ( neg ) {
// Two's complement negation of the unsigned magnitude
accum = (~accum) + 1;
}
if ( ok ) {
*ok = true;
}
}
return accum;
}
/*----------------------------------------------------------------------------*/
// Parses a floating-point number from the first `size` bytes of the UTF-8
// text at `input`.
//
// input, size:  text to parse.
// ok:           optional; set to false on entry and to true only when the
//               whole text was accepted and strtod( ) left errno at 0.
// decimalPoint: code point accepted as the decimal separator.
// useSep, separator: when useSep is true, single occurrences of `separator`
//               are accepted between digits of the integer part.
//
// The text is checked by a small state machine that copies an equivalent
// plain representation ('.' as decimal point, 'e' as exponent marker, no
// separators) into a scratch buffer, which is then handed to strtod( ).
// Returns 0 whenever the text is rejected.
//
// The scratch buffer lives in a std::string rather than a variable-length
// array: VLAs are not standard C++, and their stack use would be dictated by
// the caller-supplied size.
double ebcl::UTF8ToDouble( char const* input , uint32_t size ,
        bool * ok , uint32_t decimalPoint ,
        bool useSep , uint32_t separator )
{
    char const* inputPos( input );
    char const* const inputEnd( input + size );
    std::string output;
    output.reserve( size );
    if ( ok ) {
        *ok = false;
    }

    enum E_State_ {
        INIT ,
        HAD_SIGN ,
        INT_PART ,
        HAD_SEP ,
        FRACT_PART ,
        AFTER_EXP ,
        AFTER_EXP_SIGN ,
        EXP_PART
    };
    E_State_ state( INIT );

    while ( inputPos < inputEnd ) {
        uint32_t nBytes;
        const T_Character c( UTF8GetCodepoint( inputPos , nBytes ) );
        inputPos += nBytes;

        switch ( state ) {

            case INIT:
                // Leading whitespace is skipped
                if ( c.isWhitespace( ) ) {
                    continue;
                }
                if ( c == '+' || c == '-' ) {
                    output.push_back( c );
                    state = HAD_SIGN;
                } else if ( c == decimalPoint ) {
                    output.push_back( '.' );
                    state = FRACT_PART;
                } else if ( c.isNumeric( ) ) {
                    output.push_back( c );
                    state = INT_PART;
                } else {
                    return 0;
                }
                break;

            case HAD_SIGN:
                if ( c == decimalPoint ) {
                    output.push_back( '.' );
                    state = FRACT_PART;
                } else if ( c.isNumeric( ) ) {
                    output.push_back( c );
                    state = INT_PART;
                } else {
                    return 0;
                }
                break;

            case INT_PART:
                if ( c == decimalPoint ) {
                    output.push_back( '.' );
                    state = FRACT_PART;
                } else if ( c == 'e' || c == 'E' ) {
                    output.push_back( 'e' );
                    state = AFTER_EXP;
                } else if ( c.isNumeric( ) ) {
                    output.push_back( c );
                } else if ( useSep && c == separator ) {
                    // Separators are dropped from the scratch buffer
                    state = HAD_SEP;
                } else {
                    return 0;
                }
                break;

            case HAD_SEP:
                // A separator must be followed by a digit
                if ( c.isNumeric( ) ) {
                    output.push_back( c );
                    state = INT_PART;
                } else {
                    return 0;
                }
                break;

            case FRACT_PART:
                if ( c == 'e' || c == 'E' ) {
                    output.push_back( 'e' );
                    state = AFTER_EXP;
                } else if ( c.isNumeric( ) ) {
                    output.push_back( c );
                    state = FRACT_PART;
                } else {
                    return 0;
                }
                break;

            case AFTER_EXP:
                if ( c == '+' || c == '-' ) {
                    output.push_back( c );
                    state = AFTER_EXP_SIGN;
                } else if ( c.isNumeric( ) ) {
                    output.push_back( c );
                    state = EXP_PART;
                } else {
                    return 0;
                }
                break;

            case AFTER_EXP_SIGN:
                if ( c.isNumeric( ) ) {
                    output.push_back( c );
                    state = EXP_PART;
                } else {
                    return 0;
                }
                break;

            case EXP_PART:
                if ( c.isNumeric( ) ) {
                    output.push_back( c );
                } else {
                    return 0;
                }
                break;
        }
    }

    // The text must not end right after a sign, a separator or the exponent
    // marker, and must not be empty
    if ( state == INIT || state == HAD_SEP || state == HAD_SIGN
            || state == AFTER_EXP || state == AFTER_EXP_SIGN ) {
        return 0;
    }

    errno = 0;
    auto v( strtod( output.c_str( ) , nullptr ) );
    if ( errno == 0 && ok ) {
        *ok = true;
    }
    return v;
}
/*= T_Character ==============================================================*/
// Reads one UTF-8 encoded character from `reader`.
//
// The lead byte tells how many continuation bytes follow. They are read one
// statement at a time: the operands of a single `|` expression have no
// guaranteed evaluation order, so reading them all inside one expression
// could consume the bytes in the wrong order.
//
// No validation is done; a byte that matches no multi-byte lead pattern is
// returned unchanged.
M_DEFINE_OBJECT_READER( T_Character )
{
    const char first( reader.read< char >( ) );

    int codepoint;
    int continuation;
    if ( ( first & 0xf8 ) == 0xf0 ) {
        codepoint = first & 0x07;
        continuation = 3;
    } else if ( ( first & 0xf0 ) == 0xe0 ) {
        codepoint = first & 0x0f;
        continuation = 2;
    } else if ( ( first & 0xe0 ) == 0xc0 ) {
        codepoint = first & 0x1f;
        continuation = 1;
    } else {
        return first;
    }

    // Each continuation byte contributes its low 6 bits, most significant
    // group first
    for ( int i = 0 ; i < continuation ; i ++ ) {
        const char next( reader.read< char >( ) );
        codepoint = ( codepoint << 6 ) | ( next & 0x3f );
    }
    return codepoint;
}
// Writes `item` to `writer` as a UTF-8 sequence of 1 to 4 bytes.
//
// Values of 0x10000 and above all take the four-byte form; no range check is
// made.
M_DEFINE_OBJECT_WRITER( T_Character )
{
    const uint32_t codepoint( item );

    // Encode first, then emit the bytes in order
    char encoded[ 4 ];
    uint32_t count;
    if ( codepoint < 0x80 ) {
        encoded[ 0 ] = codepoint;
        count = 1;
    } else if ( codepoint < 0x800 ) {
        encoded[ 0 ] = ( codepoint >> 6 ) | 0xc0;
        encoded[ 1 ] = ( codepoint & 0x3f ) | 0x80;
        count = 2;
    } else if ( codepoint < 0x10000 ) {
        encoded[ 0 ] = ( codepoint >> 12 ) | 0xe0;
        encoded[ 1 ] = ( ( codepoint >> 6 ) & 0x3f ) | 0x80;
        encoded[ 2 ] = ( codepoint & 0x3f ) | 0x80;
        count = 3;
    } else {
        encoded[ 0 ] = ( codepoint >> 18 ) | 0xf0;
        encoded[ 1 ] = ( ( codepoint >> 12 ) & 0x3f ) | 0x80;
        encoded[ 2 ] = ( ( codepoint >> 6 ) & 0x3f ) | 0x80;
        encoded[ 3 ] = ( codepoint & 0x3f ) | 0x80;
        count = 4;
    }

    for ( uint32_t i = 0 ; i < count ; i ++ ) {
        writer.write< char >( encoded[ i ] );
    }
}
/*= A_StringData =============================================================*/
// Destructor of the public string data interface; it has nothing to release
// itself.
inline A_StringData::~A_StringData( )
{ }
/*= A_StringDataInternal =====================================================*/
// Records whether the storage may be swapped for a pooled one.
inline A_StringDataInternal::A_StringDataInternal( bool poolable )
: poolable( poolable )
{ }
// Nothing to release at this level.
A_StringDataInternal::~A_StringDataInternal( )
{ }
// Default user tracking: no-op. Storage classes that are not
// reference-counted keep these implementations.
void A_StringDataInternal::addUser( )
{ }
void A_StringDataInternal::removeUser( )
{ }
// Hash of a string storage object: HashData( ) over its raw bytes.
inline M_DEFINE_HASH( A_StringDataInternal )
{
    const auto bytes( reinterpret_cast< uint8_t const* >( item.data( ) ) );
    const auto count( item.size( ) );
    return HashData( bytes , count );
}
/*= T_EmptyString ============================================================*/
// Sets up the empty string storage: no buffer, zero bytes, zero code points,
// flagged as valid and never poolable.
T_EmptyString::T_EmptyString( )
    : A_StringDataInternal( false )
{
    valid_ = true;
    length_ = 0;
    size_ = 0;
    data_ = nullptr;
}

// The shared empty-string instance.
T_EmptyString T_EmptyString::EmptyString;
/*= T_StaticString ===========================================================*/
// Builds a non-poolable storage object holding a private copy of `size`
// bytes from `data`, and records its UTF-8 validity and code point count.
T_StaticString::T_StaticString( char const* data , uint32_t size )
    : A_StringDataInternal( false )
{
    char* const copy( static_cast< char* >( ::operator new ( size ) ) );
    memcpy( copy , data , size );
    data_ = copy;
    size_ = size;
    valid_ = UTF8BufferInfo( data , size , length_ );
}

// Releases the private copy.
T_StaticString::~T_StaticString( )
{
    ::operator delete ( data_ );
}
/*= A_RefCountedString =======================================================*/
// Reference-counted storage is poolable and starts with a single user (its
// creator).
inline A_RefCountedString::A_RefCountedString( )
: A_StringDataInternal( true ) , users_( 1 )
{ }
// Registers one more user of this storage.
void A_RefCountedString::addUser( )
{
users_.fetch_add( 1 , std::memory_order_acq_rel );
}
// Unregisters one user; the object destroys itself once the last user is
// gone.
//
// The decrement uses acquire-release ordering so that writes made by other
// users happen-before the destruction. The follow-up load uses acquire
// ordering: acq_rel is not a permitted ordering for an atomic load.
void A_RefCountedString::removeUser( )
{
    if ( users_.fetch_sub( 1 , std::memory_order_acq_rel ) == 1 ) {
        std::atomic_thread_fence( std::memory_order_acq_rel );
        if ( users_.load( std::memory_order_acquire ) == 0 ) {
            delete this;
        }
    }
}
/*= T_DynamicString ==========================================================*/
// Pool allocator used for T_DynamicString objects; being thread_local, each
// thread gets its own instance.
// NOTE(review): the meaning of the 32 and 16 template arguments is defined
// by T_ThreadedPoolAllocator, which is not visible in this file.
namespace {
static thread_local T_ThreadedPoolAllocator<
sizeof( T_DynamicString ) , alignof( T_DynamicString ) ,
32 , 16
> DynamicStringAllocator_;
}
// Allocates from the calling thread's pool.
void* T_DynamicString::operator new(
const size_t size ) noexcept
{
return DynamicStringAllocator_.allocate( size );
}
// Returns the object to the calling thread's pool.
// NOTE(review): an object released on another thread than the one that
// created it goes through that other thread's allocator instance; presumably
// T_ThreadedPoolAllocator supports this - confirm.
void T_DynamicString::operator delete(
void* const object ) noexcept
{
DynamicStringAllocator_.free( object );
}
/*----------------------------------------------------------------------------*/
// Creates reference-counted storage for `size` bytes.
//
// With `nodup` set, `data` itself becomes the buffer (and will be released
// by the destructor); otherwise the bytes are copied into a new buffer.
// UTF-8 validity and code point count are recorded in both cases.
T_DynamicString::T_DynamicString( char const* data , uint32_t size , bool nodup )
    : A_RefCountedString( )
{
    size_ = size;
    if ( !nodup ) {
        char* const copy( static_cast< char* >( ::operator new ( size ) ) );
        memcpy( copy , data , size );
        data_ = copy;
    } else {
        data_ = const_cast< char* >( data );
    }
    valid_ = UTF8BufferInfo( data , size , length_ );
}

// Releases the buffer, whether it was copied or adopted.
T_DynamicString::~T_DynamicString( )
{
    ::operator delete ( data_ );
}
/*= T_Substring ==============================================================*/
// Pool allocator used for T_Substring objects; being thread_local, each
// thread gets its own instance.
// NOTE(review): the meaning of the 32 and 4 template arguments is defined by
// T_ThreadedPoolAllocator, which is not visible in this file.
namespace {
static thread_local T_ThreadedPoolAllocator<
sizeof( T_Substring ) , alignof( T_Substring ) ,
32 , 4
> SubstringAllocator_;
}
// Allocates from the calling thread's pool.
void* T_Substring::operator new(
const size_t size ) noexcept
{
return SubstringAllocator_.allocate( size );
}
// Returns the object to the calling thread's pool.
// NOTE(review): an object released on another thread than the one that
// created it goes through that other thread's allocator instance; presumably
// T_ThreadedPoolAllocator supports this - confirm.
void T_Substring::operator delete(
void* const object ) noexcept
{
SubstringAllocator_.free( object );
}
/*----------------------------------------------------------------------------*/
// Creates storage that views `size` bytes of `source`, starting `offset`
// bytes in. The source is registered as used for as long as the substring
// exists, and validity / code point count are computed for the viewed bytes
// only.
T_Substring::T_Substring( RP_StringDataInternal source , uint32_t offset , uint32_t size )
    : A_RefCountedString( ) , source_( source )
{
    assert( size + offset <= source_->size( ) );
    source_->addUser( );

    size_ = size;
    data_ = const_cast< char* >( source_->data( ) + offset );
    valid_ = UTF8BufferInfo( data_ , size_ , length_ );
}

// Lets go of the source; the viewed bytes are not owned by this object.
T_Substring::~T_Substring( )
{
    source_->removeUser( );
}
/*= T_StringPool =============================================================*/
// The pool instance and the read/write mutex taken by add( ) and get( ).
T_StringPool T_StringPool::Pool;
T_ReadWriteMutex T_StringPool::Mutex;
/*----------------------------------------------------------------------------*/
// NOTE(review): the exact meaning of the sizing arguments is defined by
// T_HashIndex and T_Array, which are not visible in this file.
T_StringPool::T_StringPool( )
: index_( 16384 , 4096 , 4096 ) , strings_( 4096 )
{ }
/*----------------------------------------------------------------------------*/
// Returns the pooled storage holding exactly these bytes, inserting a new
// T_StaticString if there is none yet.
//
// The lookup runs under a read lock, which is upgraded to a write lock for
// the insertion.
RP_StringDataInternal T_StringPool::add( char const* data , uint32_t size )
{
    T_ReadLock lock( T_StringPool::Mutex );
    const auto hash( HashData( reinterpret_cast< uint8_t const* >( data ) , size ) );

    const auto existing( find( data , size , hash ) );
    if ( existing != T_HashIndex::INVALID_INDEX ) {
        return strings_[ existing ].get( );
    }

    // Not pooled yet: register the hash, then store the new string
    const T_WriteLock wLock( lock.upgrade( ) );
    index_.add( hash );
    const auto added( strings_.add( NewOwned< T_StaticString >( data , size ) ) );
    return strings_[ added ].get( );
}
/*----------------------------------------------------------------------------*/
// Looks up the pooled storage holding exactly these bytes, under a read
// lock. Returns nullptr if the pool has no such string.
RP_StringDataInternal T_StringPool::get( char const* data , uint32_t size ) const
{
    const T_ReadLock lock( T_StringPool::Mutex );
    const auto bytes( reinterpret_cast< uint8_t const* >( data ) );
    const auto found( find( data , size , HashData( bytes , size ) ) );
    if ( found != T_HashIndex::INVALID_INDEX ) {
        return strings_[ found ].get( );
    }
    return nullptr;
}
/*----------------------------------------------------------------------------*/
// Walks the hash chain for `hash` and returns the index of the pooled string
// whose bytes match, or T_HashIndex::INVALID_INDEX if none does. Callers
// hold the lock.
uint32_t T_StringPool::find( char const* data , uint32_t sz , uint32_t hash ) const
{
    for ( uint32_t idx = index_.first( hash ) ;
            idx != T_HashIndex::INVALID_INDEX ;
            idx = index_.next( idx ) ) {
        auto const& candidate( strings_[ idx ] );
        if ( candidate->size( ) == sz
                && memcmp( candidate->data( ) , data , sz ) == 0 ) {
            return idx;
        }
    }
    return T_HashIndex::INVALID_INDEX;
}
/*= T_StringIterator =========================================================*/
// Creates an iterator over `data`, positioned on the code point at `index`.
//
// The iterator registers itself as a user of the storage. A null or empty
// storage, or an index that lands on the end of the data, yields the same
// "no current character" state that next( ) produces at the end (code point
// and byte count both 0). This avoids decoding through a null buffer or
// past the last byte.
T_StringIterator::T_StringIterator( RP_StringData data , uint32_t index )
    : data_( data ) , index_( index )
{
    if ( data_ == nullptr ) {
        pos_ = codepoint_ = bytes_ = 0;
        return;
    }
    dynamic_cast< RP_StringDataInternal >( data_ )->addUser( );

    // The empty string has no buffer to walk
    if ( data_->size( ) == 0 ) {
        pos_ = codepoint_ = bytes_ = 0;
        return;
    }

    pos_ = UTF8GetMemoryOffset( data_->data( ) , index );
    if ( pos_ >= data_->size( ) ) {
        // Nothing left to decode at this position
        codepoint_ = bytes_ = 0;
    } else {
        codepoint_ = UTF8GetCodepoint( data_->data( ) + pos_ , bytes_ );
    }
}
// Copies another iterator's position and registers as one more user of the
// storage it iterates over.
T_StringIterator::T_StringIterator( T_StringIterator const& other )
    : data_( other.data_ ) , pos_( other.pos_ ) , index_( other.index_ ) ,
      codepoint_( other.codepoint_ ) , bytes_( other.bytes_ )
{
    if ( data_ == nullptr ) {
        return;
    }
    const auto shared( dynamic_cast< RP_StringDataInternal >( data_ ) );
    shared->addUser( );
}
/*----------------------------------------------------------------------------*/
// Unregisters the iterator from the storage it was iterating over, if any.
T_StringIterator::~T_StringIterator( )
{
    if ( data_ == nullptr ) {
        return;
    }
    const auto shared( dynamic_cast< RP_StringDataInternal >( data_ ) );
    shared->removeUser( );
}
/*----------------------------------------------------------------------------*/
// Makes this iterator a copy of `other`.
//
// The new storage is acquired before the old one is released. Releasing
// first would destroy the storage when both iterators refer to it (or on
// self-assignment) and this iterator is its only remaining user, leaving a
// dangling pointer behind.
T_StringIterator& T_StringIterator::operator= ( T_StringIterator const& other )
{
    if ( other.data_ != nullptr ) {
        dynamic_cast< RP_StringDataInternal >( other.data_ )->addUser( );
    }
    if ( data_ != nullptr ) {
        dynamic_cast< RP_StringDataInternal >( data_ )->removeUser( );
    }
    data_ = other.data_;
    pos_ = other.pos_;
    index_ = other.index_;
    codepoint_ = other.codepoint_;
    bytes_ = other.bytes_;
    return *this;
}
/*----------------------------------------------------------------------------*/
// Exchanges the complete state of two iterators. User counts are unaffected:
// each storage keeps the same number of iterators referring to it.
void ebcl::swap( T_StringIterator& lhs , T_StringIterator& rhs ) noexcept
{
    using std::swap;
    // Position
    swap( lhs.index_ , rhs.index_ );
    swap( lhs.pos_ , rhs.pos_ );
    // Current character
    swap( lhs.codepoint_ , rhs.codepoint_ );
    swap( lhs.bytes_ , rhs.bytes_ );
    // Storage
    swap( lhs.data_ , rhs.data_ );
}
/*----------------------------------------------------------------------------*/
// Moves to the following code point.
//
// Returns false, without moving, if the iterator is already at the end. When
// the move reaches the end of the data, the current code point and byte
// count are cleared and the index is left unchanged.
bool T_StringIterator::next( )
{
    if ( atEnd( ) ) {
        return false;
    }

    pos_ += bytes_;
    const bool reachedEnd( pos_ == data_->size( ) );
    if ( !reachedEnd ) {
        codepoint_ = UTF8GetCodepoint( data_->data( ) + pos_ , bytes_ );
        index_ ++;
        return true;
    }

    codepoint_ = bytes_ = 0;
    return true;
}
/*= T_String =================================================================*/
// Default construction: refers to the shared empty-string storage, so no
// allocation takes place.
T_String::T_String( ) noexcept
: data_( &T_EmptyString::EmptyString )
{ }
// Builds a string from a NUL-terminated C string.
//
// A null or empty input gives the empty string. Otherwise the pool is
// consulted first, and a new dynamic copy is only made if the text is not
// pooled.
T_String::T_String( char const* initial )
{
    if ( initial == nullptr || *initial == 0 ) {
        data_ = &T_EmptyString::EmptyString;
        return;
    }

    const uint32_t len( strlen( initial ) );
    const auto pooled( T_StringPool::Pool.get( initial , len ) );
    if ( pooled != nullptr ) {
        data_ = pooled;
    } else {
        data_ = new T_DynamicString( initial , len , false );
    }
}
// Builds a string by taking over a string builder's buffer.
//
// The delegated constructor only adopts the buffer when the builder holds at
// least one byte; otherwise the string is the shared empty string. The
// builder is therefore only stripped of its buffer when it was actually
// adopted. Clearing the pointer of a builder that has a buffer but no
// content would leak that buffer.
T_String::T_String( T_StringBuilder&& sb )
    : T_String( sb.data_ , sb.size_ , true )
{
    if ( sb.data_ != nullptr && sb.size_ != 0 ) {
        sb.data_ = nullptr;
        sb.size_ = sb.length_ = sb.capacity_ = 0;
    }
}
// Builds a string from a copy of a string builder's current contents; the
// builder is left untouched.
// NOTE(review): this relies on the default value of the third parameter of
// the delegated constructor, which is declared outside this file -
// presumably nodup = false.
T_String::T_String( T_StringBuilder const& sb )
: T_String( sb.data_ , sb.size_ )
{ }
// Builds a string from `size` bytes at `data`.
//
// A null pointer or a size of 0 gives the shared empty string. Otherwise a
// dynamic string is created, which adopts `data` when `nodup` is true and
// copies it otherwise.
T_String::T_String( char const* data , uint32_t size , bool nodup )
{
    const bool hasContent( data != nullptr && size != 0 );
    if ( hasContent ) {
        data_ = new T_DynamicString( data , size , nodup );
    } else {
        data_ = &T_EmptyString::EmptyString;
    }
}
/*----------------------------------------------------------------------------*/
// Copy construction shares the source's storage and registers as one more
// user of it.
T_String::T_String( T_String const& source )
    : data_( source.data_ )
{
    const auto shared( dynamic_cast< RP_StringDataInternal >( data_ ) );
    shared->addUser( );
}

// Move construction starts from the empty string and trades storage with the
// source, which ends up empty.
T_String::T_String( T_String&& source ) noexcept
    : data_( &T_EmptyString::EmptyString )
{
    swap( *this , source );
}
/*----------------------------------------------------------------------------*/
// Unregisters this string from its storage.
T_String::~T_String( )
{
    assert( data_ != nullptr );
    const auto storage( dynamic_cast< RP_StringDataInternal >( data_ ) );
    storage->removeUser( );
}
/*----------------------------------------------------------------------------*/
// Move assignment: releases the current storage, takes over the source's,
// and leaves the source referring to the empty string.
T_String& T_String::operator= ( T_String&& string ) noexcept
{
    assert( data_ != nullptr );
    const auto previous( dynamic_cast< RP_StringDataInternal >( data_ ) );
    previous->removeUser( );

    data_ = string.data_;
    string.data_ = &T_EmptyString::EmptyString;
    return *this;
}
// Copy assignment: shares the source's storage.
//
// The source's storage is acquired before the current one is released.
// Releasing first would destroy the storage on self-assignment (or when both
// strings already share it) if this string were its last user, and the
// following addUser( ) would then touch freed memory.
T_String& T_String::operator= ( T_String const& string )
{
    assert( data_ != nullptr );
    assert( string.data_ != nullptr );
    dynamic_cast< RP_StringDataInternal >( string.data_ )->addUser( );
    dynamic_cast< RP_StringDataInternal >( data_ )->removeUser( );
    data_ = string.data_;
    return *this;
}
// Assigns from a string builder by taking over its buffer.
//
// A builder without content yields the shared empty string and keeps
// whatever buffer it has. Wrapping it in a dynamic string would hand a null
// pointer to the UTF-8 scan. Otherwise the new storage is created before the
// previous one is released, so this string never refers to released storage.
T_String& T_String::operator= ( T_StringBuilder&& sb )
{
    assert( data_ != nullptr );
    const auto previous( dynamic_cast< RP_StringDataInternal >( data_ ) );

    if ( sb.data_ == nullptr || sb.size_ == 0 ) {
        data_ = &T_EmptyString::EmptyString;
    } else {
        data_ = new T_DynamicString( sb.data_ , sb.size_ , true );
        // The buffer now belongs to the string
        sb.data_ = nullptr;
        sb.size_ = sb.length_ = sb.capacity_ = 0;
    }

    previous->removeUser( );
    return *this;
}
/*----------------------------------------------------------------------------*/
// Assigns a copy of a string builder's current contents.
//
// A builder without content yields the shared empty string. Copying from it
// would pass a null pointer to memcpy( ) and to the UTF-8 scan. Otherwise
// the new storage is created before the previous one is released, so this
// string never refers to released storage.
T_String& T_String::operator= ( T_StringBuilder const& sb )
{
    assert( data_ != nullptr );
    const auto previous( dynamic_cast< RP_StringDataInternal >( data_ ) );

    if ( sb.data_ == nullptr || sb.size_ == 0 ) {
        data_ = &T_EmptyString::EmptyString;
    } else {
        data_ = new T_DynamicString( sb.data_ , sb.size_ , false );
    }

    previous->removeUser( );
    return *this;
}
/*----------------------------------------------------------------------------*/
// Exchanges the storage of two strings. Only the pointers are swapped, so no
// user count changes.
void ebcl::swap( T_String& lhs , T_String& rhs ) noexcept
{
using std::swap;
swap( lhs.data_ , rhs.data_ );
}
/*----------------------------------------------------------------------------*/
// Returns a string backed by the pool, adding the bytes to the pool if they
// are not there yet. A size of 0 gives the empty string without touching the
// pool.
T_String T_String::Pooled( char const* data , uint32_t size )
{
    assert( data != nullptr );

    T_String result;
    if ( size != 0 ) {
        result.data_ = T_StringPool::Pool.add( data , size );
    }
    return result;
}
/*----------------------------------------------------------------------------*/
// Switches this string to pooled storage, adding its contents to the pool if
// needed. Storage that is not poolable is left as it is.
T_String& T_String::addToPool( )
{
    const auto current( dynamic_cast< RP_StringDataInternal >( data_ ) );
    if ( !current->poolable ) {
        return *this;
    }

    // Switch to the pooled storage before letting go of the current one,
    // whose bytes are the source of the pooled copy
    data_ = T_StringPool::Pool.add( current->data( ) , current->size( ) );
    current->removeUser( );
    return *this;
}
// Switches this string to pooled storage if, and only if, the pool already
// holds the same contents. Nothing is ever added to the pool.
T_String& T_String::usePool( )
{
    const auto current( dynamic_cast< RP_StringDataInternal >( data_ ) );
    if ( !current->poolable ) {
        return *this;
    }

    const auto pooled( T_StringPool::Pool.get( current->data( ) , current->size( ) ) );
    if ( pooled == nullptr ) {
        return *this;
    }

    data_ = pooled;
    current->removeUser( );
    return *this;
}
/*----------------------------------------------------------------------------*/
// Returns the first `count` code points of the string.
//
// A count of 0 gives the empty string, without creating a zero-byte
// substring object that would keep the source storage alive. A count
// covering the whole string gives a copy sharing the same storage. Anything
// else gives a substring view of the storage.
T_String T_String::left( uint32_t count ) const
{
    if ( count == 0 ) {
        return T_String( );
    }
    if ( count >= length( ) ) {
        return *this;
    }
    const auto end( UTF8GetMemoryOffset( data( ) , count ) );
    T_String s;
    s.data_ = new T_Substring( dynamic_cast< RP_StringDataInternal >( data_ ) , 0 , end );
    return s;
}
// Returns the last `count` code points of the string.
//
// A count of 0 gives the empty string, without creating a zero-byte
// substring object that would keep the source storage alive. A count
// covering the whole string gives a copy sharing the same storage. Anything
// else gives a substring view of the storage.
T_String T_String::right( uint32_t count ) const
{
    if ( count == 0 ) {
        return T_String( );
    }
    if ( count >= length( ) ) {
        return *this;
    }
    const auto start( UTF8GetMemoryOffset( data( ) , length( ) - count ) );
    T_String s;
    s.data_ = new T_Substring( dynamic_cast< RP_StringDataInternal >( data_ ) ,
            start , data_->size( ) - start );
    return s;
}
// Returns up to `count` code points starting at code point `offset`.
//
// Ranges that touch the start or the end of the string are handed to
// left( ) / right( ); an offset past the end or a count of 0 gives the empty
// string.
T_String T_String::substr( uint32_t offset , uint32_t count ) const
{
    // Starts at the beginning
    if ( offset == 0 ) {
        return left( count );
    }

    const auto total( length( ) );
    if ( offset >= total || count == 0 ) {
        return T_String( );
    }

    // Runs up to (or past) the end: everything from offset onwards. The sum
    // is done in 64 bits so that it cannot wrap.
    const auto end( uint64_t( offset ) + count );
    if ( end >= total ) {
        return right( total - offset );
    }

    // Strictly inside the string
    const auto start( UTF8GetMemoryOffset( data( ) , offset ) );
    const auto bytes( UTF8GetMemoryOffset( data( ) + start , count ) );
    T_String result;
    result.data_ = new T_Substring(
            dynamic_cast< RP_StringDataInternal >( data_ ) , start , bytes );
    return result;
}
/*----------------------------------------------------------------------------*/
// Returns the string without its leading and trailing whitespace.
//
// The string is scanned once to find the first and last non-whitespace code
// points; range( ) is then called with those two indices. A string that is
// empty or all whitespace gives the empty string.
T_String T_String::trim( ) const noexcept
{
    if ( length( ) == 0 ) {
        return T_String( );
    }

    T_Optional< uint32_t > firstNws;
    uint32_t lastNws = 0;
    for ( T_StringIterator it( *this ) ; !it.atEnd( ) ; it.next( ) ) {
        const T_Character c( it );
        if ( c.isWhitespace( ) ) {
            continue;
        }
        if ( !firstNws ) {
            firstNws = it.index( );
        }
        lastNws = it.index( );
    }

    if ( !firstNws ) {
        return T_String( );
    }
    return range( *firstNws , lastNws );
}
/*----------------------------------------------------------------------------*/
// Returns a new string made of the result of `f` applied to each code point
// of this string, in order.
T_String T_String::mapped( T_Character::F_Map f ) const noexcept
{
    T_StringBuilder sb;
    for ( T_StringIterator it{ *this } ; !it.atEnd( ) ; it.next( ) ) {
        sb << f( it );
    }
    // The builder's buffer is handed over to the resulting string
    return std::move( sb );
}
// Returns a copy of the string in which every code point went through
// T_Character::toUpper( ).
T_String T_String::toUpper( ) const noexcept
{
return mapped( [](auto c){
return c.toUpper( );
} );
}
// Returns a copy of the string in which every code point went through
// T_Character::toLower( ).
T_String T_String::toLower( ) const noexcept
{
return mapped( [](auto c){
return c.toLower( );
} );
}
/*----------------------------------------------------------------------------*/
// Compares two strings code point by code point.
//
// Returns -1, 0 or 1 when this string sorts before, equal to or after
// `other`; a string that is a strict prefix of the other sorts first.
// Strings sharing the same storage are equal without being scanned. If
// either string is not valid UTF-8, the result is 0.
int32_t T_String::compare( T_String const& other ) const
{
    if ( this == &other || data_ == other.data_ ) {
        return 0;
    }
    if ( !valid( ) || !other.valid( ) ) {
        return 0;
    }

    T_StringIterator mine( *this ) , theirs( other );
    for ( ; !mine.atEnd( ) && !theirs.atEnd( ) ; mine.next( ) , theirs.next( ) ) {
        const T_Character a( mine ) , b( theirs );
        if ( a < b ) {
            return -1;
        }
        if ( a > b ) {
            return 1;
        }
    }

    // Common part is identical: the shorter string sorts first
    if ( !theirs.atEnd( ) ) {
        return -1;
    }
    return mine.atEnd( ) ? 0 : 1;
}
// Compares two strings code point by code point after lower-casing each code
// point.
//
// Returns -1, 0 or 1 when this string sorts before, equal to or after
// `other`; a string that is a strict prefix of the other sorts first. If
// either string is not valid UTF-8, the result is 0.
int32_t T_String::compareIgnoreCase( T_String const& other ) const
{
    if ( !valid( ) || !other.valid( ) ) {
        return 0;
    }

    T_StringIterator mine( *this ) , theirs( other );
    for ( ; !mine.atEnd( ) && !theirs.atEnd( ) ; mine.next( ) , theirs.next( ) ) {
        const auto a( T_Character( mine ).toLower( ) );
        const auto b( T_Character( theirs ).toLower( ) );
        if ( a < b ) {
            return -1;
        }
        if ( a > b ) {
            return 1;
        }
    }

    // Common part is identical: the shorter string sorts first
    if ( !theirs.atEnd( ) ) {
        return -1;
    }
    return mine.atEnd( ) ? 0 : 1;
}
/*----------------------------------------------------------------------------*/
// Checks whether this string begins with `other`.
//
// Returns false if either string is not valid or if `other` is longer than
// this string; an empty `other` is a prefix of any valid string.
bool T_String::startsWith( T_String const& other ) const
{
	if ( !( valid( ) && other.valid( ) ) ) {
		return false;
	}

	// The comparison below works on bytes, so the guard must be on byte sizes
	// as well. Comparing code point counts is not enough: a string with fewer
	// or as many code points may still use more bytes (e.g. "a" vs "é"), which
	// would make memcmp( ) read past the end of this string's storage.
	const auto prefixSize( other.size( ) );
	if ( prefixSize > size( ) ) {
		return false;
	}
	if ( prefixSize == 0 ) {
		return true;
	}
	return !memcmp( data( ) , other.data( ) , prefixSize );
}
// Checks whether this string ends with `other`.
//
// Returns false if either string is not valid or if `other` is longer than
// this string; an empty `other` is a suffix of any valid string.
bool T_String::endsWith( T_String const& other ) const
{
	if ( !( valid( ) && other.valid( ) ) ) {
		return false;
	}

	// The comparison below works on bytes, so the guard must be on byte sizes
	// as well. With a code point count guard only, `size( ) - suffixSize`
	// could wrap around when `other` uses more bytes than this string, and
	// memcmp( ) would then read outside of this string's storage.
	const auto suffixSize( other.size( ) );
	if ( suffixSize > size( ) ) {
		return false;
	}
	if ( suffixSize == 0 ) {
		return true;
	}
	return !memcmp( data( ) + ( size( ) - suffixSize ) ,
			other.data( ) , suffixSize );
}
/*----------------------------------------------------------------------------*/
// Searches for the first occurrence of `other` at or after the character
// index `from`.
//
// Returns the character index of the match, or -1 if there is no match, if
// `from` is out of range, if the remainder of the string is shorter than
// `other`, or if either string is not valid. An empty `other` matches at
// `from`.
int32_t T_String::find( T_String const& other , uint32_t from ) const
{
	if ( from > length( )
			|| length( ) - from < other.length( )
			|| !( valid( ) && other.valid( ) ) ) {
		return -1;
	}
	if ( other.length( ) == 0 ) {
		return from;
	}

	char const* const needle = other.data( );
	const auto needleBytes = other.size( );

	// `index` counts characters while `offset` counts bytes; both are kept in
	// sync while advancing one character at a time.
	uint32_t index = from;
	uint32_t offset = UTF8GetMemoryOffset( data( ) , from );
	for ( ; offset + needleBytes <= size( ) ; index ++ ) {
		char const* const here = data( ) + offset;
		if ( memcmp( here , needle , needleBytes ) == 0 ) {
			return index;
		}
		offset += UTF8GetMemoryOffset( here , 1 );
	}
	return -1;
}
/*----------------------------------------------------------------------------*/
// Searches for the first occurrence of `character` at or after the character
// index `from`.
//
// Returns the index reported by the iterator for the matching character, or
// -1 if the string or the character is not valid, if `from` is not smaller
// than the string's length, or if no match is found.
int32_t T_String::find( T_Character character , uint32_t from ) const
{
	const bool searchable = valid( ) && character.isValid( ) && from < length( );
	if ( !searchable ) {
		return -1;
	}
	for ( T_StringIterator cursor( getIterator( from ) ) ; !cursor.atEnd( ) ; cursor.next( ) ) {
		if ( T_Character( cursor ) == character ) {
			return cursor.index( );
		}
	}
	return -1;
}
/*----------------------------------------------------------------------------*/
// Returns a copy of the string in which every occurrence of the character
// `initial` is replaced by `replacement`.
//
// An empty T_String is returned if the string or either character is not
// valid. The string itself is returned when both characters are equal or
// when `!*this` holds.
T_String T_String::replace( T_Character initial , T_Character replacement ) const
{
	const bool usable = valid( ) && initial.isValid( ) && replacement.isValid( );
	if ( !usable ) {
		return T_String( );
	}
	if ( initial == replacement || !*this ) {
		return *this;
	}

	T_StringBuilder out;
	out.ensureCapacity( size( ) );
	for ( T_StringIterator cursor( *this ) ; !cursor.atEnd( ) ; cursor.next( ) ) {
		T_Character current( cursor );
		if ( current == initial ) {
			out << replacement;
		} else {
			out << current;
		}
	}
	return T_String( std::move( out ) );
}
// Returns a copy of the string in which every occurrence of the substring
// `initial` is replaced by `replacement`.
//
// An empty T_String is returned if any of the three strings is not valid.
// The string itself is returned when there is nothing to do (`!initial`,
// `initial` equal to `replacement`, `initial` longer than this string, or
// `!*this`). If the whole string equals `initial`, `replacement` is returned
// directly.
T_String T_String::replace( T_String const& initial , T_String const& replacement ) const
{
	if ( !( valid( ) && initial.valid( ) && replacement.valid( ) ) ) {
		return T_String( );
	}
	if ( !initial || initial == replacement || initial.length( ) > length( ) || !*this ) {
		return *this;
	}
	if ( initial.length( ) == length( ) && *this == initial ) {
		return replacement;
	}

	char const* const needle( initial.data( ) );
	const auto needleBytes( initial.size( ) );
	char const* const haystack( data( ) );
	const auto haystackBytes( size( ) );

	// Walk the source bytes: at each position either emit the replacement and
	// skip the matched bytes, or copy one code point and skip its bytes.
	T_StringBuilder out;
	for ( uint32_t cursor = 0 ; cursor < haystackBytes ; ) {
		const bool matches = cursor + needleBytes <= haystackBytes
			&& !memcmp( haystack + cursor , needle , needleBytes );
		if ( matches ) {
			out << replacement;
			cursor += needleBytes;
			continue;
		}
		// UTF8GetCodepoint( ) reports the number of bytes it consumed through
		// its second argument.
		uint32_t consumed;
		out << T_Character( UTF8GetCodepoint( haystack + cursor , consumed ) );
		cursor += consumed;
	}
	return T_String( std::move( out ) );
}
/*----------------------------------------------------------------------------*/
// Converts the string to the representation used by the operating system:
// a zero-terminated UTF-16 string on Windows, a zero-terminated copy of the
// UTF-8 data elsewhere.
//
// Returns an empty buffer if the string is not valid or, on Windows, if the
// conversion fails.
T_Buffer< char > T_String::toOSString( ) const
{
	if ( !valid( ) ) {
		return T_Buffer< char >( );
	}
#ifdef _WIN32
	const int srcBytes( int( data_->size( ) ) );
	if ( srcBytes == 0 ) {
		// MultiByteToWideChar( ) rejects zero-length input; return an empty,
		// zero-terminated wide string like the non-Windows branch does.
		T_Buffer< char > output( 2 );
		output[ 0 ] = output[ 1 ] = 0;
		return output;
	}

	// Ask the API for the required number of UTF-16 code units. The code
	// point count cannot be used here: characters outside the Basic
	// Multilingual Plane need two code units (a surrogate pair).
	const int required( MultiByteToWideChar( CP_UTF8 , 0 ,
				data( ) , srcBytes , nullptr , 0 ) );
	if ( required <= 0 ) {
		return T_Buffer< char >( );
	}

	const uint32_t n( required );
	T_Buffer< char > output( ( n + 1 ) * 2 );
	if ( !MultiByteToWideChar( CP_UTF8 , 0 ,
				data( ) , srcBytes ,
				( wchar_t* ) output.data( ) , required ) )
	{
		return T_Buffer< char >( );
	}
	// Terminating 16-bit zero.
	output[ n * 2 ] = output[ n * 2 + 1 ] = 0;
	return output;
#else
	const auto n( data_->size( ) );
	T_Buffer< char > output( n + 1 );
	memcpy( output.data( ) , data( ) , n );
	output[ n ] = 0;
	return output;
#endif
}
/*----------------------------------------------------------------------------*/
// Reads a string from a binary stream: a 32-bit byte count followed by that
// many bytes of string data. A byte count of 0 yields an empty T_String.
//
// Throws X_StreamError( BAD_DATA ) if fewer bytes than announced could be
// read.
M_DEFINE_OBJECT_READER( T_String )
{
	const uint32_t size( reader.read< uint32_t >( ) );
	if ( size == 0 ) {
		return T_String( );
	}

	char* const buffer = ( char* )::operator new ( size );

	// The buffer must not leak if reading fails, whether the stream throws
	// or simply returns less data than expected.
	uint32_t r;
	try {
		r = reader.stream( ).read( buffer , size );
	} catch ( ... ) {
		::operator delete ( buffer );
		throw;
	}
	if ( r != size ) {
		::operator delete ( buffer );
		throw X_StreamError( E_StreamError::BAD_DATA );
	}

	// NOTE(review): the third argument presumably makes the string take
	// ownership of the buffer - confirm against the T_String constructor.
	return T_String( buffer , size , true );
}
// Writes a string to a binary stream: a 32-bit byte count, followed by the
// string's bytes when the count is not zero.
//
// Throws X_StreamError( BAD_DATA ) if the stream accepted fewer bytes than
// requested.
M_DEFINE_OBJECT_WRITER( T_String )
{
	const uint32_t byteCount = item.size( );
	writer.write( byteCount );
	if ( byteCount != 0 ) {
		const uint32_t written = writer.stream( ).write( item.data( ) , byteCount );
		if ( written != byteCount ) {
			throw X_StreamError( E_StreamError::BAD_DATA );
		}
	}
}
/*= T_StringBuilder ==========================================================*/
// Copy constructor: allocates enough storage for the other builder's
// contents and duplicates them.
T_StringBuilder::T_StringBuilder( T_StringBuilder const& other )
	: data_( nullptr ) , capacity_( 0 ) ,
	  size_( other.size_ ) , length_( other.length_ )
{
	ensureCapacity( size_ );
	// Nothing to copy (and possibly no storage at all) for an empty builder.
	if ( size_ == 0 ) {
		return;
	}
	memcpy( data_ , other.data_ , size_ );
}
// Move constructor: starts from a default-constructed builder, then
// exchanges its state with `other`, which is left holding that default
// state.
T_StringBuilder::T_StringBuilder( T_StringBuilder&& other ) noexcept
: T_StringBuilder( )
{
swap( *this , other );
}
// Initialises the builder with a copy of `size` bytes of UTF-8 data found at
// `data`. The character count is computed by UTF8BufferInfo( ).
T_StringBuilder::T_StringBuilder( char const* data , uint32_t size )
	: T_StringBuilder( )
{
	ensureCapacity( size );
	// For an empty input no storage is allocated and data_ is still null;
	// memcpy( ) must not be called with a null pointer, even for 0 bytes.
	if ( size != 0 ) {
		memcpy( data_ , data , size );
	}
	size_ = size;
	UTF8BufferInfo( data , size , length_ );
}
// Initialises the builder with a copy of the contents of `string`; the byte
// size and character count are taken from the string itself.
T_StringBuilder::T_StringBuilder( T_String const& string )
	: T_StringBuilder( )
{
	size_ = string.size( );
	length_ = string.length( );
	ensureCapacity( size_ );
	// For an empty string no storage is allocated and data_ is still null;
	// memcpy( ) must not be called with a null pointer, even for 0 bytes.
	if ( size_ != 0 ) {
		memcpy( data_ , string.data( ) , size_ );
	}
}
/*----------------------------------------------------------------------------*/
// Destructor: releases the builder's storage. The storage is obtained with
// `::operator new` in ensureCapacity( ), hence the matching
// `::operator delete`.
T_StringBuilder::~T_StringBuilder( )
{
::operator delete ( data_ );
}
/*----------------------------------------------------------------------------*/
// Exchanges the complete state (storage pointer, capacity, byte size and
// character count) of two string builders.
void ebcl::swap( T_StringBuilder& lhs , T_StringBuilder& rhs )
{
	using std::swap;

	// Counters first, then the storage itself; the four exchanges are
	// independent from one another.
	swap( lhs.length_ , rhs.length_ );
	swap( lhs.size_ , rhs.size_ );
	swap( lhs.capacity_ , rhs.capacity_ );
	swap( lhs.data_ , rhs.data_ );
}
/*----------------------------------------------------------------------------*/
// Copy assignment: replaces this builder's contents with a copy of the
// other builder's contents, growing the storage if necessary.
T_StringBuilder& T_StringBuilder::operator=( T_StringBuilder const& other )
{
	// Self-assignment would end up calling memcpy( ) with identical source
	// and destination, which is undefined for overlapping ranges.
	if ( this == &other ) {
		return *this;
	}

	ensureCapacity( other.size( ) );
	size_ = other.size_;
	length_ = other.length_;
	if ( size_ != 0 ) {
		memcpy( data_ , other.data_ , size_ );
	}
	return *this;
}
// Move assignment: releases the current storage, takes over the other
// builder's storage and counters, and leaves `other` empty with no storage.
T_StringBuilder& T_StringBuilder::operator=( T_StringBuilder&& other ) noexcept
{
	// Deleting a null pointer is a no-op, so no check is needed.
	::operator delete( data_ );

	// Take over the other builder's state...
	data_ = other.data_;
	capacity_ = other.capacity_;
	size_ = other.size_;
	length_ = other.length_;

	// ... and reset it.
	other.data_ = nullptr;
	other.length_ = 0;
	other.size_ = 0;
	other.capacity_ = 0;
	return *this;
}
/*----------------------------------------------------------------------------*/
// Makes sure the builder's storage can hold at least `minCap` bytes.
//
// When the storage must grow, the new capacity is `minCap` rounded up to a
// multiple of C_GROWTH, and the bytes currently in use are copied to the new
// storage. Returns the builder itself.
T_StringBuilder& T_StringBuilder::ensureCapacity( uint32_t minCap )
{
	if ( minCap <= capacity_ ) {
		return *this;
	}

	// Round up to the next multiple of C_GROWTH.
	const uint32_t remainder = minCap % C_GROWTH;
	uint32_t rounded = minCap;
	if ( remainder != 0 ) {
		rounded += C_GROWTH - remainder;
	}

	char* const storage = ( char* )::operator new ( rounded );
	if ( data_ != nullptr ) {
		memcpy( storage , data_ , size_ );
		::operator delete ( data_ );
	}
	capacity_ = rounded;
	data_ = storage;
	return *this;
}
// Releases the builder's storage, resets its capacity to 0, then calls
// clear( ) and returns its result.
T_StringBuilder& T_StringBuilder::free( )
{
	::operator delete ( data_ );
	data_ = nullptr;
	capacity_ = 0;
	return clear( );
}
/*----------------------------------------------------------------------------*/
// Appends a copy of another builder's contents to this builder. Appending an
// empty builder does nothing. Returns the builder itself.
T_StringBuilder& T_StringBuilder::append( T_StringBuilder const& other )
{
	const uint32_t extraBytes = other.size_;
	if ( extraBytes == 0 ) {
		return *this;
	}
	ensureCapacity( size_ + extraBytes );
	memcpy( data_ + size_ , other.data_ , extraBytes );
	length_ += other.length_;
	size_ += extraBytes;
	return *this;
}
// Appends the contents of a builder that is about to be discarded.
//
// If this builder is empty and its capacity does not exceed the other's, the
// two builders are simply swapped; otherwise the contents are copied by the
// const-reference overload. Returns the builder itself.
T_StringBuilder& T_StringBuilder::append( T_StringBuilder&& other )
{
	const bool canSteal = ( size_ == 0 ) && ( capacity_ <= other.capacity_ );
	if ( !canSteal ) {
		return append( static_cast< T_StringBuilder const& >( other ) );
	}
	swap( *this , other );
	return *this;
}
// Appends the contents of a string to the builder. Appending an empty string
// does nothing. Returns the builder itself.
T_StringBuilder& T_StringBuilder::append( T_String const& string )
{
	const uint32_t extraBytes = string.size( );
	if ( extraBytes == 0 ) {
		return *this;
	}
	ensureCapacity( size_ + extraBytes );
	memcpy( data_ + size_ , string.data( ) , extraBytes );
	size_ += extraBytes;
	length_ += string.length( );
	return *this;
}
// Appends `size` bytes of UTF-8 data to the builder; the number of
// characters added is obtained from UTF8BufferInfo( ). Nothing happens when
// `size` is 0. Returns the builder itself.
T_StringBuilder& T_StringBuilder::append( char const* string , uint32_t size )
{
	if ( size == 0 ) {
		return *this;
	}

	uint32_t characters;
	UTF8BufferInfo( string , size , characters );

	ensureCapacity( size_ + size );
	memcpy( data_ + size_ , string , size );
	length_ += characters;
	size_ += size;
	return *this;
}
// Appends a single-byte character to the builder. Values of 128 and above
// (when read as an unsigned byte) are silently ignored. Returns the builder
// itself.
T_StringBuilder& T_StringBuilder::append( char character )
{
	if ( uint8_t( character ) >= 128 ) {
		return *this;
	}
	ensureCapacity( size_ + 1 );
	data_[ size_ ] = character;
	size_ ++;
	length_ ++;
	return *this;
}
// Appends a character to the builder, encoded by UTF8PutCodepoint( ).
// Invalid characters are silently ignored. Returns the builder itself.
T_StringBuilder& T_StringBuilder::append( T_Character character )
{
	if ( character.isValid( ) ) {
		// Reserve room for the longest possible encoding (4 bytes) after the
		// bytes already in use. This must be based on size_, not capacity_:
		// `capacity_ + 4` always exceeds the capacity, forcing a reallocation
		// and a larger buffer on every single call.
		ensureCapacity( size_ + 4 );
		uint32_t w = UTF8PutCodepoint( data_ + size_ ,
				capacity_ - size_ , character );
		assert( w != 0 );
		size_ += w;
		length_ ++;
	}
	return *this;
}
/*----------------------------------------------------------------------------*/
// Appends the textual representation of a signed integer.
//
//	value		the number to append
//	base		numeric base, between 2 and 36 (digits above 9 are
//			written as upper-case letters)
//	useSep		whether digit groups should be separated
//	sep		the separator character (must be valid)
//	sepEvery	amount of digits per group (must be positive)
//
// Negative values are written as a '-' followed by the representation of
// their magnitude. Returns the builder itself.
T_StringBuilder& T_StringBuilder::appendNumeric( int64_t value , int base , bool useSep , T_Character sep ,
		int sepEvery )
{
	assert( base >= 2 && base <= 36 );
	assert( sepEvery > 0 );
	assert( sep.isValid( ) );

	if ( value >= 0 ) {
		return appendNumeric( uint64_t( value ) , base , useSep , sep , sepEvery );
	}

	// Compute the magnitude using unsigned arithmetic: negating the smallest
	// int64_t value directly would overflow, while the unsigned subtraction
	// is well-defined and yields 2^63 in that case.
	const uint64_t magnitude = uint64_t( 0 ) - uint64_t( value );
	append( '-' );
	return appendNumeric( magnitude , base , useSep , sep , sepEvery );
}
/*----------------------------------------------------------------------------*/
// Appends the textual representation of an unsigned integer.
//
//	value		the number to append
//	base		numeric base, between 2 and 36 (digits above 9 are
//			written as upper-case letters)
//	useSep		whether digit groups should be separated
//	sep		the separator character (must be valid)
//	sepEvery	amount of digits per group (must be positive)
//
// Returns the builder itself.
T_StringBuilder& T_StringBuilder::appendNumeric( uint64_t value , int base , bool useSep , T_Character sep ,
		int sepEvery )
{
	assert( base >= 2 && base <= 36 );
	assert( sepEvery > 0 );
	assert( sep.isValid( ) );
	if ( value == 0 ) {
		return append( '0' );
	}

	// Worst case: 64 binary digits with a separator between each pair of
	// consecutive digits (63 separators). A fixed-size array replaces the
	// non-standard variable-length array.
	constexpr uint32_t C_MAX_OUTPUT = 64 + 63;
	uint32_t output[ C_MAX_OUTPUT ];

	// Code points are generated from the least significant digit, so the
	// array is filled backwards; `first` is the index of the first used slot.
	uint32_t first = C_MAX_OUTPUT;
	uint32_t ecap = 0;
	int sepl = 0;
	while ( value != 0 ) {
		const uint32_t mod = value % base;
		// The check happens before writing, so completely filling the array
		// is allowed.
		assert( first > 0 );
		output[ -- first ] = mod + ( mod < 10 ? '0' : ( 'A' - 10 ) );
		sepl ++;
		ecap ++;
		value /= base;
		// Separators go between groups only, never in front of the number.
		if ( useSep && sepl == sepEvery && value != 0 ) {
			assert( first > 0 );
			output[ -- first ] = sep;
			sepl = 0;
			// A separator may need up to 4 bytes once encoded.
			ecap += 4;
		}
	}

	ensureCapacity( size_ + ecap );
	uint32_t written = 0;
	char* ptr = data_ + size_;
	for ( uint32_t i = first ; i < C_MAX_OUTPUT ; i ++ ) {
		const uint32_t wr = UTF8PutCodepoint( ptr , 4 , output[ i ] );
		ptr += wr;
		written += wr;
	}
	size_ += written;
	length_ += C_MAX_OUTPUT - first;
	return *this;
}
/*----------------------------------------------------------------------------*/
// Appends the textual representation of a floating point value.
//
//	value		the number to append
//	precision	precision passed to the printf-style conversion
//	trailingZeros	selects the "%f" conversion when true, "%g" otherwise
//
// Nothing is appended if the value cannot be formatted. Returns the builder
// itself.
T_StringBuilder& T_StringBuilder::appendDouble( double value , uint32_t precision , bool trailingZeros )
{
	char const* const fmt = trailingZeros ? "%.*f" : "%.*g";
	// The `*` precision argument must be an int.
	const int prec = static_cast< int >( precision );

	// First pass: find out how many bytes are needed. snprintf( ) returns a
	// negative value on error.
	const int nchars = snprintf( nullptr , 0 , fmt , prec , value );
	if ( nchars <= 0 ) {
		return *this;
	}
	const uint32_t count = uint32_t( nchars );

	// Second pass: format straight into the builder's storage rather than
	// into a variable-length stack array. One extra byte is reserved for
	// the terminating zero, which is not counted as part of the contents.
	ensureCapacity( size_ + count + 1 );
	snprintf( data_ + size_ , count + 1 , fmt , prec , value );

	uint32_t len;
	UTF8BufferInfo( data_ + size_ , count , len );
	size_ += count;
	length_ += len;
	return *this;
}
/*----------------------------------------------------------------------------*/
// Writes a string builder's contents to a binary stream: the byte size,
// followed by the bytes themselves when the size is not zero.
//
// Throws X_StreamError( BAD_DATA ) if the stream accepted fewer bytes than
// requested, as the T_String writer does.
M_DEFINE_OBJECT_WRITER( T_StringBuilder )
{
	// `auto` keeps the exact type returned by size( ), so the size field is
	// serialised exactly as before.
	const auto s( item.size( ) );
	writer.write( s );
	if ( s != 0 ) {
		const auto w( writer.stream( ).write( item.data( ) , s ) );
		if ( w != s ) {
			throw X_StreamError( E_StreamError::BAD_DATA );
		}
	}
}