/*
 * Repository viewer metadata (not part of the original source file):
 *
 *   Author:  Emmanuel BENOîT
 *   Commit:  30d8d3057e
 *   Message:
 *     - Makefile changes allowing for cross-compilation
 *     - SRDData: had to rename an enum member from ERROR to ERR because
 *       apparently creating a macro called ERROR is a thing in the
 *       Windows headers
 *     - DynLib: ported
 *     - Strings: fixes in toOSString, untested for now
 *     - Fixes in some tests
 *     - TODO list update
 *   Size:    2045 lines, 46 KiB, C++
 */
/******************************************************************************/
|
|
/* STRINGS AND RELATED UTILITIES **********************************************/
|
|
/******************************************************************************/
|
|
|
|
|
|
#include <atomic>
|
|
#include <ebcl/Strings.hh>
|
|
#include <ebcl/HashIndex.hh>
|
|
#include <ebcl/Threading.hh>
|
|
#include <ebcl/Types.hh>
|
|
#include <ebcl/Alloc.hh>
|
|
#include <ebcl/BinaryStreams.hh>
|
|
|
|
|
|
using namespace ebcl;
|
|
|
|
|
|
/*= STRING STORAGE AND POOLING CLASSES =======================================*/
|
|
|
|
namespace {
|
|
|
|
// T_StringDataInternal - Actual interface w/ reference counting methods
|
|
class A_StringDataInternal : virtual public A_StringData
|
|
{
|
|
public:
|
|
const bool poolable;
|
|
|
|
explicit A_StringDataInternal( bool poolable );
|
|
virtual ~A_StringDataInternal( );
|
|
|
|
// Add/remove user, for e.g. reference counting
|
|
virtual void addUser( );
|
|
virtual void removeUser( );
|
|
};
|
|
|
|
|
|
M_ABSTRACT_POINTERS( StringDataInternal );
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// T_EmptyString - Fake storage for the empty string
|
|
class T_EmptyString final : public A_StringDataInternal
|
|
{
|
|
public:
|
|
static T_EmptyString EmptyString;
|
|
|
|
T_EmptyString( );
|
|
|
|
T_EmptyString( T_EmptyString const& ) = delete;
|
|
T_EmptyString( T_EmptyString&& other ) = delete;
|
|
};
|
|
|
|
|
|
// T_StaticString - Storage for read-only strings
|
|
class T_StaticString final : public A_StringDataInternal
|
|
{
|
|
public:
|
|
T_StaticString( ) = delete;
|
|
T_StaticString( T_StaticString const& ) = delete;
|
|
T_StaticString( T_StaticString&& other ) noexcept = delete;
|
|
|
|
T_StaticString( char const* string , uint32_t size );
|
|
~T_StaticString( ) override;
|
|
};
|
|
|
|
|
|
// A_RefCountedString - A reference-counted string. Used for dynamic strings
|
|
// and substrings.
|
|
class A_RefCountedString : public A_StringDataInternal
|
|
{
|
|
private:
|
|
std::atomic< uint32_t > users_;
|
|
|
|
public:
|
|
A_RefCountedString( );
|
|
A_RefCountedString( A_RefCountedString const& ) = delete;
|
|
A_RefCountedString( A_RefCountedString&& ) = delete;
|
|
|
|
void addUser( ) override;
|
|
void removeUser( ) override;
|
|
};
|
|
|
|
|
|
// T_DynamicString - Storage for dynamically-created strings
|
|
class T_DynamicString final : public A_RefCountedString
|
|
{
|
|
public:
|
|
T_DynamicString( ) = delete;
|
|
T_DynamicString( T_DynamicString const& ) = delete;
|
|
T_DynamicString( T_DynamicString&& other ) noexcept = delete;
|
|
|
|
// Pool allocation
|
|
void* operator new( size_t size ) noexcept;
|
|
void operator delete( void* object ) noexcept;
|
|
|
|
T_DynamicString( char const* data , uint32_t size , bool nodup );
|
|
~T_DynamicString( ) override;
|
|
};
|
|
|
|
|
|
// T_Substring - Storage for a string that is in fact a part of another
|
|
class T_Substring final : public A_RefCountedString
|
|
{
|
|
private:
|
|
RP_StringDataInternal source_;
|
|
|
|
public:
|
|
T_Substring( ) = delete;
|
|
T_Substring( T_Substring const& ) = delete;
|
|
T_Substring( T_Substring&& ) = delete;
|
|
|
|
// Pool allocation
|
|
void* operator new( size_t size ) noexcept;
|
|
void operator delete( void* object ) noexcept;
|
|
|
|
T_Substring( RP_StringDataInternal source , uint32_t offset , uint32_t size );
|
|
~T_Substring( ) override;
|
|
};
|
|
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// T_StringPool - Pool of string storage classes
|
|
class T_StringPool final
|
|
{
|
|
private:
|
|
T_HashIndex index_;
|
|
T_Array< T_OwnPtr< T_StaticString > > strings_;
|
|
|
|
public:
|
|
static T_StringPool Pool;
|
|
static T_ReadWriteMutex Mutex;
|
|
|
|
T_StringPool( );
|
|
|
|
RP_StringDataInternal add( char const* data , uint32_t size );
|
|
RP_StringDataInternal get( char const* data , uint32_t size ) const;
|
|
|
|
private:
|
|
uint32_t find( char const* data , uint32_t length , uint32_t hash ) const;
|
|
};
|
|
|
|
|
|
} // namespace
|
|
|
|
namespace ebcl { M_DECLARE_HASH( A_StringDataInternal ); }
|
|
|
|
|
|
/*= UTF-8 UTILITY FUNCTIONS ==================================================*/
|
|
|
|
// Checks whether a NUL-terminated string is made of well-formed UTF-8
// sequences of 1 to 4 bytes. Rejects stray continuation bytes, unsupported
// lead bytes, sequences with missing continuation bytes and overlong
// encodings.
bool ebcl::UTF8IsValid( char const* string )
{
	assert( string != nullptr );

	// A continuation byte has the bit pattern 10xxxxxx
	const auto isContinuation = []( const char byte ) {
		return ( byte & 0xc0 ) == 0x80;
	};

	for ( char const* cursor = string ; *cursor != '\0' ; ) {
		const char lead = *cursor;

		// Single-byte (ASCII) character
		if ( ( lead & 0x80 ) == 0 ) {
			cursor ++;
			continue;
		}

		uint32_t width;
		bool wellFormed;
		if ( ( lead & 0xf8 ) == 0xf0 ) {
			// 4 bytes: 3 continuation bytes, no overlong encoding
			width = 4;
			wellFormed = isContinuation( cursor[ 1 ] )
				&& isContinuation( cursor[ 2 ] )
				&& isContinuation( cursor[ 3 ] )
				&& !( ( lead & 0x07 ) == 0
					&& ( cursor[ 1 ] & 0x30 ) == 0 );

		} else if ( ( lead & 0xf0 ) == 0xe0 ) {
			// 3 bytes: 2 continuation bytes, no overlong encoding
			width = 3;
			wellFormed = isContinuation( cursor[ 1 ] )
				&& isContinuation( cursor[ 2 ] )
				&& !( ( lead & 0x0f ) == 0
					&& ( cursor[ 1 ] & 0x20 ) == 0 );

		} else if ( ( lead & 0xe0 ) == 0xc0 ) {
			// 2 bytes: 1 continuation byte, no overlong encoding
			width = 2;
			wellFormed = isContinuation( cursor[ 1 ] )
				&& ( lead & 0x1e ) != 0;

		} else {
			// Stray continuation byte or unsupported lead byte
			return false;
		}

		if ( !wellFormed ) {
			return false;
		}
		cursor += width;
	}

	return true;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Counts the characters of a NUL-terminated UTF-8 string. The string is not
// validated: the width of each character is taken from its lead byte, and
// any byte that is not a 2-, 3- or 4-byte lead counts as one character.
//
// A sequence cut short by the terminator counts as one character; the scan
// never moves past the terminator.
uint32_t ebcl::UTF8Length( char const* string )
{
	assert( string != nullptr );

	uint32_t len = 0;
	char const* ptr = string;
	while ( *ptr != '\0' ) {
		const char lead = *ptr;

		// Width announced by the lead byte
		uint32_t width = 1;
		if ( ( lead & 0xf8 ) == 0xf0 ) {
			width = 4;
		} else if ( ( lead & 0xf0 ) == 0xe0 ) {
			width = 3;
		} else if ( ( lead & 0xe0 ) == 0xc0 ) {
			width = 2;
		}

		// Skip the sequence, stopping early if the terminator shows up
		// inside it (truncated input) so we never read past the string.
		do {
			ptr ++;
			width --;
		} while ( width != 0 && *ptr != '\0' );

		len ++;
	}

	return len;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Computes the size, in bytes, of a NUL-terminated UTF-8 string (terminator
// excluded). The string is not validated: the width of each character is
// taken from its lead byte.
//
// A sequence cut short by the terminator only contributes the bytes that are
// actually present; the scan never moves past the terminator.
uint32_t ebcl::UTF8Size( char const* string )
{
	assert( string != nullptr );

	char const* ptr = string;
	while ( *ptr != '\0' ) {
		const char lead = *ptr;

		// Width announced by the lead byte
		uint32_t width = 1;
		if ( ( lead & 0xf8 ) == 0xf0 ) {
			width = 4;
		} else if ( ( lead & 0xf0 ) == 0xe0 ) {
			width = 3;
		} else if ( ( lead & 0xe0 ) == 0xc0 ) {
			width = 2;
		}

		// Skip the sequence, stopping early if the terminator shows up
		// inside it (truncated input) so we never read past the string.
		do {
			ptr ++;
			width --;
		} while ( width != 0 && *ptr != '\0' );
	}

	return uint32_t( ptr - string );
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Scans a NUL-terminated string once, computing its size in bytes
// (terminator excluded) and its length in characters, and checking whether
// it is well-formed UTF-8.
//
// Parameters:
//	string	the NUL-terminated string to examine
//	size	receives the size in bytes
//	length	receives the amount of characters
//
// Returns true if all sequences are well-formed (valid lead byte, expected
// continuation bytes, no overlong encoding). The size and length are set
// even when the string is invalid; a sequence cut short by the terminator
// counts as one character and the scan never moves past the terminator.
bool ebcl::UTF8Info( char const* string , uint32_t& size , uint32_t& length )
{
	assert( string != nullptr );

	char const* ptr = string;
	uint32_t len = 0;
	bool valid = true;
	while ( *ptr != '\0' ) {
		const char c = *ptr;
		uint32_t width;

		// 4 bytes
		if ( ( c & 0xf8 ) == 0xf0 ) {
			width = 4;
			valid = valid
				// 3 following bytes should be part of this
				// codepoint
				&& ( ptr[ 1 ] & 0xc0 ) == 0x80
				&& ( ptr[ 2 ] & 0xc0 ) == 0x80
				&& ( ptr[ 3 ] & 0xc0 ) == 0x80
				// Check for overlongs
				&& ( ( c & 0x07 ) != 0 || ( ptr[ 1 ] & 0x30 ) != 0 );

		// 3 bytes
		} else if ( ( c & 0xf0 ) == 0xe0 ) {
			width = 3;
			valid = valid
				// 2 following bytes should be part of this
				// codepoint
				&& ( ptr[ 1 ] & 0xc0 ) == 0x80
				&& ( ptr[ 2 ] & 0xc0 ) == 0x80
				// Check for overlongs
				&& ( ( c & 0x0f ) != 0 || ( ptr[ 1 ] & 0x20 ) != 0 );

		// 2 bytes
		} else if ( ( c & 0xe0 ) == 0xc0 ) {
			width = 2;
			valid = valid
				// Next byte should be part of this codepoint
				&& ( ptr[ 1 ] & 0xc0 ) == 0x80
				// Check for overlongs: at least one of the 4
				// upper payload bits must be set
				&& ( c & 0x1e ) != 0;

		// 1 byte
		} else {
			width = 1;
			valid = valid && ( c & 0x80 ) == 0;
		}

		// Skip the sequence, stopping early if the terminator shows up
		// inside it (truncated input) so we never read past the string.
		do {
			ptr ++;
			width --;
		} while ( width != 0 && *ptr != '\0' );

		len ++;
	}

	length = len;
	size = uint32_t( ptr - string );
	return valid;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Examines a buffer of `size` bytes, which does not need to be
// NUL-terminated, computing the amount of characters it contains and
// checking whether it is well-formed UTF-8.
//
// Parameters:
//	data	start of the buffer
//	size	size of the buffer, in bytes
//	length	receives the amount of characters
//
// Returns true if the whole buffer is well-formed. A sequence that extends
// beyond the end of the buffer makes the buffer invalid and is not counted
// as a character.
bool ebcl::UTF8BufferInfo( char const* data , uint32_t size , uint32_t& length )
{
	assert( data != nullptr );

	// A continuation byte has the bit pattern 10xxxxxx
	const auto isContinuation = []( const char byte ) {
		return ( byte & 0xc0 ) == 0x80;
	};

	char const* const end = data + size;
	uint32_t count = 0;
	bool valid = true;
	for ( char const* cursor = data ; cursor < end ; ) {
		const char lead = *cursor;
		uint32_t width;
		// Does the whole sequence fit inside the buffer?
		bool complete;

		if ( ( lead & 0xf8 ) == 0xf0 ) {
			// 4 bytes: 3 continuation bytes, no overlong encoding
			width = 4;
			complete = ( cursor + 3 < end );
			valid = valid && complete
				&& isContinuation( cursor[ 1 ] )
				&& isContinuation( cursor[ 2 ] )
				&& isContinuation( cursor[ 3 ] )
				&& ( ( lead & 0x07 ) != 0
					|| ( cursor[ 1 ] & 0x30 ) != 0 );

		} else if ( ( lead & 0xf0 ) == 0xe0 ) {
			// 3 bytes: 2 continuation bytes, no overlong encoding
			width = 3;
			complete = ( cursor + 2 < end );
			valid = valid && complete
				&& isContinuation( cursor[ 1 ] )
				&& isContinuation( cursor[ 2 ] )
				&& ( ( lead & 0x0f ) != 0
					|| ( cursor[ 1 ] & 0x20 ) != 0 );

		} else if ( ( lead & 0xe0 ) == 0xc0 ) {
			// 2 bytes: 1 continuation byte, no overlong encoding
			width = 2;
			complete = ( cursor + 1 < end );
			valid = valid && complete
				&& isContinuation( cursor[ 1 ] )
				&& ( lead & 0x1e ) != 0;

		} else {
			// 1 byte: must be plain ASCII
			width = 1;
			complete = true;
			valid = valid && ( lead & 0x80 ) == 0;
		}

		cursor += width;
		if ( complete ) {
			count ++;
		}
	}

	length = count;
	return valid;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Decodes the UTF-8 sequence found at `data`, without validating it.
//
// Parameters:
//	data	location of the sequence's lead byte
//	bytes	receives the width of the sequence (1 to 4), as announced by
//		the lead byte
//
// Returns the decoded codepoint. A byte that is not a 2-, 3- or 4-byte lead
// is returned as-is, as a value in the 0..255 range.
uint32_t ebcl::UTF8GetCodepoint( char const* data , uint32_t& bytes )
{
	assert( data != nullptr );
	if ( ( data[ 0 ] & 0xf8 ) == 0xf0 ) {
		bytes = 4;
		return ( ( data[ 0 ] & 0x07 ) << 18 )
			| ( ( data[ 1 ] & 0x3f ) << 12 )
			| ( ( data[ 2 ] & 0x3f ) << 6 )
			| ( data[ 3 ] & 0x3f );

	} else if ( ( data[ 0 ] & 0xf0 ) == 0xe0 ) {
		bytes = 3;
		return ( ( data[ 0 ] & 0x0f ) << 12 )
			| ( ( data[ 1 ] & 0x3f ) << 6 )
			| ( data[ 2 ] & 0x3f );

	} else if ( ( data[ 0 ] & 0xe0 ) == 0xc0 ) {
		bytes = 2;
		return ( ( data[ 0 ] & 0x1f ) << 6 )
			| ( data[ 1 ] & 0x3f );

	} else {
		bytes = 1;
		// Go through uint8_t: where char is signed, a byte >= 0x80 would
		// otherwise be sign-extended into a huge codepoint value.
		return uint32_t( uint8_t( data[ 0 ] ) );
	}
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Decodes the UTF-8 sequence found at `data`, without validating it and
// without reporting its width.
//
// Returns the decoded codepoint. A byte that is not a 2-, 3- or 4-byte lead
// is returned as-is, as a value in the 0..255 range.
uint32_t ebcl::UTF8GetCodepoint( char const* data )
{
	assert( data != nullptr );
	if ( ( data[ 0 ] & 0xf8 ) == 0xf0 ) {
		return ( ( data[ 0 ] & 0x07 ) << 18 )
			| ( ( data[ 1 ] & 0x3f ) << 12 )
			| ( ( data[ 2 ] & 0x3f ) << 6 )
			| ( data[ 3 ] & 0x3f );

	} else if ( ( data[ 0 ] & 0xf0 ) == 0xe0 ) {
		return ( ( data[ 0 ] & 0x0f ) << 12 )
			| ( ( data[ 1 ] & 0x3f ) << 6 )
			| ( data[ 2 ] & 0x3f );

	} else if ( ( data[ 0 ] & 0xe0 ) == 0xc0 ) {
		return ( ( data[ 0 ] & 0x1f ) << 6 )
			| ( data[ 1 ] & 0x3f );

	} else {
		// Go through uint8_t: where char is signed, a byte >= 0x80 would
		// otherwise be sign-extended into a huge codepoint value.
		return uint32_t( uint8_t( data[ 0 ] ) );
	}
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Encodes a codepoint as UTF-8.
//
// Parameters:
//	output		where the encoded bytes are written
//	available	amount of bytes that may be written at `output`
//	codepoint	the codepoint to encode
//
// Returns the amount of bytes written (1 to 4), or 0 if the codepoint is
// 0x110000 or above or if the encoded form does not fit in the available
// space; nothing is written in that case.
uint32_t ebcl::UTF8PutCodepoint( char* output , uint32_t available , uint32_t codepoint )
{
	// Width of the encoded form; 0 if the codepoint cannot be encoded
	uint32_t needed;
	if ( codepoint < 0x80 ) {
		needed = 1;
	} else if ( codepoint < 0x800 ) {
		needed = 2;
	} else if ( codepoint < 0x10000 ) {
		needed = 3;
	} else if ( codepoint < 0x110000 ) {
		needed = 4;
	} else {
		needed = 0;
	}
	if ( needed == 0 || available < needed ) {
		return 0;
	}

	switch ( needed ) {

	    case 1:
		*output = char( codepoint );
		break;

	    case 2:
		output[ 0 ] = char( ( codepoint >> 6 ) | 0xc0 );
		output[ 1 ] = char( ( codepoint & 0x3f ) | 0x80 );
		break;

	    case 3:
		output[ 0 ] = char( ( codepoint >> 12 ) | 0xe0 );
		output[ 1 ] = char( ( ( codepoint >> 6 ) & 0x3f ) | 0x80 );
		output[ 2 ] = char( ( codepoint & 0x3f ) | 0x80 );
		break;

	    default:
		output[ 0 ] = char( ( codepoint >> 18 ) | 0xf0 );
		output[ 1 ] = char( ( ( codepoint >> 12 ) & 0x3f ) | 0x80 );
		output[ 2 ] = char( ( ( codepoint >> 6 ) & 0x3f ) | 0x80 );
		output[ 3 ] = char( ( codepoint & 0x3f ) | 0x80 );
		break;
	}
	return needed;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Converts a character index into a byte offset by skipping `index`
// characters from the start of `input`. The width of each character is taken
// from its lead byte; the data is neither validated nor bounds-checked.
uint32_t ebcl::UTF8GetMemoryOffset( char const* input , uint32_t index )
{
	assert( input != nullptr );

	char const* cursor = input;
	for ( uint32_t remaining = index ; remaining != 0 ; remaining -- ) {
		const char lead = *cursor;

		// Width announced by the lead byte
		uint32_t width = 1;
		if ( ( lead & 0xf8 ) == 0xf0 ) {
			width = 4;
		} else if ( ( lead & 0xf0 ) == 0xe0 ) {
			width = 3;
		} else if ( ( lead & 0xe0 ) == 0xc0 ) {
			width = 2;
		}
		cursor += width;
	}

	return cursor - input;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
uint64_t ebcl::UTF8ToUnsignedInteger( char const* input , uint32_t size , bool * ok ,
|
|
int base , bool useSep , uint32_t separator )
|
|
{
|
|
char const* inputPos( input );
|
|
char const* const inputEnd( input + size );
|
|
|
|
if ( ok ) {
|
|
*ok = false;
|
|
}
|
|
|
|
// Find start
|
|
bool checkBase( false );
|
|
bool hadSign( false );
|
|
while ( 1 ) {
|
|
if ( inputPos >= inputEnd ) {
|
|
return 0;
|
|
}
|
|
uint32_t nBytes;
|
|
const T_Character c( UTF8GetCodepoint( inputPos , nBytes ) );
|
|
|
|
if ( !c.isWhitespace( ) && ( !useSep || c != separator ) ) {
|
|
if ( c == '+' ) {
|
|
if ( hadSign ) {
|
|
return 0;
|
|
}
|
|
hadSign = true;
|
|
} else if ( c.isNumeric( ) || c.isAlpha( ) ) {
|
|
checkBase = ( base == 0 && c == '0' );
|
|
if ( checkBase ) {
|
|
inputPos += nBytes;
|
|
}
|
|
break;
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
inputPos += nBytes;
|
|
}
|
|
|
|
// Detect base
|
|
if ( checkBase ) {
|
|
if ( inputPos >= inputEnd ) {
|
|
if ( ok != nullptr ) {
|
|
*ok = true;
|
|
}
|
|
return 0;
|
|
}
|
|
uint32_t nBytes;
|
|
const T_Character nc( UTF8GetCodepoint( inputPos , nBytes ) );
|
|
|
|
bool next( true );
|
|
if ( nc == 'x' || nc == 'X' ) {
|
|
base = 16;
|
|
} else if ( nc == 'b' || nc == 'B' ) {
|
|
base = 2;
|
|
} else {
|
|
base = 8;
|
|
next = false;
|
|
}
|
|
if ( next ) {
|
|
inputPos += nBytes;
|
|
}
|
|
} else if ( base == 0 ) {
|
|
base = 10;
|
|
}
|
|
|
|
// Start converting
|
|
const uint64_t ubase( base );
|
|
const uint64_t cutoff( UINT64_MAX / ubase );
|
|
const uint64_t limit( UINT64_MAX % ubase );
|
|
uint64_t accum( 0 );
|
|
int any( 0 );
|
|
while ( inputPos < inputEnd ) {
|
|
uint32_t nBytes;
|
|
const T_Character c( UTF8GetCodepoint( inputPos , nBytes ) );
|
|
inputPos += nBytes;
|
|
|
|
if ( useSep && c == separator ) {
|
|
continue;
|
|
}
|
|
|
|
uint32_t value;
|
|
if ( c.isNumeric( ) ) {
|
|
value = c - '0';
|
|
} else if ( c.isAlpha( ) ) {
|
|
value = c.toUpper( ) - 55;
|
|
} else {
|
|
any = 0;
|
|
break;
|
|
}
|
|
if ( value >= ubase ) {
|
|
any = 0;
|
|
break;
|
|
}
|
|
|
|
if ( any < 0 || accum > cutoff || ( accum == cutoff && value > limit ) ) {
|
|
any = -1;
|
|
} else {
|
|
any = 1;
|
|
accum = accum * ubase + value;
|
|
}
|
|
}
|
|
|
|
if ( any < 0 ) {
|
|
accum = UINT64_MAX;
|
|
} else if ( any > 0 && ok ) {
|
|
*ok = true;
|
|
}
|
|
return accum;
|
|
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
int64_t ebcl::UTF8ToInteger( char const* input , uint32_t size , bool * ok ,
|
|
int base , bool useSep , uint32_t separator )
|
|
{
|
|
char const* inputPos( input );
|
|
char const* const inputEnd( input + size );
|
|
|
|
if ( ok ) {
|
|
*ok = false;
|
|
}
|
|
|
|
// Find start
|
|
bool checkBase( false );
|
|
bool hadSign( false );
|
|
bool neg( false );
|
|
while ( 1 ) {
|
|
if ( inputPos >= inputEnd ) {
|
|
return 0;
|
|
}
|
|
uint32_t nBytes;
|
|
const T_Character c( UTF8GetCodepoint( inputPos , nBytes ) );
|
|
|
|
if ( !c.isWhitespace( ) && ( !useSep || c != separator ) ) {
|
|
if ( c == '+' || c == '-' ) {
|
|
if ( hadSign ) {
|
|
return 0;
|
|
}
|
|
neg = ( c == '-' );
|
|
hadSign = true;
|
|
} else if ( c.isNumeric( ) || c.isAlpha( ) ) {
|
|
checkBase = ( base == 0 && c == '0' );
|
|
if ( checkBase ) {
|
|
inputPos += nBytes;
|
|
}
|
|
break;
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
inputPos += nBytes;
|
|
}
|
|
|
|
// Detect base
|
|
if ( checkBase ) {
|
|
if ( inputPos >= inputEnd ) {
|
|
if ( ok != nullptr ) {
|
|
*ok = true;
|
|
}
|
|
return 0;
|
|
}
|
|
uint32_t nBytes;
|
|
const T_Character nc( UTF8GetCodepoint( inputPos , nBytes ) );
|
|
|
|
bool next( true );
|
|
if ( nc == 'x' || nc == 'X' ) {
|
|
base = 16;
|
|
} else if ( nc == 'b' || nc == 'B' ) {
|
|
base = 2;
|
|
} else {
|
|
base = 8;
|
|
next = false;
|
|
}
|
|
if ( next ) {
|
|
inputPos += nBytes;
|
|
}
|
|
} else if ( base == 0 ) {
|
|
base = 10;
|
|
}
|
|
|
|
// Start converting
|
|
const uint64_t ubase( base );
|
|
const uint64_t max( neg ? ( uint64_t( 0 - ( INT64_MIN + INT64_MAX ) ) + INT64_MAX ) : INT64_MAX );
|
|
const uint64_t cutoff( max / ubase );
|
|
const uint64_t limit( max % ubase );
|
|
uint64_t accum( 0 );
|
|
int any( 0 );
|
|
while ( inputPos < inputEnd ) {
|
|
uint32_t nBytes;
|
|
const T_Character c( UTF8GetCodepoint( inputPos , nBytes ) );
|
|
inputPos += nBytes;
|
|
|
|
if ( useSep && c == separator ) {
|
|
continue;
|
|
}
|
|
|
|
uint32_t value;
|
|
if ( c.isNumeric( ) ) {
|
|
value = c - '0';
|
|
} else if ( c.isAlpha( ) ) {
|
|
value = c.toUpper( ) - 55;
|
|
} else {
|
|
any = 0;
|
|
break;
|
|
}
|
|
if ( value >= ubase ) {
|
|
any = 0;
|
|
break;
|
|
}
|
|
|
|
if ( any < 0 || accum > cutoff || ( accum == cutoff && value > limit ) ) {
|
|
any = -1;
|
|
} else {
|
|
any = 1;
|
|
accum = accum * ubase + value;
|
|
}
|
|
}
|
|
|
|
if ( any < 0 ) {
|
|
accum = neg ? INT64_MIN : INT64_MAX;
|
|
} else if ( any > 0 ) {
|
|
if ( neg ) {
|
|
accum = (~accum) + 1;
|
|
}
|
|
if ( ok ) {
|
|
*ok = true;
|
|
}
|
|
}
|
|
return accum;
|
|
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
double ebcl::UTF8ToDouble( char const* input , uint32_t size ,
|
|
bool * ok , uint32_t decimalPoint ,
|
|
bool useSep , uint32_t separator )
|
|
{
|
|
char const* inputPos( input );
|
|
char const* const inputEnd( input + size );
|
|
char output[ size + 1 ];
|
|
char * outputPos( output );
|
|
|
|
if ( ok ) {
|
|
*ok = false;
|
|
}
|
|
|
|
enum E_State_ {
|
|
INIT ,
|
|
HAD_SIGN ,
|
|
INT_PART ,
|
|
HAD_SEP ,
|
|
FRACT_PART ,
|
|
AFTER_EXP ,
|
|
AFTER_EXP_SIGN ,
|
|
EXP_PART
|
|
};
|
|
E_State_ state( INIT );
|
|
|
|
while ( inputPos < inputEnd ) {
|
|
uint32_t nBytes;
|
|
const T_Character c( UTF8GetCodepoint( inputPos , nBytes ) );
|
|
inputPos += nBytes;
|
|
|
|
switch ( state ) {
|
|
|
|
case INIT:
|
|
if ( c.isWhitespace( ) ) {
|
|
continue;
|
|
}
|
|
if ( c == '+' || c == '-' ) {
|
|
*( outputPos ++ ) = c;
|
|
state = HAD_SIGN;
|
|
} else if ( c == decimalPoint ) {
|
|
*( outputPos ++ ) = '.';
|
|
state = FRACT_PART;
|
|
} else if ( c.isNumeric( ) ) {
|
|
*( outputPos ++ ) = c;
|
|
state = INT_PART;
|
|
} else {
|
|
return 0;
|
|
}
|
|
break;
|
|
|
|
case HAD_SIGN:
|
|
if ( c == decimalPoint ) {
|
|
*( outputPos ++ ) = '.';
|
|
state = FRACT_PART;
|
|
} else if ( c.isNumeric( ) ) {
|
|
*( outputPos ++ ) = c;
|
|
state = INT_PART;
|
|
} else {
|
|
return 0;
|
|
}
|
|
break;
|
|
|
|
case INT_PART:
|
|
if ( c == decimalPoint ) {
|
|
*( outputPos ++ ) = '.';
|
|
state = FRACT_PART;
|
|
} else if ( c == 'e' || c == 'E' ) {
|
|
*( outputPos ++ ) = 'e';
|
|
state = AFTER_EXP;
|
|
} else if ( c.isNumeric( ) ) {
|
|
*( outputPos ++ ) = c;
|
|
} else if ( useSep && c == separator ) {
|
|
state = HAD_SEP;
|
|
} else {
|
|
return 0;
|
|
}
|
|
break;
|
|
|
|
case HAD_SEP:
|
|
if ( c.isNumeric( ) ) {
|
|
*( outputPos ++ ) = c;
|
|
state = INT_PART;
|
|
} else {
|
|
return 0;
|
|
}
|
|
break;
|
|
|
|
case FRACT_PART:
|
|
if ( c == 'e' || c == 'E' ) {
|
|
*( outputPos ++ ) = 'e';
|
|
state = AFTER_EXP;
|
|
} else if ( c.isNumeric( ) ) {
|
|
*( outputPos ++ ) = c;
|
|
state = FRACT_PART;
|
|
} else {
|
|
return 0;
|
|
}
|
|
break;
|
|
|
|
case AFTER_EXP:
|
|
if ( c == '+' || c == '-' ) {
|
|
*( outputPos ++ ) = c;
|
|
state = AFTER_EXP_SIGN;
|
|
} else if ( c.isNumeric( ) ) {
|
|
*( outputPos ++ ) = c;
|
|
state = EXP_PART;
|
|
} else {
|
|
return 0;
|
|
}
|
|
break;
|
|
|
|
case AFTER_EXP_SIGN:
|
|
if ( c.isNumeric( ) ) {
|
|
*( outputPos ++ ) = c;
|
|
state = EXP_PART;
|
|
} else {
|
|
return 0;
|
|
}
|
|
break;
|
|
|
|
case EXP_PART:
|
|
if ( c.isNumeric( ) ) {
|
|
*( outputPos ++ ) = c;
|
|
} else {
|
|
return 0;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
if ( state == INIT || state == HAD_SEP || state == HAD_SIGN
|
|
|| state == AFTER_EXP || state == AFTER_EXP_SIGN ) {
|
|
return 0;
|
|
}
|
|
*outputPos = '\0';
|
|
|
|
errno = 0;
|
|
auto v( strtod( output , nullptr ) );
|
|
if ( errno == 0 && ok ) {
|
|
*ok = true;
|
|
}
|
|
return v;
|
|
}
|
|
|
|
|
|
/*= T_Character ==============================================================*/
|
|
|
|
// Reads a character stored as a UTF-8 sequence: the lead byte is read first
// and determines how many more bytes are read. The sequence is not
// validated.
M_DEFINE_OBJECT_READER( T_Character )
{
	const char first( reader.read< char >( ) );

	// The following bytes are read through separate statements: the
	// operands of a single '|' expression may be evaluated in any order,
	// which would leave the order of the reads unspecified.
	if ( ( first & 0xf8 ) == 0xf0 ) {
		const char second( reader.read< char >( ) );
		const char third( reader.read< char >( ) );
		const char fourth( reader.read< char >( ) );
		return ( ( first & 0x07 ) << 18 )
			| ( ( second & 0x3f ) << 12 )
			| ( ( third & 0x3f ) << 6 )
			| ( fourth & 0x3f );

	} else if ( ( first & 0xf0 ) == 0xe0 ) {
		const char second( reader.read< char >( ) );
		const char third( reader.read< char >( ) );
		return ( ( first & 0x0f ) << 12 )
			| ( ( second & 0x3f ) << 6 )
			| ( third & 0x3f );

	} else if ( ( first & 0xe0 ) == 0xc0 ) {
		const char second( reader.read< char >( ) );
		return ( ( first & 0x1f ) << 6 )
			| ( second & 0x3f );

	} else {
		return first;
	}
}
|
|
|
|
// Writes a character as a UTF-8 sequence of 1 to 4 bytes: a lead byte
// followed by the continuation bytes, most significant bits first.
M_DEFINE_OBJECT_WRITER( T_Character )
{
	const uint32_t codepoint( item );

	// Lead byte and amount of continuation bytes
	uint32_t lead;
	uint32_t trailing;
	if ( codepoint < 0x80 ) {
		lead = codepoint;
		trailing = 0;
	} else if ( codepoint < 0x800 ) {
		lead = ( codepoint >> 6 ) | 0xc0;
		trailing = 1;
	} else if ( codepoint < 0x10000 ) {
		lead = ( codepoint >> 12 ) | 0xe0;
		trailing = 2;
	} else {
		lead = ( codepoint >> 18 ) | 0xf0;
		trailing = 3;
	}

	writer.write< char >( lead );
	while ( trailing != 0 ) {
		trailing --;
		// Each continuation byte carries 6 bits of the codepoint
		const uint32_t shift( trailing * 6 );
		writer.write< char >( ( ( codepoint >> shift ) & 0x3f ) | 0x80 );
	}
}
|
|
|
|
/*= A_StringData =============================================================*/
|
|
|
|
// Nothing to release at this level
inline A_StringData::~A_StringData( ) = default;
|
|
|
|
|
|
/*= A_StringDataInternal =====================================================*/
|
|
|
|
inline A_StringDataInternal::A_StringDataInternal( bool poolable )
|
|
: poolable( poolable )
|
|
{ }
|
|
|
|
A_StringDataInternal::~A_StringDataInternal( )
|
|
{ }
|
|
|
|
void A_StringDataInternal::addUser( )
|
|
{ }
|
|
|
|
void A_StringDataInternal::removeUser( )
|
|
{ }
|
|
|
|
// Hashes the storage's contents: the bytes of the string, over its whole
// size.
inline M_DEFINE_HASH( A_StringDataInternal )
{
	const auto bytes( reinterpret_cast< uint8_t const* >( item.data( ) ) );
	return HashData( bytes , item.size( ) );
}
|
|
|
|
|
|
/*= T_EmptyString ============================================================*/
|
|
|
|
// The empty string has no data and no characters, and is valid
T_EmptyString::T_EmptyString( )
	: A_StringDataInternal( false )
{
	length_ = 0;
	size_ = 0;
	data_ = nullptr;
	valid_ = true;
}

// The shared instance
T_EmptyString T_EmptyString::EmptyString;
|
|
|
|
|
|
/*= T_StaticString ===========================================================*/
|
|
|
|
// Stores a private copy of the bytes, then computes the length and validity
// of the string.
T_StaticString::T_StaticString( char const* data , uint32_t size )
	: A_StringDataInternal( false )
{
	size_ = size;

	// Private copy of the source bytes
	void* const storage( ::operator new ( size_ ) );
	data_ = static_cast< char* >( storage );
	memcpy( data_ , data , size_ );

	valid_ = UTF8BufferInfo( data , size , length_ );
}

// Releases the private copy
T_StaticString::~T_StaticString( )
{
	::operator delete ( data_ );
}
|
|
|
|
|
|
/*= A_RefCountedString =======================================================*/
|
|
|
|
inline A_RefCountedString::A_RefCountedString( )
|
|
: A_StringDataInternal( true ) , users_( 1 )
|
|
{ }
|
|
|
|
void A_RefCountedString::addUser( )
|
|
{
|
|
users_.fetch_add( 1 , std::memory_order_acq_rel );
|
|
}
|
|
|
|
void A_RefCountedString::removeUser( )
|
|
{
|
|
if ( users_.fetch_sub( 1 , std::memory_order_acq_rel ) == 1 ) {
|
|
std::atomic_thread_fence( std::memory_order_acq_rel );
|
|
if ( users_.load( std::memory_order_acq_rel ) == 0 ) {
|
|
delete this;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*= T_DynamicString ==========================================================*/
|
|
|
|
namespace {
|
|
static thread_local T_ThreadedPoolAllocator<
|
|
sizeof( T_DynamicString ) , alignof( T_DynamicString ) ,
|
|
32 , 16
|
|
> DynamicStringAllocator_;
|
|
|
|
}
|
|
|
|
void* T_DynamicString::operator new(
|
|
const size_t size ) noexcept
|
|
{
|
|
return DynamicStringAllocator_.allocate( size );
|
|
}
|
|
|
|
void T_DynamicString::operator delete(
|
|
void* const object ) noexcept
|
|
{
|
|
DynamicStringAllocator_.free( object );
|
|
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
|
|
// Sets up the storage from a buffer. When `nodup` is set the buffer itself
// is adopted; otherwise a private copy is made. The length and validity of
// the string are computed from the source bytes.
T_DynamicString::T_DynamicString( char const* data , uint32_t size , bool nodup )
	: A_RefCountedString( )
{
	size_ = size;

	if ( !nodup ) {
		// Private copy of the source bytes
		void* const storage( ::operator new ( size_ ) );
		data_ = static_cast< char* >( storage );
		memcpy( data_ , data , size_ );
	} else {
		// Adopt the caller's buffer
		data_ = const_cast< char* >( data );
	}

	valid_ = UTF8BufferInfo( data , size , length_ );
}

// Releases the buffer, whether it was copied or adopted
T_DynamicString::~T_DynamicString( )
{
	::operator delete ( data_ );
}
|
|
|
|
|
|
/*= T_Substring ==============================================================*/
|
|
|
|
namespace {
|
|
static thread_local T_ThreadedPoolAllocator<
|
|
sizeof( T_Substring ) , alignof( T_Substring ) ,
|
|
32 , 4
|
|
> SubstringAllocator_;
|
|
|
|
}
|
|
|
|
void* T_Substring::operator new(
|
|
const size_t size ) noexcept
|
|
{
|
|
return SubstringAllocator_.allocate( size );
|
|
}
|
|
|
|
void T_Substring::operator delete(
|
|
void* const object ) noexcept
|
|
{
|
|
SubstringAllocator_.free( object );
|
|
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Sets up a view over `size` bytes of the source storage, starting at byte
// `offset`. The source gains a user for the lifetime of the substring.
T_Substring::T_Substring( RP_StringDataInternal source , uint32_t offset , uint32_t size )
	: A_RefCountedString( ) , source_( source )
{
	// The requested range must lie within the source
	assert( size + offset <= source_->size( ) );

	// Keep the source in use while we point into it
	source_->addUser( );

	char const* const start( source->data( ) + offset );
	data_ = const_cast< char* >( start );
	size_ = size;
	valid_ = UTF8BufferInfo( data_ , size_ , length_ );
}

// Lets go of the source storage
T_Substring::~T_Substring( )
{
	source_->removeUser( );
}
|
|
|
|
|
|
/*= T_StringPool =============================================================*/
|
|
|
|
T_StringPool T_StringPool::Pool;
|
|
T_ReadWriteMutex T_StringPool::Mutex;
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
T_StringPool::T_StringPool( )
|
|
: index_( 16384 , 4096 , 4096 ) , strings_( 4096 )
|
|
{ }
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Returns the pooled string matching the specified bytes, adding a new entry
// to the pool if there is none. The lookup is made under a read lock, which
// is upgraded to a write lock if an entry needs to be added.
RP_StringDataInternal T_StringPool::add( char const* data , uint32_t size )
{
	T_ReadLock lock( T_StringPool::Mutex );
	const auto hash( HashData( (uint8_t const*) data , size ) );
	const auto existing( find( data , size , hash ) );

	// Already pooled
	if ( existing != T_HashIndex::INVALID_INDEX ) {
		return strings_[ existing ].get( );
	}

	// New entry: register the hash, then store the string
	const T_WriteLock wLock( lock.upgrade( ) );
	index_.add( hash );
	const auto added( strings_.add( NewOwned< T_StaticString >( data , size ) ) );
	return strings_[ added ].get( );
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Looks up the pooled string matching the specified bytes, under a read
// lock. Returns nullptr if the pool contains no such string.
RP_StringDataInternal T_StringPool::get( char const* data , uint32_t size ) const
{
	const T_ReadLock lock( T_StringPool::Mutex );

	const auto bytes( reinterpret_cast< uint8_t const* >( data ) );
	const auto hash( HashData( bytes , size ) );
	const auto found( find( data , size , hash ) );

	if ( found != T_HashIndex::INVALID_INDEX ) {
		return strings_[ found ].get( );
	}
	return nullptr;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
uint32_t T_StringPool::find( char const* data , uint32_t sz , uint32_t hash ) const
|
|
{
|
|
uint32_t idx = index_.first( hash );
|
|
while ( idx != T_HashIndex::INVALID_INDEX ) {
|
|
auto const& p( strings_[ idx ] );
|
|
if ( p->size( ) == sz && !memcmp( p->data( ) , data , sz ) ) {
|
|
break;
|
|
}
|
|
idx = index_.next( idx );
|
|
}
|
|
return idx;
|
|
}
|
|
|
|
|
|
/*= T_StringIterator =========================================================*/
|
|
|
|
// Sets up an iterator over the specified storage, positioned on the
// character at `index`. The storage gains a user for the lifetime of the
// iterator. A null storage yields an iterator with all fields zeroed.
T_StringIterator::T_StringIterator( RP_StringData data , uint32_t index )
	: data_( data ) , index_( index )
{
	if ( data_ == nullptr ) {
		pos_ = codepoint_ = bytes_ = 0;
		return;
	}

	dynamic_cast< RP_StringDataInternal >( data_ )->addUser( );

	// Index 0 is always at offset 0; not scanning in that case also
	// avoids handing the empty string's null data to the scanner.
	if ( index == 0 ) {
		pos_ = 0;
	} else {
		pos_ = UTF8GetMemoryOffset( data_->data( ) , index );
	}

	// String storage is not NUL-terminated, so nothing may be decoded
	// once the end of the data has been reached; use the same end state
	// as next( ).
	if ( pos_ < data_->size( ) ) {
		codepoint_ = UTF8GetCodepoint( data_->data( ) + pos_ , bytes_ );
	} else {
		codepoint_ = bytes_ = 0;
	}
}
|
|
|
|
// Copies the state of another iterator; the storage, if any, gains a user
T_StringIterator::T_StringIterator( T_StringIterator const& other )
	: data_( other.data_ ) , pos_( other.pos_ ) , index_( other.index_ ) ,
	  codepoint_( other.codepoint_ ) , bytes_( other.bytes_ )
{
	if ( data_ == nullptr ) {
		return;
	}
	const auto storage( dynamic_cast< RP_StringDataInternal >( data_ ) );
	storage->addUser( );
}

/*----------------------------------------------------------------------------*/

// Lets go of the storage, if any
T_StringIterator::~T_StringIterator( )
{
	if ( data_ == nullptr ) {
		return;
	}
	const auto storage( dynamic_cast< RP_StringDataInternal >( data_ ) );
	storage->removeUser( );
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Copies the state of another iterator, letting go of the storage that was
// previously iterated over.
T_StringIterator& T_StringIterator::operator= ( T_StringIterator const& other )
{
	// Acquire the new storage before releasing the old one. Otherwise, on
	// self-assignment, the storage could be destroyed by removeUser( )
	// and then used again.
	if ( other.data_ != nullptr ) {
		dynamic_cast< RP_StringDataInternal >( other.data_ )->addUser( );
	}
	if ( data_ != nullptr ) {
		dynamic_cast< RP_StringDataInternal >( data_ )->removeUser( );
	}

	data_ = other.data_;
	pos_ = other.pos_;
	index_ = other.index_;
	codepoint_ = other.codepoint_;
	bytes_ = other.bytes_;
	return *this;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Exchanges the state of two iterators. User counts are unaffected, as each
// storage still has the same amount of iterators referring to it.
void ebcl::swap( T_StringIterator& lhs , T_StringIterator& rhs ) noexcept
{
	using std::swap;

	// Position information
	swap( lhs.index_ , rhs.index_ );
	swap( lhs.pos_ , rhs.pos_ );

	// Current character
	swap( lhs.bytes_ , rhs.bytes_ );
	swap( lhs.codepoint_ , rhs.codepoint_ );

	// Storage
	swap( lhs.data_ , rhs.data_ );
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
bool T_StringIterator::next( )
|
|
{
|
|
if ( atEnd( ) ) {
|
|
return false;
|
|
}
|
|
pos_ += bytes_;
|
|
if ( pos_ == data_->size( ) ) {
|
|
codepoint_ = bytes_ = 0;
|
|
} else {
|
|
codepoint_ = UTF8GetCodepoint( data_->data( ) + pos_ ,
|
|
bytes_ );
|
|
index_ ++;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
/*= T_String =================================================================*/
|
|
|
|
// A default-constructed string refers to the shared empty string
T_String::T_String( ) noexcept
	: data_( &T_EmptyString::EmptyString )
{
	// Nothing else to set up
}
|
|
|
|
// Creates a string from a NUL-terminated C string. Null and empty inputs
// yield the shared empty string; otherwise the pooled storage for the text
// is used if there is one, and a copy of the text is made if there is none.
T_String::T_String( char const* initial )
{
	const bool empty( initial == nullptr || *initial == 0 );
	if ( empty ) {
		data_ = &T_EmptyString::EmptyString;
		return;
	}

	const uint32_t len( strlen( initial ) );
	const auto pooled( T_StringPool::Pool.get( initial , len ) );
	if ( pooled != nullptr ) {
		data_ = pooled;
	} else {
		data_ = new T_DynamicString( initial , len , false );
	}
}
|
|
|
|
// Construct from a string builder, taking over the builder's buffer instead
// of copying it.
//
// The delegated constructor only takes ownership of the buffer when it is
// non-null and the size is non-zero; otherwise the string becomes the shared
// empty string. The builder is therefore only detached from its buffer when
// ownership was actually transferred. Detaching it unconditionally would
// leak the buffer of a builder that has capacity but no contents.
T_String::T_String( T_StringBuilder&& sb )
	: T_String( sb.data_ , sb.size_ , true )
{
	const bool transferred( sb.data_ != nullptr && sb.size_ != 0 );
	if ( transferred ) {
		sb.data_ = nullptr;
		sb.size_ = sb.length_ = sb.capacity_ = 0;
	}
}
|
|
|
|
// Construct from a string builder's current contents; the builder is left
// untouched.
T_String::T_String( T_StringBuilder const& sb )
	: T_String( sb.data_ , sb.size_ )
{
	// EMPTY
}
|
|
|
|
// Construct from a buffer and a size in bytes.
//
// A null buffer or a size of 0 yields the shared empty string; otherwise a
// new dynamic string is created, with the nodup flag passed on to it.
T_String::T_String( char const* data , uint32_t size , bool nodup )
{
	if ( data != nullptr && size != 0 ) {
		data_ = new T_DynamicString( data , size , nodup );
	} else {
		data_ = &T_EmptyString::EmptyString;
	}
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Copy constructor - shares the source's data and registers this string as
// an additional user of it.
T_String::T_String( T_String const& source )
	: data_( source.data_ )
{
	const auto shared( dynamic_cast< RP_StringDataInternal >( data_ ) );
	shared->addUser( );
}
|
|
|
|
// Move constructor - takes over the source's data; the source is left
// referring to the shared empty string.
T_String::T_String( T_String&& source ) noexcept
	: data_( source.data_ )
{
	source.data_ = &T_EmptyString::EmptyString;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Destructor - unregisters this string as a user of its data.
T_String::~T_String( )
{
	assert( data_ != nullptr );
	const auto held( dynamic_cast< RP_StringDataInternal >( data_ ) );
	held->removeUser( );
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Move assignment - releases the current data, takes over the other
// string's data and leaves the other string referring to the shared empty
// string.
T_String& T_String::operator= ( T_String&& string ) noexcept
{
	assert( data_ != nullptr );

	const auto previous( dynamic_cast< RP_StringDataInternal >( data_ ) );
	previous->removeUser( );

	data_ = string.data_;
	string.data_ = &T_EmptyString::EmptyString;
	return *this;
}
|
|
|
|
// Copy assignment - shares the other string's data.
//
// The new data gains its user BEFORE the old data loses one. Releasing first
// would destroy the data when a string holding the last reference is
// assigned to itself (or to a string sharing the same data), after which
// addUser( ) would be called on a dead object.
T_String& T_String::operator= ( T_String const& string )
{
	assert( data_ != nullptr );
	assert( string.data_ != nullptr );

	dynamic_cast< RP_StringDataInternal >( string.data_ )->addUser( );
	dynamic_cast< RP_StringDataInternal >( data_ )->removeUser( );
	data_ = string.data_;
	return *this;
}
|
|
|
|
// Assign from a string builder, taking over the builder's buffer instead of
// copying it.
//
// The replacement data is set up before the previous data is released, so
// that an allocation failure leaves this string unchanged instead of
// pointing at data it no longer holds a reference on. As with the
// corresponding constructor, a builder without contents yields the shared
// empty string; in that case the builder keeps whatever buffer it has.
T_String& T_String::operator= ( T_StringBuilder&& sb )
{
	assert( data_ != nullptr );
	const auto previous( dynamic_cast< RP_StringDataInternal >( data_ ) );

	if ( sb.data_ == nullptr || sb.size_ == 0 ) {
		data_ = &T_EmptyString::EmptyString;
	} else {
		data_ = new T_DynamicString( sb.data_ , sb.size_ , true );
		// Ownership of the buffer has been transferred
		sb.data_ = nullptr;
		sb.size_ = sb.length_ = sb.capacity_ = 0;
	}

	previous->removeUser( );
	return *this;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Assign a copy of a string builder's contents; the builder is left
// untouched.
//
// The replacement data is created before the previous data is released, so
// that an allocation failure leaves this string unchanged instead of
// pointing at data it no longer holds a reference on. As with the
// corresponding constructor, a builder without contents yields the shared
// empty string.
T_String& T_String::operator= ( T_StringBuilder const& sb )
{
	assert( data_ != nullptr );
	const auto previous( dynamic_cast< RP_StringDataInternal >( data_ ) );

	if ( sb.data_ == nullptr || sb.size_ == 0 ) {
		data_ = &T_EmptyString::EmptyString;
	} else {
		data_ = new T_DynamicString( sb.data_ , sb.size_ , false );
	}

	previous->removeUser( );
	return *this;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Exchanges the data referenced by two strings. User counts are unaffected
// since each data pointer still has exactly one owner afterwards.
void ebcl::swap( T_String& lhs , T_String& rhs ) noexcept
{
	using std::swap;
	swap( rhs.data_ , lhs.data_ );
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Creates a string whose data comes from the string pool.
//
// The data pointer must not be null. A size of 0 yields the default (empty)
// string without touching the pool.
T_String T_String::Pooled( char const* data , uint32_t size )
{
	assert( data != nullptr );

	T_String pooled;
	if ( size == 0 ) {
		return pooled;
	}
	pooled.data_ = T_StringPool::Pool.add( data , size );
	return pooled;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Replaces this string's data with the pool's entry for the same contents,
// adding it to the pool through T_StringPool::add( ). Does nothing if the
// current data is not poolable.
T_String& T_String::addToPool( )
{
	const auto current( dynamic_cast< RP_StringDataInternal >( data_ ) );
	if ( !current->poolable ) {
		return *this;
	}

	// Switch to the pooled data first, then let go of the old data
	data_ = T_StringPool::Pool.add( current->data( ) , current->size( ) );
	current->removeUser( );
	return *this;
}
|
|
|
|
|
|
// Replaces this string's data with the pool's entry for the same contents,
// but only if the pool already has one. Does nothing if the current data is
// not poolable or if the pool lookup returns nothing.
T_String& T_String::usePool( )
{
	const auto current( dynamic_cast< RP_StringDataInternal >( data_ ) );
	if ( !current->poolable ) {
		return *this;
	}

	const auto pooled( T_StringPool::Pool.get(
				current->data( ) , current->size( ) ) );
	if ( pooled == nullptr ) {
		return *this;
	}

	data_ = pooled;
	current->removeUser( );
	return *this;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Returns the first `count` codepoints of the string.
//
// If the string has `count` codepoints or fewer, a copy of the whole string
// is returned; otherwise the result is a substring object built over this
// string's data.
T_String T_String::left( uint32_t count ) const
{
	const bool wholeString( count >= length( ) );
	if ( wholeString ) {
		return *this;
	}

	// Byte offset at which the requested codepoints end
	const auto byteCount( UTF8GetMemoryOffset( data( ) , count ) );
	const auto source( dynamic_cast< RP_StringDataInternal >( data_ ) );

	T_String result;
	result.data_ = new T_Substring( source , 0 , byteCount );
	return result;
}
|
|
|
|
// Returns the last `count` codepoints of the string.
//
// If the string has `count` codepoints or fewer, a copy of the whole string
// is returned; otherwise the result is a substring object built over this
// string's data.
T_String T_String::right( uint32_t count ) const
{
	const bool wholeString( count >= length( ) );
	if ( wholeString ) {
		return *this;
	}

	// Byte offset of the first codepoint to keep
	const auto firstByte( UTF8GetMemoryOffset( data( ) , length( ) - count ) );
	const auto source( dynamic_cast< RP_StringDataInternal >( data_ ) );

	T_String result;
	result.data_ = new T_Substring( source , firstByte ,
			data_->size( ) - firstByte );
	return result;
}
|
|
|
|
// Returns `count` codepoints starting at codepoint index `offset`.
//
// Cases that map onto left( ) or right( ) are delegated to them; an offset
// past the end of the string or a count of 0 yields an empty string.
T_String T_String::substr( uint32_t offset , uint32_t count ) const
{
	// Starts at the beginning -> same as left( )
	if ( offset == 0 ) {
		return left( count );
	}

	// Nothing to extract
	if ( offset >= length( ) || count == 0 ) {
		return T_String( );
	}

	// Reaches (or overshoots) the end -> same as right( ); the sum is
	// computed on 64 bits so it cannot wrap around
	const uint64_t last( uint64_t( offset ) + count );
	if ( last >= length( ) ) {
		if ( last > length( ) ) {
			return right( length( ) - offset );
		}
		return right( count );
	}

	// General case: a range strictly inside the string
	const auto firstByte( UTF8GetMemoryOffset( data( ) , offset ) );
	const auto byteCount( UTF8GetMemoryOffset( data( ) + firstByte , count ) );
	const auto source( dynamic_cast< RP_StringDataInternal >( data_ ) );

	T_String result;
	result.data_ = new T_Substring( source , firstByte , byteCount );
	return result;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Returns the string without its leading and trailing whitespace.
//
// The string is scanned once to find the indices of the first and last
// non-whitespace codepoints, which are then passed to range( ). An empty
// string, or one containing only whitespace, yields an empty string.
T_String T_String::trim( ) const noexcept
{
	if ( length( ) == 0 ) {
		return T_String( );
	}

	T_Optional< uint32_t > first;
	uint32_t last = 0;
	for ( T_StringIterator it( *this ) ; !it.atEnd( ) ; it.next( ) ) {
		if ( T_Character( it ).isWhitespace( ) ) {
			continue;
		}
		if ( !first ) {
			first = it.index( );
		}
		last = it.index( );
	}

	if ( !first ) {
		// Whitespace only
		return T_String( );
	}
	return range( *first , last );
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Builds a new string by applying the mapping function to each codepoint of
// this string, in order.
T_String T_String::mapped( T_Character::F_Map f ) const noexcept
{
	T_StringBuilder output;
	for ( T_StringIterator it{ *this } ; !it.atEnd( ) ; it.next( ) ) {
		output << f( it );
	}
	return T_String( std::move( output ) );
}
|
|
|
|
// Returns a copy of the string in which T_Character::toUpper( ) has been
// applied to every codepoint.
T_String T_String::toUpper( ) const noexcept
{
	const auto upperCase( []( auto c ) {
		return c.toUpper( );
	} );
	return mapped( upperCase );
}
|
|
|
|
// Returns a copy of the string in which T_Character::toLower( ) has been
// applied to every codepoint.
T_String T_String::toLower( ) const noexcept
{
	const auto lowerCase( []( auto c ) {
		return c.toLower( );
	} );
	return mapped( lowerCase );
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Compares two strings codepoint by codepoint.
//
// Returns -1 if this string sorts before the other, 1 if it sorts after,
// and 0 if they are equal. Strings sharing the same data are equal without
// any further check. If either string is not valid, 0 is returned.
int32_t T_String::compare( T_String const& other ) const
{
	if ( this == &other || data_ == other.data_ ) {
		return 0;
	}
	if ( !valid( ) || !other.valid( ) ) {
		return 0;
	}

	T_StringIterator mine( *this ) , theirs( other );
	for ( ; !mine.atEnd( ) && !theirs.atEnd( ) ; mine.next( ) , theirs.next( ) ) {
		T_Character a( mine ) , b( theirs );
		if ( a < b ) {
			return -1;
		}
		if ( a > b ) {
			return 1;
		}
	}

	// Common part is identical: the shorter string sorts first
	if ( !theirs.atEnd( ) ) {
		return -1;
	}
	return mine.atEnd( ) ? 0 : 1;
}
|
|
|
|
// Compares two strings codepoint by codepoint after converting each
// codepoint to lower case.
//
// Returns -1 if this string sorts before the other, 1 if it sorts after,
// and 0 if they are equal. If either string is not valid, 0 is returned.
int32_t T_String::compareIgnoreCase( T_String const& other ) const
{
	if ( !valid( ) || !other.valid( ) ) {
		return 0;
	}

	T_StringIterator mine( *this ) , theirs( other );
	for ( ; !mine.atEnd( ) && !theirs.atEnd( ) ; mine.next( ) , theirs.next( ) ) {
		auto a( T_Character( mine ).toLower( ) ) ,
		     b( T_Character( theirs ).toLower( ) );
		if ( a < b ) {
			return -1;
		}
		if ( a > b ) {
			return 1;
		}
	}

	// Common part is identical: the shorter string sorts first
	if ( !theirs.atEnd( ) ) {
		return -1;
	}
	return mine.atEnd( ) ? 0 : 1;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Checks whether this string begins with the other string.
//
// Returns false if either string is not valid, true if the other string is
// empty, and otherwise the result of a byte-wise comparison of the other
// string against the beginning of this one.
//
// The size guard compares BYTE sizes rather than codepoint counts: the
// comparison below reads other.size( ) bytes from this string, and a prefix
// with fewer codepoints can still be longer in bytes (e.g. two 2-byte
// codepoints against three ASCII characters), which would read past the end
// of this string's data.
bool T_String::startsWith( T_String const& other ) const
{
	if ( !( valid( ) && other.valid( ) ) ) {
		return false;
	}

	const auto prefixSize( other.size( ) );
	if ( prefixSize > size( ) ) {
		return false;
	}
	if ( prefixSize == 0 ) {
		return true;
	}
	return !memcmp( data( ) , other.data( ) , prefixSize );
}
|
|
|
|
// Checks whether this string ends with the other string.
//
// Returns false if either string is not valid, true if the other string is
// empty, and otherwise the result of a byte-wise comparison of the other
// string against the end of this one.
//
// The size guard compares BYTE sizes rather than codepoint counts: a suffix
// with fewer codepoints can still be longer in bytes, in which case
// size( ) - other.size( ) would wrap around and the comparison would read
// outside of this string's data.
bool T_String::endsWith( T_String const& other ) const
{
	if ( !( valid( ) && other.valid( ) ) ) {
		return false;
	}

	const auto suffixSize( other.size( ) );
	if ( suffixSize > size( ) ) {
		return false;
	}
	if ( suffixSize == 0 ) {
		return true;
	}
	return !memcmp( data( ) + ( size( ) - suffixSize ) ,
			other.data( ) , suffixSize );
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Finds the first occurrence of another string, starting the search at
// codepoint index `from`.
//
// Returns the codepoint index of the match, or -1 if there is none, if the
// start index is past the end, if the remaining part is shorter than the
// searched string, or if either string is not valid. An empty searched
// string matches at `from`.
int32_t T_String::find( T_String const& other , uint32_t from ) const
{
	const bool cannotMatch( from > length( )
			|| length( ) - from < other.length( ) );
	if ( cannotMatch || !( valid( ) && other.valid( ) ) ) {
		return -1;
	}
	if ( other.length( ) == 0 ) {
		return from;
	}

	char const* const needle( other.data( ) );
	uint32_t index( from );
	uint32_t byte( UTF8GetMemoryOffset( data( ) , from ) );
	for ( ; byte + other.size( ) <= size( ) ; index ++ ) {
		char const* const here( data( ) + byte );
		if ( !memcmp( here , needle , other.size( ) ) ) {
			return index;
		}
		// No match here, skip one codepoint
		byte += UTF8GetMemoryOffset( here , 1 );
	}
	return -1;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Finds the first occurrence of a character, starting the search at
// codepoint index `from`.
//
// Returns the codepoint index of the match, or -1 if there is none, if the
// string or the character is not valid, or if `from` is not inside the
// string.
int32_t T_String::find( T_Character character , uint32_t from ) const
{
	const bool searchable( valid( ) && character.isValid( )
			&& from < length( ) );
	if ( !searchable ) {
		return -1;
	}

	for ( T_StringIterator it( getIterator( from ) ) ; !it.atEnd( ) ; it.next( ) ) {
		if ( T_Character( it ) == character ) {
			return it.index( );
		}
	}
	return -1;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Returns a copy of the string in which every occurrence of a character has
// been replaced with another character.
//
// If the string or either character is not valid, an empty string is
// returned. If both characters are the same, or this string tests false
// through operator!, a copy of the string is returned as is.
T_String T_String::replace( T_Character initial , T_Character replacement ) const
{
	const bool usable( valid( ) && initial.isValid( )
			&& replacement.isValid( ) );
	if ( !usable ) {
		return T_String( );
	}
	if ( initial == replacement || !*this ) {
		return *this;
	}

	T_StringBuilder output;
	output.ensureCapacity( size( ) );
	for ( T_StringIterator it( *this ) ; !it.atEnd( ) ; it.next( ) ) {
		T_Character current( it );
		if ( current == initial ) {
			output << replacement;
		} else {
			output << current;
		}
	}
	return T_String( std::move( output ) );
}
|
|
|
|
// Returns a copy of the string in which every occurrence of a substring has
// been replaced with another string.
//
// If any of the three strings is not valid, an empty string is returned.
// The string is returned unchanged when there is nothing to do (the
// searched string tests false through operator!, equals the replacement or
// has more codepoints than this string, or this string tests false). If
// this string equals the searched string, the replacement itself is
// returned.
T_String T_String::replace( T_String const& initial , T_String const& replacement ) const
{
	const bool usable( valid( ) && initial.valid( ) && replacement.valid( ) );
	if ( !usable ) {
		return T_String( );
	}

	if ( !initial || initial == replacement || initial.length( ) > length( ) || !*this ) {
		return *this;
	}
	if ( initial.length( ) == length( ) && *this == initial ) {
		return replacement;
	}

	char const* const needle( initial.data( ) );
	const auto needleBytes( initial.size( ) );
	char const* const haystack( data( ) );
	const auto haystackBytes( size( ) );

	T_StringBuilder output;
	for ( uint32_t pos = 0 ; pos < haystackBytes ; ) {
		const bool match( pos + needleBytes <= size( )
				&& !memcmp( haystack + pos , needle , needleBytes ) );
		if ( match ) {
			output << replacement;
			pos += needleBytes;
			continue;
		}

		// No match: copy one codepoint; its byte count is written to
		// `step` by the decoder.
		uint32_t step;
		output << T_Character( UTF8GetCodepoint( haystack + pos , step ) );
		pos += step;
	}
	return T_String( std::move( output ) );
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Converts the string to a NUL-terminated buffer for OS-level calls.
//
// On Windows, the buffer contains the string converted to wide characters
// through MultiByteToWideChar( ); elsewhere, it contains a copy of the
// string's bytes. In both cases a terminating NUL is appended. If the string
// is not valid, or if the conversion fails, an empty buffer is returned.
T_Buffer< char > T_String::toOSString( ) const
{
	if ( !valid( ) ) {
		return T_Buffer< char >( );
	}

#ifdef _WIN32

	// MultiByteToWideChar( ) fails when given a size of 0, so the empty
	// string is handled without calling it.
	const int srcBytes( int( data_->size( ) ) );
	int nWide( 0 );
	if ( srcBytes != 0 ) {
		nWide = MultiByteToWideChar( CP_UTF8 , 0 ,
				data( ) , srcBytes ,
				nullptr , 0 );
		if ( nWide <= 0 ) {
			return T_Buffer< char >( );
		}
	}

	// The buffer size is in bytes, while the API counts wide characters.
	// As an explicit input size is used, the API does not write a
	// terminator, so room for one is added here.
	T_Buffer< char > output( ( nWide + 1 ) * sizeof( wchar_t ) );
	wchar_t* const wide( ( wchar_t* ) output.data( ) );
	if ( nWide != 0 && !MultiByteToWideChar( CP_UTF8 , 0 ,
				data( ) , srcBytes ,
				wide , nWide ) )
	{
		return T_Buffer< char >( );
	}
	wide[ nWide ] = 0;
	return output;

#else

	const auto n( data_->size( ) );
	T_Buffer< char > output( n + 1 );
	memcpy( output.data( ) , data( ) , n );
	output[ n ] = 0;
	return output;

#endif
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Reads a string: a 32-bit size in bytes followed by that many bytes of
// data. A size of 0 yields an empty string. A short read causes a BAD_DATA
// stream error to be thrown.
//
// The data is read into a raw buffer which is handed over to the string
// without copying. Until that happens the buffer is owned by this function,
// so it is released explicitly on both failure paths (short read, or an
// exception coming from the stream).
M_DEFINE_OBJECT_READER( T_String )
{
	const uint32_t size( reader.read< uint32_t >( ) );
	if ( size == 0 ) {
		return T_String( );
	}

	char* const buffer = ( char* )::operator new ( size );
	uint32_t r;
	try {
		r = reader.stream( ).read( buffer , size );
	} catch ( ... ) {
		::operator delete ( buffer );
		throw;
	}
	if ( r != size ) {
		::operator delete ( buffer );
		throw X_StreamError( E_StreamError::BAD_DATA );
	}
	return T_String( buffer , size , true );
}
|
|
|
|
// Writes a string: a 32-bit size in bytes followed by the string's data.
// Nothing is written after the size for an empty string. A short write
// causes a BAD_DATA stream error to be thrown.
M_DEFINE_OBJECT_WRITER( T_String )
{
	const uint32_t bytes( item.size( ) );
	writer.write( bytes );

	const bool failed( bytes != 0
			&& uint32_t( writer.stream( ).write( item.data( ) , bytes ) ) != bytes );
	if ( failed ) {
		throw X_StreamError( E_StreamError::BAD_DATA );
	}
}
|
|
|
|
|
|
/*= T_StringBuilder ==========================================================*/
|
|
|
|
// Copy constructor - allocates enough storage for the other builder's
// contents and copies them. Nothing is copied if the other builder is empty.
T_StringBuilder::T_StringBuilder( T_StringBuilder const& other )
	: data_( nullptr ) , capacity_( 0 ) , size_( other.size_ ) ,
		length_( other.length_ )
{
	ensureCapacity( size_ );
	if ( size_ == 0 ) {
		return;
	}
	memcpy( data_ , other.data_ , size_ );
}
|
|
|
|
// Move constructor - starts from a default-constructed builder, then
// exchanges its state with the other builder's.
T_StringBuilder::T_StringBuilder( T_StringBuilder&& other ) noexcept
	: T_StringBuilder( )
{
	swap( *this , other );
}
|
|
|
|
// Construct from a buffer of UTF-8 data and its size in bytes. The data is
// copied, and the length is obtained from UTF8BufferInfo( ).
//
// Nothing is copied when the size is 0: no storage gets allocated in that
// case, and calling memcpy( ) with a null pointer is undefined behaviour
// even for a size of 0.
T_StringBuilder::T_StringBuilder( char const* data , uint32_t size )
	: T_StringBuilder( )
{
	ensureCapacity( size );
	if ( size != 0 ) {
		memcpy( data_ , data , size );
	}
	size_ = size;
	UTF8BufferInfo( data , size , length_ );
}
|
|
|
|
// Construct from a string, copying its data and taking its size and length.
//
// Nothing is copied for an empty string: no storage gets allocated in that
// case, and calling memcpy( ) with a null pointer is undefined behaviour
// even for a size of 0.
T_StringBuilder::T_StringBuilder( T_String const& string )
	: T_StringBuilder( )
{
	size_ = string.size( );
	length_ = string.length( );
	ensureCapacity( size_ );
	if ( size_ != 0 ) {
		memcpy( data_ , string.data( ) , size_ );
	}
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Destructor - releases the builder's storage.
T_StringBuilder::~T_StringBuilder( )
{
	::operator delete( data_ );
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Exchanges the complete state (storage, capacity, size and length) of two
// string builders.
void ebcl::swap( T_StringBuilder& lhs , T_StringBuilder& rhs )
{
	using std::swap;

	// Contents information
	swap( lhs.length_ , rhs.length_ );
	swap( lhs.size_ , rhs.size_ );

	// Storage
	swap( lhs.capacity_ , rhs.capacity_ );
	swap( lhs.data_ , rhs.data_ );
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Copy assignment - replaces the builder's contents with a copy of the other
// builder's. The storage is grown if necessary, but never shrunk.
//
// Self-assignment is a no-op; without the check, memcpy( ) would be called
// with identical source and destination ranges, which it does not allow.
T_StringBuilder& T_StringBuilder::operator=( T_StringBuilder const& other )
{
	if ( this == &other ) {
		return *this;
	}

	ensureCapacity( other.size( ) );
	size_ = other.size_;
	length_ = other.length_;
	if ( size_ != 0 ) {
		memcpy( data_ , other.data_ , size_ );
	}
	return *this;
}
|
|
|
|
// Move assignment - releases the current storage and takes over the other
// builder's storage and contents. The other builder is left empty, without
// storage.
T_StringBuilder& T_StringBuilder::operator=( T_StringBuilder&& other ) noexcept
{
	// Deleting a null pointer is a no-op, no check needed
	::operator delete( data_ );

	// Take over the other builder's state...
	length_ = other.length_;
	size_ = other.size_;
	capacity_ = other.capacity_;
	data_ = other.data_;

	// ... and reset it
	other.data_ = nullptr;
	other.length_ = 0;
	other.size_ = 0;
	other.capacity_ = 0;
	return *this;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Makes sure the builder's storage can hold at least `minCap` bytes.
//
// If the current capacity is insufficient, new storage is allocated, with
// the requested capacity rounded up to a multiple of C_GROWTH; the current
// contents are copied to it and the old storage is released.
T_StringBuilder& T_StringBuilder::ensureCapacity( uint32_t minCap )
{
	if ( minCap <= capacity_ ) {
		return *this;
	}

	// Round up to the next multiple of C_GROWTH
	const uint32_t excess = minCap % C_GROWTH;
	uint32_t rounded = minCap;
	if ( excess != 0 ) {
		rounded += C_GROWTH - excess;
	}

	char* const storage = ( char* )::operator new ( rounded );
	if ( data_ != nullptr ) {
		memcpy( storage , data_ , size_ );
		::operator delete ( data_ );
	}
	capacity_ = rounded;
	data_ = storage;
	return *this;
}
|
|
|
|
// Releases the builder's storage, resets its capacity to 0, then calls
// clear( ).
T_StringBuilder& T_StringBuilder::free( )
{
	::operator delete( data_ );
	data_ = nullptr;
	capacity_ = 0;
	return clear( );
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Appends a copy of another builder's contents. Nothing happens if the
// other builder is empty.
T_StringBuilder& T_StringBuilder::append( T_StringBuilder const& other )
{
	const uint32_t extra = other.size_;
	if ( extra == 0 ) {
		return *this;
	}

	ensureCapacity( size_ + extra );
	memcpy( data_ + size_ , other.data_ , extra );
	length_ += other.length_;
	size_ += extra;
	return *this;
}
|
|
|
|
// Appends another builder's contents, given as an rvalue.
//
// If this builder is empty and its capacity does not exceed the other's, the
// two builders simply exchange their states. Otherwise the contents are
// copied as in the const-reference overload.
T_StringBuilder& T_StringBuilder::append( T_StringBuilder&& other )
{
	const bool canSteal( size_ == 0 && capacity_ <= other.capacity_ );
	if ( !canSteal ) {
		return append( ( T_StringBuilder const& ) other );
	}
	swap( *this , other );
	return *this;
}
|
|
|
|
// Appends a string's data. Nothing happens if the string is empty.
T_StringBuilder& T_StringBuilder::append( T_String const& string )
{
	const uint32_t extra = string.size( );
	if ( string.size( ) == 0 ) {
		return *this;
	}

	ensureCapacity( size_ + extra );
	memcpy( data_ + size_ , string.data( ) , extra );
	length_ += string.length( );
	size_ += extra;
	return *this;
}
|
|
|
|
// Appends `size` bytes of UTF-8 data from a buffer. The amount by which the
// length increases is obtained from UTF8BufferInfo( ). Nothing happens if
// the size is 0.
T_StringBuilder& T_StringBuilder::append( char const* string , uint32_t size )
{
	if ( size == 0 ) {
		return *this;
	}

	uint32_t codepoints;
	UTF8BufferInfo( string , size , codepoints );

	ensureCapacity( size_ + size );
	memcpy( data_ + size_ , string , size );
	length_ += codepoints;
	size_ += size;
	return *this;
}
|
|
|
|
// Appends a single character given as a `char`. Only values below 128 are
// appended; anything else is ignored.
T_StringBuilder& T_StringBuilder::append( char character )
{
	const bool ascii( uint8_t( character ) < 128 );
	if ( !ascii ) {
		return *this;
	}

	ensureCapacity( size_ + 1 );
	data_[ size_ ] = character;
	size_ ++;
	length_ ++;
	return *this;
}
|
|
|
|
// Appends a single codepoint, encoded as UTF-8. Invalid characters are
// ignored.
//
// Room for 4 more bytes is requested beyond the current SIZE before
// encoding. Requesting it beyond the current capacity instead would make the
// storage grow, and be reallocated and copied, every time a character is
// appended.
T_StringBuilder& T_StringBuilder::append( T_Character character )
{
	if ( !character.isValid( ) ) {
		return *this;
	}

	ensureCapacity( size_ + 4 );
	const uint32_t w = UTF8PutCodepoint( data_ + size_ ,
			capacity_ - size_ , character );
	assert( w != 0 );
	size_ += w;
	length_ ++;
	return *this;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Appends a signed integer.
//
// value     the value to append
// base      numeric base, between 2 and 36; digits above 9 use upper-case
//           letters
// useSep    whether separators are inserted between groups of digits
// sep       the separator character
// sepEvery  the number of digits in each group, counted from the right
//
// Negative values are prefixed with '-'. The digits are generated from the
// value's magnitude, computed as an unsigned 64-bit value, since negating
// the smallest int64_t directly would overflow.
T_StringBuilder& T_StringBuilder::appendNumeric( int64_t value , int base , bool useSep , T_Character sep ,
		int sepEvery )
{
	assert( base >= 2 && base <= 36 );
	assert( sepEvery > 0 );
	assert( sep.isValid( ) );

	if ( value == 0 ) {
		return append( '0' );
	}

	const bool neg = value < 0;
	uint64_t magnitude = neg
		? ( uint64_t( 0 ) - uint64_t( value ) )
		: uint64_t( value );

	// Scratch buffer, filled from the end. At most 64 digits, 64
	// separators (if sepEvery is 1) and a sign are needed, which allows
	// for a fixed-size array; `size` is the part of it used here.
	constexpr uint32_t C_MAX_SIZE = 64 + 64 + 1;
	const uint32_t size = 64 + ( useSep ? ( 64 / sepEvery ) : 0 )
		+ ( neg ? 1 : 0 );
	uint32_t output[ C_MAX_SIZE ];
	uint32_t len = 0 , ecap = 0;
	int sepl = 0;

	while ( magnitude != 0 ) {
		const uint32_t mod = uint32_t( magnitude % uint64_t( base ) );
		output[ size - ( len + 1 ) ] = mod
			+ ( mod < 10 ? '0' : ( 'A' - 10 ) );
		len ++;
		sepl ++;
		ecap ++;
		// The last digit may fill the buffer exactly
		assert( len <= size );

		magnitude /= uint64_t( base );
		if ( useSep && sepl == sepEvery && magnitude != 0 ) {
			output[ size - ( len + 1 ) ] = sep;
			len ++;
			sepl = 0;
			// A separator may need up to 4 bytes once encoded
			ecap += 4;
			// At least one more digit follows a separator
			assert( len < size );
		}
	}

	if ( neg ) {
		output[ size - ( len + 1 ) ] = '-';
		len ++;
		ecap ++;
		assert( len <= size );
	}

	// Encode the generated codepoints at the end of the builder's data
	ensureCapacity( size_ + ecap );
	ecap = 0;
	char* ptr = data_ + size_;
	for ( uint32_t i = size - len ; i < size ; i ++ ) {
		const uint32_t wr = UTF8PutCodepoint( ptr , 4 , output[ i ] );
		ptr += wr;
		ecap += wr;
	}
	size_ += ecap;
	length_ += len;

	return *this;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Appends an unsigned integer.
//
// value     the value to append
// base      numeric base, between 2 and 36; digits above 9 use upper-case
//           letters
// useSep    whether separators are inserted between groups of digits
// sep       the separator character
// sepEvery  the number of digits in each group, counted from the right
T_StringBuilder& T_StringBuilder::appendNumeric( uint64_t value , int base , bool useSep , T_Character sep ,
		int sepEvery )
{
	assert( base >= 2 && base <= 36 );
	assert( sepEvery > 0 );
	assert( sep.isValid( ) );

	if ( value == 0 ) {
		return append( '0' );
	}

	// Scratch buffer, filled from the end. At most 64 digits and 64
	// separators (if sepEvery is 1) are needed, which allows for a
	// fixed-size array; `size` is the part of it used here.
	constexpr uint32_t C_MAX_SIZE = 64 + 64;
	const uint32_t size = 64 + ( useSep ? ( 64 / sepEvery ) : 0 );
	uint32_t output[ C_MAX_SIZE ];
	uint32_t len = 0 , ecap = 0;
	int sepl = 0;

	while ( value != 0 ) {
		const uint32_t mod = uint32_t( value % uint64_t( base ) );
		output[ size - ( len + 1 ) ] = mod
			+ ( mod < 10 ? '0' : ( 'A' - 10 ) );
		len ++;
		sepl ++;
		ecap ++;
		// The last digit may fill the buffer exactly (e.g. 64 binary
		// digits without separators)
		assert( len <= size );

		value /= uint64_t( base );
		if ( useSep && sepl == sepEvery && value != 0 ) {
			output[ size - ( len + 1 ) ] = sep;
			len ++;
			sepl = 0;
			// A separator may need up to 4 bytes once encoded
			ecap += 4;
			// At least one more digit follows a separator
			assert( len < size );
		}
	}

	// Encode the generated codepoints at the end of the builder's data
	ensureCapacity( size_ + ecap );
	ecap = 0;
	char* ptr = data_ + size_;
	for ( uint32_t i = size - len ; i < size ; i ++ ) {
		const uint32_t wr = UTF8PutCodepoint( ptr , 4 , output[ i ] );
		ptr += wr;
		ecap += wr;
	}
	size_ += ecap;
	length_ += len;

	return *this;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Appends a floating point value formatted through snprintf( ).
//
// value          the value to append
// precision      the precision passed to the format
// trailingZeros  selects the "%.*f" format if true, "%.*g" otherwise
//
// The text is formatted directly at the end of the builder's storage rather
// than into a variable-length array on the stack (a compiler extension in
// C++). Nothing is appended if snprintf( ) reports an error or an empty
// result.
T_StringBuilder& T_StringBuilder::appendDouble( double value , uint32_t precision , bool trailingZeros )
{
	char const* const fmt = trailingZeros ? "%.*f" : "%.*g";
	// The '*' precision is read as an int by snprintf( )
	const int prec = int( precision );

	const int nchars = snprintf( nullptr , 0 , fmt , prec , value );
	if ( nchars <= 0 ) {
		return *this;
	}

	// One extra byte for the terminator written by snprintf( ); it is not
	// counted as part of the builder's contents.
	ensureCapacity( size_ + nchars + 1 );
	snprintf( data_ + size_ , nchars + 1 , fmt , prec , value );

	uint32_t len;
	UTF8BufferInfo( data_ + size_ , nchars , len );
	size_ += nchars;
	length_ += len;
	return *this;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Writes a string builder's contents: its size in bytes followed by its
// data.
//
// This follows the T_String writer: nothing is written after the size for an
// empty builder (whose data pointer may be null), and a short write causes a
// BAD_DATA stream error to be thrown instead of being silently ignored.
M_DEFINE_OBJECT_WRITER( T_StringBuilder )
{
	const uint32_t s( item.size( ) );
	writer.write( item.size( ) );
	if ( s != 0 ) {
		const uint32_t w( writer.stream( ).write( item.data( ) , s ) );
		if ( w != s ) {
			throw X_StreamError( E_StreamError::BAD_DATA );
		}
	}
}
|