/******************************************************************************/
/* STRINGS AND RELATED UTILITIES **********************************************/
/******************************************************************************/

#ifndef _H_EBCL_STRINGS
#define _H_EBCL_STRINGS
#include <ebcl/Externals.hh>
#include <ebcl/Pointers.hh>
#include <ebcl/Arrays.hh>
#include <ebcl/Buffers.hh>
namespace ebcl {


/*= UTF-8 UTILITY FUNCTIONS ==================================================*/

// Check whether the specified C string is valid UTF-8.
// NOTE(review): the string is presumably expected to be 0-terminated, as no
// size is passed -- confirm against the implementation.
bool UTF8IsValid( char const* string );

// Get the length of the specified 0-terminated UTF-8 string. The length is
// expressed in characters, not in bytes.
// NOTE(review): the result for input that is not valid UTF-8 cannot be
// determined from this header.
uint32_t UTF8Length( char const* string );

// Get the size of the specified 0-terminated UTF-8 string. The size is
// expressed in bytes, not in characters.
// NOTE(review): whether the terminating 0 is included in the result is not
// visible from this header -- confirm against the implementation.
uint32_t UTF8Size( char const* string );

// Combined function that does the work of UTF8IsValid( ), UTF8Size( ) and
// UTF8Length( ) in one call.
// NOTE(review): presumably the return value is the validity of the string,
// while "size" receives its size in bytes and "length" its length in
// characters -- confirm against the implementation.
bool UTF8Info( char const* string , uint32_t& size , uint32_t& length );

// Check if the specified data is a valid UTF-8 string, and compute its length
// (in characters). Unlike the C string functions of this section, the amount
// of data to examine is passed explicitly through "size".
// NOTE(review): presumably "size" is in bytes and the return value indicates
// validity, the length being stored in "length" -- confirm.
bool UTF8BufferInfo( char const* data , uint32_t size , uint32_t& length );

// Get the codepoint from a sequence of UTF-8 bytes. Sets "bytes" to the amount
// of bytes read from the input.
// NOTE(review): no input size is passed, so the caller is presumably
// responsible for making sure a complete sequence is available at "data".
uint32_t UTF8GetCodepoint( char const* data , uint32_t& bytes );

// Get the codepoint from a sequence of UTF-8 bytes. Overload of
// UTF8GetCodepoint( ) for callers that do not need to know the amount of bytes
// that were read from the input.
uint32_t UTF8GetCodepoint( char const* data );

// Write an UTF-8 encoded codepoint to a string.
//	output		where the encoded bytes are written
//	available	amount of space available at "output"
//	codepoint	the codepoint to encode
// Returns the amount of bytes that were written, or 0 if there wasn't enough
// space.
uint32_t UTF8PutCodepoint( char* output , uint32_t available , uint32_t codepoint );

// Get the memory offset of a codepoint in an UTF-8 sequence based on its index.
// NOTE(review): presumably "index" is a position in characters and the result
// an offset in bytes from "input"; the behaviour when "index" lies beyond the
// end of the sequence is not visible from this header -- confirm.
uint32_t UTF8GetMemoryOffset( char const* input , uint32_t index );

// Convert an UTF-8 sequence into an unsigned integer.
//	input		the UTF-8 data to convert
//	size		size of the data (presumably in bytes -- confirm)
//	ok		optional; defaults to nullptr. Presumably receives the
//			success status of the conversion when set -- confirm
//	base		numeric base; defaults to 10
//	useSep		defaults to false; presumably enables support for a
//			digit separator in the input -- confirm
//	separator	codepoint of the separator; defaults to a space
// NOTE(review): the value returned on failure and the set of supported bases
// are not visible from this header.
uint64_t UTF8ToUnsignedInteger( char const* input , uint32_t size ,
		bool * ok = nullptr , int base = 10 , bool useSep = false ,
		uint32_t separator = ' ' );

// Convert an UTF-8 sequence into a signed integer.
//	input		the UTF-8 data to convert
//	size		size of the data (presumably in bytes -- confirm)
//	ok		optional; defaults to nullptr. Presumably receives the
//			success status of the conversion when set -- confirm
//	base		numeric base; defaults to 10
//	useSep		defaults to false; presumably enables support for a
//			digit separator in the input -- confirm
//	separator	codepoint of the separator; defaults to a space
// NOTE(review): the value returned on failure and the accepted sign syntax
// are not visible from this header.
int64_t UTF8ToInteger( char const* input , uint32_t size ,
		bool * ok = nullptr , int base = 10 , bool useSep = false ,
		uint32_t separator = ' ' );

// Convert an UTF-8 sequence into a double precision floating point number. The
// sequence will be checked, converted into a C string and passed to strtod()
// for actual conversion.
//	input		the UTF-8 data to convert
//	size		size of the data (presumably in bytes -- confirm)
//	ok		optional; defaults to nullptr. Presumably receives the
//			success status of the conversion when set -- confirm
//	decimalPoint	codepoint used as the decimal point; defaults to '.'
//	useSep		defaults to false; presumably enables support for a
//			digit separator in the input -- confirm
//	separator	codepoint of the separator; defaults to a space
double UTF8ToDouble( char const* input , uint32_t size ,
		bool * ok = nullptr , uint32_t decimalPoint = '.' ,
		bool useSep = false , uint32_t separator = ' ' );


/*= UNICODE CHARACTERS =======================================================*/

// T_Character - A single Unicode character, stored as its codepoint.
//
// The codepoint member is const, which causes the copy assignment operator to
// be implicitly deleted: characters can be copy-constructed, not reassigned.
//
// NOTE(review): the members are only declared here. Their definitions are
// presumably provided by <ebcl/inline/Strings.hh>, which this header includes
// at its very end -- confirm.
struct T_Character
{
	// Signature of a character transformation function, as accepted by
	// T_String::mapped( ).
	using F_Map = std::function< T_Character( T_Character ) >;

	// The character's codepoint
	const uint32_t codepoint;

	// Construction from nothing, from another character or from an integer.
	// NOTE(review): the codepoint set by the default constructor is not
	// visible from this header. M_WITH_INT is defined outside this file;
	// it presumably introduces a template parameter restricted to integer
	// types -- confirm.
	constexpr T_Character( ) noexcept;
	constexpr T_Character( T_Character const& other ) noexcept;
	M_WITH_INT( T ) constexpr T_Character( T codepoint ) noexcept;

	// ---------------------------------------------------------------------

	// Character classification.
	// NOTE(review): the exact ranges of codepoints each of these accepts
	// (e.g. whether non-ASCII letters count as alphabetic) cannot be
	// determined from this header.
	constexpr bool isValid( ) const;
	constexpr bool isAscii( ) const;
	constexpr bool isControl( ) const;
	constexpr bool isUppercase( ) const;
	constexpr bool isLowercase( ) const;
	constexpr bool isAlpha( ) const;
	constexpr bool isNumeric( ) const;
	constexpr bool isAlphanumeric( ) const;
	constexpr bool isWhitespace( ) const;

	// ---------------------------------------------------------------------

	// Comparisons with another character
	constexpr bool operator== ( T_Character const& other ) const;
	constexpr bool operator!= ( T_Character const& other ) const;
	constexpr bool operator< ( T_Character const& other ) const;
	constexpr bool operator> ( T_Character const& other ) const;
	constexpr bool operator<= ( T_Character const& other ) const;
	constexpr bool operator>= ( T_Character const& other ) const;

	// Comparisons with an integer value (presumably interpreted as a
	// codepoint -- confirm)
	M_WITH_INT( T ) constexpr bool operator== ( T other ) const;
	M_WITH_INT( T ) constexpr bool operator!= ( T other ) const;
	M_WITH_INT( T ) constexpr bool operator< ( T other ) const;
	M_WITH_INT( T ) constexpr bool operator<= ( T other ) const;
	M_WITH_INT( T ) constexpr bool operator> ( T other ) const;
	M_WITH_INT( T ) constexpr bool operator>= ( T other ) const;

	// Implicit conversion to an unsigned integer (presumably the
	// codepoint -- confirm)
	constexpr operator uint32_t ( ) const;

	// ---------------------------------------------------------------------

	// Write the character to a buffer in which "avail" bytes are available.
	// NOTE(review): presumably encodes the character as UTF-8 and returns
	// the amount of bytes written or 0 if there wasn't enough space, in
	// the same manner as UTF8PutCodepoint( ) -- confirm.
	uint32_t writeTo( char* output , uint32_t avail ) const;

	// ---------------------------------------------------------------------

	// Case conversions. Both return a new character and leave this one
	// untouched.
	// NOTE(review): the range of characters for which case conversion is
	// supported is not visible from this header.
	constexpr T_Character toUpper( ) const;
	constexpr T_Character toLower( ) const;
};


// NOTE(review): M_CLASS_POINTERS is defined outside this file (presumably in
// <ebcl/Pointers.hh>). It presumably declares the pointer helper types that
// go with T_Character -- check the macro for the names it introduces.
M_CLASS_POINTERS( Character );


/*= IMMUTABLE UTF8 STRINGS ===================================================*/

/*
 * NOTE: the *objects* are NOT immutable. The strings they contain, however,
 * are.
 */

// Forward declarations: the interface of T_String refers to both of these
// classes, which are only defined further down in this header.
class T_StringBuilder;
class T_StringIterator;

// A_StringData - Abstract base for the various types of string storage.
//
// Holds the fields common to all types of storage and provides read-only
// accessors. The class cannot be instantiated by itself (pure virtual
// destructor, protected constructor), and copy and move construction are
// disabled.
class A_StringData
{
   protected:
	// NOTE(review): the fields below presumably back the accessors of the
	// same names; whether the memory at data_ is owned is up to derived
	// classes and cannot be determined from this header.
	char* data_;
	uint32_t size_;
	bool valid_;
	uint32_t length_;

	// The defaulted constructor does not initialise the fields by itself,
	// as they have no default member initialisers; derived classes are
	// presumably expected to set them.
	A_StringData( ) = default;
	A_StringData( A_StringData const& ) = delete;
	A_StringData( A_StringData&& ) = delete;

   public:
	// Pure virtual so that the class is abstract. A definition must still
	// be provided somewhere, since the destructors of derived classes
	// call it.
	virtual ~A_StringData( ) = 0;

	// Is valid UTF-8?
	bool valid( ) const;
	// Get a pointer to the data
	char const * data( ) const;
	// Length in characters
	uint32_t length( ) const;
	// Size in bytes
	uint32_t size( ) const;
};
// NOTE(review): M_ABSTRACT_POINTERS is defined outside this file. The
// RP_StringData type used by T_String and T_StringIterator is presumably one
// of the names it declares -- confirm.
M_ABSTRACT_POINTERS( StringData );

/*----------------------------------------------------------------------------*/

// T_String - Main UTF-8 string class
//
// A T_String object refers to a string storage (A_StringData) through its only
// field. The object itself can be reassigned, but all operations that would
// alter the text are const and return a new T_String instead.
class T_String
{
   private:
	// The string's storage.
	// NOTE(review): RP_StringData is declared outside this class,
	// presumably by M_ABSTRACT_POINTERS( StringData ); its ownership
	// semantics are not visible from this header.
	RP_StringData data_;

   public:
	// Construct an empty string. No overhead whatsoever, just an assignment
	T_String( ) noexcept;

	// This constructor will try to:
	//	- use the empty string if initial is null or ""
	//	- use a pooled string if one matches initial
	//	- create a dynamic string otherwise.
	// Because of this, it is relatively slow and should be avoided in
	// general.
	// It is not explicit, so C strings convert to T_String implicitly.
	T_String( char const* initial );

	// Construct a string from the contents of a string builder.
	// NOTE(review): the rvalue version presumably takes over the builder's
	// buffer instead of copying it -- confirm.
	T_String( T_StringBuilder&& sb );
	T_String( T_StringBuilder const& sb );

	// Construct a dynamic string, either using the provided memory
	// or duplicating it. Doesn't use the pools at all.
	// NOTE(review): presumably nodup = true selects "use the provided
	// memory"; the lifetime requirements on that memory are not visible
	// from this header -- confirm.
	T_String( char const* data , uint32_t size , bool nodup = false );

	// Copy and move construction
	T_String( T_String const& source );
	T_String( T_String&& source ) noexcept;

	~T_String( );

	// ---------------------------------------------------------------------

	// Get a pooled string. Faster than constructing then calling
	// addToPool( ) if the string is already pooled. If it isn't, it will
	// be added as a dynamic string.
	static T_String Pooled( char const* string );
	static T_String Pooled( char const* data , uint32_t size );

	// ---------------------------------------------------------------------

	// Copy and move assignment
	T_String& operator= ( T_String&& string ) noexcept;
	T_String& operator= ( T_String const& string );

	// Assignment from the contents of a string builder
	T_String& operator= ( T_StringBuilder&& sb );
	T_String& operator= ( T_StringBuilder const& sb );

	friend void swap( T_String& lhs , T_String& rhs ) noexcept;

	// ---------------------------------------------------------------------

	// Adds the string to the pool if it isn't pooled already. If the pool
	// already contains a pooled version of this string, use it instead.
	T_String& addToPool( );

	// Attempts to use the pooled version of a string if it exists. If it
	// doesn't, keep using the current version.
	T_String& usePool( );

	// ---------------------------------------------------------------------

	// Basic information about the string.
	// NOTE(review): valid( ), size( ), length( ) and data( ) presumably
	// forward to the accessors of the same names in A_StringData (is valid
	// UTF-8, size in bytes, length in characters, pointer to the data).
	// The condition tested by the bool conversion and by operator! is not
	// visible from this header (presumably non-empty / empty) -- confirm.
	bool valid( ) const;
	uint32_t size( ) const;
	uint32_t length( ) const;
	char const * data( ) const;
	operator bool ( ) const;
	bool operator! ( ) const;

	// ---------------------------------------------------------------------

	// NOTE(review): the indices, offsets and counts used below are
	// presumably expressed in characters rather than bytes, and the
	// handling of out-of-range values is not visible from this header.

	// Return the character at the specified index
	T_Character operator[] ( uint32_t index ) const;

	// Return a substring from the left side of the string
	T_String left( uint32_t count ) const;
	// Return a substring from the right side of the string
	T_String right( uint32_t count ) const;
	// Return a substring from the specified offset
	T_String substr( uint32_t offset , uint32_t count = UINT32_MAX ) const;
	// Return the substring between the two specified offsets
	// NOTE(review): whether "end" is inclusive is not visible from here.
	T_String range( uint32_t start , uint32_t end ) const;

	// Remove whitespace from the start and end of the string
	T_String trim( ) const noexcept;

	// ---------------------------------------------------------------------

	// Generate a string using a function that transforms characters
	// NOTE(review): this method is noexcept while std::function calls may
	// throw; an exception escaping "f" would terminate the program.
	T_String mapped( T_Character::F_Map f ) const noexcept;

	// Convert the string to uppercase
	T_String toUpper( ) const noexcept;
	// Convert the string to lowercase
	T_String toLower( ) const noexcept;

	// ---------------------------------------------------------------------

	// Equality checks against another string or a C string
	bool equals( T_String const& other ) const;
	bool equals( char const* string ) const;

	// Three-way comparisons.
	// NOTE(review): presumably negative / zero / positive results in the
	// manner of strcmp( ); the ordering that is used (bytes, codepoints)
	// is not visible from this header -- confirm.
	int32_t compare( T_String const& other ) const;
	int32_t compareIgnoreCase( T_String const& other ) const;

	// Prefix and suffix checks
	bool startsWith( T_String const& other ) const;
	bool endsWith( T_String const& other ) const;

	// Finds a sub-string. Returns -1 if it isn't found.
	int32_t find( T_String const& other , uint32_t from = 0 ) const;
	// Finds a character. Returns -1 if it isn't found.
	int32_t find( T_Character character , uint32_t from = 0 ) const;

	// Comparison operators (presumably based on equals( ) and compare( ))
	bool operator== ( T_String const& other ) const;
	bool operator!= ( T_String const& other ) const;
	bool operator< ( T_String const& other ) const;
	bool operator> ( T_String const& other ) const;
	bool operator>= ( T_String const& other ) const;
	bool operator<= ( T_String const& other ) const;

	bool operator== ( char const* string ) const;
	bool operator!= ( char const* string ) const;

	// ---------------------------------------------------------------------

	// Return a copy of the string with a character or sub-string replaced
	// NOTE(review): presumably all occurrences are replaced -- confirm.
	T_String replace( T_Character initial , T_Character replacement ) const;
	T_String replace( T_String const& initial , T_String const& replacement ) const;

	// ---------------------------------------------------------------------

	// Numeric conversions. The parameters mirror those of the
	// UTF8ToUnsignedInteger( ), UTF8ToInteger( ) and UTF8ToDouble( )
	// functions, to which these methods presumably delegate -- confirm.
	uint64_t toUnsignedInteger( bool * ok = nullptr , int base = 10 ,
			bool useSep = false , T_Character separator = ' ' ) const;
	int64_t toInteger( bool * ok = nullptr , int base = 10 , bool useSep = false ,
			T_Character separator = ' ' ) const;
	double toDouble( bool * ok = nullptr , T_Character decimalPoint = '.' ,
			bool useSep = false , T_Character separator = ' ' ) const;

	// ---------------------------------------------------------------------

	// Get an iterator on the string's characters, either from a specific
	// offset or through implicit conversion.
	// NOTE(review): the conversion presumably starts at the beginning of
	// the string -- confirm.
	T_StringIterator getIterator( uint32_t offset ) const;
	operator T_StringIterator( ) const;

	// Converts the string to an array of bytes suitable for use with the
	// operating system's functions (e.g. UTF-8 C string on Linux, or
	// UTF-16 strings on Windows)
	T_Buffer< char > toOSString( ) const;
};
// NOTE(review): the three macros below are defined outside this file. They
// presumably declare the pointer helper types, the hash function and the
// comparator for T_String -- check the macros for the exact names.
M_CLASS_POINTERS( String );
M_DECLARE_HASH( T_String );
M_DECLARE_COMPARATOR( T_String );

// Namespace-scope declaration of the swap function that T_String declares as a
// friend; it makes the function visible to qualified calls, not just to
// argument-dependent lookup.
void swap( T_String& lhs , T_String& rhs ) noexcept;

// Explicit instantiation declaration: code including this header will not
// instantiate T_Array< T_String > implicitly. The matching explicit
// instantiation must be provided by one of the library's source files.
extern template class T_Array< T_String >;


/*= STRING ITERATORS =========================================================*/

// T_StringIterator - Iterator over the characters of a string
//
// Iterators are obtained from a T_String, either through getIterator( ) or
// through its conversion operator: the only constructor that creates an
// iterator from scratch is private and T_String is a friend. They can be
// copied, moved and swapped freely afterwards.
class T_StringIterator final
{
	friend class T_String;

   private:
	// The string storage being iterated over
	RP_StringData data_;
	// NOTE(review): the exact meaning of the four fields below is not
	// visible from this header. Going by their names: offset in the data,
	// index of the current character, current codepoint and amount of
	// bytes used by its encoding -- confirm against the implementation.
	uint32_t pos_;
	uint32_t index_;
	uint32_t codepoint_;
	uint32_t bytes_;

	// Used by T_String in order to create iterators
	T_StringIterator( RP_StringData data , uint32_t index );

   public:
	// An iterator always refers to a string: no default construction
	T_StringIterator( ) = delete;
	T_StringIterator( T_StringIterator const& other );
	T_StringIterator( T_StringIterator&& other ) noexcept;

	~T_StringIterator( );

	T_StringIterator& operator= ( T_StringIterator const& other );
	T_StringIterator& operator= ( T_StringIterator&& other ) noexcept;

	friend void swap( T_StringIterator& lhs , T_StringIterator& rhs ) noexcept;

	// ---------------------------------------------------------------------

	// Move to the next character.
	// NOTE(review): the meaning of the return value is not visible from
	// this header; presumably false once the end is reached -- confirm.
	bool next( );

	// Index of the current character
	// NOTE(review): presumably in characters, not bytes -- confirm.
	uint32_t index( ) const;
	// Has the iterator reached the end of the string?
	bool atEnd( ) const;

	// Access the current character, explicitly or through conversion.
	// NOTE(review): the result when atEnd( ) is true is not visible from
	// this header.
	T_Character character( ) const;
	operator T_Character ( ) const;
};


// NOTE(review): M_CLASS_POINTERS is defined outside this file; it presumably
// declares the pointer helper types that go with T_StringIterator.
M_CLASS_POINTERS( StringIterator );
// Namespace-scope declaration of the swap function that T_StringIterator
// declares as a friend.
void swap( T_StringIterator& lhs , T_StringIterator& rhs ) noexcept;


/*= STRING BUILDERS ==========================================================*/

// T_StringBuilder - Mutable buffer used to assemble strings
//
// Text is accumulated using the append*( ) methods, all of which return the
// builder itself so that calls can be chained. T_String provides constructors
// and assignment operators that accept a builder.
class T_StringBuilder
{
   public:
	// NOTE(review): presumably the granularity (in bytes) by which the
	// buffer's capacity grows -- confirm against the implementation.
	enum : uint32_t { C_GROWTH = 32 };

   private:
	// NOTE(review): the fields presumably back the accessors of the same
	// names below (buffer, allocated capacity, used size in bytes, length
	// in characters) -- confirm.
	char* data_;
	uint32_t capacity_;
	uint32_t size_;
	uint32_t length_;

	// T_String's builder-based constructors and assignments need access to
	// the fields above.
	friend class T_String;

   public:
	// Construct an empty builder
	T_StringBuilder( ) noexcept;

	// Copy and move construction
	T_StringBuilder( T_StringBuilder const& other );
	T_StringBuilder( T_StringBuilder&& other ) noexcept;

	// Construct a builder with initial contents: a block of data of the
	// specified size, a string, or a C string. The last two are explicit,
	// so strings never convert to builders implicitly.
	T_StringBuilder( char const* data , uint32_t size );
	explicit T_StringBuilder( T_String const& string );
	explicit T_StringBuilder( char const* string );

	~T_StringBuilder( );

	// Copy and move assignment
	T_StringBuilder& operator =( T_StringBuilder const& other );
	T_StringBuilder& operator =( T_StringBuilder&& other ) noexcept;

	// NOTE(review): unlike the swap functions of T_String and
	// T_StringIterator, this one is not declared noexcept.
	friend void swap( T_StringBuilder& lhs , T_StringBuilder& rhs );

	// ---------------------------------------------------------------------

	// Basic information about the builder's contents.
	// NOTE(review): size( ) is presumably in bytes and length( ) in
	// characters, as in T_String; whether data( ) is 0-terminated, and the
	// condition tested by the bool conversion and operator!, are not
	// visible from this header -- confirm.
	char const * data( ) const;
	uint32_t capacity( ) const;
	uint32_t size( ) const;
	uint32_t length( ) const;
	operator bool ( ) const;
	bool operator! ( ) const;

	// ---------------------------------------------------------------------

	// Buffer management.
	// NOTE(review): presumably ensureCapacity( ) grows the buffer so it
	// can hold at least minCap bytes, clear( ) empties the builder while
	// keeping the buffer, free( ) releases the buffer, and truncate( )
	// limits the contents to maxLength characters -- confirm.
	T_StringBuilder& ensureCapacity( uint32_t minCap );
	T_StringBuilder& clear( );
	T_StringBuilder& free( );
	T_StringBuilder& truncate( uint32_t maxLength ) noexcept;

	// ---------------------------------------------------------------------

	// Append the contents of another builder, a string, a block of data of
	// the specified size, or a single character.
	// NOTE(review): whether the data appended through the char const*
	// version is checked for UTF-8 validity is not visible from here.
	T_StringBuilder& append( T_StringBuilder const& other );
	T_StringBuilder& append( T_StringBuilder&& other );
	T_StringBuilder& append( T_String const& string );
	T_StringBuilder& append( char const* string , uint32_t size );
	T_StringBuilder& append( char character );
	T_StringBuilder& append( T_Character character );

	// Append the textual representation of a signed integer.
	//	base		numeric base; defaults to 10
	//	useSep		defaults to false; presumably enables the
	//			insertion of digit separators -- confirm
	//	sep		separator character; defaults to a space
	//	sepEvery	presumably the amount of digits between two
	//			separators; defaults to 3 -- confirm
	T_StringBuilder& appendNumeric( int64_t value , int base = 10 , bool useSep = false ,
				       T_Character sep = ' ' , int sepEvery = 3 );

	// Same as above, for unsigned integers.
	T_StringBuilder& appendNumeric( uint64_t value , int base = 10 , bool useSep = false ,
				       T_Character sep = ' ' , int sepEvery = 3 );

	// Append the textual representation of a floating point number.
	// NOTE(review): "precision" (6 by default) is presumably the amount of
	// digits to output and "trailingZeros" presumably controls whether
	// zeros at the end of the output are kept -- confirm.
	T_StringBuilder& appendDouble( double value , uint32_t precision = 6 , bool trailingZeros = false );

	// ---------------------------------------------------------------------

	// Compare the builder's contents with another builder, a string or a
	// C string.
	bool operator== ( T_StringBuilder const& other ) const;
	bool operator!= ( T_StringBuilder const& other ) const;

	bool operator== ( T_String const& string ) const;
	bool operator!= ( T_String const& string ) const;

	bool operator== ( char const* string ) const;
	bool operator!= ( char const* string ) const;

	// ---------------------------------------------------------------------

	// Numeric conversions of the builder's contents. The parameters mirror
	// those of the UTF8ToUnsignedInteger( ), UTF8ToInteger( ) and
	// UTF8ToDouble( ) functions, to which these methods presumably
	// delegate -- confirm.
	uint64_t toUnsignedInteger( bool * ok = nullptr , int base = 10 ,
			bool useSep = false , T_Character separator = ' ' ) const;
	int64_t toInteger( bool * ok = nullptr , int base = 10 , bool useSep = false ,
			T_Character separator = ' ' ) const;
	double toDouble( bool * ok = nullptr , T_Character decimalPoint = '.' ,
			bool useSep = false , T_Character separator = ' ' ) const;
};


// NOTE(review): M_CLASS_POINTERS is defined outside this file; it presumably
// declares the pointer helper types that go with T_StringBuilder.
M_CLASS_POINTERS( StringBuilder );
// Namespace-scope declaration of the swap function that T_StringBuilder
// declares as a friend. Note that it is not declared noexcept.
void swap( T_StringBuilder& lhs , T_StringBuilder& rhs );

// Operator <<
// One declaration per type that can be appended to a string builder using the
// stream-like syntax.
// NOTE(review): M_LSHIFT_OP is defined outside this file. Each line presumably
// declares an operator<< taking a T_StringBuilder on the left-hand side and
// the listed type on the right-hand side, forwarding to the corresponding
// append*( ) method -- confirm. No 8-bit integer types are listed.
M_LSHIFT_OP( T_StringBuilder , T_StringBuilder const& );
M_LSHIFT_OP( T_StringBuilder , T_StringBuilder && );
M_LSHIFT_OP( T_StringBuilder , T_String const& );
M_LSHIFT_OP( T_StringBuilder , char const* );
M_LSHIFT_OP( T_StringBuilder , char );
M_LSHIFT_OP( T_StringBuilder , T_Character );
M_LSHIFT_OP( T_StringBuilder , int16_t );
M_LSHIFT_OP( T_StringBuilder , int32_t );
M_LSHIFT_OP( T_StringBuilder , int64_t );
M_LSHIFT_OP( T_StringBuilder , uint16_t );
M_LSHIFT_OP( T_StringBuilder , uint32_t );
M_LSHIFT_OP( T_StringBuilder , uint64_t );
M_LSHIFT_OP( T_StringBuilder , float );
M_LSHIFT_OP( T_StringBuilder , double );


} // namespace
#endif // _H_EBCL_STRINGS
#include <ebcl/inline/Strings.hh>