1330 lines
28 KiB
C++
1330 lines
28 KiB
C++
/******************************************************************************/
|
|
/* SRD - TEXT STORAGE *********************************************************/
|
|
/******************************************************************************/
|
|
|
|
|
|
#include <ebcl/SRDText.hh>
|
|
#include <ebcl/BinaryStreams.hh>
|
|
using namespace ebcl;
|
|
|
|
|
|
/*= T_SRDLexerPrivate_ =======================================================*/
|
|
|
|
namespace {
|
|
struct T_SRDLexerPrivate_
|
|
{
|
|
// E_State_ - Lexer states
|
|
enum class E_State_ {
|
|
BASE , // Base state
|
|
COMMENT_SL_START , // Start of single-line comment
|
|
COMMENT_SL , // In single-line comment
|
|
COMMENT_ML_START , // Start of multi-line comment
|
|
COMMENT_ML , // In multi-line comment
|
|
STRING , // In a quoted string
|
|
STRING_ESCAPE , // In string, after backslash
|
|
STRING_UTF8 , // In string, after \u/\U
|
|
INITIAL_DASH , // After an initial '-'
|
|
NB_INT_PART , // Number, integral part
|
|
NB_FRAC_PART_M , // Number, fractional part (mandatory)
|
|
NB_FRAC_PART , // Number, fractional part
|
|
NB_EXP_START , // Number, exponent start
|
|
NB_EXP_DASH , // Number, exponent, after '-'
|
|
NB_EXP , // Number, exponent
|
|
WORD , // Inside a word
|
|
WORD_AFTER_DASH , // Word, right after a '-'
|
|
VAR , // Inside a variable name
|
|
VAR_AFTER_DASH , // Variable name, right after a '-'
|
|
ER_WORD , // Word/variable error recovery
|
|
BINARY , // In a binary data array
|
|
BINARY_DIGIT , // In a binary data array, after a digit
|
|
BINARY_SEPARATOR , // In a binary data array, after a second digit
|
|
ER_BINARY , // Binary data error recovery
|
|
};
|
|
|
|
const T_String name_;
|
|
T_SRDErrors& errors_;
|
|
A_SRDReaderTarget& target_;
|
|
|
|
E_State_ state_;
|
|
T_StringBuilder buffer_;
|
|
T_StringBuilder stringBuffer_;
|
|
T_Array< uint8_t > binBuffer_;
|
|
uint32_t depth_;
|
|
uint32_t line_;
|
|
size_t char_;
|
|
uint32_t tokLine_;
|
|
size_t tokChar_;
|
|
|
|
uint32_t utf8Left_;
|
|
uint32_t utf8Char_;
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
T_SRDLexerPrivate_( T_String const& name ,
|
|
T_SRDErrors& errors ,
|
|
A_SRDReaderTarget& target );
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
// Various helpers
|
|
void tokStart( T_Character c );
|
|
void push( T_SRDToken&& token );
|
|
void error( char const* string );
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
// Output
|
|
void pushString( );
|
|
void pushComment( );
|
|
void pushInteger( );
|
|
void pushFloat( );
|
|
void pushWord( );
|
|
void pushVar( );
|
|
void pushBinary( );
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
// State implementations
|
|
void pcBase( T_Character c );
|
|
void pcCommentSLStart( T_Character c );
|
|
void pcCommentSL( T_Character c );
|
|
void pcCommentMLStart( T_Character c );
|
|
void pcCommentML( T_Character c );
|
|
void pcString( T_Character c );
|
|
void pcStringEscape( T_Character c );
|
|
void pcStringUTF8( T_Character c );
|
|
void pcInitialDash( T_Character c );
|
|
void pcNbIntPart( T_Character c );
|
|
void pcNbFracPart( T_Character c , bool mandatory );
|
|
void pcNbExp( T_Character c );
|
|
void pcWord( T_Character c , bool afterDash , bool isVar );
|
|
void pcBinary( T_Character c );
|
|
void pcBinaryDigit( T_Character c );
|
|
void pcBinarySeparator( T_Character c );
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
// Main processing
|
|
void processCharacter( T_Character c );
|
|
void processEnd( );
|
|
};
|
|
} // namespace
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Initialises the lexer in its base state, positioned on the first character
// of the first line.
//
//	name	source name attached to token locations and errors
//	errors	error list to which lexing errors are added
//	target	reader target which receives the tokens
//
// All scalar members are given a defined value (in declaration order) so that
// no code path can ever observe an indeterminate token position, comment depth
// or escape sequence accumulator, even if tokStart( ) has not run yet.
inline T_SRDLexerPrivate_::T_SRDLexerPrivate_(
		T_String const& name ,
		T_SRDErrors& errors ,
		A_SRDReaderTarget& target )
	: name_( name ) , errors_( errors ) , target_( target ) ,
		state_( E_State_::BASE ) ,
		depth_( 0 ) ,
		line_( 1 ) , char_( 1 ) ,
		tokLine_( 1 ) , tokChar_( 1 ) ,
		utf8Left_( 0 ) , utf8Char_( 0 )
{ }
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Begins a new token at the current position: records the location, resets
// the comment depth and both text buffers, then stores the token's first
// character in the raw text buffer.
void T_SRDLexerPrivate_::tokStart( T_Character c )
{
	// Where the token starts
	tokLine_ = line_;
	tokChar_ = char_;

	// Per-token state
	depth_ = 0;
	stringBuffer_.clear( );
	buffer_.clear( );

	buffer_ << c;
}
|
|
|
|
// Sends a token to the target. The token is given the location recorded by
// tokStart( ); if it has no full text yet and some raw text was accumulated,
// the raw text buffer is moved into it.
void T_SRDLexerPrivate_::push( T_SRDToken&& token )
{
	token.location( name_ , tokLine_ , tokChar_ );

	const bool attachText = !token.hasFullText( ) && buffer_.size( ) != 0;
	if ( attachText ) {
		token.setFullText( std::move( buffer_ ) );
	}

	target_.push( errors_ , std::move( token ) );
}
|
|
|
|
inline void T_SRDLexerPrivate_::error( char const* string )
|
|
{
|
|
errors_.add( string , name_ , line_ , char_ );
|
|
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
inline void T_SRDLexerPrivate_::pushString( )
|
|
{
|
|
push( T_SRDToken::String( std::move( stringBuffer_ ) ) );
|
|
}
|
|
|
|
inline void T_SRDLexerPrivate_::pushComment( )
|
|
{
|
|
push( T_SRDToken::Comment( std::move( stringBuffer_ ) ) );
|
|
}
|
|
|
|
void T_SRDLexerPrivate_::pushInteger( )
|
|
{
|
|
bool ok;
|
|
auto v( buffer_.toInteger( &ok ) );
|
|
if ( !ok ) {
|
|
error( "invalid integer value" );
|
|
push( T_SRDToken::Integer( 0 ) );
|
|
} else {
|
|
push( T_SRDToken::AutoInteger( v ) );
|
|
}
|
|
}
|
|
|
|
void T_SRDLexerPrivate_::pushFloat( )
|
|
{
|
|
stringBuffer_ << buffer_ << '\0';
|
|
|
|
errno = 0;
|
|
auto v = strtod( stringBuffer_.data( ) , nullptr );
|
|
if ( errno != 0 ) {
|
|
error( "invalid floating point value" );
|
|
push( T_SRDToken::Float( 0 ) );
|
|
} else {
|
|
push( T_SRDToken::Float( v ) );
|
|
}
|
|
}
|
|
|
|
void T_SRDLexerPrivate_::pushWord( )
|
|
{
|
|
T_String w( std::move( stringBuffer_ ) );
|
|
w.usePool( );
|
|
push( T_SRDToken::Word( std::move( w ) ) );
|
|
}
|
|
|
|
void T_SRDLexerPrivate_::pushVar( )
|
|
{
|
|
T_String w( std::move( stringBuffer_ ) );
|
|
w.usePool( );
|
|
push( T_SRDToken::Variable( std::move( w ) ) );
|
|
}
|
|
|
|
void T_SRDLexerPrivate_::pushBinary( )
|
|
{
|
|
if ( binBuffer_.size( ) ) {
|
|
push( T_SRDToken::Binary( &binBuffer_[ 0 ] , binBuffer_.size( ) ) );
|
|
} else {
|
|
push( T_SRDToken::Binary( (uint8_t*) nullptr , 0 ) );
|
|
}
|
|
binBuffer_.clear( );
|
|
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Base state: examines the first character of a potential token.
//
// Parentheses are emitted immediately as list start / list end tokens. Any
// other token-starting character selects the state that will read the rest
// of the token. Whitespace is ignored; anything else is reported as an
// unexpected character.
void T_SRDLexerPrivate_::pcBase( T_Character c )
{
	switch ( c.codepoint ) {

	    // Single-character tokens
	    case '(':
	    case ')':
		tokStart( c );
		push( c == '('
			? T_SRDToken::ListStart( )
			: T_SRDToken::ListEnd( ) );
		break;

	    // Characters that introduce a longer token
	    case '"':	state_ = E_State_::STRING;		break;
	    case '$':	state_ = E_State_::VAR_AFTER_DASH;	break;
	    case '-':	state_ = E_State_::INITIAL_DASH;	break;
	    case '.':	state_ = E_State_::NB_FRAC_PART_M;	break;
	    case '#':	state_ = E_State_::COMMENT_SL_START;	break;
	    case '{':	state_ = E_State_::COMMENT_ML_START;	break;
	    case '[':	state_ = E_State_::BINARY;		break;

	    // Words, numbers, whitespace and garbage
	    default:
		if ( c.isAlpha( ) ) {
			state_ = E_State_::WORD;
		} else if ( c.isNumeric( ) ) {
			state_ = E_State_::NB_INT_PART;
		} else if ( !c.isWhitespace( ) ) {
			error( "unexpected character" );
		}
		break;
	}

	// Still between tokens: nothing else to do
	if ( state_ == E_State_::BASE ) {
		return;
	}

	// A token has started. For words, the first character is part of
	// the word's value as well as of its raw text.
	tokStart( c );
	if ( state_ == E_State_::WORD ) {
		stringBuffer_ << c;
	}
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Start of a single-line comment: skips whitespace that follows the '#'.
// The first non-whitespace character begins the comment's text. If the line
// ends before any text is found, no token is emitted at all.
void T_SRDLexerPrivate_::pcCommentSLStart( T_Character c )
{
	if ( c != '\n' ) {
		buffer_ << c;
		if ( c.isWhitespace( ) ) {
			return;
		}
		state_ = E_State_::COMMENT_SL;
		stringBuffer_ << c;
		return;
	}

	// Empty comment, drop it
	buffer_.clear( );
	state_ = E_State_::BASE;
}
|
|
|
|
// Inside a single-line comment: accumulates characters until the end of the
// line, at which point the comment token is emitted.
void T_SRDLexerPrivate_::pcCommentSL( T_Character c )
{
	if ( c != '\n' ) {
		buffer_ << c;
		stringBuffer_ << c;
		return;
	}

	pushComment( );
	state_ = E_State_::BASE;
}
|
|
|
|
// Start of a multi-line comment: skips whitespace that follows the '{'. A
// '}' found before any text ends the comment without emitting a token; any
// other character is handled as the first character of the comment's text.
void T_SRDLexerPrivate_::pcCommentMLStart( T_Character c )
{
	if ( c.isWhitespace( ) ) {
		buffer_ << c;
		return;
	}

	if ( c == '}' ) {
		state_ = E_State_::BASE;
		return;
	}

	state_ = E_State_::COMMENT_ML;
	pcCommentML( c );
}
|
|
|
|
// Inside a multi-line comment: accumulates the comment's text while keeping
// track of nested braces. The comment token is emitted when a '}' is found
// while no nested brace is open; that final '}' is not part of the value.
void T_SRDLexerPrivate_::pcCommentML( T_Character c )
{
	buffer_ << c;

	const bool closing = ( c == '}' );
	if ( closing && depth_ == 0 ) {
		pushComment( );
		state_ = E_State_::BASE;
		return;
	}

	// Nested braces belong to the comment's text
	if ( closing ) {
		depth_ --;
	} else if ( c == '{' ) {
		depth_ ++;
	}
	stringBuffer_ << c;
}
|
|
|
|
// Inside a quoted string. A line feed ends the string with an error; other
// control characters except tabs are reported and dropped. A double quote
// emits the string token, a backslash starts an escape sequence, and any
// other character is added to the string's value.
void T_SRDLexerPrivate_::pcString( T_Character c )
{
	if ( c == '\n' ) {
		pushString( );
		error( "unterminated string" );
		state_ = E_State_::BASE;
		return;
	}

	if ( c.isControl( ) && c != '\t' ) {
		error( "control character in string" );
		return;
	}

	buffer_ << c;
	if ( c == '"' ) {
		pushString( );
		state_ = E_State_::BASE;
		return;
	}

	if ( c == '\\' ) {
		state_ = E_State_::STRING_ESCAPE;
		return;
	}

	stringBuffer_ << c;
}
|
|
|
|
// Inside a string, right after a backslash. Handles the n, b, f, r and t
// escapes, starts a \u (4 hex digits) or \U (8 hex digits) sequence, and
// copies any other escaped character as it is. A line feed ends the string
// with an error; other control characters except tabs are reported and
// dropped, and the lexer goes back to the normal string state.
void T_SRDLexerPrivate_::pcStringEscape( T_Character c )
{
	if ( c == '\n' ) {
		pushString( );
		error( "unterminated string" );
		state_ = E_State_::BASE;
		return;
	}

	if ( c.isControl( ) && c != '\t' ) {
		error( "control character in string" );
		state_ = E_State_::STRING;
		return;
	}

	buffer_ << c;

	// Unicode sequences have their own state
	if ( c.toLower( ) == 'u' ) {
		state_ = E_State_::STRING_UTF8;
		utf8Char_ = 0;
		utf8Left_ = ( c == 'u' ) ? 4 : 8;
		return;
	}

	if ( c == 'n' ) {
		stringBuffer_ << '\n';
	} else if ( c == 'b' ) {
		stringBuffer_ << char( 8 );
	} else if ( c == 'f' ) {
		stringBuffer_ << char( 12 );
	} else if ( c == 'r' ) {
		stringBuffer_ << '\r';
	} else if ( c == 't' ) {
		stringBuffer_ << '\t';
	} else {
		stringBuffer_ << c;
	}
	state_ = E_State_::STRING;
}
|
|
|
|
// Inside a string, reading the hexadecimal digits of a \u / \U sequence.
// Each digit is shifted into the accumulated codepoint; once all expected
// digits have been read, the codepoint is appended to the string's value if
// it is valid, and reported as an error otherwise. A line feed or a double
// quote ends the string with an error. A control character or a character
// that is not a hexadecimal digit aborts the sequence with an error and
// sends the lexer back to the normal string state.
void T_SRDLexerPrivate_::pcStringUTF8( T_Character c )
{
	if ( c == '\n' ) {
		pushString( );
		error( "unterminated string" );
		state_ = E_State_::BASE;
		return;
	}

	if ( c.isControl( ) ) {
		error( "control character in UTF-8 sequence" );
		state_ = E_State_::STRING;
		return;
	}

	if ( c == '"' ) {
		pushString( );
		error( "incomplete UTF-8 sequence" );
		state_ = E_State_::BASE;
		return;
	}

	buffer_ << c;

	// Convert the character to the value of a hexadecimal digit
	uint32_t digit = 0;
	bool rejected = false;
	if ( c.isNumeric( ) ) {
		digit = c - '0';
	} else if ( c.isAlpha( ) ) {
		// 55 is 'A' - 10
		digit = c.toUpper( ) - 55;
		rejected = digit > 15;
	} else {
		rejected = true;
	}

	if ( rejected ) {
		error( "invalid UTF-8 sequence" );
		state_ = E_State_::STRING;
		return;
	}

	utf8Char_ = ( utf8Char_ << 4 ) | digit;
	utf8Left_ --;
	if ( utf8Left_ != 0 ) {
		return;
	}

	// Sequence complete
	T_Character decoded( utf8Char_ );
	if ( decoded.isValid( ) ) {
		stringBuffer_ << decoded;
	} else {
		error( "invalid UTF-8 codepoint" );
	}
	state_ = E_State_::STRING;
}
|
|
|
|
// Right after a '-' that started a token. A digit or a '.' makes the token a
// negative number and a letter makes it a word that starts with a dash. Any
// other character is an error; it is then handled from the base state.
void T_SRDLexerPrivate_::pcInitialDash( T_Character c )
{
	if ( c.isNumeric( ) ) {
		state_ = E_State_::NB_INT_PART;
		pcNbIntPart( c );
		return;
	}

	if ( c.isAlpha( ) ) {
		// The dash is part of the word's value
		state_ = E_State_::WORD_AFTER_DASH;
		stringBuffer_ << '-';
		pcWord( c , true , false );
		return;
	}

	if ( c == '.' ) {
		state_ = E_State_::NB_FRAC_PART_M;
		buffer_ << c;
		return;
	}

	error( "unexpected character" );
	state_ = E_State_::BASE;
	pcBase( c );
}
|
|
|
|
// Integral part of a number. Digits are accumulated; a '.' switches to the
// fractional part and an 'e' / 'E' to the exponent. Any other character ends
// the number: the integer token is emitted, and the character is handled
// from the base state. Characters other than whitespace, parentheses, '{'
// and '#' are additionally reported as unexpected.
void T_SRDLexerPrivate_::pcNbIntPart( T_Character c )
{
	if ( c.isNumeric( ) ) {
		buffer_ << c;
		return;
	}

	if ( c == '.' ) {
		buffer_ << c;
		state_ = E_State_::NB_FRAC_PART;
		return;
	}

	if ( c == 'e' || c == 'E' ) {
		buffer_ << c;
		state_ = E_State_::NB_EXP_START;
		return;
	}

	const bool unexpected = !c.isWhitespace( ) && c != '(' && c != ')'
		&& c != '{' && c != '#';
	if ( unexpected ) {
		error( "unexpected character" );
	}

	pushInteger( );

	state_ = E_State_::BASE;
	pcBase( c );
}
|
|
|
|
// Fractional part of a number. When `mandatory` is set, no digit has been
// read since the '.', and anything other than a digit is reported as an
// error. Digits are accumulated and an 'e' / 'E' switches to the exponent.
// Any other character ends the number: the floating point token is emitted,
// and the character is handled from the base state.
void T_SRDLexerPrivate_::pcNbFracPart( T_Character c , bool mandatory )
{
	if ( c.isNumeric( ) ) {
		buffer_ << c;
		state_ = E_State_::NB_FRAC_PART;
		return;
	}

	if ( mandatory ) {
		error( "fractional part expected" );
	}

	if ( c == 'e' || c == 'E' ) {
		buffer_ << c;
		state_ = E_State_::NB_EXP_START;
		return;
	}

	const bool unexpected = !c.isWhitespace( ) && c != '('
		&& c != ')' && c != '{' && c != '#';
	if ( unexpected ) {
		error( "unexpected character" );
	}

	pushFloat( );

	state_ = E_State_::BASE;
	pcBase( c );
}
|
|
|
|
// Exponent of a floating point number; shared by the NB_EXP_START,
// NB_EXP_DASH and NB_EXP states.
//
// A sign is accepted right after the exponent marker only. Digits are
// accumulated. Any other character ends the number: an error is recorded if
// no exponent digit was read, the floating point token is emitted, and the
// character is handled from the base state. Characters other than
// whitespace, parentheses, '{' and '#' are additionally reported as
// unexpected.
void T_SRDLexerPrivate_::pcNbExp( T_Character c )
{
	if ( state_ == E_State_::NB_EXP_START && ( c == '-' || c == '+' ) ) {
		buffer_ << c;
		state_ = E_State_::NB_EXP_DASH;
		return;
	}

	if ( c.isNumeric( ) ) {
		buffer_ << c;
		state_ = E_State_::NB_EXP;
		return;
	}

	if ( state_ != E_State_::NB_EXP ) {
		error( "exponent expected" );
	}

	const bool ok = c.isWhitespace( ) || c == '('
		|| c == ')' || c == '{' || c == '#';
	if ( !ok ) {
		error( "unexpected character" );
	}

	// strtod( ) needs a NUL-terminated copy of the token's text
	stringBuffer_ << buffer_ << '\0';

	// errno must be cleared before the call: strtod( ) does not reset it
	// on success, so an ERANGE left behind by unrelated code would be
	// mistaken for a conversion failure.
	errno = 0;
	auto v = strtod( stringBuffer_.data( ) , nullptr );
	if ( errno == ERANGE ) {
		error( "invalid floating point value" );
		push( T_SRDToken::Float( 0 ) );
	} else {
		push( T_SRDToken::Float( v ) );
	}

	state_ = E_State_::BASE;
	pcBase( c );
}
|
|
|
|
// Inside a word or a variable name; shared by the WORD, WORD_AFTER_DASH, VAR
// and VAR_AFTER_DASH states.
//
//	afterDash	the previous character requires a letter to follow
//	isVar		the token is a variable name rather than a word
//
// Letters, digits and dashes extend the token, except that only a letter may
// follow a dash: otherwise an error is recorded, a token named "invalid" is
// emitted and the rest of the word is skipped. Any other character ends the
// token; it is then handled from the base state. If that character is not a
// valid terminator, or if the token ends right after a dash, an error is
// recorded and the token's value is replaced with "invalid".
void T_SRDLexerPrivate_::pcWord( T_Character c , bool afterDash , bool isVar )
{
	const auto emitToken = [this , isVar]( ) {
		if ( isVar ) {
			pushVar( );
		} else {
			pushWord( );
		}
	};

	const bool wordCharacter = c.isAlpha( ) || c.isNumeric( ) || c == '-';
	if ( wordCharacter ) {
		if ( afterDash && !c.isAlpha( ) ) {
			error( "letter expected" );
			state_ = E_State_::ER_WORD;
			stringBuffer_.clear( );
			stringBuffer_ << "invalid";
			emitToken( );
			return;
		}

		buffer_ << c;
		stringBuffer_ << c;

		const bool dash = ( c == '-' );
		if ( isVar ) {
			state_ = dash ? E_State_::VAR_AFTER_DASH : E_State_::VAR;
		} else {
			state_ = dash ? E_State_::WORD_AFTER_DASH : E_State_::WORD;
		}
		return;
	}

	// End of the token
	const bool terminator = c.isWhitespace( ) || c == '(' || c == ')'
		|| c == '{' || c == '#';
	bool discard = true;
	if ( !terminator ) {
		error( "unexpected character" );
	} else if ( afterDash ) {
		error( isVar ? "incomplete variable name" : "incomplete word" );
	} else {
		discard = false;
	}

	if ( discard ) {
		stringBuffer_.clear( );
		stringBuffer_ << "invalid";
	}

	emitToken( );
	state_ = E_State_::BASE;
	pcBase( c );
}
|
|
|
|
// Inside a binary array, between bytes. A ']' emits the binary token. A
// hexadecimal digit starts a new byte, of which it is the high nibble.
// Whitespace is skipped, and anything else is reported as an invalid digit
// and switches to binary error recovery.
void T_SRDLexerPrivate_::pcBinary(
		T_Character c )
{
	buffer_ << c;

	if ( c == ']' ) {
		pushBinary( );
		state_ = E_State_::BASE;
		return;
	}

	if ( c.isAlpha( ) ) {
		T_Character upper( c.toUpper( ) );
		if ( upper > 'F' ) {
			error( "invalid binary digit" );
			state_ = E_State_::ER_BINARY;
		} else {
			// 55 is 'A' - 10
			binBuffer_.add( ( uint32_t( upper ) - 55 ) * 16 );
			state_ = E_State_::BINARY_DIGIT;
		}
		return;
	}

	if ( c.isNumeric( ) ) {
		// 48 is '0'
		binBuffer_.add( ( uint32_t( c ) - 48 ) * 16 );
		state_ = E_State_::BINARY_DIGIT;
		return;
	}

	if ( !c.isWhitespace( ) ) {
		error( "invalid binary digit" );
		state_ = E_State_::ER_BINARY;
	}
}
|
|
|
|
// Inside a binary array, after the first digit of a byte. A hexadecimal
// digit completes the byte as its low nibble. Anything else is reported as
// an invalid digit: whitespace goes back to reading bytes, a ']' emits the
// binary token, and other characters switch to binary error recovery. In
// the error cases the half-read byte is left in the byte buffer.
void T_SRDLexerPrivate_::pcBinaryDigit(
		T_Character c )
{
	buffer_ << c;

	if ( c.isAlpha( ) ) {
		T_Character upper( c.toUpper( ) );
		if ( upper > 'F' ) {
			error( "invalid binary digit" );
			state_ = E_State_::ER_BINARY;
			return;
		}

		// 55 is 'A' - 10
		const auto last( binBuffer_.size( ) - 1 );
		binBuffer_[ last ] += uint32_t( upper ) - 55;
		state_ = E_State_::BINARY;
		return;
	}

	if ( c.isNumeric( ) ) {
		// 48 is '0'
		const auto last( binBuffer_.size( ) - 1 );
		binBuffer_[ last ] += uint32_t( c ) - 48;
		state_ = E_State_::BINARY;
		return;
	}

	error( "invalid binary digit" );
	if ( c.isWhitespace( ) ) {
		state_ = E_State_::BINARY;
		return;
	}

	if ( c == ']' ) {
		pushBinary( );
		state_ = E_State_::BASE;
		return;
	}

	state_ = E_State_::ER_BINARY;
}
|
|
|
|
// Inside a binary array, where a separator is required. Whitespace goes back
// to reading bytes and a ']' emits the binary token; anything else is
// reported as an invalid digit and switches to binary error recovery.
void T_SRDLexerPrivate_::pcBinarySeparator(
		T_Character c )
{
	buffer_ << c;

	if ( c.isWhitespace( ) ) {
		state_ = E_State_::BINARY;
		return;
	}

	if ( c == ']' ) {
		pushBinary( );
		state_ = E_State_::BASE;
		return;
	}

	error( "invalid binary digit" );
	state_ = E_State_::ER_BINARY;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Feeds one character to the state machine: the handler of the current state
// is called, then the current line / character position is updated.
inline void T_SRDLexerPrivate_::processCharacter( T_Character c )
{
	switch ( state_ ) {

	    case E_State_::BASE:		pcBase( c );			break;

	    // Comments
	    case E_State_::COMMENT_SL_START:	pcCommentSLStart( c );		break;
	    case E_State_::COMMENT_SL:		pcCommentSL( c );		break;
	    case E_State_::COMMENT_ML_START:	pcCommentMLStart( c );		break;
	    case E_State_::COMMENT_ML:		pcCommentML( c );		break;

	    // Strings
	    case E_State_::STRING:		pcString( c );			break;
	    case E_State_::STRING_ESCAPE:	pcStringEscape( c );		break;
	    case E_State_::STRING_UTF8:		pcStringUTF8( c );		break;

	    // Numbers
	    case E_State_::INITIAL_DASH:	pcInitialDash( c );		break;
	    case E_State_::NB_INT_PART:		pcNbIntPart( c );		break;
	    case E_State_::NB_FRAC_PART_M:	pcNbFracPart( c , true );	break;
	    case E_State_::NB_FRAC_PART:	pcNbFracPart( c , false );	break;

	    case E_State_::NB_EXP_START:
	    case E_State_::NB_EXP_DASH:
	    case E_State_::NB_EXP:
		pcNbExp( c );
		break;

	    // Words and variables
	    case E_State_::WORD:		pcWord( c , false , false );	break;
	    case E_State_::WORD_AFTER_DASH:	pcWord( c , true , false );	break;
	    case E_State_::VAR:			pcWord( c , false , true );	break;
	    case E_State_::VAR_AFTER_DASH:	pcWord( c , true , true );	break;

	    // Word error recovery: skip until the invalid word ends
	    case E_State_::ER_WORD:
		if ( !c.isAlphanumeric( ) && c != '-' ) {
			state_ = E_State_::BASE;
			pcBase( c );
		}
		break;

	    // Binary arrays
	    case E_State_::BINARY:		pcBinary( c );			break;
	    case E_State_::BINARY_DIGIT:	pcBinaryDigit( c );		break;
	    case E_State_::BINARY_SEPARATOR:	pcBinarySeparator( c );		break;

	    // Binary error recovery: skip until whitespace or the end of the
	    // array
	    case E_State_::ER_BINARY:
		buffer_ << c;
		if ( c == ']' ) {
			pushBinary( );
			state_ = E_State_::BASE;
		} else if ( c.isWhitespace( ) ) {
			state_ = E_State_::BINARY;
		}
		break;
	}

	// Update the current position
	if ( c != '\n' ) {
		char_ ++;
		return;
	}
	line_ ++;
	char_ = 1;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
inline void T_SRDLexerPrivate_::processEnd( )
|
|
{
|
|
switch ( state_ ) {
|
|
|
|
case E_State_::BASE:
|
|
case E_State_::ER_WORD:
|
|
case E_State_::COMMENT_SL_START:
|
|
break;
|
|
|
|
case E_State_::STRING:
|
|
case E_State_::STRING_ESCAPE:
|
|
case E_State_::STRING_UTF8:
|
|
error( "unterminated string" );
|
|
pushString( );
|
|
break;
|
|
|
|
case E_State_::COMMENT_SL:
|
|
pushComment( );
|
|
break;
|
|
|
|
case E_State_::COMMENT_ML:
|
|
pushComment( );
|
|
// fall through
|
|
case E_State_::COMMENT_ML_START:
|
|
error( "unterminated multi-line comment" );
|
|
break;
|
|
|
|
case E_State_::NB_INT_PART:
|
|
pushInteger( );
|
|
break;
|
|
|
|
case E_State_::NB_FRAC_PART_M:
|
|
error( "fractional part expected" );
|
|
// fall through
|
|
case E_State_::NB_FRAC_PART:
|
|
case E_State_::NB_EXP:
|
|
pushFloat( );
|
|
break;
|
|
|
|
case E_State_::NB_EXP_START:
|
|
case E_State_::NB_EXP_DASH:
|
|
error( "exponent expected" );
|
|
pushFloat( );
|
|
break;
|
|
|
|
case E_State_::WORD_AFTER_DASH:
|
|
error( "incomplete word" );
|
|
stringBuffer_.clear( );
|
|
stringBuffer_ << "invalid";
|
|
// fall through
|
|
case E_State_::WORD:
|
|
pushWord( );
|
|
break;
|
|
|
|
case E_State_::VAR_AFTER_DASH:
|
|
error( "incomplete variable name" );
|
|
stringBuffer_.clear( );
|
|
stringBuffer_ << "invalid";
|
|
pushVar( );
|
|
break;
|
|
|
|
case E_State_::VAR:
|
|
if ( stringBuffer_.size( ) == 0 ) {
|
|
error( "incomplete variable name" );
|
|
stringBuffer_ << "invalid";
|
|
}
|
|
pushVar( );
|
|
break;
|
|
|
|
case E_State_::INITIAL_DASH:
|
|
error( "unexpected end of input" );
|
|
break;
|
|
|
|
case E_State_::BINARY:
|
|
case E_State_::BINARY_DIGIT:
|
|
case E_State_::BINARY_SEPARATOR:
|
|
case E_State_::ER_BINARY:
|
|
error( "incomplete binary data" );
|
|
pushBinary( );
|
|
break;
|
|
}
|
|
target_.end( errors_ );
|
|
binBuffer_.free( );
|
|
}
|
|
|
|
|
|
/*= T_SRDLexer ===============================================================*/
|
|
|
|
// Creates a lexer which reports errors to `errors` and sends tokens to
// `target`; `name` identifies the source in token and error locations. The
// private implementation is allocated here and handed to the base class.
T_SRDLexer::T_SRDLexer(
		T_String const& name ,
		T_SRDErrors& errors ,
		A_SRDReaderTarget& target )
	: A_PrivateImplementation(
		new T_SRDLexerPrivate_( name , errors , target ) )
{ }
|
|
|
|
// Feeds one character to the lexer (forwards to the private implementation).
void T_SRDLexer::processCharacter( T_Character c )
{
	auto&& impl( p< T_SRDLexerPrivate_ >( ) );
	impl.processCharacter( c );
}
|
|
|
|
void T_SRDLexer::processEnd( )
|
|
{
|
|
p< T_SRDLexerPrivate_ >( ).processEnd( );
|
|
}
|
|
|
|
/*= T_SRDTextReader ==========================================================*/
|
|
|
|
// Reads SRD text from a stream and sends the resulting tokens to the
// reader's target.
//
//	name	source name used in token and error locations
//	input	stream to read characters from
//
// Characters are read until the stream throws X_StreamError; an END error
// code is the normal end of the input, any other stream error is rethrown.
// Throws X_SRDErrors if any error was recorded while reading.
void T_SRDTextReader::read( T_String const& name , A_InputStream& input )
{
	T_SRDErrors errors;

	T_SRDLexer lexer( name , errors , target_ );
	T_BinaryReader reader( input );
	target_.start( errors );

	try {
		// Left only through a stream error
		for ( ;; ) {
			lexer.processCharacter( reader.read< T_Character >( ) );
		}
	} catch ( X_StreamError const& e ) {
		const bool endOfInput = ( e.code( ) == E_StreamError::END );
		if ( !endOfInput ) {
			throw;
		}
	}
	lexer.processEnd( );

	if ( errors.size( ) != 0 ) {
		throw X_SRDErrors( errors );
	}
}
|
|
|
|
|
|
/*= TEXT LEXING HELPERS ======================================================*/
|
|
|
|
// Parses SRD text from a string and returns the resulting list.
//
//	name		source name used in token and error locations
//	string		text to parse
//	structured	forwarded to the memory target's constructor
//
// Throws X_SRDErrors if any error was recorded while parsing.
T_SRDList ebcl::SRDFromText( T_String const& name , T_String const& string , bool structured )
{
	T_SRDMemoryTarget mt( structured );
	T_SRDErrors errors;
	mt.start( errors );

	T_SRDLexer lexer( name , errors , mt );
	for ( T_StringIterator it( string ) ; !it.atEnd( ) ; it.next( ) ) {
		lexer.processCharacter( T_Character( it ) );
	}
	lexer.processEnd( );

	if ( errors.size( ) == 0 ) {
		return mt.list( );
	}
	throw X_SRDErrors( errors );
}
|
|
|
|
// Parses SRD text from a NUL-terminated C string and returns the resulting
// list.
//
//	name		source name used in token and error locations
//	string		text to parse; must not be null
//	structured	forwarded to the memory target's constructor
//
// Each `char` of the input is converted to a T_Character on its own.
// NOTE(review): multi-byte UTF-8 sequences are presumably not decoded on
// this path, unlike in the T_String overload - confirm.
//
// Throws X_SRDErrors if any error was recorded while parsing.
T_SRDList ebcl::SRDFromText( T_String const& name , char const* string , bool structured )
{
	T_SRDMemoryTarget mt( structured );
	T_SRDErrors errors;
	mt.start( errors );

	T_SRDLexer lexer( name , errors , mt );
	for ( char const* cursor = string ; *cursor ; ++ cursor ) {
		lexer.processCharacter( T_Character( *cursor ) );
	}
	lexer.processEnd( );

	if ( errors.size( ) == 0 ) {
		return mt.list( );
	}
	throw X_SRDErrors( errors );
}
|
|
|
|
|
|
/*= T_SRDTextWriterPrivate_ ==================================================*/
|
|
|
|
namespace {
|
|
struct T_SRDTextWriterPrivate_
|
|
{
|
|
A_OutputStream& output_;
|
|
T_StringBuilder sb_;
|
|
bool justHadList_;
|
|
bool isFirst_;
|
|
uint32_t depth_;
|
|
|
|
//----------------------------------------------------------------------
|
|
|
|
explicit T_SRDTextWriterPrivate_( A_OutputStream& output );
|
|
|
|
void flush( bool force );
|
|
void addTabs( );
|
|
void beforeToken( );
|
|
|
|
void start( );
|
|
void end( );
|
|
|
|
void startList( );
|
|
void endList( );
|
|
|
|
void putWord( T_String const& word );
|
|
void putString( T_String const& string );
|
|
void putComment( T_String const& comment );
|
|
void putVariable( T_String const& variable );
|
|
void putBinary( T_Buffer< uint8_t > const& buffer );
|
|
void putInteger( int64_t value );
|
|
void putInt32( int32_t value );
|
|
void putInt64( int64_t value );
|
|
void putFloat( double value );
|
|
};
|
|
} // namespace
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Creates the writer's private state for the specified output stream.
//
// The formatting state is initialised to the values start( ) would set, so
// that a put*( ) or list call issued before start( ) reads defined values
// instead of indeterminate ones.
T_SRDTextWriterPrivate_::T_SRDTextWriterPrivate_( A_OutputStream& output )
	: output_( output ) ,
		justHadList_( false ) , isFirst_( true ) , depth_( 0 )
{ }
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
void T_SRDTextWriterPrivate_::addTabs( )
|
|
{
|
|
sb_ << "\r\n";
|
|
for ( uint32_t i = 0 ; i < depth_ ; i ++ ) {
|
|
sb_ << " ";
|
|
}
|
|
}
|
|
|
|
void T_SRDTextWriterPrivate_::beforeToken( )
|
|
{
|
|
if ( justHadList_ ) {
|
|
addTabs( );
|
|
justHadList_ = false;
|
|
} else if ( !isFirst_ ) {
|
|
sb_ << ' ';
|
|
}
|
|
isFirst_ = false;
|
|
}
|
|
|
|
void T_SRDTextWriterPrivate_::flush( bool force )
|
|
{
|
|
if ( !force && sb_.size( ) < 1024 ) {
|
|
return;
|
|
}
|
|
output_.write( ( uint8_t const* ) sb_.data( ) , sb_.size( ) );
|
|
sb_.clear( );
|
|
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
inline void T_SRDTextWriterPrivate_::start( )
|
|
{
|
|
justHadList_ = false;
|
|
isFirst_ = true;
|
|
depth_ = 0;
|
|
}
|
|
|
|
inline void T_SRDTextWriterPrivate_::end( )
|
|
{
|
|
if ( depth_ != 0 ) {
|
|
throw X_SRDWriterError( "unterminated lists" );
|
|
}
|
|
if ( sb_.size( ) && sb_.data( )[ sb_.size( ) - 1 ] != '\n' ) {
|
|
sb_ << "\r\n";
|
|
}
|
|
flush( true );
|
|
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
inline void T_SRDTextWriterPrivate_::startList( )
|
|
{
|
|
if ( !isFirst_ ) {
|
|
addTabs( );
|
|
}
|
|
sb_ << '(';
|
|
depth_ ++;
|
|
isFirst_ = true;
|
|
justHadList_ = false;
|
|
flush( false );
|
|
}
|
|
|
|
inline void T_SRDTextWriterPrivate_::endList( )
|
|
{
|
|
if ( depth_ == 0 ) {
|
|
throw X_SRDWriterError( "at top level" );
|
|
}
|
|
sb_ << ')';
|
|
depth_ --;
|
|
justHadList_ = true;
|
|
isFirst_ = false;
|
|
flush( false );
|
|
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Writes a word as it is, after the required separator. The word's contents
// are not checked here.
inline void T_SRDTextWriterPrivate_::putWord( T_String const& word )
{
	beforeToken( );

	sb_ << word;

	flush( false );
}
|
|
|
|
// Writes a quoted string, after the required separator.
//
// Line feeds, carriage returns, tabs, backspaces, form feeds, backslashes
// and double quotes use their short escape sequences. Codepoints above
// 0xffff are written as \U followed by 8 hexadecimal digits; other control
// or non-ASCII characters as \u followed by 4 hexadecimal digits. Everything
// else is copied as it is.
inline void T_SRDTextWriterPrivate_::putString( T_String const& string )
{
	beforeToken( );
	sb_ << '"';

	T_StringIterator it( string );
	while ( !it.atEnd( ) ) {
		T_Character c( it );
		it.next( );

		if ( c == '\n' ) {
			sb_ << "\\n";
		} else if ( c == '\r' ) {
			sb_ << "\\r";
		} else if ( c == '\t' ) {
			sb_ << "\\t";
		} else if ( c == 8 ) {
			sb_ << "\\b";
		} else if ( c == 12 ) {
			sb_ << "\\f";
		} else if ( c == '\\' ) {
			sb_ << "\\\\";
		} else if ( c == '"' ) {
			sb_ << "\\\"";
		} else if ( c > 0xffff ) {
			sb_ << "\\U";
			// Left-pad to 8 digits. Padding stops once a single
			// digit remains, as the number itself always provides
			// at least one digit.
			uint32_t x( 0x10000000 );
			while ( x > 0xf && c < x ) {
				sb_ << '0';
				x >>= 4;
			}
			sb_.appendNumeric( uint64_t( c ) , 16 );
		} else if ( c.isControl( ) || !c.isAscii( ) ) {
			sb_ << "\\u";
			// Left-pad to 4 digits. The `x > 0xf` guard matters
			// for codepoint 0: without it, 4 zeros would be added
			// before the number's own digit (assuming 0 is
			// formatted as "0"), producing 5 digits. The lexer
			// reads exactly 4 digits after \u, so the extra '0'
			// would end up in the string's value.
			uint32_t x( 0x1000 );
			while ( x > 0xf && c < x ) {
				sb_ << '0';
				x >>= 4;
			}
			sb_.appendNumeric( uint64_t( c ) , 16 );
		} else {
			sb_ << c;
		}
	}

	sb_ << '"';
	flush( false );
}
|
|
|
|
// Writes a comment using the single-line syntax: each line of the comment's
// text is written as '#' followed by the line, then a new indented output
// line is started. Carriage returns in the text are dropped. The token that
// follows the comment will not need a separator.
inline void T_SRDTextWriterPrivate_::putComment( T_String const& comment )
{
	if ( !isFirst_ ) {
		sb_ << ' ';
	}

	// True when the next character begins a new comment line
	bool atLineStart = true;
	for ( T_StringIterator it( comment ) ; !it.atEnd( ) ; it.next( ) ) {
		T_Character c( it );

		if ( atLineStart ) {
			sb_ << '#';
			atLineStart = false;
		}

		if ( c == '\n' ) {
			addTabs( );
			atLineStart = true;
			continue;
		}
		if ( c != '\r' ) {
			sb_ << c;
		}
	}

	// Terminate the last line if the text did not end with a line feed
	if ( !atLineStart && comment ) {
		addTabs( );
	}

	justHadList_ = false;
	isFirst_ = true;
	flush( false );
}
|
|
|
|
// Writes a variable, i.e. '$' followed by the variable's name, after the
// required separator. The name's contents are not checked here.
inline void T_SRDTextWriterPrivate_::putVariable( T_String const& variable )
{
	beforeToken( );

	sb_ << '$';
	sb_ << variable;

	flush( false );
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Writes a binary array, after the required separator. Each byte is written
// as two hexadecimal digits.
//
// Arrays of up to 8 bytes are written on the current line, with spaces
// around each byte. Longer arrays are written as lines of 16 bytes indented
// one level deeper than the array's brackets, with the closing bracket on
// its own line.
//
// The pending text is flushed at the end, like every other put*( ) method
// does; binary arrays can generate a lot of text, which would otherwise
// pile up in memory until the next token is written.
void T_SRDTextWriterPrivate_::putBinary( T_Buffer< uint8_t > const& buffer )
{
	// Writes one byte as exactly two hexadecimal digits
	const auto putByte = [this]( uint8_t byte ) {
		if ( byte < 16 ) {
			sb_ << '0';
		}
		sb_.appendNumeric( uint64_t( byte ) , 16 );
	};

	const auto bs( buffer.size( ) );
	beforeToken( );
	sb_ << '[';

	if ( bs <= 8 ) {
		// Short array, single line
		for ( uint32_t i = 0 ; i < bs ; i ++ ) {
			sb_ << ' ';
			putByte( buffer[ i ] );
		}
		if ( bs ) {
			sb_ << ' ';
		}
	} else {
		// Long array, one line per group of 16 bytes
		depth_ ++;
		for ( uint32_t i = 0 ; i < bs ; i ++ ) {
			if ( i % 16 == 0 ) {
				addTabs( );
			} else {
				sb_ << ' ';
			}
			putByte( buffer[ i ] );
		}
		depth_ --;
		addTabs( );
	}

	sb_ << ']';
	flush( false );
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
inline void T_SRDTextWriterPrivate_::putInteger( int64_t value )
|
|
{
|
|
beforeToken( );
|
|
sb_ << value;
|
|
flush( false );
|
|
}
|
|
|
|
inline void T_SRDTextWriterPrivate_::putInt32( int32_t value )
|
|
{
|
|
beforeToken( );
|
|
sb_ << value;
|
|
flush( false );
|
|
}
|
|
|
|
inline void T_SRDTextWriterPrivate_::putInt64( int64_t value )
|
|
{
|
|
beforeToken( );
|
|
sb_ << value;
|
|
flush( false );
|
|
}
|
|
|
|
inline void T_SRDTextWriterPrivate_::putFloat( double value )
|
|
{
|
|
beforeToken( );
|
|
sb_.appendDouble( value , 25 );
|
|
flush( false );
|
|
}
|
|
|
|
|
|
/*= T_SRDTextWriter ==========================================================*/
|
|
|
|
// Creates a text writer which writes to the specified stream. The private
// implementation is allocated here and handed to the base class.
T_SRDTextWriter::T_SRDTextWriter( A_OutputStream& output )
	: A_PrivateImplementation(
		new T_SRDTextWriterPrivate_( output ) )
{ }
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Starts the output. Returns the writer, so that calls can be chained.
T_SRDTextWriter& T_SRDTextWriter::start( )
{
	auto&& impl( p< T_SRDTextWriterPrivate_ >( ) );
	impl.start( );
	return *this;
}
|
|
|
|
// Ends the output, writing all pending text to the stream. Returns the
// writer, so that calls can be chained.
T_SRDTextWriter& T_SRDTextWriter::end( )
{
	auto&& impl( p< T_SRDTextWriterPrivate_ >( ) );
	impl.end( );
	return *this;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Opens a list. Returns the writer, so that calls can be chained.
T_SRDTextWriter& T_SRDTextWriter::startList( )
{
	auto&& impl( p< T_SRDTextWriterPrivate_ >( ) );
	impl.startList( );
	return *this;
}
|
|
|
|
// Closes the innermost list. Returns the writer, so that calls can be
// chained.
T_SRDTextWriter& T_SRDTextWriter::endList( )
{
	auto&& impl( p< T_SRDTextWriterPrivate_ >( ) );
	impl.endList( );
	return *this;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Writes some text as a word if T_SRDToken::IsWord( ) accepts it, or as a
// quoted string if it does not. Returns the writer, so that calls can be
// chained.
T_SRDTextWriter& T_SRDTextWriter::putText( T_String const& text )
{
	return T_SRDToken::IsWord( text )
		? putWord( text )
		: putString( text );
}
|
|
|
|
// Writes a word. Returns the writer, so that calls can be chained.
T_SRDTextWriter& T_SRDTextWriter::putWord( T_String const& word )
{
	auto&& impl( p< T_SRDTextWriterPrivate_ >( ) );
	impl.putWord( word );
	return *this;
}
|
|
|
|
// Writes a quoted string. Returns the writer, so that calls can be chained.
T_SRDTextWriter& T_SRDTextWriter::putString( T_String const& string )
{
	auto&& impl( p< T_SRDTextWriterPrivate_ >( ) );
	impl.putString( string );
	return *this;
}
|
|
|
|
// Writes a comment. Returns the writer, so that calls can be chained.
T_SRDTextWriter& T_SRDTextWriter::putComment( T_String const& comment )
{
	auto&& impl( p< T_SRDTextWriterPrivate_ >( ) );
	impl.putComment( comment );
	return *this;
}
|
|
|
|
// Writes a variable. Returns the writer, so that calls can be chained.
T_SRDTextWriter& T_SRDTextWriter::putVariable( T_String const& variable )
{
	auto&& impl( p< T_SRDTextWriterPrivate_ >( ) );
	impl.putVariable( variable );
	return *this;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Writes a binary array. Returns the writer, so that calls can be chained.
T_SRDTextWriter& T_SRDTextWriter::putBinary(
		T_Buffer< uint8_t > const& buffer )
{
	auto&& impl( p< T_SRDTextWriterPrivate_ >( ) );
	impl.putBinary( buffer );
	return *this;
}
|
|
|
|
/*----------------------------------------------------------------------------*/
|
|
|
|
// Writes an integer. Returns the writer, so that calls can be chained.
T_SRDTextWriter& T_SRDTextWriter::putInteger( int64_t value )
{
	auto&& impl( p< T_SRDTextWriterPrivate_ >( ) );
	impl.putInteger( value );
	return *this;
}
|
|
|
|
// Writes a 32-bit integer. Returns the writer, so that calls can be chained.
T_SRDTextWriter& T_SRDTextWriter::putInt32( int32_t value )
{
	auto&& impl( p< T_SRDTextWriterPrivate_ >( ) );
	impl.putInt32( value );
	return *this;
}
|
|
|
|
// Writes a 64-bit integer. Returns the writer, so that calls can be chained.
T_SRDTextWriter& T_SRDTextWriter::putInt64( int64_t value )
{
	auto&& impl( p< T_SRDTextWriterPrivate_ >( ) );
	impl.putInt64( value );
	return *this;
}
|
|
|
|
// Writes a floating point value. Returns the writer, so that calls can be
// chained.
T_SRDTextWriter& T_SRDTextWriter::putFloat( double value )
{
	auto&& impl( p< T_SRDTextWriterPrivate_ >( ) );
	impl.putFloat( value );
	return *this;
}
|