From 5497856be2761a6f4f1848636304cff134668766 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Emmanuel=20Beno=C3=AEt?=
Date: Wed, 27 Dec 2017 10:49:31 +0100
Subject: [PATCH] Filesystem abstraction

---
 TODO                              |   2 +
 include/ebcl/Filesystem.hh        | 136 +++++++++++
 include/ebcl/inline/Filesystem.hh | 170 +++++++++++++
 src/Filesystem.cc                 | 387 ++++++++++++++++++++++++++++++
 src/files.mk                      |   1 +
 5 files changed, 696 insertions(+)
 create mode 100644 TODO
 create mode 100644 include/ebcl/Filesystem.hh
 create mode 100644 include/ebcl/inline/Filesystem.hh
 create mode 100644 src/Filesystem.cc

diff --git a/TODO b/TODO
new file mode 100644
index 0000000..1dafb5d
--- /dev/null
+++ b/TODO
@@ -0,0 +1,2 @@
+Tests for T_FSPath
+Generalize iterators
diff --git a/include/ebcl/Filesystem.hh b/include/ebcl/Filesystem.hh
new file mode 100644
index 0000000..8c33398
--- /dev/null
+++ b/include/ebcl/Filesystem.hh
@@ -0,0 +1,136 @@
+/******************************************************************************/
+/* FILESYSTEM ABSTRACTION *****************************************************/
+/******************************************************************************/
+
+#ifndef _H_EBCL_FILESYSTEM
+#define _H_EBCL_FILESYSTEM
+
+#include
+#include
+namespace ebcl {
+
+
+/*= FILESYSTEM PATH ==========================================================*/
+
+class T_FSPath
+{
+    public:
+	using T_Components = T_AutoArray< T_String , 16 >;
+
+    private:
+
+	// Root will be '/' on Unix, or a drive letter on Windows. Relative
+	// paths will have a blank root.
+	T_String root_;
+	T_Components components_;
+	bool valid_;
+
+    public:
+	//----------------------------------------------------------------------
+	// Helpers
+
+	static bool IsValidComponent( T_String const& string ) noexcept;
+	static bool IsValidRoot( T_String const& string ) noexcept;
+
+	//----------------------------------------------------------------------
+	// Basic constructors and assignment operators
+
+	T_FSPath( ) noexcept;
+
+	T_FSPath( T_FSPath const& other ) noexcept;
+	T_FSPath& operator =( T_FSPath const& other ) noexcept;
+
+	T_FSPath( T_FSPath&& other ) noexcept;
+	T_FSPath& operator =( T_FSPath&& other ) noexcept;
+
+	T_FSPath& swap( T_FSPath& other ) noexcept;
+
+	// Construct from a string
+	T_FSPath( T_String const& path ) noexcept;
+
+	//----------------------------------------------------------------------
+
+	bool isValid( ) const noexcept;
+
+	T_String const& root( ) const noexcept;
+	T_Components const& components( ) const noexcept;
+
+	bool isRelative( ) const noexcept;
+	bool isAbsolute( ) const noexcept;
+
+	//----------------------------------------------------------------------
+
+	// Convert the path into a string
+	T_String toString( ) const noexcept;
+	// Compute a hash value for the path
+	uint32_t computeHash( ) const noexcept;
+	// Append to a string builder
+	void appendTo( T_StringBuilder& sb ) const noexcept;
+
+	// Comparisons. On Windows the comparison will be case-insensitive.
+	int32_t compareTo( T_FSPath const& other ) const noexcept;
+	bool operator ==( T_FSPath const& other ) const noexcept;
+	bool operator !=( T_FSPath const& other ) const noexcept;
+	bool operator <( T_FSPath const& other ) const noexcept;
+	bool operator <=( T_FSPath const& other ) const noexcept;
+	bool operator >( T_FSPath const& other ) const noexcept;
+	bool operator >=( T_FSPath const& other ) const noexcept;
+
+	//----------------------------------------------------------------------
+
+	// Returns the parent's path (the path itself if it has no components)
+	T_FSPath parent( ) const noexcept;
+
+	// Returns the path of the child with the specified name
+	T_FSPath child( T_String const& name ) const noexcept;
+
+	// Appends the specified (relative) path to the current path and return
+	// the result. If the specified path is absolute, the result is invalid.
+	T_FSPath operator +( T_FSPath const& other ) const noexcept;
+
+	// Checks if the current path and the specified path have the same
+	// parent and are different.
+	bool inDirectoryOf( T_FSPath const& other ) const noexcept;
+
+	// Checks if the current path is the direct parent of the specified path
+	bool isParentOf( T_FSPath const& other ) const noexcept;
+	// Checks if the current path is the direct child of the specified path
+	bool isChildOf( T_FSPath const& other ) const noexcept;
+
+	// Checks if the current path is a parent of the specified path
+	bool isAbove( T_FSPath const& other ) const noexcept;
+	// Checks if the current path is a child of the specified path
+	bool isUnder( T_FSPath const& other ) const noexcept;
+
+	// Create a relative path based on the current path and a "parent" path.
+	// Both paths must be canonical and valid. If they have different roots,
+	// a copy of the current path will be returned.
+	T_FSPath makeRelative( T_FSPath const& relTo ) const noexcept;
+
+	//----------------------------------------------------------------------
+
+	// Checks whether the current path is canonical
+	bool isCanonical( ) const noexcept;
+	// Return a canonical path from the current path
+	T_FSPath canonical( ) const noexcept;
+};
+M_DECLARE_SWAP( T_FSPath );
+M_DECLARE_HASH( T_FSPath );
+M_LSHIFT_OP( T_StringBuilder , T_FSPath const& );
+
+
+/*= FILESYSTEM ===============================================================*/
+
+class Filesystem final
+{
+	Filesystem( ) = delete;
+    public:
+	// Return the absolute path to the current working directory
+	static T_FSPath Cwd( ) noexcept;
+
+};
+
+
+} // namespace ebcl
+#endif // _H_EBCL_FILESYSTEM
+#include
diff --git a/include/ebcl/inline/Filesystem.hh b/include/ebcl/inline/Filesystem.hh
new file mode 100644
index 0000000..00af515
--- /dev/null
+++ b/include/ebcl/inline/Filesystem.hh
@@ -0,0 +1,170 @@
+/******************************************************************************/
+/* FILESYSTEM ABSTRACTION - INLINE CODE ***************************************/
+/******************************************************************************/
+
+#ifndef _H_EBCL_INLINE_FILESYSTEM
+#define _H_EBCL_INLINE_FILESYSTEM
+
+#include
+namespace ebcl {
+
+
+/*= T_FSPath =================================================================*/
+
+inline T_FSPath::T_FSPath( ) noexcept
+	: root_{ } , components_{ } , valid_{ true }
+{ }
+
+/*----------------------------------------------------------------------------*/
+
+inline T_FSPath::T_FSPath(
+		T_FSPath const& other ) noexcept
+	: root_{ other.root_ } , components_{ other.components_ } ,
+		valid_{ other.valid_ }
+{ }
+
+inline T_FSPath& T_FSPath::operator =(
+		T_FSPath const& other ) noexcept
+{
+	root_ = other.root_;
+	components_ = other.components_;
+	valid_ = other.valid_;
+	return *this;
+}
+
+/*----------------------------------------------------------------------------*/
+
+inline T_FSPath::T_FSPath( T_FSPath&& other ) noexcept
+	: T_FSPath{ }
+{
+	swap( other );
+}
+
+inline T_FSPath& T_FSPath::operator =(
+		T_FSPath&& other ) noexcept
+{
+	root_ = T_String{ };
+	components_.free( );
+	valid_ = true;
+	return swap( other );
+}
+
+/*----------------------------------------------------------------------------*/
+
+inline T_FSPath& T_FSPath::swap(
+		T_FSPath& other ) noexcept
+{
+	using std::swap;
+	swap( root_ , other.root_ );
+	swap( components_ , other.components_ );
+	swap( valid_ , other.valid_ );
+	return *this;
+}
+
+inline M_DEFINE_SWAP( T_FSPath )
+{
+	lhs.swap( rhs );
+}
+
+/*----------------------------------------------------------------------------*/
+
+inline bool T_FSPath::isValid( ) const noexcept
+{
+	return valid_;
+}
+
+inline T_String const& T_FSPath::root( ) const noexcept
+{
+	return root_;
+}
+
+inline T_FSPath::T_Components const& T_FSPath::components( ) const noexcept
+{
+	return components_;
+}
+
+inline bool T_FSPath::isRelative( ) const noexcept
+{
+	return !root_;
+}
+
+inline bool T_FSPath::isAbsolute( ) const noexcept
+{
+	return bool( root_ );
+}
+
+/*----------------------------------------------------------------------------*/
+
+inline T_String T_FSPath::toString( ) const noexcept
+{
+	T_StringBuilder sb;
+	appendTo( sb );
+	return T_String{ std::move( sb ) };
+}
+
+inline M_DEFINE_HASH( T_FSPath )
+{
+	return item.computeHash( );
+}
+
+inline M_LSHIFT_OP( T_StringBuilder , T_FSPath const& )
+{
+	value.appendTo( obj );
+	return obj;
+}
+
+/*----------------------------------------------------------------------------*/
+
+inline bool T_FSPath::operator ==(
+		T_FSPath const& other ) const noexcept
+{
+	return compareTo( other ) == 0;
+}
+
+inline bool T_FSPath::operator !=(
+		T_FSPath const& other ) const noexcept
+{
+	return compareTo( other ) != 0;
+}
+
+inline bool T_FSPath::operator <(
+		T_FSPath const& other ) const noexcept
+{
+	return compareTo( other ) < 0;
+}
+
+inline bool T_FSPath::operator <=(
+		T_FSPath const& other ) const noexcept
+{
+	return compareTo( other ) <= 0;
+}
+
+inline bool T_FSPath::operator >(
+		T_FSPath const& other ) const noexcept
+{
+	return compareTo( other ) > 0;
+}
+
+inline bool T_FSPath::operator >=(
+		T_FSPath const& other ) const noexcept
+{
+	return compareTo( other ) >= 0;
+}
+
+/*----------------------------------------------------------------------------*/
+
+inline bool T_FSPath::isChildOf(
+		T_FSPath const& other ) const noexcept
+{
+	return other.isParentOf( *this );
+}
+
+inline bool T_FSPath::isUnder(
+		T_FSPath const& other ) const noexcept
+{
+	return other.isAbove( *this );
+}
+
+
+} // namespace ebcl
+#endif // _H_EBCL_INLINE_FILESYSTEM
diff --git a/src/Filesystem.cc b/src/Filesystem.cc
new file mode 100644
index 0000000..2148b6b
--- /dev/null
+++ b/src/Filesystem.cc
@@ -0,0 +1,431 @@
+/******************************************************************************/
+/* FILESYSTEM ABSTRACTION *****************************************************/
+/******************************************************************************/
+
+#include
+using namespace ebcl;
+
+
+#ifdef _WIN32
+# define M_PATHSEP_ '\\'
+#else
+# define M_PATHSEP_ '/'
+# include <unistd.h>
+#endif
+
+
+/*= T_FSPath =================================================================*/
+
+// Checks whether a string can be used as a path's root. A blank string is
+// accepted, since relative paths have a blank root.
+bool T_FSPath::IsValidRoot(
+		T_String const& str ) noexcept
+{
+	if ( !str ) {
+		return true;
+	}
+
+#ifdef _WIN32
+	// TODO: support for network names (\\server\path)
+	if ( str.length( ) == 3 && ( str.endsWith( ":\\" )
+				|| str.endsWith( ":/" ) ) ) {
+		return str[ 0 ].isAlpha( );
+	} else {
+		return ( str == "/" || str == "\\" );
+	}
+#else
+	return ( str == "/" || str == "\\" );
+#endif
+}
+
+// Checks whether a string can be used as a path component: it must not be
+// blank, and must not contain control characters or any of \ / " < > | : * ?
+bool T_FSPath::IsValidComponent(
+		T_String const& str ) noexcept
+{
+	if ( !str ) {
+		return false;
+	}
+
+	T_StringIterator it{ str };
+	while ( !it.atEnd( ) ) {
+		const auto c{ it.character( ) };
+		it.next( );
+		if ( c.isControl( ) || c == '\\' || c == '/' || c == '"'
+				|| c == '<' || c == '>' || c == '|' || c == ':'
+				|| c == '*' || c == '?' ) {
+			return false;
+		}
+	}
+	return true;
+}
+
+/*----------------------------------------------------------------------------*/
+
+// Builds a path from a string. Both '/' and '\' are separators, and a run of
+// consecutive separators counts as a single one. The path is flagged as
+// invalid if any of its components is invalid.
+T_FSPath::T_FSPath(
+		T_String const& path ) noexcept
+{
+	if ( !path ) {
+		valid_ = true;
+		return;
+	}
+
+	// Find sequences of (back)slashes in the string. For each sequence
+	// we will store the positions of the first and last (back)slash.
+	T_AutoArray< uint32_t , 64 > slashPos;
+	T_StringIterator it{ path };
+	while ( !it.atEnd( ) ) {
+		const auto c{ it.character( ) };
+		const auto p{ it.index( ) };
+		it.next( );
+
+		if ( c != '/' && c != '\\' ) {
+			continue;
+		}
+
+		const auto ps{ slashPos.size( ) };
+		if ( ps == 0 || slashPos[ ps - 1 ] != p - 1 ) {
+			slashPos.add( p );
+			slashPos.add( p );
+		} else {
+			slashPos[ ps - 1 ] ++;
+		}
+	}
+
+	// No (back)slashes, this is a relative path
+	const auto sps{ slashPos.size( ) };
+	if ( sps == 0 ) {
+		components_.add( path );
+		valid_ = IsValidComponent( path );
+		return;
+	}
+
+	// Collect the first item, with the first (back)slash included,
+	// check if it's a root
+	const auto firstSlashPos{ slashPos[ 0 ] };
+	T_String firstItem{ path.substr( 0 , 1 + firstSlashPos ) };
+	valid_ = true;
+	if ( IsValidRoot( firstItem ) ) {
+		root_ = std::move( firstItem );
+	}
+
+	// Get and check the components. slashPos holds (first,last) pairs, so
+	// pos moves two entries at a time; if a root was found, the first pair
+	// belongs to it and is skipped.
+	uint32_t pos{ root_ ? 2u : 0u };
+	while ( pos <= sps ) {
+		T_String item{ path.range(
+				pos ? ( 1 + slashPos[ pos - 1 ] ) : 0 ,
+				pos == sps ? path.length( ) : ( slashPos[ pos ] - 1 ) )
+		};
+		assert( item || pos == sps );
+		if ( item ) {
+			valid_ = valid_ && IsValidComponent( item );
+			components_.add( std::move( item ) );
+		}
+		pos += 2;
+	}
+}
+
+/*----------------------------------------------------------------------------*/
+
+// Hashes the root, the amount of components and each component in turn.
+// NOTE(review): compareTo() ignores case on Windows; if ComputeHash() is
+// case-sensitive, paths that compare equal may hash differently - confirm.
+uint32_t T_FSPath::computeHash( ) const noexcept
+{
+	uint32_t h{ ComputeHash( root_ ) };
+	const auto cs{ components_.size( ) };
+	h = ( ( h << 27 ) | ( h >> 5 ) ) ^ cs;
+	for ( auto i = 0u ; i < cs ; i ++ ) {
+		h = ( ( h << 27 ) | ( h >> 5 ) ) ^ ComputeHash( components_[ i ] );
+	}
+	return h;
+}
+
+// Appends the path to a string builder. A path with neither a root nor
+// components is written as ".".
+void T_FSPath::appendTo(
+		T_StringBuilder& sb ) const noexcept
+{
+	const auto cs{ components_.size( ) };
+	if ( !( root_ || cs ) ) {
+		sb << '.';
+		return;
+	}
+
+	// The root is written even if there are no components, otherwise a
+	// bare root such as "/" would be rendered as a blank string.
+	sb << root_;
+	for ( auto i = 0u ; i < cs ; i ++ ) {
+		if ( i != 0 ) {
+			sb << M_PATHSEP_;
+		}
+		sb << components_[ i ];
+	}
+}
+
+/*----------------------------------------------------------------------------*/
+
+#ifdef _WIN32
+# define M_CMPSTR_(A,B) (A).compareIgnoreCase( B )
+#else
+# define M_CMPSTR_(A,B) (A).compare( B )
+#endif
+
+// Three-way comparison: roots first, then components pairwise, then the
+// amounts of components if one path is a prefix of the other.
+int32_t T_FSPath::compareTo(
+		T_FSPath const& other ) const noexcept
+{
+	if ( &other == this ) {
+		return 0;
+	}
+
+	int32_t cmp{ M_CMPSTR_( root_ , other.root_ ) };
+	if ( cmp == 0 ) {
+		const auto nca{ components_.size( ) } ,
+			ncb{ other.components_.size( ) } ,
+			nc{ std::min( nca , ncb ) };
+		for ( auto i = 0u ; i < nc && cmp == 0 ; i ++ ) {
+			cmp = M_CMPSTR_( components_[ i ] , other.components_[ i ] );
+		}
+		if ( cmp == 0 ) {
+			cmp = T_Comparator< uint32_t >::compare( nca , ncb );
+		}
+	}
+	return cmp;
+}
+
+/*----------------------------------------------------------------------------*/
+
+// Returns a copy of the path without its last component, if it has any
+T_FSPath T_FSPath::parent( ) const noexcept
+{
+	T_FSPath p{ *this };
+	if ( p.components_.size( ) ) {
+		p.components_.removeLast( );
+	}
+	return p;
+}
+
+// Returns a copy of the path with an extra component; the result is invalid
+// if the current path or the new component is invalid.
+T_FSPath T_FSPath::child(
+		T_String const& name ) const noexcept
+{
+	T_FSPath c{ *this };
+	c.components_.add( name );
+	c.valid_ = c.valid_ && IsValidComponent( name );
+	return c;
+}
+
+// Concatenates two paths; the result is invalid if either path is invalid
+// or if the appended path has a root.
+T_FSPath T_FSPath::operator +(
+		T_FSPath const& other ) const noexcept
+{
+	T_FSPath p{ *this };
+	if ( other.root_ ) {
+		p.valid_ = false;
+	} else {
+		p.valid_ = p.valid_ && other.valid_;
+	}
+	p.components_.addAll( other.components_ );
+	return p;
+}
+
+/*----------------------------------------------------------------------------*/
+
+// Checks that both paths have the same root and the same components except
+// for the last one, which must differ.
+bool T_FSPath::inDirectoryOf(
+		T_FSPath const& other ) const noexcept
+{
+	if ( &other == this ) {
+		return false;
+	}
+	if ( M_CMPSTR_( root_ , other.root_ ) != 0 ) {
+		return false;
+	}
+
+	const auto nc{ components_.size( ) };
+	if ( nc != other.components_.size( ) || nc == 0 ) {
+		return false;
+	}
+
+	for ( auto i = 0u ; i < nc - 1 ; i ++ ) {
+		if ( M_CMPSTR_( components_[ i ] , other.components_[ i ] ) != 0 ) {
+			return false;
+		}
+	}
+
+	return M_CMPSTR_( components_[ nc - 1 ] , other.components_[ nc - 1 ] ) != 0;
+}
+
+// Checks that the other path is the current path plus exactly one component
+bool T_FSPath::isParentOf(
+		T_FSPath const& other ) const noexcept
+{
+	if ( &other == this ) {
+		return false;
+	}
+	if ( M_CMPSTR_( root_ , other.root_ ) != 0 ) {
+		return false;
+	}
+
+	const auto nc{ components_.size( ) };
+	if ( nc + 1 != other.components_.size( ) ) {
+		return false;
+	}
+
+	for ( auto i = 0u ; i < nc ; i ++ ) {
+		if ( M_CMPSTR_( components_[ i ] , other.components_[ i ] ) != 0 ) {
+			return false;
+		}
+	}
+	return true;
+}
+
+// Checks that the other path is the current path plus one or more components
+bool T_FSPath::isAbove(
+		T_FSPath const& other ) const noexcept
+{
+	if ( &other == this ) {
+		return false;
+	}
+	if ( M_CMPSTR_( root_ , other.root_ ) != 0 ) {
+		return false;
+	}
+
+	const auto nc{ components_.size( ) };
+	if ( nc >= other.components_.size( ) ) {
+		return false;
+	}
+
+	for ( auto i = 0u ; i < nc ; i ++ ) {
+		if ( M_CMPSTR_( components_[ i ] , other.components_[ i ] ) != 0 ) {
+			return false;
+		}
+	}
+	return true;
+}
+
+/*----------------------------------------------------------------------------*/
+
+// Builds the path leading from relTo to the current path: one ".." for each
+// component of relTo past the common prefix, then the remaining components.
+T_FSPath T_FSPath::makeRelative(
+		T_FSPath const& relTo ) const noexcept
+{
+	assert( isValid( ) && relTo.isValid( ) );
+	assert( isCanonical( ) && relTo.isCanonical( ) );
+	// Roots are compared like components (ignoring case on Windows)
+	if ( M_CMPSTR_( relTo.root_ , root_ ) != 0 ) {
+		return *this;
+	}
+
+	const auto nca{ components_.size( ) } ,
+		ncb{ relTo.components_.size( ) } ,
+		nc{ std::min( nca , ncb ) };
+	uint32_t nCommon{ 0u };
+	for ( auto i = 0u ; i < nc ; i ++ ) {
+		if ( M_CMPSTR_( components_[ i ] , relTo.components_[ i ] ) != 0 ) {
+			break;
+		}
+		nCommon ++;
+	}
+
+	T_FSPath np;
+	const T_String parent{ T_String::Pooled( ".." ) };
+	for ( auto i = ncb ; i > nCommon ; i -- ) {
+		np.components_.add( parent );
+	}
+	for ( auto i = nCommon ; i < nca ; i ++ ) {
+		np.components_.add( components_[ i ] );
+	}
+	return np;
+}
+
+/*----------------------------------------------------------------------------*/
+
+// A path is canonical if it is valid, has a root, and contains neither "."
+// nor ".." components.
+bool T_FSPath::isCanonical( ) const noexcept
+{
+	if ( !( valid_ && root_ ) ) {
+		return false;
+	}
+
+	auto const nc{ components_.size( ) };
+	for ( auto i = 0u ; i < nc ; i ++ ) {
+		if ( components_[ i ] == "." || components_[ i ] == ".." ) {
+			return false;
+		}
+	}
+	return true;
+}
+
+// Resolves "." and ".." components. Paths that are invalid, relative or
+// without components are returned as is; ".." at the root is dropped.
+T_FSPath T_FSPath::canonical( ) const noexcept
+{
+	auto const nc{ components_.size( ) };
+	if ( !( valid_ && nc && root_ ) ) {
+		return *this;
+	}
+
+	T_FSPath np;
+	np.root_ = root_;
+	for ( auto i = 0u ; i < nc ; i ++ ) {
+		auto const& cmp{ components_[ i ] };
+		if ( cmp == ".." ) {
+			if ( np.components_.size( ) ) {
+				np.components_.removeLast( );
+			}
+		} else if ( cmp != "." ) {
+			np.components_.add( cmp );
+		}
+	}
+	return np;
+}
+
+
+/*= Filesystem ===============================================================*/
+
+// Returns the current working directory. If the system call fails, a
+// default (blank, relative) path is returned.
+T_FSPath Filesystem::Cwd( ) noexcept
+{
+	T_Buffer< char > buffer{ 256 };
+#ifdef _WIN32
+	auto req{ buffer.bytes( ) };
+	do {
+		if ( req > buffer.bytes( ) ) {
+			buffer.resize( req + 1 );
+		}
+		req = GetCurrentDirectory( buffer.bytes( ) , &buffer[ 0 ] );
+		if ( req == 0 ) {
+			// The call failed, the buffer's contents are meaningless
+			return T_FSPath{ };
+		}
+	} while ( req > buffer.bytes( ) );
+#else
+	while ( getcwd( &buffer[ 0 ] , buffer.bytes( ) ) == nullptr ) {
+		assert( errno == ERANGE );
+		if ( errno != ERANGE ) {
+			// Growing the buffer won't help; without this, builds with
+			// assertions disabled would loop forever.
+			return T_FSPath{ };
+		}
+		buffer.resize( buffer.size( ) + 256 );
+	}
+#endif
+
+	T_FSPath path{ &buffer[ 0 ] };
+	assert( path.isValid( ) );
+	return path;
+}
diff --git a/src/files.mk b/src/files.mk
index 28885db..7ad9b31 100644
--- a/src/files.mk
+++ b/src/files.mk
@@ -1,6 +1,7 @@
 LIB_SOURCES = \
 	src/DynLib.cc \
 	src/Files.cc \
+	src/Filesystem.cc \
 	src/HashIndex.cc \
 	src/MemoryStreams.cc \
 	src/Pointers.cc \