MyGUI  3.2.0
MyGUI_UString.cpp
Go to the documentation of this file.
00001 
00006 /*
00007     This file is part of MyGUI.
00008 
00009     MyGUI is free software: you can redistribute it and/or modify
00010     it under the terms of the GNU Lesser General Public License as published by
00011     the Free Software Foundation, either version 3 of the License, or
00012     (at your option) any later version.
00013 
00014     MyGUI is distributed in the hope that it will be useful,
00015     but WITHOUT ANY WARRANTY; without even the implied warranty of
00016     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017     GNU Lesser General Public License for more details.
00018 
00019     You should have received a copy of the GNU Lesser General Public License
00020     along with MyGUI.  If not, see <http://www.gnu.org/licenses/>.
00021 */
00022 #include "MyGUI_Precompiled.h"
00023 #include "MyGUI_UString.h"
00024 
00025 namespace MyGUI
00026 {
00027 
00028     //--------------------------------------------------------------------------
00029     UString::_base_iterator::_base_iterator()
00030     {
00031         mString = 0;
00032     }
00033     //--------------------------------------------------------------------------
00034     void UString::_base_iterator::_seekFwd( size_type c )
00035     {
00036         mIter += c;
00037     }
00038     //--------------------------------------------------------------------------
00039     void UString::_base_iterator::_seekRev( size_type c )
00040     {
00041         mIter -= c;
00042     }
00043     //--------------------------------------------------------------------------
00044     void UString::_base_iterator::_become( const _base_iterator& i )
00045     {
00046         mIter = i.mIter;
00047         mString = i.mString;
00048     }
00049     //--------------------------------------------------------------------------
00050     bool UString::_base_iterator::_test_begin() const
00051     {
00052         return mIter == mString->mData.begin();
00053     }
00054     //--------------------------------------------------------------------------
00055     bool UString::_base_iterator::_test_end() const
00056     {
00057         return mIter == mString->mData.end();
00058     }
00059     //--------------------------------------------------------------------------
00060     UString::size_type UString::_base_iterator::_get_index() const
00061     {
00062         return mIter - mString->mData.begin();
00063     }
00064     //--------------------------------------------------------------------------
00065     void UString::_base_iterator::_jump_to( size_type index )
00066     {
00067         mIter = mString->mData.begin() + index;
00068     }
00069     //--------------------------------------------------------------------------
00070     UString::unicode_char UString::_base_iterator::_getCharacter() const
00071     {
00072         size_type current_index = _get_index();
00073         return mString->getChar( current_index );
00074     }
00075     //--------------------------------------------------------------------------
00076     int UString::_base_iterator::_setCharacter( unicode_char uc )
00077     {
00078         size_type current_index = _get_index();
00079         int change = mString->setChar( current_index, uc );
00080         _jump_to( current_index );
00081         return change;
00082     }
00083     //--------------------------------------------------------------------------
00084     void UString::_base_iterator::_moveNext()
00085     {
00086         _seekFwd( 1 ); // move 1 code point forward
00087         if ( _test_end() ) return; // exit if we hit the end
00088         if ( _utf16_surrogate_follow( mIter[0] ) ) {
00089             // landing on a follow code point means we might be part of a bigger character
00090             // so we test for that
00091             code_point lead_half = 0;
00092             //NB: we can't possibly be at the beginning here, so no need to test
00093             lead_half = mIter[-1]; // check the previous code point to see if we're part of a surrogate pair
00094             if ( _utf16_surrogate_lead( lead_half ) ) {
00095                 _seekFwd( 1 ); // if so, then advance 1 more code point
00096             }
00097         }
00098     }
00099     //--------------------------------------------------------------------------
00100     void UString::_base_iterator::_movePrev()
00101     {
00102         _seekRev( 1 ); // move 1 code point backwards
00103         if ( _test_begin() ) return; // exit if we hit the beginning
00104         if ( _utf16_surrogate_follow( mIter[0] ) ) {
00105             // landing on a follow code point means we might be part of a bigger character
00106             // so we test for that
00107             code_point lead_half = 0;
00108             lead_half = mIter[-1]; // check the previous character to see if we're part of a surrogate pair
00109             if ( _utf16_surrogate_lead( lead_half ) ) {
00110                 _seekRev( 1 ); // if so, then rewind 1 more code point
00111             }
00112         }
00113     }
00114     //--------------------------------------------------------------------------
00115     //--------------------------------------------------------------------------
00116     //--------------------------------------------------------------------------
00117     //--------------------------------------------------------------------------
00118     UString::_fwd_iterator::_fwd_iterator()
00119     {
00120 
00121     }
00122     //--------------------------------------------------------------------------
00123     UString::_fwd_iterator::_fwd_iterator( const _fwd_iterator& i )
00124     {
00125         _become( i );
00126     }
00127     //--------------------------------------------------------------------------
00128     UString::_fwd_iterator& UString::_fwd_iterator::operator++()
00129     {
00130         _seekFwd( 1 );
00131         return *this;
00132     }
00133     //--------------------------------------------------------------------------
00134     UString::_fwd_iterator UString::_fwd_iterator::operator++( int )
00135     {
00136         _fwd_iterator tmp( *this );
00137         _seekFwd( 1 );
00138         return tmp;
00139     }
00140     //--------------------------------------------------------------------------
00141     UString::_fwd_iterator& UString::_fwd_iterator::operator--()
00142     {
00143         _seekRev( 1 );
00144         return *this;
00145     }
00146     //--------------------------------------------------------------------------
00147     UString::_fwd_iterator UString::_fwd_iterator::operator--( int )
00148     {
00149         _fwd_iterator tmp( *this );
00150         _seekRev( 1 );
00151         return tmp;
00152     }
00153     //--------------------------------------------------------------------------
00154     UString::_fwd_iterator UString::_fwd_iterator::operator+( difference_type n )
00155     {
00156         _fwd_iterator tmp( *this );
00157         if ( n < 0 )
00158             tmp._seekRev( -n );
00159         else
00160             tmp._seekFwd( n );
00161         return tmp;
00162     }
00163     //--------------------------------------------------------------------------
00164     UString::_fwd_iterator UString::_fwd_iterator::operator-( difference_type n )
00165     {
00166         _fwd_iterator tmp( *this );
00167         if ( n < 0 )
00168             tmp._seekFwd( -n );
00169         else
00170             tmp._seekRev( n );
00171         return tmp;
00172     }
00173     //--------------------------------------------------------------------------
00174     UString::_fwd_iterator& UString::_fwd_iterator::operator+=( difference_type n )
00175     {
00176         if ( n < 0 )
00177             _seekRev( -n );
00178         else
00179             _seekFwd( n );
00180         return *this;
00181     }
00182     //--------------------------------------------------------------------------
00183     UString::_fwd_iterator& UString::_fwd_iterator::operator-=( difference_type n )
00184     {
00185         if ( n < 0 )
00186             _seekFwd( -n );
00187         else
00188             _seekRev( n );
00189         return *this;
00190     }
00191     //--------------------------------------------------------------------------
00192     UString::value_type& UString::_fwd_iterator::operator*() const
00193     {
00194         return *mIter;
00195     }
00196     //--------------------------------------------------------------------------
00197     UString::value_type& UString::_fwd_iterator::operator[]( difference_type n ) const
00198     {
00199         _fwd_iterator tmp( *this );
00200         tmp += n;
00201         return *tmp;
00202     }
00203     //--------------------------------------------------------------------------
00204     UString::_fwd_iterator& UString::_fwd_iterator::moveNext()
00205     {
00206         _moveNext();
00207         return *this;
00208     }
00209     //--------------------------------------------------------------------------
00210     UString::_fwd_iterator& UString::_fwd_iterator::movePrev()
00211     {
00212         _movePrev();
00213         return *this;
00214     }
00215     //--------------------------------------------------------------------------
00216     UString::unicode_char UString::_fwd_iterator::getCharacter() const
00217     {
00218         return _getCharacter();
00219     }
00220     //--------------------------------------------------------------------------
00221     int UString::_fwd_iterator::setCharacter( unicode_char uc )
00222     {
00223         return _setCharacter( uc );
00224     }
00225     //--------------------------------------------------------------------------
00226     //--------------------------------------------------------------------------
00227     //--------------------------------------------------------------------------
00228     //--------------------------------------------------------------------------
00229     UString::_const_fwd_iterator::_const_fwd_iterator()
00230     {
00231 
00232     }
00233     //--------------------------------------------------------------------------
00234     UString::_const_fwd_iterator::_const_fwd_iterator( const _const_fwd_iterator& i )
00235     {
00236         _become( i );
00237     }
00238     //--------------------------------------------------------------------------
00239     UString::_const_fwd_iterator::_const_fwd_iterator( const _fwd_iterator& i )
00240     {
00241         _become( i );
00242     }
00243     //--------------------------------------------------------------------------
00244     UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator++()
00245     {
00246         _seekFwd( 1 );
00247         return *this;
00248     }
00249     //--------------------------------------------------------------------------
00250     UString::_const_fwd_iterator UString::_const_fwd_iterator::operator++( int )
00251     {
00252         _const_fwd_iterator tmp( *this );
00253         _seekFwd( 1 );
00254         return tmp;
00255     }
00256     //--------------------------------------------------------------------------
00257     UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator--()
00258     {
00259         _seekRev( 1 );
00260         return *this;
00261     }
00262     //--------------------------------------------------------------------------
00263     UString::_const_fwd_iterator UString::_const_fwd_iterator::operator--( int )
00264     {
00265         _const_fwd_iterator tmp( *this );
00266         _seekRev( 1 );
00267         return tmp;
00268     }
00269     //--------------------------------------------------------------------------
00270     UString::_const_fwd_iterator UString::_const_fwd_iterator::operator+( difference_type n )
00271     {
00272         _const_fwd_iterator tmp( *this );
00273         if ( n < 0 )
00274             tmp._seekRev( -n );
00275         else
00276             tmp._seekFwd( n );
00277         return tmp;
00278     }
00279     //--------------------------------------------------------------------------
00280     UString::_const_fwd_iterator UString::_const_fwd_iterator::operator-( difference_type n )
00281     {
00282         _const_fwd_iterator tmp( *this );
00283         if ( n < 0 )
00284             tmp._seekFwd( -n );
00285         else
00286             tmp._seekRev( n );
00287         return tmp;
00288     }
00289     //--------------------------------------------------------------------------
00290     UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator+=( difference_type n )
00291     {
00292         if ( n < 0 )
00293             _seekRev( -n );
00294         else
00295             _seekFwd( n );
00296         return *this;
00297     }
00298     //--------------------------------------------------------------------------
00299     UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator-=( difference_type n )
00300     {
00301         if ( n < 0 )
00302             _seekFwd( -n );
00303         else
00304             _seekRev( n );
00305         return *this;
00306     }
00307     //--------------------------------------------------------------------------
00308     const UString::value_type& UString::_const_fwd_iterator::operator*() const
00309     {
00310         return *mIter;
00311     }
00312     //--------------------------------------------------------------------------
00313     const UString::value_type& UString::_const_fwd_iterator::operator[]( difference_type n ) const
00314     {
00315         _const_fwd_iterator tmp( *this );
00316         tmp += n;
00317         return *tmp;
00318     }
00319     //--------------------------------------------------------------------------
00320     UString::_const_fwd_iterator& UString::_const_fwd_iterator::moveNext()
00321     {
00322         _moveNext();
00323         return *this;
00324     }
00325     //--------------------------------------------------------------------------
00326     UString::_const_fwd_iterator& UString::_const_fwd_iterator::movePrev()
00327     {
00328         _movePrev();
00329         return *this;
00330     }
00331     //--------------------------------------------------------------------------
00332     UString::unicode_char UString::_const_fwd_iterator::getCharacter() const
00333     {
00334         return _getCharacter();
00335     }
00336     //--------------------------------------------------------------------------
00337     //--------------------------------------------------------------------------
00338     //--------------------------------------------------------------------------
00339     //--------------------------------------------------------------------------
00340     UString::_rev_iterator::_rev_iterator()
00341     {
00342 
00343     }
00344     //--------------------------------------------------------------------------
00345     UString::_rev_iterator::_rev_iterator( const _rev_iterator& i )
00346     {
00347         _become( i );
00348     }
00349     //--------------------------------------------------------------------------
00350     UString::_rev_iterator& UString::_rev_iterator::operator++()
00351     {
00352         _seekRev( 1 );
00353         return *this;
00354     }
00355     //--------------------------------------------------------------------------
00356     UString::_rev_iterator UString::_rev_iterator::operator++( int )
00357     {
00358         _rev_iterator tmp( *this );
00359         _seekRev( 1 );
00360         return tmp;
00361     }
00362     //--------------------------------------------------------------------------
00363     UString::_rev_iterator& UString::_rev_iterator::operator--()
00364     {
00365         _seekFwd( 1 );
00366         return *this;
00367     }
00368     //--------------------------------------------------------------------------
00369     UString::_rev_iterator UString::_rev_iterator::operator--( int )
00370     {
00371         _rev_iterator tmp( *this );
00372         _seekFwd( 1 );
00373         return tmp;
00374     }
00375     //--------------------------------------------------------------------------
00376     UString::_rev_iterator UString::_rev_iterator::operator+( difference_type n )
00377     {
00378         _rev_iterator tmp( *this );
00379         if ( n < 0 )
00380             tmp._seekFwd( -n );
00381         else
00382             tmp._seekRev( n );
00383         return tmp;
00384     }
00385     //--------------------------------------------------------------------------
00386     UString::_rev_iterator UString::_rev_iterator::operator-( difference_type n )
00387     {
00388         _rev_iterator tmp( *this );
00389         if ( n < 0 )
00390             tmp._seekRev( -n );
00391         else
00392             tmp._seekFwd( n );
00393         return tmp;
00394     }
00395     //--------------------------------------------------------------------------
00396     UString::_rev_iterator& UString::_rev_iterator::operator+=( difference_type n )
00397     {
00398         if ( n < 0 )
00399             _seekFwd( -n );
00400         else
00401             _seekRev( n );
00402         return *this;
00403     }
00404     //--------------------------------------------------------------------------
00405     UString::_rev_iterator& UString::_rev_iterator::operator-=( difference_type n )
00406     {
00407         if ( n < 0 )
00408             _seekRev( -n );
00409         else
00410             _seekFwd( n );
00411         return *this;
00412     }
00413     //--------------------------------------------------------------------------
00414     UString::value_type& UString::_rev_iterator::operator*() const
00415     {
00416         return mIter[-1];
00417     }
00418     //--------------------------------------------------------------------------
00419     UString::value_type& UString::_rev_iterator::operator[]( difference_type n ) const
00420     {
00421         _rev_iterator tmp( *this );
00422         tmp -= n;
00423         return *tmp;
00424     }
00425     //--------------------------------------------------------------------------
00426     //--------------------------------------------------------------------------
00427     //--------------------------------------------------------------------------
00428     //--------------------------------------------------------------------------
00429     UString::_const_rev_iterator::_const_rev_iterator()
00430     {
00431 
00432     }
00433     //--------------------------------------------------------------------------
00434     UString::_const_rev_iterator::_const_rev_iterator( const _const_rev_iterator& i )
00435     {
00436         _become( i );
00437     }
00438     //--------------------------------------------------------------------------
00439     UString::_const_rev_iterator::_const_rev_iterator( const _rev_iterator& i )
00440     {
00441         _become( i );
00442     }
00443     //--------------------------------------------------------------------------
00444     UString::_const_rev_iterator& UString::_const_rev_iterator::operator++()
00445     {
00446         _seekRev( 1 );
00447         return *this;
00448     }
00449     //--------------------------------------------------------------------------
00450     UString::_const_rev_iterator UString::_const_rev_iterator::operator++( int )
00451     {
00452         _const_rev_iterator tmp( *this );
00453         _seekRev( 1 );
00454         return tmp;
00455     }
00456     //--------------------------------------------------------------------------
00457     UString::_const_rev_iterator& UString::_const_rev_iterator::operator--()
00458     {
00459         _seekFwd( 1 );
00460         return *this;
00461     }
00462     //--------------------------------------------------------------------------
00463     UString::_const_rev_iterator UString::_const_rev_iterator::operator--( int )
00464     {
00465         _const_rev_iterator tmp( *this );
00466         _seekFwd( 1 );
00467         return tmp;
00468     }
00469     //--------------------------------------------------------------------------
00470     UString::_const_rev_iterator UString::_const_rev_iterator::operator+( difference_type n )
00471     {
00472         _const_rev_iterator tmp( *this );
00473         if ( n < 0 )
00474             tmp._seekFwd( -n );
00475         else
00476             tmp._seekRev( n );
00477         return tmp;
00478     }
00479     //--------------------------------------------------------------------------
00480     UString::_const_rev_iterator UString::_const_rev_iterator::operator-( difference_type n )
00481     {
00482         _const_rev_iterator tmp( *this );
00483         if ( n < 0 )
00484             tmp._seekRev( -n );
00485         else
00486             tmp._seekFwd( n );
00487         return tmp;
00488     }
00489     //--------------------------------------------------------------------------
00490     UString::_const_rev_iterator& UString::_const_rev_iterator::operator+=( difference_type n )
00491     {
00492         if ( n < 0 )
00493             _seekFwd( -n );
00494         else
00495             _seekRev( n );
00496         return *this;
00497     }
00498     //--------------------------------------------------------------------------
00499     UString::_const_rev_iterator& UString::_const_rev_iterator::operator-=( difference_type n )
00500     {
00501         if ( n < 0 )
00502             _seekRev( -n );
00503         else
00504             _seekFwd( n );
00505         return *this;
00506     }
00507     //--------------------------------------------------------------------------
00508     const UString::value_type& UString::_const_rev_iterator::operator*() const
00509     {
00510         return mIter[-1];
00511     }
00512     //--------------------------------------------------------------------------
00513     const UString::value_type& UString::_const_rev_iterator::operator[]( difference_type n ) const
00514     {
00515         _const_rev_iterator tmp( *this );
00516         tmp -= n;
00517         return *tmp;
00518     }
00519     //--------------------------------------------------------------------------
00520     //--------------------------------------------------------------------------
00521     //--------------------------------------------------------------------------
00522     //--------------------------------------------------------------------------
00523     UString::UString()
00524     {
00525         _init();
00526     }
00527     //--------------------------------------------------------------------------
00528     UString::UString( const UString& copy )
00529     {
00530         _init();
00531         mData = copy.mData;
00532     }
00533     //--------------------------------------------------------------------------
00534     UString::UString( size_type length, const code_point& ch )
00535     {
00536         _init();
00537         assign( length, ch );
00538     }
00539     //--------------------------------------------------------------------------
00540     UString::UString( const code_point* str )
00541     {
00542         _init();
00543         assign( str );
00544     }
00545     //--------------------------------------------------------------------------
00546     UString::UString( const code_point* str, size_type length )
00547     {
00548         _init();
00549         assign( str, length );
00550     }
00551     //--------------------------------------------------------------------------
00552     UString::UString( const UString& str, size_type index, size_type length )
00553     {
00554         _init();
00555         assign( str, index, length );
00556     }
00557     //--------------------------------------------------------------------------
00558 #if MYGUI_IS_NATIVE_WCHAR_T
00559     UString::UString( const wchar_t* w_str )
00560     {
00561         _init();
00562         assign( w_str );
00563     }
00564     //--------------------------------------------------------------------------
00565     UString::UString( const wchar_t* w_str, size_type length )
00566     {
00567         _init();
00568         assign( w_str, length );
00569     }
00570 #endif
00571     //--------------------------------------------------------------------------
00572     UString::UString( const std::wstring& wstr )
00573     {
00574         _init();
00575         assign( wstr );
00576     }
00577     //--------------------------------------------------------------------------
00578     UString::UString( const char* c_str )
00579     {
00580         _init();
00581         assign( c_str );
00582     }
00583     //--------------------------------------------------------------------------
00584     UString::UString( const char* c_str, size_type length )
00585     {
00586         _init();
00587         assign( c_str, length );
00588     }
00589     //--------------------------------------------------------------------------
00590     UString::UString( const std::string& str )
00591     {
00592         _init();
00593         assign( str );
00594     }
00595     //--------------------------------------------------------------------------
00596     UString::~UString()
00597     {
00598         _cleanBuffer();
00599     }
00600     //--------------------------------------------------------------------------
00601     UString::size_type UString::size() const
00602     {
00603         return mData.size();
00604     }
00605     //--------------------------------------------------------------------------
00606     UString::size_type UString::length() const
00607     {
00608         return size();
00609     }
00610     //--------------------------------------------------------------------------
00611     UString::size_type UString::length_Characters() const
00612     {
00613         const_iterator i = begin(), ie = end();
00614         size_type c = 0;
00615         while ( i != ie ) {
00616             i.moveNext();
00617             ++c;
00618         }
00619         return c;
00620     }
00621     //--------------------------------------------------------------------------
00622     UString::size_type UString::max_size() const
00623     {
00624         return mData.max_size();
00625     }
00626     //--------------------------------------------------------------------------
00627     void UString::reserve( size_type size )
00628     {
00629         mData.reserve( size );
00630     }
00631     //--------------------------------------------------------------------------
00632     void UString::resize( size_type num, const code_point& val /*= 0 */ )
00633     {
00634         mData.resize( num, val );
00635     }
00636     //--------------------------------------------------------------------------
00637     void UString::swap( UString& from )
00638     {
00639         mData.swap( from.mData );
00640     }
00641     //--------------------------------------------------------------------------
00642     bool UString::empty() const
00643     {
00644         return mData.empty();
00645     }
00646     //--------------------------------------------------------------------------
00647     const UString::code_point* UString::c_str() const
00648     {
00649         return mData.c_str();
00650     }
00651     //--------------------------------------------------------------------------
00652     const UString::code_point* UString::data() const
00653     {
00654         return c_str();
00655     }
00656     //--------------------------------------------------------------------------
00657     UString::size_type UString::capacity() const
00658     {
00659         return mData.capacity();
00660     }
00661     //--------------------------------------------------------------------------
00662     void UString::clear()
00663     {
00664         mData.clear();
00665     }
00666     //--------------------------------------------------------------------------
00667     UString UString::substr( size_type index, size_type num /*= npos */ ) const
00668     {
00669         // this could avoid the extra copy if we used a private specialty constructor
00670         dstring data = mData.substr( index, num );
00671         UString tmp;
00672         tmp.mData.swap( data );
00673         return tmp;
00674     }
00675     //--------------------------------------------------------------------------
00676     void UString::push_back( unicode_char val )
00677     {
00678         code_point cp[2];
00679         size_t c = _utf32_to_utf16( val, cp );
00680         if ( c > 0 ) push_back( cp[0] );
00681         if ( c > 1 ) push_back( cp[1] );
00682     }
00683     //--------------------------------------------------------------------------
00684 #if MYGUI_IS_NATIVE_WCHAR_T
00685     void UString::push_back( wchar_t val )
00686     {
00687         // we do this because the Unicode method still preserves UTF-16 code points
00688         mData.push_back( static_cast<code_point>( val ) );
00689     }
00690 #endif
00691     //--------------------------------------------------------------------------
00692     void UString::push_back( code_point val )
00693     {
00694         mData.push_back( val );
00695     }
00696 
00697     void UString::push_back( char val )
00698     {
00699         mData.push_back( static_cast<code_point>( val ) );
00700     }
00701 
00702     bool UString::inString( unicode_char ch ) const
00703     {
00704         const_iterator i, ie = end();
00705         for ( i = begin(); i != ie; i.moveNext() ) {
00706             if ( i.getCharacter() == ch )
00707                 return true;
00708         }
00709         return false;
00710     }
00711 
00712     const std::string& UString::asUTF8() const
00713     {
00714         _load_buffer_UTF8();
00715         return *m_buffer.mStrBuffer;
00716     }
00717 
00718     const char* UString::asUTF8_c_str() const
00719     {
00720         _load_buffer_UTF8();
00721         return m_buffer.mStrBuffer->c_str();
00722     }
00723 
00724     const UString::utf32string& UString::asUTF32() const
00725     {
00726         _load_buffer_UTF32();
00727         return *m_buffer.mUTF32StrBuffer;
00728     }
00729 
00730     const UString::unicode_char* UString::asUTF32_c_str() const
00731     {
00732         _load_buffer_UTF32();
00733         return m_buffer.mUTF32StrBuffer->c_str();
00734     }
00735 
00736     const std::wstring& UString::asWStr() const
00737     {
00738         _load_buffer_WStr();
00739         return *m_buffer.mWStrBuffer;
00740     }
00741 
00742     const wchar_t* UString::asWStr_c_str() const
00743     {
00744         _load_buffer_WStr();
00745         return m_buffer.mWStrBuffer->c_str();
00746     }
00747 
00748     UString::code_point& UString::at( size_type loc )
00749     {
00750         return mData.at( loc );
00751     }
00752 
00753     const UString::code_point& UString::at( size_type loc ) const
00754     {
00755         return mData.at( loc );
00756     }
00757 
00758     UString::unicode_char UString::getChar( size_type loc ) const
00759     {
00760         const code_point* ptr = c_str();
00761         unicode_char uc;
00762         size_t l = _utf16_char_length( ptr[loc] );
00763         code_point cp[2] = { /* blame the code beautifier */
00764             0, 0
00765         };
00766         cp[0] = ptr[loc];
00767 
00768         if ( l == 2 && ( loc + 1 ) < mData.length() ) {
00769             cp[1] = ptr[loc+1];
00770         }
00771         _utf16_to_utf32( cp, uc );
00772         return uc;
00773     }
00774 
00775     int UString::setChar( size_type loc, unicode_char ch )
00776     {
00777         code_point cp[2] = { /* blame the code beautifier */
00778             0, 0
00779         };
00780         size_t l = _utf32_to_utf16( ch, cp );
00781         unicode_char existingChar = getChar( loc );
00782         size_t existingSize = _utf16_char_length( existingChar );
00783         size_t newSize = _utf16_char_length( ch );
00784 
00785         if ( newSize > existingSize ) {
00786             at( loc ) = cp[0];
00787             insert( loc + 1, 1, cp[1] );
00788             return 1;
00789         }
00790         if ( newSize < existingSize ) {
00791             erase( loc, 1 );
00792             at( loc ) = cp[0];
00793             return -1;
00794         }
00795 
00796         // newSize == existingSize
00797         at( loc ) = cp[0];
00798         if ( l == 2 ) at( loc + 1 ) = cp[1];
00799         return 0;
00800     }
00801 
00802     UString::iterator UString::begin()
00803     {
00804         iterator i;
00805         i.mIter = mData.begin();
00806         i.mString = this;
00807         return i;
00808     }
00809 
00810     UString::const_iterator UString::begin() const
00811     {
00812         const_iterator i;
00813         i.mIter = const_cast<UString*>( this )->mData.begin();
00814         i.mString = const_cast<UString*>( this );
00815         return i;
00816     }
00817 
00818     UString::iterator UString::end()
00819     {
00820         iterator i;
00821         i.mIter = mData.end();
00822         i.mString = this;
00823         return i;
00824     }
00825 
00826     UString::const_iterator UString::end() const
00827     {
00828         const_iterator i;
00829         i.mIter = const_cast<UString*>( this )->mData.end();
00830         i.mString = const_cast<UString*>( this );
00831         return i;
00832     }
00833 
00834     UString::reverse_iterator UString::rbegin()
00835     {
00836         reverse_iterator i;
00837         i.mIter = mData.end();
00838         i.mString = this;
00839         return i;
00840     }
00841 
00842     UString::const_reverse_iterator UString::rbegin() const
00843     {
00844         const_reverse_iterator i;
00845         i.mIter = const_cast<UString*>( this )->mData.end();
00846         i.mString = const_cast<UString*>( this );
00847         return i;
00848     }
00849 
00850     UString::reverse_iterator UString::rend()
00851     {
00852         reverse_iterator i;
00853         i.mIter = mData.begin();
00854         i.mString = this;
00855         return i;
00856     }
00857 
00858     UString::const_reverse_iterator UString::rend() const
00859     {
00860         const_reverse_iterator i;
00861         i.mIter = const_cast<UString*>( this )->mData.begin();
00862         i.mString = const_cast<UString*>( this );
00863         return i;
00864     }
00865 
00866     UString& UString::assign( iterator start, iterator end )
00867     {
00868         mData.assign( start.mIter, end.mIter );
00869         return *this;
00870     }
00871 
00872     UString& UString::assign( const UString& str )
00873     {
00874         mData.assign( str.mData );
00875         return *this;
00876     }
00877 
00878     UString& UString::assign( const code_point* str )
00879     {
00880         mData.assign( str );
00881         return *this;
00882     }
00883 
00884     UString& UString::assign( const code_point* str, size_type num )
00885     {
00886         mData.assign( str, num );
00887         return *this;
00888     }
00889 
00890     UString& UString::assign( const UString& str, size_type index, size_type len )
00891     {
00892         mData.assign( str.mData, index, len );
00893         return *this;
00894     }
00895 
00896     UString& UString::assign( size_type num, const code_point& ch )
00897     {
00898         mData.assign( num, ch );
00899         return *this;
00900     }
00901 
00902     UString& UString::assign( const std::wstring& wstr )
00903     {
00904         mData.clear();
00905         mData.reserve( wstr.length() ); // best guess bulk allocate
00906 #ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy
00907         code_point tmp;
00908         std::wstring::const_iterator i, ie = wstr.end();
00909         for ( i = wstr.begin(); i != ie; i++ ) {
00910             tmp = static_cast<code_point>( *i );
00911             mData.push_back( tmp );
00912         }
00913 #else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower)
00914         code_point cp[3] = {0, 0, 0};
00915         unicode_char tmp;
00916         std::wstring::const_iterator i, ie = wstr.end();
00917         for ( i = wstr.begin(); i != ie; i++ ) {
00918             tmp = static_cast<unicode_char>( *i );
00919             size_t l = _utf32_to_utf16( tmp, cp );
00920             if ( l > 0 ) mData.push_back( cp[0] );
00921             if ( l > 1 ) mData.push_back( cp[1] );
00922         }
00923 #endif
00924         return *this;
00925     }
00926 
00927 #if MYGUI_IS_NATIVE_WCHAR_T
00928     UString& UString::assign( const wchar_t* w_str )
00929     {
00930         std::wstring tmp;
00931         tmp.assign( w_str );
00932         return assign( tmp );
00933     }
00934 
00935     UString& UString::assign( const wchar_t* w_str, size_type num )
00936     {
00937         std::wstring tmp;
00938         tmp.assign( w_str, num );
00939         return assign( tmp );
00940     }
00941 #endif
00942 
00943     UString& UString::assign( const std::string& str )
00944     {
00945         size_type len = _verifyUTF8( str );
00946         clear(); // empty our contents, if there are any
00947         reserve( len ); // best guess bulk capacity growth
00948 
00949         // This is a 3 step process, converting each byte in the UTF-8 stream to UTF-32,
00950         // then converting it to UTF-16, then finally appending the data buffer
00951 
00952         unicode_char uc;          // temporary Unicode character buffer
00953         unsigned char utf8buf[7]; // temporary UTF-8 buffer
00954         utf8buf[6] = 0;
00955         size_t utf8len;           // UTF-8 length
00956         code_point utf16buff[3];  // temporary UTF-16 buffer
00957         utf16buff[2] = 0;
00958         size_t utf16len;          // UTF-16 length
00959 
00960         std::string::const_iterator i, ie = str.end();
00961         for ( i = str.begin(); i != ie; i++ ) {
00962             utf8len = _utf8_char_length( static_cast<unsigned char>( *i ) ); // estimate bytes to load
00963             for ( size_t j = 0; j < utf8len; j++ ) { // load the needed UTF-8 bytes
00964                 utf8buf[j] = ( static_cast<unsigned char>( *( i + j ) ) ); // we don't increment 'i' here just in case the estimate is wrong (shouldn't happen, but we're being careful)
00965             }
00966             utf8buf[utf8len] = 0; // nul terminate so we throw an exception before running off the end of the buffer
00967             utf8len = _utf8_to_utf32( utf8buf, uc ); // do the UTF-8 -> UTF-32 conversion
00968             i += utf8len - 1; // we subtract 1 for the increment of the 'for' loop
00969 
00970             utf16len = _utf32_to_utf16( uc, utf16buff ); // UTF-32 -> UTF-16 conversion
00971             append( utf16buff, utf16len ); // append the characters to the string
00972         }
00973         return *this;
00974     }
00975 
00976     UString& UString::assign( const char* c_str )
00977     {
00978         std::string tmp( c_str );
00979         return assign( tmp );
00980     }
00981 
00982     UString& UString::assign( const char* c_str, size_type num )
00983     {
00984         std::string tmp;
00985         tmp.assign( c_str, num );
00986         return assign( tmp );
00987     }
00988 
00989     UString& UString::append( const UString& str )
00990     {
00991         mData.append( str.mData );
00992         return *this;
00993     }
00994 
00995     UString& UString::append( const code_point* str )
00996     {
00997         mData.append( str );
00998         return *this;
00999     }
01000 
01001     UString& UString::append( const UString& str, size_type index, size_type len )
01002     {
01003         mData.append( str.mData, index, len );
01004         return *this;
01005     }
01006 
01007     UString& UString::append( const code_point* str, size_type num )
01008     {
01009         mData.append( str, num );
01010         return *this;
01011     }
01012 
01013     UString& UString::append( size_type num, code_point ch )
01014     {
01015         mData.append( num, ch );
01016         return *this;
01017     }
01018 
01019     UString& UString::append( iterator start, iterator end )
01020     {
01021         mData.append( start.mIter, end.mIter );
01022         return *this;
01023     }
01024 
01025 #if MYGUI_IS_NATIVE_WCHAR_T
01026     UString& UString::append( const wchar_t* w_str, size_type num )
01027     {
01028         std::wstring tmp( w_str, num );
01029         return append( tmp );
01030     }
01031 
01032     UString& UString::append( size_type num, wchar_t ch )
01033     {
01034         return append( num, static_cast<unicode_char>( ch ) );
01035     }
01036 #endif
01037     UString& UString::append( const char* c_str, size_type num )
01038     {
01039         UString tmp( c_str, num );
01040         append( tmp );
01041         return *this;
01042     }
01043 
01044     UString& UString::append( size_type num, char ch )
01045     {
01046         append( num, static_cast<code_point>( ch ) );
01047         return *this;
01048     }
01049 
01050     UString& UString::append( size_type num, unicode_char ch )
01051     {
01052         code_point cp[2] = {0, 0};
01053         if ( _utf32_to_utf16( ch, cp ) == 2 ) {
01054             for ( size_type i = 0; i < num; i++ ) {
01055                 append( 1, cp[0] );
01056                 append( 1, cp[1] );
01057             }
01058         } else {
01059             for ( size_type i = 0; i < num; i++ ) {
01060                 append( 1, cp[0] );
01061             }
01062         }
01063         return *this;
01064     }
01065 
01066     UString::iterator UString::insert( iterator i, const code_point& ch )
01067     {
01068         iterator ret;
01069         ret.mIter = mData.insert( i.mIter, ch );
01070         ret.mString = this;
01071         return ret;
01072     }
01073 
01074     UString& UString::insert( size_type index, const UString& str )
01075     {
01076         mData.insert( index, str.mData );
01077         return *this;
01078     }
01079 
01080     UString& UString::insert( size_type index1, const UString& str, size_type index2, size_type num )
01081     {
01082         mData.insert( index1, str.mData, index2, num );
01083         return *this;
01084     }
01085 
01086     void UString::insert( iterator i, iterator start, iterator end )
01087     {
01088         mData.insert( i.mIter, start.mIter, end.mIter );
01089     }
01090 
01091     UString& UString::insert( size_type index, const code_point* str, size_type num )
01092     {
01093         mData.insert( index, str, num );
01094         return *this;
01095     }
01096 
01097 #if MYGUI_IS_NATIVE_WCHAR_T
01098     UString& UString::insert( size_type index, const wchar_t* w_str, size_type num )
01099     {
01100         UString tmp( w_str, num );
01101         insert( index, tmp );
01102         return *this;
01103     }
01104 #endif
01105 
01106     UString& UString::insert( size_type index, const char* c_str, size_type num )
01107     {
01108         UString tmp( c_str, num );
01109         insert( index, tmp );
01110         return *this;
01111     }
01112 
01113     UString& UString::insert( size_type index, size_type num, code_point ch )
01114     {
01115         mData.insert( index, num, ch );
01116         return *this;
01117     }
01118 
01119 #if MYGUI_IS_NATIVE_WCHAR_T
01120     UString& UString::insert( size_type index, size_type num, wchar_t ch )
01121     {
01122         insert( index, num, static_cast<unicode_char>( ch ) );
01123         return *this;
01124     }
01125 #endif
01126 
01127     UString& UString::insert( size_type index, size_type num, char ch )
01128     {
01129         insert( index, num, static_cast<code_point>( ch ) );
01130         return *this;
01131     }
01132 
01133     UString& UString::insert( size_type index, size_type num, unicode_char ch )
01134     {
01135         code_point cp[3] = {0, 0, 0};
01136         size_t l = _utf32_to_utf16( ch, cp );
01137         if ( l == 1 ) {
01138             return insert( index, num, cp[0] );
01139         }
01140         for ( size_type c = 0; c < num; c++ ) {
01141             // insert in reverse order to preserve ordering after insert
01142             insert( index, 1, cp[1] );
01143             insert( index, 1, cp[0] );
01144         }
01145         return *this;
01146     }
01147 
01148     void UString::insert( iterator i, size_type num, const code_point& ch )
01149     {
01150         mData.insert( i.mIter, num, ch );
01151     }
01152 #if MYGUI_IS_NATIVE_WCHAR_T
01153     void UString::insert( iterator i, size_type num, const wchar_t& ch )
01154     {
01155         insert( i, num, static_cast<unicode_char>( ch ) );
01156     }
01157 #endif
01158 
01159     void UString::insert( iterator i, size_type num, const char& ch )
01160     {
01161         insert( i, num, static_cast<code_point>( ch ) );
01162     }
01163 
01164     void UString::insert( iterator i, size_type num, const unicode_char& ch )
01165     {
01166         code_point cp[3] = {0, 0, 0};
01167         size_t l = _utf32_to_utf16( ch, cp );
01168         if ( l == 1 ) {
01169             insert( i, num, cp[0] );
01170         } else {
01171             for ( size_type c = 0; c < num; c++ ) {
01172                 // insert in reverse order to preserve ordering after insert
01173                 insert( i, 1, cp[1] );
01174                 insert( i, 1, cp[0] );
01175             }
01176         }
01177     }
01178 
01179     UString::iterator UString::erase( iterator loc )
01180     {
01181         iterator ret;
01182         ret.mIter = mData.erase( loc.mIter );
01183         ret.mString = this;
01184         return ret;
01185     }
01186 
01187     UString::iterator UString::erase( iterator start, iterator end )
01188     {
01189         iterator ret;
01190         ret.mIter = mData.erase( start.mIter, end.mIter );
01191         ret.mString = this;
01192         return ret;
01193     }
01194 
01195     UString& UString::erase( size_type index /*= 0*/, size_type num /*= npos */ )
01196     {
01197         if ( num == npos )
01198             mData.erase( index );
01199         else
01200             mData.erase( index, num );
01201         return *this;
01202     }
01203 
01204     UString& UString::replace( size_type index1, size_type num1, const UString& str )
01205     {
01206         mData.replace( index1, num1, str.mData, 0, npos );
01207         return *this;
01208     }
01209 
01210     UString& UString::replace( size_type index1, size_type num1, const UString& str, size_type num2 )
01211     {
01212         mData.replace( index1, num1, str.mData, 0, num2 );
01213         return *this;
01214     }
01215 
01216     UString& UString::replace( size_type index1, size_type num1, const UString& str, size_type index2, size_type num2 )
01217     {
01218         mData.replace( index1, num1, str.mData, index2, num2 );
01219         return *this;
01220     }
01221 
01222     UString& UString::replace( iterator start, iterator end, const UString& str, size_type num /*= npos */ )
01223     {
01224         _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload
01225 
01226         size_type index1 = begin() - st;
01227         size_type num1 = end - st;
01228         return replace( index1, num1, str, 0, num );
01229     }
01230 
01231     UString& UString::replace( size_type index, size_type num1, size_type num2, code_point ch )
01232     {
01233         mData.replace( index, num1, num2, ch );
01234         return *this;
01235     }
01236 
01237     UString& UString::replace( iterator start, iterator end, size_type num, code_point ch )
01238     {
01239         _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload
01240 
01241         size_type index1 = begin() - st;
01242         size_type num1 = end - st;
01243         return replace( index1, num1, num, ch );
01244     }
01245 
01246     int UString::compare( const UString& str ) const
01247     {
01248         return mData.compare( str.mData );
01249     }
01250 
01251     int UString::compare( const code_point* str ) const
01252     {
01253         return mData.compare( str );
01254     }
01255 
01256     int UString::compare( size_type index, size_type length, const UString& str ) const
01257     {
01258         return mData.compare( index, length, str.mData );
01259     }
01260 
01261     int UString::compare( size_type index, size_type length, const UString& str, size_type index2, size_type length2 ) const
01262     {
01263         return mData.compare( index, length, str.mData, index2, length2 );
01264     }
01265 
01266     int UString::compare( size_type index, size_type length, const code_point* str, size_type length2 ) const
01267     {
01268         return mData.compare( index, length, str, length2 );
01269     }
01270 
01271 #if MYGUI_IS_NATIVE_WCHAR_T
01272     int UString::compare( size_type index, size_type length, const wchar_t* w_str, size_type length2 ) const
01273     {
01274         UString tmp( w_str, length2 );
01275         return compare( index, length, tmp );
01276     }
01277 #endif
01278 
01279     int UString::compare( size_type index, size_type length, const char* c_str, size_type length2 ) const
01280     {
01281         UString tmp( c_str, length2 );
01282         return compare( index, length, tmp );
01283     }
01284 
01285     UString::size_type UString::find( const UString& str, size_type index /*= 0 */ ) const
01286     {
01287         return mData.find( str.c_str(), index );
01288     }
01289 
01290     UString::size_type UString::find( const code_point* cp_str, size_type index, size_type length ) const
01291     {
01292         UString tmp( cp_str );
01293         return mData.find( tmp.c_str(), index, length );
01294     }
01295 
01296     UString::size_type UString::find( const char* c_str, size_type index, size_type length ) const
01297     {
01298         UString tmp( c_str );
01299         return mData.find( tmp.c_str(), index, length );
01300     }
01301 
01302 #if MYGUI_IS_NATIVE_WCHAR_T
01303     UString::size_type UString::find( const wchar_t* w_str, size_type index, size_type length ) const
01304     {
01305         UString tmp( w_str );
01306         return mData.find( tmp.c_str(), index, length );
01307     }
01308 #endif
01309 
01310     UString::size_type UString::find( char ch, size_type index /*= 0 */ ) const
01311     {
01312         return find( static_cast<code_point>( ch ), index );
01313     }
01314 
01315     UString::size_type UString::find( code_point ch, size_type index /*= 0 */ ) const
01316     {
01317         return mData.find( ch, index );
01318     }
01319 
01320 #if MYGUI_IS_NATIVE_WCHAR_T
01321     UString::size_type UString::find( wchar_t ch, size_type index /*= 0 */ ) const
01322     {
01323         return find( static_cast<unicode_char>( ch ), index );
01324     }
01325 #endif
01326 
01327     UString::size_type UString::find( unicode_char ch, size_type index /*= 0 */ ) const
01328     {
01329         code_point cp[3] = {0, 0, 0};
01330         size_t l = _utf32_to_utf16( ch, cp );
01331         return find( UString( cp, l ), index );
01332     }
01333 
01334     UString::size_type UString::rfind( const UString& str, size_type index /*= 0 */ ) const
01335     {
01336         return mData.rfind( str.c_str(), index );
01337     }
01338 
01339     UString::size_type UString::rfind( const code_point* cp_str, size_type index, size_type num ) const
01340     {
01341         UString tmp( cp_str );
01342         return mData.rfind( tmp.c_str(), index, num );
01343     }
01344 
01345     UString::size_type UString::rfind( const char* c_str, size_type index, size_type num ) const
01346     {
01347         UString tmp( c_str );
01348         return mData.rfind( tmp.c_str(), index, num );
01349     }
01350 
01351 #if MYGUI_IS_NATIVE_WCHAR_T
01352     UString::size_type UString::rfind( const wchar_t* w_str, size_type index, size_type num ) const
01353     {
01354         UString tmp( w_str );
01355         return mData.rfind( tmp.c_str(), index, num );
01356     }
01357 #endif
01358 
01359     UString::size_type UString::rfind( char ch, size_type index /*= 0 */ ) const
01360     {
01361         return rfind( static_cast<code_point>( ch ), index );
01362     }
01363 
01364     UString::size_type UString::rfind( code_point ch, size_type index ) const
01365     {
01366         return mData.rfind( ch, index );
01367     }
01368 
01369 #if MYGUI_IS_NATIVE_WCHAR_T
01370     UString::size_type UString::rfind( wchar_t ch, size_type index /*= 0 */ ) const
01371     {
01372         return rfind( static_cast<unicode_char>( ch ), index );
01373     }
01374 #endif
01375 
01376     UString::size_type UString::rfind( unicode_char ch, size_type index /*= 0 */ ) const
01377     {
01378         code_point cp[3] = {0, 0, 0};
01379         size_t l = _utf32_to_utf16( ch, cp );
01380         return rfind( UString( cp, l ), index );
01381     }
01382 
01383     UString::size_type UString::find_first_of( const UString &str, size_type index /*= 0*/, size_type num /*= npos */ ) const
01384     {
01385         size_type i = 0;
01386         const size_type len = length();
01387         while ( i < num && ( index + i ) < len ) {
01388             unicode_char ch = getChar( index + i );
01389             if ( str.inString( ch ) )
01390                 return index + i;
01391             i += _utf16_char_length( ch ); // increment by the Unicode character length
01392         }
01393         return npos;
01394     }
01395 
01396     UString::size_type UString::find_first_of( code_point ch, size_type index /*= 0 */ ) const
01397     {
01398         UString tmp;
01399         tmp.assign( 1, ch );
01400         return find_first_of( tmp, index );
01401     }
01402 
01403     UString::size_type UString::find_first_of( char ch, size_type index /*= 0 */ ) const
01404     {
01405         return find_first_of( static_cast<code_point>( ch ), index );
01406     }
01407 
01408 #if MYGUI_IS_NATIVE_WCHAR_T
01409     UString::size_type UString::find_first_of( wchar_t ch, size_type index /*= 0 */ ) const
01410     {
01411         return find_first_of( static_cast<unicode_char>( ch ), index );
01412     }
01413 #endif
01414 
01415     UString::size_type UString::find_first_of( unicode_char ch, size_type index /*= 0 */ ) const
01416     {
01417         code_point cp[3] = {0, 0, 0};
01418         size_t l = _utf32_to_utf16( ch, cp );
01419         return find_first_of( UString( cp, l ), index );
01420     }
01421 
01422     UString::size_type UString::find_first_not_of( const UString& str, size_type index /*= 0*/, size_type num /*= npos */ ) const
01423     {
01424         size_type i = 0;
01425         const size_type len = length();
01426         while ( i < num && ( index + i ) < len ) {
01427             unicode_char ch = getChar( index + i );
01428             if ( !str.inString( ch ) )
01429                 return index + i;
01430             i += _utf16_char_length( ch ); // increment by the Unicode character length
01431         }
01432         return npos;
01433     }
01434 
01435     UString::size_type UString::find_first_not_of( code_point ch, size_type index /*= 0 */ ) const
01436     {
01437         UString tmp;
01438         tmp.assign( 1, ch );
01439         return find_first_not_of( tmp, index );
01440     }
01441 
01442     UString::size_type UString::find_first_not_of( char ch, size_type index /*= 0 */ ) const
01443     {
01444         return find_first_not_of( static_cast<code_point>( ch ), index );
01445     }
01446 
01447 #if MYGUI_IS_NATIVE_WCHAR_T
01448     UString::size_type UString::find_first_not_of( wchar_t ch, size_type index /*= 0 */ ) const
01449     {
01450         return find_first_not_of( static_cast<unicode_char>( ch ), index );
01451     }
01452 #endif
01453 
01454     UString::size_type UString::find_first_not_of( unicode_char ch, size_type index /*= 0 */ ) const
01455     {
01456         code_point cp[3] = {0, 0, 0};
01457         size_t l = _utf32_to_utf16( ch, cp );
01458         return find_first_not_of( UString( cp, l ), index );
01459     }
01460 
01461     UString::size_type UString::find_last_of( const UString& str, size_type index /*= npos*/, size_type num /*= npos */ ) const
01462     {
01463         size_type i = 0;
01464         const size_type len = length();
01465         if ( index > len ) index = len - 1;
01466 
01467         while ( i < num && ( index - i ) != npos ) {
01468             size_type j = index - i;
01469             // careful to step full Unicode characters
01470             if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) {
01471                 j = index - ++i;
01472             }
01473             // and back to the usual dull test
01474             unicode_char ch = getChar( j );
01475             if ( str.inString( ch ) )
01476                 return j;
01477             i++;
01478         }
01479         return npos;
01480     }
01481 
01482     UString::size_type UString::find_last_of( code_point ch, size_type index /*= npos */ ) const
01483     {
01484         UString tmp;
01485         tmp.assign( 1, ch );
01486         return find_last_of( tmp, index );
01487     }
01488 
01489 #if MYGUI_IS_NATIVE_WCHAR_T
01490     UString::size_type UString::find_last_of( wchar_t ch, size_type index /*= npos */ ) const
01491     {
01492         return find_last_of( static_cast<unicode_char>( ch ), index );
01493     }
01494 #endif
01495 
01496     UString::size_type UString::find_last_of( unicode_char ch, size_type index /*= npos */ ) const
01497     {
01498         code_point cp[3] = {0, 0, 0};
01499         size_t l = _utf32_to_utf16( ch, cp );
01500         return find_last_of( UString( cp, l ), index );
01501     }
01502 
01503     UString::size_type UString::find_last_not_of( const UString& str, size_type index /*= npos*/, size_type num /*= npos */ ) const
01504     {
01505         size_type i = 0;
01506         const size_type len = length();
01507         if ( index > len ) index = len - 1;
01508 
01509         while ( i < num && ( index - i ) != npos ) {
01510             size_type j = index - i;
01511             // careful to step full Unicode characters
01512             if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) {
01513                 j = index - ++i;
01514             }
01515             // and back to the usual dull test
01516             unicode_char ch = getChar( j );
01517             if ( !str.inString( ch ) )
01518                 return j;
01519             i++;
01520         }
01521         return npos;
01522     }
01523 
01524     UString::size_type UString::find_last_not_of( code_point ch, size_type index /*= npos */ ) const
01525     {
01526         UString tmp;
01527         tmp.assign( 1, ch );
01528         return find_last_not_of( tmp, index );
01529     }
01530 
01531     UString::size_type UString::find_last_not_of( char ch, size_type index /*= npos */ ) const
01532     {
01533         return find_last_not_of( static_cast<code_point>( ch ), index );
01534     }
01535 
01536 #if MYGUI_IS_NATIVE_WCHAR_T
01537     UString::size_type UString::find_last_not_of( wchar_t ch, size_type index /*= npos */ ) const
01538     {
01539         return find_last_not_of( static_cast<unicode_char>( ch ), index );
01540     }
01541 #endif
01542 
01543     UString::size_type UString::find_last_not_of( unicode_char ch, size_type index /*= npos */ ) const
01544     {
01545         code_point cp[3] = {0, 0, 0};
01546         size_t l = _utf32_to_utf16( ch, cp );
01547         return find_last_not_of( UString( cp, l ), index );
01548     }
01549 
01550     bool UString::operator<( const UString& right ) const
01551     {
01552         return compare( right ) < 0;
01553     }
01554 
01555     bool UString::operator<=( const UString& right ) const
01556     {
01557         return compare( right ) <= 0;
01558     }
01559 
01560     UString& UString::operator=( const UString& s )
01561     {
01562         return assign( s );
01563     }
01564 
01565     UString& UString::operator=( code_point ch )
01566     {
01567         clear();
01568         return append( 1, ch );
01569     }
01570 
01571     UString& UString::operator=( char ch )
01572     {
01573         clear();
01574         return append( 1, ch );
01575     }
01576 
01577 #if MYGUI_IS_NATIVE_WCHAR_T
01578     UString& UString::operator=( wchar_t ch )
01579     {
01580         clear();
01581         return append( 1, ch );
01582     }
01583 #endif
01584 
01585     UString& UString::operator=( unicode_char ch )
01586     {
01587         clear();
01588         return append( 1, ch );
01589     }
01590 
01591     bool UString::operator>( const UString& right ) const
01592     {
01593         return compare( right ) > 0;
01594     }
01595 
01596     bool UString::operator>=( const UString& right ) const
01597     {
01598         return compare( right ) >= 0;
01599     }
01600 
01601     bool UString::operator==( const UString& right ) const
01602     {
01603         return compare( right ) == 0;
01604     }
01605 
01606     bool UString::operator!=( const UString& right ) const
01607     {
01608         return !operator==( right );
01609     }
01610 
01611     UString::code_point& UString::operator[]( size_type index )
01612     {
01613         return at( index );
01614     }
01615 
01616     const UString::code_point& UString::operator[]( size_type index ) const
01617     {
01618         return at( index );
01619     }
01620 
01621     UString::operator std::string() const 
01622     {
01623         return std::string( asUTF8() );
01624     }
01625     
01627     UString::operator std::wstring() const 
01628     {
01629         return std::wstring( asWStr() );
01630     }
01631 
01632 
01633     bool UString::_utf16_independent_char( code_point cp )
01634     {
01635         if ( 0xD800 <= cp && cp <= 0xDFFF ) // tests if the cp is within the surrogate pair range
01636             return false; // it matches a surrogate pair signature
01637         return true; // everything else is a standalone code point
01638     }
01639 
01640     bool UString::_utf16_surrogate_lead( code_point cp )
01641     {
01642         if ( 0xD800 <= cp && cp <= 0xDBFF ) // tests if the cp is within the 2nd word of a surrogate pair
01643             return true; // it is a 1st word
01644         return false; // it isn't
01645     }
01646 
01647     bool UString::_utf16_surrogate_follow( code_point cp )
01648     {
01649         if ( 0xDC00 <= cp && cp <= 0xDFFF ) // tests if the cp is within the 2nd word of a surrogate pair
01650             return true; // it is a 2nd word
01651         return false; // everything else isn't
01652     }
01653 
01654     size_t UString::_utf16_char_length( code_point cp )
01655     {
01656         if ( 0xD800 <= cp && cp <= 0xDBFF ) // test if cp is the beginning of a surrogate pair
01657             return 2; // if it is, then we are 2 words long
01658         return 1; // otherwise we are only 1 word long
01659     }
01660 
01661     size_t UString::_utf16_char_length( unicode_char uc )
01662     {
01663         if ( uc > 0xFFFF ) // test if uc is greater than the single word maximum
01664             return 2; // if so, we need a surrogate pair
01665         return 1; // otherwise we can stuff it into a single word
01666     }
01667 
01668     size_t UString::_utf16_to_utf32( const code_point in_cp[2], unicode_char& out_uc )
01669     {
01670         const code_point& cp1 = in_cp[0];
01671         const code_point& cp2 = in_cp[1];
01672         bool wordPair = false;
01673 
01674         // does it look like a surrogate pair?
01675         if ( 0xD800 <= cp1 && cp1 <= 0xDBFF ) {
01676             // looks like one, but does the other half match the algorithm as well?
01677             if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF )
01678                 wordPair = true; // yep!
01679         }
01680 
01681         if ( !wordPair ) { // if we aren't a 100% authentic surrogate pair, then just copy the value
01682             out_uc = cp1;
01683             return 1;
01684         }
01685 
01686         unsigned short cU = cp1, cL = cp2; // copy upper and lower words of surrogate pair to writable buffers
01687         cU -= 0xD800; // remove the encoding markers
01688         cL -= 0xDC00;
01689 
01690         out_uc = ( cU & 0x03FF ) << 10; // grab the 10 upper bits and set them in their proper location
01691         out_uc |= ( cL & 0x03FF ); // combine in the lower 10 bits
01692         out_uc += 0x10000; // add back in the value offset
01693 
01694         return 2; // this whole operation takes to words, so that's what we'll return
01695     }
01696 
01697     size_t UString::_utf32_to_utf16( const unicode_char& in_uc, code_point out_cp[2] )
01698     {
01699         if ( in_uc <= 0xFFFF ) { // we blindly preserve sentinel values because our decoder understands them
01700             out_cp[0] = static_cast<code_point>(in_uc);
01701             return 1;
01702         }
01703         unicode_char uc = in_uc; // copy to writable buffer
01704         unsigned short tmp; // single code point buffer
01705         uc -= 0x10000; // subtract value offset
01706 
01707         //process upper word
01708         tmp = static_cast<unsigned short>(( uc >> 10 ) & 0x03FF); // grab the upper 10 bits
01709         tmp += 0xD800; // add encoding offset
01710         out_cp[0] = tmp; // write
01711 
01712         // process lower word
01713         tmp = static_cast<unsigned short>(uc & 0x03FF); // grab the lower 10 bits
01714         tmp += 0xDC00; // add encoding offset
01715         out_cp[1] = tmp; // write
01716 
01717         return 2; // return used word count (2 for surrogate pairs)
01718     }
01719 
01720     bool UString::_utf8_start_char( unsigned char cp )
01721     {
01722         return ( cp & ~_cont_mask ) != _cont;
01723     }
01724 
01725     size_t UString::_utf8_char_length( unsigned char cp )
01726     {
01727         if ( !( cp & 0x80 ) ) return 1;
01728         if (( cp & ~_lead1_mask ) == _lead1 ) return 2;
01729         if (( cp & ~_lead2_mask ) == _lead2 ) return 3;
01730         if (( cp & ~_lead3_mask ) == _lead3 ) return 4;
01731         if (( cp & ~_lead4_mask ) == _lead4 ) return 5;
01732         if (( cp & ~_lead5_mask ) == _lead5 ) return 6;
01733         throw invalid_data( "invalid UTF-8 sequence header value" );
01734     }
01735 
01736     size_t UString::_utf8_char_length( unicode_char uc )
01737     {
01738         /*
01739         7 bit:  U-00000000 - U-0000007F: 0xxxxxxx
01740         11 bit: U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
01741         16 bit: U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
01742         21 bit: U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
01743         26 bit: U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
01744         31 bit: U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
01745         */
01746         if ( !( uc & ~0x0000007F ) ) return 1;
01747         if ( !( uc & ~0x000007FF ) ) return 2;
01748         if ( !( uc & ~0x0000FFFF ) ) return 3;
01749         if ( !( uc & ~0x001FFFFF ) ) return 4;
01750         if ( !( uc & ~0x03FFFFFF ) ) return 5;
01751         if ( !( uc & ~0x7FFFFFFF ) ) return 6;
01752         throw invalid_data( "invalid UTF-32 value" );
01753     }
01754 
01755     size_t UString::_utf8_to_utf32( const unsigned char in_cp[6], unicode_char& out_uc )
01756     {
01757         size_t len = _utf8_char_length( in_cp[0] );
01758         if ( len == 1 ) { // if we are only 1 byte long, then just grab it and exit
01759             out_uc = in_cp[0];
01760             return 1;
01761         }
01762 
01763         unicode_char c = 0; // temporary buffer
01764         size_t i = 0;
01765         switch ( len ) { // load header byte
01766             case 6:
01767                 c = in_cp[i] & _lead5_mask;
01768                 break;
01769             case 5:
01770                 c = in_cp[i] & _lead4_mask;
01771                 break;
01772             case 4:
01773                 c = in_cp[i] & _lead3_mask;
01774                 break;
01775             case 3:
01776                 c = in_cp[i] & _lead2_mask;
01777                 break;
01778             case 2:
01779                 c = in_cp[i] & _lead1_mask;
01780                 break;
01781         }
01782 
01783         for ( ++i; i < len; i++ ) { // load each continuation byte
01784             if (( in_cp[i] & ~_cont_mask ) != _cont )
01785                 throw invalid_data( "bad UTF-8 continuation byte" );
01786             c <<= 6;
01787             c |= ( in_cp[i] & _cont_mask );
01788         }
01789 
01790         out_uc = c; // write the final value and return the used byte length
01791         return len;
01792     }
01793 
01794     size_t UString::_utf32_to_utf8( const unicode_char& in_uc, unsigned char out_cp[6] )
01795     {
01796         size_t len = _utf8_char_length( in_uc ); // predict byte length of sequence
01797         unicode_char c = in_uc; // copy to temp buffer
01798 
01799         //stuff all of the lower bits
01800         for ( size_t i = len - 1; i > 0; i-- ) {
01801             out_cp[i] = static_cast<unsigned char>((( c ) & _cont_mask ) | _cont);
01802             c >>= 6;
01803         }
01804 
01805         //now write the header byte
01806         switch ( len ) {
01807             case 6:
01808                 out_cp[0] = static_cast<unsigned char>((( c ) & _lead5_mask ) | _lead5);
01809                 break;
01810             case 5:
01811                 out_cp[0] = static_cast<unsigned char>((( c ) & _lead4_mask ) | _lead4);
01812                 break;
01813             case 4:
01814                 out_cp[0] = static_cast<unsigned char>((( c ) & _lead3_mask ) | _lead3);
01815                 break;
01816             case 3:
01817                 out_cp[0] = static_cast<unsigned char>((( c ) & _lead2_mask ) | _lead2);
01818                 break;
01819             case 2:
01820                 out_cp[0] = static_cast<unsigned char>((( c ) & _lead1_mask ) | _lead1);
01821                 break;
01822             case 1:
01823             default:
01824                 out_cp[0] = static_cast<unsigned char>(( c ) & 0x7F);
01825                 break;
01826         }
01827 
01828         // return the byte length of the sequence
01829         return len;
01830     }
01831 
01832     UString::size_type UString::_verifyUTF8( const unsigned char* c_str )
01833     {
01834         std::string tmp( reinterpret_cast<const char*>( c_str ) );
01835         return _verifyUTF8( tmp );
01836     }
01837 
01838     UString::size_type UString::_verifyUTF8( const std::string& str )
01839     {
01840         std::string::const_iterator i, ie = str.end();
01841         i = str.begin();
01842         size_type length = 0;
01843 
01844         while ( i != ie ) {
01845             // characters pass until we find an extended sequence
01846             if (( *i ) & 0x80 ) {
01847                 unsigned char c = ( *i );
01848                 size_t contBytes = 0;
01849 
01850                 // get continuation byte count and test for overlong sequences
01851                 if (( c & ~_lead1_mask ) == _lead1 ) { // 1 additional byte
01852                     if ( c == _lead1 ) throw invalid_data( "overlong UTF-8 sequence" );
01853                     contBytes = 1;
01854 
01855                 } else if (( c & ~_lead2_mask ) == _lead2 ) { // 2 additional bytes
01856                     contBytes = 2;
01857                     if ( c == _lead2 ) { // possible overlong UTF-8 sequence
01858                         c = ( *( i + 1 ) ); // look ahead to next byte in sequence
01859                         if (( c & _lead2 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
01860                     }
01861 
01862                 } else if (( c & ~_lead3_mask ) == _lead3 ) { // 3 additional bytes
01863                     contBytes = 3;
01864                     if ( c == _lead3 ) { // possible overlong UTF-8 sequence
01865                         c = ( *( i + 1 ) ); // look ahead to next byte in sequence
01866                         if (( c & _lead3 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
01867                     }
01868 
01869                 } else if (( c & ~_lead4_mask ) == _lead4 ) { // 4 additional bytes
01870                     contBytes = 4;
01871                     if ( c == _lead4 ) { // possible overlong UTF-8 sequence
01872                         c = ( *( i + 1 ) ); // look ahead to next byte in sequence
01873                         if (( c & _lead4 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
01874                     }
01875 
01876                 } else if (( c & ~_lead5_mask ) == _lead5 ) { // 5 additional bytes
01877                     contBytes = 5;
01878                     if ( c == _lead5 ) { // possible overlong UTF-8 sequence
01879                         c = ( *( i + 1 ) ); // look ahead to next byte in sequence
01880                         if (( c & _lead5 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
01881                     }
01882                 }
01883 
01884                 // check remaining continuation bytes for
01885                 while ( contBytes-- ) {
01886                     c = ( *( ++i ) ); // get next byte in sequence
01887                     if (( c & ~_cont_mask ) != _cont )
01888                         throw invalid_data( "bad UTF-8 continuation byte" );
01889                 }
01890             }
01891             length++;
01892             i++;
01893         }
01894         return length;
01895     }
01896 
01897     void UString::_init()
01898     {
01899         m_buffer.mVoidBuffer = 0;
01900         m_bufferType = bt_none;
01901         m_bufferSize = 0;
01902     }
01903 
01904     void UString::_cleanBuffer() const
01905     {
01906         if ( m_buffer.mVoidBuffer != 0 ) {
01907             switch ( m_bufferType ) {
01908                 case bt_string:
01909                     delete m_buffer.mStrBuffer;
01910                     break;
01911                 case bt_wstring:
01912                     delete m_buffer.mWStrBuffer;
01913                     break;
01914                 case bt_utf32string:
01915                     delete m_buffer.mUTF32StrBuffer;
01916                     break;
01917                 case bt_none: // under the worse of circumstances, this is all we can do, and hope it works out
01918                 default:
01919                     //delete m_buffer.mVoidBuffer;
01920                     // delete void* is undefined, don't do that
01921                     assert("This should never happen - mVoidBuffer should never contain something if we "
01922                         "don't know the type");
01923                     break;
01924             }
01925             m_buffer.mVoidBuffer = 0;
01926             m_bufferSize = 0;
01927             m_bufferType = bt_none;
01928         }
01929     }
01930 
01931     void UString::_getBufferStr() const
01932     {
01933         if ( m_bufferType != bt_string ) {
01934             _cleanBuffer();
01935             m_buffer.mStrBuffer = new std::string();
01936             m_bufferType = bt_string;
01937         }
01938         m_buffer.mStrBuffer->clear();
01939     }
01940 
01941     void UString::_getBufferWStr() const
01942     {
01943         if ( m_bufferType != bt_wstring ) {
01944             _cleanBuffer();
01945             m_buffer.mWStrBuffer = new std::wstring();
01946             m_bufferType = bt_wstring;
01947         }
01948         m_buffer.mWStrBuffer->clear();
01949     }
01950 
01951     void UString::_getBufferUTF32Str() const
01952     {
01953         if ( m_bufferType != bt_utf32string ) {
01954             _cleanBuffer();
01955             m_buffer.mUTF32StrBuffer = new utf32string();
01956             m_bufferType = bt_utf32string;
01957         }
01958         m_buffer.mUTF32StrBuffer->clear();
01959     }
01960 
01961     void UString::_load_buffer_UTF8() const
01962     {
01963         _getBufferStr();
01964         std::string& buffer = ( *m_buffer.mStrBuffer );
01965         buffer.reserve( length() );
01966 
01967         unsigned char utf8buf[6];
01968         char* charbuf = ( char* )utf8buf;
01969         unicode_char c;
01970         size_t len;
01971 
01972         const_iterator i, ie = end();
01973         for ( i = begin(); i != ie; i.moveNext() ) {
01974             c = i.getCharacter();
01975             len = _utf32_to_utf8( c, utf8buf );
01976             size_t j = 0;
01977             while ( j < len )
01978                 buffer.push_back( charbuf[j++] );
01979         }
01980     }
01981 
01982     void UString::_load_buffer_WStr() const
01983     {
01984         _getBufferWStr();
01985         std::wstring& buffer = ( *m_buffer.mWStrBuffer );
01986         buffer.reserve( length() ); // may over reserve, but should be close enough
01987 #ifdef WCHAR_UTF16 // wchar_t matches UTF-16
01988         const_iterator i, ie = end();
01989         for ( i = begin(); i != ie; ++i ) {
01990             buffer.push_back(( wchar_t )( *i ) );
01991         }
01992 #else // wchar_t fits UTF-32
01993         unicode_char c;
01994         const_iterator i, ie = end();
01995         for ( i = begin(); i != ie; i.moveNext() ) {
01996             c = i.getCharacter();
01997             buffer.push_back(( wchar_t )c );
01998         }
01999 #endif
02000     }
02001 
02002     void UString::_load_buffer_UTF32() const
02003     {
02004         _getBufferUTF32Str();
02005         utf32string& buffer = ( *m_buffer.mUTF32StrBuffer );
02006         buffer.reserve( length() ); // may over reserve, but should be close enough
02007 
02008         unicode_char c;
02009 
02010         const_iterator i, ie = end();
02011         for ( i = begin(); i != ie; i.moveNext() ) {
02012             c = i.getCharacter();
02013             buffer.push_back( c );
02014         }
02015     }
02016 
02017 } // namespace MyGUI