// FIFE 2008.0
00001 // Copyright 2006 Nemanja Trifunovic 00002 00003 /* 00004 Permission is hereby granted, free of charge, to any person or organization 00005 obtaining a copy of the software and accompanying documentation covered by 00006 this license (the "Software") to use, reproduce, display, distribute, 00007 execute, and transmit the Software, and to prepare derivative works of the 00008 Software, and to permit third-parties to whom the Software is furnished to 00009 do so, all subject to the following: 00010 00011 The copyright notices in the Software and this entire statement, including 00012 the above license grant, this restriction and the following disclaimer, 00013 must be included in all copies of the Software, in whole or in part, and 00014 all derivative works of the Software, unless such copies or derivative 00015 works are solely in the form of machine-executable object code generated by 00016 a source language processor. 00017 00018 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 00019 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 00020 FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 00021 SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 00022 FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 00023 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 00024 DEALINGS IN THE SOFTWARE. 
00025 */ 00026 00027 00028 #ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 00029 #define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 00030 00031 #include "core.h" 00032 00033 namespace utf8 00034 { 00035 namespace unchecked 00036 { 00037 template <typename octet_iterator> 00038 octet_iterator append(uint32_t cp, octet_iterator result) 00039 { 00040 if (cp < 0x80) // one octet 00041 *(result++) = static_cast<uint8_t>(cp); 00042 else if (cp < 0x800) { // two octets 00043 *(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0); 00044 *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80); 00045 } 00046 else if (cp < 0x10000) { // three octets 00047 *(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0); 00048 *(result++) = static_cast<uint8_t>((cp >> 6) & 0x3f | 0x80); 00049 *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80); 00050 } 00051 else { // four octets 00052 *(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0); 00053 *(result++) = static_cast<uint8_t>((cp >> 12)& 0x3f | 0x80); 00054 *(result++) = static_cast<uint8_t>((cp >> 6) & 0x3f | 0x80); 00055 *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80); 00056 } 00057 return result; 00058 } 00059 template <typename octet_iterator> 00060 uint32_t next(octet_iterator& it) 00061 { 00062 uint32_t cp = internal::mask8(*it); 00063 typename std::iterator_traits<octet_iterator>::difference_type length = utf8::internal::sequence_length(it); 00064 switch (length) { 00065 case 1: 00066 break; 00067 case 2: 00068 it++; 00069 cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f); 00070 break; 00071 case 3: 00072 ++it; 00073 cp = ((cp << 12) & 0xffff) + ((internal::mask8(*it) << 6) & 0xfff); 00074 ++it; 00075 cp += (*it) & 0x3f; 00076 break; 00077 case 4: 00078 ++it; 00079 cp = ((cp << 18) & 0x1fffff) + ((internal::mask8(*it) << 12) & 0x3ffff); 00080 ++it; 00081 cp += (internal::mask8(*it) << 6) & 0xfff; 00082 ++it; 00083 cp += (*it) & 0x3f; 00084 break; 00085 } 00086 ++it; 00087 return cp; 
00088 } 00089 00090 template <typename octet_iterator> 00091 uint32_t prior(octet_iterator& it) 00092 { 00093 while (internal::is_trail(*(--it))) ; 00094 octet_iterator temp = it; 00095 return next(temp); 00096 } 00097 00098 // Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous) 00099 template <typename octet_iterator> 00100 inline uint32_t previous(octet_iterator& it) 00101 { 00102 return prior(it); 00103 } 00104 00105 template <typename octet_iterator, typename distance_type> 00106 void advance (octet_iterator& it, distance_type n) 00107 { 00108 for (distance_type i = 0; i < n; ++i) 00109 next(it); 00110 } 00111 00112 template <typename octet_iterator> 00113 typename std::iterator_traits<octet_iterator>::difference_type 00114 distance (octet_iterator first, octet_iterator last) 00115 { 00116 typename std::iterator_traits<octet_iterator>::difference_type dist; 00117 for (dist = 0; first < last; ++dist) 00118 next(first); 00119 return dist; 00120 } 00121 00122 template <typename u16bit_iterator, typename octet_iterator> 00123 octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) 00124 { 00125 while (start != end) { 00126 uint32_t cp = internal::mask16(*start++); 00127 // Take care of surrogate pairs first 00128 if (internal::is_surrogate(cp)) { 00129 uint32_t trail_surrogate = internal::mask16(*start++); 00130 cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; 00131 } 00132 result = append(cp, result); 00133 } 00134 return result; 00135 } 00136 00137 template <typename u16bit_iterator, typename octet_iterator> 00138 u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) 00139 { 00140 while (start != end) { 00141 uint32_t cp = next(start); 00142 if (cp > 0xffff) { //make a surrogate pair 00143 *result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET); 00144 *result++ = static_cast<uint16_t>((cp & 0x3ff) + 
internal::TRAIL_SURROGATE_MIN); 00145 } 00146 else 00147 *result++ = static_cast<uint16_t>(cp); 00148 } 00149 return result; 00150 } 00151 00152 template <typename octet_iterator, typename u32bit_iterator> 00153 octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) 00154 { 00155 while (start != end) 00156 result = append(*(start++), result); 00157 00158 return result; 00159 } 00160 00161 template <typename octet_iterator, typename u32bit_iterator> 00162 u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) 00163 { 00164 while (start < end) 00165 (*result++) = next(start); 00166 00167 return result; 00168 } 00169 00170 // The iterator class 00171 template <typename octet_iterator> 00172 class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> { 00173 octet_iterator it; 00174 public: 00175 iterator () {}; 00176 explicit iterator (const octet_iterator& octet_it): it(octet_it) {} 00177 // the default "big three" are OK 00178 octet_iterator base () const { return it; } 00179 uint32_t operator * () const 00180 { 00181 octet_iterator temp = it; 00182 return next(temp); 00183 } 00184 bool operator == (const iterator& rhs) const 00185 { 00186 return (it == rhs.it); 00187 } 00188 bool operator != (const iterator& rhs) const 00189 { 00190 return !(operator == (rhs)); 00191 } 00192 iterator& operator ++ () 00193 { 00194 std::advance(it, internal::sequence_length(it)); 00195 return *this; 00196 } 00197 iterator operator ++ (int) 00198 { 00199 iterator temp = *this; 00200 std::advance(it, internal::sequence_length(it)); 00201 return temp; 00202 } 00203 iterator& operator -- () 00204 { 00205 prior(it); 00206 return *this; 00207 } 00208 iterator operator -- (int) 00209 { 00210 iterator temp = *this; 00211 prior(it); 00212 return temp; 00213 } 00214 }; // class iterator 00215 00216 } // namespace utf8::unchecked 00217 } // namespace utf8 00218 00219 00220 #endif // header guard 00221