00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00024
00025
00027
00028
00029
00030
00031
00032
00033
00035
00036
00038 template <typename In>
00039 In Utf<8>::Decode(In begin, In end, Uint32& output, Uint32 replacement)
00040 {
00041
00042 static const int trailing[256] =
00043 {
00044 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00045 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00046 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00047 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00048 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00049 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00050 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00051 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
00052 };
00053 static const Uint32 offsets[6] =
00054 {
00055 0x00000000, 0x00003080, 0x000E2080, 0x03C82080, 0xFA082080, 0x82082080
00056 };
00057
00058
00059 int trailingBytes = trailing[static_cast<Uint8>(*begin)];
00060 if (begin + trailingBytes < end)
00061 {
00062 output = 0;
00063 switch (trailingBytes)
00064 {
00065 case 5 : output += static_cast<Uint8>(*begin++); output <<= 6;
00066 case 4 : output += static_cast<Uint8>(*begin++); output <<= 6;
00067 case 3 : output += static_cast<Uint8>(*begin++); output <<= 6;
00068 case 2 : output += static_cast<Uint8>(*begin++); output <<= 6;
00069 case 1 : output += static_cast<Uint8>(*begin++); output <<= 6;
00070 case 0 : output += static_cast<Uint8>(*begin++);
00071 }
00072 output -= offsets[trailingBytes];
00073 }
00074 else
00075 {
00076
00077 begin = end;
00078 output = replacement;
00079 }
00080
00081 return begin;
00082 }
00083
00084
00086 template <typename Out>
00087 Out Utf<8>::Encode(Uint32 input, Out output, Uint8 replacement)
00088 {
00089
00090 static const Uint8 firstBytes[7] =
00091 {
00092 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
00093 };
00094
00095
00096 if ((input > 0x0010FFFF) || ((input >= 0xD800) && (input <= 0xDBFF)))
00097 {
00098
00099 if (replacement)
00100 *output++ = replacement;
00101 }
00102 else
00103 {
00104
00105
00106
00107 int bytesToWrite = 1;
00108 if (input < 0x80) bytesToWrite = 1;
00109 else if (input < 0x800) bytesToWrite = 2;
00110 else if (input < 0x10000) bytesToWrite = 3;
00111 else if (input <= 0x0010FFFF) bytesToWrite = 4;
00112
00113
00114 Uint8 bytes[4];
00115 switch (bytesToWrite)
00116 {
00117 case 4 : bytes[3] = static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6;
00118 case 3 : bytes[2] = static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6;
00119 case 2 : bytes[1] = static_cast<Uint8>((input | 0x80) & 0xBF); input >>= 6;
00120 case 1 : bytes[0] = static_cast<Uint8> (input | firstBytes[bytesToWrite]);
00121 }
00122
00123
00124 const Uint8* currentByte = bytes;
00125 switch (bytesToWrite)
00126 {
00127 case 4 : *output++ = *currentByte++;
00128 case 3 : *output++ = *currentByte++;
00129 case 2 : *output++ = *currentByte++;
00130 case 1 : *output++ = *currentByte++;
00131 }
00132 }
00133
00134 return output;
00135 }
00136
00137
00139 template <typename In>
00140 In Utf<8>::Next(In begin, In end)
00141 {
00142 Uint32 codepoint;
00143 return Decode(begin, end, codepoint);
00144 }
00145
00146
00148 template <typename In>
00149 std::size_t Utf<8>::Count(In begin, In end)
00150 {
00151 std::size_t length = 0;
00152 while (begin < end)
00153 {
00154 begin = Next(begin, end);
00155 ++length;
00156 }
00157
00158 return length;
00159 }
00160
00161
00163 template <typename In, typename Out>
00164 Out Utf<8>::FromAnsi(In begin, In end, Out output, const std::locale& locale)
00165 {
00166 while (begin < end)
00167 {
00168 Uint32 codepoint = Utf<32>::DecodeAnsi(*begin++, locale);
00169 output = Encode(codepoint, output);
00170 }
00171
00172 return output;
00173 }
00174
00175
00177 template <typename In, typename Out>
00178 Out Utf<8>::FromWide(In begin, In end, Out output)
00179 {
00180 while (begin < end)
00181 {
00182 Uint32 codepoint = Utf<32>::DecodeWide(*begin++);
00183 output = Encode(codepoint, output);
00184 }
00185
00186 return output;
00187 }
00188
00189
00191 template <typename In, typename Out>
00192 Out Utf<8>::FromLatin1(In begin, In end, Out output)
00193 {
00194
00195
00196 while (begin < end)
00197 output = Encode(*begin++, output);
00198
00199 return output;
00200 }
00201
00202
00204 template <typename In, typename Out>
00205 Out Utf<8>::ToAnsi(In begin, In end, Out output, char replacement, const std::locale& locale)
00206 {
00207 while (begin < end)
00208 {
00209 Uint32 codepoint;
00210 begin = Decode(begin, end, codepoint);
00211 output = Utf<32>::EncodeAnsi(codepoint, output, replacement, locale);
00212 }
00213
00214 return output;
00215 }
00216
00217
00219 template <typename In, typename Out>
00220 Out Utf<8>::ToWide(In begin, In end, Out output, wchar_t replacement)
00221 {
00222 while (begin < end)
00223 {
00224 Uint32 codepoint;
00225 begin = Decode(begin, end, codepoint);
00226 output = Utf<32>::EncodeWide(codepoint, output, replacement);
00227 }
00228
00229 return output;
00230 }
00231
00232
00234 template <typename In, typename Out>
00235 Out Utf<8>::ToLatin1(In begin, In end, Out output, char replacement)
00236 {
00237
00238
00239 while (begin < end)
00240 {
00241 Uint32 codepoint;
00242 begin = Decode(begin, end, codepoint);
00243 *output++ = codepoint < 256 ? static_cast<char>(codepoint) : replacement;
00244 }
00245
00246 return output;
00247 }
00248
00249
00251 template <typename In, typename Out>
00252 Out Utf<8>::ToUtf8(In begin, In end, Out output)
00253 {
00254 while (begin < end)
00255 *output++ = *begin++;
00256
00257 return output;
00258 }
00259
00260
00262 template <typename In, typename Out>
00263 Out Utf<8>::ToUtf16(In begin, In end, Out output)
00264 {
00265 while (begin < end)
00266 {
00267 Uint32 codepoint;
00268 begin = Decode(begin, end, codepoint);
00269 output = Utf<16>::Encode(codepoint, output);
00270 }
00271
00272 return output;
00273 }
00274
00275
00277 template <typename In, typename Out>
00278 Out Utf<8>::ToUtf32(In begin, In end, Out output)
00279 {
00280 while (begin < end)
00281 {
00282 Uint32 codepoint;
00283 begin = Decode(begin, end, codepoint);
00284 *output++ = codepoint;
00285 }
00286
00287 return output;
00288 }
00289
00290
00292 template <typename In>
00293 In Utf<16>::Decode(In begin, In end, Uint32& output, Uint32 replacement)
00294 {
00295 Uint16 first = *begin++;
00296
00297
00298 if ((first >= 0xD800) && (first <= 0xDBFF))
00299 {
00300 if (begin < end)
00301 {
00302 Uint32 second = *begin++;
00303 if ((second >= 0xDC00) && (second <= 0xDFFF))
00304 {
00305
00306 output = static_cast<Uint32>(((first - 0xD800) << 10) + (second - 0xDC00) + 0x0010000);
00307 }
00308 else
00309 {
00310
00311 output = replacement;
00312 }
00313 }
00314 else
00315 {
00316
00317 begin = end;
00318 output = replacement;
00319 }
00320 }
00321 else
00322 {
00323
00324 output = first;
00325 }
00326
00327 return begin;
00328 }
00329
00330
00332 template <typename Out>
00333 Out Utf<16>::Encode(Uint32 input, Out output, Uint16 replacement)
00334 {
00335 if (input < 0xFFFF)
00336 {
00337
00338 if ((input >= 0xD800) && (input <= 0xDFFF))
00339 {
00340
00341 if (replacement)
00342 *output++ = replacement;
00343 }
00344 else
00345 {
00346
00347 *output++ = static_cast<Uint16>(input);
00348 }
00349 }
00350 else if (input > 0x0010FFFF)
00351 {
00352
00353 if (replacement)
00354 *output++ = replacement;
00355 }
00356 else
00357 {
00358
00359 input -= 0x0010000;
00360 *output++ = static_cast<Uint16>((input >> 10) + 0xD800);
00361 *output++ = static_cast<Uint16>((input & 0x3FFUL) + 0xDC00);
00362 }
00363
00364 return output;
00365 }
00366
00367
00369 template <typename In>
00370 In Utf<16>::Next(In begin, In end)
00371 {
00372 Uint32 codepoint;
00373 return Decode(begin, end, codepoint);
00374 }
00375
00376
00378 template <typename In>
00379 std::size_t Utf<16>::Count(In begin, In end)
00380 {
00381 std::size_t length = 0;
00382 while (begin < end)
00383 {
00384 begin = Next(begin, end);
00385 ++length;
00386 }
00387
00388 return length;
00389 }
00390
00391
00393 template <typename In, typename Out>
00394 Out Utf<16>::FromAnsi(In begin, In end, Out output, const std::locale& locale)
00395 {
00396 while (begin < end)
00397 {
00398 Uint32 codepoint = Utf<32>::DecodeAnsi(*begin++, locale);
00399 output = Encode(codepoint, output);
00400 }
00401
00402 return output;
00403 }
00404
00405
00407 template <typename In, typename Out>
00408 Out Utf<16>::FromWide(In begin, In end, Out output)
00409 {
00410 while (begin < end)
00411 {
00412 Uint32 codepoint = Utf<32>::DecodeWide(*begin++);
00413 output = Encode(codepoint, output);
00414 }
00415
00416 return output;
00417 }
00418
00419
00421 template <typename In, typename Out>
00422 Out Utf<16>::FromLatin1(In begin, In end, Out output)
00423 {
00424
00425
00426 while (begin < end)
00427 *output++ = *begin++;
00428
00429 return output;
00430 }
00431
00432
00434 template <typename In, typename Out>
00435 Out Utf<16>::ToAnsi(In begin, In end, Out output, char replacement, const std::locale& locale)
00436 {
00437 while (begin < end)
00438 {
00439 Uint32 codepoint;
00440 begin = Decode(begin, end, codepoint);
00441 output = Utf<32>::EncodeAnsi(codepoint, output, replacement, locale);
00442 }
00443
00444 return output;
00445 }
00446
00447
00449 template <typename In, typename Out>
00450 Out Utf<16>::ToWide(In begin, In end, Out output, wchar_t replacement)
00451 {
00452 while (begin < end)
00453 {
00454 Uint32 codepoint;
00455 begin = Decode(begin, end, codepoint);
00456 output = Utf<32>::EncodeWide(codepoint, output, replacement);
00457 }
00458
00459 return output;
00460 }
00461
00462
00464 template <typename In, typename Out>
00465 Out Utf<16>::ToLatin1(In begin, In end, Out output, char replacement)
00466 {
00467
00468
00469 while (begin < end)
00470 {
00471 *output++ = *begin < 256 ? static_cast<char>(*begin) : replacement;
00472 begin++;
00473 }
00474
00475 return output;
00476 }
00477
00478
00480 template <typename In, typename Out>
00481 Out Utf<16>::ToUtf8(In begin, In end, Out output)
00482 {
00483 while (begin < end)
00484 {
00485 Uint32 codepoint;
00486 begin = Decode(begin, end, codepoint);
00487 output = Utf<8>::Encode(codepoint, output);
00488 }
00489
00490 return output;
00491 }
00492
00493
00495 template <typename In, typename Out>
00496 Out Utf<16>::ToUtf16(In begin, In end, Out output)
00497 {
00498 while (begin < end)
00499 *output++ = *begin++;
00500
00501 return output;
00502 }
00503
00504
00506 template <typename In, typename Out>
00507 Out Utf<16>::ToUtf32(In begin, In end, Out output)
00508 {
00509 while (begin < end)
00510 {
00511 Uint32 codepoint;
00512 begin = Decode(begin, end, codepoint);
00513 *output++ = codepoint;
00514 }
00515
00516 return output;
00517 }
00518
00519
00521 template <typename In>
00522 In Utf<32>::Decode(In begin, In end, Uint32& output, Uint32)
00523 {
00524 output = *begin++;
00525 return begin;
00526 }
00527
00528
00530 template <typename Out>
00531 Out Utf<32>::Encode(Uint32 input, Out output, Uint32 replacement)
00532 {
00533 *output++ = input;
00534 return output;
00535 }
00536
00537
00539 template <typename In>
00540 In Utf<32>::Next(In begin, In end)
00541 {
00542 return ++begin;
00543 }
00544
00545
00547 template <typename In>
00548 std::size_t Utf<32>::Count(In begin, In end)
00549 {
00550 return begin - end;
00551 }
00552
00553
00555 template <typename In, typename Out>
00556 Out Utf<32>::FromAnsi(In begin, In end, Out output, const std::locale& locale)
00557 {
00558 while (begin < end)
00559 *output++ = DecodeAnsi(*begin++, locale);
00560
00561 return output;
00562 }
00563
00564
00566 template <typename In, typename Out>
00567 Out Utf<32>::FromWide(In begin, In end, Out output)
00568 {
00569 while (begin < end)
00570 *output++ = DecodeWide(*begin++);
00571
00572 return output;
00573 }
00574
00575
00577 template <typename In, typename Out>
00578 Out Utf<32>::FromLatin1(In begin, In end, Out output)
00579 {
00580
00581
00582 while (begin < end)
00583 *output++ = *begin++;
00584
00585 return output;
00586 }
00587
00588
00590 template <typename In, typename Out>
00591 Out Utf<32>::ToAnsi(In begin, In end, Out output, char replacement, const std::locale& locale)
00592 {
00593 while (begin < end)
00594 output = EncodeAnsi(*begin++, output, replacement, locale);
00595
00596 return output;
00597 }
00598
00599
00601 template <typename In, typename Out>
00602 Out Utf<32>::ToWide(In begin, In end, Out output, wchar_t replacement)
00603 {
00604 while (begin < end)
00605 output = EncodeWide(*begin++, output, replacement);
00606
00607 return output;
00608 }
00609
00610
00612 template <typename In, typename Out>
00613 Out Utf<32>::ToLatin1(In begin, In end, Out output, char replacement)
00614 {
00615
00616
00617 while (begin < end)
00618 {
00619 *output++ = *begin < 256 ? static_cast<char>(*begin) : replacement;
00620 begin++;
00621 }
00622
00623 return output;
00624 }
00625
00626
00628 template <typename In, typename Out>
00629 Out Utf<32>::ToUtf8(In begin, In end, Out output)
00630 {
00631 while (begin < end)
00632 output = Utf<8>::Encode(*begin++, output);
00633
00634 return output;
00635 }
00636
00638 template <typename In, typename Out>
00639 Out Utf<32>::ToUtf16(In begin, In end, Out output)
00640 {
00641 while (begin < end)
00642 output = Utf<16>::Encode(*begin++, output);
00643
00644 return output;
00645 }
00646
00647
00649 template <typename In, typename Out>
00650 Out Utf<32>::ToUtf32(In begin, In end, Out output)
00651 {
00652 while (begin < end)
00653 *output++ = *begin++;
00654
00655 return output;
00656 }
00657
00658
00660 template <typename In>
00661 Uint32 Utf<32>::DecodeAnsi(In input, const std::locale& locale)
00662 {
00663
00664
00665
00666
00667
00668 #if defined(SFML_SYSTEM_WINDOWS) && \
00669 (defined(__GLIBCPP__) || defined (__GLIBCXX__)) && \
00670 !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION))
00671
00672 wchar_t character = 0;
00673 mbtowc(&character, &input, 1);
00674 return static_cast<Uint32>(character);
00675
00676 #else
00677
00678
00679 const std::ctype<wchar_t>& facet = std::use_facet< std::ctype<wchar_t> >(locale);
00680
00681
00682 return static_cast<Uint32>(facet.widen(input));
00683
00684 #endif
00685 }
00686
00687
00689 template <typename In>
00690 Uint32 Utf<32>::DecodeWide(In input)
00691 {
00692
00693
00694
00695
00696
00697
00698 return input;
00699 }
00700
00701
00703 template <typename Out>
00704 Out Utf<32>::EncodeAnsi(Uint32 codepoint, Out output, char replacement, const std::locale& locale)
00705 {
00706
00707
00708
00709
00710
00711 #if defined(SFML_SYSTEM_WINDOWS) && \
00712 (defined(__GLIBCPP__) || defined (__GLIBCXX__)) && \
00713 !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION))
00714
00715 char character = 0;
00716 if (wctomb(&character, static_cast<wchar_t>(codepoint)) >= 0)
00717 *output++ = character;
00718 else if (replacement)
00719 *output++ = replacement;
00720
00721 return output;
00722
00723 #else
00724
00725
00726 const std::ctype<wchar_t>& facet = std::use_facet< std::ctype<wchar_t> >(locale);
00727
00728
00729 *output++ = facet.narrow(static_cast<wchar_t>(codepoint), replacement);
00730
00731 return output;
00732
00733 #endif
00734 }
00735
00736
00738 template <typename Out>
00739 Out Utf<32>::EncodeWide(Uint32 codepoint, Out output, wchar_t replacement)
00740 {
00741
00742
00743
00744
00745
00746
00747 switch (sizeof(wchar_t))
00748 {
00749 case 4:
00750 {
00751 *output++ = static_cast<wchar_t>(codepoint);
00752 break;
00753 }
00754
00755 default:
00756 {
00757 if ((codepoint <= 0xFFFF) && ((codepoint < 0xD800) || (codepoint > 0xDFFF)))
00758 {
00759 *output++ = static_cast<wchar_t>(codepoint);
00760 }
00761 else if (replacement)
00762 {
00763 *output++ = replacement;
00764 }
00765 break;
00766 }
00767 }
00768
00769 return output;
00770 }