csUnicodeTransform Class Reference
[Utilities]
Contains functions to convert between several UTF encodings. More...
#include <csutil/csuctransform.h>
Static Public Member Functions | |
UTF Decoders | |
static int | UTF8Decode (const utf8_char *str, size_t strlen, utf32_char &ch, bool *isValid=0, bool returnNonChar=false) |
Decode an Unicode code point encoded in UTF-8. | |
static int | UTF16Decode (const utf16_char *str, size_t strlen, utf32_char &ch, bool *isValid=0, bool returnNonChar=false) |
Decode an Unicode code point encoded in UTF-16. | |
static int | UTF32Decode (const utf32_char *str, size_t strlen, utf32_char &ch, bool *isValid=0, bool returnNonChar=false) |
Decode an Unicode code point encoded in UTF-32. | |
static int | Decode (const utf8_char *str, size_t strlen, utf32_char &ch, bool *isValid=0, bool returnNonChar=false) |
Decode an Unicode code point encoded in UTF-8. | |
static int | Decode (const utf16_char *str, size_t strlen, utf32_char &ch, bool *isValid=0, bool returnNonChar=false) |
Decode an Unicode code point encoded in UTF-16. | |
static int | Decode (const utf32_char *str, size_t strlen, utf32_char &ch, bool *isValid=0, bool returnNonChar=false) |
Decode an Unicode code point encoded in UTF-32. | |
UTF Encoders | |
static int | EncodeUTF8 (const utf32_char ch, utf8_char *buf, size_t bufsize, bool allowNonchars=false) |
Encode an Unicode code point to UTF-8. | |
static int | EncodeUTF16 (const utf32_char ch, utf16_char *buf, size_t bufsize, bool allowNonchars=false) |
Encode an Unicode code point to UTF-16. | |
static int | EncodeUTF32 (const utf32_char ch, utf32_char *buf, size_t bufsize, bool allowNonchars=false) |
Encode an Unicode code point to UTF-32. | |
static int | Encode (const utf32_char ch, utf8_char *buf, size_t bufsize, bool allowNonchars=false) |
Encode an Unicode code point to UTF-8. | |
static int | Encode (const utf32_char ch, utf16_char *buf, size_t bufsize, bool allowNonchars=false) |
Encode an Unicode code point to UTF-16. | |
static int | Encode (const utf32_char ch, utf32_char *buf, size_t bufsize, bool allowNonchars=false) |
Encode an Unicode code point to UTF-32. | |
Converters between strings in different UTF encodings | |
static size_t | UTF8to16 (utf16_char *dest, size_t destSize, const utf8_char *source, size_t srcSize=(size_t)-1) |
Convert UTF-8 to UTF-16. | |
static size_t | UTF8to32 (utf32_char *dest, size_t destSize, const utf8_char *source, size_t srcSize=(size_t)-1) |
Convert UTF-8 to UTF-32. | |
static size_t | UTF16to8 (utf8_char *dest, size_t destSize, const utf16_char *source, size_t srcSize=(size_t)-1) |
Convert UTF-16 to UTF-8. | |
static size_t | UTF16to32 (utf32_char *dest, size_t destSize, const utf16_char *source, size_t srcSize=(size_t)-1) |
Convert UTF-16 to UTF-32. | |
static size_t | UTF32to8 (utf8_char *dest, size_t destSize, const utf32_char *source, size_t srcSize=(size_t)-1) |
Convert UTF-32 to UTF-8. | |
static size_t | UTF32to16 (utf16_char *dest, size_t destSize, const utf32_char *source, size_t srcSize=(size_t)-1) |
Convert UTF-32 to UTF-16. | |
Converters between UTF and platform-specific wchar_t | |
static size_t | UTF8toWC (wchar_t *dest, size_t destSize, const utf8_char *source, size_t srcSize) |
Convert UTF-8 to platform-specific wide chars. | |
static size_t | UTF16toWC (wchar_t *dest, size_t destSize, const utf16_char *source, size_t srcSize) |
Convert UTF-16 to platform-specific wide chars. | |
static size_t | UTF32toWC (wchar_t *dest, size_t destSize, const utf32_char *source, size_t srcSize) |
Convert UTF-32 to platform-specific wide chars. | |
static size_t | WCtoUTF8 (utf8_char *dest, size_t destSize, const wchar_t *source, size_t srcSize) |
Convert platform-specific wide chars to UTF-8. | |
static size_t | WCtoUTF16 (utf16_char *dest, size_t destSize, const wchar_t *source, size_t srcSize) |
Convert platform-specific wide chars to UTF-16. | |
static size_t | WCtoUTF32 (utf32_char *dest, size_t destSize, const wchar_t *source, size_t srcSize) |
Convert platform-specific wide chars to UTF-32. | |
static int | Decode (const __wchar_t *str, size_t strlen, utf32_char &ch, bool *isValid=0, bool returnNonChar=false) |
Decode an Unicode code point from wchar_t. | |
static int | Encode (const utf32_char ch, __wchar_t *buf, size_t bufsize, bool allowNonchars=false) |
Encode an Unicode code point to wchar_t. | |
Helpers to skip encoded code units in different UTF encodings | |
static int | UTF8Skip (const utf8_char *str, size_t maxSkip) |
Determine how many code units in an UTF-8 buffer need to be skipped to get to the next encoded char. | |
static int | UTF8Rewind (const utf8_char *str, size_t maxRew) |
Determine how many code units in a UTF-8 buffer need to be skipped back to get to the start of the previous encoded code point. | |
static int | UTF16Skip (const utf16_char *str, size_t maxSkip) |
Determine how many code units in an UTF-16 buffer need to be skipped to get to the next encoded char. | |
static int | UTF16Rewind (const utf16_char *str, size_t maxRew) |
Determine how many code units in a UTF-16 buffer need to be skipped back to get to the start of the previous encoded code point. | |
static int | UTF32Skip (const utf32_char *str, size_t maxSkip) |
Determine how many code units in an UTF-32 buffer need to be skipped to get to the next encoded char. | |
static int | UTF32Rewind (const utf32_char *str, size_t maxRew) |
Determine how many code units in a UTF-32 buffer need to be skipped back to get to the start of the previous encoded code point. | |
Code point mappings | |
static size_t | MapToUpper (const utf32_char ch, utf32_char *dest, size_t destSize, uint flags=0) |
Map a code point to its upper case equivalent(s). | |
static size_t | MapToLower (const utf32_char ch, utf32_char *dest, size_t destSize, uint flags=0) |
Map a code point to its lower case equivalent(s). | |
static size_t | MapToFold (const utf32_char ch, utf32_char *dest, size_t destSize, uint flags=0) |
Map a code point to its fold equivalent(s). |
Detailed Description
Contains functions to convert between several UTF encodings.
Definition at line 79 of file csuctransform.h.
Member Function Documentation
static int csUnicodeTransform::Decode | ( | const utf8_char * | str, | |
size_t | strlen, | |||
utf32_char & | ch, | |||
bool * | isValid = 0 , |
|||
bool | returnNonChar = false | |||
) | [inline, static] |
Decode an Unicode code point encoded in UTF-8.
Decode an Unicode code point encoded in UTF-8.
- Parameters:
-
str Pointer to the encoded code point. strlen Number of code units in the source string. ch Decoded code point. isValid When an error occurred during decoding, ch contains the replacement character (CS_UC_CHAR_REPLACER). In this case, the bool pointed to by isValid will be set to false. The parameter can be 0, but in this case the information on whether the decoded char is the replacement character because the source data is erroneous is lost. returnNonChar Whether decoded non-character or high and low surrogates are returned as such. Normally, those code points are replaced with CS_UC_CHAR_REPLACER to signal an invalid encoded code point.
- Returns:
- The number of code units in str that have to be skipped to retrieve the next encoded code point.
Definition at line 277 of file csuctransform.h.
static int csUnicodeTransform::Decode | ( | const utf16_char * | str, | |
size_t | strlen, | |||
utf32_char & | ch, | |||
bool * | isValid = 0 , |
|||
bool | returnNonChar = false | |||
) | [inline, static] |
Decode an Unicode code point encoded in UTF-16.
The conventions of the UTF-8 decoder, UTF8Decode(), apply here as well.
- Parameters:
-
str Pointer to the encoded code point. strlen Number of code units in the source string. ch Decoded code point. isValid When an error occurred during decoding, ch contains the replacement character (CS_UC_CHAR_REPLACER). In this case, the bool pointed to by isValid will be set to false. The parameter can be 0, but in this case the information on whether the decoded char is the replacement character because the source data is erroneous is lost. returnNonChar Whether decoded non-character or high and low surrogates are returned as such. Normally, those code points are replaced with CS_UC_CHAR_REPLACER to signal an invalid encoded code point.
- Returns:
- The number of code units in str that have to be skipped to retrieve the next encoded code point.
Definition at line 286 of file csuctransform.h.
static int csUnicodeTransform::Decode | ( | const utf32_char * | str, | |
size_t | strlen, | |||
utf32_char & | ch, | |||
bool * | isValid = 0 , |
|||
bool | returnNonChar = false | |||
) | [inline, static] |
Decode an Unicode code point encoded in UTF-32.
The conventions of the UTF-8 decoder, UTF8Decode(), apply here as well.
- Parameters:
-
str Pointer to the encoded code point. strlen Number of code units in the source string. ch Decoded code point. isValid When an error occurred during decoding, ch contains the replacement character (CS_UC_CHAR_REPLACER). In this case, the bool pointed to by isValid will be set to false. The parameter can be 0, but in this case the information on whether the decoded char is the replacement character because the source data is erroneous is lost. returnNonChar Whether decoded non-character or high and low surrogates are returned as such. Normally, those code points are replaced with CS_UC_CHAR_REPLACER to signal an invalid encoded code point.
- Returns:
- The number of code units in str that have to be skipped to retrieve the next encoded code point.
Definition at line 295 of file csuctransform.h.
static int csUnicodeTransform::Decode | ( | const __wchar_t * | str, | |
size_t | strlen, | |||
utf32_char & | ch, | |||
bool * | isValid = 0 , |
|||
bool | returnNonChar = false | |||
) | [inline, static] |
Decode an Unicode code point from wchar_t.
The conventions of the UTF-8 decoder, UTF8Decode(), apply here as well.
- Parameters:
-
str Pointer to the encoded code point. strlen Number of code units in the source string. ch Decoded code point. isValid When an error occurred during decoding, ch contains the replacement character (CS_UC_CHAR_REPLACER). In this case, the bool pointed to by isValid will be set to false. The parameter can be 0, but in this case the information on whether the decoded char is the replacement character because the source data is erroneous is lost. returnNonChar Whether decoded non-character or high and low surrogates are returned as such. Normally, those code points are replaced with CS_UC_CHAR_REPLACER to signal an invalid encoded code point.
- Returns:
- The number of code units in str that have to be skipped to retrieve the next encoded code point.
Definition at line 774 of file csuctransform.h.
static int csUnicodeTransform::Encode | ( | const utf32_char | ch, | |
utf8_char * | buf, | |||
size_t | bufsize, | |||
bool | allowNonchars = false | |||
) | [inline, static] |
Encode an Unicode code point to UTF-8.
Encode an Unicode code point to UTF-8.
- Parameters:
-
ch Code point to encode. buf Pointer to the buffer receiving the encoded code point. When a fixed-size buffer is used it's a good idea to make it CS_UC_MAX_UTF8_ENCODED utf8_chars large. bufsize Number of code units that fit in buf. allowNonchars Whether non-character or high and low surrogates are encoded. Normally, those code points are rejected to prevent the generation of invalid encoded strings.
- Returns:
- The number of code units needed to encode ch.
- Remarks:
- The buffer will be filled up as much as possible. Check the returned value whether the encoded code point completely fit into the buffer.
Definition at line 461 of file csuctransform.h.
static int csUnicodeTransform::Encode | ( | const utf32_char | ch, | |
utf16_char * | buf, | |||
size_t | bufsize, | |||
bool | allowNonchars = false | |||
) | [inline, static] |
Encode an Unicode code point to UTF-16.
The conventions of the UTF-8 encoder, EncodeUTF8(), apply here as well.
- Parameters:
-
ch Code point to encode. buf Pointer to the buffer receiving the encoded code point. When a fixed-size buffer is used it's a good idea to make it CS_UC_MAX_UTF16_ENCODED utf16_chars large. bufsize Number of code units that fit in buf. allowNonchars Whether non-character or high and low surrogates are encoded. Normally, those code points are rejected to prevent the generation of invalid encoded strings.
- Returns:
- The number of code units needed to encode ch.
- Remarks:
- The buffer will be filled up as much as possible. Check the returned value whether the encoded code point completely fit into the buffer.
Definition at line 470 of file csuctransform.h.
static int csUnicodeTransform::Encode | ( | const utf32_char | ch, | |
utf32_char * | buf, | |||
size_t | bufsize, | |||
bool | allowNonchars = false | |||
) | [inline, static] |
Encode an Unicode code point to UTF-32.
The conventions of the UTF-8 encoder, EncodeUTF8(), apply here as well.
- Parameters:
-
ch Code point to encode. buf Pointer to the buffer receiving the encoded code point. When a fixed-size buffer is used it's a good idea to make it CS_UC_MAX_UTF32_ENCODED utf32_chars large. bufsize Number of code units that fit in buf. allowNonchars Whether non-character or high and low surrogates are encoded. Normally, those code points are rejected to prevent the generation of invalid encoded strings.
- Returns:
- The number of code units needed to encode ch.
- Remarks:
- The buffer will be filled up as much as possible. Check the returned value whether the encoded code point completely fit into the buffer.
Definition at line 479 of file csuctransform.h.
static int csUnicodeTransform::Encode | ( | const utf32_char | ch, | |
__wchar_t * | buf, | |||
size_t | bufsize, | |||
bool | allowNonchars = false | |||
) | [inline, static] |
Encode an Unicode code point to wchar_t.
The conventions of the UTF-8 encoder, EncodeUTF8(), apply here as well.
- Parameters:
-
ch Code point to encode. buf Pointer to the buffer receiving the encoded code point. When a fixed-size buffer is used it's a good idea to make it large enough for the longest encoded sequence (the UTF-8 encoder uses CS_UC_MAX_UTF8_ENCODED utf8_chars for this). bufsize Number of code units that fit in buf. allowNonchars Whether non-character or high and low surrogates are encoded. Normally, those code points are rejected to prevent the generation of invalid encoded strings.
- Returns:
- The number of code units needed to encode ch.
- Remarks:
- The buffer will be filled up as much as possible. Check the returned value whether the encoded code point completely fit into the buffer.
Definition at line 783 of file csuctransform.h.
static int csUnicodeTransform::EncodeUTF16 | ( | const utf32_char | ch, | |
utf16_char * | buf, | |||
size_t | bufsize, | |||
bool | allowNonchars = false | |||
) | [inline, static] |
Encode an Unicode code point to UTF-16.
- Parameters:
-
ch Code point to encode. buf Pointer to the buffer receiving the encoded code point. When a fixed-size buffer is used it's a good idea to make it CS_UC_MAX_UTF16_ENCODED utf16_chars large. bufsize Number of code units that fit in buf. allowNonchars Whether non-character or high and low surrogates are encoded. Normally, those code points are rejected to prevent the generation of invalid encoded strings.
- Returns:
- The number of code units needed to encode ch.
- Remarks:
- The buffer will be filled up as much as possible. Check the returned value whether the encoded code point completely fit into the buffer.
Definition at line 401 of file csuctransform.h.
static int csUnicodeTransform::EncodeUTF32 | ( | const utf32_char | ch, | |
utf32_char * | buf, | |||
size_t | bufsize, | |||
bool | allowNonchars = false | |||
) | [inline, static] |
Encode an Unicode code point to UTF-32.
- Parameters:
-
ch Code point to encode. buf Pointer to the buffer receiving the encoded code point. When a fixed-size buffer is used it's a good idea to make it CS_UC_MAX_UTF32_ENCODED utf32_chars large. bufsize Number of code units that fit in buf. allowNonchars Whether non-character or high and low surrogates are encoded. Normally, those code points are rejected to prevent the generation of invalid encoded strings.
- Returns:
- The number of code units needed to encode ch.
- Remarks:
- The buffer will be filled up as much as possible. Check the returned value whether the encoded code point completely fit into the buffer.
Definition at line 443 of file csuctransform.h.
static int csUnicodeTransform::EncodeUTF8 | ( | const utf32_char | ch, | |
utf8_char * | buf, | |||
size_t | bufsize, | |||
bool | allowNonchars = false | |||
) | [inline, static] |
Encode an Unicode code point to UTF-8.
- Parameters:
-
ch Code point to encode. buf Pointer to the buffer receiving the encoded code point. When a fixed-size buffer is used it's a good idea to make it CS_UC_MAX_UTF8_ENCODED utf8_chars large. bufsize Number of code units that fit in buf. allowNonchars Whether non-character or high and low surrogates are encoded. Normally, those code points are rejected to prevent the generation of invalid encoded strings.
- Returns:
- The number of code units needed to encode ch.
- Remarks:
- The buffer will be filled up as much as possible. Check the returned value whether the encoded code point completely fit into the buffer.
Definition at line 333 of file csuctransform.h.
static size_t csUnicodeTransform::MapToFold | ( | const utf32_char | ch, | |
utf32_char * | dest, | |||
size_t | destSize, | |||
uint | flags = 0 | |||
) | [static] |
Map a code point to its fold equivalent(s).
Fold mapping is useful for binary comparison of two Unicode strings.
The parameter and return-value conventions of MapToUpper() apply here as well.
- Parameters:
-
ch Code point to be mapped. dest Destination buffer. destSize Number of code units the destination buffer can hold. flags Flags to control the result of the mapping. Currently supported is csUcMapSimple.
- Returns:
- Number of code units the complete mapping result would require.
static size_t csUnicodeTransform::MapToLower | ( | const utf32_char | ch, | |
utf32_char * | dest, | |||
size_t | destSize, | |||
uint | flags = 0 | |||
) | [static] |
Map a code point to its lower case equivalent(s).
The parameter and return-value conventions of MapToUpper() apply here as well.
- Parameters:
-
ch Code point to be mapped. dest Destination buffer. destSize Number of code units the destination buffer can hold. flags Flags to control the result of the mapping. Currently supported is csUcMapSimple.
- Returns:
- Number of code units the complete mapping result would require.
static size_t csUnicodeTransform::MapToUpper | ( | const utf32_char | ch, | |
utf32_char * | dest, | |||
size_t | destSize, | |||
uint | flags = 0 | |||
) | [static] |
Map a code point to its upper case equivalent(s).
- Parameters:
-
ch Code point to be mapped. dest Destination buffer. destSize Number of code units the destination buffer can hold. flags Flags to control the result of the mapping. Currently supported is csUcMapSimple.
- Returns:
- Number of code units the complete mapping result would require.
static int csUnicodeTransform::UTF16Decode | ( | const utf16_char * | str, | |
size_t | strlen, | |||
utf32_char & | ch, | |||
bool * | isValid = 0 , |
|||
bool | returnNonChar = false | |||
) | [inline, static] |
Decode an Unicode code point encoded in UTF-16.
The conventions of the UTF-8 decoder, UTF8Decode(), apply here as well.
- Parameters:
-
str Pointer to the encoded code point. strlen Number of code units in the source string. ch Decoded code point. isValid When an error occurred during decoding, ch contains the replacement character (CS_UC_CHAR_REPLACER). In this case, the bool pointed to by isValid will be set to false. The parameter can be 0, but in this case the information on whether the decoded char is the replacement character because the source data is erroneous is lost. returnNonChar Whether decoded non-character or high and low surrogates are returned as such. Normally, those code points are replaced with CS_UC_CHAR_REPLACER to signal an invalid encoded code point.
- Returns:
- The number of code units in str that have to be skipped to retrieve the next encoded code point.
Definition at line 214 of file csuctransform.h.
static int csUnicodeTransform::UTF16Rewind | ( | const utf16_char * | str, | |
size_t | maxRew | |||
) | [inline, static] |
Determine how many code units in a UTF-16 buffer need to be skipped back to get to the start of the previous encoded code point.
The conventions of UTF8Rewind() apply here as well.
- Parameters:
-
str Pointer to the encoded code point after the code point that is actually to be skipped back. maxRew The number of code units to go back at max. Typically, this is the number of chars from str to the start of the buffer.
- Returns:
- Number of chars to skip back in the buffer. Returns 0 if maxRew is 0.
Definition at line 963 of file csuctransform.h.
static int csUnicodeTransform::UTF16Skip | ( | const utf16_char * | str, | |
size_t | maxSkip | |||
) | [inline, static] |
Determine how many code units in an UTF-16 buffer need to be skipped to get to the next encoded char.
The conventions of UTF8Skip() apply here as well.
- Parameters:
-
str Pointer to buffer with encoded code point. maxSkip The number of code units to skip at max. Usually, this is the number of chars from str to the end of the buffer.
- Returns:
- Number of chars to skip in the buffer. Returns 0 if maxSkip is 0.
Definition at line 950 of file csuctransform.h.
static size_t csUnicodeTransform::UTF16to32 | ( | utf32_char * | dest, | |
size_t | destSize, | |||
const utf16_char * | source, | |||
size_t | srcSize = (size_t)-1 | |||
) | [inline, static] |
Convert UTF-16 to UTF-32.
The conventions of UTF8to16() apply here as well.
- Parameters:
-
dest Destination buffer. destSize Number of code units the destination buffer can hold. source Source buffer. srcSize Number of code units contained in the source buffer. If this is -1, the length will be determined automatically.
- Returns:
- Number of code units in the complete converted string, including null terminator.
- Remarks:
- If the complete converted string wouldn't fit into the destination buffer, it is truncated. However, it will still be null-terminated. Hence, if the destination buffer has a size of 1, you get an empty string. The returned value is the number of code units needed for the *whole* converted string.
Definition at line 573 of file csuctransform.h.
static size_t csUnicodeTransform::UTF16to8 | ( | utf8_char * | dest, | |
size_t | destSize, | |||
const utf16_char * | source, | |||
size_t | srcSize = (size_t)-1 | |||
) | [inline, static] |
Convert UTF-16 to UTF-8.
The conventions of UTF8to16() apply here as well.
- Parameters:
-
dest Destination buffer. destSize Number of code units the destination buffer can hold. source Source buffer. srcSize Number of code units contained in the source buffer. If this is -1, the length will be determined automatically.
- Returns:
- Number of code units in the complete converted string, including null terminator.
- Remarks:
- If the complete converted string wouldn't fit into the destination buffer, it is truncated. However, it will still be null-terminated. Hence, if the destination buffer has a size of 1, you get an empty string. The returned value is the number of code units needed for the *whole* converted string.
Definition at line 568 of file csuctransform.h.
static size_t csUnicodeTransform::UTF16toWC | ( | wchar_t * | dest, | |
size_t | destSize, | |||
const utf16_char * | source, | |||
size_t | srcSize | |||
) | [inline, static] |
Convert UTF-16 to platform-specific wide chars.
The conventions of UTF8toWC() and UTF8to16() apply here as well.
- Parameters:
-
dest Destination buffer. destSize Number of code units the destination buffer can hold. source Source buffer. srcSize Number of code units contained in the source buffer. If this is -1, the length will be determined automatically.
- Returns:
- Number of code units in the complete converted string, including null terminator.
- Remarks:
- If the complete converted string wouldn't fit into the destination buffer, it is truncated. However, it will still be null-terminated. Hence, if the destination buffer has a size of 1, you get an empty string. The returned value is the number of code units needed for the *whole* converted string.
Definition at line 684 of file csuctransform.h.
static int csUnicodeTransform::UTF32Decode | ( | const utf32_char * | str, | |
size_t | strlen, | |||
utf32_char & | ch, | |||
bool * | isValid = 0 , |
|||
bool | returnNonChar = false | |||
) | [inline, static] |
Decode an Unicode code point encoded in UTF-32.
The conventions of the UTF-8 decoder, UTF8Decode(), apply here as well.
- Parameters:
-
str Pointer to the encoded code point. strlen Number of code units in the source string. ch Decoded code point. isValid When an error occurred during decoding, ch contains the replacement character (CS_UC_CHAR_REPLACER). In this case, the bool pointed to by isValid will be set to false. The parameter can be 0, but in this case the information on whether the decoded char is the replacement character because the source data is erroneous is lost. returnNonChar Whether decoded non-character or high and low surrogates are returned as such. Normally, those code points are replaced with CS_UC_CHAR_REPLACER to signal an invalid encoded code point.
- Returns:
- The number of code units in str that have to be skipped to retrieve the next encoded code point.
Definition at line 257 of file csuctransform.h.
static int csUnicodeTransform::UTF32Rewind | ( | const utf32_char * | str, | |
size_t | maxRew | |||
) | [inline, static] |
Determine how many code units in a UTF-32 buffer need to be skipped back to get to the start of the previous encoded code point.
The conventions of UTF8Rewind() apply here as well.
- Parameters:
-
str Pointer to the encoded code point after the code point that is actually to be skipped back. maxRew The number of code units to go back at max. Typically, this is the number of chars from str to the start of the buffer.
- Returns:
- Number of chars to skip back in the buffer. Returns 0 if maxRew is 0.
Definition at line 995 of file csuctransform.h.
static int csUnicodeTransform::UTF32Skip | ( | const utf32_char * | str, | |
size_t | maxSkip | |||
) | [inline, static] |
Determine how many code units in an UTF-32 buffer need to be skipped to get to the next encoded char.
The conventions of UTF8Skip() apply here as well.
- Parameters:
-
str Pointer to buffer with encoded code point. maxSkip The number of code units to skip at max. Usually, this is the number of chars from str to the end of the buffer.
- Returns:
- Number of chars to skip in the buffer. Returns 0 if maxSkip is 0.
Definition at line 984 of file csuctransform.h.
static size_t csUnicodeTransform::UTF32to16 | ( | utf16_char * | dest, | |
size_t | destSize, | |||
const utf32_char * | source, | |||
size_t | srcSize = (size_t)-1 | |||
) | [inline, static] |
Convert UTF-32 to UTF-16.
The conventions of UTF8to16() apply here as well.
- Parameters:
-
dest Destination buffer. destSize Number of code units the destination buffer can hold. source Source buffer. srcSize Number of code units contained in the source buffer. If this is -1, the length will be determined automatically.
- Returns:
- Number of code units in the complete converted string, including null terminator.
- Remarks:
- If the complete converted string wouldn't fit into the destination buffer, it is truncated. However, it will still be null-terminated. Hence, if the destination buffer has a size of 1, you get an empty string. The returned value is the number of code units needed for the *whole* converted string.
Definition at line 584 of file csuctransform.h.
static size_t csUnicodeTransform::UTF32to8 | ( | utf8_char * | dest, | |
size_t | destSize, | |||
const utf32_char * | source, | |||
size_t | srcSize = (size_t)-1 | |||
) | [inline, static] |
Convert UTF-32 to UTF-8.
The conventions of UTF8to16() apply here as well.
- Parameters:
-
dest Destination buffer. destSize Number of code units the destination buffer can hold. source Source buffer. srcSize Number of code units contained in the source buffer. If this is -1, the length will be determined automatically.
- Returns:
- Number of code units in the complete converted string, including null terminator.
- Remarks:
- If the complete converted string wouldn't fit into the destination buffer, it is truncated. However, it will still be null-terminated. Hence, if the destination buffer has a size of 1, you get an empty string. The returned value is the number of code units needed for the *whole* converted string.
Definition at line 579 of file csuctransform.h.
static size_t csUnicodeTransform::UTF32toWC | ( | wchar_t * | dest, | |
size_t | destSize, | |||
const utf32_char * | source, | |||
size_t | srcSize | |||
) | [inline, static] |
Convert UTF-32 to platform-specific wide chars.
The conventions of UTF8toWC() and UTF8to16() apply here as well.
- Parameters:
-
dest Destination buffer. destSize Number of code units the destination buffer can hold. source Source buffer. srcSize Number of code units contained in the source buffer. If this is -1, the length will be determined automatically.
- Returns:
- Number of code units in the complete converted string, including null terminator.
- Remarks:
- If the complete converted string wouldn't fit into the destination buffer, it is truncated. However, it will still be null-terminated. Hence, if the destination buffer has a size of 1, you get an empty string. The returned value is the number of code units needed for the *whole* converted string.
Definition at line 707 of file csuctransform.h.
static int csUnicodeTransform::UTF8Decode | ( | const utf8_char * | str, | |
size_t | strlen, | |||
utf32_char & | ch, | |||
bool * | isValid = 0 , |
|||
bool | returnNonChar = false | |||
) | [inline, static] |
Decode an Unicode code point encoded in UTF-8.
- Parameters:
-
str Pointer to the encoded code point. strlen Number of code units in the source string. ch Decoded code point. isValid When an error occurred during decoding, ch contains the replacement character (CS_UC_CHAR_REPLACER). In this case, the bool pointed to by isValid will be set to false. The parameter can be 0, but in this case the information on whether the decoded char is the replacement character because the source data is erroneous is lost. returnNonChar Whether decoded non-character or high and low surrogates are returned as such. Normally, those code points are replaced with CS_UC_CHAR_REPLACER to signal an invalid encoded code point.
- Returns:
- The number of code units in str that have to be skipped to retrieve the next encoded code point.
Definition at line 123 of file csuctransform.h.
static int csUnicodeTransform::UTF8Rewind | ( | const utf8_char * | str, | |
size_t | maxRew | |||
) | [inline, static] |
Determine how many code units in a UTF-8 buffer need to be skipped back to get to the start of the previous encoded code point.
- Parameters:
-
str Pointer to the encoded code point after the code point that is actually to be skipped back. maxRew The number of code units to go back at max. Typically, this is the number of chars from str to the start of the buffer.
- Returns:
- Number of chars to skip back in the buffer. Returns 0 if maxRew is 0.
Definition at line 923 of file csuctransform.h.
static int csUnicodeTransform::UTF8Skip | ( | const utf8_char * | str, | |
size_t | maxSkip | |||
) | [inline, static] |
Determine how many code units in an UTF-8 buffer need to be skipped to get to the next encoded char.
- Parameters:
-
str Pointer to buffer with encoded code point. maxSkip The number of code units to skip at max. Usually, this is the number of chars from str to the end of the buffer.
- Returns:
- Number of chars to skip in the buffer. Returns 0 if maxSkip is 0.
Definition at line 882 of file csuctransform.h.
static size_t csUnicodeTransform::UTF8to16 | ( | utf16_char * | dest, | |
size_t | destSize, | |||
const utf8_char * | source, | |||
size_t | srcSize = (size_t)-1 | |||
) | [inline, static] |
Convert UTF-8 to UTF-16.
- Parameters:
-
dest Destination buffer. destSize Number of code units the destination buffer can hold. source Source buffer. srcSize Number of code units contained in the source buffer. If this is -1, the length will be determined automatically.
- Returns:
- Number of code units in the complete converted string, including null terminator.
- Remarks:
- If the complete converted string wouldn't fit into the destination buffer, it is truncated. However, it will still be null-terminated. Hence, if the destination buffer has a size of 1, you get an empty string. The returned value is the number of code units needed for the *whole* converted string.
Definition at line 557 of file csuctransform.h.
static size_t csUnicodeTransform::UTF8to32 | ( | utf32_char * | dest, | |
size_t | destSize, | |||
const utf8_char * | source, | |||
size_t | srcSize = (size_t)-1 | |||
) | [inline, static] |
Convert UTF-8 to UTF-32.
Parameters, return value and truncation behaviour follow the same conventions as UTF8to16().
- Parameters:
-
dest Destination buffer. destSize Number of code units the destination buffer can hold. source Source buffer. srcSize Number of code units contained in the source buffer. If this is -1, the length will be determined automatically.
- Returns:
- Number of code units in the complete converted string, including null terminator.
- Remarks:
- If the complete converted string doesn't fit into the destination buffer, it is truncated; the truncated result is still null-terminated. Hence, a destination buffer with a size of 1 yields an empty string. The returned value is the number of code units needed for the *whole* converted string.
Definition at line 562 of file csuctransform.h.
static size_t csUnicodeTransform::UTF8toWC | ( | wchar_t * | dest, | |
size_t | destSize, | |||
const utf8_char * | source, | |||
size_t | srcSize | |||
) | [inline, static] |
Convert UTF-8 to platform-specific wide chars.
Parameters, return value and truncation behaviour follow the same conventions as UTF8to16().
- Parameters:
-
dest Destination buffer. destSize Number of code units the destination buffer can hold. source Source buffer. srcSize Number of code units contained in the source buffer. If this is -1, the length will be determined automatically.
- Returns:
- Number of code units in the complete converted string, including null terminator.
- Remarks:
- If the complete converted string doesn't fit into the destination buffer, it is truncated; the truncated result is still null-terminated. Hence, a destination buffer with a size of 1 yields an empty string. The returned value is the number of code units needed for the *whole* converted string.
Definition at line 674 of file csuctransform.h.
static size_t csUnicodeTransform::WCtoUTF16 | ( | utf16_char * | dest, | |
size_t | destSize, | |||
const wchar_t * | source, | |||
size_t | srcSize | |||
) | [inline, static] |
Convert platform-specific wide chars to UTF-16.
Parameters, return value and truncation behaviour follow the same conventions as UTF8to16().
- Parameters:
-
dest Destination buffer. destSize Number of code units the destination buffer can hold. source Source buffer. srcSize Number of code units contained in the source buffer. If this is -1, the length will be determined automatically.
- Returns:
- Number of code units in the complete converted string, including null terminator.
- Remarks:
- If the complete converted string doesn't fit into the destination buffer, it is truncated; the truncated result is still null-terminated. Hence, a destination buffer with a size of 1 yields an empty string. The returned value is the number of code units needed for the *whole* converted string.
Definition at line 727 of file csuctransform.h.
static size_t csUnicodeTransform::WCtoUTF32 | ( | utf32_char * | dest, | |
size_t | destSize, | |||
const wchar_t * | source, | |||
size_t | srcSize | |||
) | [inline, static] |
Convert platform-specific wide chars to UTF-32.
Parameters, return value and truncation behaviour follow the same conventions as UTF8to16().
- Parameters:
-
dest Destination buffer. destSize Number of code units the destination buffer can hold. source Source buffer. srcSize Number of code units contained in the source buffer. If this is -1, the length will be determined automatically.
- Returns:
- Number of code units in the complete converted string, including null terminator.
- Remarks:
- If the complete converted string doesn't fit into the destination buffer, it is truncated; the truncated result is still null-terminated. Hence, a destination buffer with a size of 1 yields an empty string. The returned value is the number of code units needed for the *whole* converted string.
Definition at line 750 of file csuctransform.h.
static size_t csUnicodeTransform::WCtoUTF8 | ( | utf8_char * | dest, | |
size_t | destSize, | |||
const wchar_t * | source, | |||
size_t | srcSize | |||
) | [inline, static] |
Convert platform-specific wide chars to UTF-8.
Parameters, return value and truncation behaviour follow the same conventions as UTF8to16().
- Parameters:
-
dest Destination buffer. destSize Number of code units the destination buffer can hold. source Source buffer. srcSize Number of code units contained in the source buffer. If this is -1, the length will be determined automatically.
- Returns:
- Number of code units in the complete converted string, including null terminator.
- Remarks:
- If the complete converted string doesn't fit into the destination buffer, it is truncated; the truncated result is still null-terminated. Hence, a destination buffer with a size of 1 yields an empty string. The returned value is the number of code units needed for the *whole* converted string.
Definition at line 717 of file csuctransform.h.
The documentation for this class was generated from the following file:
- csutil/csuctransform.h
Generated for Crystal Space 1.4.1 by doxygen 1.7.1