Unicode Manipulation

Name

Unicode Manipulation -- 

Synopsis


#include <glib.h>


typedef     gunichar;
typedef     gunichar2;

gboolean    g_get_charset                   (char **charset);

gboolean    g_unichar_validate              (gunichar ch);
gboolean    g_unichar_isalnum               (gunichar c);
gboolean    g_unichar_isalpha               (gunichar c);
gboolean    g_unichar_iscntrl               (gunichar c);
gboolean    g_unichar_isdigit               (gunichar c);
gboolean    g_unichar_isgraph               (gunichar c);
gboolean    g_unichar_islower               (gunichar c);
gboolean    g_unichar_isprint               (gunichar c);
gboolean    g_unichar_ispunct               (gunichar c);
gboolean    g_unichar_isspace               (gunichar c);
gboolean    g_unichar_isupper               (gunichar c);
gboolean    g_unichar_isxdigit              (gunichar c);
gboolean    g_unichar_istitle               (gunichar c);
gboolean    g_unichar_isdefined             (gunichar c);
gboolean    g_unichar_iswide                (gunichar c);
gunichar    g_unichar_toupper               (gunichar c);
gunichar    g_unichar_tolower               (gunichar c);
gunichar    g_unichar_totitle               (gunichar c);
gint        g_unichar_digit_value           (gunichar c);
gint        g_unichar_xdigit_value          (gunichar c);
enum        GUnicodeType;
GUnicodeType g_unichar_type                 (gunichar c);
enum        GUnicodeBreakType;
GUnicodeBreakType g_unichar_break_type      (gunichar c);
void        g_unicode_canonical_ordering    (gunichar *string,
                                             gsize len);
gunichar*   g_unicode_canonical_decomposition
                                            (gunichar ch,
                                             gsize *result_len);

#define     g_utf8_next_char                (p)
gunichar    g_utf8_get_char                 (const gchar *p);
gchar*      g_utf8_offset_to_pointer        (const gchar *str,
                                             gint offset);
gint        g_utf8_pointer_to_offset        (const gchar *str,
                                             const gchar *pos);
gchar*      g_utf8_prev_char                (const gchar *p);
gchar*      g_utf8_find_next_char           (const gchar *p,
                                             const gchar *end);
gchar*      g_utf8_find_prev_char           (const gchar *str,
                                             const gchar *p);
gint        g_utf8_strlen                   (const gchar *p,
                                             gint max);
gchar*      g_utf8_strncpy                  (gchar *dest,
                                             const gchar *src,
                                             gsize n);
gchar*      g_utf8_strchr                   (const gchar *p,
                                             gint len,
                                             gunichar c);
gchar*      g_utf8_strrchr                  (const gchar *p,
                                             gint len,
                                             gunichar c);
gboolean    g_utf8_validate                 (const gchar *str,
                                             gint max_len,
                                             const gchar **end);

gunichar2*  g_utf8_to_utf16                 (const gchar *str,
                                             gint len,
                                             gint *items_read,
                                             gint *items_written,
                                             GError **error);
gunichar*   g_utf8_to_ucs4                  (const gchar *str,
                                             gint len,
                                             gint *items_read,
                                             gint *items_written,
                                             GError **error);
gunichar*   g_utf8_to_ucs4_fast             (const gchar *str,
                                             gint len,
                                             gint *items_written);
gunichar*   g_utf16_to_ucs4                 (const gunichar2 *str,
                                             gint len,
                                             gint *items_read,
                                             gint *items_written,
                                             GError **error);
gchar*      g_utf16_to_utf8                 (const gunichar2 *str,
                                             gint len,
                                             gint *items_read,
                                             gint *items_written,
                                             GError **error);
gunichar2*  g_ucs4_to_utf16                 (const gunichar *str,
                                             gint len,
                                             gint *items_read,
                                             gint *items_written,
                                             GError **error);
gchar*      g_ucs4_to_utf8                  (const gunichar *str,
                                             gint len,
                                             gint *items_read,
                                             gint *items_written,
                                             GError **error);
gint        g_unichar_to_utf8               (gunichar c,
                                             char *outbuf);

Description

Details

gunichar

typedef guint32 gunichar;


gunichar2

typedef guint16 gunichar2;


g_get_charset ()

gboolean    g_get_charset                   (char **charset);

Obtains the character set for the current locale; you might use this character set as an argument to g_convert(), to convert from the current locale's encoding to some other encoding. (Frequently g_locale_to_utf8() and g_locale_from_utf8() are nice shortcuts, though.)

The return value is TRUE if the locale's encoding is UTF-8; in that case you can perhaps avoid calling g_convert().

The string returned in charset is not allocated, and should not be freed.

charset : return location for character set name
Returns : TRUE if the returned charset is UTF-8


g_unichar_validate ()

gboolean    g_unichar_validate              (gunichar ch);

Checks whether ch is a valid Unicode character. Some possible integer values of ch will not be valid. 0 is considered a valid character, though it's normally a string terminator.

ch : a Unicode character
Returns : TRUE if ch is a valid Unicode character


g_unichar_isalnum ()

gboolean    g_unichar_isalnum               (gunichar c);

Determines whether a character is alphanumeric. Given some UTF-8 text, obtain a character value with g_utf8_get_char().

c : a Unicode character
Returns : TRUE if c is an alphanumeric character


g_unichar_isalpha ()

gboolean    g_unichar_isalpha               (gunichar c);

Determines whether a character is alphabetic (i.e. a letter). Given some UTF-8 text, obtain a character value with g_utf8_get_char().

c : a Unicode character
Returns : TRUE if c is an alphabetic character


g_unichar_iscntrl ()

gboolean    g_unichar_iscntrl               (gunichar c);

Determines whether a character is a control character. Given some UTF-8 text, obtain a character value with g_utf8_get_char().

c : a Unicode character
Returns : TRUE if c is a control character


g_unichar_isdigit ()

gboolean    g_unichar_isdigit               (gunichar c);

Determines whether a character is numeric (i.e. a digit). This covers ASCII 0-9 and also digits in other languages/scripts. Given some UTF-8 text, obtain a character value with g_utf8_get_char().

c : a Unicode character
Returns : TRUE if c is a digit


g_unichar_isgraph ()

gboolean    g_unichar_isgraph               (gunichar c);

Determines whether a character is printable and not a space (returns FALSE for control characters, format characters, and spaces). g_unichar_isprint() is similar, but returns TRUE for spaces. Given some UTF-8 text, obtain a character value with g_utf8_get_char().

c : a Unicode character
Returns : TRUE if c is printable unless it's a space


g_unichar_islower ()

gboolean    g_unichar_islower               (gunichar c);

Determines whether a character is a lowercase letter. Given some UTF-8 text, obtain a character value with g_utf8_get_char().

c : a Unicode character
Returns : TRUE if c is a lowercase letter


g_unichar_isprint ()

gboolean    g_unichar_isprint               (gunichar c);

Determines whether a character is printable. Unlike g_unichar_isgraph(), returns TRUE for spaces. Given some UTF-8 text, obtain a character value with g_utf8_get_char().

c : a Unicode character
Returns : TRUE if c is printable


g_unichar_ispunct ()

gboolean    g_unichar_ispunct               (gunichar c);

Determines whether a character is punctuation. Given some UTF-8 text, obtain a character value with g_utf8_get_char().

c : a Unicode character
Returns : TRUE if c is a punctuation character


g_unichar_isspace ()

gboolean    g_unichar_isspace               (gunichar c);

Determines whether a character is a space, tab, or line separator (newline, carriage return, etc.). Given some UTF-8 text, obtain a character value with g_utf8_get_char().

(Note: don't use this to do word breaking; you have to use Pango or equivalent to get word breaking right, the algorithm is fairly complex.)

c : a Unicode character
Returns : TRUE if c is a space character


g_unichar_isupper ()

gboolean    g_unichar_isupper               (gunichar c);

Determines if a character is uppercase.

c : a unicode character
Returns : TRUE if c is an uppercase character.


g_unichar_isxdigit ()

gboolean    g_unichar_isxdigit              (gunichar c);

Determines if a character is a hexadecimal digit.

c : a unicode character.
Returns : TRUE if the character is a hexadecimal digit.


g_unichar_istitle ()

gboolean    g_unichar_istitle               (gunichar c);

Determines if a character is titlecase. Some characters in Unicode which are composites, such as the DZ digraph, have three case variants instead of just two. The titlecase form is used at the beginning of a word where only the first letter is capitalized. The titlecase form of the DZ digraph is U+01F2 LATIN CAPITAL LETTER D WITH SMALL LETTER Z.

c : a unicode character
Returns : TRUE if the character is titlecase.


g_unichar_isdefined ()

gboolean    g_unichar_isdefined             (gunichar c);

Determines if a given character is assigned in the Unicode standard.

c : a unicode character
Returns : TRUE if the character has an assigned value.


g_unichar_iswide ()

gboolean    g_unichar_iswide                (gunichar c);

Determines if a character is typically rendered in a double-width cell.

c : a unicode character
Returns : TRUE if the character is wide.


g_unichar_toupper ()

gunichar    g_unichar_toupper               (gunichar c);

Convert a character to uppercase.

c : a unicode character
Returns : the result of converting c to uppercase. If c is not a lowercase or titlecase character, c is returned unchanged.


g_unichar_tolower ()

gunichar    g_unichar_tolower               (gunichar c);

Convert a character to lower case

c : a unicode character.
Returns : the result of converting c to lower case. If c is not an uppercase or titlecase character, c is returned unchanged.


g_unichar_totitle ()

gunichar    g_unichar_totitle               (gunichar c);

Converts a character to titlecase.

c : a unicode character
Returns : the result of converting c to titlecase. If c is not an uppercase or lowercase character, c is returned unchanged.


g_unichar_digit_value ()

gint        g_unichar_digit_value           (gunichar c);

Determines the numeric value of a character as a decimal digit.

c : a unicode character
Returns : If c is a decimal digit (according to `g_unichar_isdigit'), its numeric value. Otherwise, -1.


g_unichar_xdigit_value ()

gint        g_unichar_xdigit_value          (gunichar c);

Determines the numeric value of a character as a hexadecimal digit.

c : a unicode character
Returns : If c is a hex digit (according to `g_unichar_isxdigit'), its numeric value. Otherwise, -1.


enum GUnicodeType

typedef enum
{
  G_UNICODE_CONTROL,
  G_UNICODE_FORMAT,
  G_UNICODE_UNASSIGNED,
  G_UNICODE_PRIVATE_USE,
  G_UNICODE_SURROGATE,
  G_UNICODE_LOWERCASE_LETTER,
  G_UNICODE_MODIFIER_LETTER,
  G_UNICODE_OTHER_LETTER,
  G_UNICODE_TITLECASE_LETTER,
  G_UNICODE_UPPERCASE_LETTER,
  G_UNICODE_COMBINING_MARK,
  G_UNICODE_ENCLOSING_MARK,
  G_UNICODE_NON_SPACING_MARK,
  G_UNICODE_DECIMAL_NUMBER,
  G_UNICODE_LETTER_NUMBER,
  G_UNICODE_OTHER_NUMBER,
  G_UNICODE_CONNECT_PUNCTUATION,
  G_UNICODE_DASH_PUNCTUATION,
  G_UNICODE_CLOSE_PUNCTUATION,
  G_UNICODE_FINAL_PUNCTUATION,
  G_UNICODE_INITIAL_PUNCTUATION,
  G_UNICODE_OTHER_PUNCTUATION,
  G_UNICODE_OPEN_PUNCTUATION,
  G_UNICODE_CURRENCY_SYMBOL,
  G_UNICODE_MODIFIER_SYMBOL,
  G_UNICODE_MATH_SYMBOL,
  G_UNICODE_OTHER_SYMBOL,
  G_UNICODE_LINE_SEPARATOR,
  G_UNICODE_PARAGRAPH_SEPARATOR,
  G_UNICODE_SPACE_SEPARATOR
} GUnicodeType;


g_unichar_type ()

GUnicodeType g_unichar_type                 (gunichar c);

Classifies a unicode character by type.

c : a unicode character
Returns : the type of the character.


enum GUnicodeBreakType

typedef enum
{
  G_UNICODE_BREAK_MANDATORY,
  G_UNICODE_BREAK_CARRIAGE_RETURN,
  G_UNICODE_BREAK_LINE_FEED,
  G_UNICODE_BREAK_COMBINING_MARK,
  G_UNICODE_BREAK_SURROGATE,
  G_UNICODE_BREAK_ZERO_WIDTH_SPACE,
  G_UNICODE_BREAK_INSEPARABLE,
  G_UNICODE_BREAK_NON_BREAKING_GLUE,
  G_UNICODE_BREAK_CONTINGENT,
  G_UNICODE_BREAK_SPACE,
  G_UNICODE_BREAK_AFTER,
  G_UNICODE_BREAK_BEFORE,
  G_UNICODE_BREAK_BEFORE_AND_AFTER,
  G_UNICODE_BREAK_HYPHEN,
  G_UNICODE_BREAK_NON_STARTER,
  G_UNICODE_BREAK_OPEN_PUNCTUATION,
  G_UNICODE_BREAK_CLOSE_PUNCTUATION,
  G_UNICODE_BREAK_QUOTATION,
  G_UNICODE_BREAK_EXCLAMATION,
  G_UNICODE_BREAK_IDEOGRAPHIC,
  G_UNICODE_BREAK_NUMERIC,
  G_UNICODE_BREAK_INFIX_SEPARATOR,
  G_UNICODE_BREAK_SYMBOL,
  G_UNICODE_BREAK_ALPHABETIC,
  G_UNICODE_BREAK_PREFIX,
  G_UNICODE_BREAK_POSTFIX,
  G_UNICODE_BREAK_COMPLEX_CONTEXT,
  G_UNICODE_BREAK_AMBIGUOUS,
  G_UNICODE_BREAK_UNKNOWN
} GUnicodeBreakType;


g_unichar_break_type ()

GUnicodeBreakType g_unichar_break_type      (gunichar c);

Determines the break type of c. c should be a Unicode character (to derive a character from UTF-8 encoded text, use g_utf8_get_char()). The break type is used to find word and line breaks ("text boundaries"). Pango implements the Unicode boundary resolution algorithms, and normally you would use a function such as pango_break() instead of caring about break types yourself.

c : a Unicode character
Returns : break type


g_unicode_canonical_ordering ()

void        g_unicode_canonical_ordering    (gunichar *string,
                                             gsize len);

string : 
len : 


g_unicode_canonical_decomposition ()

gunichar*   g_unicode_canonical_decomposition
                                            (gunichar ch,
                                             gsize *result_len);

ch : 
result_len : 
Returns : 


g_utf8_next_char()

#define     g_utf8_next_char(p)

Skips to the next character in a UTF-8 string. The string must be valid; this macro is as fast as possible, and has zero error-checking. You would use this macro to iterate over a string character by character. The macro returns the start of the next UTF-8 character. Before using this macro, use g_utf8_validate() to validate strings that may contain invalid UTF-8.

p : pointer to the start of a valid UTF-8 character.


g_utf8_get_char ()

gunichar    g_utf8_get_char                 (const gchar *p);

Convert a sequence of bytes encoded as UTF-8 to a unicode character. If p does not point to a valid UTF-8 encoded character, results are undefined.

p : a pointer to a Unicode character encoded as UTF-8
Returns : the resulting character


g_utf8_offset_to_pointer ()

gchar*      g_utf8_offset_to_pointer        (const gchar *str,
                                             gint offset);

Converts from an integer character offset to a pointer to a position within the string.

str : a UTF-8 encoded string
offset : a character offset within the string.
Returns : the resulting pointer


g_utf8_pointer_to_offset ()

gint        g_utf8_pointer_to_offset        (const gchar *str,
                                             const gchar *pos);

Converts from a pointer to a position within a string to an integer character offset.

str : a UTF-8 encoded string
pos : a pointer to a position within str
Returns : the resulting character offset


g_utf8_prev_char ()

gchar*      g_utf8_prev_char                (const gchar *p);

Find the previous UTF-8 character in the string before p.

p does not have to be at the beginning of a UTF-8 character. No check is made to see if the character found is actually valid other than it starts with an appropriate byte. If p might be the first character of the string, you must use g_utf8_find_prev_char() instead.

p : a pointer to a position within a UTF-8 encoded string
Returns : a pointer to the found character.


g_utf8_find_next_char ()

gchar*      g_utf8_find_next_char           (const gchar *p,
                                             const gchar *end);

Find the start of the next UTF-8 character in the string after p.

p does not have to be at the beginning of a UTF-8 character. No check is made to see if the character found is actually valid other than it starts with an appropriate byte.

p : a pointer to a position within a UTF-8 encoded string
end : a pointer to the end of the string, or NULL to indicate that the string is nul-terminated
Returns : a pointer to the found character or NULL


g_utf8_find_prev_char ()

gchar*      g_utf8_find_prev_char           (const gchar *str,
                                             const gchar *p);

Given a position p within a UTF-8 encoded string str, find the start of the previous UTF-8 character starting before p. Returns NULL if no UTF-8 characters are present in str before p.

p does not have to be at the beginning of a UTF-8 character. No check is made to see if the character found is actually valid other than it starts with an appropriate byte.

str : pointer to the beginning of a UTF-8 string
p : pointer to some position within str
Returns : a pointer to the found character or NULL.


g_utf8_strlen ()

gint        g_utf8_strlen                   (const gchar *p,
                                             gint max);

p : pointer to the start of a UTF-8 string.
max : the maximum number of bytes to examine. If max is less than 0, then the string is assumed to be nul-terminated.
Returns : the length of the string in characters


g_utf8_strncpy ()

gchar*      g_utf8_strncpy                  (gchar *dest,
                                             const gchar *src,
                                             gsize n);

Like the standard C strncpy() function, but copies a given number of characters instead of a given number of bytes. The src string must be valid UTF-8 encoded text. (Use g_utf8_validate() on all text before trying to use UTF-8 utility functions with it.)

dest : buffer to fill with characters from src
src : UTF-8 string
n : character count
Returns : dest


g_utf8_strchr ()

gchar*      g_utf8_strchr                   (const gchar *p,
                                             gint len,
                                             gunichar c);

Find the leftmost occurrence of the given ISO-10646 character in a UTF-8 string, while limiting the search to len bytes. If len is -1, allow unbounded search.

p : a nul-terminated utf-8 string
len : the maximum number of bytes of p to search, or -1 for an unbounded search
c : an ISO-10646 character
Returns : NULL if the string does not contain the character, otherwise, a pointer to the start of the leftmost occurrence of the character in the string.


g_utf8_strrchr ()

gchar*      g_utf8_strrchr                  (const gchar *p,
                                             gint len,
                                             gunichar c);

Find the rightmost occurrence of the given ISO-10646 character in a UTF-8 string, while limiting the search to len bytes. If len is -1, allow unbounded search.

p : a nul-terminated utf-8 string
len : the maximum number of bytes of p to search, or -1 for an unbounded search
c : an ISO-10646 character
Returns : NULL if the string does not contain the character, otherwise, a pointer to the start of the rightmost occurrence of the character in the string.


g_utf8_validate ()

gboolean    g_utf8_validate                 (const gchar *str,
                                             gint max_len,
                                             const gchar **end);

Validates UTF-8 encoded text. str is the text to validate; if str is nul-terminated, then max_len can be -1, otherwise max_len should be the number of bytes to validate. If end is non-NULL, then the end of the valid range will be stored there (i.e. the address of the first invalid byte if some bytes were invalid, or the end of the text being validated otherwise).

Returns TRUE if all of str was valid. Many GLib and GTK+ routines require valid UTF-8 as input, so data read from a file or the network should be checked with g_utf8_validate() before doing anything else with it.

str : a pointer to character data
max_len : max bytes to validate, or -1 to go until nul
end : return location for end of valid data
Returns : TRUE if the text was valid UTF-8.


g_utf8_to_utf16 ()

gunichar2*  g_utf8_to_utf16                 (const gchar *str,
                                             gint len,
                                             gint *items_read,
                                             gint *items_written,
                                             GError **error);

Convert a string from UTF-8 to UTF-16. A 0 word will be added to the result after the converted text.

str : a UTF-8 encoded string
len : the maximum length of str to use. If < 0, then the string is nul-terminated.
items_read : location to store number of bytes read, or NULL. If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be returned in case str contains a trailing partial character. If an error occurs then the index of the invalid input is stored here.
items_written : location to store number of words written, or NULL. The value stored here does not include the trailing 0 word.
error : location to store the error occurring, or NULL to ignore errors. Any of the errors in GConvertError other than G_CONVERT_ERROR_NO_CONVERSION may occur.
Returns : a pointer to a newly allocated UTF-16 string. This value must be freed with g_free(). If an error occurs, NULL will be returned and error set.


g_utf8_to_ucs4 ()

gunichar*   g_utf8_to_ucs4                  (const gchar *str,
                                             gint len,
                                             gint *items_read,
                                             gint *items_written,
                                             GError **error);

Convert a string from UTF-8 to a 32-bit fixed width representation as UCS-4. A trailing 0 will be added to the string after the converted text.

str : a UTF-8 encoded string
len : the maximum length of str to use. If < 0, then the string is nul-terminated.
items_read : location to store number of bytes read, or NULL. If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be returned in case str contains a trailing partial character. If an error occurs then the index of the invalid input is stored here.
items_written : location to store number of characters written, or NULL. The value stored here does not include the trailing 0 character.
error : location to store the error occurring, or NULL to ignore errors. Any of the errors in GConvertError other than G_CONVERT_ERROR_NO_CONVERSION may occur.
Returns : a pointer to a newly allocated UCS-4 string. This value must be freed with g_free(). If an error occurs, NULL will be returned and error set.


g_utf8_to_ucs4_fast ()

gunichar*   g_utf8_to_ucs4_fast             (const gchar *str,
                                             gint len,
                                             gint *items_written);

Convert a string from UTF-8 to a 32-bit fixed width representation as UCS-4, assuming valid UTF-8 input. This function is roughly twice as fast as g_utf8_to_ucs4() but does no error checking on the input.

str : a UTF-8 encoded string
len : the maximum length of str to use. If < 0, then the string is nul-terminated.
items_written : location to store the number of characters in the result, or NULL.
Returns : a pointer to a newly allocated UCS-4 string. This value must be freed with g_free()


g_utf16_to_ucs4 ()

gunichar*   g_utf16_to_ucs4                 (const gunichar2 *str,
                                             gint len,
                                             gint *items_read,
                                             gint *items_written,
                                             GError **error);

Convert a string from UTF-16 to UCS-4. The result will be terminated with a 0 character.

str : a UTF-16 encoded string
len : the maximum length of str to use. If < 0, then the string is terminated with a 0 character.
items_read : location to store number of words read, or NULL. If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be returned in case str contains a trailing partial character. If an error occurs then the index of the invalid input is stored here.
items_written : location to store number of characters written, or NULL. The value stored here does not include the trailing 0 character.
error : location to store the error occurring, or NULL to ignore errors. Any of the errors in GConvertError other than G_CONVERT_ERROR_NO_CONVERSION may occur.
Returns : a pointer to a newly allocated UCS-4 string. This value must be freed with g_free(). If an error occurs, NULL will be returned and error set.


g_utf16_to_utf8 ()

gchar*      g_utf16_to_utf8                 (const gunichar2 *str,
                                             gint len,
                                             gint *items_read,
                                             gint *items_written,
                                             GError **error);

Convert a string from UTF-16 to UTF-8. The result will be terminated with a 0 byte.

str : a UTF-16 encoded string
len : the maximum length of str to use. If < 0, then the string is terminated with a 0 character.
items_read : location to store number of words read, or NULL. If NULL, then G_CONVERT_ERROR_PARTIAL_INPUT will be returned in case str contains a trailing partial character. If an error occurs then the index of the invalid input is stored here.
items_written : location to store number of bytes written, or NULL. The value stored here does not include the trailing 0 byte.
error : location to store the error occurring, or NULL to ignore errors. Any of the errors in GConvertError other than G_CONVERT_ERROR_NO_CONVERSION may occur.
Returns : a pointer to a newly allocated UTF-8 string. This value must be freed with g_free(). If an error occurs, NULL will be returned and error set.


g_ucs4_to_utf16 ()

gunichar2*  g_ucs4_to_utf16                 (const gunichar *str,
                                             gint len,
                                             gint *items_read,
                                             gint *items_written,
                                             GError **error);

Convert a string from UCS-4 to UTF-16. A 0 word will be added to the result after the converted text.

str : a UCS-4 encoded string
len : the maximum length of str to use. If < 0, then the string is terminated with a zero character.
items_read : location to store number of bytes read, or NULL. If an error occurs then the index of the invalid input is stored here.
items_written : location to store number of words written, or NULL. The value stored here does not include the trailing 0 word.
error : location to store the error occurring, or NULL to ignore errors. Any of the errors in GConvertError other than G_CONVERT_ERROR_NO_CONVERSION may occur.
Returns : a pointer to a newly allocated UTF-16 string. This value must be freed with g_free(). If an error occurs, NULL will be returned and error set.


g_ucs4_to_utf8 ()

gchar*      g_ucs4_to_utf8                  (const gunichar *str,
                                             gint len,
                                             gint *items_read,
                                             gint *items_written,
                                             GError **error);

Convert a string from a 32-bit fixed width representation as UCS-4 to UTF-8. The result will be terminated with a 0 byte.

str : a UCS-4 encoded string
len : the maximum length of str to use. If < 0, then the string is terminated with a 0 character.
items_read : location to store number of characters read, or NULL.
items_written : location to store number of bytes written, or NULL. The value stored here does not include the trailing 0 byte.
error : location to store the error occurring, or NULL to ignore errors. Any of the errors in GConvertError other than G_CONVERT_ERROR_NO_CONVERSION may occur.
Returns : a pointer to a newly allocated UTF-8 string. This value must be freed with g_free(). If an error occurs, NULL will be returned and error set.


g_unichar_to_utf8 ()

gint        g_unichar_to_utf8               (gunichar c,
                                             char *outbuf);

Convert a single character to UTF-8.

c : an ISO-10646 character code
outbuf : output buffer, must have at least 6 bytes of space. If NULL, the length will be computed and returned and nothing will be written to outbuf.
Returns : number of bytes written