#ifndef PG_WCHAR_H
#define PG_WCHAR_H
/*
 * pg_wchar is the wide-character type used by every prototype and inline
 * helper in this header.  It is a plain unsigned int.
 */
typedef unsigned int pg_wchar;
/*
 * NOTE(review): presumably the largest number of bytes one multibyte
 * character can occupy in any supported encoding -- confirm against users.
 */
#define MAX_MULTIBYTE_CHAR_LEN 4
/*
 * Byte values 0x8e and 0x8f.
 * NOTE(review): presumably the EUC "single shift" codes SS2/SS3 -- confirm.
 */
#define SS2 0x8e
#define SS3 0x8f
/*
 * Byte-range tests named for Shift-JIS head and tail bytes:
 *   ISSJISHEAD accepts 0x81..0x9f and 0xe0..0xfc
 *   ISSJISTAIL accepts 0x40..0x7e and 0x80..0xfc
 *
 * Unlike the IS_LC* macros in this header, the argument is NOT cast to
 * unsigned char, so a negative (signed char) value never matches.  The
 * argument may be evaluated up to four times; do not pass an expression
 * with side effects.
 */
#define ISSJISHEAD(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))
#define ISSJISTAIL(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
/*
 * One-byte codes named after single-byte character sets.
 * NOTE(review): presumably these are the charset-identifying lead bytes of
 * the MULE internal encoding -- confirm against the encoding documentation.
 *
 * The values run consecutively from 0x81 through 0x8e.
 */
#define LC_ISO8859_1		0x81
#define LC_ISO8859_2		0x82
#define LC_ISO8859_3		0x83
#define LC_ISO8859_4		0x84
#define LC_TIS620			0x85
#define LC_ISO8859_7		0x86
#define LC_ISO8859_6		0x87
#define LC_ISO8859_8		0x88
#define LC_JISX0201K		0x89
#define LC_JISX0201R		0x8a
#define LC_KOI8_R			0x8b
#define LC_ISO8859_5		0x8c
#define LC_ISO8859_9		0x8d	/* highest code accepted by IS_LC1 */
#define LC_ISO8859_15		0x8e	/* defined, but outside the IS_LC1 range */

/*
 * True when c, truncated to unsigned char, lies between LC_ISO8859_1 and
 * LC_ISO8859_9 inclusive (0x81..0x8d).  LC_ISO8859_15 is deliberately not
 * covered by this test.
 *
 * The argument may be evaluated twice; avoid side effects in it.
 */
#define IS_LC1(c) \
	((unsigned char) (c) >= LC_ISO8859_1 && \
	 (unsigned char) (c) <= LC_ISO8859_9)
/*
 * One-byte codes named after multibyte character sets; the values run
 * consecutively from 0x90 through 0x99.
 * NOTE(review): presumably MULE internal-encoding lead bytes -- confirm.
 */
#define LC_JISX0208_1978	0x90	/* lowest code accepted by IS_LC2 */
#define LC_GB2312_80		0x91
#define LC_JISX0208			0x92
#define LC_KS5601			0x93
#define LC_JISX0212			0x94
#define LC_CNS11643_1		0x95
#define LC_CNS11643_2		0x96
#define LC_JISX0213_1		0x97
#define LC_BIG5_1			0x98
#define LC_BIG5_2			0x99	/* highest code accepted by IS_LC2 */

/*
 * True when c, truncated to unsigned char, lies between LC_JISX0208_1978
 * and LC_BIG5_2 inclusive (0x90..0x99).
 *
 * The argument may be evaluated twice; avoid side effects in it.
 */
#define IS_LC2(c) \
	((unsigned char) (c) >= LC_JISX0208_1978 && \
	 (unsigned char) (c) <= LC_BIG5_2)
/*
 * Two marker bytes, 0x9a and 0x9b.
 * NOTE(review): presumably prefixes introducing "private" single-byte
 * charsets -- confirm against the encoding documentation.
 */
#define LCPRV1_A 0x9a
#define LCPRV1_B 0x9b
/* True when c (as unsigned char) equals LCPRV1_A or LCPRV1_B. */
#define IS_LCPRV1(c) ((unsigned char)(c) == LCPRV1_A || (unsigned char)(c) == LCPRV1_B)
/*
 * Range tests on c (as unsigned char): the _A range is 0xa0..0xdf and the
 * _B range is 0xe0..0xef.  All three macros may evaluate c twice.
 */
#define IS_LCPRV1_A_RANGE(c) \
((unsigned char)(c) >= 0xa0 && (unsigned char)(c) <= 0xdf)
#define IS_LCPRV1_B_RANGE(c) \
((unsigned char)(c) >= 0xe0 && (unsigned char)(c) <= 0xef)
/*
 * Two marker bytes, 0x9c and 0x9d.
 * NOTE(review): presumably prefixes introducing "private" multibyte
 * charsets -- confirm against the encoding documentation.
 */
#define LCPRV2_A 0x9c
#define LCPRV2_B 0x9d
/* True when c (as unsigned char) equals LCPRV2_A or LCPRV2_B. */
#define IS_LCPRV2(c) ((unsigned char)(c) == LCPRV2_A || (unsigned char)(c) == LCPRV2_B)
/*
 * Range tests on c (as unsigned char): the _A range is 0xf0..0xf4 and the
 * _B range is 0xf5..0xfe.  All three macros may evaluate c twice.
 */
#define IS_LCPRV2_A_RANGE(c) \
((unsigned char)(c) >= 0xf0 && (unsigned char)(c) <= 0xf4)
#define IS_LCPRV2_B_RANGE(c) \
((unsigned char)(c) >= 0xf5 && (unsigned char)(c) <= 0xfe)
/*
 * Codes 0xa0..0xa8.  Every one of these satisfies IS_LCPRV1_A_RANGE
 * (0xa0..0xdf).
 */
#define LC_SISHENG 0xa0
#define LC_IPA 0xa1
#define LC_VISCII_LOWER 0xa2
#define LC_VISCII_UPPER 0xa3
#define LC_ARABIC_DIGIT 0xa4
#define LC_ARABIC_1_COLUMN 0xa5
#define LC_ASCII_RIGHT_TO_LEFT 0xa6
#define LC_LAO 0xa7
#define LC_ARABIC_2_COLUMN 0xa8
/*
 * Codes 0xf0..0xf4.  Every one of these satisfies IS_LCPRV2_A_RANGE
 * (0xf0..0xf4).
 */
#define LC_INDIAN_1_COLUMN 0xf0
#define LC_TIBETAN_1_COLUMN 0xf1
#define LC_UNICODE_SUBSET_2 0xf2
#define LC_UNICODE_SUBSET_3 0xf3
#define LC_UNICODE_SUBSET 0xf4
/*
 * Codes 0xf5..0xfc.  Every one of these satisfies IS_LCPRV2_B_RANGE
 * (0xf5..0xfe).
 */
#define LC_ETHIOPIC 0xf5
#define LC_CNS11643_3 0xf6
#define LC_CNS11643_4 0xf7
#define LC_CNS11643_5 0xf8
#define LC_CNS11643_6 0xf9
#define LC_CNS11643_7 0xfa
#define LC_INDIAN_2_COLUMN 0xfb
#define LC_TIBETAN 0xfc
/*
 * Encoding identifiers.
 *
 * Values are assigned sequentially starting from PG_SQL_ASCII = 0, and the
 * ORDER IS SIGNIFICANT: the PG_VALID_BE_ENCODING and
 * PG_ENCODING_IS_CLIENT_ONLY macros in this header classify an encoding
 * purely by comparing its numeric value with PG_KOI8U (PG_ENCODING_BE_LAST)
 * and _PG_LAST_ENCODING_.  Inserting or reordering members therefore changes
 * which encodings those macros accept.
 */
typedef enum pg_enc
{
/* Members up to and including PG_KOI8U pass PG_VALID_BE_ENCODING. */
PG_SQL_ASCII = 0,
PG_EUC_JP,
PG_EUC_CN,
PG_EUC_KR,
PG_EUC_TW,
PG_EUC_JIS_2004,
PG_UTF8,
PG_MULE_INTERNAL,
PG_LATIN1,
PG_LATIN2,
PG_LATIN3,
PG_LATIN4,
PG_LATIN5,
PG_LATIN6,
PG_LATIN7,
PG_LATIN8,
PG_LATIN9,
PG_LATIN10,
PG_WIN1256,
PG_WIN1258,
PG_WIN866,
PG_WIN874,
PG_KOI8R,
PG_WIN1251,
PG_WIN1252,
PG_ISO_8859_5,
PG_ISO_8859_6,
PG_ISO_8859_7,
PG_ISO_8859_8,
PG_WIN1250,
PG_WIN1253,
PG_WIN1254,
PG_WIN1255,
PG_WIN1257,
PG_KOI8U,
/* Members from here to the sentinel pass PG_ENCODING_IS_CLIENT_ONLY. */
PG_SJIS,
PG_BIG5,
PG_GBK,
PG_UHC,
PG_GB18030,
PG_JOHAB,
PG_SHIFT_JIS_2004,
/* Sentinel: one past the last real encoding; not itself a valid encoding. */
_PG_LAST_ENCODING_
} pg_enc;
/*
 * Numeric range checks over pg_enc values.  Each macro evaluates its
 * argument twice, so do not pass an expression with side effects.
 *
 * PG_ENCODING_BE_LAST is the upper bound used by PG_VALID_BE_ENCODING.
 * NOTE(review): "BE"/"FE" presumably mean backend/frontend -- confirm.
 */
#define PG_ENCODING_BE_LAST PG_KOI8U
/* True for 0 .. PG_ENCODING_BE_LAST inclusive. */
#define PG_VALID_BE_ENCODING(_enc) \
((_enc) >= 0 && (_enc) <= PG_ENCODING_BE_LAST)
/* True for values strictly between PG_ENCODING_BE_LAST and the sentinel. */
#define PG_ENCODING_IS_CLIENT_ONLY(_enc) \
((_enc) > PG_ENCODING_BE_LAST && (_enc) < _PG_LAST_ENCODING_)
/* True for 0 .. _PG_LAST_ENCODING_ - 1, i.e. any real pg_enc member. */
#define PG_VALID_ENCODING(_enc) \
((_enc) >= 0 && (_enc) < _PG_LAST_ENCODING_)
/* Alias: expands to exactly the same test as PG_VALID_ENCODING. */
#define PG_VALID_FE_ENCODING(_enc) PG_VALID_ENCODING(_enc)
/*
 * Sizing constants for encoding conversion.
 * NOTE(review): the meanings below are inferred from the names only --
 * confirm against the conversion code before relying on them:
 *   MAX_CONVERSION_GROWTH          presumably worst-case output/input byte ratio
 *   MAX_CONVERSION_INPUT_LENGTH    presumably max input bytes consumed per step
 *   MAX_UNICODE_EQUIVALENT_STRING  presumably max bytes for one converted code point
 */
#define MAX_CONVERSION_GROWTH 4
#define MAX_CONVERSION_INPUT_LENGTH 16
#define MAX_UNICODE_EQUIVALENT_STRING 16
/*
 * Pairs an encoding name string with its pg_enc value.  On WIN32 builds the
 * struct carries one extra member, so its layout differs by platform.
 */
typedef struct pg_enc2name
{
const char *name;
pg_enc encoding;
#ifdef WIN32
/* NOTE(review): presumably the matching Windows code page number -- confirm. */
unsigned codepage;
#endif
} pg_enc2name;
/*
 * Tables defined elsewhere.
 * NOTE(review): presumably both are indexed by pg_enc value -- confirm.
 */
extern PGDLLIMPORT const pg_enc2name pg_enc2name_tbl[];
extern PGDLLIMPORT const char *pg_enc2gettext_tbl[];
/*
 * Function-pointer types for per-encoding routines; these are the member
 * types of pg_wchar_tbl (except mbcharacter_incrementer, which is the return
 * type of pg_database_encoding_character_incrementer()).
 *
 * NOTE(review): the meaning of each int return value (count of characters,
 * count of bytes, or error code) is not visible in this header -- check the
 * implementations before relying on it.
 */

/* Multibyte bytes in "from" -> pg_wchar array "to"; len bounds the input. */
typedef int (*mb2wchar_with_len_converter) (const unsigned char *from,
pg_wchar *to,
int len);
/* pg_wchar array "from" -> multibyte bytes in "to"; len bounds the input. */
typedef int (*wchar2mb_with_len_converter) (const pg_wchar *from,
unsigned char *to,
int len);
/* Takes only a pointer, no length: the callee cannot see the buffer's end. */
typedef int (*mblen_converter) (const unsigned char *mbstr);
typedef int (*mbdisplaylen_converter) (const unsigned char *mbstr);
/* Receives a writable buffer, unlike the other routines here. */
typedef bool (*mbcharacter_incrementer) (unsigned char *mbstr, int len);
typedef int (*mbchar_verifier) (const unsigned char *mbstr, int len);
typedef int (*mbstr_verifier) (const unsigned char *mbstr, int len);
/*
 * Bundle of per-encoding routines plus one integer limit.  Member order is
 * part of the interface for anything using positional initializers.
 */
typedef struct
{
mb2wchar_with_len_converter mb2wchar_with_len;
wchar2mb_with_len_converter wchar2mb_with_len;
mblen_converter mblen;
mbdisplaylen_converter dsplen;
mbchar_verifier mbverifychar;
mbstr_verifier mbverifystr;
/* NOTE(review): presumably the max bytes per character for the encoding. */
int maxmblen;
} pg_wchar_tbl;
/* NOTE(review): presumably one entry per pg_enc value -- confirm. */
extern PGDLLIMPORT const pg_wchar_tbl pg_wchar_table[];
/*
 * Descriptor for a radix-tree lookup table, passed as the "map" argument of
 * UtfToLocal() and LocalToUtf().
 *
 * The members form four groups, b1 through b4.  Group bN holds a root value
 * and N pairs of lower/upper byte bounds (bN_1 .. bN_N).
 * NOTE(review): presumably group bN serves N-byte input characters, each
 * lower/upper pair gives the valid range of the corresponding input byte,
 * and each root is an offset into the chars16/chars32 array -- confirm
 * against the lookup code.
 */
typedef struct
{
/* Two value arrays of different element width.
 * NOTE(review): presumably only one is non-NULL for a given tree. */
const uint16 *chars16;
const uint32 *chars32;
/* Group b1: root plus one byte range. */
uint32 b1root;
uint8 b1_lower;
uint8 b1_upper;
/* Group b2: root plus two byte ranges. */
uint32 b2root;
uint8 b2_1_lower;
uint8 b2_1_upper;
uint8 b2_2_lower;
uint8 b2_2_upper;
/* Group b3: root plus three byte ranges. */
uint32 b3root;
uint8 b3_1_lower;
uint8 b3_1_upper;
uint8 b3_2_lower;
uint8 b3_2_upper;
uint8 b3_3_lower;
uint8 b3_3_upper;
/* Group b4: root plus four byte ranges. */
uint32 b4root;
uint8 b4_1_lower;
uint8 b4_1_upper;
uint8 b4_2_lower;
uint8 b4_2_upper;
uint8 b4_3_lower;
uint8 b4_3_upper;
uint8 b4_4_lower;
uint8 b4_4_upper;
} pg_mb_radix_tree;
/*
 * Element type of the "cmap" array taken by UtfToLocal(): two UTF values
 * (utf1, utf2) associated with one local code.
 * NOTE(review): presumably describes a pair of UTF-8 characters that together
 * map to a single local-encoding character -- confirm.
 */
typedef struct
{
uint32 utf1;
uint32 utf2;
uint32 code;
} pg_utf_to_local_combined;
/*
 * Element type of the "cmap" array taken by LocalToUtf(): the same three
 * values as above, but with the local code placed first.
 */
typedef struct
{
uint32 code;
uint32 utf1;
uint32 utf2;
} pg_local_to_utf_combined;
/*
 * Callback mapping one uint32 code to another; passed as "conv_func" to
 * UtfToLocal() and LocalToUtf().
 */
typedef uint32 (*utf_local_conversion_func) (uint32 code);
/*
 * Convenience wrapper around check_encoding_conversion_args().
 *
 * It fetches arguments 0, 1 and 4 of the enclosing call via PG_GETARG_INT32
 * and passes them as src_encoding, dest_encoding and len respectively,
 * followed by the caller-supplied expected source and destination encodings.
 * Arguments 2 and 3 are not examined here.
 *
 * It can only be used where PG_GETARG_INT32 is usable.
 * NOTE(review): presumably that means inside a function using the fmgr
 * calling convention -- confirm.
 */
#define CHECK_ENCODING_CONVERSION_ARGS(srcencoding,destencoding) \
check_encoding_conversion_args(PG_GETARG_INT32(0), \
PG_GETARG_INT32(1), \
PG_GETARG_INT32(4), \
(srcencoding), \
(destencoding))
/*
 * Report whether c lies in the range 1 .. 0x10FFFF.
 *
 * Zero is rejected, as is anything above 0x10FFFF.  No other values are
 * excluded; in particular the surrogate range 0xD800..0xDFFF is accepted.
 */
static inline bool
is_valid_unicode_codepoint(pg_wchar c)
{
	/* pg_wchar is unsigned, so "not zero" is the whole lower-bound test */
	if (c == 0)
		return false;

	return c < 0x110000;
}
/*
 * Report whether c lies in 0xD800 .. 0xDBFF, the range that holds the first
 * half of a UTF-16 surrogate pair.
 */
static inline bool
is_utf16_surrogate_first(pg_wchar c)
{
	/*
	 * The range is exactly the 1024 values sharing 0xD800's upper bits, so
	 * clearing the low ten bits and comparing is equivalent to a range test.
	 */
	return (c & ~(pg_wchar) 0x3FF) == 0xD800;
}
/*
 * Report whether c lies in 0xDC00 .. 0xDFFF, the range that holds the second
 * half of a UTF-16 surrogate pair.
 */
static inline bool
is_utf16_surrogate_second(pg_wchar c)
{
	/*
	 * The range is exactly the 1024 values sharing 0xDC00's upper bits, so
	 * clearing the low ten bits and comparing is equivalent to a range test.
	 */
	return (c & ~(pg_wchar) 0x3FF) == 0xDC00;
}
/*
 * Combine the two halves of a UTF-16 surrogate pair into one code point.
 *
 * Only the low ten bits of each argument are used; the function does not
 * check that the inputs really are surrogates.
 */
static inline pg_wchar
surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
{
	pg_wchar	upper_ten = (first & 0x3FF) << 10;
	pg_wchar	lower_ten = second & 0x3FF;

	/* surrogate pairs address the code points starting at 0x10000 */
	return upper_ten + 0x10000 + lower_ten;
}
/*
 * Decode the single UTF-8 sequence starting at c into a code point.
 *
 * The lead byte selects a 1-, 2-, 3- or 4-byte form, and exactly that many
 * bytes are read from c.  Continuation bytes are not validated: their low six
 * bits are used whatever the top two bits are.  If the lead byte matches none
 * of the four forms, 0xffffffff is returned and no further bytes are read.
 */
static inline pg_wchar
utf8_to_unicode(const unsigned char *c)
{
	const unsigned char lead = c[0];

	/* 0xxxxxxx: the byte is the code point */
	if ((lead & 0x80) == 0)
		return (pg_wchar) lead;

	/* 110xxxxx 10xxxxxx */
	if ((lead & 0xe0) == 0xc0)
	{
		pg_wchar	b0 = (pg_wchar) (lead & 0x1f);
		pg_wchar	b1 = (pg_wchar) (c[1] & 0x3f);

		return (b0 << 6) | b1;
	}

	/* 1110xxxx 10xxxxxx 10xxxxxx */
	if ((lead & 0xf0) == 0xe0)
	{
		pg_wchar	b0 = (pg_wchar) (lead & 0x0f);
		pg_wchar	b1 = (pg_wchar) (c[1] & 0x3f);
		pg_wchar	b2 = (pg_wchar) (c[2] & 0x3f);

		return (b0 << 12) | (b1 << 6) | b2;
	}

	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	if ((lead & 0xf8) == 0xf0)
	{
		pg_wchar	b0 = (pg_wchar) (lead & 0x07);
		pg_wchar	b1 = (pg_wchar) (c[1] & 0x3f);
		pg_wchar	b2 = (pg_wchar) (c[2] & 0x3f);
		pg_wchar	b3 = (pg_wchar) (c[3] & 0x3f);

		return (b0 << 18) | (b1 << 12) | (b2 << 6) | b3;
	}

	/* stray continuation byte, or a lead byte of 0xf8 and above */
	return (pg_wchar) 0xffffffff;
}
/*
 * Encode code point c as UTF-8 into utf8string and return utf8string.
 *
 * Between one and four bytes are written, depending on the magnitude of c;
 * no terminator is appended and bytes past the encoded length are left
 * untouched.  The caller must provide room for up to four bytes.  c is not
 * validated: any value above 0xFFFF takes the four-byte form, with bits above
 * bit 20 discarded.
 */
static inline unsigned char *
unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
{
	unsigned char lead;
	int			ntrail;			/* number of continuation bytes */
	int			i;

	/* Choose the form: lead byte pattern plus continuation-byte count. */
	if (c <= 0x7F)
	{
		lead = (unsigned char) c;
		ntrail = 0;
	}
	else if (c <= 0x7FF)
	{
		lead = (unsigned char) (0xC0 | ((c >> 6) & 0x1F));
		ntrail = 1;
	}
	else if (c <= 0xFFFF)
	{
		lead = (unsigned char) (0xE0 | ((c >> 12) & 0x0F));
		ntrail = 2;
	}
	else
	{
		lead = (unsigned char) (0xF0 | ((c >> 18) & 0x07));
		ntrail = 3;
	}

	utf8string[0] = lead;

	/* Each continuation byte carries six payload bits, high group first. */
	for (i = 1; i <= ntrail; i++)
	{
		int			shift = 6 * (ntrail - i);

		utf8string[i] = (unsigned char) (0x80 | ((c >> shift) & 0x3F));
	}

	return utf8string;
}
/*
 * Return how many bytes unicode_to_utf8() writes for code point c: 1, 2, 3
 * or 4.  The thresholds are the same ones unicode_to_utf8() uses, and c is
 * likewise not validated (anything above 0xFFFF yields 4).
 */
static inline int
unicode_utf8len(pg_wchar c)
{
	int			nbytes = 1;

	/* one extra byte for each threshold the code point exceeds */
	if (c > 0x7F)
		nbytes++;
	if (c > 0x7FF)
		nbytes++;
	if (c > 0xFFFF)
		nbytes++;

	return nbytes;
}
/*
 * Symbol renaming.
 *
 * When USE_PRIVATE_ENCODING_FUNCS is defined, or when FRONTEND is not
 * defined, the five names below are macro-replaced by "_private" variants.
 * Because the replacement happens in the preprocessor, the prototypes that
 * follow in this header, and every call site that includes it under those
 * conditions, refer to the *_private symbols instead of the plain names.
 *
 * NOTE(review): presumably this keeps these functions from clashing with
 * same-named symbols exported by a client library -- confirm the rationale.
 */
#if defined(USE_PRIVATE_ENCODING_FUNCS) || !defined(FRONTEND)
#define pg_char_to_encoding pg_char_to_encoding_private
#define pg_encoding_to_char pg_encoding_to_char_private
#define pg_valid_server_encoding pg_valid_server_encoding_private
#define pg_valid_server_encoding_id pg_valid_server_encoding_id_private
#define pg_utf_mblen pg_utf_mblen_private
#endif
/*
 * Encoding lookup and per-encoding character routines, all taking the
 * encoding as an explicit int argument or an encoding name.
 *
 * pg_char_to_encoding, pg_encoding_to_char, pg_valid_server_encoding_id and
 * pg_valid_server_encoding may be renamed to *_private variants by the
 * macros defined just above these prototypes.
 *
 * NOTE(review): return-value conventions (e.g. what is returned for an
 * unknown name or an invalid character) are not visible in this header.
 */
extern int pg_char_to_encoding(const char *name);
extern const char *pg_encoding_to_char(int encoding);
extern int pg_valid_server_encoding_id(int encoding);
/* Writes into dst; NOTE(review): required size of dst is not visible here. */
extern void pg_encoding_set_invalid(int encoding, char *dst);
/* These three take no length argument for mbstr. */
extern int pg_encoding_mblen(int encoding, const char *mbstr);
extern int pg_encoding_mblen_bounded(int encoding, const char *mbstr);
extern int pg_encoding_dsplen(int encoding, const char *mbstr);
extern int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len);
extern int pg_encoding_verifymbstr(int encoding, const char *mbstr, int len);
extern int pg_encoding_max_length(int encoding);
extern int pg_valid_client_encoding(const char *name);
extern int pg_valid_server_encoding(const char *name);
/* Encoding queries related to ICU; both take the encoding as an int. */
extern bool is_encoding_supported_by_icu(int encoding);
extern const char *get_encoding_name_for_icu(int encoding);

/*
 * Out-of-line UTF-8 and MULE helpers.
 *
 * unicode_to_utf8() and utf8_to_unicode() are deliberately NOT declared
 * here: both are defined as static inline functions earlier in this header,
 * so an additional "extern" prototype would be redundant and would wrongly
 * suggest that they have external linkage.
 *
 * pg_utf_mblen may be renamed to pg_utf_mblen_private by the renaming macros
 * earlier in this header.
 */
extern bool pg_utf8_islegal(const unsigned char *source, int length);
extern int	pg_utf_mblen(const unsigned char *s);
extern int	pg_mule_mblen(const unsigned char *s);
/*
 * Multibyte <-> pg_wchar conversion.  The pg_encoding_* variants take the
 * encoding explicitly.
 * NOTE(review): the others presumably use the current database encoding --
 * confirm.  The variants without a len argument presumably expect a
 * NUL-terminated input -- confirm.
 */
extern int pg_mb2wchar(const char *from, pg_wchar *to);
extern int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len);
extern int pg_encoding_mb2wchar_with_len(int encoding,
const char *from, pg_wchar *to, int len);
extern int pg_wchar2mb(const pg_wchar *from, char *to);
extern int pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len);
extern int pg_encoding_wchar2mb_with_len(int encoding,
const pg_wchar *from, char *to, int len);
/* String comparison and length helpers involving pg_wchar strings. */
extern int pg_char_and_wchar_strcmp(const char *s1, const pg_wchar *s2);
extern int pg_wchar_strncmp(const pg_wchar *s1, const pg_wchar *s2, size_t n);
extern int pg_char_and_wchar_strncmp(const char *s1, const pg_wchar *s2, size_t n);
extern size_t pg_wchar_strlen(const pg_wchar *str);
/*
 * Length, display-width and clipping helpers on multibyte strings.
 * NOTE(review): whether "limit" counts bytes or characters differs by
 * function and is not visible here -- check the implementations.
 */
extern int pg_mblen(const char *mbstr);
extern int pg_dsplen(const char *mbstr);
extern int pg_mbstrlen(const char *mbstr);
extern int pg_mbstrlen_with_len(const char *mbstr, int limit);
extern int pg_mbcliplen(const char *mbstr, int len, int limit);
extern int pg_encoding_mbcliplen(int encoding, const char *mbstr,
int len, int limit);
extern int pg_mbcharcliplen(const char *mbstr, int len, int limit);
extern int pg_database_encoding_max_length(void);
/* Returns a function pointer; see the mbcharacter_incrementer typedef. */
extern mbcharacter_incrementer pg_database_encoding_character_incrementer(void);
/*
 * Getters and setters for the client, database and message encodings.
 * NOTE(review): the meaning of the int results of PrepareClientEncoding and
 * SetClientEncoding (success/failure convention) is not visible here.
 */
extern int PrepareClientEncoding(int encoding);
extern int SetClientEncoding(int encoding);
extern void InitializeClientEncoding(void);
extern int pg_get_client_encoding(void);
extern const char *pg_get_client_encoding_name(void);
extern void SetDatabaseEncoding(int encoding);
extern int GetDatabaseEncoding(void);
extern const char *GetDatabaseEncodingName(void);
extern void SetMessageEncoding(int encoding);
extern int GetMessageEncoding(void);
/* Declared only in builds that define ENABLE_NLS. */
#ifdef ENABLE_NLS
extern int pg_bind_textdomain_codeset(const char *domainname);
#endif
/*
 * Encoding conversion entry points.
 *
 * NOTE(review): for the functions returning a pointer, this header does not
 * show whether the result is newly allocated or may be the input pointer
 * itself; check the implementation before freeing or modifying the result.
 */
extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len,
int src_encoding,
int dest_encoding);
/*
 * Variant writing into a caller-supplied buffer of destlen bytes, using the
 * conversion procedure identified by proc.
 */
extern int pg_do_encoding_conversion_buf(Oid proc,
int src_encoding,
int dest_encoding,
unsigned char *src, int srclen,
unsigned char *dest, int destlen,
bool noError);
extern char *pg_client_to_server(const char *s, int len);
extern char *pg_server_to_client(const char *s, int len);
extern char *pg_any_to_server(const char *s, int len, int encoding);
extern char *pg_server_to_any(const char *s, int len, int encoding);
/*
 * Code point -> server-encoding bytes written to s.
 * NOTE(review): the required size of s is not visible here; presumably
 * related to MAX_UNICODE_EQUIVALENT_STRING -- confirm.
 */
extern void pg_unicode_to_server(pg_wchar c, unsigned char *s);
extern bool pg_unicode_to_server_noerror(pg_wchar c, unsigned char *s);
/*
 * Big5 <-> CNS code conversion.  BIG5toCNS takes lc as a pointer (an output
 * parameter), while CNStoBIG5 takes lc by value (an input).
 */
extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc);
extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc);
/*
 * Table-driven conversion between UTF-8 and a local encoding.
 *
 * Both functions take an input buffer with its length, an output buffer, a
 * radix-tree map, an array of combined-character entries with its size, a
 * per-code callback, the encoding, and a noError flag.  They differ in
 * direction and in the cmap element type.
 *
 * NOTE(review): presumably map, cmap and conv_func may each be NULL when not
 * needed, and the int result is the number of input bytes consumed --
 * confirm against the implementation.
 */
extern int UtfToLocal(const unsigned char *utf, int len,
unsigned char *iso,
const pg_mb_radix_tree *map,
const pg_utf_to_local_combined *cmap, int cmapsize,
utf_local_conversion_func conv_func,
int encoding, bool noError);
extern int LocalToUtf(const unsigned char *iso, int len,
unsigned char *utf,
const pg_mb_radix_tree *map,
const pg_local_to_utf_combined *cmap, int cmapsize,
utf_local_conversion_func conv_func,
int encoding, bool noError);
/*
 * String verification.  The pg_verify_mbstr* variants take the encoding
 * explicitly.
 * NOTE(review): pg_verifymbstr presumably uses the database encoding, and
 * noError presumably selects returning a failure result instead of raising
 * an error -- confirm.
 */
extern bool pg_verifymbstr(const char *mbstr, int len, bool noError);
extern bool pg_verify_mbstr(int encoding, const char *mbstr, int len,
bool noError);
extern int pg_verify_mbstr_len(int encoding, const char *mbstr, int len,
bool noError);
/*
 * Argument checker invoked by the CHECK_ENCODING_CONVERSION_ARGS macro.
 */
extern void check_encoding_conversion_args(int src_encoding,
int dest_encoding,
int len,
int expected_src_encoding,
int expected_dest_encoding);
/* Error reporters; both are marked as never returning to the caller. */
extern void report_invalid_encoding(int encoding, const char *mbstr, int len) pg_attribute_noreturn();
extern void report_untranslatable_char(int src_encoding, int dest_encoding,
const char *mbstr, int len) pg_attribute_noreturn();
/*
 * Conversion helpers for single-byte ("local"/"latin") encodings and the
 * "mic" form.  Each reads len bytes from its first argument and writes to p.
 *
 * NOTE(review): "mic" presumably means the MULE internal code and "lc" one
 * of the LC_* charset codes from this header; the size required for p and
 * the meaning of the int result are not visible here -- confirm.
 */
extern int local2local(const unsigned char *l, unsigned char *p, int len,
int src_encoding, int dest_encoding,
const unsigned char *tab, bool noError);
extern int latin2mic(const unsigned char *l, unsigned char *p, int len,
int lc, int encoding, bool noError);
extern int mic2latin(const unsigned char *mic, unsigned char *p, int len,
int lc, int encoding, bool noError);
/* The *_with_table variants additionally take a byte translation table. */
extern int latin2mic_with_table(const unsigned char *l, unsigned char *p,
int len, int lc, int encoding,
const unsigned char *tab, bool noError);
extern int mic2latin_with_table(const unsigned char *mic, unsigned char *p,
int len, int lc, int encoding,
const unsigned char *tab, bool noError);
/*
 * Windows-only: declared only when WIN32 is defined.  Returns a WCHAR
 * string and reports a length through *utf16len.
 * NOTE(review): ownership of the returned buffer and the unit of *utf16len
 * are not visible here -- confirm against the implementation.
 */
#ifdef WIN32
extern WCHAR *pgwin32_message_to_UTF16(const char *str, int len, int *utf16len);
#endif
#endif