@@ -201,6 +201,10 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
201201#define MY_CS_UNICODE_SUPPLEMENT 16384 /* Non-BMP Unicode characters */
202202#define MY_CS_LOWER_SORT 32768 /* If use lower case as weight */
203203#define MY_CS_STRNXFRM_BAD_NWEIGHTS 0x10000 /* strnxfrm ignores "nweights" */
204+ #define MY_CS_NOPAD 0x20000 /* Set if trailing spaces are not ignored */
205+ #define MY_CS_NON1TO1 0x40000 /* Has a complex mapping from characters
206+ to weights, e.g. contractions, expansions,
207+ ignorable characters */
204208#define MY_CHARSET_UNDEFINED 0
205209
206210/* Character repertoire flags */
@@ -511,6 +515,20 @@ struct my_charset_handler_st
511515 char * dst , size_t dst_length ,
512516 const char * src , size_t src_length ,
513517 size_t nchars , MY_STRCOPY_STATUS * status );
518+ /**
519+ Write a character to the target string, using its native code.
520+ For Unicode character sets (utf8, ucs2, utf16, utf16le, utf32, filename)
521+ native codes are equivalent to Unicode code points.
522+ For 8bit character sets the native code is just the byte value.
523+ For Asian character sets:
524+ - MB1 native code is just the byte value (e.g. on the ASCII range)
525+ - MB2 native code is ((b0 << 8) + b1).
526+ - MB3 native code is ((b0 <<16) + (b1 << 8) + b2)
527+ Note, CHARSET_INFO::min_sort_char and CHARSET_INFO::max_sort_char
528+ are defined in native notation and should be written using
529+ cs->cset->native_to_mb() rather than cs->cset->wc_mb().
530+ */
531+ my_charset_conv_wc_mb native_to_mb ;
514532};
515533
516534extern MY_CHARSET_HANDLER my_charset_8bit_handler ;
@@ -664,6 +682,7 @@ extern int my_strcasecmp_8bit(CHARSET_INFO * cs, const char *, const char *);
664682
665683int my_mb_wc_8bit (CHARSET_INFO * cs ,my_wc_t * wc , const uchar * s ,const uchar * e );
666684int my_wc_mb_8bit (CHARSET_INFO * cs ,my_wc_t wc , uchar * s , uchar * e );
685+ int my_wc_mb_bin (CHARSET_INFO * cs ,my_wc_t wc , uchar * s , uchar * e );
667686
668687int my_mb_ctype_8bit (CHARSET_INFO * ,int * , const uchar * ,const uchar * );
669688int my_mb_ctype_mb (CHARSET_INFO * ,int * , const uchar * ,const uchar * );
0 commit comments