mirror of
https://github.com/DaveGamble/cJSON.git
synced 2023-08-10 21:13:26 +03:00
inbuilt hex parser for unicode, which ought to be a lot faster.
git-svn-id: http://svn.code.sf.net/p/cjson/code@57 e3330c51-1366-4df0-8b21-3ccf24e3d50e
This commit is contained in:
parent
22e51c92f0
commit
c537515c17
18
cJSON.c
18
cJSON.c
@ -97,7 +97,6 @@ static const char *parse_number(cJSON *item,const char *num)
|
||||
{
|
||||
double n=0,sign=1,scale=0;int subscale=0,signsubscale=1;
|
||||
|
||||
/* Could use sscanf for this? */
|
||||
if (*num=='-') sign=-1,num++; /* Has sign? */
|
||||
if (*num=='0') num++; /* is zero */
|
||||
if (*num>='1' && *num<='9') do n=(n*10.0)+(*num++ -'0'); while (*num>='0' && *num<='9'); /* Number? */
|
||||
@ -138,6 +137,19 @@ static char *print_number(cJSON *item)
|
||||
return str;
|
||||
}
|
||||
|
||||
static unsigned parse_hex4(const char *str)
|
||||
{
|
||||
unsigned h=0;
|
||||
if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0;
|
||||
h=(h&15)<<4;str++;
|
||||
if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0;
|
||||
h=(h&15)<<4;str++;
|
||||
if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0;
|
||||
h=(h&15)<<4;str++;
|
||||
if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0;
|
||||
return h;
|
||||
}
|
||||
|
||||
/* Parse the input text into an unescaped cstring, and populate item. */
|
||||
static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
|
||||
static const char *parse_string(cJSON *item,const char *str)
|
||||
@ -165,14 +177,14 @@ static const char *parse_string(cJSON *item,const char *str)
|
||||
case 'r': *ptr2++='\r'; break;
|
||||
case 't': *ptr2++='\t'; break;
|
||||
case 'u': /* transcode utf16 to utf8. */
|
||||
sscanf(ptr+1,"%4x",&uc);ptr+=4; /* get the unicode char. */
|
||||
uc=parse_hex4(ptr+1);ptr+=4; /* get the unicode char. */
|
||||
|
||||
if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0) break; /* check for invalid. */
|
||||
|
||||
if (uc>=0xD800 && uc<=0xDBFF) /* UTF16 surrogate pairs. */
|
||||
{
|
||||
if (ptr[1]!='\\' || ptr[2]!='u') break; /* missing second-half of surrogate. */
|
||||
sscanf(ptr+3,"%4x",&uc2);ptr+=6;
|
||||
uc2=parse_hex4(ptr+3);ptr+=6;
|
||||
if (uc2<0xDC00 || uc2>0xDFFF) break; /* invalid second-half of surrogate. */
|
||||
uc=0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user