From c537515c171fa0c424c58eb3fbc7123801f99d1b Mon Sep 17 00:00:00 2001 From: Dave Gamble Date: Wed, 14 Aug 2013 13:20:42 +0000 Subject: [PATCH] inbuilt hex parser for unicode, which ought to be a lot faster. git-svn-id: http://svn.code.sf.net/p/cjson/code@57 e3330c51-1366-4df0-8b21-3ccf24e3d50e --- cJSON.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/cJSON.c b/cJSON.c index f1706df..6132cbc 100644 --- a/cJSON.c +++ b/cJSON.c @@ -97,7 +97,6 @@ static const char *parse_number(cJSON *item,const char *num) { double n=0,sign=1,scale=0;int subscale=0,signsubscale=1; - /* Could use sscanf for this? */ if (*num=='-') sign=-1,num++; /* Has sign? */ if (*num=='0') num++; /* is zero */ if (*num>='1' && *num<='9') do n=(n*10.0)+(*num++ -'0'); while (*num>='0' && *num<='9'); /* Number? */ @@ -138,6 +137,19 @@ static char *print_number(cJSON *item) return str; } +static unsigned parse_hex4(const char *str) +{ + unsigned h=0; + if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0; + h=(h&15)<<4;str++; + if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0; + h=(h&15)<<4;str++; + if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0; + h=(h&15)<<4;str++; + if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0; + return h; +} + /* Parse the input text into an unescaped cstring, and populate item. */ static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; static const char *parse_string(cJSON *item,const char *str) @@ -165,14 +177,14 @@ static const char *parse_string(cJSON *item,const char *str) case 'r': *ptr2++='\r'; break; case 't': *ptr2++='\t'; break; case 'u': /* transcode utf16 to utf8. */ - sscanf(ptr+1,"%4x",&uc);ptr+=4; /* get the unicode char. */ + uc=parse_hex4(ptr+1);ptr+=4; /* get the unicode char. */ if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0) break; /* check for invalid. */ if (uc>=0xD800 && uc<=0xDBFF) /* UTF16 surrogate pairs. */ { if (ptr[1]!='\\' || ptr[2]!='u') break; /* missing second-half of surrogate. */ - sscanf(ptr+3,"%4x",&uc2);ptr+=6; + uc2=parse_hex4(ptr+3);ptr+=6; if (uc2<0xDC00 || uc2>0xDFFF) break; /* invalid second-half of surrogate. */ uc=0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF)); }