2020-06-08 00:04:23 +03:00
|
|
|
|
/*
|
|
|
|
|
utf-8 util
|
|
|
|
|
|
2022-01-04 12:21:08 +03:00
|
|
|
|
Copyright (c) 2019-2022 Dario Deledda. All rights reserved.
|
2020-06-08 00:04:23 +03:00
|
|
|
|
Use of this source code is governed by an MIT license
|
|
|
|
|
that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
|
|
This file contains utilities for utf8 strings
|
|
|
|
|
*/
|
2019-11-30 08:52:37 +03:00
|
|
|
|
module utf8
|
2019-11-29 16:25:25 +03:00
|
|
|
|
|
2020-06-08 00:04:23 +03:00
|
|
|
|
/*
|
|
|
|
|
Utility functions
|
|
|
|
|
*/
|
2019-12-01 20:41:29 +03:00
|
|
|
|
|
2021-02-15 18:52:00 +03:00
|
|
|
|
// len return the length as number of unicode chars from a string
|
2019-12-01 20:41:29 +03:00
|
|
|
|
pub fn len(s string) int {
|
2021-04-12 05:58:03 +03:00
|
|
|
|
if s.len == 0 {
|
|
|
|
|
return 0
|
|
|
|
|
}
|
|
|
|
|
|
2019-12-01 20:41:29 +03:00
|
|
|
|
mut count := 0
|
|
|
|
|
mut index := 0
|
|
|
|
|
|
|
|
|
|
for {
|
2021-03-01 16:35:57 +03:00
|
|
|
|
ch_len := utf8_char_len(s[index])
|
2019-12-01 20:41:29 +03:00
|
|
|
|
index += ch_len
|
2020-07-22 21:15:07 +03:00
|
|
|
|
count++
|
|
|
|
|
if index >= s.len {
|
2019-12-01 20:41:29 +03:00
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return count
|
|
|
|
|
}
|
|
|
|
|
|
2019-12-12 21:08:44 +03:00
|
|
|
|
// get_uchar convert a unicode glyph in string[index] into a int unicode char
|
|
|
|
|
pub fn get_uchar(s string, index int) int {
|
|
|
|
|
mut res := 0
|
|
|
|
|
mut ch_len := 0
|
2021-02-18 16:43:39 +03:00
|
|
|
|
if s.len > 0 {
|
2021-03-01 16:35:57 +03:00
|
|
|
|
ch_len = utf8_char_len(s[index])
|
2019-12-12 21:08:44 +03:00
|
|
|
|
|
|
|
|
|
if ch_len == 1 {
|
2020-07-22 21:15:07 +03:00
|
|
|
|
return u16(s[index])
|
2021-04-11 10:28:19 +03:00
|
|
|
|
}
|
|
|
|
|
if ch_len > 1 && ch_len < 5 {
|
2019-12-12 21:08:44 +03:00
|
|
|
|
mut lword := 0
|
2021-04-11 10:28:19 +03:00
|
|
|
|
for i := 0; i < ch_len; i++ {
|
2021-11-29 03:48:49 +03:00
|
|
|
|
lword = int(u32(lword) << 8 | u32(s[index + i]))
|
2019-12-12 21:08:44 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// 2 byte utf-8
|
|
|
|
|
// byte format: 110xxxxx 10xxxxxx
|
|
|
|
|
//
|
|
|
|
|
if ch_len == 2 {
|
|
|
|
|
res = (lword & 0x1f00) >> 2 | (lword & 0x3f)
|
|
|
|
|
}
|
|
|
|
|
// 3 byte utf-8
|
|
|
|
|
// byte format: 1110xxxx 10xxxxxx 10xxxxxx
|
|
|
|
|
//
|
|
|
|
|
else if ch_len == 3 {
|
2021-04-11 10:28:19 +03:00
|
|
|
|
res = (lword & 0x0f0000) >> 4 | (lword & 0x3f00) >> 2 | (lword & 0x3f)
|
2019-12-12 21:08:44 +03:00
|
|
|
|
}
|
|
|
|
|
// 4 byte utf-8
|
|
|
|
|
// byte format: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
|
|
|
|
//
|
|
|
|
|
else if ch_len == 4 {
|
2021-04-11 10:28:19 +03:00
|
|
|
|
res = ((lword & 0x07000000) >> 6) | ((lword & 0x003f0000) >> 4) | ((lword & 0x00003F00) >> 2) | (lword & 0x0000003f)
|
2019-12-12 21:08:44 +03:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return res
|
|
|
|
|
}
|
|
|
|
|
|
2021-04-12 05:58:03 +03:00
|
|
|
|
// raw_index - get the raw chracter from the string by the given index value.
|
|
|
|
|
// example: utf8.raw_index('我是V Lang', 1) => '是'
|
2021-04-11 09:04:18 +03:00
|
|
|
|
pub fn raw_index(s string, index int) string {
|
|
|
|
|
mut r := []rune{}
|
|
|
|
|
|
|
|
|
|
for i := 0; i < s.len; i++ {
|
2021-04-12 05:58:03 +03:00
|
|
|
|
if r.len - 1 == index {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
2021-04-11 10:28:19 +03:00
|
|
|
|
b := s[i]
|
|
|
|
|
ch_len := ((0xe5000000 >> ((b >> 3) & 0x1e)) & 3)
|
|
|
|
|
|
2021-04-11 09:04:18 +03:00
|
|
|
|
r << if ch_len > 0 {
|
2021-04-11 10:28:19 +03:00
|
|
|
|
i += ch_len
|
|
|
|
|
rune(get_uchar(s, i - ch_len))
|
|
|
|
|
} else {
|
|
|
|
|
rune(b)
|
|
|
|
|
}
|
|
|
|
|
}
|
2021-04-11 09:04:18 +03:00
|
|
|
|
|
|
|
|
|
return r[index].str()
|
|
|
|
|
}
|
|
|
|
|
|
2021-05-19 12:24:08 +03:00
|
|
|
|
// reverse - returns a reversed string.
|
|
|
|
|
// example: utf8.reverse('你好世界hello world') => 'dlrow olleh界世好你'.
|
|
|
|
|
pub fn reverse(s string) string {
|
2021-05-19 13:03:43 +03:00
|
|
|
|
len_s := len(s)
|
2021-05-19 12:24:08 +03:00
|
|
|
|
if len_s == 0 || len_s == 1 {
|
|
|
|
|
return s.clone()
|
|
|
|
|
}
|
|
|
|
|
mut str_array := []string{}
|
2021-05-19 13:03:43 +03:00
|
|
|
|
for i in 0 .. len_s {
|
|
|
|
|
str_array << raw_index(s, i)
|
2021-05-19 12:24:08 +03:00
|
|
|
|
}
|
|
|
|
|
str_array = str_array.reverse()
|
|
|
|
|
return str_array.join('')
|
|
|
|
|
}
|
|
|
|
|
|
2020-06-08 00:04:23 +03:00
|
|
|
|
/*
|
|
|
|
|
Conversion functions
|
|
|
|
|
*/
|
2019-12-01 20:41:29 +03:00
|
|
|
|
|
|
|
|
|
// to_upper return an uppercase string from a string
|
2019-11-30 08:52:37 +03:00
|
|
|
|
pub fn to_upper(s string) string {
|
|
|
|
|
return up_low(s, true)
|
2019-11-29 16:25:25 +03:00
|
|
|
|
}
|
|
|
|
|
|
2019-12-01 20:41:29 +03:00
|
|
|
|
// to_lower return an lowercase string from a string
|
2019-11-30 08:52:37 +03:00
|
|
|
|
pub fn to_lower(s string) string {
|
2019-12-07 17:13:25 +03:00
|
|
|
|
return up_low(s, false)
|
2019-11-29 16:25:25 +03:00
|
|
|
|
}
|
|
|
|
|
|
2020-06-08 00:04:23 +03:00
|
|
|
|
/*
|
|
|
|
|
Punctuation functions
|
|
|
|
|
|
|
|
|
|
The "western" function search on a small table, that is quicker than
|
|
|
|
|
the global unicode table search. **Use only for western chars**.
|
|
|
|
|
*/
|
2019-12-01 20:41:29 +03:00
|
|
|
|
|
2019-12-12 21:08:44 +03:00
|
|
|
|
//
|
|
|
|
|
// Western
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
// is_punct return true if the string[index] byte is the start of a unicode western punctuation
|
2021-04-11 10:28:19 +03:00
|
|
|
|
pub fn is_punct(s string, index int) bool {
|
2019-12-12 21:08:44 +03:00
|
|
|
|
return is_uchar_punct(get_uchar(s, index))
|
|
|
|
|
}
|
|
|
|
|
|
2021-09-20 21:16:50 +03:00
|
|
|
|
// is_control return true if the rune is control code
|
|
|
|
|
pub fn is_control(r rune) bool {
|
|
|
|
|
// control codes are all below 0xff
|
|
|
|
|
if r > max_latin_1 {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
return props[byte(r)] == 1
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// is_letter returns true if the rune is unicode letter or in unicode category L
|
|
|
|
|
pub fn is_letter(r rune) bool {
|
|
|
|
|
if (r >= `a` && r <= `z`) || (r >= `A` && r <= `Z`) {
|
|
|
|
|
return true
|
|
|
|
|
} else if r <= max_latin_1 {
|
|
|
|
|
return props[byte(r)] & p_l_mask != 0
|
|
|
|
|
}
|
|
|
|
|
return is_excluding_latin(letter_table, r)
|
|
|
|
|
}
|
|
|
|
|
|
2019-12-12 21:08:44 +03:00
|
|
|
|
// is_uchar_punct return true if the input unicode is a western unicode punctuation
|
2021-04-11 10:28:19 +03:00
|
|
|
|
pub fn is_uchar_punct(uchar int) bool {
|
|
|
|
|
return find_punct_in_table(uchar, utf8.unicode_punct_western) != 0
|
2019-12-12 21:08:44 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
|
// Global
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
// is_global_punct return true if the string[index] byte of is the start of a global unicode punctuation
|
2021-04-11 10:28:19 +03:00
|
|
|
|
pub fn is_global_punct(s string, index int) bool {
|
2019-12-12 21:08:44 +03:00
|
|
|
|
return is_uchar_global_punct(get_uchar(s, index))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// is_uchar_global_punct return true if the input unicode is a global unicode punctuation
|
2021-04-11 10:28:19 +03:00
|
|
|
|
pub fn is_uchar_global_punct(uchar int) bool {
|
|
|
|
|
return find_punct_in_table(uchar, utf8.unicode_punct) != 0
|
2019-12-12 21:08:44 +03:00
|
|
|
|
}
|
|
|
|
|
|
2020-06-08 00:04:23 +03:00
|
|
|
|
/*
|
|
|
|
|
Private functions
|
|
|
|
|
*/
|
2019-11-29 16:25:25 +03:00
|
|
|
|
|
2021-04-11 10:28:19 +03:00
|
|
|
|
// Raw to_lower utf-8 function
|
|
|
|
|
fn utf8_to_lower(in_cp int) int {
|
|
|
|
|
mut cp := in_cp
|
|
|
|
|
if ((0x0041 <= cp) && (0x005a >= cp)) || ((0x00c0 <= cp) && (0x00d6 >= cp))
|
|
|
|
|
|| ((0x00d8 <= cp) && (0x00de >= cp)) || ((0x0391 <= cp) && (0x03a1 >= cp))
|
|
|
|
|
|| ((0x03a3 <= cp) && (0x03ab >= cp)) || ((0x0410 <= cp) && (0x042f >= cp)) {
|
|
|
|
|
cp += 32
|
|
|
|
|
} else if (0x0400 <= cp) && (0x040f >= cp) {
|
|
|
|
|
cp += 80
|
|
|
|
|
} else if ((0x0100 <= cp) && (0x012f >= cp)) || ((0x0132 <= cp) && (0x0137 >= cp))
|
|
|
|
|
|| ((0x014a <= cp) && (0x0177 >= cp)) || ((0x0182 <= cp) && (0x0185 >= cp))
|
|
|
|
|
|| ((0x01a0 <= cp) && (0x01a5 >= cp)) || ((0x01de <= cp) && (0x01ef >= cp))
|
|
|
|
|
|| ((0x01f8 <= cp) && (0x021f >= cp)) || ((0x0222 <= cp) && (0x0233 >= cp))
|
|
|
|
|
|| ((0x0246 <= cp) && (0x024f >= cp)) || ((0x03d8 <= cp) && (0x03ef >= cp))
|
|
|
|
|
|| ((0x0460 <= cp) && (0x0481 >= cp)) || ((0x048a <= cp) && (0x04ff >= cp)) {
|
|
|
|
|
cp |= 0x1
|
|
|
|
|
} else if ((0x0139 <= cp) && (0x0148 >= cp)) || ((0x0179 <= cp) && (0x017e >= cp))
|
|
|
|
|
|| ((0x01af <= cp) && (0x01b0 >= cp)) || ((0x01b3 <= cp) && (0x01b6 >= cp))
|
|
|
|
|
|| ((0x01cd <= cp) && (0x01dc >= cp)) {
|
|
|
|
|
cp += 1
|
|
|
|
|
cp &= ~0x1
|
|
|
|
|
} else if ((0x0531 <= cp) && (0x0556 >= cp)) || ((0x10A0 <= cp) && (0x10C5 >= cp)) {
|
|
|
|
|
// ARMENIAN or GEORGIAN
|
|
|
|
|
cp += 0x30
|
|
|
|
|
} else if (((0x1E00 <= cp) && (0x1E94 >= cp)) || ((0x1EA0 <= cp) && (0x1EF8 >= cp)))
|
|
|
|
|
&& (cp & 1 == 0) {
|
|
|
|
|
// LATIN CAPITAL LETTER
|
|
|
|
|
cp += 1
|
|
|
|
|
} else if (0x24B6 <= cp) && (0x24CF >= cp) {
|
|
|
|
|
// CIRCLED LATIN
|
|
|
|
|
cp += 0x1a
|
|
|
|
|
} else if (0xFF21 <= cp) && (0xFF3A >= cp) {
|
|
|
|
|
// FULLWIDTH LATIN CAPITAL
|
|
|
|
|
cp += 0x19
|
|
|
|
|
} else if ((0x1F08 <= cp) && (0x1F0F >= cp)) || ((0x1F18 <= cp) && (0x1F1D >= cp))
|
|
|
|
|
|| ((0x1F28 <= cp) && (0x1F2F >= cp)) || ((0x1F38 <= cp) && (0x1F3F >= cp))
|
|
|
|
|
|| ((0x1F48 <= cp) && (0x1F4D >= cp)) || ((0x1F68 <= cp) && (0x1F6F >= cp))
|
|
|
|
|
|| ((0x1F88 <= cp) && (0x1F8F >= cp)) || ((0x1F98 <= cp) && (0x1F9F >= cp))
|
|
|
|
|
|| ((0x1FA8 <= cp) && (0x1FAF >= cp)) {
|
|
|
|
|
// GREEK
|
|
|
|
|
cp -= 8
|
|
|
|
|
} else {
|
|
|
|
|
match cp {
|
|
|
|
|
0x0178 { cp = 0x00ff }
|
|
|
|
|
0x0243 { cp = 0x0180 }
|
|
|
|
|
0x018e { cp = 0x01dd }
|
|
|
|
|
0x023d { cp = 0x019a }
|
|
|
|
|
0x0220 { cp = 0x019e }
|
|
|
|
|
0x01b7 { cp = 0x0292 }
|
|
|
|
|
0x01c4 { cp = 0x01c6 }
|
|
|
|
|
0x01c7 { cp = 0x01c9 }
|
|
|
|
|
0x01ca { cp = 0x01cc }
|
|
|
|
|
0x01f1 { cp = 0x01f3 }
|
|
|
|
|
0x01f7 { cp = 0x01bf }
|
|
|
|
|
0x0187 { cp = 0x0188 }
|
|
|
|
|
0x018b { cp = 0x018c }
|
|
|
|
|
0x0191 { cp = 0x0192 }
|
|
|
|
|
0x0198 { cp = 0x0199 }
|
|
|
|
|
0x01a7 { cp = 0x01a8 }
|
|
|
|
|
0x01ac { cp = 0x01ad }
|
|
|
|
|
0x01af { cp = 0x01b0 }
|
|
|
|
|
0x01b8 { cp = 0x01b9 }
|
|
|
|
|
0x01bc { cp = 0x01bd }
|
|
|
|
|
0x01f4 { cp = 0x01f5 }
|
|
|
|
|
0x023b { cp = 0x023c }
|
|
|
|
|
0x0241 { cp = 0x0242 }
|
|
|
|
|
0x03fd { cp = 0x037b }
|
|
|
|
|
0x03fe { cp = 0x037c }
|
|
|
|
|
0x03ff { cp = 0x037d }
|
|
|
|
|
0x037f { cp = 0x03f3 }
|
|
|
|
|
0x0386 { cp = 0x03ac }
|
|
|
|
|
0x0388 { cp = 0x03ad }
|
|
|
|
|
0x0389 { cp = 0x03ae }
|
|
|
|
|
0x038a { cp = 0x03af }
|
|
|
|
|
0x038c { cp = 0x03cc }
|
|
|
|
|
0x038e { cp = 0x03cd }
|
|
|
|
|
0x038f { cp = 0x03ce }
|
|
|
|
|
0x0370 { cp = 0x0371 }
|
|
|
|
|
0x0372 { cp = 0x0373 }
|
|
|
|
|
0x0376 { cp = 0x0377 }
|
|
|
|
|
0x03f4 { cp = 0x03b8 }
|
|
|
|
|
0x03cf { cp = 0x03d7 }
|
|
|
|
|
0x03f9 { cp = 0x03f2 }
|
|
|
|
|
0x03f7 { cp = 0x03f8 }
|
|
|
|
|
0x03fa { cp = 0x03fb }
|
|
|
|
|
// GREEK
|
|
|
|
|
0x1F59 { cp = 0x1F51 }
|
|
|
|
|
0x1F5B { cp = 0x1F53 }
|
|
|
|
|
0x1F5D { cp = 0x1F55 }
|
|
|
|
|
0x1F5F { cp = 0x1F57 }
|
|
|
|
|
0x1FB8 { cp = 0x1FB0 }
|
|
|
|
|
0x1FB9 { cp = 0x1FB1 }
|
|
|
|
|
0x1FD8 { cp = 0x1FD0 }
|
|
|
|
|
0x1FD9 { cp = 0x1FD1 }
|
|
|
|
|
0x1FE8 { cp = 0x1FE0 }
|
|
|
|
|
0x1FE9 { cp = 0x1FE1 }
|
|
|
|
|
else {}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return cp
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Raw to_upper utf-8 function
|
|
|
|
|
fn utf8_to_upper(in_cp int) int {
|
|
|
|
|
mut cp := in_cp
|
|
|
|
|
if ((0x0061 <= cp) && (0x007a >= cp)) || ((0x00e0 <= cp) && (0x00f6 >= cp))
|
|
|
|
|
|| ((0x00f8 <= cp) && (0x00fe >= cp)) || ((0x03b1 <= cp) && (0x03c1 >= cp))
|
|
|
|
|
|| ((0x03c3 <= cp) && (0x03cb >= cp)) || ((0x0430 <= cp) && (0x044f >= cp)) {
|
|
|
|
|
cp -= 32
|
|
|
|
|
} else if (0x0450 <= cp) && (0x045f >= cp) {
|
|
|
|
|
cp -= 80
|
|
|
|
|
} else if ((0x0100 <= cp) && (0x012f >= cp)) || ((0x0132 <= cp) && (0x0137 >= cp))
|
|
|
|
|
|| ((0x014a <= cp) && (0x0177 >= cp)) || ((0x0182 <= cp) && (0x0185 >= cp))
|
|
|
|
|
|| ((0x01a0 <= cp) && (0x01a5 >= cp)) || ((0x01de <= cp) && (0x01ef >= cp))
|
|
|
|
|
|| ((0x01f8 <= cp) && (0x021f >= cp)) || ((0x0222 <= cp) && (0x0233 >= cp))
|
|
|
|
|
|| ((0x0246 <= cp) && (0x024f >= cp)) || ((0x03d8 <= cp) && (0x03ef >= cp))
|
|
|
|
|
|| ((0x0460 <= cp) && (0x0481 >= cp)) || ((0x048a <= cp) && (0x04ff >= cp)) {
|
|
|
|
|
cp &= ~0x1
|
|
|
|
|
} else if ((0x0139 <= cp) && (0x0148 >= cp)) || ((0x0179 <= cp) && (0x017e >= cp))
|
|
|
|
|
|| ((0x01af <= cp) && (0x01b0 >= cp)) || ((0x01b3 <= cp) && (0x01b6 >= cp))
|
|
|
|
|
|| ((0x01cd <= cp) && (0x01dc >= cp)) {
|
|
|
|
|
cp -= 1
|
|
|
|
|
cp |= 0x1
|
|
|
|
|
} else if ((0x0561 <= cp) && (0x0586 >= cp)) || ((0x10D0 <= cp) && (0x10F5 >= cp)) {
|
|
|
|
|
// ARMENIAN or GEORGIAN
|
|
|
|
|
cp -= 0x30
|
|
|
|
|
} else if (((0x1E01 <= cp) && (0x1E95 >= cp)) || ((0x1EA1 <= cp) && (0x1EF9 >= cp)))
|
|
|
|
|
&& (cp & 1 == 1) {
|
|
|
|
|
// LATIN CAPITAL LETTER
|
|
|
|
|
cp -= 1
|
|
|
|
|
} else if (0x24D0 <= cp) && (0x24E9 >= cp) {
|
|
|
|
|
// CIRCLED LATIN
|
|
|
|
|
cp -= 0x1a
|
|
|
|
|
} else if (0xFF41 <= cp) && (0xFF5A >= cp) {
|
|
|
|
|
// FULLWIDTH LATIN CAPITAL
|
|
|
|
|
cp -= 0x19
|
|
|
|
|
} else if ((0x1F00 <= cp) && (0x1F07 >= cp)) || ((0x1F10 <= cp) && (0x1F15 >= cp))
|
|
|
|
|
|| ((0x1F20 <= cp) && (0x1F27 >= cp)) || ((0x1F30 <= cp) && (0x1F37 >= cp))
|
|
|
|
|
|| ((0x1F40 <= cp) && (0x1F45 >= cp)) || ((0x1F60 <= cp) && (0x1F67 >= cp))
|
|
|
|
|
|| ((0x1F80 <= cp) && (0x1F87 >= cp)) || ((0x1F90 <= cp) && (0x1F97 >= cp))
|
|
|
|
|
|| ((0x1FA0 <= cp) && (0x1FA7 >= cp)) {
|
|
|
|
|
// GREEK
|
|
|
|
|
cp += 8
|
|
|
|
|
} else {
|
|
|
|
|
match cp {
|
|
|
|
|
0x00ff { cp = 0x0178 }
|
|
|
|
|
0x0180 { cp = 0x0243 }
|
|
|
|
|
0x01dd { cp = 0x018e }
|
|
|
|
|
0x019a { cp = 0x023d }
|
|
|
|
|
0x019e { cp = 0x0220 }
|
|
|
|
|
0x0292 { cp = 0x01b7 }
|
|
|
|
|
0x01c6 { cp = 0x01c4 }
|
|
|
|
|
0x01c9 { cp = 0x01c7 }
|
|
|
|
|
0x01cc { cp = 0x01ca }
|
|
|
|
|
0x01f3 { cp = 0x01f1 }
|
|
|
|
|
0x01bf { cp = 0x01f7 }
|
|
|
|
|
0x0188 { cp = 0x0187 }
|
|
|
|
|
0x018c { cp = 0x018b }
|
|
|
|
|
0x0192 { cp = 0x0191 }
|
|
|
|
|
0x0199 { cp = 0x0198 }
|
|
|
|
|
0x01a8 { cp = 0x01a7 }
|
|
|
|
|
0x01ad { cp = 0x01ac }
|
|
|
|
|
0x01b0 { cp = 0x01af }
|
|
|
|
|
0x01b9 { cp = 0x01b8 }
|
|
|
|
|
0x01bd { cp = 0x01bc }
|
|
|
|
|
0x01f5 { cp = 0x01f4 }
|
|
|
|
|
0x023c { cp = 0x023b }
|
|
|
|
|
0x0242 { cp = 0x0241 }
|
|
|
|
|
0x037b { cp = 0x03fd }
|
|
|
|
|
0x037c { cp = 0x03fe }
|
|
|
|
|
0x037d { cp = 0x03ff }
|
|
|
|
|
0x03f3 { cp = 0x037f }
|
|
|
|
|
0x03ac { cp = 0x0386 }
|
|
|
|
|
0x03ad { cp = 0x0388 }
|
|
|
|
|
0x03ae { cp = 0x0389 }
|
|
|
|
|
0x03af { cp = 0x038a }
|
|
|
|
|
0x03cc { cp = 0x038c }
|
|
|
|
|
0x03cd { cp = 0x038e }
|
|
|
|
|
0x03ce { cp = 0x038f }
|
|
|
|
|
0x0371 { cp = 0x0370 }
|
|
|
|
|
0x0373 { cp = 0x0372 }
|
|
|
|
|
0x0377 { cp = 0x0376 }
|
|
|
|
|
0x03d1 { cp = 0x0398 }
|
|
|
|
|
0x03d7 { cp = 0x03cf }
|
|
|
|
|
0x03f2 { cp = 0x03f9 }
|
|
|
|
|
0x03f8 { cp = 0x03f7 }
|
|
|
|
|
0x03fb { cp = 0x03fa }
|
|
|
|
|
// GREEK
|
|
|
|
|
0x1F51 { cp = 0x1F59 }
|
|
|
|
|
0x1F53 { cp = 0x1F5B }
|
|
|
|
|
0x1F55 { cp = 0x1F5D }
|
|
|
|
|
0x1F57 { cp = 0x1F5F }
|
|
|
|
|
0x1FB0 { cp = 0x1FB8 }
|
|
|
|
|
0x1FB1 { cp = 0x1FB9 }
|
|
|
|
|
0x1FD0 { cp = 0x1FD8 }
|
|
|
|
|
0x1FD1 { cp = 0x1FD9 }
|
|
|
|
|
0x1FE0 { cp = 0x1FE8 }
|
|
|
|
|
0x1FE1 { cp = 0x1FE9 }
|
|
|
|
|
else {}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return cp
|
|
|
|
|
}
|
|
|
|
|
|
2019-12-07 17:13:25 +03:00
|
|
|
|
//
|
|
|
|
|
// if upper_flag == true then make low ==> upper conversion
|
2019-12-01 20:41:29 +03:00
|
|
|
|
// if upper_flag == false then make upper ==> low conversion
|
2019-12-07 17:13:25 +03:00
|
|
|
|
//
|
2019-12-01 20:41:29 +03:00
|
|
|
|
// up_low make the dirt job
|
|
|
|
|
fn up_low(s string, upper_flag bool) string {
|
2020-05-16 17:12:23 +03:00
|
|
|
|
mut index := 0
|
2021-04-11 10:28:19 +03:00
|
|
|
|
mut tab_char := 0
|
2021-06-15 14:47:11 +03:00
|
|
|
|
mut str_res := unsafe { malloc_noscan(s.len + 1) }
|
2019-11-29 16:25:25 +03:00
|
|
|
|
|
|
|
|
|
for {
|
2021-03-01 16:35:57 +03:00
|
|
|
|
ch_len := utf8_char_len(s[index])
|
2019-11-29 16:25:25 +03:00
|
|
|
|
|
|
|
|
|
if ch_len == 1 {
|
2021-04-11 10:28:19 +03:00
|
|
|
|
if upper_flag == true {
|
2020-07-22 21:15:07 +03:00
|
|
|
|
unsafe {
|
|
|
|
|
str_res[index] = byte(C.toupper(s.str[index]))
|
|
|
|
|
}
|
2021-04-11 10:28:19 +03:00
|
|
|
|
} else {
|
2020-07-22 21:15:07 +03:00
|
|
|
|
unsafe {
|
|
|
|
|
str_res[index] = byte(C.tolower(s.str[index]))
|
|
|
|
|
}
|
2019-11-29 16:25:25 +03:00
|
|
|
|
}
|
2021-04-11 10:28:19 +03:00
|
|
|
|
} else if ch_len > 1 && ch_len < 5 {
|
2019-12-07 17:13:25 +03:00
|
|
|
|
mut lword := 0
|
2019-11-29 16:25:25 +03:00
|
|
|
|
|
2021-04-11 10:28:19 +03:00
|
|
|
|
for i := 0; i < ch_len; i++ {
|
2021-11-29 03:48:49 +03:00
|
|
|
|
lword = int(u32(lword) << 8 | u32(s[index + i]))
|
2019-11-29 16:25:25 +03:00
|
|
|
|
}
|
|
|
|
|
|
2021-04-11 10:28:19 +03:00
|
|
|
|
// println("#${index} ($lword)")
|
2019-12-07 17:13:25 +03:00
|
|
|
|
|
2019-12-12 21:08:44 +03:00
|
|
|
|
mut res := 0
|
2019-11-29 16:25:25 +03:00
|
|
|
|
|
|
|
|
|
// 2 byte utf-8
|
|
|
|
|
// byte format: 110xxxxx 10xxxxxx
|
|
|
|
|
//
|
2019-12-01 20:41:29 +03:00
|
|
|
|
if ch_len == 2 {
|
2019-11-29 16:25:25 +03:00
|
|
|
|
res = (lword & 0x1f00) >> 2 | (lword & 0x3f)
|
|
|
|
|
}
|
|
|
|
|
// 3 byte utf-8
|
|
|
|
|
// byte format: 1110xxxx 10xxxxxx 10xxxxxx
|
|
|
|
|
//
|
2019-12-01 20:41:29 +03:00
|
|
|
|
else if ch_len == 3 {
|
2021-04-11 10:28:19 +03:00
|
|
|
|
res = (lword & 0x0f0000) >> 4 | (lword & 0x3f00) >> 2 | (lword & 0x3f)
|
2019-11-29 16:25:25 +03:00
|
|
|
|
}
|
|
|
|
|
// 4 byte utf-8
|
|
|
|
|
// byte format: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
|
|
|
|
//
|
2019-12-01 20:41:29 +03:00
|
|
|
|
else if ch_len == 4 {
|
2021-04-11 10:28:19 +03:00
|
|
|
|
res = ((lword & 0x07000000) >> 6) | ((lword & 0x003f0000) >> 4) | ((lword & 0x00003F00) >> 2) | (lword & 0x0000003f)
|
2019-11-29 16:25:25 +03:00
|
|
|
|
}
|
|
|
|
|
|
2021-04-11 10:28:19 +03:00
|
|
|
|
// println("res: ${res.hex():8}")
|
2019-12-07 17:13:25 +03:00
|
|
|
|
|
2021-04-11 10:28:19 +03:00
|
|
|
|
if upper_flag == false {
|
|
|
|
|
tab_char = utf8_to_lower(res)
|
|
|
|
|
} else {
|
|
|
|
|
tab_char = utf8_to_upper(res)
|
|
|
|
|
}
|
2019-12-07 17:13:25 +03:00
|
|
|
|
|
2021-04-11 10:28:19 +03:00
|
|
|
|
if ch_len == 2 {
|
|
|
|
|
ch0 := byte((tab_char >> 6) & 0x1f) | 0xc0 // 110x xxxx
|
|
|
|
|
ch1 := byte((tab_char >> 0) & 0x3f) | 0x80 // 10xx xxxx
|
|
|
|
|
// C.printf("[%02x%02x] \n",ch0,ch1)
|
2019-12-07 17:13:25 +03:00
|
|
|
|
|
2021-04-11 10:28:19 +03:00
|
|
|
|
unsafe {
|
|
|
|
|
str_res[index + 0] = ch0
|
|
|
|
|
str_res[index + 1] = ch1
|
2019-11-29 16:25:25 +03:00
|
|
|
|
}
|
2021-04-11 10:28:19 +03:00
|
|
|
|
//****************************************************************
|
|
|
|
|
// BUG: doesn't compile, workaround use shitf to right of 0 bit
|
|
|
|
|
//****************************************************************
|
|
|
|
|
// str_res[index + 1 ] = byte( tab_char & 0xbf ) // 1011 1111
|
|
|
|
|
} else if ch_len == 3 {
|
|
|
|
|
ch0 := byte((tab_char >> 12) & 0x0f) | 0xe0 // 1110 xxxx
|
|
|
|
|
ch1 := byte((tab_char >> 6) & 0x3f) | 0x80 // 10xx xxxx
|
|
|
|
|
ch2 := byte((tab_char >> 0) & 0x3f) | 0x80 // 10xx xxxx
|
|
|
|
|
// C.printf("[%02x%02x%02x] \n",ch0,ch1,ch2)
|
2019-11-29 16:25:25 +03:00
|
|
|
|
|
2021-04-11 10:28:19 +03:00
|
|
|
|
unsafe {
|
|
|
|
|
str_res[index + 0] = ch0
|
|
|
|
|
str_res[index + 1] = ch1
|
|
|
|
|
str_res[index + 2] = ch2
|
2019-11-29 16:25:25 +03:00
|
|
|
|
}
|
2021-04-11 10:28:19 +03:00
|
|
|
|
}
|
|
|
|
|
// TODO: write if needed
|
|
|
|
|
else if ch_len == 4 {
|
|
|
|
|
// place holder!!
|
|
|
|
|
// at the present time simply copy the utf8 char
|
|
|
|
|
for i in 0 .. ch_len {
|
|
|
|
|
unsafe {
|
|
|
|
|
str_res[index + i] = s[index + i]
|
2019-11-29 16:25:25 +03:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2021-04-11 10:28:19 +03:00
|
|
|
|
} else {
|
|
|
|
|
// other cases, just copy the string
|
|
|
|
|
for i in 0 .. ch_len {
|
2020-07-22 21:15:07 +03:00
|
|
|
|
unsafe {
|
|
|
|
|
str_res[index + i] = s[index + i]
|
|
|
|
|
}
|
2019-11-29 16:25:25 +03:00
|
|
|
|
}
|
|
|
|
|
}
|
2019-12-01 20:41:29 +03:00
|
|
|
|
|
2020-05-16 17:12:23 +03:00
|
|
|
|
index += ch_len
|
2019-11-29 16:25:25 +03:00
|
|
|
|
|
2019-12-01 20:41:29 +03:00
|
|
|
|
// we are done, exit the loop
|
2020-05-16 17:12:23 +03:00
|
|
|
|
if index >= s.len {
|
2019-11-29 16:25:25 +03:00
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-12-01 20:41:29 +03:00
|
|
|
|
// for c compatibility set the ending 0
|
2020-07-22 21:15:07 +03:00
|
|
|
|
unsafe {
|
|
|
|
|
str_res[index] = 0
|
2021-04-11 10:28:19 +03:00
|
|
|
|
// C.printf("str_res: %s\n--------------\n",str_res)
|
2021-02-14 21:31:42 +03:00
|
|
|
|
return tos(str_res, s.len)
|
2020-07-22 21:15:07 +03:00
|
|
|
|
}
|
2019-11-29 16:25:25 +03:00
|
|
|
|
}
|
|
|
|
|
|
2019-12-12 21:08:44 +03:00
|
|
|
|
// find punct in lockup table
|
2021-04-11 10:28:19 +03:00
|
|
|
|
fn find_punct_in_table(in_code int, in_table []int) int {
|
2019-12-12 21:08:44 +03:00
|
|
|
|
//
|
|
|
|
|
// We will use a simple binary search
|
|
|
|
|
//
|
|
|
|
|
|
|
|
|
|
mut first_index := 0
|
|
|
|
|
mut last_index := (in_table.len)
|
|
|
|
|
mut index := 0
|
|
|
|
|
mut x := 0
|
|
|
|
|
|
|
|
|
|
for {
|
2021-04-11 10:28:19 +03:00
|
|
|
|
index = (first_index + last_index) >> 1
|
|
|
|
|
x = in_table[index]
|
|
|
|
|
// C.printf("(%d..%d) index:%d base[%08x]==>[%08x]\n",first_index,last_index,index,in_code,x)
|
2019-12-12 21:08:44 +03:00
|
|
|
|
|
|
|
|
|
if x == in_code {
|
|
|
|
|
return index
|
2021-04-11 10:28:19 +03:00
|
|
|
|
} else if x > in_code {
|
|
|
|
|
last_index = index
|
|
|
|
|
} else {
|
|
|
|
|
first_index = index
|
2019-12-01 20:41:29 +03:00
|
|
|
|
}
|
|
|
|
|
|
2021-04-11 10:28:19 +03:00
|
|
|
|
if (last_index - first_index) <= 1 {
|
2019-12-01 20:41:29 +03:00
|
|
|
|
break
|
2019-11-29 16:25:25 +03:00
|
|
|
|
}
|
|
|
|
|
}
|
2021-04-11 10:28:19 +03:00
|
|
|
|
// C.printf("not found.\n")
|
2019-12-12 21:08:44 +03:00
|
|
|
|
return 0
|
2019-11-29 16:25:25 +03:00
|
|
|
|
}
|
|
|
|
|
|
2020-06-08 00:04:23 +03:00
|
|
|
|
/*
|
|
|
|
|
Unicode punctuation chars
|
|
|
|
|
|
|
|
|
|
source: http://www.unicode.org/faq/punctuation_symbols.html
|
|
|
|
|
*/
|
2021-04-11 10:28:19 +03:00
|
|
|
|
const (
|
|
|
|
|
// Western punctuation mark
|
|
|
|
|
// Character Name Browser Image
|
|
|
|
|
unicode_punct_western = [
|
|
|
|
|
0x0021 /* EXCLAMATION MARK ! */,
|
|
|
|
|
0x0022 /* QUOTATION MARK " */,
|
|
|
|
|
0x0027 /* APOSTROPHE ' */,
|
|
|
|
|
0x002A /* ASTERISK * */,
|
|
|
|
|
0x002C /* COMMA , */,
|
|
|
|
|
0x002E /* FULL STOP . */,
|
|
|
|
|
0x002F /* SOLIDUS / */,
|
|
|
|
|
0x003A /* COLON : */,
|
|
|
|
|
0x003B /* SEMICOLON ; */,
|
|
|
|
|
0x003F /* QUESTION MARK ? */,
|
|
|
|
|
0x00A1 /* INVERTED EXCLAMATION MARK ¡ */,
|
|
|
|
|
0x00A7 /* SECTION SIGN § */,
|
|
|
|
|
0x00B6 /* PILCROW SIGN ¶ */,
|
|
|
|
|
0x00B7 /* MIDDLE DOT · */,
|
|
|
|
|
0x00BF /* INVERTED QUESTION MARK ¿ */,
|
|
|
|
|
0x037E /* GREEK QUESTION MARK ; */,
|
|
|
|
|
0x0387 /* GREEK ANO TELEIA · */,
|
|
|
|
|
0x055A /* ARMENIAN APOSTROPHE ՚ */,
|
|
|
|
|
0x055B /* ARMENIAN EMPHASIS MARK ՛ */,
|
|
|
|
|
0x055C /* ARMENIAN EXCLAMATION MARK ՜ */,
|
|
|
|
|
0x055D /* ARMENIAN COMMA ՝ */,
|
|
|
|
|
0x055E /* ARMENIAN QUESTION MARK ՞ */,
|
|
|
|
|
0x055F /* ARMENIAN ABBREVIATION MARK ՟ */,
|
|
|
|
|
0x0589 /* ARMENIAN FULL STOP ։ */,
|
|
|
|
|
0x05C0 /* HEBREW PUNCTUATION PASEQ ׀ */,
|
|
|
|
|
0x05C3 /* HEBREW PUNCTUATION SOF PASUQ ׃ */,
|
|
|
|
|
0x05C6 /* HEBREW PUNCTUATION NUN HAFUKHA ׆ */,
|
|
|
|
|
0x05F3 /* HEBREW PUNCTUATION GERESH ׳ */,
|
|
|
|
|
0x05F4 /* HEBREW PUNCTUATION GERSHAYIM ״ */,
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
// Unicode Characters in the 'Punctuation, Other' Category
|
|
|
|
|
// Character Name Browser Image
|
|
|
|
|
unicode_punct = [
|
|
|
|
|
0x0021 /* EXCLAMATION MARK ! */,
|
|
|
|
|
0x0022 /* QUOTATION MARK " */,
|
|
|
|
|
0x0023 /* NUMBER SIGN # */,
|
|
|
|
|
0x0025 /* PERCENT SIGN % */,
|
|
|
|
|
0x0026 /* AMPERSAND & */,
|
|
|
|
|
0x0027 /* APOSTROPHE ' */,
|
|
|
|
|
0x002A /* ASTERISK * */,
|
|
|
|
|
0x002C /* COMMA , */,
|
|
|
|
|
0x002E /* FULL STOP . */,
|
|
|
|
|
0x002F /* SOLIDUS / */,
|
|
|
|
|
0x003A /* COLON : */,
|
|
|
|
|
0x003B /* SEMICOLON ; */,
|
|
|
|
|
0x003F /* QUESTION MARK ? */,
|
|
|
|
|
0x0040 /* COMMERCIAL AT @ */,
|
|
|
|
|
0x005C /* REVERSE SOLIDUS \ */,
|
|
|
|
|
0x00A1 /* INVERTED EXCLAMATION MARK ¡ */,
|
|
|
|
|
0x00A7 /* SECTION SIGN § */,
|
|
|
|
|
0x00B6 /* PILCROW SIGN ¶ */,
|
|
|
|
|
0x00B7 /* MIDDLE DOT · */,
|
|
|
|
|
0x00BF /* INVERTED QUESTION MARK ¿ */,
|
|
|
|
|
0x037E /* GREEK QUESTION MARK ; */,
|
|
|
|
|
0x0387 /* GREEK ANO TELEIA · */,
|
|
|
|
|
0x055A /* ARMENIAN APOSTROPHE ՚ */,
|
|
|
|
|
0x055B /* ARMENIAN EMPHASIS MARK ՛ */,
|
|
|
|
|
0x055C /* ARMENIAN EXCLAMATION MARK ՜ */,
|
|
|
|
|
0x055D /* ARMENIAN COMMA ՝ */,
|
|
|
|
|
0x055E /* ARMENIAN QUESTION MARK ՞ */,
|
|
|
|
|
0x055F /* ARMENIAN ABBREVIATION MARK ՟ */,
|
|
|
|
|
0x0589 /* ARMENIAN FULL STOP ։ */,
|
|
|
|
|
0x05C0 /* HEBREW PUNCTUATION PASEQ ׀ */,
|
|
|
|
|
0x05C3 /* HEBREW PUNCTUATION SOF PASUQ ׃ */,
|
|
|
|
|
0x05C6 /* HEBREW PUNCTUATION NUN HAFUKHA ׆ */,
|
|
|
|
|
0x05F3 /* HEBREW PUNCTUATION GERESH ׳ */,
|
|
|
|
|
0x05F4 /* HEBREW PUNCTUATION GERSHAYIM ״ */,
|
|
|
|
|
0x0609 /* ARABIC-INDIC PER MILLE SIGN ؉ */,
|
|
|
|
|
0x060A /* ARABIC-INDIC PER TEN THOUSAND SIGN ؊ */,
|
|
|
|
|
0x060C /* ARABIC COMMA ، */,
|
|
|
|
|
0x060D /* ARABIC DATE SEPARATOR ؍ */,
|
|
|
|
|
0x061B /* ARABIC SEMICOLON ؛ */,
|
|
|
|
|
0x061E /* ARABIC TRIPLE DOT PUNCTUATION MARK ؞ */,
|
|
|
|
|
0x061F /* ARABIC QUESTION MARK ؟ */,
|
|
|
|
|
0x066A /* ARABIC PERCENT SIGN ٪ */,
|
|
|
|
|
0x066B /* ARABIC DECIMAL SEPARATOR ٫ */,
|
|
|
|
|
0x066C /* ARABIC THOUSANDS SEPARATOR ٬ */,
|
|
|
|
|
0x066D /* ARABIC FIVE POINTED STAR ٭ */,
|
|
|
|
|
0x06D4 /* ARABIC FULL STOP ۔ */,
|
|
|
|
|
0x0700 /* SYRIAC END OF PARAGRAPH ܀ */,
|
|
|
|
|
0x0701 /* SYRIAC SUPRALINEAR FULL STOP ܁ */,
|
|
|
|
|
0x0702 /* SYRIAC SUBLINEAR FULL STOP ܂ */,
|
|
|
|
|
0x0703 /* SYRIAC SUPRALINEAR COLON ܃ */,
|
|
|
|
|
0x0704 /* SYRIAC SUBLINEAR COLON ܄ */,
|
|
|
|
|
0x0705 /* SYRIAC HORIZONTAL COLON ܅ */,
|
|
|
|
|
0x0706 /* SYRIAC COLON SKEWED LEFT ܆ */,
|
|
|
|
|
0x0707 /* SYRIAC COLON SKEWED RIGHT ܇ */,
|
|
|
|
|
0x0708 /* SYRIAC SUPRALINEAR COLON SKEWED LEFT ܈ */,
|
|
|
|
|
0x0709 /* SYRIAC SUBLINEAR COLON SKEWED RIGHT ܉ */,
|
|
|
|
|
0x070A /* SYRIAC CONTRACTION ܊ */,
|
|
|
|
|
0x070B /* SYRIAC HARKLEAN OBELUS ܋ */,
|
|
|
|
|
0x070C /* SYRIAC HARKLEAN METOBELUS ܌ */,
|
|
|
|
|
0x070D /* SYRIAC HARKLEAN ASTERISCUS ܍ */,
|
|
|
|
|
0x07F7 /* NKO SYMBOL GBAKURUNEN ߷ */,
|
|
|
|
|
0x07F8 /* NKO COMMA ߸ */,
|
|
|
|
|
0x07F9 /* NKO EXCLAMATION MARK ߹ */,
|
|
|
|
|
0x0830 /* SAMARITAN PUNCTUATION NEQUDAA ࠰ */,
|
|
|
|
|
0x0831 /* SAMARITAN PUNCTUATION AFSAAQ ࠱ */,
|
|
|
|
|
0x0832 /* SAMARITAN PUNCTUATION ANGED ࠲ */,
|
|
|
|
|
0x0833 /* SAMARITAN PUNCTUATION BAU ࠳ */,
|
|
|
|
|
0x0834 /* SAMARITAN PUNCTUATION ATMAAU ࠴ */,
|
|
|
|
|
0x0835 /* SAMARITAN PUNCTUATION SHIYYAALAA ࠵ */,
|
|
|
|
|
0x0836 /* SAMARITAN ABBREVIATION MARK ࠶ */,
|
|
|
|
|
0x0837 /* SAMARITAN PUNCTUATION MELODIC QITSA ࠷ */,
|
|
|
|
|
0x0838 /* SAMARITAN PUNCTUATION ZIQAA ࠸ */,
|
|
|
|
|
0x0839 /* SAMARITAN PUNCTUATION QITSA ࠹ */,
|
|
|
|
|
0x083A /* SAMARITAN PUNCTUATION ZAEF ࠺ */,
|
|
|
|
|
0x083B /* SAMARITAN PUNCTUATION TURU ࠻ */,
|
|
|
|
|
0x083C /* SAMARITAN PUNCTUATION ARKAANU ࠼ */,
|
|
|
|
|
0x083D /* SAMARITAN PUNCTUATION SOF MASHFAAT ࠽ */,
|
|
|
|
|
0x083E /* SAMARITAN PUNCTUATION ANNAAU ࠾ */,
|
|
|
|
|
0x085E /* MANDAIC PUNCTUATION ࡞ */,
|
|
|
|
|
0x0964 /* DEVANAGARI DANDA । */,
|
|
|
|
|
0x0965 /* DEVANAGARI DOUBLE DANDA ॥ */,
|
|
|
|
|
0x0970 /* DEVANAGARI ABBREVIATION SIGN ॰ */,
|
|
|
|
|
0x09FD /* BENGALI ABBREVIATION SIGN ৽ */,
|
|
|
|
|
0x0A76 /* GURMUKHI ABBREVIATION SIGN ੶ */,
|
|
|
|
|
0x0AF0 /* GUJARATI ABBREVIATION SIGN ૰ */,
|
|
|
|
|
0x0C77 /* TELUGU SIGN SIDDHAM ౷ */,
|
|
|
|
|
0x0C84 /* KANNADA SIGN SIDDHAM ಄ */,
|
|
|
|
|
0x0DF4 /* SINHALA PUNCTUATION KUNDDALIYA ෴ */,
|
|
|
|
|
0x0E4F /* THAI CHARACTER FONGMAN ๏ */,
|
|
|
|
|
0x0E5A /* THAI CHARACTER ANGKHANKHU ๚ */,
|
|
|
|
|
0x0E5B /* THAI CHARACTER KHOMUT ๛ */,
|
|
|
|
|
0x0F04 /* TIBETAN MARK INITIAL YIG MGO MDUN MA ༄ */,
|
|
|
|
|
0x0F05 /* TIBETAN MARK CLOSING YIG MGO SGAB MA ༅ */,
|
|
|
|
|
0x0F06 /* TIBETAN MARK CARET YIG MGO PHUR SHAD MA ༆ */,
|
|
|
|
|
0x0F07 /* TIBETAN MARK YIG MGO TSHEG SHAD MA ༇ */,
|
|
|
|
|
0x0F08 /* TIBETAN MARK SBRUL SHAD ༈ */,
|
|
|
|
|
0x0F09 /* TIBETAN MARK BSKUR YIG MGO ༉ */,
|
|
|
|
|
0x0F0A /* TIBETAN MARK BKA- SHOG YIG MGO ༊ */,
|
|
|
|
|
0x0F0B /* TIBETAN MARK INTERSYLLABIC TSHEG ་ */,
|
|
|
|
|
0x0F0C /* TIBETAN MARK DELIMITER TSHEG BSTAR ༌ */,
|
|
|
|
|
0x0F0D /* TIBETAN MARK SHAD ། */,
|
|
|
|
|
0x0F0E /* TIBETAN MARK NYIS SHAD ༎ */,
|
|
|
|
|
0x0F0F /* TIBETAN MARK TSHEG SHAD ༏ */,
|
|
|
|
|
0x0F10 /* TIBETAN MARK NYIS TSHEG SHAD ༐ */,
|
|
|
|
|
0x0F11 /* TIBETAN MARK RIN CHEN SPUNGS SHAD ༑ */,
|
|
|
|
|
0x0F12 /* TIBETAN MARK RGYA GRAM SHAD ༒ */,
|
|
|
|
|
0x0F14 /* TIBETAN MARK GTER TSHEG ༔ */,
|
|
|
|
|
0x0F85 /* TIBETAN MARK PALUTA ྅ */,
|
|
|
|
|
0x0FD0 /* TIBETAN MARK BSKA- SHOG GI MGO RGYAN ࿐ */,
|
|
|
|
|
0x0FD1 /* TIBETAN MARK MNYAM YIG GI MGO RGYAN ࿑ */,
|
|
|
|
|
0x0FD2 /* TIBETAN MARK NYIS TSHEG ࿒ */,
|
|
|
|
|
0x0FD3 /* TIBETAN MARK INITIAL BRDA RNYING YIG MGO MDUN MA ࿓ */,
|
|
|
|
|
0x0FD4 /* TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA ࿔ */,
|
|
|
|
|
0x0FD9 /* TIBETAN MARK LEADING MCHAN RTAGS ࿙ */,
|
|
|
|
|
0x0FDA /* TIBETAN MARK TRAILING MCHAN RTAGS ࿚ */,
|
|
|
|
|
0x104A /* MYANMAR SIGN LITTLE SECTION ၊ */,
|
|
|
|
|
0x104B /* MYANMAR SIGN SECTION ။ */,
|
|
|
|
|
0x104C /* MYANMAR SYMBOL LOCATIVE ၌ */,
|
|
|
|
|
0x104D /* MYANMAR SYMBOL COMPLETED ၍ */,
|
|
|
|
|
0x104E /* MYANMAR SYMBOL AFOREMENTIONED ၎ */,
|
|
|
|
|
0x104F /* MYANMAR SYMBOL GENITIVE ၏ */,
|
|
|
|
|
0x10FB /* GEORGIAN PARAGRAPH SEPARATOR ჻ */,
|
|
|
|
|
0x1360 /* ETHIOPIC SECTION MARK ፠ */,
|
|
|
|
|
0x1361 /* ETHIOPIC WORDSPACE ፡ */,
|
|
|
|
|
0x1362 /* ETHIOPIC FULL STOP ። */,
|
|
|
|
|
0x1363 /* ETHIOPIC COMMA ፣ */,
|
|
|
|
|
0x1364 /* ETHIOPIC SEMICOLON ፤ */,
|
|
|
|
|
0x1365 /* ETHIOPIC COLON ፥ */,
|
|
|
|
|
0x1366 /* ETHIOPIC PREFACE COLON ፦ */,
|
|
|
|
|
0x1367 /* ETHIOPIC QUESTION MARK ፧ */,
|
|
|
|
|
0x1368 /* ETHIOPIC PARAGRAPH SEPARATOR ፨ */,
|
|
|
|
|
0x166E /* CANADIAN SYLLABICS FULL STOP ᙮ */,
|
|
|
|
|
0x16EB /* RUNIC SINGLE PUNCTUATION ᛫ */,
|
|
|
|
|
0x16EC /* RUNIC MULTIPLE PUNCTUATION ᛬ */,
|
|
|
|
|
0x16ED /* RUNIC CROSS PUNCTUATION ᛭ */,
|
|
|
|
|
0x1735 /* PHILIPPINE SINGLE PUNCTUATION ᜵ */,
|
|
|
|
|
0x1736 /* PHILIPPINE DOUBLE PUNCTUATION ᜶ */,
|
|
|
|
|
0x17D4 /* KHMER SIGN KHAN ។ */,
|
|
|
|
|
0x17D5 /* KHMER SIGN BARIYOOSAN ៕ */,
|
|
|
|
|
0x17D6 /* KHMER SIGN CAMNUC PII KUUH ៖ */,
|
|
|
|
|
0x17D8 /* KHMER SIGN BEYYAL ៘ */,
|
|
|
|
|
0x17D9 /* KHMER SIGN PHNAEK MUAN ៙ */,
|
|
|
|
|
0x17DA /* KHMER SIGN KOOMUUT ៚ */,
|
|
|
|
|
0x1800 /* MONGOLIAN BIRGA ᠀ */,
|
|
|
|
|
0x1801 /* MONGOLIAN ELLIPSIS ᠁ */,
|
|
|
|
|
0x1802 /* MONGOLIAN COMMA ᠂ */,
|
|
|
|
|
0x1803 /* MONGOLIAN FULL STOP ᠃ */,
|
|
|
|
|
0x1804 /* MONGOLIAN COLON ᠄ */,
|
|
|
|
|
0x1805 /* MONGOLIAN FOUR DOTS ᠅ */,
|
|
|
|
|
0x1807 /* MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER ᠇ */,
|
|
|
|
|
0x1808 /* MONGOLIAN MANCHU COMMA ᠈ */,
|
|
|
|
|
0x1809 /* MONGOLIAN MANCHU FULL STOP ᠉ */,
|
|
|
|
|
0x180A /* MONGOLIAN NIRUGU ᠊ */,
|
|
|
|
|
0x1944 /* LIMBU EXCLAMATION MARK ᥄ */,
|
|
|
|
|
0x1945 /* LIMBU QUESTION MARK ᥅ */,
|
|
|
|
|
0x1A1E /* BUGINESE PALLAWA ᨞ */,
|
|
|
|
|
0x1A1F /* BUGINESE END OF SECTION ᨟ */,
|
|
|
|
|
0x1AA0 /* TAI THAM SIGN WIANG ᪠ */,
|
|
|
|
|
0x1AA1 /* TAI THAM SIGN WIANGWAAK ᪡ */,
|
|
|
|
|
0x1AA2 /* TAI THAM SIGN SAWAN ᪢ */,
|
|
|
|
|
0x1AA3 /* TAI THAM SIGN KEOW ᪣ */,
|
|
|
|
|
0x1AA4 /* TAI THAM SIGN HOY ᪤ */,
|
|
|
|
|
0x1AA5 /* TAI THAM SIGN DOKMAI ᪥ */,
|
|
|
|
|
0x1AA6 /* TAI THAM SIGN REVERSED ROTATED RANA ᪦ */,
|
|
|
|
|
0x1AA8 /* TAI THAM SIGN KAAN ᪨ */,
|
|
|
|
|
0x1AA9 /* TAI THAM SIGN KAANKUU ᪩ */,
|
|
|
|
|
0x1AAA /* TAI THAM SIGN SATKAAN ᪪ */,
|
|
|
|
|
0x1AAB /* TAI THAM SIGN SATKAANKUU ᪫ */,
|
|
|
|
|
0x1AAC /* TAI THAM SIGN HANG ᪬ */,
|
|
|
|
|
0x1AAD /* TAI THAM SIGN CAANG ᪭ */,
|
|
|
|
|
0x1B5A /* BALINESE PANTI ᭚ */,
|
|
|
|
|
0x1B5B /* BALINESE PAMADA ᭛ */,
|
|
|
|
|
0x1B5C /* BALINESE WINDU ᭜ */,
|
|
|
|
|
0x1B5D /* BALINESE CARIK PAMUNGKAH ᭝ */,
|
|
|
|
|
0x1B5E /* BALINESE CARIK SIKI ᭞ */,
|
|
|
|
|
0x1B5F /* BALINESE CARIK PAREREN ᭟ */,
|
|
|
|
|
0x1B60 /* BALINESE PAMENENG ᭠ */,
|
|
|
|
|
0x1BFC /* BATAK SYMBOL BINDU NA METEK ᯼ */,
|
|
|
|
|
0x1BFD /* BATAK SYMBOL BINDU PINARBORAS ᯽ */,
|
|
|
|
|
0x1BFE /* BATAK SYMBOL BINDU JUDUL ᯾ */,
|
|
|
|
|
0x1BFF /* BATAK SYMBOL BINDU PANGOLAT ᯿ */,
|
|
|
|
|
0x1C3B /* LEPCHA PUNCTUATION TA-ROL ᰻ */,
|
|
|
|
|
0x1C3C /* LEPCHA PUNCTUATION NYET THYOOM TA-ROL ᰼ */,
|
|
|
|
|
0x1C3D /* LEPCHA PUNCTUATION CER-WA ᰽ */,
|
|
|
|
|
0x1C3E /* LEPCHA PUNCTUATION TSHOOK CER-WA ᰾ */,
|
|
|
|
|
0x1C3F /* LEPCHA PUNCTUATION TSHOOK ᰿ */,
|
|
|
|
|
0x1C7E /* OL CHIKI PUNCTUATION MUCAAD ᱾ */,
|
|
|
|
|
0x1C7F /* OL CHIKI PUNCTUATION DOUBLE MUCAAD ᱿ */,
|
|
|
|
|
0x1CC0 /* SUNDANESE PUNCTUATION BINDU SURYA ᳀ */,
|
|
|
|
|
0x1CC1 /* SUNDANESE PUNCTUATION BINDU PANGLONG ᳁ */,
|
|
|
|
|
0x1CC2 /* SUNDANESE PUNCTUATION BINDU PURNAMA ᳂ */,
|
|
|
|
|
0x1CC3 /* SUNDANESE PUNCTUATION BINDU CAKRA ᳃ */,
|
|
|
|
|
0x1CC4 /* SUNDANESE PUNCTUATION BINDU LEU SATANGA ᳄ */,
|
|
|
|
|
0x1CC5 /* SUNDANESE PUNCTUATION BINDU KA SATANGA ᳅ */,
|
|
|
|
|
0x1CC6 /* SUNDANESE PUNCTUATION BINDU DA SATANGA ᳆ */,
|
|
|
|
|
0x1CC7 /* SUNDANESE PUNCTUATION BINDU BA SATANGA ᳇ */,
|
|
|
|
|
0x1CD3 /* VEDIC SIGN NIHSHVASA ᳓ */,
|
|
|
|
|
0x2016 /* DOUBLE VERTICAL LINE ‖ */,
|
|
|
|
|
0x2017 /* DOUBLE LOW LINE ‗ */,
|
|
|
|
|
0x2020 /* DAGGER † */,
|
|
|
|
|
0x2021 /* DOUBLE DAGGER ‡ */,
|
|
|
|
|
0x2022 /* BULLET • */,
|
|
|
|
|
0x2023 /* TRIANGULAR BULLET ‣ */,
|
|
|
|
|
0x2024 /* ONE DOT LEADER ․ */,
|
|
|
|
|
0x2025 /* TWO DOT LEADER ‥ */,
|
|
|
|
|
0x2026 /* HORIZONTAL ELLIPSIS … */,
|
|
|
|
|
0x2027 /* HYPHENATION POINT ‧ */,
|
|
|
|
|
0x2030 /* PER MILLE SIGN ‰ */,
|
|
|
|
|
0x2031 /* PER TEN THOUSAND SIGN ‱ */,
|
|
|
|
|
0x2032 /* PRIME ′ */,
|
|
|
|
|
0x2033 /* DOUBLE PRIME ″ */,
|
|
|
|
|
0x2034 /* TRIPLE PRIME ‴ */,
|
|
|
|
|
0x2035 /* REVERSED PRIME ‵ */,
|
|
|
|
|
0x2036 /* REVERSED DOUBLE PRIME ‶ */,
|
|
|
|
|
0x2037 /* REVERSED TRIPLE PRIME ‷ */,
|
|
|
|
|
0x2038 /* CARET ‸ */,
|
|
|
|
|
0x203B /* REFERENCE MARK ※ */,
|
|
|
|
|
0x203C /* DOUBLE EXCLAMATION MARK ‼ */,
|
|
|
|
|
0x203D /* INTERROBANG ‽ */,
|
|
|
|
|
0x203E /* OVERLINE ‾ */,
|
|
|
|
|
0x2041 /* CARET INSERTION POINT ⁁ */,
|
|
|
|
|
0x2042 /* ASTERISM ⁂ */,
|
|
|
|
|
0x2043 /* HYPHEN BULLET ⁃ */,
|
|
|
|
|
0x2047 /* DOUBLE QUESTION MARK ⁇ */,
|
|
|
|
|
0x2048 /* QUESTION EXCLAMATION MARK ⁈ */,
|
|
|
|
|
0x2049 /* EXCLAMATION QUESTION MARK ⁉ */,
|
|
|
|
|
0x204A /* TIRONIAN SIGN ET ⁊ */,
|
|
|
|
|
0x204B /* REVERSED PILCROW SIGN ⁋ */,
|
|
|
|
|
0x204C /* BLACK LEFTWARDS BULLET ⁌ */,
|
|
|
|
|
0x204D /* BLACK RIGHTWARDS BULLET ⁍ */,
|
|
|
|
|
0x204E /* LOW ASTERISK ⁎ */,
|
|
|
|
|
0x204F /* REVERSED SEMICOLON ⁏ */,
|
|
|
|
|
0x2050 /* CLOSE UP ⁐ */,
|
|
|
|
|
0x2051 /* TWO ASTERISKS ALIGNED VERTICALLY ⁑ */,
|
|
|
|
|
0x2053 /* SWUNG DASH ⁓ */,
|
|
|
|
|
0x2055 /* FLOWER PUNCTUATION MARK ⁕ */,
|
|
|
|
|
0x2056 /* THREE DOT PUNCTUATION ⁖ */,
|
|
|
|
|
0x2057 /* QUADRUPLE PRIME ⁗ */,
|
|
|
|
|
0x2058 /* FOUR DOT PUNCTUATION ⁘ */,
|
|
|
|
|
0x2059 /* FIVE DOT PUNCTUATION ⁙ */,
|
|
|
|
|
0x205A /* TWO DOT PUNCTUATION ⁚ */,
|
|
|
|
|
0x205B /* FOUR DOT MARK ⁛ */,
|
|
|
|
|
0x205C /* DOTTED CROSS ⁜ */,
|
|
|
|
|
0x205D /* TRICOLON ⁝ */,
|
|
|
|
|
0x205E /* VERTICAL FOUR DOTS ⁞ */,
|
|
|
|
|
0x2CF9 /* COPTIC OLD NUBIAN FULL STOP ⳹ */,
|
|
|
|
|
0x2CFA /* COPTIC OLD NUBIAN DIRECT QUESTION MARK ⳺ */,
|
|
|
|
|
0x2CFB /* COPTIC OLD NUBIAN INDIRECT QUESTION MARK ⳻ */,
|
|
|
|
|
0x2CFC /* COPTIC OLD NUBIAN VERSE DIVIDER ⳼ */,
|
|
|
|
|
0x2CFE /* COPTIC FULL STOP ⳾ */,
|
|
|
|
|
0x2CFF /* COPTIC MORPHOLOGICAL DIVIDER ⳿ */,
|
|
|
|
|
0x2D70 /* TIFINAGH SEPARATOR MARK ⵰ */,
|
|
|
|
|
0x2E00 /* RIGHT ANGLE SUBSTITUTION MARKER ⸀ */,
|
|
|
|
|
0x2E01 /* RIGHT ANGLE DOTTED SUBSTITUTION MARKER ⸁ */,
|
|
|
|
|
0x2E06 /* RAISED INTERPOLATION MARKER ⸆ */,
|
|
|
|
|
0x2E07 /* RAISED DOTTED INTERPOLATION MARKER ⸇ */,
|
|
|
|
|
0x2E08 /* DOTTED TRANSPOSITION MARKER ⸈ */,
|
|
|
|
|
0x2E0B /* RAISED SQUARE ⸋ */,
|
|
|
|
|
0x2E0E /* EDITORIAL CORONIS ⸎ */,
|
|
|
|
|
0x2E0F /* PARAGRAPHOS ⸏ */,
|
|
|
|
|
0x2E10 /* FORKED PARAGRAPHOS ⸐ */,
|
|
|
|
|
0x2E11 /* REVERSED FORKED PARAGRAPHOS ⸑ */,
|
|
|
|
|
0x2E12 /* HYPODIASTOLE ⸒ */,
|
|
|
|
|
0x2E13 /* DOTTED OBELOS ⸓ */,
|
|
|
|
|
0x2E14 /* DOWNWARDS ANCORA ⸔ */,
|
|
|
|
|
0x2E15 /* UPWARDS ANCORA ⸕ */,
|
|
|
|
|
0x2E16 /* DOTTED RIGHT-POINTING ANGLE ⸖ */,
|
|
|
|
|
0x2E18 /* INVERTED INTERROBANG ⸘ */,
|
|
|
|
|
0x2E19 /* PALM BRANCH ⸙ */,
|
|
|
|
|
0x2E1B /* TILDE WITH RING ABOVE ⸛ */,
|
|
|
|
|
0x2E1E /* TILDE WITH DOT ABOVE ⸞ */,
|
|
|
|
|
0x2E1F /* TILDE WITH DOT BELOW ⸟ */,
|
|
|
|
|
0x2E2A /* TWO DOTS OVER ONE DOT PUNCTUATION ⸪ */,
|
|
|
|
|
0x2E2B /* ONE DOT OVER TWO DOTS PUNCTUATION ⸫ */,
|
|
|
|
|
0x2E2C /* SQUARED FOUR DOT PUNCTUATION ⸬ */,
|
|
|
|
|
0x2E2D /* FIVE DOT MARK ⸭ */,
|
|
|
|
|
0x2E2E /* REVERSED QUESTION MARK ⸮ */,
|
|
|
|
|
0x2E30 /* RING POINT ⸰ */,
|
|
|
|
|
0x2E31 /* WORD SEPARATOR MIDDLE DOT ⸱ */,
|
|
|
|
|
0x2E32 /* TURNED COMMA ⸲ */,
|
|
|
|
|
0x2E33 /* RAISED DOT ⸳ */,
|
|
|
|
|
0x2E34 /* RAISED COMMA ⸴ */,
|
|
|
|
|
0x2E35 /* TURNED SEMICOLON ⸵ */,
|
|
|
|
|
0x2E36 /* DAGGER WITH LEFT GUARD ⸶ */,
|
|
|
|
|
0x2E37 /* DAGGER WITH RIGHT GUARD ⸷ */,
|
|
|
|
|
0x2E38 /* TURNED DAGGER ⸸ */,
|
|
|
|
|
0x2E39 /* TOP HALF SECTION SIGN ⸹ */,
|
|
|
|
|
0x2E3C /* STENOGRAPHIC FULL STOP ⸼ */,
|
|
|
|
|
0x2E3D /* VERTICAL SIX DOTS ⸽ */,
|
|
|
|
|
0x2E3E /* WIGGLY VERTICAL LINE ⸾ */,
|
|
|
|
|
0x2E3F /* CAPITULUM ⸿ */,
|
|
|
|
|
0x2E41 /* REVERSED COMMA ⹁ */,
|
|
|
|
|
0x2E43 /* DASH WITH LEFT UPTURN ⹃ */,
|
|
|
|
|
0x2E44 /* DOUBLE SUSPENSION MARK ⹄ */,
|
|
|
|
|
0x2E45 /* INVERTED LOW KAVYKA ⹅ */,
|
|
|
|
|
0x2E46 /* INVERTED LOW KAVYKA WITH KAVYKA ABOVE ⹆ */,
|
|
|
|
|
0x2E47 /* LOW KAVYKA ⹇ */,
|
|
|
|
|
0x2E48 /* LOW KAVYKA WITH DOT ⹈ */,
|
|
|
|
|
0x2E49 /* DOUBLE STACKED COMMA ⹉ */,
|
|
|
|
|
0x2E4A /* DOTTED SOLIDUS ⹊ */,
|
|
|
|
|
0x2E4B /* TRIPLE DAGGER ⹋ */,
|
|
|
|
|
0x2E4C /* MEDIEVAL COMMA ⹌ */,
|
|
|
|
|
0x2E4D /* PARAGRAPHUS MARK ⹍ */,
|
|
|
|
|
0x2E4E /* PUNCTUS ELEVATUS MARK ⹎ */,
|
|
|
|
|
0x2E4F /* CORNISH VERSE DIVIDER ⹏ */,
|
|
|
|
|
0x3001 /* IDEOGRAPHIC COMMA 、 */,
|
|
|
|
|
0x3002 /* IDEOGRAPHIC FULL STOP 。 */,
|
|
|
|
|
0x3003 /* DITTO MARK 〃 */,
|
|
|
|
|
0x303D /* PART ALTERNATION MARK 〽 */,
|
|
|
|
|
0x30FB /* KATAKANA MIDDLE DOT ・ */,
|
|
|
|
|
0xA4FE /* LISU PUNCTUATION COMMA ꓾ */,
|
|
|
|
|
0xA4FF /* LISU PUNCTUATION FULL STOP ꓿ */,
|
|
|
|
|
0xA60D /* VAI COMMA ꘍ */,
|
|
|
|
|
0xA60E /* VAI FULL STOP ꘎ */,
|
|
|
|
|
0xA60F /* VAI QUESTION MARK ꘏ */,
|
|
|
|
|
0xA673 /* SLAVONIC ASTERISK ꙳ */,
|
|
|
|
|
0xA67E /* CYRILLIC KAVYKA ꙾ */,
|
|
|
|
|
0xA6F2 /* BAMUM NJAEMLI ꛲ */,
|
|
|
|
|
0xA6F3 /* BAMUM FULL STOP ꛳ */,
|
|
|
|
|
0xA6F4 /* BAMUM COLON ꛴ */,
|
|
|
|
|
0xA6F5 /* BAMUM COMMA ꛵ */,
|
|
|
|
|
0xA6F6 /* BAMUM SEMICOLON ꛶ */,
|
|
|
|
|
0xA6F7 /* BAMUM QUESTION MARK ꛷ */,
|
|
|
|
|
0xA874 /* PHAGS-PA SINGLE HEAD MARK ꡴ */,
|
|
|
|
|
0xA875 /* PHAGS-PA DOUBLE HEAD MARK ꡵ */,
|
|
|
|
|
0xA876 /* PHAGS-PA MARK SHAD ꡶ */,
|
|
|
|
|
0xA877 /* PHAGS-PA MARK DOUBLE SHAD ꡷ */,
|
|
|
|
|
0xA8CE /* SAURASHTRA DANDA ꣎ */,
|
|
|
|
|
0xA8CF /* SAURASHTRA DOUBLE DANDA ꣏ */,
|
|
|
|
|
0xA8F8 /* DEVANAGARI SIGN PUSHPIKA ꣸ */,
|
|
|
|
|
0xA8F9 /* DEVANAGARI GAP FILLER ꣹ */,
|
|
|
|
|
0xA8FA /* DEVANAGARI CARET ꣺ */,
|
|
|
|
|
0xA8FC /* DEVANAGARI SIGN SIDDHAM ꣼ */,
|
|
|
|
|
0xA92E /* KAYAH LI SIGN CWI ꤮ */,
|
|
|
|
|
0xA92F /* KAYAH LI SIGN SHYA ꤯ */,
|
|
|
|
|
0xA95F /* REJANG SECTION MARK ꥟ */,
|
|
|
|
|
0xA9C1 /* JAVANESE LEFT RERENGGAN ꧁ */,
|
|
|
|
|
0xA9C2 /* JAVANESE RIGHT RERENGGAN ꧂ */,
|
|
|
|
|
0xA9C3 /* JAVANESE PADA ANDAP ꧃ */,
|
|
|
|
|
0xA9C4 /* JAVANESE PADA MADYA ꧄ */,
|
|
|
|
|
0xA9C5 /* JAVANESE PADA LUHUR ꧅ */,
|
|
|
|
|
0xA9C6 /* JAVANESE PADA WINDU ꧆ */,
|
|
|
|
|
0xA9C7 /* JAVANESE PADA PANGKAT ꧇ */,
|
|
|
|
|
0xA9C8 /* JAVANESE PADA LINGSA ꧈ */,
|
|
|
|
|
0xA9C9 /* JAVANESE PADA LUNGSI ꧉ */,
|
|
|
|
|
0xA9CA /* JAVANESE PADA ADEG ꧊ */,
|
|
|
|
|
0xA9CB /* JAVANESE PADA ADEG ADEG ꧋ */,
|
|
|
|
|
0xA9CC /* JAVANESE PADA PISELEH ꧌ */,
|
|
|
|
|
0xA9CD /* JAVANESE TURNED PADA PISELEH ꧍ */,
|
|
|
|
|
0xA9DE /* JAVANESE PADA TIRTA TUMETES ꧞ */,
|
|
|
|
|
0xA9DF /* JAVANESE PADA ISEN-ISEN ꧟ */,
|
|
|
|
|
0xAA5C /* CHAM PUNCTUATION SPIRAL ꩜ */,
|
|
|
|
|
0xAA5D /* CHAM PUNCTUATION DANDA ꩝ */,
|
|
|
|
|
0xAA5E /* CHAM PUNCTUATION DOUBLE DANDA ꩞ */,
|
|
|
|
|
0xAA5F /* CHAM PUNCTUATION TRIPLE DANDA ꩟ */,
|
|
|
|
|
0xAADE /* TAI VIET SYMBOL HO HOI ꫞ */,
|
|
|
|
|
0xAADF /* TAI VIET SYMBOL KOI KOI ꫟ */,
|
|
|
|
|
0xAAF0 /* MEETEI MAYEK CHEIKHAN ꫰ */,
|
|
|
|
|
0xAAF1 /* MEETEI MAYEK AHANG KHUDAM ꫱ */,
|
|
|
|
|
0xABEB /* MEETEI MAYEK CHEIKHEI ꯫ */,
|
|
|
|
|
0xFE10 /* PRESENTATION FORM FOR VERTICAL COMMA ︐ */,
|
|
|
|
|
0xFE11 /* PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC COMMA ︑ */,
|
|
|
|
|
0xFE12 /* PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FULL STOP ︒ */,
|
|
|
|
|
0xFE13 /* PRESENTATION FORM FOR VERTICAL COLON ︓ */,
|
|
|
|
|
0xFE14 /* PRESENTATION FORM FOR VERTICAL SEMICOLON ︔ */,
|
|
|
|
|
0xFE15 /* PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK ︕ */,
|
|
|
|
|
0xFE16 /* PRESENTATION FORM FOR VERTICAL QUESTION MARK ︖ */,
|
|
|
|
|
0xFE19 /* PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS ︙ */,
|
|
|
|
|
0xFE30 /* PRESENTATION FORM FOR VERTICAL TWO DOT LEADER ︰ */,
|
|
|
|
|
0xFE45 /* SESAME DOT ﹅ */,
|
|
|
|
|
0xFE46 /* WHITE SESAME DOT ﹆ */,
|
|
|
|
|
0xFE49 /* DASHED OVERLINE ﹉ */,
|
|
|
|
|
0xFE4A /* CENTRELINE OVERLINE ﹊ */,
|
|
|
|
|
0xFE4B /* WAVY OVERLINE ﹋ */,
|
|
|
|
|
0xFE4C /* DOUBLE WAVY OVERLINE ﹌ */,
|
|
|
|
|
0xFE50 /* SMALL COMMA ﹐ */,
|
|
|
|
|
0xFE51 /* SMALL IDEOGRAPHIC COMMA ﹑ */,
|
|
|
|
|
0xFE52 /* SMALL FULL STOP ﹒ */,
|
|
|
|
|
0xFE54 /* SMALL SEMICOLON ﹔ */,
|
|
|
|
|
0xFE55 /* SMALL COLON ﹕ */,
|
|
|
|
|
0xFE56 /* SMALL QUESTION MARK ﹖ */,
|
|
|
|
|
0xFE57 /* SMALL EXCLAMATION MARK ﹗ */,
|
|
|
|
|
0xFE5F /* SMALL NUMBER SIGN ﹟ */,
|
|
|
|
|
0xFE60 /* SMALL AMPERSAND ﹠ */,
|
|
|
|
|
0xFE61 /* SMALL ASTERISK ﹡ */,
|
|
|
|
|
0xFE68 /* SMALL REVERSE SOLIDUS ﹨ */,
|
|
|
|
|
0xFE6A /* SMALL PERCENT SIGN ﹪ */,
|
|
|
|
|
0xFE6B /* SMALL COMMERCIAL AT ﹫ */,
|
|
|
|
|
0xFF01 /* FULLWIDTH EXCLAMATION MARK ! */,
|
|
|
|
|
0xFF02 /* FULLWIDTH QUOTATION MARK " */,
|
|
|
|
|
0xFF03 /* FULLWIDTH NUMBER SIGN # */,
|
|
|
|
|
0xFF05 /* FULLWIDTH PERCENT SIGN % */,
|
|
|
|
|
0xFF06 /* FULLWIDTH AMPERSAND & */,
|
|
|
|
|
0xFF07 /* FULLWIDTH APOSTROPHE ' */,
|
|
|
|
|
0xFF0A /* FULLWIDTH ASTERISK * */,
|
|
|
|
|
0xFF0C /* FULLWIDTH COMMA , */,
|
|
|
|
|
0xFF0E /* FULLWIDTH FULL STOP . */,
|
|
|
|
|
0xFF0F /* FULLWIDTH SOLIDUS / */,
|
|
|
|
|
0xFF1A /* FULLWIDTH COLON : */,
|
|
|
|
|
0xFF1B /* FULLWIDTH SEMICOLON ; */,
|
|
|
|
|
0xFF1F /* FULLWIDTH QUESTION MARK ? */,
|
|
|
|
|
0xFF20 /* FULLWIDTH COMMERCIAL AT @ */,
|
|
|
|
|
0xFF3C /* FULLWIDTH REVERSE SOLIDUS \ */,
|
|
|
|
|
0xFF61 /* HALFWIDTH IDEOGRAPHIC FULL STOP 。 */,
|
|
|
|
|
0xFF64 /* HALFWIDTH IDEOGRAPHIC COMMA 、 */,
|
|
|
|
|
0xFF65 /* HALFWIDTH KATAKANA MIDDLE DOT ・ */,
|
|
|
|
|
0x10100 /* AEGEAN WORD SEPARATOR LINE 𐄀 */,
|
|
|
|
|
0x10101 /* AEGEAN WORD SEPARATOR DOT 𐄁 */,
|
|
|
|
|
0x10102 /* AEGEAN CHECK MARK 𐄂 */,
|
|
|
|
|
0x1039F /* UGARITIC WORD DIVIDER 𐎟 */,
|
|
|
|
|
0x103D0 /* OLD PERSIAN WORD DIVIDER 𐏐 */,
|
|
|
|
|
0x1056F /* CAUCASIAN ALBANIAN CITATION MARK 𐕯 */,
|
|
|
|
|
0x10857 /* IMPERIAL ARAMAIC SECTION SIGN 𐡗 */,
|
|
|
|
|
0x1091F /* PHOENICIAN WORD SEPARATOR 𐤟 */,
|
|
|
|
|
0x1093F /* LYDIAN TRIANGULAR MARK 𐤿 */,
|
|
|
|
|
0x10A50 /* KHAROSHTHI PUNCTUATION DOT 𐩐 */,
|
|
|
|
|
0x10A51 /* KHAROSHTHI PUNCTUATION SMALL CIRCLE 𐩑 */,
|
|
|
|
|
0x10A52 /* KHAROSHTHI PUNCTUATION CIRCLE 𐩒 */,
|
|
|
|
|
0x10A53 /* KHAROSHTHI PUNCTUATION CRESCENT BAR 𐩓 */,
|
|
|
|
|
0x10A54 /* KHAROSHTHI PUNCTUATION MANGALAM 𐩔 */,
|
|
|
|
|
0x10A55 /* KHAROSHTHI PUNCTUATION LOTUS 𐩕 */,
|
|
|
|
|
0x10A56 /* KHAROSHTHI PUNCTUATION DANDA 𐩖 */,
|
|
|
|
|
0x10A57 /* KHAROSHTHI PUNCTUATION DOUBLE DANDA 𐩗 */,
|
|
|
|
|
0x10A58 /* KHAROSHTHI PUNCTUATION LINES 𐩘 */,
|
|
|
|
|
0x10A7F /* OLD SOUTH ARABIAN NUMERIC INDICATOR 𐩿 */,
|
|
|
|
|
0x10AF0 /* MANICHAEAN PUNCTUATION STAR 𐫰 */,
|
|
|
|
|
0x10AF1 /* MANICHAEAN PUNCTUATION FLEURON 𐫱 */,
|
|
|
|
|
0x10AF2 /* MANICHAEAN PUNCTUATION DOUBLE DOT WITHIN DOT 𐫲 */,
|
|
|
|
|
0x10AF3 /* MANICHAEAN PUNCTUATION DOT WITHIN DOT 𐫳 */,
|
|
|
|
|
0x10AF4 /* MANICHAEAN PUNCTUATION DOT 𐫴 */,
|
|
|
|
|
0x10AF5 /* MANICHAEAN PUNCTUATION TWO DOTS 𐫵 */,
|
|
|
|
|
0x10AF6 /* MANICHAEAN PUNCTUATION LINE FILLER 𐫶 */,
|
|
|
|
|
0x10B39 /* AVESTAN ABBREVIATION MARK 𐬹 */,
|
|
|
|
|
0x10B3A /* TINY TWO DOTS OVER ONE DOT PUNCTUATION 𐬺 */,
|
|
|
|
|
0x10B3B /* SMALL TWO DOTS OVER ONE DOT PUNCTUATION 𐬻 */,
|
|
|
|
|
0x10B3C /* LARGE TWO DOTS OVER ONE DOT PUNCTUATION 𐬼 */,
|
|
|
|
|
0x10B3D /* LARGE ONE DOT OVER TWO DOTS PUNCTUATION 𐬽 */,
|
|
|
|
|
0x10B3E /* LARGE TWO RINGS OVER ONE RING PUNCTUATION 𐬾 */,
|
|
|
|
|
0x10B3F /* LARGE ONE RING OVER TWO RINGS PUNCTUATION 𐬿 */,
|
|
|
|
|
0x10B99 /* PSALTER PAHLAVI SECTION MARK 𐮙 */,
|
|
|
|
|
0x10B9A /* PSALTER PAHLAVI TURNED SECTION MARK 𐮚 */,
|
|
|
|
|
0x10B9B /* PSALTER PAHLAVI FOUR DOTS WITH CROSS 𐮛 */,
|
|
|
|
|
0x10B9C /* PSALTER PAHLAVI FOUR DOTS WITH DOT 𐮜 */,
|
|
|
|
|
0x10F55 /* SOGDIAN PUNCTUATION TWO VERTICAL BARS 𐽕 */,
|
|
|
|
|
0x10F56 /* SOGDIAN PUNCTUATION TWO VERTICAL BARS WITH DOTS 𐽖 */,
|
|
|
|
|
0x10F57 /* SOGDIAN PUNCTUATION CIRCLE WITH DOT 𐽗 */,
|
|
|
|
|
0x10F58 /* SOGDIAN PUNCTUATION TWO CIRCLES WITH DOTS 𐽘 */,
|
|
|
|
|
0x10F59 /* SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT 𐽙 */,
|
|
|
|
|
0x11047 /* BRAHMI DANDA 𑁇 */,
|
|
|
|
|
0x11048 /* BRAHMI DOUBLE DANDA 𑁈 */,
|
|
|
|
|
0x11049 /* BRAHMI PUNCTUATION DOT 𑁉 */,
|
|
|
|
|
0x1104A /* BRAHMI PUNCTUATION DOUBLE DOT 𑁊 */,
|
|
|
|
|
0x1104B /* BRAHMI PUNCTUATION LINE 𑁋 */,
|
|
|
|
|
0x1104C /* BRAHMI PUNCTUATION CRESCENT BAR 𑁌 */,
|
|
|
|
|
0x1104D /* BRAHMI PUNCTUATION LOTUS 𑁍 */,
|
|
|
|
|
0x110BB /* KAITHI ABBREVIATION SIGN 𑂻 */,
|
|
|
|
|
0x110BC /* KAITHI ENUMERATION SIGN 𑂼 */,
|
|
|
|
|
0x110BE /* KAITHI SECTION MARK 𑂾 */,
|
|
|
|
|
0x110BF /* KAITHI DOUBLE SECTION MARK 𑂿 */,
|
|
|
|
|
0x110C0 /* KAITHI DANDA 𑃀 */,
|
|
|
|
|
0x110C1 /* KAITHI DOUBLE DANDA 𑃁 */,
|
|
|
|
|
0x11140 /* CHAKMA SECTION MARK 𑅀 */,
|
|
|
|
|
0x11141 /* CHAKMA DANDA 𑅁 */,
|
|
|
|
|
0x11142 /* CHAKMA DOUBLE DANDA 𑅂 */,
|
|
|
|
|
0x11143 /* CHAKMA QUESTION MARK 𑅃 */,
|
|
|
|
|
0x11174 /* MAHAJANI ABBREVIATION SIGN 𑅴 */,
|
|
|
|
|
0x11175 /* MAHAJANI SECTION MARK 𑅵 */,
|
|
|
|
|
0x111C5 /* SHARADA DANDA 𑇅 */,
|
|
|
|
|
0x111C6 /* SHARADA DOUBLE DANDA 𑇆 */,
|
|
|
|
|
0x111C7 /* SHARADA ABBREVIATION SIGN 𑇇 */,
|
|
|
|
|
0x111C8 /* SHARADA SEPARATOR 𑇈 */,
|
|
|
|
|
0x111CD /* SHARADA SUTRA MARK 𑇍 */,
|
|
|
|
|
0x111DB /* SHARADA SIGN SIDDHAM 𑇛 */,
|
|
|
|
|
0x111DD /* SHARADA CONTINUATION SIGN 𑇝 */,
|
|
|
|
|
0x111DE /* SHARADA SECTION MARK-1 𑇞 */,
|
|
|
|
|
0x111DF /* SHARADA SECTION MARK-2 𑇟 */,
|
|
|
|
|
0x11238 /* KHOJKI DANDA 𑈸 */,
|
|
|
|
|
0x11239 /* KHOJKI DOUBLE DANDA 𑈹 */,
|
|
|
|
|
0x1123A /* KHOJKI WORD SEPARATOR 𑈺 */,
|
|
|
|
|
0x1123B /* KHOJKI SECTION MARK 𑈻 */,
|
|
|
|
|
0x1123C /* KHOJKI DOUBLE SECTION MARK 𑈼 */,
|
|
|
|
|
0x1123D /* KHOJKI ABBREVIATION SIGN 𑈽 */,
|
|
|
|
|
0x112A9 /* MULTANI SECTION MARK 𑊩 */,
|
|
|
|
|
0x1144B /* NEWA DANDA 𑑋 */,
|
|
|
|
|
0x1144C /* NEWA DOUBLE DANDA 𑑌 */,
|
|
|
|
|
0x1144D /* NEWA COMMA 𑑍 */,
|
|
|
|
|
0x1144E /* NEWA GAP FILLER 𑑎 */,
|
|
|
|
|
0x1144F /* NEWA ABBREVIATION SIGN 𑑏 */,
|
|
|
|
|
0x1145B /* NEWA PLACEHOLDER MARK 𑑛 */,
|
|
|
|
|
0x1145D /* NEWA INSERTION SIGN 𑑝 */,
|
|
|
|
|
0x114C6 /* TIRHUTA ABBREVIATION SIGN 𑓆 */,
|
|
|
|
|
0x115C1 /* SIDDHAM SIGN SIDDHAM 𑗁 */,
|
|
|
|
|
0x115C2 /* SIDDHAM DANDA 𑗂 */,
|
|
|
|
|
0x115C3 /* SIDDHAM DOUBLE DANDA 𑗃 */,
|
|
|
|
|
0x115C4 /* SIDDHAM SEPARATOR DOT 𑗄 */,
|
|
|
|
|
0x115C5 /* SIDDHAM SEPARATOR BAR 𑗅 */,
|
|
|
|
|
0x115C6 /* SIDDHAM REPETITION MARK-1 𑗆 */,
|
|
|
|
|
0x115C7 /* SIDDHAM REPETITION MARK-2 𑗇 */,
|
|
|
|
|
0x115C8 /* SIDDHAM REPETITION MARK-3 𑗈 */,
|
|
|
|
|
0x115C9 /* SIDDHAM END OF TEXT MARK 𑗉 */,
|
|
|
|
|
0x115CA /* SIDDHAM SECTION MARK WITH TRIDENT AND U-SHAPED ORNAMENTS 𑗊 */,
|
|
|
|
|
0x115CB /* SIDDHAM SECTION MARK WITH TRIDENT AND DOTTED CRESCENTS 𑗋 */,
|
|
|
|
|
0x115CC /* SIDDHAM SECTION MARK WITH RAYS AND DOTTED CRESCENTS 𑗌 */,
|
|
|
|
|
0x115CD /* SIDDHAM SECTION MARK WITH RAYS AND DOTTED DOUBLE CRESCENTS 𑗍 */,
|
|
|
|
|
0x115CE /* SIDDHAM SECTION MARK WITH RAYS AND DOTTED TRIPLE CRESCENTS 𑗎 */,
|
|
|
|
|
0x115CF /* SIDDHAM SECTION MARK DOUBLE RING 𑗏 */,
|
|
|
|
|
0x115D0 /* SIDDHAM SECTION MARK DOUBLE RING WITH RAYS 𑗐 */,
|
|
|
|
|
0x115D1 /* SIDDHAM SECTION MARK WITH DOUBLE CRESCENTS 𑗑 */,
|
|
|
|
|
0x115D2 /* SIDDHAM SECTION MARK WITH TRIPLE CRESCENTS 𑗒 */,
|
|
|
|
|
0x115D3 /* SIDDHAM SECTION MARK WITH QUADRUPLE CRESCENTS 𑗓 */,
|
|
|
|
|
0x115D4 /* SIDDHAM SECTION MARK WITH SEPTUPLE CRESCENTS 𑗔 */,
|
|
|
|
|
0x115D5 /* SIDDHAM SECTION MARK WITH CIRCLES AND RAYS 𑗕 */,
|
|
|
|
|
0x115D6 /* SIDDHAM SECTION MARK WITH CIRCLES AND TWO ENCLOSURES 𑗖 */,
|
|
|
|
|
0x115D7 /* SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES 𑗗 */,
|
|
|
|
|
0x11641 /* MODI DANDA 𑙁 */,
|
|
|
|
|
0x11642 /* MODI DOUBLE DANDA 𑙂 */,
|
|
|
|
|
0x11643 /* MODI ABBREVIATION SIGN 𑙃 */,
|
|
|
|
|
0x11660 /* MONGOLIAN BIRGA WITH ORNAMENT 𑙠 */,
|
|
|
|
|
0x11661 /* MONGOLIAN ROTATED BIRGA 𑙡 */,
|
|
|
|
|
0x11662 /* MONGOLIAN DOUBLE BIRGA WITH ORNAMENT 𑙢 */,
|
|
|
|
|
0x11663 /* MONGOLIAN TRIPLE BIRGA WITH ORNAMENT 𑙣 */,
|
|
|
|
|
0x11664 /* MONGOLIAN BIRGA WITH DOUBLE ORNAMENT 𑙤 */,
|
|
|
|
|
0x11665 /* MONGOLIAN ROTATED BIRGA WITH ORNAMENT 𑙥 */,
|
|
|
|
|
0x11666 /* MONGOLIAN ROTATED BIRGA WITH DOUBLE ORNAMENT 𑙦 */,
|
|
|
|
|
0x11667 /* MONGOLIAN INVERTED BIRGA 𑙧 */,
|
|
|
|
|
0x11668 /* MONGOLIAN INVERTED BIRGA WITH DOUBLE ORNAMENT 𑙨 */,
|
|
|
|
|
0x11669 /* MONGOLIAN SWIRL BIRGA 𑙩 */,
|
|
|
|
|
0x1166A /* MONGOLIAN SWIRL BIRGA WITH ORNAMENT 𑙪 */,
|
|
|
|
|
0x1166B /* MONGOLIAN SWIRL BIRGA WITH DOUBLE ORNAMENT 𑙫 */,
|
|
|
|
|
0x1166C /* MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT 𑙬 */,
|
|
|
|
|
0x1173C /* AHOM SIGN SMALL SECTION 𑜼 */,
|
|
|
|
|
0x1173D /* AHOM SIGN SECTION 𑜽 */,
|
|
|
|
|
0x1173E /* AHOM SIGN RULAI 𑜾 */,
|
|
|
|
|
0x1183B /* DOGRA ABBREVIATION SIGN 𑠻 */,
|
|
|
|
|
0x119E2 /* NANDINAGARI SIGN SIDDHAM 𑧢 */,
|
|
|
|
|
0x11A3F /* ZANABAZAR SQUARE INITIAL HEAD MARK 𑨿 */,
|
|
|
|
|
0x11A40 /* ZANABAZAR SQUARE CLOSING HEAD MARK 𑩀 */,
|
|
|
|
|
0x11A41 /* ZANABAZAR SQUARE MARK TSHEG 𑩁 */,
|
|
|
|
|
0x11A42 /* ZANABAZAR SQUARE MARK SHAD 𑩂 */,
|
|
|
|
|
0x11A43 /* ZANABAZAR SQUARE MARK DOUBLE SHAD 𑩃 */,
|
|
|
|
|
0x11A44 /* ZANABAZAR SQUARE MARK LONG TSHEG 𑩄 */,
|
|
|
|
|
0x11A45 /* ZANABAZAR SQUARE INITIAL DOUBLE-LINED HEAD MARK 𑩅 */,
|
|
|
|
|
0x11A46 /* ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK 𑩆 */,
|
|
|
|
|
0x11A9A /* SOYOMBO MARK TSHEG 𑪚 */,
|
|
|
|
|
0x11A9B /* SOYOMBO MARK SHAD 𑪛 */,
|
|
|
|
|
0x11A9C /* SOYOMBO MARK DOUBLE SHAD 𑪜 */,
|
|
|
|
|
0x11A9E /* SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME 𑪞 */,
|
|
|
|
|
0x11A9F /* SOYOMBO HEAD MARK WITH MOON AND SUN AND FLAME 𑪟 */,
|
|
|
|
|
0x11AA0 /* SOYOMBO HEAD MARK WITH MOON AND SUN 𑪠 */,
|
|
|
|
|
0x11AA1 /* SOYOMBO TERMINAL MARK-1 𑪡 */,
|
|
|
|
|
0x11AA2 /* SOYOMBO TERMINAL MARK-2 𑪢 */,
|
|
|
|
|
0x11C41 /* BHAIKSUKI DANDA 𑱁 */,
|
|
|
|
|
0x11C42 /* BHAIKSUKI DOUBLE DANDA 𑱂 */,
|
|
|
|
|
0x11C43 /* BHAIKSUKI WORD SEPARATOR 𑱃 */,
|
|
|
|
|
0x11C44 /* BHAIKSUKI GAP FILLER-1 𑱄 */,
|
|
|
|
|
0x11C45 /* BHAIKSUKI GAP FILLER-2 𑱅 */,
|
|
|
|
|
0x11C70 /* MARCHEN HEAD MARK 𑱰 */,
|
|
|
|
|
0x11C71 /* MARCHEN MARK SHAD 𑱱 */,
|
|
|
|
|
0x11EF7 /* MAKASAR PASSIMBANG 𑻷 */,
|
|
|
|
|
0x11EF8 /* MAKASAR END OF SECTION 𑻸 */,
|
|
|
|
|
0x11FFF /* TAMIL PUNCTUATION END OF TEXT 𑿿 */,
|
|
|
|
|
0x12470 /* CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER 𒑰 */,
|
|
|
|
|
0x12471 /* CUNEIFORM PUNCTUATION SIGN VERTICAL COLON 𒑱 */,
|
|
|
|
|
0x12472 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL COLON 𒑲 */,
|
|
|
|
|
0x12473 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON 𒑳 */,
|
|
|
|
|
0x12474 /* CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON 𒑴 */,
|
|
|
|
|
0x16A6E /* MRO DANDA 𖩮 */,
|
|
|
|
|
0x16A6F /* MRO DOUBLE DANDA 𖩯 */,
|
|
|
|
|
0x16AF5 /* BASSA VAH FULL STOP 𖫵 */,
|
|
|
|
|
0x16B37 /* PAHAWH HMONG SIGN VOS THOM 𖬷 */,
|
|
|
|
|
0x16B38 /* PAHAWH HMONG SIGN VOS TSHAB CEEB 𖬸 */,
|
|
|
|
|
0x16B39 /* PAHAWH HMONG SIGN CIM CHEEM 𖬹 */,
|
|
|
|
|
0x16B3A /* PAHAWH HMONG SIGN VOS THIAB 𖬺 */,
|
|
|
|
|
0x16B3B /* PAHAWH HMONG SIGN VOS FEEM 𖬻 */,
|
|
|
|
|
0x16B44 /* PAHAWH HMONG SIGN XAUS 𖭄 */,
|
|
|
|
|
0x16E97 /* MEDEFAIDRIN COMMA 𖺗 */,
|
|
|
|
|
0x16E98 /* MEDEFAIDRIN FULL STOP 𖺘 */,
|
|
|
|
|
0x16E99 /* MEDEFAIDRIN SYMBOL AIVA 𖺙 */,
|
|
|
|
|
0x16E9A /* MEDEFAIDRIN EXCLAMATION OH 𖺚 */,
|
|
|
|
|
0x16FE2 /* OLD CHINESE HOOK MARK 𖿢 */,
|
|
|
|
|
0x1BC9F /* DUPLOYAN PUNCTUATION CHINOOK FULL STOP 𛲟 */,
|
|
|
|
|
0x1DA87 /* SIGNWRITING COMMA 𝪇 */,
|
|
|
|
|
0x1DA88 /* SIGNWRITING FULL STOP 𝪈 */,
|
|
|
|
|
0x1DA89 /* SIGNWRITING SEMICOLON 𝪉 */,
|
|
|
|
|
0x1DA8A /* SIGNWRITING COLON 𝪊 */,
|
|
|
|
|
0x1DA8B /* SIGNWRITING PARENTHESIS 𝪋 */,
|
|
|
|
|
0x1E95E /* ADLAM INITIAL EXCLAMATION MARK 𞥞 */,
|
|
|
|
|
0x1E95F /* ADLAM INITIAL QUESTION MARK */,
|
|
|
|
|
]
|
2019-12-12 21:08:44 +03:00
|
|
|
|
)
|