mirror of
https://github.com/vlang/v.git
synced 2023-08-10 21:13:21 +03:00
vlib: add a new module builtin.wchar
, to ease dealing with C APIs that accept wchar_t*
(#18794)
This commit is contained in:
parent
de392003be
commit
ded6c38061
@ -295,7 +295,7 @@ fn C.SymCleanup(hProcess voidptr)
|
|||||||
|
|
||||||
fn C.MultiByteToWideChar(codePage u32, dwFlags u32, lpMultiMyteStr &char, cbMultiByte int, lpWideCharStr &u16, cchWideChar int) int
|
fn C.MultiByteToWideChar(codePage u32, dwFlags u32, lpMultiMyteStr &char, cbMultiByte int, lpWideCharStr &u16, cchWideChar int) int
|
||||||
|
|
||||||
fn C.wcslen(str &u16) int
|
fn C.wcslen(str voidptr) usize
|
||||||
|
|
||||||
fn C.WideCharToMultiByte(codePage u32, dwFlags u32, lpWideCharStr &u16, cchWideChar int, lpMultiByteStr &char, cbMultiByte int, lpDefaultChar &char, lpUsedDefaultChar &int) int
|
fn C.WideCharToMultiByte(codePage u32, dwFlags u32, lpWideCharStr &u16, cchWideChar int, lpMultiByteStr &char, cbMultiByte int, lpDefaultChar &char, lpUsedDefaultChar &int) int
|
||||||
|
|
||||||
|
@ -10,6 +10,8 @@ const cp_utf8 = 65001
|
|||||||
// The returned pointer of .to_wide(), has a type of &u16, and is suitable
|
// The returned pointer of .to_wide(), has a type of &u16, and is suitable
|
||||||
// for passing to Windows APIs that expect LPWSTR or wchar_t* parameters.
|
// for passing to Windows APIs that expect LPWSTR or wchar_t* parameters.
|
||||||
// See also MultiByteToWideChar ( https://learn.microsoft.com/en-us/windows/win32/api/stringapiset/nf-stringapiset-multibytetowidechar )
|
// See also MultiByteToWideChar ( https://learn.microsoft.com/en-us/windows/win32/api/stringapiset/nf-stringapiset-multibytetowidechar )
|
||||||
|
// See also builtin.wchar.from_string/1, for a version, that produces a
|
||||||
|
// platform dependent L"" C style wchar_t* wide string.
|
||||||
pub fn (_str string) to_wide() &u16 {
|
pub fn (_str string) to_wide() &u16 {
|
||||||
$if windows {
|
$if windows {
|
||||||
unsafe {
|
unsafe {
|
||||||
@ -29,19 +31,25 @@ pub fn (_str string) to_wide() &u16 {
|
|||||||
for i, r in srunes {
|
for i, r in srunes {
|
||||||
result[i] = u16(r)
|
result[i] = u16(r)
|
||||||
}
|
}
|
||||||
|
result[srunes.len] = 0
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// string_from_wide creates a V string, encoded in UTF-8, given a windows
|
// string_from_wide creates a V string, encoded in UTF-8, given a windows
|
||||||
// style string encoded in UTF-16.
|
// style string encoded in UTF-16. Note that this function first searches
|
||||||
|
// for the string terminator 0 character, and is thus slower, while more
|
||||||
|
// convenient compared to string_from_wide2/2 (you have to know the length
|
||||||
|
// in advance to use string_from_wide2/2).
|
||||||
|
// See also builtin.wchar.to_string/1, for a version that eases working with
|
||||||
|
// the platform dependent &wchar_t L"" strings.
|
||||||
[manualfree; unsafe]
|
[manualfree; unsafe]
|
||||||
pub fn string_from_wide(_wstr &u16) string {
|
pub fn string_from_wide(_wstr &u16) string {
|
||||||
$if windows {
|
$if windows {
|
||||||
unsafe {
|
unsafe {
|
||||||
wstr_len := C.wcslen(_wstr)
|
wstr_len := C.wcslen(_wstr)
|
||||||
return string_from_wide2(_wstr, wstr_len)
|
return string_from_wide2(_wstr, int(wstr_len))
|
||||||
}
|
}
|
||||||
} $else {
|
} $else {
|
||||||
mut i := 0
|
mut i := 0
|
||||||
@ -56,6 +64,8 @@ pub fn string_from_wide(_wstr &u16) string {
|
|||||||
// style string, encoded in UTF-16. It is more efficient, compared to
|
// style string, encoded in UTF-16. It is more efficient, compared to
|
||||||
// string_from_wide, but it requires you to know the input string length,
|
// string_from_wide, but it requires you to know the input string length,
|
||||||
// and to pass it as the second argument.
|
// and to pass it as the second argument.
|
||||||
|
// See also builtin.wchar.to_string2/2, for a version that eases working
|
||||||
|
// with the platform dependent &wchar_t L"" strings.
|
||||||
[manualfree; unsafe]
|
[manualfree; unsafe]
|
||||||
pub fn string_from_wide2(_wstr &u16, len int) string {
|
pub fn string_from_wide2(_wstr &u16, len int) string {
|
||||||
$if windows {
|
$if windows {
|
||||||
|
@ -72,6 +72,7 @@ fn test_string_from_wide2() {
|
|||||||
|
|
||||||
fn test_reverse_cyrillic_with_string_from_wide() {
|
fn test_reverse_cyrillic_with_string_from_wide() {
|
||||||
s := 'Проба'
|
s := 'Проба'
|
||||||
z := unsafe { string_from_wide(s.to_wide()) }
|
ws := s.to_wide()
|
||||||
|
z := unsafe { string_from_wide(ws) }
|
||||||
assert z == s
|
assert z == s
|
||||||
}
|
}
|
||||||
|
116
vlib/builtin/wchar/wchar.c.v
Normal file
116
vlib/builtin/wchar/wchar.c.v
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
module wchar
|
||||||
|
|
||||||
|
import strings
|
||||||
|
|
||||||
|
#include <wchar.h>
|
||||||
|
|
||||||
|
// Makes the C `wchar_t` type (from the <wchar.h> header included by this file) known to V.
// NOTE(review): `[typedef]` presumably makes V refer to the type as plain `wchar_t`,
// rather than as `struct wchar_t` - confirm against the V C-interop documentation.
[typedef]
|
||||||
|
struct C.wchar_t {}
|
||||||
|
|
||||||
|
// Character is a type, that eases working with the platform dependent C.wchar_t type.
|
||||||
|
// Note: the size of C.wchar_t varies between platforms, it is 2 bytes on windows,
|
||||||
|
// and usually 4 bytes elsewhere.
|
||||||
|
pub type Character = C.wchar_t
|
||||||
|
|
||||||
|
// zero is a Character, that in C L"" strings represents the string end character (terminator).
|
||||||
|
pub const zero = from_rune(0)
|
||||||
|
|
||||||
|
// str returns the string representation of the given Character.
// The Character is first converted to a V rune, and then that rune is stringified.
pub fn (a Character) str() string {
	r := a.to_rune()
	return r.str()
}
|
||||||
|
|
||||||
|
// == compares two Characters for equality, by comparing their numeric values.
// TODO: the default == operator, that V generates, does not work for C.wchar_t .
[inline]
pub fn (a Character) == (b Character) bool {
	lhs := u64(a)
	rhs := u64(b)
	return lhs == rhs
}
|
||||||
|
|
||||||
|
// to_rune creates a V rune, given a Character.
// The conversion is a plain value cast. Reinterpreting the address of `c` as a `&rune`
// and dereferencing it must be avoided here: a rune is 4 bytes wide, while a Character
// is only 2 bytes wide on some platforms (windows), so such a 4 byte load would read
// past the end of `c`, and could put garbage in the upper half of the result.
[inline]
pub fn (c Character) to_rune() rune {
	return rune(c)
}
|
||||||
|
|
||||||
|
// from_rune creates a Character, given a V rune.
// The storage of the rune is reinterpreted as a Character, i.e. the first
// sizeof(Character) bytes of `r` are read.
[inline]
pub fn from_rune(r rune) Character {
	unsafe {
		pc := &Character(&r)
		return *pc
	}
}
|
||||||
|
|
||||||
|
// length_in_characters counts the Characters in the given wchar_t* wide C style L"" string,
// up to, but not including, its terminating zero Character.
// Example: assert unsafe { wchar.length_in_characters(wchar.from_string('abc')) } == 3
// See also `length_in_bytes` .
[unsafe]
pub fn length_in_characters(p voidptr) int {
	chars := &Character(p)
	mut count := 0
	for {
		// stop at the first terminator; it is not counted
		if unsafe { chars[count] == wchar.zero } {
			break
		}
		count++
	}
	return count
}
|
||||||
|
|
||||||
|
// length_in_bytes returns the size in bytes (without the terminator) of the given
// wchar_t* wide C style L"" string. It is the length in characters, multiplied by
// the size of a single Character.
// Note that the size of wchar_t is different on the different platforms, thus the length in
// bytes for the same data converted from UTF-8 to a &Character buffer, will be different as well.
// i.e. unsafe { wchar.length_in_bytes(wchar.from_string('abc')) } will be 12 on unix, but
// 6 on windows.
[unsafe]
pub fn length_in_bytes(p voidptr) int {
	char_count := unsafe { length_in_characters(p) }
	char_size := int(sizeof(Character))
	return char_count * char_size
}
|
||||||
|
|
||||||
|
// to_string converts a 0 terminated wchar_t* wide C style L"" string, into a
// V string, encoded in UTF-8. The terminator is searched for first, in order
// to find the length of the input, and then the work is delegated to to_string2.
// Note, that the size of wchar_t is platform-dependent, and is *2 bytes* on windows,
// while it is *4 bytes* on most everything else.
// Unless you are interfacing with a C library, that does specifically use `wchar_t`,
// consider using `string_from_wide` instead, which will always assume that the input
// data is in an UTF-16 encoding, no matter what the platform is.
[unsafe]
pub fn to_string(p voidptr) string {
	n := unsafe { length_in_characters(p) }
	return unsafe { to_string2(p, n) }
}
|
||||||
|
|
||||||
|
// to_string2 creates a V string, encoded in UTF-8, given a `C.wchar_t*`
// wide C style L"" string, and its length `len`, measured in Characters.
// Note, that the size of `C.wchar_t` is platform-dependent,
// and is *2 bytes* on windows, while *4* on most everything else.
// When a Character is 2 bytes wide, the input is treated as UTF-16, and each valid
// surrogate pair (a high surrogate, immediately followed by a low surrogate) is decoded
// into the single code point, that it represents. Unpaired surrogates are passed
// through unchanged. When a Character is wider, each one is taken as a whole code point.
// Unless you are interfacing with a C library, that does specifically use wchar_t,
// consider using string_from_wide2 instead, which will always assume that the input
// data is in an UTF-16 encoding, no matter what the platform is.
[manualfree; unsafe]
pub fn to_string2(p voidptr, len int) string {
	pc := &Character(p)
	mut sb := strings.new_builder(len)
	defer {
		unsafe { sb.free() }
	}
	// 2 byte wide characters can not hold code points above 0xFFFF directly;
	// those are stored as 2 consecutive surrogate code units instead.
	is_utf16 := sizeof(Character) == 2
	mut i := 0
	for i < len {
		mut u := unsafe { rune(pc[i]) }
		i++
		if is_utf16 && u >= 0xD800 && u <= 0xDBFF && i < len {
			lo := unsafe { rune(pc[i]) }
			if lo >= 0xDC00 && lo <= 0xDFFF {
				// merge the pair into a single code point, and consume the low surrogate too
				u = rune(0x10000 + ((u32(u) - 0xD800) << 10) + (u32(lo) - 0xDC00))
				i++
			}
		}
		sb.write_rune(u)
	}
	res := sb.str()
	return res
}
|
||||||
|
|
||||||
|
// from_string converts the V string (in UTF-8 encoding), into a newly allocated,
// 0 terminated, platform specific buffer of C.wchar_t .
// The conversion is done by processing each rune of the input string 1 by 1.
// When a Character is 2 bytes wide (windows), a rune above 0xFFFF does not fit in a
// single Character, so it is stored as an UTF-16 surrogate pair (2 Characters), instead
// of being truncated. On platforms with wider Characters, each rune is stored as is.
// The caller owns the returned buffer.
[manualfree]
pub fn from_string(s string) &Character {
	srunes := s.runes()
	is_utf16 := sizeof(Character) == 2
	// Count the needed Characters first: 1 per rune, plus 1 more for
	// each rune, that has to be split into a surrogate pair.
	mut units := srunes.len
	if is_utf16 {
		for r in srunes {
			if r > 0xFFFF {
				units++
			}
		}
	}
	unsafe {
		// +1 for the terminating zero Character
		mut result := &Character(vcalloc_noscan((units + 1) * int(sizeof(Character))))
		mut j := 0
		for r in srunes {
			if is_utf16 && r > 0xFFFF {
				v := u32(r) - 0x10000
				result[j] = from_rune(rune(0xD800 + (v >> 10)))
				result[j + 1] = from_rune(rune(0xDC00 + (v & 0x3FF)))
				j += 2
			} else {
				result[j] = from_rune(r)
				j++
			}
		}
		result[j] = wchar.zero
		return result
	}
}
|
36
vlib/builtin/wchar/wchar_test.v
Normal file
36
vlib/builtin/wchar/wchar_test.v
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
import builtin.wchar
|
||||||
|
|
||||||
|
const wide_serial_number_unix = [u16(67), 0, 76, 0, 52, 0, 54, 0, 73, 0, 49, 0, 65, 0, 48, 0, 48,
|
||||||
|
0, 54, 0, 52, 0, 57, 0, 0, 0, 0]
|
||||||
|
|
||||||
|
const wide_serial_number_windows = wide_serial_number_unix.map(u8(it))
|
||||||
|
|
||||||
|
const swide_serial_number = 'CL46I1A00649'
|
||||||
|
|
||||||
|
// test_from_to_rune checks that converting a rune to a Character and back,
// gives the same rune, for every rune of the sample string, and for 0.
fn test_from_to_rune() {
	runes := swide_serial_number.runes()
	for i := 0; i < runes.len; i++ {
		expected := runes[i]
		converted := wchar.from_rune(expected)
		assert converted.to_rune() == expected
	}
	terminator := wchar.from_rune(0)
	assert terminator.to_rune() == 0
}
|
||||||
|
|
||||||
|
// test_to_string checks that a raw, 0 terminated wide buffer is measured and
// decoded correctly. The buffer with the platform appropriate layout is used:
// the u8 based one on windows, and the u16 based one elsewhere.
fn test_to_string() {
	mut p := voidptr(wide_serial_number_unix.data)
	$if windows {
		p = wide_serial_number_windows.data
	}
	measured := unsafe { wchar.length_in_characters(p) }
	assert measured == swide_serial_number.len
	s := unsafe { wchar.to_string(p) }
	dump(s)
	assert s == swide_serial_number
}
|
||||||
|
|
||||||
|
// test_from_string checks that wchar.from_string produces a buffer, that contains
// every rune of the input string, in order, followed by the zero terminator.
// All positions are verified (not just a sample of them), and the length that
// wchar.length_in_characters reports for the buffer, is checked too.
fn test_from_string() {
	x := wchar.from_string(swide_serial_number)
	expected := swide_serial_number.runes()
	for i, r in expected {
		assert unsafe { x[i] } == wchar.from_rune(r)
	}
	assert unsafe { x[expected.len] } == wchar.zero
	assert unsafe { wchar.length_in_characters(x) } == expected.len
}
|
Loading…
Reference in New Issue
Block a user