module wchar

import strings

#include <wchar.h>

// C.wchar_t is the wide character type of C, that is provided by <wchar.h> .
// It is declared here with an empty body, so that V code can refer to it by name.
[typedef]
struct C.wchar_t {}

// Character is a type that eases working with the platform dependent C.wchar_t type.
// Note: the size of C.wchar_t varies between platforms: it is 2 bytes on windows,
// and usually 4 bytes elsewhere.
pub type Character = C.wchar_t

// zero is the Character that is produced by `from_rune(0)`.
// In C L"" strings, it represents the string end character (the terminator).
pub const zero = from_rune(0)

// str returns a string representation of the given Character.
// The Character is first converted to a V rune, then that rune is converted to a string.
pub fn (a Character) str() string {
	r := a.to_rune()
	return r.str()
}

// == is an equality operator, that eases comparing Characters.
// Both operands are cast to u64, and then the resulting integers are compared.
// TODO: the default == operator, that V generates, does not work for C.wchar_t .
[inline]
pub fn (a Character) == (b Character) bool {
	lhs := u64(a)
	rhs := u64(b)
	return lhs == rhs
}

// to_rune creates a V rune, given a Character.
// The Character is converted by value, so the result is well defined, no matter
// what the size of C.wchar_t is on the current platform.
[inline]
pub fn (c Character) to_rune() rune {
	// Note: do not reinterpret `&c` as a `&rune` here. A rune is always 4 bytes, while
	// C.wchar_t is only 2 bytes on some platforms (windows). Dereferencing such a pointer
	// would read 2 bytes past the end of `c`, and leave garbage in the upper half of the rune.
	return rune(c)
}

// from_rune creates a Character, given a V rune.
// The storage of the rune is viewed through a &Character pointer, and the Character
// found there is returned.
[inline]
pub fn from_rune(r rune) Character {
	unsafe {
		pc := &Character(&r)
		return *pc
	}
}

// length_in_characters returns the length of the given wchar_t* wide C style L"" string.
// The length is the count of Characters, that are located before the first `wchar.zero`
// terminator. The terminator itself is not counted.
// Example: assert unsafe { wchar.length_in_characters(wchar.from_string('abc')) } == 3
// See also `length_in_bytes` .
[unsafe]
pub fn length_in_characters(p voidptr) int {
	pc := &Character(p)
	mut n := 0
	for {
		// stop at the first terminator
		if unsafe { pc[n] == wchar.zero } {
			break
		}
		n++
	}
	return n
}

// length_in_bytes returns the length of the given wchar_t* wide C style L"" string in bytes.
// It is the result of `length_in_characters`, multiplied by the size of a single Character.
// Note that the size of wchar_t is different on the different platforms, thus the length in
// bytes for the same data converted from UTF-8 to a &Character buffer, will be different as well.
// i.e. unsafe { wchar.length_in_bytes(wchar.from_string('abc')) } will be 12 on unix, but
// 6 on windows.
[unsafe]
pub fn length_in_bytes(p voidptr) int {
	nchars := unsafe { length_in_characters(p) }
	return nchars * int(sizeof(Character))
}

// to_string creates a V string, encoded in UTF-8, given a wchar_t*
// wide C style L"" string. It relies on the string having a 0 terminator at its end,
// to determine the string's length, and then it delegates the conversion to `to_string2`.
// Note that the size of wchar_t is platform-dependent, and is *2 bytes* on windows,
// while it is *4 bytes* on most everything else.
// Unless you are interfacing with a C library, that does specifically use `wchar_t`,
// consider using `string_from_wide` instead, which will always assume that the input
// data is in an UTF-16 encoding, no matter what the platform is.
[unsafe]
pub fn to_string(p voidptr) string {
	return unsafe { to_string2(p, length_in_characters(p)) }
}

// to_string2 creates a V string, encoded in UTF-8, given a `C.wchar_t*`
// wide C style L"" string, and its length `len`, measured in Characters.
// Each of the first `len` Characters is cast to a rune, and is appended to the result.
// Note that the size of `C.wchar_t` is platform-dependent, and is *2 bytes* on windows,
// while it is *4 bytes* on most everything else.
// Unless you are interfacing with a C library, that does specifically use `wchar_t`,
// consider using `string_from_wide2` instead, which will always assume that the input
// data is in an UTF-16 encoding, no matter what the platform is.
[manualfree; unsafe]
pub fn to_string2(p voidptr, len int) string {
	mut builder := strings.new_builder(len)
	defer {
		// the builder is only temporary storage, and is released on exit
		unsafe { builder.free() }
	}
	chars := &Character(p)
	mut idx := 0
	for idx < len {
		builder.write_rune(unsafe { rune(chars[idx]) })
		idx++
	}
	// take the result out of the builder, before the deferred free runs
	out := builder.str()
	return out
}

// from_string converts the V string (in UTF-8 encoding), into a newly allocated
// platform specific buffer of C.wchar_t .
// The conversion is done by processing each rune of the input string 1 by 1.
// The buffer has room for 1 more Character than the input has runes, and that last
// Character is set to `wchar.zero`, so the result is always 0 terminated.
[manualfree]
pub fn from_string(s string) &Character {
	runes := s.runes()
	count := runes.len
	unsafe {
		mut buf := &Character(vcalloc_noscan((count + 1) * int(sizeof(Character))))
		for idx := 0; idx < count; idx++ {
			buf[idx] = from_rune(runes[idx])
		}
		buf[count] = wchar.zero
		return buf
	}
}