|
|
|
@@ -1,14 +1,11 @@
|
|
|
|
|
// urllib parses URLs and implements query escaping.
|
|
|
|
|
|
|
|
|
|
// See RFC 3986. This module generally follows RFC 3986, except where
|
|
|
|
|
// it deviates for compatibility reasons.
|
|
|
|
|
|
|
|
|
|
// Based off: https://github.com/golang/go/blob/master/src/net/url/url.go
|
|
|
|
|
// Last commit: https://github.com/golang/go/commit/fe2ed5054176935d4adcf13e891715ccf2ee3cce
|
|
|
|
|
// Copyright 2009 The Go Authors. All rights reserved.
|
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
|
|
module urllib
|
|
|
|
|
|
|
|
|
|
import strings
|
|
|
|
@@ -25,12 +22,14 @@ enum EncodingMode {
|
|
|
|
|
|
|
|
|
|
const (
|
|
|
|
|
err_msg_escape = 'unescape: invalid URL escape'
|
|
|
|
|
err_msg_parse = 'parse: failed parsing url'
|
|
|
|
|
err_msg_parse = 'parse: failed parsing url'
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
fn error_msg(message, val string) string {
|
|
|
|
|
mut msg := 'net.urllib.$message'
|
|
|
|
|
if val != '' { msg = '$msg ($val)' }
|
|
|
|
|
if val != '' {
|
|
|
|
|
msg = '$msg ($val)'
|
|
|
|
|
}
|
|
|
|
|
return msg
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@@ -44,10 +43,9 @@ fn should_escape(c byte, mode EncodingMode) bool {
|
|
|
|
|
if (`a` <= c && c <= `z`) || (`A` <= c && c <= `Z`) || (`0` <= c && c <= `9`) {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if mode == .encode_host || mode == .encode_zone {
|
|
|
|
|
// §3.2.2 host allows
|
|
|
|
|
// sub-delims = `!` / `$` / `&` / ``` / `(` / `)` / `*` / `+` / `,` / `;` / `=`
|
|
|
|
|
// sub-delims = `!` / `$` / `&` / ``` / `(` / `)` / `*` / `+` / `,` / `;` / `=`
|
|
|
|
|
// as part of reg-name.
|
|
|
|
|
// We add : because we include :port as part of host.
|
|
|
|
|
// We add [ ] because we include [ipv6]:port as part of host.
|
|
|
|
@@ -55,59 +53,58 @@ fn should_escape(c byte, mode EncodingMode) bool {
|
|
|
|
|
// we could possibly allow, and parse will reject them if we
|
|
|
|
|
// escape them (because hosts can`t use %-encoding for
|
|
|
|
|
// ASCII bytes).
|
|
|
|
|
if c in [`!`, `$`, `&`, `\\`, `(`, `)`, `*`, `+`, `,`, `;`, `=`,
|
|
|
|
|
`:`, `[`, `]`, `<`, `>`, `"`]
|
|
|
|
|
{
|
|
|
|
|
if c in [`!`, `$`, `&`, `\\`, `(`, `)`, `*`, `+`, `,`, `;`, `=`, `:`, `[`, `]`, `<`, `>`, `"`] {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
match c {
|
|
|
|
|
`-`, `_`, `.`, `~` { // §2.3 Unreserved characters (mark)
|
|
|
|
|
`-`, `_`, `.`, `~` {
|
|
|
|
|
// §2.3 Unreserved characters (mark)
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
`$`, `&`, `+`, `,`, `/`, `:`, `;`, `=`, `?`, `@` { // §2.2 Reserved characters (reserved)
|
|
|
|
|
// Different sections of the URL allow a few of
|
|
|
|
|
// the reserved characters to appear unescaped.
|
|
|
|
|
match mode {
|
|
|
|
|
.encode_path { // §3.3
|
|
|
|
|
// The RFC allows : @ & = + $ but saves / ; , for assigning
|
|
|
|
|
// meaning to individual path segments. This package
|
|
|
|
|
// only manipulates the path as a whole, so we allow those
|
|
|
|
|
// last three as well. That leaves only ? to escape.
|
|
|
|
|
return c == `?`
|
|
|
|
|
`$`, `&`, `+`, `,`, `/`, `:`, `;`, `=`, `?`, `@` {
|
|
|
|
|
// §2.2 Reserved characters (reserved)
|
|
|
|
|
// Different sections of the URL allow a few of
|
|
|
|
|
// the reserved characters to appear unescaped.
|
|
|
|
|
match mode {
|
|
|
|
|
.encode_path {
|
|
|
|
|
// §3.3
|
|
|
|
|
// The RFC allows : @ & = + $ but saves / ; , for assigning
|
|
|
|
|
// meaning to individual path segments. This package
|
|
|
|
|
// only manipulates the path as a whole, so we allow those
|
|
|
|
|
// last three as well. That leaves only ? to escape.
|
|
|
|
|
return c == `?`
|
|
|
|
|
}
|
|
|
|
|
.encode_path_segment {
|
|
|
|
|
// §3.3
|
|
|
|
|
// The RFC allows : @ & = + $ but saves / ; , for assigning
|
|
|
|
|
// meaning to individual path segments.
|
|
|
|
|
return c == `/` || c == `;` || c == `,` || c == `?`
|
|
|
|
|
}
|
|
|
|
|
.encode_user_password {
|
|
|
|
|
// §3.2.1
|
|
|
|
|
// The RFC allows `;`, `:`, `&`, `=`, `+`, `$`, and `,` in
|
|
|
|
|
// userinfo, so we must escape only `@`, `/`, and `?`.
|
|
|
|
|
// The parsing of userinfo treats `:` as special so we must escape
|
|
|
|
|
// that too.
|
|
|
|
|
return c == `@` || c == `/` || c == `?` || c == `:`
|
|
|
|
|
}
|
|
|
|
|
.encode_query_component {
|
|
|
|
|
// §3.4
|
|
|
|
|
// The RFC reserves (so we must escape) everything.
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
.encode_fragment {
|
|
|
|
|
// §4.1
|
|
|
|
|
// The RFC text is silent but the grammar allows
|
|
|
|
|
// everything, so escape nothing.
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
}}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
.encode_path_segment { // §3.3
|
|
|
|
|
// The RFC allows : @ & = + $ but saves / ; , for assigning
|
|
|
|
|
// meaning to individual path segments.
|
|
|
|
|
return c == `/` || c == `;` || c == `,` || c == `?`
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
.encode_user_password { // §3.2.1
|
|
|
|
|
// The RFC allows `;`, `:`, `&`, `=`, `+`, `$`, and `,` in
|
|
|
|
|
// userinfo, so we must escape only `@`, `/`, and `?`.
|
|
|
|
|
// The parsing of userinfo treats `:` as special so we must escape
|
|
|
|
|
// that too.
|
|
|
|
|
return c == `@` || c == `/` || c == `?` || c == `:`
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
.encode_query_component { // §3.4
|
|
|
|
|
// The RFC reserves (so we must escape) everything.
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
.encode_fragment { // §4.1
|
|
|
|
|
// The RFC text is silent but the grammar allows
|
|
|
|
|
// everything, so escape nothing.
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
else {}
|
|
|
|
|
}
|
|
|
|
|
} else {}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
else {
|
|
|
|
|
}}
|
|
|
|
|
if mode == .encode_fragment {
|
|
|
|
|
// RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are
|
|
|
|
|
// included in reserved from RFC 2396 §2.2. The remaining sub-delims do not
|
|
|
|
@@ -116,13 +113,12 @@ fn should_escape(c byte, mode EncodingMode) bool {
|
|
|
|
|
// escape single quote to avoid breaking callers that had previously assumed that
|
|
|
|
|
// single quotes would be escaped. See issue #19917.
|
|
|
|
|
match c {
|
|
|
|
|
`!`, `(`, `)`, `*`{
|
|
|
|
|
`!`, `(`, `)`, `*` {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
else {}
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
}}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Everything else must be escaped.
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
@@ -154,7 +150,7 @@ fn unescape(s_ string, mode EncodingMode) ?string {
|
|
|
|
|
// Count %, check that they're well-formed.
|
|
|
|
|
mut n := 0
|
|
|
|
|
mut has_plus := false
|
|
|
|
|
for i := 0; i < s.len; {
|
|
|
|
|
for i := 0; i < s.len; {
|
|
|
|
|
x := s[i]
|
|
|
|
|
match x {
|
|
|
|
|
`%` {
|
|
|
|
@@ -162,7 +158,7 @@ fn unescape(s_ string, mode EncodingMode) ?string {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
n++
|
|
|
|
|
if i+2 >= s.len || !ishex(s[i+1]) || !ishex(s[i+2]) {
|
|
|
|
|
if i + 2 >= s.len || !ishex(s[i + 1]) || !ishex(s[i + 2]) {
|
|
|
|
|
s = s[i..]
|
|
|
|
|
if s.len > 3 {
|
|
|
|
|
s = s[..3]
|
|
|
|
@@ -175,8 +171,8 @@ fn unescape(s_ string, mode EncodingMode) ?string {
|
|
|
|
|
// But https://tools.ietf.org/html/rfc6874#section-2
|
|
|
|
|
// introduces %25 being allowed to escape a percent sign
|
|
|
|
|
// in IPv6 scoped-address literals. Yay.
|
|
|
|
|
if mode == .encode_host && unhex(s[i+1]) < 8 && s[i..i+3] != '%25' {
|
|
|
|
|
return error(error_msg(err_msg_escape, s[i..i+3]))
|
|
|
|
|
if mode == .encode_host && unhex(s[i + 1]) < 8 && s[i..i + 3] != '%25' {
|
|
|
|
|
return error(error_msg(err_msg_escape, s[i..i + 3]))
|
|
|
|
|
}
|
|
|
|
|
if mode == .encode_zone {
|
|
|
|
|
// RFC 6874 says basically 'anything goes' for zone identifiers
|
|
|
|
@@ -186,47 +182,46 @@ fn unescape(s_ string, mode EncodingMode) ?string {
|
|
|
|
|
// That is, you can use escaping in the zone identifier but not
|
|
|
|
|
// to introduce bytes you couldn't just write directly.
|
|
|
|
|
// But Windows puts spaces here! Yay.
|
|
|
|
|
v := (unhex(s[i+1])<<byte(4) | unhex(s[i+2]))
|
|
|
|
|
if s[i..i+3] != '%25' && v != ` ` && should_escape(v, .encode_host) {
|
|
|
|
|
error(error_msg(err_msg_escape, s[i..i+3]))
|
|
|
|
|
v := (unhex(s[i + 1])<<byte(4) | unhex(s[i + 2]))
|
|
|
|
|
if s[i..i + 3] != '%25' && v != ` ` && should_escape(v, .encode_host) {
|
|
|
|
|
error(error_msg(err_msg_escape, s[i..i + 3]))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
i += 3
|
|
|
|
|
}
|
|
|
|
|
`+`{
|
|
|
|
|
`+` {
|
|
|
|
|
has_plus = mode == .encode_query_component
|
|
|
|
|
i++
|
|
|
|
|
} else {
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
if (mode == .encode_host || mode == .encode_zone) && s[i] < 0x80 && should_escape(s[i], mode) {
|
|
|
|
|
error(error_msg('unescape: invalid character in host name', s[i..i+1]))
|
|
|
|
|
error(error_msg('unescape: invalid character in host name', s[i..i + 1]))
|
|
|
|
|
}
|
|
|
|
|
i++
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if n == 0 && !has_plus {
|
|
|
|
|
return s
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
mut t := strings.new_builder(s.len - 2*n)
|
|
|
|
|
mut t := strings.new_builder(s.len - 2 * n)
|
|
|
|
|
for i := 0; i < s.len; i++ {
|
|
|
|
|
x := s[i]
|
|
|
|
|
match x {
|
|
|
|
|
`%` {
|
|
|
|
|
t.write((unhex(s[i+1])<<byte(4) | unhex(s[i+2])).str() )
|
|
|
|
|
t.write((unhex(s[i + 1])<<byte(4) | unhex(s[i + 2])).str())
|
|
|
|
|
i += 2
|
|
|
|
|
}
|
|
|
|
|
`+` {
|
|
|
|
|
if mode == .encode_query_component {
|
|
|
|
|
t.write(' ')
|
|
|
|
|
} else {
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
t.write('+')
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
t.write(s[i].str())
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
t.write(s[i].str())
|
|
|
|
|
}}
|
|
|
|
|
}
|
|
|
|
|
return t.str()
|
|
|
|
|
}
|
|
|
|
@@ -252,26 +247,24 @@ fn escape(s string, mode EncodingMode) string {
|
|
|
|
|
if should_escape(c, mode) {
|
|
|
|
|
if c == ` ` && mode == .encode_query_component {
|
|
|
|
|
space_count++
|
|
|
|
|
} else {
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
hex_count++
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if space_count == 0 && hex_count == 0 {
|
|
|
|
|
return s
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
buf := [byte(0)].repeat(64)
|
|
|
|
|
mut t := []byte
|
|
|
|
|
|
|
|
|
|
required := s.len + 2*hex_count
|
|
|
|
|
required := s.len + 2 * hex_count
|
|
|
|
|
if required <= buf.len {
|
|
|
|
|
t = buf[..required]
|
|
|
|
|
} else {
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
t = [byte(0)].repeat(required)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if hex_count == 0 {
|
|
|
|
|
copy(t, s.bytes())
|
|
|
|
|
for i := 0; i < s.len; i++ {
|
|
|
|
@@ -279,9 +272,8 @@ fn escape(s string, mode EncodingMode) string {
|
|
|
|
|
t[i] = `+`
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return string(t, t.len)
|
|
|
|
|
return string(t,t.len)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
upperhex := '0123456789ABCDEF'
|
|
|
|
|
mut j := 0
|
|
|
|
|
for i := 0; i < s.len; i++ {
|
|
|
|
@@ -289,28 +281,30 @@ fn escape(s string, mode EncodingMode) string {
|
|
|
|
|
if c1 == ` ` && mode == .encode_query_component {
|
|
|
|
|
t[j] = `+`
|
|
|
|
|
j++
|
|
|
|
|
} else if should_escape(c1, mode) {
|
|
|
|
|
}
|
|
|
|
|
else if should_escape(c1, mode) {
|
|
|
|
|
t[j] = `%`
|
|
|
|
|
t[j+1] = upperhex[c1>>4]
|
|
|
|
|
t[j+2] = upperhex[c1&15]
|
|
|
|
|
t[j + 1] = upperhex[c1>>4]
|
|
|
|
|
t[j + 2] = upperhex[c1 & 15]
|
|
|
|
|
j += 3
|
|
|
|
|
} else {
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
t[j] = s[i]
|
|
|
|
|
j++
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return string(t, t.len)
|
|
|
|
|
return string(t,t.len)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// A URL represents a parsed URL (technically, a URI reference).
|
|
|
|
|
//
|
|
|
|
|
// The general form represented is:
|
|
|
|
|
//
|
|
|
|
|
// [scheme:][//[userinfo@]host][/]path[?query][#fragment]
|
|
|
|
|
// [scheme:][//[userinfo@]host][/]path[?query][#fragment]
|
|
|
|
|
//
|
|
|
|
|
// URLs that do not start with a slash after the scheme are interpreted as:
|
|
|
|
|
//
|
|
|
|
|
// scheme:opaque[?query][#fragment]
|
|
|
|
|
// scheme:opaque[?query][#fragment]
|
|
|
|
|
//
|
|
|
|
|
// Note that the path field is stored in decoded form: /%47%6f%2f becomes /Go/.
|
|
|
|
|
// A consequence is that it is impossible to tell which slashes in the path were
|
|
|
|
@@ -323,22 +317,21 @@ fn escape(s string, mode EncodingMode) string {
|
|
|
|
|
pub struct URL {
|
|
|
|
|
pub mut:
|
|
|
|
|
scheme string
|
|
|
|
|
opaque string // encoded opaque data
|
|
|
|
|
opaque string // encoded opaque data
|
|
|
|
|
user &Userinfo // username and password information
|
|
|
|
|
host string // host or host:port
|
|
|
|
|
path string // path (relative paths may omit leading slash)
|
|
|
|
|
raw_path string // encoded path hint (see escaped_path method)
|
|
|
|
|
force_query bool // append a query ('?') even if raw_query is empty
|
|
|
|
|
raw_query string // encoded query values, without '?'
|
|
|
|
|
fragment string // fragment for references, without '#'
|
|
|
|
|
host string // host or host:port
|
|
|
|
|
path string // path (relative paths may omit leading slash)
|
|
|
|
|
raw_path string // encoded path hint (see escaped_path method)
|
|
|
|
|
force_query bool // append a query ('?') even if raw_query is empty
|
|
|
|
|
raw_query string // encoded query values, without '?'
|
|
|
|
|
fragment string // fragment for references, without '#'
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// user returns a Userinfo containing the provided username
|
|
|
|
|
// and no password set.
|
|
|
|
|
pub fn user(username string) &Userinfo {
|
|
|
|
|
return &Userinfo{
|
|
|
|
|
username: username,
|
|
|
|
|
password: '',
|
|
|
|
|
username: username
|
|
|
|
|
password: ''
|
|
|
|
|
password_set: false
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
@@ -352,7 +345,8 @@ pub fn user(username string) &Userinfo {
|
|
|
|
|
// information in clear text (such as URI) has proven to be a
|
|
|
|
|
// security risk in almost every case where it has been used.''
|
|
|
|
|
fn user_password(username, password string) &Userinfo {
|
|
|
|
|
return &Userinfo{username, password, true}
|
|
|
|
|
return &Userinfo{
|
|
|
|
|
username,password,true}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// The Userinfo type is an immutable encapsulation of username and
|
|
|
|
@@ -401,7 +395,7 @@ fn split_by_scheme(rawurl string) ?[]string {
|
|
|
|
|
if i == 0 {
|
|
|
|
|
return error(error_msg('split_by_scheme: missing protocol scheme', ''))
|
|
|
|
|
}
|
|
|
|
|
return [rawurl[..i], rawurl[i+1..]]
|
|
|
|
|
return [rawurl[..i], rawurl[i + 1..]]
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
// we have encountered an invalid character,
|
|
|
|
@@ -422,15 +416,15 @@ fn get_scheme(rawurl string) ?string {
|
|
|
|
|
// split slices s into two substrings separated by the first occurence of
|
|
|
|
|
// sep. If cutc is true then sep is included with the second substring.
|
|
|
|
|
// If sep does not occur in s then s and the empty string is returned.
|
|
|
|
|
fn split(s string, sep byte, cutc bool) (string, string) {
|
|
|
|
|
fn split(s string, sep byte, cutc bool) (string,string) {
|
|
|
|
|
i := s.index_byte(sep)
|
|
|
|
|
if i < 0 {
|
|
|
|
|
return s, ''
|
|
|
|
|
return s,''
|
|
|
|
|
}
|
|
|
|
|
if cutc {
|
|
|
|
|
return s[..i], s[i+1..]
|
|
|
|
|
return s[..i],s[i + 1..]
|
|
|
|
|
}
|
|
|
|
|
return s[..i], s[i..]
|
|
|
|
|
return s[..i],s[i..]
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// parse parses rawurl into a URL structure.
|
|
|
|
@@ -441,7 +435,7 @@ fn split(s string, sep byte, cutc bool) (string, string) {
|
|
|
|
|
// error, due to parsing ambiguities.
|
|
|
|
|
pub fn parse(rawurl string) ?URL {
|
|
|
|
|
// Cut off #frag
|
|
|
|
|
u, frag := split(rawurl, `#`, true)
|
|
|
|
|
u,frag := split(rawurl, `#`, true)
|
|
|
|
|
mut url := parse_url(u, false) or {
|
|
|
|
|
return error(error_msg(err_msg_parse, u))
|
|
|
|
|
}
|
|
|
|
@@ -472,17 +466,16 @@ fn parse_url(rawurl string, via_request bool) ?URL {
|
|
|
|
|
if string_contains_ctl_byte(rawurl) {
|
|
|
|
|
return error(error_msg('parse_url: invalid control character in URL', rawurl))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if rawurl == '' && via_request {
|
|
|
|
|
return error(error_msg('parse_url: empty URL', rawurl))
|
|
|
|
|
}
|
|
|
|
|
mut url := URL{user:0}
|
|
|
|
|
|
|
|
|
|
mut url := URL{
|
|
|
|
|
user: 0
|
|
|
|
|
}
|
|
|
|
|
if rawurl == '*' {
|
|
|
|
|
url.path = '*'
|
|
|
|
|
return url
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Split off possible leading 'http:', 'mailto:', etc.
|
|
|
|
|
// Cannot contain escaped characters.
|
|
|
|
|
p := split_by_scheme(rawurl) or {
|
|
|
|
@@ -491,17 +484,16 @@ fn parse_url(rawurl string, via_request bool) ?URL {
|
|
|
|
|
url.scheme = p[0]
|
|
|
|
|
mut rest := p[1]
|
|
|
|
|
url.scheme = url.scheme.to_lower()
|
|
|
|
|
|
|
|
|
|
// if rest.ends_with('?') && strings.count(rest, '?') == 1 {
|
|
|
|
|
if rest.ends_with('?') && !rest[..1].contains('?') {
|
|
|
|
|
url.force_query = true
|
|
|
|
|
rest = rest[..rest.len-1]
|
|
|
|
|
} else {
|
|
|
|
|
r, raw_query := split(rest, `?`, true)
|
|
|
|
|
rest = rest[..rest.len - 1]
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
r,raw_query := split(rest, `?`, true)
|
|
|
|
|
rest = r
|
|
|
|
|
url.raw_query = raw_query
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if !rest.starts_with('/') {
|
|
|
|
|
if url.scheme != '' {
|
|
|
|
|
// We consider rootless paths per RFC 3986 as opaque.
|
|
|
|
@@ -511,23 +503,25 @@ fn parse_url(rawurl string, via_request bool) ?URL {
|
|
|
|
|
if via_request {
|
|
|
|
|
return error(error_msg('parse_url: invalid URI for request', ''))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Avoid confusion with malformed schemes, like cache_object:foo/bar.
|
|
|
|
|
// See golang.org/issue/16822.
|
|
|
|
|
//
|
|
|
|
|
// RFC 3986, §3.3:
|
|
|
|
|
// In addition, a URI reference (Section 4.1) may be a relative-path reference,
|
|
|
|
|
// in which case the first path segment cannot contain a colon (':') character.
|
|
|
|
|
colon := rest.index(':') or { return error('there should be a : in the URL') }
|
|
|
|
|
slash := rest.index('/') or { return error('there should be a / in the URL') }
|
|
|
|
|
colon := rest.index(':') or {
|
|
|
|
|
return error('there should be a : in the URL')
|
|
|
|
|
}
|
|
|
|
|
slash := rest.index('/') or {
|
|
|
|
|
return error('there should be a / in the URL')
|
|
|
|
|
}
|
|
|
|
|
if colon >= 0 && (slash < 0 || colon < slash) {
|
|
|
|
|
// First path segment has colon. Not allowed in relative URL.
|
|
|
|
|
return error(error_msg('parse_url: first path segment in URL cannot contain colon', ''))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ((url.scheme != '' || !via_request) && !rest.starts_with('///')) && rest.starts_with('//') {
|
|
|
|
|
authority, r := split(rest[2..], `/`, false)
|
|
|
|
|
authority,r := split(rest[2..], `/`, false)
|
|
|
|
|
rest = r
|
|
|
|
|
a := parse_authority(authority) or {
|
|
|
|
|
return error(err)
|
|
|
|
@@ -551,7 +545,9 @@ struct ParseAuthorityRes {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn parse_authority(authority string) ?ParseAuthorityRes {
|
|
|
|
|
i := authority.last_index('@') or { -1 }
|
|
|
|
|
i := authority.last_index('@') or {
|
|
|
|
|
-1
|
|
|
|
|
}
|
|
|
|
|
mut host := ''
|
|
|
|
|
mut zuser := user('')
|
|
|
|
|
if i < 0 {
|
|
|
|
@@ -559,14 +555,18 @@ fn parse_authority(authority string) ?ParseAuthorityRes {
|
|
|
|
|
return error(err)
|
|
|
|
|
}
|
|
|
|
|
host = h
|
|
|
|
|
} else {
|
|
|
|
|
h := parse_host(authority[i+1..]) or {
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
h := parse_host(authority[i + 1..]) or {
|
|
|
|
|
return error(err)
|
|
|
|
|
}
|
|
|
|
|
host = h
|
|
|
|
|
}
|
|
|
|
|
if i < 0 {
|
|
|
|
|
return ParseAuthorityRes{host: host, user: zuser}
|
|
|
|
|
return ParseAuthorityRes{
|
|
|
|
|
host: host
|
|
|
|
|
user: zuser
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
mut userinfo := authority[..i]
|
|
|
|
|
if !valid_userinfo(userinfo) {
|
|
|
|
@@ -578,8 +578,9 @@ fn parse_authority(authority string) ?ParseAuthorityRes {
|
|
|
|
|
}
|
|
|
|
|
userinfo = u
|
|
|
|
|
zuser = user(userinfo)
|
|
|
|
|
} else {
|
|
|
|
|
mut username, mut password := split(userinfo, `:`, true)
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
mut username,mut password := split(userinfo, `:`, true)
|
|
|
|
|
u := unescape(username, .encode_user_password) or {
|
|
|
|
|
return error(err)
|
|
|
|
|
}
|
|
|
|
@@ -603,20 +604,19 @@ fn parse_host(host string) ?string {
|
|
|
|
|
// parse an IP-Literal in RFC 3986 and RFC 6874.
|
|
|
|
|
// E.g., '[fe80::1]', '[fe80::1%25en0]', '[fe80::1]:80'.
|
|
|
|
|
mut i := host.last_index(']') or {
|
|
|
|
|
return error(error_msg('parse_host: missing \']\' in host', ''))
|
|
|
|
|
return error(error_msg("parse_host: missing \']\' in host", ''))
|
|
|
|
|
}
|
|
|
|
|
mut colon_port := host[i+1..]
|
|
|
|
|
mut colon_port := host[i + 1..]
|
|
|
|
|
if !valid_optional_port(colon_port) {
|
|
|
|
|
return error(error_msg('parse_host: invalid port $colon_port after host ', ''))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// RFC 6874 defines that %25 (%-encoded percent) introduces
|
|
|
|
|
// the zone identifier, and the zone identifier can use basically
|
|
|
|
|
// any %-encoding it likes. That's different from the host, which
|
|
|
|
|
// can only %-encode non-ASCII bytes.
|
|
|
|
|
// We do impose some restrictions on the zone, to avoid stupidity
|
|
|
|
|
// like newlines.
|
|
|
|
|
if zone := host[..i].index('%25') {
|
|
|
|
|
if zone:=host[..i].index('%25'){
|
|
|
|
|
host1 := unescape(host[..zone], .encode_host) or {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
@@ -628,7 +628,7 @@ fn parse_host(host string) ?string {
|
|
|
|
|
}
|
|
|
|
|
return host1 + host2 + host3
|
|
|
|
|
}
|
|
|
|
|
if idx := host.last_index(':') {
|
|
|
|
|
if idx:=host.last_index(':'){
|
|
|
|
|
colon_port = host[idx..]
|
|
|
|
|
if !valid_optional_port(colon_port) {
|
|
|
|
|
return error(error_msg('parse_host: invalid port $colon_port after host ', ''))
|
|
|
|
@@ -639,10 +639,9 @@ fn parse_host(host string) ?string {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
return h
|
|
|
|
|
//host = h
|
|
|
|
|
//return host
|
|
|
|
|
// host = h
|
|
|
|
|
// return host
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// set_path sets the path and raw_path fields of the URL based on the provided
|
|
|
|
|
// escaped path p. It maintains the invariant that raw_path is only specified
|
|
|
|
|
// when it differs from the default encoding of the path.
|
|
|
|
@@ -660,7 +659,8 @@ fn (u mut URL) set_path(p string) ?bool {
|
|
|
|
|
if p == escp {
|
|
|
|
|
// Default encoding is fine.
|
|
|
|
|
u.raw_path = ''
|
|
|
|
|
} else {
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
u.raw_path = p
|
|
|
|
|
}
|
|
|
|
|
return true
|
|
|
|
@@ -677,7 +677,9 @@ fn (u mut URL) set_path(p string) ?bool {
|
|
|
|
|
// reading u.raw_path directly.
|
|
|
|
|
fn (u &URL) escaped_path() string {
|
|
|
|
|
if u.raw_path != '' && valid_encoded_path(u.raw_path) {
|
|
|
|
|
unescape(u.raw_path, .encode_path) or { return '' }
|
|
|
|
|
unescape(u.raw_path, .encode_path) or {
|
|
|
|
|
return ''
|
|
|
|
|
}
|
|
|
|
|
return u.raw_path
|
|
|
|
|
}
|
|
|
|
|
if u.path == '*' {
|
|
|
|
@@ -705,12 +707,12 @@ fn valid_encoded_path(s string) bool {
|
|
|
|
|
}
|
|
|
|
|
`%` {
|
|
|
|
|
// ok - percent encoded, will decode
|
|
|
|
|
} else {
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
if should_escape(s[i], .encode_path) {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}}
|
|
|
|
|
}
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
@@ -735,8 +737,8 @@ fn valid_optional_port(port string) bool {
|
|
|
|
|
// str reassembles the URL into a valid URL string.
|
|
|
|
|
// The general form of the result is one of:
|
|
|
|
|
//
|
|
|
|
|
// scheme:opaque?query#fragment
|
|
|
|
|
// scheme://userinfo@host/path?query#fragment
|
|
|
|
|
// scheme:opaque?query#fragment
|
|
|
|
|
// scheme://userinfo@host/path?query#fragment
|
|
|
|
|
//
|
|
|
|
|
// If u.opaque is non-empty, String uses the first form;
|
|
|
|
|
// otherwise it uses the second form.
|
|
|
|
@@ -744,15 +746,15 @@ fn valid_optional_port(port string) bool {
|
|
|
|
|
// To obtain the path, String uses u.escaped_path().
|
|
|
|
|
//
|
|
|
|
|
// In the second form, the following rules apply:
|
|
|
|
|
// - if u.scheme is empty, scheme: is omitted.
|
|
|
|
|
// - if u.user is nil, userinfo@ is omitted.
|
|
|
|
|
// - if u.host is empty, host/ is omitted.
|
|
|
|
|
// - if u.scheme and u.host are empty and u.user is nil,
|
|
|
|
|
// the entire scheme://userinfo@host/ is omitted.
|
|
|
|
|
// - if u.host is non-empty and u.path begins with a /,
|
|
|
|
|
// the form host/path does not add its own /.
|
|
|
|
|
// - if u.raw_query is empty, ?query is omitted.
|
|
|
|
|
// - if u.fragment is empty, #fragment is omitted.
|
|
|
|
|
// - if u.scheme is empty, scheme: is omitted.
|
|
|
|
|
// - if u.user is nil, userinfo@ is omitted.
|
|
|
|
|
// - if u.host is empty, host/ is omitted.
|
|
|
|
|
// - if u.scheme and u.host are empty and u.user is nil,
|
|
|
|
|
// the entire scheme://userinfo@host/ is omitted.
|
|
|
|
|
// - if u.host is non-empty and u.path begins with a /,
|
|
|
|
|
// the form host/path does not add its own /.
|
|
|
|
|
// - if u.raw_query is empty, ?query is omitted.
|
|
|
|
|
// - if u.fragment is empty, #fragment is omitted.
|
|
|
|
|
pub fn (u &URL) str() string {
|
|
|
|
|
mut buf := strings.new_builder(200)
|
|
|
|
|
if u.scheme != '' {
|
|
|
|
@@ -761,7 +763,8 @@ pub fn (u &URL) str() string {
|
|
|
|
|
}
|
|
|
|
|
if u.opaque != '' {
|
|
|
|
|
buf.write(u.opaque)
|
|
|
|
|
} else {
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
if u.scheme != '' || u.host != '' || !u.user.empty() {
|
|
|
|
|
if u.host != '' || u.path != '' || !u.user.empty() {
|
|
|
|
|
buf.write('//')
|
|
|
|
@@ -807,8 +810,6 @@ pub fn (u &URL) str() string {
|
|
|
|
|
// It is typically used for query parameters and form values.
|
|
|
|
|
// Unlike in the http.Header map, the keys in a Values map
|
|
|
|
|
// are case-sensitive.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// parseQuery parses the URL-encoded query string and returns
|
|
|
|
|
// a map listing the values specified for each key.
|
|
|
|
|
// parseQuery always returns a non-nil map containing all the
|
|
|
|
@@ -841,18 +842,19 @@ fn parse_query_values(m mut Values, query string) ?bool {
|
|
|
|
|
mut key := q
|
|
|
|
|
mut i := key.index_any('&;')
|
|
|
|
|
if i >= 0 {
|
|
|
|
|
q = key[i+1..]
|
|
|
|
|
q = key[i + 1..]
|
|
|
|
|
key = key[..i]
|
|
|
|
|
} else {
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
q = ''
|
|
|
|
|
}
|
|
|
|
|
if key == '' {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
mut value := ''
|
|
|
|
|
if idx := key.index('=') {
|
|
|
|
|
if idx:=key.index('='){
|
|
|
|
|
i = idx
|
|
|
|
|
value = key[i+1..]
|
|
|
|
|
value = key[i + 1..]
|
|
|
|
|
key = key[..i]
|
|
|
|
|
}
|
|
|
|
|
k := query_unescape(key) or {
|
|
|
|
@@ -860,7 +862,6 @@ fn parse_query_values(m mut Values, query string) ?bool {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
key = k
|
|
|
|
|
|
|
|
|
|
v := query_unescape(value) or {
|
|
|
|
|
had_error = true
|
|
|
|
|
continue
|
|
|
|
@@ -907,10 +908,14 @@ fn resolve_path(base, ref string) string {
|
|
|
|
|
mut full := ''
|
|
|
|
|
if ref == '' {
|
|
|
|
|
full = base
|
|
|
|
|
} else if ref[0] != `/` {
|
|
|
|
|
i := base.last_index('/') or { -1 }
|
|
|
|
|
full = base[..i+1] + ref
|
|
|
|
|
} else {
|
|
|
|
|
}
|
|
|
|
|
else if ref[0] != `/` {
|
|
|
|
|
i := base.last_index('/') or {
|
|
|
|
|
-1
|
|
|
|
|
}
|
|
|
|
|
full = base[..i + 1] + ref
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
full = ref
|
|
|
|
|
}
|
|
|
|
|
if full == '' {
|
|
|
|
@@ -925,17 +930,17 @@ fn resolve_path(base, ref string) string {
|
|
|
|
|
}
|
|
|
|
|
'..' {
|
|
|
|
|
if dst.len > 0 {
|
|
|
|
|
dst = dst[..dst.len-1]
|
|
|
|
|
dst = dst[..dst.len - 1]
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
dst << elem
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
dst << elem
|
|
|
|
|
}}
|
|
|
|
|
}
|
|
|
|
|
last := src[src.len-1]
|
|
|
|
|
last := src[src.len - 1]
|
|
|
|
|
if last == '.' || last == '..' {
|
|
|
|
|
// Add final slash to the joined path.
|
|
|
|
|
dst << ''
|
|
|
|
|
dst << ''
|
|
|
|
|
}
|
|
|
|
|
return '/' + dst.join('/').trim_left('/')
|
|
|
|
|
}
|
|
|
|
@@ -971,7 +976,9 @@ pub fn (u &URL) resolve_reference(ref &URL) ?URL {
|
|
|
|
|
// The 'absoluteURI' or 'net_path' cases.
|
|
|
|
|
// We can ignore the error from set_path since we know we provided a
|
|
|
|
|
// validly-escaped path.
|
|
|
|
|
url.set_path(resolve_path(ref.escaped_path(), '')) or {return error(err)}
|
|
|
|
|
url.set_path(resolve_path(ref.escaped_path(), '')) or {
|
|
|
|
|
return error(err)
|
|
|
|
|
}
|
|
|
|
|
return url
|
|
|
|
|
}
|
|
|
|
|
if ref.opaque != '' {
|
|
|
|
@@ -989,7 +996,9 @@ pub fn (u &URL) resolve_reference(ref &URL) ?URL {
|
|
|
|
|
// The 'abs_path' or 'rel_path' cases.
|
|
|
|
|
url.host = u.host
|
|
|
|
|
url.user = u.user
|
|
|
|
|
url.set_path(resolve_path(u.escaped_path(), ref.escaped_path())) or { return error(err) }
|
|
|
|
|
url.set_path(resolve_path(u.escaped_path(), ref.escaped_path())) or {
|
|
|
|
|
return error(err)
|
|
|
|
|
}
|
|
|
|
|
return url
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@@ -1010,7 +1019,8 @@ pub fn (u &URL) request_uri() string {
|
|
|
|
|
if result == '' {
|
|
|
|
|
result = '/'
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
if result.starts_with('//') {
|
|
|
|
|
result = u.scheme + ':' + result
|
|
|
|
|
}
|
|
|
|
@@ -1026,43 +1036,40 @@ pub fn (u &URL) request_uri() string {
|
|
|
|
|
// If the result is enclosed in square brackets, as literal IPv6 addresses are,
|
|
|
|
|
// the square brackets are removed from the result.
|
|
|
|
|
pub fn (u &URL) hostname() string {
|
|
|
|
|
host, _ := split_host_port(u.host)
|
|
|
|
|
host,_ := split_host_port(u.host)
|
|
|
|
|
return host
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// port returns the port part of u.host, without the leading colon.
|
|
|
|
|
// If u.host doesn't contain a port, port returns an empty string.
|
|
|
|
|
pub fn (u &URL) port() string {
|
|
|
|
|
_, port := split_host_port(u.host)
|
|
|
|
|
_,port := split_host_port(u.host)
|
|
|
|
|
return port
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// split_host_port separates host and port. If the port is not valid, it returns
|
|
|
|
|
// the entire input as host, and it doesn't check the validity of the host.
|
|
|
|
|
// Per RFC 3986, it requires ports to be numeric.
|
|
|
|
|
fn split_host_port(hostport string) (string, string) {
|
|
|
|
|
fn split_host_port(hostport string) (string,string) {
|
|
|
|
|
mut host := hostport
|
|
|
|
|
mut port := ''
|
|
|
|
|
|
|
|
|
|
colon := host.last_index_byte(`:`)
|
|
|
|
|
if colon != -1 && valid_optional_port(host[colon..]) {
|
|
|
|
|
port = host[colon+1..]
|
|
|
|
|
port = host[colon + 1..]
|
|
|
|
|
host = host[..colon]
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if host.starts_with('[') && host.ends_with(']') {
|
|
|
|
|
host = host[1..host.len-1]
|
|
|
|
|
host = host[1..host.len - 1]
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return host, port
|
|
|
|
|
return host,port
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// valid_userinfo reports whether s is a valid userinfo string per RFC 3986
|
|
|
|
|
// Section 3.2.1:
|
|
|
|
|
// userinfo = *( unreserved / pct-encoded / sub-delims / ':' )
|
|
|
|
|
// unreserved = ALPHA / DIGIT / '-' / '.' / '_' / '~'
|
|
|
|
|
// sub-delims = '!' / '$' / '&' / ''' / '(' / ')'
|
|
|
|
|
// / '*' / '+' / ',' / ';' / '='
|
|
|
|
|
// userinfo = *( unreserved / pct-encoded / sub-delims / ':' )
|
|
|
|
|
// unreserved = ALPHA / DIGIT / '-' / '.' / '_' / '~'
|
|
|
|
|
// sub-delims = '!' / '$' / '&' / ''' / '(' / ')'
|
|
|
|
|
// / '*' / '+' / ',' / ';' / '='
|
|
|
|
|
//
|
|
|
|
|
// It doesn't validate pct-encoded. The caller does that via fn unescape.
|
|
|
|
|
pub fn valid_userinfo(s string) bool {
|
|
|
|
@@ -1077,13 +1084,12 @@ pub fn valid_userinfo(s string) bool {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
match r {
|
|
|
|
|
`-`, `.`, `_`, `:`, `~`, `!`, `$`, `&`, `\\`,
|
|
|
|
|
`(`, `)`, `*`, `+`, `,`, `;`, `=`, `%`, `@` {
|
|
|
|
|
continue
|
|
|
|
|
} else {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
`-`, `.`, `_`, `:`, `~`, `!`, `$`, `&`, `\\`, `(`, `)`, `*`, `+`, `,`, `;`, `=`, `%`, `@` {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
return false
|
|
|
|
|
}}
|
|
|
|
|
}
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
@@ -1102,9 +1108,11 @@ fn string_contains_ctl_byte(s string) bool {
|
|
|
|
|
pub fn ishex(c byte) bool {
|
|
|
|
|
if `0` <= c && c <= `9` {
|
|
|
|
|
return true
|
|
|
|
|
} else if `a` <= c && c <= `f` {
|
|
|
|
|
}
|
|
|
|
|
else if `a` <= c && c <= `f` {
|
|
|
|
|
return true
|
|
|
|
|
} else if `A` <= c && c <= `F` {
|
|
|
|
|
}
|
|
|
|
|
else if `A` <= c && c <= `F` {
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
@@ -1113,10 +1121,13 @@ pub fn ishex(c byte) bool {
|
|
|
|
|
fn unhex(c byte) byte {
|
|
|
|
|
if `0` <= c && c <= `9` {
|
|
|
|
|
return c - `0`
|
|
|
|
|
} else if `a` <= c && c <= `f` {
|
|
|
|
|
}
|
|
|
|
|
else if `a` <= c && c <= `f` {
|
|
|
|
|
return c - `a` + 10
|
|
|
|
|
} else if `A` <= c && c <= `F` {
|
|
|
|
|
}
|
|
|
|
|
else if `A` <= c && c <= `F` {
|
|
|
|
|
return c - `A` + 10
|
|
|
|
|
}
|
|
|
|
|
return 0
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|