From 466ced2876ab1febbcc557d724069a4a9ed99337 Mon Sep 17 00:00:00 2001 From: Delyan Angelov Date: Wed, 10 Nov 2021 17:31:41 +0200 Subject: [PATCH] net.http: fix multipart/form-data file upload handling --- examples/vweb/file_upload/index.html | 13 +++- examples/vweb/file_upload/upload.html | 1 + vlib/net/http/request.v | 85 ++++++++++++++++++--------- 3 files changed, 69 insertions(+), 30 deletions(-) diff --git a/examples/vweb/file_upload/index.html b/examples/vweb/file_upload/index.html index a2bffad0c4..1e79400594 100644 --- a/examples/vweb/file_upload/index.html +++ b/examples/vweb/file_upload/index.html @@ -1,6 +1,15 @@

File Upload

+File form:
- - + + + +
+ +Text form: +
+ + +
diff --git a/examples/vweb/file_upload/upload.html b/examples/vweb/file_upload/upload.html index e80359de7f..5bf2a6dbdc 100644 --- a/examples/vweb/file_upload/upload.html +++ b/examples/vweb/file_upload/upload.html @@ -5,6 +5,7 @@ File amount: @fdata.len @for i, data in fdata

Filename: @data.filename

+

Length: @data.data.len

Type: @data.content_type

@{files[i]}

diff --git a/vlib/net/http/request.v b/vlib/net/http/request.v index 75690b4d76..cfa8739cf2 100644 --- a/vlib/net/http/request.v +++ b/vlib/net/http/request.v @@ -300,44 +300,85 @@ fn multipart_form_body(form map[string]string, files map[string][]FileData) (str return sb.str(), boundary } +struct LineSegmentIndexes { +mut: + start int + end int +} + +// parse_multipart_form parses an http request body, given a boundary string +// For more details about multipart forms, see: +// https://datatracker.ietf.org/doc/html/rfc2183 +// https://datatracker.ietf.org/doc/html/rfc2388 +// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition pub fn parse_multipart_form(body string, boundary string) (map[string]string, map[string][]FileData) { - sections := body.split(boundary) - fields := sections[1..sections.len - 1] + // dump(body) + // dump(boundary) mut form := map[string]string{} mut files := map[string][]FileData{} - + // TODO: do not use split, but only indexes, to reduce copying of potentially large data + sections := body.split(boundary) + fields := sections[1..sections.len - 1] for field in fields { - // TODO: do not split into lines; do same parsing for HTTP body - lines := field.split_into_lines()[1..] - disposition := parse_disposition(lines[0]) + mut line_segments := []LineSegmentIndexes{cap: 100} + mut line_idx, mut line_start := 0, 0 + for cidx, c in field { + if line_idx >= 6 { + // no need to scan further + break + } + if c == `\n` { + line_segments << LineSegmentIndexes{line_start, cidx} + line_start = cidx + 1 + line_idx++ + } + } + line_segments << LineSegmentIndexes{line_start, field.len} + line1 := field[line_segments[1].start..line_segments[1].end] + line2 := field[line_segments[2].start..line_segments[2].end] + disposition := parse_disposition(line1.trim_space()) // Grab everything between the double quotes name := disposition['name'] or { continue } // Parse files - // TODO: filename* + // TODO: handle `filename*`, see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Disposition if filename := disposition['filename'] { - // Parse Content-Type header - if lines.len == 1 || !lines[1].to_lower().starts_with('content-type:') { + // reject early broken content + if line_segments.len < 5 { continue } - mut ct := lines[1].split_nth(':', 2)[1] - ct = ct.trim_left(' \t') - data := lines_to_string(field.len, lines, 3, lines.len - 1) + // reject early non Content-Type headers + if !line2.to_lower().starts_with('content-type:') { + continue + } + content_type := line2.split_nth(':', 2)[1].trim_space() + // line1: Content-Disposition: form-data; name="upfile"; filename="photo123.jpg" + // line2: Content-Type: image/jpeg + // line3: + // line4: DATA + // ... + // lineX: -- + data := field[line_segments[4].start..field.len - 4] // each multipart field ends with \r\n-- + // dump(data.limit(20).bytes()) + // dump(data.len) files[name] << FileData{ filename: filename - content_type: ct + content_type: content_type data: data } continue } - data := lines_to_string(field.len, lines, 2, lines.len - 1) - form[name] = data + if line_segments.len < 4 { + continue + } + form[name] = field[line_segments[3].start..field.len - 4] } + // dump(form) return form, files } // Parse the Content-Disposition header of a multipart form // Returns a map of the key="value" pairs -// Example: parse_disposition('Content-Disposition: form-data; name="a"; filename="b"') == {'name': 'a', 'filename': 'b'} +// Example: assert parse_disposition('Content-Disposition: form-data; name="a"; filename="b"') == {'name': 'a', 'filename': 'b'} fn parse_disposition(line string) map[string]string { mut data := map[string]string{} for word in line.split(';') { @@ -354,15 +395,3 @@ fn parse_disposition(line string) map[string]string { } return data } - -[manualfree] -fn lines_to_string(len int, lines []string, start int, end int) string { - mut sb := strings.new_builder(len) - for i in start .. end { - sb.writeln(lines[i]) - } - sb.cut_last(1) // last newline - res := sb.str() - unsafe { sb.free() } - return res -}