1
0
Fork 0
mirror of https://github.com/denoland/deno.git synced 2025-01-03 12:58:54 -05:00

fix(cli/js/web): formData parser for binary files (#6015)

This commit is contained in:
Marcos Casagrande 2020-06-01 14:32:08 +02:00 committed by GitHub
parent edeeedf401
commit 1d3dce9a68
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 163 additions and 87 deletions

View file

@ -3,6 +3,7 @@ import * as encoding from "./text_encoding.ts";
import * as domTypes from "./dom_types.d.ts";
import { ReadableStreamImpl } from "./streams/readable_stream.ts";
import { getHeaderValueParams, hasHeaderValueOf } from "./util.ts";
import { MultipartParser } from "./fetch/multipart.ts";
// only namespace imports work for now, plucking out what we need
const { TextEncoder, TextDecoder } = encoding;
@ -130,98 +131,15 @@ export class Body implements domTypes.Body {
// ref: https://fetch.spec.whatwg.org/#body-mixin
public async formData(): Promise<FormData> {
const formData = new FormData();
const enc = new TextEncoder();
if (hasHeaderValueOf(this.contentType, "multipart/form-data")) {
const params = getHeaderValueParams(this.contentType);
if (!params.has("boundary")) {
// TypeError is required by spec
throw new TypeError("multipart/form-data must provide a boundary");
}
// ref: https://tools.ietf.org/html/rfc2046#section-5.1
const boundary = params.get("boundary")!;
const dashBoundary = `--${boundary}`;
const delimiter = `\r\n${dashBoundary}`;
const closeDelimiter = `${delimiter}--`;
const body = new Uint8Array(await this.arrayBuffer());
const multipartParser = new MultipartParser(body, boundary);
const body = await this.text();
let bodyParts: string[];
const bodyEpilogueSplit = body.split(closeDelimiter);
if (bodyEpilogueSplit.length < 2) {
bodyParts = [];
} else {
// discard epilogue
const bodyEpilogueTrimmed = bodyEpilogueSplit[0];
// first boundary treated special due to optional prefixed \r\n
const firstBoundaryIndex = bodyEpilogueTrimmed.indexOf(dashBoundary);
if (firstBoundaryIndex < 0) {
throw new TypeError("Invalid boundary");
}
const bodyPreambleTrimmed = bodyEpilogueTrimmed
.slice(firstBoundaryIndex + dashBoundary.length)
.replace(/^[\s\r\n\t]+/, ""); // remove transport-padding CRLF
// trimStart might not be available
// Be careful! body-part allows trailing \r\n!
// (as long as it is not part of `delimiter`)
bodyParts = bodyPreambleTrimmed
.split(delimiter)
.map((s): string => s.replace(/^[\s\r\n\t]+/, ""));
// TODO: LWSP definition is actually trickier,
// but should be fine in our case since without headers
// we should just discard the part
}
for (const bodyPart of bodyParts) {
const headers = new Headers();
const headerOctetSeperatorIndex = bodyPart.indexOf("\r\n\r\n");
if (headerOctetSeperatorIndex < 0) {
continue; // Skip unknown part
}
const headerText = bodyPart.slice(0, headerOctetSeperatorIndex);
const octets = bodyPart.slice(headerOctetSeperatorIndex + 4);
// TODO: use textproto.readMIMEHeader from deno_std
const rawHeaders = headerText.split("\r\n");
for (const rawHeader of rawHeaders) {
const sepIndex = rawHeader.indexOf(":");
if (sepIndex < 0) {
continue; // Skip this header
}
const key = rawHeader.slice(0, sepIndex);
const value = rawHeader.slice(sepIndex + 1);
headers.set(key, value);
}
if (!headers.has("content-disposition")) {
continue; // Skip unknown part
}
// Content-Transfer-Encoding Deprecated
const contentDisposition = headers.get("content-disposition")!;
const partContentType = headers.get("content-type") || "text/plain";
// TODO: custom charset encoding (needs TextEncoder support)
// const contentTypeCharset =
// getHeaderValueParams(partContentType).get("charset") || "";
if (!hasHeaderValueOf(contentDisposition, "form-data")) {
continue; // Skip, might not be form-data
}
const dispositionParams = getHeaderValueParams(contentDisposition);
if (!dispositionParams.has("name")) {
continue; // Skip, unknown name
}
const dispositionName = dispositionParams.get("name")!;
if (dispositionParams.has("filename")) {
const filename = dispositionParams.get("filename")!;
const blob = new DenoBlob([enc.encode(octets)], {
type: partContentType,
});
// TODO: based on spec
// https://xhr.spec.whatwg.org/#dom-formdata-append
// https://xhr.spec.whatwg.org/#create-an-entry
// Currently it does not mention how I could pass content-type
// to the internally created file object...
formData.append(dispositionName, blob, filename);
} else {
formData.append(dispositionName, octets);
}
}
return formData;
return multipartParser.parse();
} else if (
hasHeaderValueOf(this.contentType, "application/x-www-form-urlencoded")
) {

View file

@ -0,0 +1,120 @@
// Copyright 2018-2020 the Deno authors. All rights reserved. MIT license.
import { DenoBlob } from "../blob.ts";
import { TextEncoder, TextDecoder } from "../text_encoding.ts";
import { getHeaderValueParams } from "../util.ts";
// Codec instances shared by every parser: `encoder` turns the boundary string
// into bytes, `decoder` turns part content back into text.
const encoder = new TextEncoder();
const decoder = new TextDecoder();
// Byte values of the two characters that make up a CRLF line terminator.
const CR = 0x0d; // "\r"
const LF = 0x0a; // "\n"
/** Result of parsing the header section of a single multipart body part. */
interface MultipartHeaders {
// Every `key: value` header line of the part, collected into a Headers object.
headers: Headers;
// Parameters extracted from the part's Content-Disposition header value by
// `getHeaderValueParams`; the parser reads "name" and "filename" from it.
disposition: Map<string, string>;
}
// "-" as a byte: two of them directly after a boundary mark the close delimiter.
const DASH = "-".charCodeAt(0);
// Line terminator that ends the boundary line and precedes every delimiter.
const CRLF_BYTES = new Uint8Array([CR, LF]);
// Blank line (CRLF CRLF) separating a part's headers from its content.
const HEADERS_END_BYTES = new Uint8Array([CR, LF, CR, LF]);

/**
 * Returns the index of the first occurrence of `needle` inside `haystack`
 * at or after `from`, or -1 when there is none.
 */
function indexOfBytes(
  haystack: Uint8Array,
  needle: Uint8Array,
  from: number
): number {
  const lastStart = haystack.length - needle.length;
  outer: for (let start = Math.max(from, 0); start <= lastStart; start++) {
    for (let offset = 0; offset < needle.length; offset++) {
      if (haystack[start + offset] !== needle[offset]) {
        continue outer;
      }
    }
    return start;
  }
  return -1;
}

/** Maps each byte to the character with the same code (a Latin-1 view). */
function bytesToLatin1(bytes: Uint8Array): string {
  let text = "";
  for (const byte of bytes) {
    text += String.fromCharCode(byte);
  }
  return text;
}

/**
 * Re-interprets a string produced by `bytesToLatin1` (one character per
 * byte) as encoded text and decodes it with the shared `decoder`.
 */
function latin1ToUtf8(text: string): string {
  const bytes = new Uint8Array(text.length);
  for (let i = 0; i < text.length; i++) {
    bytes[i] = text.charCodeAt(i);
  }
  return decoder.decode(bytes);
}

/**
 * Parser for a `multipart/form-data` body held fully in memory.
 *
 * The body is handled as raw bytes from start to end, so binary file parts
 * are returned unmodified.
 */
export class MultipartParser {
  /** The boundary as it appears in the body, i.e. prefixed with `--`. */
  readonly boundary: string;
  /** Encoded bytes of `boundary`, used for searching the body. */
  readonly boundaryChars: Uint8Array;
  /** The complete multipart body. */
  readonly body: Uint8Array;

  /**
   * @param body The complete multipart body.
   * @param boundary The `boundary` parameter of the Content-Type header,
   *   without the leading `--`.
   * @throws TypeError when `boundary` is empty.
   */
  constructor(body: Uint8Array, boundary: string) {
    if (!boundary) {
      throw new TypeError("multipart/form-data must provide a boundary");
    }

    this.boundary = `--${boundary}`;
    this.body = body;
    this.boundaryChars = encoder.encode(this.boundary);
  }

  /**
   * Splits a part's header section into a `Headers` object and the
   * parameters of its Content-Disposition header. Lines without a ":" are
   * skipped.
   */
  #parseHeaders = (headersText: string): MultipartHeaders => {
    const headers = new Headers();
    const rawHeaders = headersText.split("\r\n");
    for (const rawHeader of rawHeaders) {
      const sepIndex = rawHeader.indexOf(":");
      if (sepIndex < 0) {
        continue; // Skip this header
      }
      const key = rawHeader.slice(0, sepIndex);
      const value = rawHeader.slice(sepIndex + 1);
      headers.set(key, value);
    }

    return {
      headers,
      disposition: getHeaderValueParams(
        headers.get("Content-Disposition") ?? ""
      ),
    };
  };

  /**
   * Parses the body into a `FormData`.
   *
   * Parts whose Content-Disposition carries a `filename` are appended as
   * blobs (type taken from the part's Content-Type, defaulting to
   * `application/octet-stream`); all other named parts are appended as
   * decoded text. Parts without a `name`, and a trailing part that is not
   * terminated by a delimiter, are dropped.
   *
   * @returns The form entries found in the body, in body order.
   */
  parse(): FormData {
    const formData = new FormData();
    const body = this.body;
    const delimiter = this.boundaryChars;

    // Anything before the first boundary is preamble and is ignored.
    let boundaryStart = indexOfBytes(body, delimiter, 0);

    while (boundaryStart >= 0) {
      const afterBoundary = boundaryStart + delimiter.length;

      // `--boundary--` is the close delimiter: whatever follows is epilogue.
      if (body[afterBoundary] === DASH && body[afterBoundary + 1] === DASH) {
        break;
      }

      // The boundary line may carry transport padding before its CRLF.
      const boundaryLineEnd = indexOfBytes(body, CRLF_BYTES, afterBoundary);
      if (boundaryLineEnd < 0) {
        break;
      }
      const headersStart = boundaryLineEnd + CRLF_BYTES.length;

      // The header section ends at the first blank line, however many header
      // lines it contains. Searching from the CRLF that ends the boundary
      // line also covers a part that has no headers at all.
      const headersEnd = indexOfBytes(body, HEADERS_END_BYTES, boundaryLineEnd);
      if (headersEnd < 0) {
        break;
      }
      const contentStart = headersEnd + HEADERS_END_BYTES.length;

      // The next delimiter is a boundary that directly follows a CRLF; any
      // other occurrence of the boundary bytes belongs to the content.
      let nextBoundary = indexOfBytes(body, delimiter, contentStart);
      while (
        nextBoundary >= 0 &&
        !(body[nextBoundary - 2] === CR && body[nextBoundary - 1] === LF)
      ) {
        nextBoundary = indexOfBytes(body, delimiter, nextBoundary + 1);
      }
      if (nextBoundary < 0) {
        break; // Unterminated part
      }

      // The CRLF in front of the delimiter is not part of the content. When
      // the delimiter immediately follows the blank line the content is empty.
      const contentEnd = Math.max(
        contentStart,
        nextBoundary - CRLF_BYTES.length
      );
      const content = body.subarray(contentStart, contentEnd);

      // Header text is kept as one character per byte so that every value
      // stays within the byte range when stored in `Headers`.
      const headerBytes = body.subarray(
        headersStart,
        Math.max(headersStart, headersEnd)
      );
      const { headers, disposition } = this.#parseHeaders(
        bytesToLatin1(headerBytes)
      );

      // https://fetch.spec.whatwg.org/#ref-for-dom-body-formdata
      const rawName = disposition.get("name");
      const rawFilename = disposition.get("filename");

      if (rawName) {
        // Field and file names are decoded as text only after the header
        // has been split, so non-ASCII names are restored correctly.
        const name = latin1ToUtf8(rawName);
        if (rawFilename) {
          const blob = new DenoBlob([content], {
            type: headers.get("Content-Type") || "application/octet-stream",
          });
          formData.append(name, blob, latin1ToUtf8(rawFilename));
        } else {
          formData.append(name, decoder.decode(content));
        }
      }

      boundaryStart = nextBoundary;
    }

    return formData;
  }
}

View file

@ -216,6 +216,25 @@ unitTest(
}
);
// Round-trips a binary payload through the test server's
// `/echo_multipart_file` endpoint and checks that the "file" entry of the
// multipart response is parsed back to the same type, name and bytes.
unitTest(
  { perms: { net: true } },
  async function fetchInitFormDataBinaryFileBody(): Promise<void> {
    // Arbitrary bytes, including values that are not valid UTF-8 text
    // prettier-ignore
    const payload = new Uint8Array([108,2,0,0,145,22,162,61,157,227,166,77,138,75,180,56,119,188,177,183]);

    const echoResponse = await fetch(
      "http://localhost:4545/echo_multipart_file",
      { method: "POST", body: payload }
    );
    const form = await echoResponse.formData();
    const echoedFile = form.get("file") as File;

    assertEquals(echoedFile.type, "application/octet-stream");
    assertEquals(echoedFile.name, "file.bin");
    const echoedBytes = new Uint8Array(await echoedFile.arrayBuffer());
    assertEquals(echoedBytes, payload);
  }
);
unitTest(
{
perms: { net: true },

View file

@ -207,6 +207,25 @@ class ContentTypeHandler(QuietSimpleHTTPRequestHandler):
data_string = self.rfile.read(int(self.headers['Content-Length']))
self.wfile.write(bytes(data_string))
return
if "echo_multipart_file" in self.path:
self.protocol_version = 'HTTP/1.1'
self.send_response(200, 'OK')
self.send_header('Content-type',
'multipart/form-data;boundary=boundary')
self.end_headers()
file_content = self.rfile.read(int(self.headers['Content-Length']))
self.wfile.write(
bytes('--boundary\t \r\n'
'Content-Disposition: form-data; name="field_1"\r\n'
'\r\n'
'value_1 \r\n'
'\r\n--boundary\r\n'
'Content-Disposition: form-data; name="file"; '
'filename="file.bin"\r\n'
'Content-Type: application/octet-stream\r\n'
'\r\n') + bytes(file_content) +
bytes('\r\n--boundary--\r\n'))
return
self.protocol_version = 'HTTP/1.1'
self.send_response(501)
self.send_header('content-type', 'text/plain')