From 15e175f06163bfaed3a1ace26cf8cc2720a5b27a Mon Sep 17 00:00:00 2001 From: yizhi <946185759@qq.com> Date: Sat, 2 Apr 2022 16:17:29 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E6=AC=A1=E6=8F=90=E4=BA=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 4 + .npmignore | 2 + README.md | 23 ++++ package.json | 17 +++ src/file.ts | 85 ++++++++++++ src/index.ts | 2 + src/reader.ts | 356 ++++++++++++++++++++++++++++++++++++++++++++++++++ tsconfig.json | 100 ++++++++++++++ 8 files changed, 589 insertions(+) create mode 100644 .gitignore create mode 100644 .npmignore create mode 100644 README.md create mode 100644 package.json create mode 100644 src/file.ts create mode 100644 src/index.ts create mode 100644 src/reader.ts create mode 100644 tsconfig.json diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..546a918 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +/node_modules +/dist +/package-lock.json +/typing \ No newline at end of file diff --git a/.npmignore b/.npmignore new file mode 100644 index 0000000..0966a5d --- /dev/null +++ b/.npmignore @@ -0,0 +1,2 @@ +/src +/tsconfig.json \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..d79f879 --- /dev/null +++ b/README.md @@ -0,0 +1,23 @@ +# yizhi-multipart-reader + +一个 没有任何依赖的 `multipart/form-data` 内容读取工具 + + +## 简单用例 + +```typescript + +import { MultipartReader, File } from 'yizhi-multipart-reader' + +const reader = new MultipartReader({ + //请求,类型为:http.IncomingMessage + req: ctx.req, + //formdata分隔符,从headers['content-type']中取得 + boundary: 'boundary from content-type', + //文件保存位置 + saveDir: '/path/to/savedir', +}) + +const { fields, files } = await reader.wait() + +``` \ No newline at end of file diff --git a/package.json b/package.json new file mode 100644 index 0000000..9e29a64 --- /dev/null +++ b/package.json @@ -0,0 +1,17 @@ +{ + "name": "yizhi-multipart-reader", + "version": "1.0.2", + 
import * as crypto from 'crypto'
import * as fs from 'fs'
import * as path from 'path'
import type { IncomingMessage } from 'http'

/** ASCII codes used while scanning the raw request body. */
const CR = 13
const LF = 10
const DASH = 45

/** Upper bound for the header section of one part; protects against unbounded buffering of a body that never ends its headers. */
const MAX_HEADER_BYTES = 64 * 1024

/** Message prefix of every error raised for a structurally invalid body. */
const BODY_ERROR = 'multipart body error'

/**
 * Matches one `key=value` parameter of a Content-Disposition header.
 * Group 1: key, group 2: double-quoted value, group 3: single-quoted value, group 4: bare token.
 * Quoted values may contain `;` and `=`.
 */
const DISPOSITION_PARAM = /(?:^|;)\s*([^=;\s]+)\s*=\s*(?:"((?:\\"|[^"])*)"|'([^']*)'|([^;]*))/g

/**
 * Locates the end of a part's header section.
 * @param data bytes starting at the first header line
 * @returns index of the first content byte (just past the blank line), or -1 when the blank line has not fully arrived yet
 */
function findHeaderEnd(data: Buffer): number {
    // A part without any header starts directly with the blank line.
    if (data[0] === LF) return 1
    if (data[0] === CR && data[1] === LF) return 2
    for (let i = data.indexOf(LF); i !== -1; i = data.indexOf(LF, i + 1)) {
        if (data[i + 1] === LF) return i + 2
        if (data[i + 1] === CR && data[i + 2] === LF) return i + 3
    }
    return -1
}

/**
 * Extracts the parameters (`name`, `filename`, ...) of a Content-Disposition header value.
 * @param value header value, e.g. `form-data; name="a"; filename="b.txt"`
 * @returns parameter map with lower-cased keys and unquoted values
 */
function parseDispositionParams(value: string): Map<string, string> {
    const params = new Map<string, string>()
    for (const match of value.matchAll(DISPOSITION_PARAM)) {
        const key = match[1].toLowerCase()
        // Only \" is treated as an escape: browsers leave backslashes of Windows paths untouched.
        const val = match[2] !== undefined
            ? match[2].replace(/\\"/g, '"')
            : (match[3] ?? match[4] ?? '').trim()
        params.set(key, val)
    }
    return params
}

/**
 * Stores `value` under `name`; a repeated name turns the entry into an array.
 * Only own properties are consulted and entries are written with `defineProperty`,
 * so names such as `__proto__` or `constructor` behave like any other key.
 */
function appendEntry<T>(target: { [k in string]: T | Array<T> }, name: string, value: T): void {
    const define = (entry: T | Array<T>) => Object.defineProperty(target, name, {
        value: entry,
        writable: true,
        enumerable: true,
        configurable: true,
    })
    if (!Object.prototype.hasOwnProperty.call(target, name)) {
        define(value)
        return
    }
    const current = target[name]
    if (Array.isArray(current)) current.push(value)
    else define([current as T, value])
}

interface IFileConstructorOption {
    name: string
    type: string
    saveDir: string
}

/**
 * A file received through a form submission.
 * Data is streamed to a uniquely named file inside `saveDir` while the MD5 hash,
 * the size and the first 64 bytes are collected on the fly.
 */
export class File {
    /** File name as sent by the client */
    public readonly name: string
    /** MIME type as sent by the client */
    public readonly type: string
    /** Location of the stored file */
    public path: string

    #ws: fs.WriteStream
    #hash: crypto.Hash
    #hashstr!: string
    #finished = false
    #size = 0
    #top64: Buffer = Buffer.alloc(0)
    #error: Error | null = null

    constructor(option: IFileConstructorOption) {
        this.name = option.name
        this.type = option.type
        // Timestamp plus 64 random bits: concurrent uploads cannot realistically collide.
        const suffix = crypto.randomBytes(8).toString('hex')
        this.path = path.join(option.saveDir, `${Date.now()}${suffix}`)
        this.#ws = fs.createWriteStream(this.path)
        // Without a listener an 'error' event (e.g. missing saveDir) would crash the process;
        // remember it so the next write() can report it.
        this.#ws.on('error', (err: Error) => { this.#error = err })
        this.#hash = crypto.createHash('md5')
    }

    /**
     * Appends data to the file.
     * @param data bytes to append
     * @throws the stream error when the underlying file could not be written
     */
    public async write(data: Buffer): Promise<void> {
        if (this.#finished || data.length === 0) return
        if (this.#error) throw this.#error
        // Keep the leading 64 bytes (useful for magic-number sniffing).
        if (this.#top64.length < 64) {
            this.#top64 = Buffer.concat([this.#top64, data.subarray(0, 64 - this.#top64.length)])
        }
        this.#size += data.length
        this.#hash.update(data)
        // Resolve once the chunk has been handed to the file system, which also gives back-pressure.
        await new Promise<void>((resolve, reject) => {
            this.#ws.write(data, err => (err ? reject(err) : resolve()))
        })
    }

    /**
     * Marks the file as complete: computes the hash and closes the stream.
     * Calling it more than once is harmless.
     */
    public finish(): void {
        if (this.#finished) return
        this.#finished = true
        this.#hashstr = this.#hash.digest('hex')
        // end() (unlike destroy()) guarantees the file is created and closed cleanly, even when empty.
        this.#ws.end()
    }

    /** MD5 hash (hex) of the content; available after `finish()` */
    public get hash() {
        return this.#hashstr
    }

    /** Number of bytes written */
    public get size() {
        return this.#size
    }

    /** First 64 bytes of the content (fewer when the file is shorter) */
    public get top64() {
        return this.#top64
    }
}

enum ReadState {
    boundary = 1, // expecting a delimiter line
    header = 2, // expecting the headers of a part
    content = 3, // reading the content of a part
    finish = 4, // closing delimiter seen
}

// Part that carries a plain form field
interface IFormDataHeaderField {
    /** Kind of part */
    type: 'field'
    /** Form name */
    name: string
    /** Raw content chunks; decoded in one go so multi-byte characters split across chunks stay intact */
    chunks: Buffer[]
}

// Part that carries a file
interface IFormDataHeaderFile {
    /** Kind of part */
    type: 'file'
    /** Form name */
    name: string
    /** Target file */
    file: File
}

// Either kind of part
type IFormDataHeader = IFormDataHeaderField | IFormDataHeaderFile

export interface IMultipartReaderResult {
    /** Uploaded files by form name (array when the name is repeated) */
    files: { [k in string]: File | Array<File> }
    /** Field values by form name (array when the name is repeated) */
    fields: { [k in string]: string | Array<string> }
}

export interface IMultipartReaderConstructorOption {
    /** Incoming request */
    req: IncomingMessage
    /** Boundary taken from the Content-Type header */
    boundary: string
    /** Directory in which uploaded files are stored */
    saveDir: string
}

interface IReadResult {
    /** Bytes that still have to be processed (null when everything was consumed) */
    left: Buffer | null
    /** True when processing must stop until more data arrives */
    exit: boolean
}

/**
 * Streaming reader for `multipart/form-data` request bodies.
 * Reading starts as soon as the instance is created; `wait()` yields the result.
 */
export class MultipartReader {
    /** Boundary without the leading dashes */
    #boundary: Buffer
    /** `\n--boundary`: the byte sequence that ends the content of a part */
    #delimiter: Buffer
    /** Directory for uploaded files */
    #saveDir: string
    /** Current parser state */
    #state = ReadState.boundary
    /** Part currently being read */
    #header: IFormDataHeader | undefined = undefined
    /** Collected fields */
    #fields: IMultipartReaderResult['fields'] = {}
    /** Collected files */
    #files: IMultipartReaderResult['files'] = {}
    /** Number of bytes received so far */
    #readed = 0
    /** Progress listener */
    #onReadCallback?: (readed: number) => any

    // Completion promise, created up front so that the outcome is never lost
    #done: Promise<IMultipartReaderResult>
    #resolve!: (data: IMultipartReaderResult) => void
    #reject!: (err: Error) => void

    /**
     * @param option request, boundary and target directory
     */
    constructor(option: IMultipartReaderConstructorOption) {
        this.#boundary = Buffer.from(option.boundary)
        this.#delimiter = Buffer.concat([Buffer.from('\n--'), this.#boundary])
        this.#saveDir = option.saveDir
        this.#done = new Promise<IMultipartReaderResult>((resolve, reject) => {
            this.#resolve = resolve
            this.#reject = reject
        })
        // A failure that happens before anybody calls wait() must not become an unhandled rejection.
        this.#done.catch(() => { })
        this.#handleRequest(option.req)
    }

    /**
     * Waits until the body has been read completely.
     * May be called at any time, including after reading has already finished.
     * @returns the collected fields and files
     * @throws Error when the request fails, the body is malformed or it ends prematurely
     */
    public async wait(): Promise<IMultipartReaderResult> {
        return this.#done
    }

    /**
     * Registers a progress listener.
     * @param callback receives the total number of bytes read so far
     */
    public onRead(callback: (readed: number) => any) {
        this.#onReadCallback = callback
    }

    // Wires the request events to the parser
    #handleRequest(req: IncomingMessage) {
        let pending: Buffer | null = null // bytes left over from the previous chunk
        let queue: Promise<void> = Promise.resolve() // serialises chunk processing
        let stopped = false

        // The 'error' listener stays attached on purpose: an 'error' event without listener would throw.
        const detach = () => {
            req.removeListener('data', onData)
            req.removeListener('end', onDone)
            req.removeListener('close', onDone)
        }
        const fail = (err: Error) => {
            if (stopped) return
            stopped = true
            detach()
            // Release the file that was being written.
            const current = this.#header
            if (current && current.type === 'file') current.file.finish()
            this.#header = undefined
            this.#reject(err)
            // Let the rest of the request drain so the connection is not left hanging.
            req.resume()
        }
        const onData = (chunk: Buffer) => {
            // Stop the flow while this chunk is processed (file writes are asynchronous).
            req.pause()
            queue = queue.then(async () => {
                if (stopped) return
                try {
                    const data: Buffer = pending ? Buffer.concat([pending, chunk]) : chunk
                    pending = await this.#parseData(data)
                    this.#onReadCallback?.(this.#readed += chunk.length)
                    if (this.#state === ReadState.finish) {
                        this.#resolve({ files: this.#files, fields: this.#fields })
                    }
                } catch (err) {
                    fail(err instanceof Error ? err : new Error(String(err)))
                } finally {
                    req.resume()
                }
            })
        }
        const onDone = () => {
            queue = queue.then(() => {
                detach()
                // The stream is over but the closing delimiter never showed up.
                if (this.#state !== ReadState.finish) fail(new Error(`${BODY_ERROR}: unexpected end of data`))
            })
        }

        req.on('error', fail)
        req.on('data', onData)
        req.once('end', onDone)
        req.once('close', onDone)
    }

    // Runs the state machine over the available bytes and returns what could not be consumed yet
    async #parseData(data: Buffer): Promise<Buffer | null> {
        let result: IReadResult = { exit: false, left: data }
        // A loop instead of recursion: every handler either advances the state or asks to exit.
        while (result.left && !result.exit) {
            switch (this.#state) {
                // Parts are separated by "--" + boundary; the last delimiter is followed by "--"
                case ReadState.boundary:
                    result = this.#readBoundary(result.left)
                    break
                // Headers end with an empty line
                case ReadState.header:
                    result = this.#readHeader(result.left)
                    break
                // Content runs until the next delimiter
                case ReadState.content:
                    result = await this.#readContent(result.left)
                    break
                // Anything after the closing delimiter (epilogue) is ignored
                case ReadState.finish:
                    result = { exit: true, left: null }
                    break
            }
        }
        return result.left
    }

    // Consumes a delimiter line ("--boundary" followed by a line break, or by "--" at the very end)
    #readBoundary(data: Buffer): IReadResult {
        const size = this.#boundary.length
        // "--" + boundary + two more bytes (CRLF or "--") are required to decide; otherwise wait.
        if (data.length < size + 4) return { left: data, exit: true }
        if (data[0] !== DASH || data[1] !== DASH || !data.subarray(2, 2 + size).equals(this.#boundary)) {
            throw new Error(BODY_ERROR)
        }
        const rest = data.subarray(2 + size)
        // Closing delimiter; a trailing line break is optional.
        if (rest[0] === DASH && rest[1] === DASH) {
            this.#saveHeader()
            this.#state = ReadState.finish
            return { left: null, exit: true }
        }
        let skip: number
        if (rest[0] === CR && rest[1] === LF) skip = 2
        else if (rest[0] === LF) skip = 1 // tolerate bare LF line endings
        else throw new Error(BODY_ERROR)
        this.#state = ReadState.header
        return { left: rest.subarray(skip), exit: false }
    }

    // Consumes the header section of a part
    #readHeader(data: Buffer): IReadResult {
        const end = findHeaderEnd(data)
        if (end === -1) {
            if (data.length > MAX_HEADER_BYTES) throw new Error(`${BODY_ERROR}: header too large`)
            // Hand everything back: the blank line may be split across chunks.
            return { left: data, exit: true }
        }
        this.#resolveHead(data.toString('utf8', 0, end).trim())
        this.#state = ReadState.content
        return { left: data.subarray(end), exit: false }
    }

    // Consumes content up to the next delimiter
    async #readContent(data: Buffer): Promise<IReadResult> {
        const at = data.indexOf(this.#delimiter)
        if (at !== -1) {
            // The line break in front of the delimiter belongs to the delimiter, not to the content.
            const end = at > 0 && data[at - 1] === CR ? at - 1 : at
            await this.#resolveData(data.subarray(0, end))
            this.#saveHeader()
            this.#state = ReadState.boundary
            return { left: data.subarray(at + 1), exit: false }
        }
        // No delimiter yet. The tail might be the beginning of one ("\r" + partial "\n--boundary"),
        // so hold back that many bytes and flush the rest.
        const safe = data.length - this.#delimiter.length
        if (safe <= 0) return { left: data, exit: true }
        await this.#resolveData(data.subarray(0, safe))
        return { left: data.subarray(safe), exit: true }
    }

    // Hands content bytes to the current part
    async #resolveData(data: Buffer) {
        const header = this.#header
        if (!header || data.length === 0) return
        if (header.type === 'field') header.chunks.push(data)
        else await header.file.write(data)
    }

    // Interprets the header section of a part and prepares the matching target
    #resolveHead(data: string) {
        this.#saveHeader()
        const headers = new Map<string, string>()
        let params = new Map<string, string>()
        for (const raw of data.split(/\r?\n/)) {
            const line = raw.trim()
            const colon = line.indexOf(':')
            if (colon <= 0) continue
            const key = line.slice(0, colon).trim().toLowerCase()
            const val = line.slice(colon + 1).trim()
            if (!key || !val) continue
            if (key === 'content-disposition') params = parseDispositionParams(val)
            else headers.set(key, val)
        }
        const name = params.get('name')
        // RFC 7578 requires a name; a part without one cannot be stored anywhere and is skipped.
        if (name === undefined) return
        const contentType = headers.get('content-type')
        const filename = params.get('filename')
        if (contentType || filename !== undefined) {
            this.#header = {
                type: 'file',
                name,
                file: new File({
                    name: filename ?? '',
                    type: contentType ?? 'application/octet-stream',
                    saveDir: this.#saveDir,
                }),
            }
        }
        else {
            this.#header = { type: 'field', name, chunks: [] }
        }
    }

    // Stores the finished part (if any) in the result
    #saveHeader() {
        const header = this.#header
        this.#header = undefined
        if (!header) return
        if (header.type === 'field') this.#putField(header.name, Buffer.concat(header.chunks).toString('utf8'))
        else this.#putFile(header.name, header.file)
    }

    // Adds a field value
    #putField(name: string, value: string) {
        appendEntry(this.#fields, name, value)
    }

    // Adds a file
    #putFile(name: string, file: File) {
        file.finish()
        appendEntry(this.#files, name, file)
    }
}
*/ + // "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects */ + // "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */ + // "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */ + /* Language and Environment */ + "target": "ESNext", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */ + // "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */ + // "jsx": "preserve", /* Specify what JSX code is generated. */ + // "experimentalDecorators": true, /* Enable experimental support for TC39 stage 2 draft decorators. */ + // "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */ + // "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h' */ + // "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */ + // "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using `jsx: react-jsx*`.` */ + // "reactNamespace": "", /* Specify the object invoked for `createElement`. This only applies when targeting `react` JSX emit. */ + // "noLib": true, /* Disable including any library files, including the default lib.d.ts. */ + // "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */ + /* Modules */ + "module": "commonjs", /* Specify what module code is generated. */ + "rootDir": "./src", /* Specify the root folder within your source files. */ + // "moduleResolution": "node", /* Specify how TypeScript looks up a file from a given module specifier. 
*/ + // "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */ + // "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */ + // "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */ + // "typeRoots": [], /* Specify multiple folders that act like `./node_modules/@types`. */ + // "types": [], /* Specify type package names to be included without being referenced in a source file. */ + // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */ + // "resolveJsonModule": true, /* Enable importing .json files */ + // "noResolve": true, /* Disallow `import`s, `require`s or ``s from expanding the number of files TypeScript should add to a project. */ + /* JavaScript Support */ + // "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the `checkJS` option to get errors from these files. */ + // "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */ + // "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from `node_modules`. Only applicable with `allowJs`. */ + /* Emit */ + "declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */ + // "declarationMap": true, /* Create sourcemaps for d.ts files. */ + // "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */ + // "sourceMap": true, /* Create source map files for emitted JavaScript files. */ + // "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If `declaration` is true, also designates a file that bundles all .d.ts output. */ + "outDir": "./dist", /* Specify an output folder for all emitted files. */ + // "removeComments": true, /* Disable emitting comments. */ + // "noEmit": true, /* Disable emitting files from a compilation. 
*/ + // "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */ + // "importsNotUsedAsValues": "remove", /* Specify emit/checking behavior for imports that are only used for types */ + // "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */ + // "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */ + // "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */ + // "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */ + // "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */ + // "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */ + // "newLine": "crlf", /* Set the newline character for emitting files. */ + // "stripInternal": true, /* Disable emitting declarations that have `@internal` in their JSDoc comments. */ + // "noEmitHelpers": true, /* Disable generating custom helper functions like `__extends` in compiled output. */ + // "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */ + // "preserveConstEnums": true, /* Disable erasing `const enum` declarations in generated code. */ + "declarationDir": "./typing", /* Specify the output directory for generated declaration files. */ + // "preserveValueImports": true, /* Preserve unused imported values in the JavaScript output that would otherwise be removed. */ + /* Interop Constraints */ + // "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */ + // "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */ + "esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. 
This enables `allowSyntheticDefaultImports` for type compatibility. */ + // "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */ + "forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */ + /* Type Checking */ + "strict": true, /* Enable all strict type-checking options. */ + // "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied `any` type.. */ + // "strictNullChecks": true, /* When type checking, take into account `null` and `undefined`. */ + // "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */ + // "strictBindCallApply": true, /* Check that the arguments for `bind`, `call`, and `apply` methods match the original function. */ + // "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */ + // "noImplicitThis": true, /* Enable error reporting when `this` is given the type `any`. */ + // "useUnknownInCatchVariables": true, /* Type catch clause variables as 'unknown' instead of 'any'. */ + // "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */ + // "noUnusedLocals": true, /* Enable error reporting when a local variables aren't read. */ + // "noUnusedParameters": true, /* Raise an error when a function parameter isn't read */ + // "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */ + // "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */ + // "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. 
*/ + // "noUncheckedIndexedAccess": true, /* Include 'undefined' in index signature results */ + // "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */ + // "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type */ + // "allowUnusedLabels": true, /* Disable error reporting for unused labels. */ + // "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */ + /* Completeness */ + // "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */ + "skipLibCheck": true /* Skip type checking all .d.ts files. */ + }, + "include": [ + "src/**/*.ts" + ], + "exclude": [ + "typing/**/*", + "node_modules" + ] +} \ No newline at end of file