react+flask大文件上传

技术栈

  • react

  • axios

  • flask

  • flask_restful

正常上传

前端

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import { useState } from "react";
import axios from "axios";

// Shared axios client for the upload API: every request is sent to the local
// server on port 5000 and uses a timeout value of 1000.
const axiosInstance = axios.create({
baseURL: "http://localhost:5000",
timeout: 1000,
});

/**
 * POST a multipart form to the upload endpoint.
 *
 * @param {FormData} formdata - form holding the file under the "file" field.
 * @returns {Promise} the axios response promise, so callers can await the
 *   result and handle failures (the promise was previously dropped).
 */
const uploadFileApi = async (formdata) => {
  const config = {
    headers: {
      "Content-Type": "Multipart/form-data",
    },
  };
  return axiosInstance.post("/api/UploadFile", formdata, config);
};

const CHUNK_SIZE = 256 * 1024; // Chunk size: 256 KB

const UploadFileButton = () => {
const [file, setFile] = useState(null);

// 上传
const handleUpload = () => {
// 判断文件大小
if (file && file.size < CHUNK_SIZE) {
const formdata = new FormData();
formdata.append("file", file);
uploadFileApi(formdata);
}
};

const handleFileChange = (e) => {
const file = e.target.files[0];
setFile(file);
console.log(file);
};

return (
<div className="w-full flex flex-col items-center justify-center p-4">
<button onClick={handleUpload}>Upload File</button>
<input
type="file"
onChange={handleFileChange}
/>
</div>
);
};

export default UploadFileButton;

后端

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
from flask_restful import Resource, reqparse
from ..common.utils import res
from werkzeug.datastructures import FileStorage
from werkzeug.utils import secure_filename
import os

# Request parser for the upload endpoint: the uploaded file is expected under
# the "file" argument, looked up in the "files" location.
parser = reqparse.RequestParser()
parser.add_argument('file', type=FileStorage, location="files")


class UploadFile(Resource):
    """Receive one whole file and store it under ``test_files``."""

    def post(self):
        """Save the uploaded ``file`` field to disk.

        Returns:
            The project's ``res`` response: code 200 on success, code 400
            when no file was sent or its name is unusable after sanitizing.
        """
        args = parser.parse_args()
        file = args.get('file')
        if file is None or not file.filename:
            return res(data=None, message="no file uploaded",
                       code=400, status_code=400)

        # secure_filename() may return an empty string (for example when the
        # name has no usable ASCII characters); joining that with the
        # directory would make the save target the directory itself.
        filename = secure_filename(file.filename)
        if not filename:
            return res(data=None, message="invalid file name",
                       code=400, status_code=400)

        os.makedirs('test_files', exist_ok=True)
        file.save(os.path.join('test_files', filename))
        return res(data=None, message="success", code=200, status_code=200)

注意

location 中 form 与 files 的区别

  • 如果你的 HTML 表单使用 application/x-www-form-urlencoded 或 multipart/form-data 编码类型提交数据,那么这些(非文件的)表单字段会存储在 request.form 中,对应 location='form'。

  • location='files' 用来指定请求中的 文件上传 数据(request.files),通过 multipart/form-data 编码类型以文件的形式上传

  • 文件与表单混合的情况:如果你的请求同时包含文件和表单数据(如上传文件的同时提交一些其他文本字段),你可以在 reqparse 中同时使用 location='form' 和 location='files' 来分别解析这两类数据:

    1
    2
    3
    4
    5
    6
    7
    8
    parser = reqparse.RequestParser()

    # Parse the plain form fields
    parser.add_argument('username', type=str, location='form')
    parser.add_argument('email', type=str, location='form')

    # Parse the uploaded file
    parser.add_argument('picture', type=werkzeug.datastructures.FileStorage, location='files')

分片上传

逻辑

前端

  • 将文件按照大小进行分片,之后将分片统一发送
  • 由于分片可能不会按照顺序到达,并且需要确定每个分片对应的文件,以及需要知道是否是最后一块
    • 使用 file hash 作为文件的统一标识,以及文件的后缀
    • 使用 index 作为分片的顺序
    • 使用 end:1 作为最后一块的结尾标识
    • 使用 chunk 存放对应的分片数据
  • 生成 hash 的计算放到 Web Worker(独立于主线程的后台线程)中进行,防止页面卡顿

后端

+

前端

index.jsx

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import { useState } from "react";
import axios from "axios";

// Shared axios client for the chunk-upload API: every request is sent to the
// local server on port 5000 and uses a timeout value of 100000.
const axiosInstance = axios.create({
baseURL: "http://localhost:5000",
timeout: 100000,
});

/**
 * POST one multipart form to /api/uploadfile and hand back the axios
 * response promise.
 *
 * @param {FormData} formdata - the form to send.
 */
const uploadFileApi = async (formdata) =>
  axiosInstance.post("/api/uploadfile", formdata, {
    headers: { "Content-Type": "Multipart/form-data" },
  });

const CHUNK_SIZE = 512 * 1024; // Chunk size: 512 KB

const UploadFileButton = () => {
const [file, setFile] = useState(null);

// 创建分片
const createFileChunk = (file, size = CHUNK_SIZE) => {
let fileChunks = [];
let cur = 0;
while (cur < file.size) {
fileChunks.push(file.slice(cur, cur + size));
cur = cur + size;
}
return fileChunks;
};
// 上传分片
const uploadFileChunk = async (fileChunk, hash, suffix) => {
const promiseList = fileChunk.map((item, index) => {
const formData = new FormData();
formData.append("hash", hash);
formData.append("index", index);
formData.append("end", +(index === fileChunk.length - 1));
formData.append("chunk", item);
formData.append("suffix", suffix);
return uploadFileApi(formData);
});
return await Promise.all(promiseList);
};

// 生成hash
const fileHash = (fileChunkList) => {
return new Promise((resolve) => {
let worker = new Worker(
"src\\components\\UploadFileButton\\hashChunk.js"
);
worker.postMessage({ fileChunkList });
worker.onmessage = (e) => {
const { hash } = e.data;
resolve(hash);
};
});
};

// 上传组件
const handleUpload = async () => {
if (!file) return;
const fileChunkList = createFileChunk(file);
const filehash = await fileHash(fileChunkList);
uploadFileChunk(fileChunkList, filehash, file.name.split(".")[1]);
};

const handleFileChange = (e) => {
const file = e.target.files[0];
setFile(file);
};

return (
<div className="w-full flex flex-col items-center justify-center p-4">
<button onClick={handleUpload}>Upload File</button>
<input
type="file"
onChange={handleFileChange}
/>
</div>
);
};

export default UploadFileButton;

hashChunk.js

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
// Load SparkMD5 into the worker's global scope.
self.importScripts("./spark-md5.min.js");

// Compute one hash over all received chunks, in order, and post it back to
// the page as { hash }; the worker then closes itself.
self.onmessage = (e) => {
  const { fileChunkList } = e.data;
  const spark = new self.SparkMD5.ArrayBuffer();

  const finish = () => {
    self.postMessage({ hash: spark.end() });
    self.close();
  };

  // Nothing to read: report the hash of empty input instead of failing on
  // fileChunkList[0].
  if (!fileChunkList || fileChunkList.length === 0) {
    finish();
    return;
  }

  const loadNext = (index) => {
    const item = fileChunkList[index];
    // The page posts the raw Blob slices produced by file.slice(); reading
    // item.file on those yields undefined. Accept both a bare Blob and a
    // { file: Blob } wrapper.
    const blob = item instanceof Blob ? item : item.file;

    const reader = new FileReader();
    reader.onload = (event) => {
      spark.append(event.target.result);
      const next = index + 1;
      if (next === fileChunkList.length) {
        finish();
      } else {
        loadNext(next);
      }
    };
    // Surface read failures as a worker error instead of stalling silently.
    reader.onerror = () => {
      throw reader.error;
    };
    reader.readAsArrayBuffer(blob);
  };

  loadNext(0);
};

后端

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from flask_restful import Resource, reqparse
from ..common.utils import res
from werkzeug.datastructures import FileStorage
from werkzeug.utils import secure_filename
import os
import time

# Request parser for the chunk-upload endpoint. The chunk itself is looked up
# in the "files" location; index, end, hash and suffix are string form fields.
parser = reqparse.RequestParser()
parser.add_argument('chunk', type=FileStorage, location="files")
parser.add_argument('index', type=str, location="form")
parser.add_argument('end', type=str, location="form")
parser.add_argument('hash', type=str, location="form")
parser.add_argument('suffix', type=str, location="form")


def saveFile(chunk, index, hash):
    """Store one uploaded chunk as ``test_files/<hash>_<index>``.

    Args:
        chunk: Object with a ``read()`` method returning the chunk bytes.
        index: Position of the chunk within the file.
        hash: Identifier shared by all chunks of the same file.

    Returns:
        The ``(hash, index)`` pair that was stored.

    Raises:
        ValueError: If no chunk was supplied, or if ``hash``/``index`` contain
            a path separator and would point outside ``test_files``.
    """
    if chunk is None:
        raise ValueError("no chunk data supplied")

    # hash and index come straight from the request; refuse anything that
    # could turn the file name into a path.
    chunk_name = f"{hash}_{index}"
    if any(sep in chunk_name for sep in ('/', '\\', '\x00')):
        raise ValueError(f"invalid chunk name: {chunk_name!r}")

    os.makedirs('test_files', exist_ok=True)
    file_name = os.path.join('test_files', chunk_name)
    with open(file_name, 'wb') as f:
        f.write(chunk.read())
    return hash, index


def mergeFile(hash, index, suffix):
    """Concatenate chunks ``0..index`` into ``test_files/<hash>.<suffix>``.

    All chunks are checked before anything is written. Previously the output
    file was opened and earlier chunks were deleted first, so one missing
    chunk left a truncated "merged" file behind and destroyed chunks that had
    already been uploaded.

    Args:
        hash: Identifier shared by all chunks of the file.
        index: Index of the last chunk (anything ``int()`` accepts).
        suffix: Extension of the merged file.

    Returns:
        ``(hash, index)`` after a successful merge, or ``(hash, str(i))``
        where ``i`` is the first missing chunk; in that case nothing is
        written or deleted.
    """
    import shutil

    chunk_paths = [
        os.path.join('test_files', f"{hash}_{i}")
        for i in range(int(index) + 1)
    ]
    for i, path in enumerate(chunk_paths):
        if not os.path.exists(path):
            return hash, str(i)

    file_name = os.path.join('test_files', f"{hash}.{suffix}")
    # Build the result under a temporary name and rename it at the end, so the
    # final name never refers to a half-written file.
    tmp_name = f"{file_name}.part"
    with open(tmp_name, 'wb') as merged:
        for path in chunk_paths:
            with open(path, 'rb') as part:
                shutil.copyfileobj(part, merged)
    os.replace(tmp_name, file_name)

    for path in chunk_paths:
        os.remove(path)
    return hash, index


class UploadFile(Resource):
    """Receive one chunk per request and merge once the last one arrives."""

    def post(self):
        """Store the posted chunk; merge all chunks when ``end`` is ``'1'``.

        Returns:
            The project's ``res`` response. On success ``data`` holds
            ``hashRes`` and ``indexRes`` as returned by ``saveFile`` or
            ``mergeFile``; code 400 when chunk, index or hash is missing.
        """
        args = parser.parse_args()
        chunk = args.get('chunk')
        index = args.get('index')
        end = args.get('end')
        hash = args.get('hash')
        suffix = args.get('suffix')

        if chunk is None or index is None or hash is None:
            return res(data={}, message="missing chunk, index or hash",
                       code=400, status_code=400)

        hashRes, indexRes = saveFile(chunk, index, hash)
        if end == '1':
            hashRes, indexRes = mergeFile(hash, index, suffix)
        # The keys must be string literals: {hashRes: hashRes} used the
        # *values* of the variables as keys.
        return res(data={"hashRes": hashRes, "indexRes": indexRes},
                   message="success", code=200, status_code=200)

快速上传

原理就是在上传文件前,先向服务器查询该文件是否已经存在;如果已经存在,就不再重复上传

前端

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
// Instant upload: before sending any chunk, post only the file hash and suffix.
// The request goes through uploadFileApi and carries no "chunk" field.
+ const verifyUpload = async (filehash, suffix) => {
+ const formdata = new FormData();
+ formdata.append("hash", filehash);
+ formdata.append("suffix", suffix);
+ return await uploadFileApi(formdata);
+ };

// Upload handler: chunk and hash the file, then upload the chunks unless the
// verify response carries code 304.
const handleUpload = async () => {
if (!file) return;
// NOTE(review): split(".")[1] takes the text after the FIRST dot and is
// undefined for a name without a dot.
const suffix = file.name.split(".")[1];
const fileChunkList = createFileChunk(file);
const filehash = await fileHash(fileChunkList);
+ const isExist = await verifyUpload(filehash, suffix);
+ if (isExist.data.code !== 304) {
+ uploadFileChunk(fileChunkList, filehash, suffix);
+ }
};

后端

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Return True when the path test_files/<hash>.<suffix> exists, False otherwise.
+ def verifyFile(hash, suffix):
+ file_name = os.path.join('test_files', f"{hash}.{suffix}")
+ if os.path.exists(file_name):
+ return True
+ return False


# Chunk-upload resource with the instant-upload check added at the top of post().
class UploadFile(Resource):
def post(self):
args = parser.parse_args()
chunk = args.get('chunk')
index = args.get('index')
end = args.get('end')
hash = args.get('hash')
suffix = args.get('suffix')
# Answer with code 304 and skip saving when the merged file is already there.
+ if verifyFile(hash, suffix):
+ return res(data={}, message="file exists", code=304, status_code=200)
# NOTE(review): saveFile runs for every request that gets this far; a request
# that carries only hash and suffix presumably arrives with chunk=None - confirm.
hashRes, indexRes = saveFile(chunk, index, hash)
if end == '1':
hashRes, indexRes = mergeFile(hash, index, suffix)
# NOTE(review): the dict below is keyed by the values of hashRes/indexRes,
# not by the strings "hashRes"/"indexRes".
return res(data={hashRes: hashRes, indexRes: indexRes}, message="success", code=200, status_code=200)

断点续传

思路:

暂停利用了 axios 的一个 API

Axios 支持以 fetch API 方式—— AbortController 取消请求

但是这里要注意:AbortController 一旦调用过 abort(),它的 signal 就一直处于已取消状态,无法复用。所以在再次点击上传(或继续上传)按钮的时候,需要重新创建一个新的 AbortController,不然之后的请求会被立即取消,发不出去

续传就是先向服务端请求获取文件列表,之后再根据缺少的文件上传剩下的文件

前端

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import { useState, useRef } from "react";
import axios from "axios";
import { chunk, has } from "lodash-es";

const UploadFileButton = () => {
const axiosInstance = axios.create({
baseURL: "http://localhost:5000",
timeout: 100000,
headers: {
"Content-Type": "Multipart/form-data",
},
});

// 取消请求的控制器
const controller = useRef(null);

// 发送文件
const uploadFileApi = async (formdata) => {
return axiosInstance
.request({
url: "/api/uploadfile",
method: "post",
data: formdata,
signal: controller.current.signal,
})
.catch(function (thrown) {
if (axios.isCancel(thrown)) {
console.log("Request canceled", thrown.message);
}
});
};

// 是否存在文件
const verifyUploadApi = async (formdata) => {
return axiosInstance.request({
url: "/api/verifyfile",
method: "post",
data: formdata,
});
};

// 请求现有的chunklist
const getExistChunkListApi = async (hash) => {
return axiosInstance.request({
url: "/api/existchunklist",
method: "get",
params: { hash },
});
};

const CHUNK_SIZE = 1024 * 1024; // 分片大小为1MB
// 创建分片
const createFileChunk = (file, size = CHUNK_SIZE) => {
let fileChunks = [];
let cur = 0;
while (cur < file.size) {
fileChunks.push(file.slice(cur, cur + size));
cur = cur + size;
}
return fileChunks;
};
// 上传分片
const uploadFileChunk = async (fileChunk, hash, suffix, base = 0) => {
const promiseList = fileChunk.map((item, index) => {
const formData = new FormData();
formData.append("hash", hash);
formData.append("index", index + base);
formData.append("end", +(index === fileChunk.length - 1));
formData.append("chunk", item);
formData.append("suffix", suffix);
return uploadFileApi(formData);
});
return await Promise.all(promiseList);
};

// 生成hash
const fileHash = (fileChunkList) => {
return new Promise((resolve) => {
let worker = new Worker(
"src\\components\\UploadFileButton\\hashChunk.js"
);
worker.postMessage({ fileChunkList });
worker.onmessage = (e) => {
const { hash } = e.data;
resolve(hash);
};
});
};

// 文件秒传
const verifyUpload = async (filehash, suffix) => {
const formdata = new FormData();
formdata.append("hash", filehash);
formdata.append("suffix", suffix);
return await verifyUploadApi(formdata);
};

// 暂停上传
const handlePause = () => {
if (!filehash.current) return;
console.log("暂停");
controller.current.abort();
};

// 继续上传
const handleContinue = async () => {
if (!filehash.current) return;
// 生成新的controller
controller.current = new AbortController();
// 请求服务器现有的文件
console.log(filehash.current);
const index_list =
(await getExistChunkListApi(filehash.current)).data.data || [];
const fileChunkRestList = fileChunkList.current.filter((_, index) => {
return !index_list.find((i) => index == i);
});
await uploadFileChunk(
fileChunkRestList,
filehash.current,
suffix.current,
index_list.length
);
};

// 上传组件
const handleUpload = async () => {
if (!filehash.current) return;
// 生成新的controller
controller.current = new AbortController();

const isExist = await verifyUpload(filehash.current, suffix.current);
if (isExist.data.code !== 304) {
uploadFileChunk(
fileChunkList.current,
filehash.current,
suffix.current
);
}
};

const fileChunkList = useRef([]);
const filehash = useRef();
const suffix = useRef();

const handleFileChange = async (e) => {
const file = e.target.files[0];
fileChunkList.current = createFileChunk(file);
filehash.current = await fileHash(fileChunkList.current);
suffix.current = file.name.split(".")[1];
};

return (
<div className="w-full flex flex-col items-center justify-center p-4">
<button onClick={handleUpload}>Upload File</button>
<button onClick={handlePause}>Pause upload</button>
<button onClick={handleContinue}>continue upload</button>
<input
type="file"
onChange={handleFileChange}
/>
</div>
);
};

export default UploadFileButton;

后端

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from flask_restful import Resource, reqparse
from ..common.utils import res
from werkzeug.datastructures import FileStorage
from werkzeug.utils import secure_filename
import os
import time

# Request parser shared by the resources below. The chunk is looked up in the
# "files" location; index, end and suffix are string form fields; hash is
# looked up in both the "form" and "args" locations.
parser = reqparse.RequestParser()
parser.add_argument('chunk', type=FileStorage, location="files")
parser.add_argument('index', type=str, location="form")
parser.add_argument('end', type=str, location="form")
parser.add_argument('hash', type=str, location=["form", "args"])
parser.add_argument('suffix', type=str, location="form")


def saveFile(chunk, index, hash):
    """Store one uploaded chunk as ``test_files/<hash>_<index>``.

    Args:
        chunk: Object with a ``read()`` method returning the chunk bytes.
        index: Position of the chunk within the file.
        hash: Identifier shared by all chunks of the same file.

    Returns:
        The ``(hash, index)`` pair that was stored.

    Raises:
        ValueError: If no chunk was supplied, or if ``hash``/``index`` contain
            a path separator and would point outside ``test_files``.
    """
    if chunk is None:
        raise ValueError("no chunk data supplied")

    # hash and index come straight from the request; refuse anything that
    # could turn the file name into a path.
    chunk_name = f"{hash}_{index}"
    if any(sep in chunk_name for sep in ('/', '\\', '\x00')):
        raise ValueError(f"invalid chunk name: {chunk_name!r}")

    os.makedirs('test_files', exist_ok=True)
    file_name = os.path.join('test_files', chunk_name)
    with open(file_name, 'wb') as f:
        f.write(chunk.read())
    return hash, index


def mergeFile(hash, index, suffix):
    """Concatenate chunks ``0..index`` into ``test_files/<hash>.<suffix>``.

    All chunks are checked before anything is written. Previously the output
    file was opened and earlier chunks were deleted first, so one missing
    chunk left a truncated "merged" file behind (which the existence check
    then reports as already uploaded) and destroyed chunks that had already
    been received.

    Args:
        hash: Identifier shared by all chunks of the file.
        index: Index of the last chunk (anything ``int()`` accepts).
        suffix: Extension of the merged file.

    Returns:
        ``(hash, index)`` after a successful merge, or ``(hash, str(i))``
        where ``i`` is the first missing chunk; in that case nothing is
        written or deleted.
    """
    import shutil

    chunk_paths = [
        os.path.join('test_files', f"{hash}_{i}")
        for i in range(int(index) + 1)
    ]
    for i, path in enumerate(chunk_paths):
        if not os.path.exists(path):
            return hash, str(i)

    file_name = os.path.join('test_files', f"{hash}.{suffix}")
    # Build the result under a temporary name and rename it at the end, so the
    # final name never refers to a half-written file.
    tmp_name = f"{file_name}.part"
    with open(tmp_name, 'wb') as merged:
        for path in chunk_paths:
            with open(path, 'rb') as part:
                shutil.copyfileobj(part, merged)
    os.replace(tmp_name, file_name)

    for path in chunk_paths:
        os.remove(path)
    return hash, index


class UploadFile(Resource):
    """Receive one chunk per request and merge once the last one arrives."""

    def post(self):
        """Store the posted chunk; merge all chunks when ``end`` is ``'1'``.

        Returns:
            The project's ``res`` response. On success ``data`` holds
            ``hashRes`` and ``indexRes`` as returned by ``saveFile`` or
            ``mergeFile``; code 400 when chunk, index or hash is missing.
        """
        args = parser.parse_args()
        chunk = args.get('chunk')
        index = args.get('index')
        end = args.get('end')
        hash = args.get('hash')
        suffix = args.get('suffix')

        if chunk is None or index is None or hash is None:
            return res(data={}, message="missing chunk, index or hash",
                       code=400, status_code=400)

        hashRes, indexRes = saveFile(chunk, index, hash)
        if end == '1':
            hashRes, indexRes = mergeFile(hash, index, suffix)
        # The keys must be string literals: {hashRes: hashRes} used the
        # *values* of the variables as keys.
        return res(data={"hashRes": hashRes, "indexRes": indexRes},
                   message="success", code=200, status_code=200)


def verifyFile(hash, suffix):
    """Tell whether the merged file ``test_files/<hash>.<suffix>`` exists.

    Only a regular file counts. ``os.path.exists`` is also true for
    directories, so an empty hash and suffix (which resolve to
    ``test_files/.``) were reported as an already uploaded file.

    Args:
        hash: Identifier of the file.
        suffix: Extension of the merged file.

    Returns:
        True when the merged file is present, False otherwise.
    """
    file_name = os.path.join('test_files', f"{hash}.{suffix}")
    return os.path.isfile(file_name)


class VerifyFile(Resource):
    """Report whether the file named by hash and suffix is already stored."""

    def post(self):
        """Answer with code 304 when the file exists, code 200 otherwise."""
        args = parser.parse_args()
        already_stored = verifyFile(args.get('hash'), args.get('suffix'))
        if not already_stored:
            return res(data={}, message="fail", code=200, status_code=200)
        return res(data={}, message="success", code=304, status_code=200)


def getExistChunkList(hash):
    """List the chunk indexes already stored for ``hash``.

    Only files named exactly ``<hash>_<digits>`` count. Matching on
    ``startswith(hash)`` alone also picked up the merged ``<hash>.<suffix>``
    file and the chunks of any other hash that begins with the same
    characters.

    Args:
        hash: Identifier shared by all chunks of the file; may be None.

    Returns:
        The indexes as strings, in ascending numeric order. Empty when
        ``hash`` is missing or the ``test_files`` directory does not exist.
    """
    if not hash or not os.path.isdir('test_files'):
        return []

    prefix = f"{hash}_"
    index_list = [
        name[len(prefix):]
        for name in os.listdir('test_files')
        if name.startswith(prefix) and name[len(prefix):].isdigit()
    ]
    # os.listdir gives no ordering guarantee; sort for a stable answer.
    return sorted(index_list, key=int)


class ExistChunkList(Resource):
    """Report which chunks of a file the server already holds."""

    def get(self):
        """Return the stored chunk indexes for the ``hash`` argument.

        Returns:
            The project's ``res`` response with the index list as ``data``;
            code 400 with an empty list when no hash was supplied.
        """
        args = parser.parse_args()
        hash = args.get('hash')
        # Without a hash there is nothing to look up; answer explicitly
        # instead of failing inside the lookup.
        if not hash:
            return res(data=[], message="missing hash",
                       code=400, status_code=400)
        index_list = getExistChunkList(hash)
        return res(data=index_list, message="success",
                   code=200, status_code=200)

总结

  • 分片上传就是将大文件使用slice分片,使用webworker对分片进行hash求值,最后统一上传

  • 快速上传就是上传之前请求一次服务器

  • 断点续传是利用 Axios 支持的 AbortController 取消请求,以及根据服务器返回的已上传分片列表,再次上传缺少的分片

查阅的资料