异步切片下载二进制文件
异步切片,断点续传,提高下载速度
# -*- coding: UTF-8 -*-
# @author: ylw
# @file: async_dwnload
# @time: 2022-3-8
# @desc:
import os
import time
import asyncio
import aiohttp
from retrying import retry
class Async_download:
def __init__(self, sema_number=20):
self.__sema_number = sema_number
self.__sema = None
@classmethod
def _mkdir(cls, file_path, full_path):
if os.path.exists(file_path):
return True
try:
os.makedirs(full_path)
except Exception as e:
pass
return False
def __init_check(self, file_path: str):
full_path, file_name = file_path.rsplit('/', 1)
file_size = os.path.getsize(file_path) if self._mkdir(file_path, full_path) else 0
return file_name, file_size
@classmethod
def __sync_save_local(cls, r_headers, results, file_path):
done, padding = results
for d in done:
for index, value in d.result().items():
r_headers[index] = value
with open(file_path, 'ab') as f:
for _, value in r_headers.items():
f.write(value)
return True
@classmethod
def __generate_headers(cls, headers, file_size, first_byte):
r_headers = {}
index = 0
if first_byte > 51200000:
byte = 2048000 # 2M 为一片
else:
byte = 1024000 # 1M 为一片
while True:
file_size_two = file_size + byte
if file_size_two >= first_byte:
r_headers[index] = {"Range": f"bytes={file_size}-{first_byte}"}
break
r_headers[index] = {"Range": f"bytes={file_size}-{file_size_two - 1}"}
index += 1
file_size = file_size_two
for key in r_headers:
r_headers[key].update(headers)
return r_headers
@retry(stop_max_attempt_number=3)
async def __download_one(self, session, method, url, r_headers, **kwargs):
index, headers = r_headers
async with self.__sema:
async with session.request(method, url, headers=headers, **kwargs) as response:
binary = await response.content.read()
return {index: binary}
async def __async_section_download(self, session, method, url, r_headers, **kwargs):
tasks = [
asyncio.create_task(self.__download_one(session, method, url, (key, r_headers[key]), **kwargs)) for key in
r_headers
]
return await asyncio.wait(tasks)
@classmethod
async def __get_content_length(cls, session, method, url, headers, **kwargs):
async with session.request(method, url, headers=headers, **kwargs) as response:
return response.headers.get('Content-Length') or response.headers.get('content-length') or 0
@classmethod
async def __sync_download(cls, session, method, url, headers, file_path, **kwargs):
async with session.request(method, url, headers=headers, **kwargs) as response:
with open(file_path, 'wb') as f:
binary = await response.content.read()
f.write(binary)
async def __async_download_main(self, method, url, headers, file_path, **kwargs):
"""
:param url:
:param file_path: fullpath/file_name
:return:
"""
file_name, file_size = self.__init_check(file_path)
self.__sema = asyncio.Semaphore(self.__sema_number)
async with aiohttp.ClientSession() as session:
content_length = await self.__get_content_length(session, method, url, headers, **kwargs)
if content_length and content_length.isdigit():
content_length = int(content_length)
if file_size >= content_length:
return True, file_path
r_headers = self.__generate_headers(headers, file_size, content_length)
results = await self.__async_section_download(session, method, url, r_headers, **kwargs)
self.__sync_save_local(r_headers, results, file_path)
else:
await self.__sync_download(session, method, url, headers, file_path, **kwargs)
if os.path.getsize(file_path) >= int(content_length):
return True, file_path
return False, file_path
def start(self, method, url, headers, file_path, **kwargs):
# **kwargs 参数跟 aiohttp.request() 参数一致
return asyncio.run(self.__async_download_main(method, url, headers, file_path, **kwargs))
if __name__ == '__main__':
as_dw = Async_download(20)
url_ = "https://vd4.bdstatic.com/mda-nc6yygrbaqzsrd5u/hd/cae_h264_delogo/1646695221404923243/mda-nc6yygrbaqzsrd5u.mp4?v_from_s=hkapp-haokan-nanjing&auth_key=1646795766-0-0-52830eb9a3936b06fc194613d67a7fa5&bcevod_channel=searchbox_feed&pd=1&cd=0&pt=3&logid=2766416251&vid=17956560932232323908&abtest=100815_2-17451_1&klogid=2766416251"
s = time.time()
print(as_dw.start(
'get',
url_,
{
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36'},
'./text.mp4',
cookies={}
))
print(time.time() - s)
速度对比
应该是我这边网速不好,所以差距会如此之大

ps:大佬们点个赞吧
文章转载自y小白,如果涉嫌侵权,请发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。




