# (编辑:jimmy 日期: 2024/12/23 浏览:2)
# Module imports
import requests
import time

# Configuration
domain = "http://XXXXXXXX.cn/"
search = "video.php?_t"
now = time.time()


def get_html(url):
    """Fetch *url* and append the response body to result.txt.

    The body is also echoed to stdout.

    Args:
        url: Absolute URL to request (30 second timeout).

    Returns:
        None on success, or the string "产生异常" when the request fails or
        a ValueError (which includes Unicode encode errors raised while
        writing/printing the text) occurs.
    """
    try:
        r = requests.get(url, timeout=30)
        try:
            text = r.text
        finally:
            # Release the connection even if decoding the body fails.
            r.close()
        with open("result.txt", "a") as f:
            f.write(text)
            # Keep consecutive responses on separate lines so that a body
            # without a trailing newline is not glued to the next one.
            if text and not text.endswith("\n"):
                f.write("\n")
        print(text)
    except (ValueError, requests.RequestException):
        # RequestException covers timeouts / connection errors, which the
        # original ValueError-only handler let escape and abort the crawl.
        return "产生异常"


def main():
    """Request the search URL 999 times, appending each body to result.txt."""
    # The query suffix is derived from the start-up timestamp only, so it is
    # identical for every request and can be built once.
    # NOTE(review): if the server is expected to see a fresh value per
    # request, regenerate this inside the loop - confirm with the author.
    stamp = "=0." + str(now).replace('.', '')
    for i in range(1, 1000):
        print("正在爬取第" + str(i) + "页")
        get_html(domain + search + stamp)


if __name__ == "__main__":
    main()
# Read result.txt and keep only the lines that start with "/".
# Why "/": per the original author's note, in a first-level m3u8 playlist the
# first two lines start with "#", so the stream path is the "/"-prefixed line.
domain_301 = "XXXXX"


def get_m3u8():
    """Append every "/"-prefixed line of result.txt to m3u8.txt as a URL.

    Each matching line is prefixed with ``domain_301`` and written on its own
    line. m3u8.txt is opened in append mode and is only created when at least
    one line matches.

    Returns:
        None.

    Raises:
        FileNotFoundError: If result.txt does not exist.
    """
    with open("result.txt", "r") as src:
        paths = [line for line in src if line.startswith("/")]

    if not paths:
        return

    # Open the output once instead of once per matching line.
    with open("m3u8.txt", "a") as dst:
        for path in paths:
            # A final line without "\n" would otherwise be merged with
            # whatever gets appended to m3u8.txt next.
            if not path.endswith("\n"):
                path += "\n"
            dst.write(domain_301 + path)

# NOTE(review): nothing in this snippet calls get_m3u8(); invoke it explicitly
# once result.txt has been produced.
# Read m3u8.txt and remove duplicate lines.
def get_m3u8():
    """Rewrite m3u8.txt in place with duplicate lines removed.

    The line count before and after de-duplication is printed. The first
    occurrence of each line is kept and the original order is preserved.

    Returns:
        None.

    Raises:
        FileNotFoundError: If m3u8.txt does not exist.
    """
    with open("m3u8.txt", "r") as f:
        lines = f.readlines()
    print(len(lines))

    # Terminate an unterminated last line so it compares equal to earlier
    # copies of itself and cannot be fused with another URL when written.
    lines = [ln if ln.endswith("\n") else ln + "\n" for ln in lines]

    # dict keys keep insertion order, unlike set(), so the output is stable.
    unique = list(dict.fromkeys(lines))
    print(len(unique))

    with open("m3u8.txt", "w") as f:
        f.writelines(unique)

# NOTE(review): nothing in this snippet calls get_m3u8(); invoke it explicitly
# once m3u8.txt has been produced.
# Module imports
import os
import subprocess
from multiprocessing.pool import ThreadPool

# Configuration
save_path = 'D:\\video\\download\\movie\\'
filetype = ".mp4 "
oerder = "youtube-dl -o "


def get_m3u8():
    """Read m3u8.txt and return its lines as a list.

    The number of lines is printed. Lines are returned exactly as read,
    including their trailing newline.

    Raises:
        FileNotFoundError: If m3u8.txt does not exist.
    """
    with open("m3u8.txt", "r") as f:
        lines = f.readlines()
    print(len(lines))
    return lines


def get_video(url, save_dir=None):
    """Download *url* with youtube-dl unless the target file already exists.

    The output file name is the fifth "/"-separated piece of the URL plus the
    extension from ``filetype``.

    Args:
        url: Playlist URL; surrounding whitespace/newlines are ignored.
        save_dir: Directory to download into. Defaults to ``save_path``.

    Returns:
        None.
    """
    if save_dir is None:
        save_dir = save_path

    # Lines read from m3u8.txt still carry their trailing newline.
    url = url.strip()
    if not url:
        return

    parts = url.split('/')
    if len(parts) < 5 or not parts[4]:
        # Skip instead of raising IndexError, which would abort the whole
        # pool.map() run because of one malformed line.
        print("无法从链接中解析文件名,已跳过: " + url)
        return

    # ``filetype`` ends with a space (it doubled as the shell separator);
    # strip it so the existence check looks for the real file name.
    target = parts[4] + filetype.strip()
    if os.path.exists(os.path.join(save_dir, target)):
        print("文件已经存在")
        return

    # Argument list + no shell: the URL comes from crawled data and must not
    # be interpreted by a shell. ``cwd`` replaces the process-wide os.chdir(),
    # which is not safe to call from several pool threads.
    command = oerder.split() + [target, url]
    try:
        subprocess.run(command, cwd=save_dir, check=False)
    except OSError as exc:
        # os.system() never raised; keep a missing executable or directory
        # from crashing the worker.
        print("无法启动 youtube-dl: " + str(exc))


def thread_task(lock, data_set):
    """Run get_video(data_set) while holding *lock*."""
    # ``with`` releases the lock even if get_video raises.
    with lock:
        get_video(data_set)


# Entry point
if __name__ == '__main__':
    data_set = get_m3u8()
    with ThreadPool(10) as pool:
        pool.map(get_video, data_set, chunksize=1)