暂无图片
暂无图片
暂无图片
暂无图片
暂无图片

exploit-db漏洞库爬取脚本

原创 zayki 2021-08-11
3560
# -*- encoding: utf-8 -*-
'''
@Author  :   zyjsuper
@License :   (C) Copyright 2013-2020
@Contact :   zyj_super@163.com
@File    :   exploitdb-spider.py
@Time    :   2020/8/15 14:04
@Desc    :   Query the exploit-db.com google-hacking-database endpoint and
             export the returned records to the desktop as exploitdb.json,
             exploitdb.txt and a dated ghdb_YYYYMMDD.xlsx workbook.
'''
import json
import os
import re
import time

import pandas as pd
import requests

# Endpoint queried by every request in this script.
GHDB_URL = 'https://www.exploit-db.com/google-hacking-database'

# Captures the text between a '>' and a following '<' in a url_title value.
# Presumably url_title is an HTML anchor and this yields its link text --
# verify against a live response.
_TITLE_RE = re.compile(".*>(.*)<.*")

# Cookie values sent with every request (hard-coded session values).
cookies = {
    '$XSRF-TOKEN': 'eyJpdiI6IkY2bGRwQkR0Tkt3YStpNWVVMHQwd2c9PSIsInZhbHVlIjoiR094YVhkaFE0OUNiY0d6ZldKbU5CUUJST281Unk5S2I4OGYyWFZuWk9aZSt0Mk5PZTFQUUQ4WDA0R1VSbkd0NSIsIm1hYyI6IjczZTNjMTAxZmYzNWNhMzA2NzVjOWNiOTVlZjhmMWE3ZWM0NWJkN2UwMGVhMDMzZjUxYTkwZmE0YjQ2YjFiMDAifQ%3D%3D',
    'exploit_database_session': 'eyJpdiI6Im9jNEl5cVA1NnY4MTZmelFNMzIrYVE9PSIsInZhbHVlIjoianZ4blR3aVVyN05JSWlBbnZiKzNaSHJRV2FyelVSNjZ1eThmalQ2eDRuYWlvVXgyWmdQMFJvSWYyeUtnblhCTSIsIm1hYyI6ImEyMDRlNjc1M2JkMzQwNGUxZDUyNmExM2QzNzY3NWU1OGJhNzY2MDhiMDYyYjdkOWRiM2MxNWRlY2VmZGQ4MWYifQ%3D%3D',
    'CookieConsent': '{stamp:%27-1%27%2Cnecessary:true%2Cpreferences:true%2Cstatistics:true%2Cmarketing:true%2Cver:1%2Cutc:1597464892196%2Cregion:%27JP%27}',
    '_ga': 'GA1.3.560436933.1597464931',
    '_gid': 'GA1.3.1985101316.1597464931',
    '_gat': '1',
}

# Request headers; X-Requested-With plus the JSON Accept value make the
# request look like the page's own XMLHttpRequest.
headers = {
    'Host': 'www.exploit-db.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0',
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
    'Accept-Encoding': 'gzip, deflate',
    'Referer': 'https://www.exploit-db.com/google-hacking-database',
    'X-Requested-With': 'XMLHttpRequest',
    'Connection': 'close',
}

# Query string for the initial probe request: 'length' is '1' because that
# response is only read for its "recordsTotal" field.
params1 = (
    ('draw', '3'),
    ('columns[0][data]', 'date'),
    ('columns[0][name]', 'date'),
    ('columns[0][searchable]', 'true'),
    ('columns[0][orderable]', 'true'),
    ('columns[0][search][value]', ''),
    ('columns[0][search][regex]', 'false'),
    ('columns[1][data]', 'url_title'),
    ('columns[1][name]', 'url_title'),
    ('columns[1][searchable]', 'true'),
    ('columns[1][orderable]', 'false'),
    ('columns[1][search][value]', ''),
    ('columns[1][search][regex]', 'false'),
    ('columns[2][data]', 'cat_id'),
    ('columns[2][name]', 'cat_id'),
    ('columns[2][searchable]', 'true'),
    ('columns[2][orderable]', 'false'),
    ('columns[2][search][value]', ''),
    ('columns[2][search][regex]', 'false'),
    ('columns[3][data]', 'author_id'),
    ('columns[3][name]', 'author_id'),
    ('columns[3][searchable]', 'false'),
    ('columns[3][orderable]', 'false'),
    ('columns[3][search][value]', ''),
    ('columns[3][search][regex]', 'false'),
    ('order[0][column]', '0'),
    ('order[0][dir]', 'desc'),
    ('start', '0'),
    ('length', '1'),
    ('search[value]', ''),
    ('search[regex]', 'false'),
    ('author', ''),
    ('category', ''),
    ('_', '1599280294385'),
)


def _extract_title(url_title):
    """Return the text captured by ``_TITLE_RE`` from *url_title*.

    Falls back to the unmodified input when the pattern does not match, so
    one record without markup cannot abort a whole export.
    """
    match = _TITLE_RE.search(url_title)
    return match.group(1) if match else url_title


def generatejson():
    """Download every record and save the raw response as exploitdb.json.

    Reads the module-level ``response1`` (probe response), ``proxies`` and
    ``savepath``; these are assigned in the ``__main__`` section.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    total = json.loads(response1.content.decode())["recordsTotal"]
    # Same query as the probe, but asking for all records in one page.
    params = tuple(
        (key, total) if key == 'length' else (key, value)
        for key, value in params1
    )
    response = requests.get(GHDB_URL, headers=headers, params=params,
                            cookies=cookies, proxies=proxies, timeout=10)
    # Fail here rather than writing an error page to disk as "JSON".
    response.raise_for_status()
    # Written as UTF-8 explicitly because the file is read back as UTF-8;
    # the platform default encoding may differ.
    with open(savepath + "exploitdb.json", "w", encoding='utf-8') as file:
        file.write(response.content.decode())
    print("exploitdb.json已经在桌面生成。")


def generatetxt(list_data):
    """Write one extracted title per line to exploitdb.txt.

    Args:
        list_data: iterable of record dicts, each with a "url_title" key.
    """
    with open(savepath + "exploitdb.txt", "w", encoding='utf-8') as file:
        file.writelines(
            _extract_title(data["url_title"]) + "\n" for data in list_data
        )
    print("exploitdb.txt已经在桌面生成。")


def generatexls(list_data):
    """Export the records to ghdb_<YYYYMMDD>.xlsx (sheet "ghdbrecords").

    Rows are appended to the module-level ``save_data`` list before the
    workbook is written.

    Args:
        list_data: iterable of record dicts with "id", "date", "url_title"
            and "author_id" keys. "author_id" is indexed with [0] and [1];
            presumably an (id, name) pair -- verify against a live response.
    """
    columns = ["id", "date", "url_title", "author_id", "author_name"]
    for data in list_data:
        save_data.append({
            "id": data["id"],
            "date": data["date"],
            # Trailing newline kept so the spreadsheet content is unchanged
            # for anything that already consumes these files.
            "url_title": _extract_title(data["url_title"]) + "\n",
            "author_id": data["author_id"][0],
            "author_name": data["author_id"][1],
        })
    # Passing columns= fixes the column order and also copes with an empty
    # record list, where selecting columns afterwards would raise KeyError.
    frame = pd.DataFrame(save_data, columns=columns)
    # Build the name once so the file and the message cannot disagree if the
    # date rolls over between two strftime calls.
    filename = "ghdb_" + time.strftime("%Y%m%d") + ".xlsx"
    # No encoding= argument: pandas deprecated it for to_excel in 1.5 and
    # removed it in 2.0.
    frame.to_excel(savepath + filename, sheet_name='ghdbrecords', index=False)
    print("%s已经在桌面生成。" % filename)


if __name__ == "__main__":
    # Placeholder values: replace IP:PORT with a real proxy before running.
    proxies = {"http": "http://IP:PORT", "https": "http://IP:PORT"}
    response1 = requests.get(GHDB_URL, headers=headers, params=params1,
                             proxies=proxies, cookies=cookies, timeout=10)
    response1.raise_for_status()
    save_data = []
    username = os.getenv("USERNAME")
    if username:
        savepath = "C:\\Users\\" + username + "\\Desktop\\"
    else:
        # USERNAME is unset (e.g. not on Windows): fall back to ~/Desktop/
        # instead of failing on None + str.
        savepath = os.path.join(os.path.expanduser("~"), "Desktop", "")
    os.makedirs(savepath, exist_ok=True)
    generatejson()
    with open(savepath + "exploitdb.json", "r", encoding='utf-8') as file:
        json_data = json.load(file)
    list_data = json_data["data"]
    generatetxt(list_data)
    generatexls(list_data)
「喜欢这篇文章,您的关注和赞赏是给作者最好的鼓励」
关注作者
【版权声明】本文为墨天轮用户原创内容,转载时必须标注文章的来源(墨天轮),文章链接,文章作者等基本信息,否则作者和墨天轮有权追究责任。如果您发现墨天轮中有涉嫌抄袭或者侵权的内容,欢迎发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。

评论