# -*- encoding: utf-8 -*-
'''
@Author : zyjsuper
@License : (C) Copyright 2013-2020
@Contact : zyj_super@163.com
@File : exploitdb-spider.py
@Time : 2020/8/15 14:04
@Desc :
'''
import json
import os
import re
import time
import pandas as pd
import requests
# Cookies sent with every request to exploit-db.com (passed as ``cookies=``
# to ``requests.get``).
# NOTE(review): these are hard-coded session values, presumably captured from
# a browser session; they may need refreshing before the script works - confirm.
cookies = dict(
    [
        (
            '$XSRF-TOKEN',
            'eyJpdiI6IkY2bGRwQkR0Tkt3YStpNWVVMHQwd2c9PSIsInZhbHVlIjoiR094YVhkaFE0OUNiY0d6ZldKbU5CUUJST281Unk5S2I4OGYyWFZuWk9aZSt0Mk5PZTFQUUQ4WDA0R1VSbkd0NSIsIm1hYyI6IjczZTNjMTAxZmYzNWNhMzA2NzVjOWNiOTVlZjhmMWE3ZWM0NWJkN2UwMGVhMDMzZjUxYTkwZmE0YjQ2YjFiMDAifQ%3D%3D',
        ),
        (
            'exploit_database_session',
            'eyJpdiI6Im9jNEl5cVA1NnY4MTZmelFNMzIrYVE9PSIsInZhbHVlIjoianZ4blR3aVVyN05JSWlBbnZiKzNaSHJRV2FyelVSNjZ1eThmalQ2eDRuYWlvVXgyWmdQMFJvSWYyeUtnblhCTSIsIm1hYyI6ImEyMDRlNjc1M2JkMzQwNGUxZDUyNmExM2QzNzY3NWU1OGJhNzY2MDhiMDYyYjdkOWRiM2MxNWRlY2VmZGQ4MWYifQ%3D%3D',
        ),
        (
            'CookieConsent',
            '{stamp:%27-1%27%2Cnecessary:true%2Cpreferences:true%2Cstatistics:true%2Cmarketing:true%2Cver:1%2Cutc:1597464892196%2Cregion:%27JP%27}',
        ),
        ('_ga', 'GA1.3.560436933.1597464931'),
        ('_gid', 'GA1.3.1985101316.1597464931'),
        ('_gat', '1'),
    ]
)
# HTTP request headers sent with every request (passed as ``headers=`` to
# ``requests.get``). ``X-Requested-With`` and the JSON ``Accept`` value are
# included so the request looks like an XMLHttpRequest asking for JSON.
headers = dict(
    [
        ('Host', 'www.exploit-db.com'),
        (
            'User-Agent',
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0',
        ),
        ('Accept', 'application/json, text/javascript, */*; q=0.01'),
        (
            'Accept-Language',
            'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        ),
        ('Accept-Encoding', 'gzip, deflate'),
        ('Referer', 'https://www.exploit-db.com/google-hacking-database'),
        ('X-Requested-With', 'XMLHttpRequest'),
        ('Connection', 'close'),
    ]
)
# Query-string parameters for the initial "probe" request: the same listing
# query used for the full download, but with ``length`` set to '1' so only a
# single record is returned. The four column descriptors share one shape, so
# they are generated from a compact (field, searchable, orderable) table.
params1 = tuple(
    [('draw', '3')]
    + [
        ('columns[%d]%s' % (position, suffix), value)
        for position, (field, searchable, orderable) in enumerate((
            ('date', 'true', 'true'),
            ('url_title', 'true', 'false'),
            ('cat_id', 'true', 'false'),
            ('author_id', 'false', 'false'),
        ))
        for suffix, value in (
            ('[data]', field),
            ('[name]', field),
            ('[searchable]', searchable),
            ('[orderable]', orderable),
            ('[search][value]', ''),
            ('[search][regex]', 'false'),
        )
    ]
    + [
        ('order[0][column]', '0'),
        ('order[0][dir]', 'desc'),
        ('start', '0'),
        ('length', '1'),
        ('search[value]', ''),
        ('search[regex]', 'false'),
        ('author', ''),
        ('category', ''),
        ('_', '1599280294385'),
    ]
)
def generatejson():
    """Download the complete listing and save the raw response as JSON.

    Reads ``recordsTotal`` from the module-level probe response ``response1``,
    repeats the same query with ``length`` set to that total, and writes the
    response body to ``savepath + "exploitdb.json"``.

    Relies on the module-level names ``response1``, ``params1``, ``headers``,
    ``cookies``, ``proxies`` and ``savepath`` being defined before the call.

    Raises:
        requests.HTTPError: if the server answers with an error status, so an
            error page is never saved in place of the JSON payload.
    """
    total = json.loads(response1.content.decode())["recordsTotal"]
    # Same query as the probe request; only the page size differs, so derive
    # it from ``params1`` instead of keeping a second hand-written copy.
    params = tuple(
        (key, total if key == 'length' else value) for key, value in params1
    )
    response = requests.get(
        'https://www.exploit-db.com/google-hacking-database',
        headers=headers,
        params=params,
        cookies=cookies,
        proxies=proxies,
        timeout=10,
    )
    response.raise_for_status()
    # Write UTF-8 explicitly: the file is read back with encoding='utf-8', and
    # the platform default encoding (e.g. on Windows) may be something else.
    with open(savepath + "exploitdb.json", "w", encoding='utf-8') as file:
        file.write(response.content.decode())
    print("exploitdb.json已经在桌面生成。")
def generatetxt(list_data):
    """Write one title per line to ``savepath + "exploitdb.txt"`` (UTF-8).

    Args:
        list_data: iterable of record dicts; each must have a ``"url_title"``
            string. The text between ``>`` and ``<`` is extracted from it
            (presumably the anchor text of an HTML link - confirm against the
            API response).

    Relies on the module-level ``savepath`` being defined before the call.
    """
    # Compile once instead of re-parsing the pattern for every record.
    title_pattern = re.compile(".*>(.*)<.*")
    with open(savepath + "exploitdb.txt", "w", encoding='utf-8') as file:
        for data in list_data:
            raw_title = data["url_title"]
            matches = title_pattern.findall(raw_title)
            # A title without ``>...<`` markup used to raise IndexError and
            # abort the export mid-file; fall back to the raw value instead.
            title = matches[0] if matches else raw_title
            file.write(title + "\n")
    print("exploitdb.txt已经在桌面生成。")
def generatexls(list_data):
    """Export the records to ``savepath + "ghdb_<YYYYMMDD>.xlsx"``.

    Args:
        list_data: iterable of record dicts with the keys ``"id"``,
            ``"date"``, ``"url_title"`` and ``"author_id"``. ``"author_id"``
            is indexed with ``[0]`` and ``[1]`` (presumably an
            ``[id, name]`` pair - confirm against the API response).

    The sheet is named ``ghdbrecords`` and has the columns ``id``, ``date``,
    ``url_title``, ``author_id`` and ``author_name``. Relies on the
    module-level ``savepath`` being defined before the call.
    """
    columns = ["id", "date", "url_title", "author_id", "author_name"]
    title_pattern = re.compile(".*>(.*)<.*")
    # Collect rows locally rather than appending to a shared module-level
    # list, so a second call does not duplicate the rows of the first.
    rows = []
    for data in list_data:
        raw_title = data["url_title"]
        matches = title_pattern.findall(raw_title)
        # Fall back to the raw value when there is no ``>...<`` markup
        # instead of failing with IndexError.
        title = matches[0] if matches else raw_title
        rows.append({
            "id": data["id"],
            "date": data["date"],
            # The trailing newline is kept from the original output format.
            "url_title": title + "\n",
            "author_id": data["author_id"][0],
            "author_name": data["author_id"][1],
        })
    # Passing ``columns`` to the constructor fixes the column order and also
    # yields a header-only sheet for empty input (selecting the columns from
    # an empty frame afterwards raises KeyError).
    frame = pd.DataFrame(rows, columns=columns)
    # Build the name once so the printed name always matches the file written.
    filename = "ghdb_" + time.strftime("%Y%m%d") + ".xlsx"
    # ``encoding`` is no longer accepted by ``to_excel`` in pandas >= 2.0
    # (TypeError), so it is not passed.
    frame.to_excel(savepath + filename, sheet_name='ghdbrecords', index=False)
    print("%s已经在桌面生成。" % (filename))
if __name__ == "__main__":
    # Script entry point: probe the listing for its record count, download
    # everything as JSON, then derive the .txt and .xlsx exports from it.

    # NOTE: "IP:PORT" is a placeholder; replace it with a real proxy address
    # before running.
    proxies = {"http":"http://IP:PORT","https":"http://IP:PORT"}
    # One-record probe request; generatejson() reads ``recordsTotal`` from it.
    response1 = requests.get(
        'https://www.exploit-db.com/google-hacking-database',
        headers=headers,
        params=params1,
        proxies=proxies,
        cookies=cookies,
        timeout=10,
    )
    # Fail early with a clear HTTP error rather than a JSON decoding error.
    response1.raise_for_status()
    # Module-level list kept for code that expects ``save_data`` to exist.
    save_data = []
    username = os.getenv("USERNAME")
    if username:
        savepath = "C:\\Users\\" + username + "\\Desktop\\"
    else:
        # USERNAME is not set (e.g. outside Windows): concatenating None
        # would raise TypeError, so fall back to ~/Desktop. The empty last
        # component keeps the trailing separator that callers rely on.
        savepath = os.path.join(os.path.expanduser("~"), "Desktop", "")
    generatejson()
    with open(savepath + "exploitdb.json", "r", encoding='utf-8') as file:
        json_data = json.load(file)
    list_data = json_data["data"]
    generatetxt(list_data)
    generatexls(list_data)
「喜欢这篇文章,您的关注和赞赏是给作者最好的鼓励」
关注作者
【版权声明】本文为墨天轮用户原创内容,转载时必须标注文章的来源(墨天轮),文章链接,文章作者等基本信息,否则作者和墨天轮有权追究责任。如果您发现墨天轮中有涉嫌抄袭或者侵权的内容,欢迎发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。




