一、前言
作为一名DBA,了解线上数据库的运行情况并定期巡检是很有必要的,这样可以及时发现线上可能出现的问题。笔者最近需要对腾讯云MongoDB有一个大概的了解,所以对开源的mongodb-tools的部分函数进行了修改,主要是为了适应Python 2.7和公司的需要。笔者的系统版本为7.2,Python版本为2.7.5,以下脚本在笔者的环境中运行没有问题,大家只需要使用pip安装依赖的模块即可。直接拿开源版本运行可能会有问题:一是部分功能函数已经过时,二是开源脚本的兼容性有点问题。如果大家需要在Python 3下运行,肯定还需要做一些修改。开源脚本的地址为 https://github.com/jwilder/mongodb-tools ,大家可以看看,感谢原作者Jason Wilder的开源。
二、具体脚本
1、collection_stats.py ##获取MongoDB集合信息
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Print basic size statistics for MongoDB collections and their indexes.

Output is produced with single-argument ``print(...)`` calls and explicit
float arithmetic, so the script behaves the same on Python 2.7 and Python 3.
"""
import sys
from optparse import OptionParser

# NOTE: prettytable, psutil and pymongo are imported inside the functions that
# use them, so the pure helpers in this module can be imported on their own.

# Hosts for which a RAM report of the machine running this script is printed.
LOCAL_HOSTS = ("localhost", "127.0.0.1", "::1")

# (threshold in bytes, suffix), largest first.
SIZE_UNITS = (
    (1099511627776, "T"),
    (1073741824, "G"),
    (1048576, "M"),
    (1024, "K"),
)


def compute_signature(index):
    """Return a string signature for an index document.

    The signature is ``index["ns"]`` followed by one ``<field>_<value>``
    chunk per entry of ``index["key"]``. This script does not call it.
    """
    signature = index["ns"]
    for key in index["key"]:
        signature += "%s_%s" % (key, index["key"][key])
    return signature


def get_collection_stats(database, collection):
    """Run the ``collstats`` command for ``collection`` and return its result."""
    print("Checking DB: %s" % collection.full_name)
    return database.command("collstats", collection.name)


def get_cli_options():
    """Parse the command line and return the optparse options object."""
    parser = OptionParser(
        usage="usage: python %prog [options]",
        description="""This script prints some basic collection stats about the size of the collections and their indexes.""")
    parser.add_option("-H", "--host",
                      dest="host",
                      default="localhost",
                      metavar="HOST",
                      help="MongoDB host")
    parser.add_option("-p", "--port",
                      dest="port",
                      type="int",
                      default=27017,
                      metavar="PORT",
                      help="MongoDB port")
    parser.add_option("-d", "--database",
                      dest="database",
                      default="",
                      metavar="DATABASE",
                      help="Target database to generate statistics. All if omitted.")
    parser.add_option("-u", "--user",
                      dest="user",
                      default="",
                      metavar="USER",
                      help="Admin username if authentication is enabled")
    parser.add_option("--password",
                      dest="password",
                      default="",
                      metavar="PASSWORD",
                      help="Admin password if authentication is enabled")
    (options, args) = parser.parse_args()
    return options


def get_client(host, port, username, password):
    """Create a ``MongoClient`` for ``host``:``port``.

    Credentials (against the ``admin`` database, SCRAM-SHA-1) are only sent
    when ``username`` is non-empty, so the empty CLI defaults are not passed
    to the driver as credentials.

    Raises:
        Exception: whatever ``MongoClient`` raised, after the failure has
            been reported on stderr.
    """
    from pymongo import MongoClient

    kwargs = {"host": host, "port": int(port)}
    if username:
        kwargs.update(username=username,
                      password=password,
                      authSource="admin",
                      authMechanism="SCRAM-SHA-1")
    try:
        return MongoClient(**kwargs)
    except Exception as e:
        # Report and re-raise: returning None here would only surface later
        # as an AttributeError in main().
        sys.stderr.write("connect to server failed:%s\n" % e)
        raise


# From http://www.5dollarwhitebox.org/drupal/node/84
def convert_bytes(bytes):
    """Format a byte count with a two-decimal T/G/M/K/b suffix (1024-based)."""
    bytes = float(bytes)
    magnitude = abs(bytes)
    for threshold, suffix in SIZE_UNITS:
        if magnitude >= threshold:
            return "%.2f%s" % (bytes / threshold, suffix)
    return "%.2fb" % bytes


def percent_of(part, total):
    """Return ``part`` as a percentage of ``total``; 0.0 when ``total`` is 0."""
    if not total:
        return 0.0
    return 100.0 * part / total


def build_collection_rows(all_stats, total_size):
    """Turn collstats documents into table rows ordered by share of data size.

    Args:
        all_stats: iterable of collstats result documents.
        total_size: summed ``size`` of all collections, used for "% Size".

    Returns:
        A list of rows, smallest share first. The ordering is done on the
        numeric share; sorting the formatted strings would place "9.0%"
        after "10.0%".
    """
    ranked = []
    for stat in all_stats:
        share = percent_of(stat["size"], total_size)
        ranked.append((share, [
            stat["ns"],
            stat["count"],
            "%0.1f%%" % share,
            convert_bytes(stat["size"]),
            convert_bytes(stat.get("avgObjSize", 0)),
            stat.get("nindexes", 0),
            convert_bytes(stat.get("totalIndexSize", 0)),
            convert_bytes(stat.get("storageSize", 0)),
        ]))
    ranked.sort(key=lambda item: item[0])
    return [row for _, row in ranked]


def main(options):
    """Collect collstats for the selected databases and print the report."""
    from prettytable import PrettyTable
    import psutil

    client = get_client(options.host, options.port, options.user, options.password)

    if options.database:
        databases = [options.database]
    else:
        databases = client.list_database_names()

    summary_stats = {
        "count": 0,
        "size": 0,
        "indexSize": 0,
        "storageSize": 0,
    }
    all_stats = []

    for db in databases:
        # FIXME: Add an option to include oplog stats.
        if db == "local":
            continue

        database = client[db]
        for collection_name in database.list_collection_names():
            stats = get_collection_stats(database, database[collection_name])
            all_stats.append(stats)

            summary_stats["count"] += stats["count"]
            summary_stats["size"] += stats["size"]
            summary_stats["indexSize"] += stats.get("totalIndexSize", 0)
            summary_stats["storageSize"] += stats.get("storageSize", 0)

    x = PrettyTable(["Collection", "Count", "% Size", "DB Size", "Avg Obj Size",
                     "Indexes", "Index Size", "Storage Size"])
    x.align["Collection"] = "l"
    x.align["% Size"] = "r"
    x.align["Count"] = "r"
    x.align["DB Size"] = "r"
    x.align["Avg Obj Size"] = "r"
    x.align["Index Size"] = "r"
    x.align["Storage Size"] = "r"
    x.padding_width = 1

    # Rows arrive already ordered, so no sortby is passed to get_string().
    for row in build_collection_rows(all_stats, summary_stats["size"]):
        x.add_row(row)

    print(x.get_string())
    print("Total Documents: %s" % summary_stats["count"])
    print("Total Data Size: %s" % convert_bytes(summary_stats["size"]))
    print("Total Index Size: %s" % convert_bytes(summary_stats["indexSize"]))
    print("Total Storage Size: %s" % convert_bytes(summary_stats["storageSize"]))

    # this is only meaningful if we're running the script on localhost
    if options.host in LOCAL_HOSTS:
        memory = psutil.virtual_memory()  # one snapshot for all three lines
        ram_headroom = memory.total - summary_stats["indexSize"]
        print("RAM Headroom: %s" % convert_bytes(ram_headroom))
        print("RAM Used: %s (%s%%)" % (convert_bytes(memory.used), memory.percent))
        print("Available RAM Headroom: %s"
              % convert_bytes((100 - memory.percent) / 100.0 * ram_headroom))


if __name__ == "__main__":
    options = get_cli_options()
    main(options)
运行示例如图

2、index_stats.py ##获取MongoDB集合索引信息
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Print per-index size statistics for MongoDB collections.

The report lists every index with its share of the total index size,
followed by the five largest indexes. Output is produced with
single-argument ``print(...)`` calls and explicit float arithmetic, so the
script behaves the same on Python 2.7 and Python 3.
"""
import sys
from optparse import OptionParser

# NOTE: prettytable, psutil and pymongo are imported inside the functions that
# use them, so the pure helpers in this module can be imported on their own.

# Hosts for which a RAM report of the machine running this script is printed.
LOCAL_HOSTS = ("localhost", "127.0.0.1", "::1")

# (threshold in bytes, suffix), largest first.
SIZE_UNITS = (
    (1099511627776, "T"),
    (1073741824, "G"),
    (1048576, "M"),
    (1024, "K"),
)


def compute_signature(index):
    """Return a string signature for an index document.

    The signature is ``index["ns"]`` followed by one ``<field>_<value>``
    chunk per entry of ``index["key"]``. This script does not call it.
    """
    signature = index["ns"]
    for key in index["key"]:
        signature += "%s_%s" % (key, index["key"][key])
    return signature


def get_collection_stats(database, collection):
    """Run the ``collstats`` command for ``collection`` and return its result."""
    print("Checking DB: %s" % collection.full_name)
    return database.command("collstats", collection.name)


# From http://www.5dollarwhitebox.org/drupal/node/84
def convert_bytes(bytes):
    """Format a byte count with a two-decimal T/G/M/K/b suffix (1024-based)."""
    bytes = float(bytes)
    magnitude = abs(bytes)
    for threshold, suffix in SIZE_UNITS:
        if magnitude >= threshold:
            return "%.2f%s" % (bytes / threshold, suffix)
    return "%.2fb" % bytes


def percent_of(part, total):
    """Return ``part`` as a percentage of ``total``; 0.0 when ``total`` is 0."""
    if not total:
        return 0.0
    return 100.0 * part / total


def get_cli_options():
    """Parse the command line and return the optparse options object."""
    parser = OptionParser(
        usage="usage: python %prog [options]",
        description="""This script prints some basic collection stats about the size of the collections and their indexes.""")
    parser.add_option("-H", "--host",
                      dest="host",
                      default="localhost",
                      metavar="HOST",
                      help="MongoDB host")
    parser.add_option("-p", "--port",
                      dest="port",
                      type="int",
                      default=27017,
                      metavar="PORT",
                      help="MongoDB port")
    parser.add_option("-d", "--database",
                      dest="database",
                      default="",
                      metavar="DATABASE",
                      help="Target database to generate statistics. All if omitted.")
    parser.add_option("-u", "--user",
                      dest="user",
                      default="",
                      metavar="USER",
                      help="Admin username if authentication is enabled")
    parser.add_option("--password",
                      dest="password",
                      default="",
                      metavar="PASSWORD",
                      help="Admin password if authentication is enabled")
    (options, args) = parser.parse_args()
    return options


def get_client(host, port, username, password):
    """Create a ``MongoClient`` for ``host``:``port``.

    Credentials (against the ``admin`` database, SCRAM-SHA-1) are only sent
    when ``username`` is non-empty, so the empty CLI defaults are not passed
    to the driver as credentials.

    Raises:
        Exception: whatever ``MongoClient`` raised, after the failure has
            been reported on stderr.
    """
    from pymongo import MongoClient

    kwargs = {"host": host, "port": int(port)}
    if username:
        kwargs.update(username=username,
                      password=password,
                      authSource="admin",
                      authMechanism="SCRAM-SHA-1")
    try:
        return MongoClient(**kwargs)
    except Exception as e:
        # Report and re-raise: returning None here would only surface later
        # as an AttributeError in main().
        sys.stderr.write("connect to server failed:%s\n" % e)
        raise


def build_index_rows(all_stats, total_index_size):
    """Return one ``(index_size, row)`` pair per index in ``all_stats``.

    Args:
        all_stats: iterable of collstats result documents; each must carry
            ``ns`` and an ``indexSizes`` mapping of index name to size.
        total_index_size: summed index size, used for the "% Size" column.
    """
    sized_rows = []
    for stat in all_stats:
        for index, index_size in stat["indexSizes"].items():
            row = [
                stat["ns"],
                index,
                "%0.1f%%" % percent_of(index_size, total_index_size),
                convert_bytes(index_size),
            ]
            sized_rows.append((index_size, row))
    return sized_rows


def largest_indexes(sized_rows, limit=5):
    """Return the rows of the ``limit`` largest indexes, largest first.

    Works on the full list of ``(index_size, row)`` pairs, so several
    indexes with the same size are all kept instead of overwriting each
    other in a size-keyed dict.
    """
    ranked = sorted(sized_rows, key=lambda item: item[0], reverse=True)
    return [row for _, row in ranked[:limit]]


def new_index_table(PrettyTable):
    """Build an empty, aligned four-column index table."""
    table = PrettyTable(["Collection", "Index", "% Size", "Index Size"])
    table.align["Collection"] = "l"
    table.align["Index"] = "l"
    table.align["% Size"] = "r"
    table.align["Index Size"] = "r"
    table.padding_width = 1
    return table


def main(options):
    """Collect collstats for the selected databases and print the index report."""
    from prettytable import PrettyTable
    import psutil

    client = get_client(options.host, options.port, options.user, options.password)

    if options.database:
        databases = [options.database]
    else:
        databases = client.list_database_names()

    summary_stats = {
        "count": 0,
        "size": 0,
        "indexSize": 0,
    }
    all_stats = []

    for db in databases:
        # FIXME: Add an option to include oplog stats.
        if db == "local":
            continue

        database = client[db]
        for collection_name in database.list_collection_names():
            stats = get_collection_stats(database, database[collection_name])
            all_stats.append(stats)

            summary_stats["count"] += stats["count"]
            summary_stats["size"] += stats["size"]
            summary_stats["indexSize"] += stats.get("totalIndexSize", 0)

    sized_rows = build_index_rows(all_stats, summary_stats["indexSize"])

    x = new_index_table(PrettyTable)
    for _, row in sized_rows:
        x.add_row(row)

    print("Index Overview")
    print(x.get_string(sortby="Collection"))

    print("Top 5 Largest Indexes")
    x = new_index_table(PrettyTable)
    for row in largest_indexes(sized_rows, 5):
        x.add_row(row)
    print(x.get_string())

    print("Total Documents: %s" % summary_stats["count"])
    print("Total Data Size: %s" % convert_bytes(summary_stats["size"]))
    print("Total Index Size: %s" % convert_bytes(summary_stats["indexSize"]))

    # this is only meaningful if we're running the script on localhost
    if options.host in LOCAL_HOSTS:
        memory = psutil.virtual_memory()  # one snapshot for all three lines
        ram_headroom = memory.total - summary_stats["indexSize"]
        print("RAM Headroom: %s" % convert_bytes(ram_headroom))
        print("RAM Used: %s (%s%%)" % (convert_bytes(memory.used), memory.percent))
        print("Available RAM Headroom: %s"
              % convert_bytes((100 - memory.percent) / 100.0 * ram_headroom))


if __name__ == "__main__":
    options = get_cli_options()
    main(options)
运行示例如图

3、redundant_indexes.py ##获取MongoDB集合索引是否冗余
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
This is a simple script to print out potentially redundant indexes in a
mongodb instance.

For example, if an index is defined on {field1:1,field2:1} and there is
another index with just fields {field1:1}, the latter index is not needed
since the first index already indexes the necessary fields.
"""
import sys
from collections import OrderedDict
from optparse import OptionParser

# NOTE: pymongo is imported inside get_client(), so the pure helpers in this
# module can be imported on their own.


def get_cli_options():
    """Parse the command line and return the optparse options object."""
    parser = OptionParser(
        usage="usage: python %prog [options]",
        description="""This script prints indexes that may be redundant because another index on the same collection starts with the same fields.""")
    parser.add_option("-H", "--host",
                      dest="host",
                      default="localhost",
                      metavar="HOST",
                      help="MongoDB host")
    parser.add_option("-p", "--port",
                      dest="port",
                      type="int",
                      default=27017,
                      metavar="PORT",
                      help="MongoDB port")
    parser.add_option("-d", "--database",
                      dest="database",
                      default="",
                      metavar="DATABASE",
                      help="Target database to generate statistics. All if omitted.")
    parser.add_option("-u", "--user",
                      dest="user",
                      default="",
                      metavar="USER",
                      help="Admin username if authentication is enabled")
    parser.add_option("--password",
                      dest="password",
                      default="",
                      metavar="PASSWORD",
                      help="Admin password if authentication is enabled")
    (options, args) = parser.parse_args()
    return options


def get_client(host, port, username, password):
    """Create a ``MongoClient`` for ``host``:``port``.

    Credentials (against the ``admin`` database, SCRAM-SHA-1) are only sent
    when ``username`` is non-empty, so the empty CLI defaults are not passed
    to the driver as credentials.

    Raises:
        Exception: whatever ``MongoClient`` raised, after the failure has
            been reported on stderr.
    """
    from pymongo import MongoClient

    kwargs = {"host": host, "port": int(port)}
    if username:
        kwargs.update(username=username,
                      password=password,
                      authSource="admin",
                      authMechanism="SCRAM-SHA-1")
    try:
        return MongoClient(**kwargs)
    except Exception as e:
        # Report and re-raise: returning None here would only surface later
        # as an AttributeError in main().
        sys.stderr.write("connect to server failed:%s\n" % e)
        raise


def compute_signature(index, namespace=None):
    """Return ``(namespace, fields)`` identifying an index's key pattern.

    Args:
        index: index document with a ``key`` mapping and optionally ``ns``.
        namespace: used when ``index`` has no ``ns`` entry.

    Returns:
        A tuple of the namespace and a tuple of ``(field, direction)``
        pairs in key order. Numeric directions are normalised with
        ``int()`` (so ``1.0`` equals ``1``); values ``int()`` rejects,
        such as ``"text"``, are kept unchanged.
    """
    fields = []
    for field, direction in index["key"].items():
        try:
            direction = int(direction)
        except (ValueError, TypeError):
            pass
        fields.append((field, direction))
    return index.get("ns", namespace), tuple(fields)


def find_redundant_indexes(indexes):
    """Return ``(redundant, covering)`` pairs among ``indexes``.

    An index is reported when another index of the same namespace has a
    strictly longer key pattern starting with exactly the same
    ``(field, direction)`` pairs. Comparing structured pairs per namespace
    avoids the accidental matches that prefix-testing one concatenated
    string can produce.

    Args:
        indexes: iterable of index documents carrying ``ns``, ``name`` and
            an order-preserving ``key`` mapping.
    """
    by_namespace = OrderedDict()
    for index in indexes:
        namespace, fields = compute_signature(index)
        by_namespace.setdefault(namespace, []).append((fields, index))

    pairs = []
    for entries in by_namespace.values():
        for fields, index in entries:
            for other_fields, other in entries:
                if (len(other_fields) > len(fields)
                        and other_fields[:len(fields)] == fields):
                    pairs.append((index, other))
    return pairs


def report_redundant_indexes(current_db):
    """Print every potentially redundant index found in ``current_db``."""
    print("Checking DB: %s" % current_db.name)
    indexes = []
    for cname in current_db.list_collection_names():
        # The index document is not guaranteed to carry "ns", so fall back
        # to the namespace built from the database and collection names.
        namespace = "%s.%s" % (current_db.name, cname)
        for index in current_db[cname].list_indexes():
            # Use the driver's key document as-is: a JSON round trip into a
            # plain dict does not preserve field order on Python 2.7, and
            # field order is what the prefix comparison depends on.
            indexes.append({
                "ns": index.get("ns", namespace),
                "name": index["name"],
                "key": index["key"],
            })

    for redundant, covering in find_redundant_indexes(indexes):
        print("Index %s[%s] may be redundant with %s[%s]" % (
            redundant["ns"],
            redundant["name"],
            covering["ns"],
            covering["name"]))


def main(options):
    """Check the selected database, or every database, for redundant indexes."""
    client = get_client(options.host, options.port, options.user, options.password)

    if options.database:
        databases = [options.database]
    else:
        databases = client.list_database_names()

    for db in databases:
        report_redundant_indexes(client[db])


if __name__ == "__main__":
    options = get_cli_options()
    main(options)
运行示例如图

三、结尾
大家有不懂的地方,可以咨询笔者。【声明】对于在生产环境使用此脚本产生的问题(一般不会有任何问题),笔者不承担任何责任,希望大家理解。
由于笔者能力有限,有不足之处请多指教。

喜欢作者,可以关注一下
文章转载自DBA入坑指南,如果涉嫌侵权,请发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。




