暂无图片
暂无图片
暂无图片
暂无图片
暂无图片

MongoDB巡检小工具介绍

DBA入坑指南 2021-04-18
2939

一、前言

作为一名DBA,了解线上数据库的运行情况并定期巡检是很有必要的,这样可以及时发现线上可能出现的问题。笔者最近需要对腾讯云MongoDB有一个大概的了解,所以对开源的mongodb-tools的部分函数进行了修改,主要是为了适应python2.7和公司需要。笔者的系统版本为7.2,python版本为2.7.5,以下脚本在笔者环境运行没问题,大家只需要使用pip安装依赖的模块即可。直接拿开源版本运行可能会有问题:一是部分功能函数已经过时,二是开源脚本兼容性有点问题。如果大家需要运行在python3版本,肯定是需要做一些修改的。开源脚本的地址为https://github.com/jwilder/mongodb-tools ,大家可以看看,感谢源作者Jason Wilder的开源。

二、具体脚本

1、collection_stats.py      ##获取MongoDB集合信息

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-


    """
    This script prints some basic collection stats about the size of the
    collections and their indexes.
    """
    import sys
    from prettytable import PrettyTable
    import psutil
    from pymongo import MongoClient
    from pymongo import ReadPreference
    from optparse import OptionParser


    def compute_signature(index):
        """Build a string signature for an index document.

        The signature is the index namespace (``index["ns"]``) followed by one
        ``<field>_<value>`` fragment per entry of ``index["key"]``, in the
        iteration order of that mapping.

        :param index: mapping with an ``"ns"`` string and a ``"key"`` mapping.
        :return: the concatenated signature string.
        :raises KeyError: if ``"ns"`` or ``"key"`` is missing.
        """
        key_spec = index["key"]
        # Join the fragments once instead of growing the string piece by piece.
        return index["ns"] + "".join(
            "%s_%s" % (field, key_spec[field]) for field in key_spec)


    def get_collection_stats(database, collection):
        """Run the ``collstats`` command for one collection and return the reply.

        :param database: database handle exposing ``command(name, value)``.
        :param collection: collection handle exposing ``full_name`` and ``name``.
        :return: whatever ``database.command("collstats", collection.name)``
            returns.
        """
        # A parenthesised single-argument print emits the same text on
        # Python 2.7 and also parses on Python 3.
        print("Checking DB: %s" % collection.full_name)
        return database.command("collstats", collection.name)


    def get_cli_options():
        """Parse the command-line options of the collection-stats script.

        Reads ``sys.argv`` through ``optparse``. ``--port`` is declared as an
        integer so a non-numeric value is rejected by the parser with a usage
        error instead of failing later when the port is converted.

        :return: the parsed options object with attributes ``host``, ``port``,
            ``database``, ``user`` and ``password``.
        """
        parser = OptionParser(
            usage="usage: python %prog [options]",
            description="""This script prints some basic collection stats about the size of the collections and their indexes.""")

        parser.add_option("-H", "--host",
                          dest="host",
                          default="localhost",
                          metavar="HOST",
                          help="MongoDB host")
        parser.add_option("-p", "--port",
                          dest="port",
                          type="int",
                          default=27017,
                          metavar="PORT",
                          help="MongoDB port")
        parser.add_option("-d", "--database",
                          dest="database",
                          default="",
                          metavar="DATABASE",
                          help="Target database to generate statistics. All if omitted.")
        parser.add_option("-u", "--user",
                          dest="user",
                          default="",
                          metavar="USER",
                          help="Admin username if authentication is enabled")
        parser.add_option("--password",
                          dest="password",
                          default="",
                          metavar="PASSWORD",
                          help="Admin password if authentication is enabled")

        # Positional arguments are not used by this script.
        options, _args = parser.parse_args()
        return options


    def get_client(host, port, username, password):
        """Create a ``MongoClient`` for the given server.

        Authentication options (``authSource='admin'``, SCRAM-SHA-1) are only
        passed when ``username`` is non-empty. The CLI defaults user and
        password to empty strings, and sending credentials with an empty user
        would request authentication even where it is not wanted.

        :param host: server host name or address.
        :param port: server port; converted with ``int()``.
        :param username: user name, or an empty value to connect anonymously.
        :param password: password for ``username``.
        :return: the client, or ``None`` if building it raised (the error is
            printed).
        """
        try:
            client_kwargs = {"host": host, "port": int(port)}
            if username:
                client_kwargs.update(username=username,
                                     password=password,
                                     authSource='admin',
                                     authMechanism='SCRAM-SHA-1')
            return MongoClient(**client_kwargs)
        except Exception as e:
            # NOTE(review): recent PyMongo versions are believed to connect
            # lazily, so network/auth failures may only surface on the first
            # operation rather than here -- confirm against the installed
            # driver version.
            print('connect to server failed:%s' % e)
            return None




    # From http://www.5dollarwhitebox.org/drupal/node/84
    def convert_bytes(bytes):
        """Format a byte count as a short human-readable string.

        Uses binary units (1K = 1024 bytes) and two decimals, with suffix
        ``T``, ``G``, ``M``, ``K`` or ``b``. The unit is chosen from the
        absolute value, so negative inputs keep their sign.

        :param bytes: number of bytes (anything ``float()`` accepts).
        :return: formatted string such as ``'1.50K'``.
        """
        value = float(bytes)
        magnitude = abs(value)
        units = (
            (1099511627776, 'T'),
            (1073741824, 'G'),
            (1048576, 'M'),
            (1024, 'K'),
        )
        for threshold, suffix in units:
            if magnitude >= threshold:
                # The division operator was missing in the published listing.
                return '%.2f%s' % (value / threshold, suffix)
        return '%.2fb' % value


    def main(options):
        """Print per-collection size statistics and overall totals.

        Connects with ``get_client``, runs ``collstats`` on every collection of
        the selected database (or of all databases except ``local``), prints a
        table sorted by the "% Size" column and then the summed totals. When
        ``options.host`` is ``"localhost"`` it also prints RAM figures taken
        from ``psutil``.

        :param options: parsed CLI options with ``host``, ``port``, ``user``,
            ``password`` and ``database`` attributes.
        """
        client = get_client(options.host, options.port, options.user, options.password)
        if client is None:
            # get_client has already printed the reason.
            return

        summary_stats = {
            "count": 0,
            "size": 0,
            "indexSize": 0,
            "storageSize": 0,
        }
        all_db_stats = {}

        if options.database:
            databases = [options.database]
        else:
            # list_*_names() replaces the deprecated database_names() /
            # collection_names() helpers.
            databases = client.list_database_names()

        for db in databases:
            # FIXME: Add an option to include oplog stats.
            if db == "local":
                continue

            database = client[db]
            db_stats = all_db_stats[database.name] = []
            for collection_name in database.list_collection_names():
                stats = get_collection_stats(database, database[collection_name])
                db_stats.append(stats)

                summary_stats["count"] += stats["count"]
                summary_stats["size"] += stats["size"]
                summary_stats["indexSize"] += stats.get("totalIndexSize", 0)
                summary_stats["storageSize"] += stats.get("storageSize", 0)

        x = PrettyTable(["Collection", "Count", "% Size", "DB Size", "Avg Obj Size", "Indexes", "Index Size", "Storage Size"])
        x.align["Collection"] = "l"
        for column in ("% Size", "Count", "DB Size", "Avg Obj Size", "Index Size", "Storage Size"):
            x.align[column] = "r"
        x.padding_width = 1

        print("")

        total_size = float(summary_stats["size"])
        for db_stats in all_db_stats.values():
            for stat in db_stats:
                # Avoid dividing by zero when every collection is empty.
                share = (stat["size"] / total_size) * 100 if total_size else 0.0
                x.add_row([stat["ns"], stat["count"], "%0.1f%%" % share,
                           convert_bytes(stat["size"]),
                           convert_bytes(stat.get("avgObjSize", 0)),
                           stat.get("nindexes", 0),
                           convert_bytes(stat.get("totalIndexSize", 0)),
                           convert_bytes(stat.get("storageSize", 0))])

        print("")
        print(x.get_string(sortby="% Size"))
        print("Total Documents: %s" % summary_stats["count"])
        print("Total Data Size: %s" % convert_bytes(summary_stats["size"]))
        print("Total Index Size: %s" % convert_bytes(summary_stats["indexSize"]))
        print("Total Storage Size: %s" % convert_bytes(summary_stats["storageSize"]))

        # this is only meaningful if we're running the script on localhost
        if options.host == "localhost":
            # One snapshot keeps the three RAM figures consistent with each other.
            memory = psutil.virtual_memory()
            ram_headroom = memory.total - summary_stats["indexSize"]
            print("RAM Headroom: %s" % convert_bytes(ram_headroom))
            print("RAM Used: %s (%s%%)" % (convert_bytes(memory.used), memory.percent))
            print("Available RAM Headroom: %s" % convert_bytes((100 - memory.percent) / 100 * ram_headroom))


    # Script entry point: parse the command line, then print the report.
    if __name__ == "__main__":
        options = get_cli_options()
        main(options)

    运行示例如图


    2、index_stats.py     ##获取MongoDB集合索引信息

      #!/usr/bin/env python
      # -*- coding: utf-8 -*-


      """
      This script prints some basic collection stats about the size of the
      collections and their indexes.
      """


      from prettytable import PrettyTable
      import psutil
      from pymongo import MongoClient
      from pymongo import ReadPreference
      from optparse import OptionParser


      def compute_signature(index):
          """Build a string signature for an index document.

          The signature is the index namespace (``index["ns"]``) followed by
          one ``<field>_<value>`` fragment per entry of ``index["key"]``, in
          the iteration order of that mapping.

          :param index: mapping with an ``"ns"`` string and a ``"key"`` mapping.
          :return: the concatenated signature string.
          :raises KeyError: if ``"ns"`` or ``"key"`` is missing.
          """
          key_spec = index["key"]
          # Join the fragments once instead of growing the string piece by piece.
          return index["ns"] + "".join(
              "%s_%s" % (field, key_spec[field]) for field in key_spec)


      def get_collection_stats(database, collection):
          """Run the ``collstats`` command for one collection and return the reply.

          :param database: database handle exposing ``command(name, value)``.
          :param collection: collection handle exposing ``full_name`` and ``name``.
          :return: whatever ``database.command("collstats", collection.name)``
              returns.
          """
          # A parenthesised single-argument print emits the same text on
          # Python 2.7 and also parses on Python 3.
          print("Checking DB: %s" % collection.full_name)
          return database.command("collstats", collection.name)


      # From http://www.5dollarwhitebox.org/drupal/node/84
      def convert_bytes(bytes):
          """Format a byte count as a short human-readable string.

          Uses binary units (1K = 1024 bytes) and two decimals, with suffix
          ``T``, ``G``, ``M``, ``K`` or ``b``. The unit is chosen from the
          absolute value, so negative inputs keep their sign.

          :param bytes: number of bytes (anything ``float()`` accepts).
          :return: formatted string such as ``'1.50K'``.
          """
          value = float(bytes)
          magnitude = abs(value)
          units = (
              (1099511627776, 'T'),
              (1073741824, 'G'),
              (1048576, 'M'),
              (1024, 'K'),
          )
          for threshold, suffix in units:
              if magnitude >= threshold:
                  # The division operator was missing in the published listing.
                  return '%.2f%s' % (value / threshold, suffix)
          return '%.2fb' % value


      def get_cli_options():
          """Parse the command-line options of the index-stats script.

          Reads ``sys.argv`` through ``optparse``. ``--port`` is declared as an
          integer so a non-numeric value is rejected by the parser with a usage
          error instead of failing later when the port is converted.

          :return: the parsed options object with attributes ``host``,
              ``port``, ``database``, ``user`` and ``password``.
          """
          parser = OptionParser(
              usage="usage: python %prog [options]",
              description="""This script prints some basic collection stats about the size of the collections and their indexes.""")

          parser.add_option("-H", "--host",
                            dest="host",
                            default="localhost",
                            metavar="HOST",
                            help="MongoDB host")
          parser.add_option("-p", "--port",
                            dest="port",
                            type="int",
                            default=27017,
                            metavar="PORT",
                            help="MongoDB port")
          parser.add_option("-d", "--database",
                            dest="database",
                            default="",
                            metavar="DATABASE",
                            help="Target database to generate statistics. All if omitted.")
          parser.add_option("-u", "--user",
                            dest="user",
                            default="",
                            metavar="USER",
                            help="Admin username if authentication is enabled")
          parser.add_option("--password",
                            dest="password",
                            default="",
                            metavar="PASSWORD",
                            help="Admin password if authentication is enabled")

          # Positional arguments are not used by this script.
          options, _args = parser.parse_args()
          return options


      def get_client(host, port, username, password):
          """Create a ``MongoClient`` for the given server.

          Authentication options (``authSource='admin'``, SCRAM-SHA-1) are only
          passed when ``username`` is non-empty. The CLI defaults user and
          password to empty strings, and sending credentials with an empty
          user would request authentication even where it is not wanted.

          :param host: server host name or address.
          :param port: server port; converted with ``int()``.
          :param username: user name, or an empty value to connect anonymously.
          :param password: password for ``username``.
          :return: the client, or ``None`` if building it raised (the error is
              printed).
          """
          try:
              client_kwargs = {"host": host, "port": int(port)}
              if username:
                  client_kwargs.update(username=username,
                                       password=password,
                                       authSource='admin',
                                       authMechanism='SCRAM-SHA-1')
              return MongoClient(**client_kwargs)
          except Exception as e:
              # NOTE(review): recent PyMongo versions are believed to connect
              # lazily, so network/auth failures may only surface on the first
              # operation rather than here -- confirm against the installed
              # driver version.
              print('connect to server failed:%s' % e)
              return None


      def main(options):
          """Print per-index size statistics, the five largest indexes and totals.

          Connects with ``get_client``, runs ``collstats`` on every collection
          of the selected database (or of all databases except ``local``) and
          prints one table row per entry of each reply's ``indexSizes``
          mapping. When ``options.host`` is ``"localhost"`` it also prints RAM
          figures taken from ``psutil``.

          :param options: parsed CLI options with ``host``, ``port``, ``user``,
              ``password`` and ``database`` attributes.
          """
          client = get_client(options.host, options.port, options.user, options.password)
          if client is None:
              # get_client has already printed the reason.
              return

          summary_stats = {
              "count": 0,
              "size": 0,
              "indexSize": 0,
          }
          all_db_stats = {}

          if options.database:
              databases = [options.database]
          else:
              # list_*_names() replaces the deprecated database_names() /
              # collection_names() helpers.
              databases = client.list_database_names()

          for db in databases:
              # FIXME: Add an option to include oplog stats.
              if db == "local":
                  continue

              database = client[db]
              db_stats = all_db_stats[database.name] = []
              for collection_name in database.list_collection_names():
                  stats = get_collection_stats(database, database[collection_name])
                  db_stats.append(stats)

                  summary_stats["count"] += stats["count"]
                  summary_stats["size"] += stats["size"]
                  summary_stats["indexSize"] += stats.get("totalIndexSize", 0)

          columns = ["Collection", "Index", "% Size", "Index Size"]

          def new_table():
              # Both tables share the same columns and alignment.
              table = PrettyTable(columns)
              table.align["Collection"] = "l"
              table.align["Index"] = "l"
              table.align["% Size"] = "r"
              table.align["Index Size"] = "r"
              table.padding_width = 1
              return table

          x = new_table()

          print("")

          total_index_size = float(summary_stats["indexSize"])
          # (size, row) pairs. A list keeps indexes of equal size apart; a
          # dict keyed by size would let them overwrite each other.
          sized_rows = []
          for db_stats in all_db_stats.values():
              for stat in db_stats:
                  for index, index_size in stat.get("indexSizes", {}).items():
                      # Avoid dividing by zero when no index data was reported.
                      share = (index_size / total_index_size) * 100 if total_index_size else 0.0
                      row = [stat["ns"], index,
                             "%0.1f%%" % share,
                             convert_bytes(index_size)]
                      sized_rows.append((index_size, row))
                      x.add_row(row)

          print("Index Overview")
          print(x.get_string(sortby="Collection"))

          print("")
          print("Top 5 Largest Indexes")
          x = new_table()
          largest = sorted(sized_rows, key=lambda pair: pair[0], reverse=True)[:5]
          for _size, row in largest:
              x.add_row(row)
          print(x)
          print("")

          print("Total Documents: %s" % summary_stats["count"])
          print("Total Data Size: %s" % convert_bytes(summary_stats["size"]))
          print("Total Index Size: %s" % convert_bytes(summary_stats["indexSize"]))

          # this is only meaningful if we're running the script on localhost
          if options.host == "localhost":
              # One snapshot keeps the three RAM figures consistent with each other.
              memory = psutil.virtual_memory()
              ram_headroom = memory.total - summary_stats["indexSize"]
              print("RAM Headroom: %s" % convert_bytes(ram_headroom))
              print("RAM Used: %s (%s%%)" % (convert_bytes(memory.used), memory.percent))
              print("Available RAM Headroom: %s" % convert_bytes((100 - memory.percent) / 100 * ram_headroom))


      # Script entry point: parse the command line, then print the report.
      if __name__ == "__main__":
          options = get_cli_options()
          main(options)

      运行示例如图


      3、redundant_indexes.py    ##获取MongoDB集合索引是否冗余

        #!/usr/bin/env python
        # -*- coding: utf-8 -*-


        """
        This is a simple script to print out potentially redundant indexes in a MongoDB instance.
        For example, if an index is defined on {field1:1,field2:1} and there is another index
        with just fields {field1:1}, the latter index is not needed since the first index already
        indexes the necessary fields.
        """
        import json
        from pymongo import MongoClient
        from pymongo import ReadPreference
        from optparse import OptionParser




        def get_cli_options():
            """Parse the command-line options of the redundant-index script.

            Reads ``sys.argv`` through ``optparse``. ``--port`` is declared as
            an integer so a non-numeric value is rejected by the parser with a
            usage error instead of failing later when the port is converted.

            :return: the parsed options object with attributes ``host``,
                ``port``, ``database``, ``user`` and ``password``.
            """
            # The description used to be copied from the stats scripts and
            # described the wrong tool in --help output.
            parser = OptionParser(
                usage="usage: python %prog [options]",
                description="""This script prints potentially redundant indexes in a MongoDB instance.""")

            parser.add_option("-H", "--host",
                              dest="host",
                              default="localhost",
                              metavar="HOST",
                              help="MongoDB host")
            parser.add_option("-p", "--port",
                              dest="port",
                              type="int",
                              default=27017,
                              metavar="PORT",
                              help="MongoDB port")
            parser.add_option("-d", "--database",
                              dest="database",
                              default="",
                              metavar="DATABASE",
                              help="Target database to generate statistics. All if omitted.")
            parser.add_option("-u", "--user",
                              dest="user",
                              default="",
                              metavar="USER",
                              help="Admin username if authentication is enabled")
            parser.add_option("--password",
                              dest="password",
                              default="",
                              metavar="PASSWORD",
                              help="Admin password if authentication is enabled")

            # Positional arguments are not used by this script.
            options, _args = parser.parse_args()
            return options


        def get_client(host, port, username, password):
            """Create a ``MongoClient`` for the given server.

            Authentication options (``authSource='admin'``, SCRAM-SHA-1) are
            only passed when ``username`` is non-empty. The CLI defaults user
            and password to empty strings, and sending credentials with an
            empty user would request authentication even where it is not
            wanted.

            :param host: server host name or address.
            :param port: server port; converted with ``int()``.
            :param username: user name, or an empty value to connect
                anonymously.
            :param password: password for ``username``.
            :return: the client, or ``None`` if building it raised (the error
                is printed).
            """
            try:
                client_kwargs = {"host": host, "port": int(port)}
                if username:
                    client_kwargs.update(username=username,
                                         password=password,
                                         authSource='admin',
                                         authMechanism='SCRAM-SHA-1')
                return MongoClient(**client_kwargs)
            except Exception as e:
                # NOTE(review): recent PyMongo versions are believed to
                # connect lazily, so network/auth failures may only surface on
                # the first operation rather than here -- confirm against the
                # installed driver version.
                print('connect to server failed:%s' % e)
                return None


        def main(options):
            """Report indexes whose key pattern is a prefix of another index.

            For each selected database, every collection's indexes are
            listed. An index is reported as possibly redundant when its
            ordered (field, direction) pairs form a proper prefix of another
            index on the same collection.

            :param options: parsed CLI options with ``host``, ``port``,
                ``user``, ``password`` and ``database`` attributes.
            """
            client = get_client(options.host, options.port, options.user, options.password)
            if client is None:
                # get_client has already printed the reason.
                return

            def normalize_direction(value):
                # Directions may come back as 1 / 1.0 / -1; fold those to ints
                # and keep non-numeric kinds (e.g. "text") unchanged.
                try:
                    return int(value)
                except (TypeError, ValueError):
                    return value

            def key_pattern(index):
                # Read the key document directly so its field order is kept;
                # the earlier JSON round-trip produced a plain dict, which is
                # unordered on Python 2.7.
                return tuple((field, normalize_direction(direction))
                             for field, direction in index["key"].items())

            def report_redundant_indexes(current_db):
                print("Checking DB: %s" % current_db.name)
                for cname in current_db.list_collection_names():
                    collection = current_db[cname]
                    # Index documents from newer servers may not carry "ns";
                    # fall back to the collection's own full name.
                    namespace = collection.full_name
                    patterns = {}
                    for index in collection.list_indexes():
                        patterns[key_pattern(index)] = index

                    for pattern, index in patterns.items():
                        for other_pattern, other_index in patterns.items():
                            if len(other_pattern) <= len(pattern):
                                continue
                            # Compare whole (field, direction) pairs so that a
                            # field name which merely starts with another
                            # name is not mistaken for a prefix.
                            if other_pattern[:len(pattern)] == pattern:
                                print("Index %s[%s] may be redundant with %s[%s]" % (
                                    index.get("ns", namespace),
                                    index["name"],
                                    other_index.get("ns", namespace),
                                    other_index["name"]))

            if options.database:
                databases = [options.database]
            else:
                # list_database_names() replaces the deprecated database_names().
                databases = client.list_database_names()

            for db in databases:
                report_redundant_indexes(client[db])


        # Script entry point: parse the command line, then run the check.
        if __name__ == "__main__":
            options = get_cli_options()
            main(options)

        运行示例如图


        三、结尾

        大家有不懂的地方,可以咨询笔者。【声明】对于在生产环境使用此脚本产生的问题(一般不会有问题),笔者不承担任何责任,希望大家理解。


        由于笔者能力有限,有不足之处请多指教。


        喜欢作者,可以关注一下


        文章转载自DBA入坑指南,如果涉嫌侵权,请发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。

        评论