使用Hadoop RESTful API实现对集群信息的统计

(适用于hadoop 2.7及以上版本)

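本文涉及到的RESTful API包括:WebHDFS REST API、YARN ResourceManager REST API、MapReduce History Server REST API和Spark History Server REST API。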

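1. 统计HDFS文件系统实时使用情况

  • URL

http://emr-header-1:50070/webhdfs/v1/<PATH>?op=GETCONTENTSUMMARY

(<PATH>为要统计的HDFS路径;50070为Hadoop 2.x中NameNode默认的HTTP端口,请以集群实际配置为准)

  • 返回结果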
{
  "ContentSummary":
  {
    "directoryCount": 2,
    "fileCount"     : 1,
    "length"        : 24930,
    "quota"         : -1,
    "spaceConsumed" : 24930,
    "spaceQuota"    : -1
  }
}
  • 关于返回结果的说明:
{
  "name"      : "ContentSummary",
  "properties":
  {
    "ContentSummary":
    {
      "type"      : "object",
      "properties":
      {
        "directoryCount":
        {
          "description": "The number of directories.",
          "type"       : "integer",
          "required"   : true
        },
        "fileCount":
        {
          "description": "The number of files.",
          "type"       : "integer",
          "required"   : true
        },
        "length":
        {
          "description": "The number of bytes used by the content.",
          "type"       : "integer",
          "required"   : true
        },
        "quota":
        {
          "description": "The namespace quota of this directory.",
          "type"       : "integer",
          "required"   : true
        },
        "spaceConsumed":
        {
          "description": "The disk space consumed by the content.",
          "type"       : "integer",
          "required"   : true
        },
        "spaceQuota":
        {
          "description": "The disk space quota.",
          "type"       : "integer",
          "required"   : true
        }
      }
    }
  }
}

2. 查看集群的实时信息和状态

  • URL

http://emr-header-1:8088/ws/v1/cluster

  • 返回结果
{
    "clusterInfo": {
        "id": 1495123166259, 
        "startedOn": 1495123166259, 
        "state": "STARTED", 
        "haState": "ACTIVE", 
        "rmStateStoreName": "org.apache.hadoop.yarn.server.resourcemanager.recovery.NullRMStateStore", 
        "resourceManagerVersion": "2.7.2", 
        "resourceManagerBuildVersion": "2.7.2 from 4bee04d3d1c27d7ef559365d3bdd2a8620807bfc by root source checksum c63f7cc71b8f63249e35126f0f7492d", 
        "resourceManagerVersionBuiltOn": "2017-04-17T12:28Z", 
        "hadoopVersion": "2.7.2", 
        "hadoopBuildVersion": "2.7.2 from 4bee04d3d1c27d7ef559365d3bdd2a8620807bfc by root source checksum 3329b146070a2bc9e249fa9ba9fb55", 
        "hadoopVersionBuiltOn": "2017-04-17T12:18Z", 
        "haZooKeeperConnectionState": "ResourceManager HA is not enabled."
    }
}

3. 查看资源队列的实时信息,包括队列的配额信息、资源使用实时情况

  • URL

http://emr-header-1:8088/ws/v1/cluster/scheduler

  • 返回结果
{
    "scheduler": {
        "schedulerInfo": {
            "type": "capacityScheduler", 
            "capacity": 100, 
            "usedCapacity": 0, 
            "maxCapacity": 100, 
            "queueName": "root", 
            "queues": {
                "queue": [
                    {
                        "type": "capacitySchedulerLeafQueueInfo", 
                        "capacity": 1, 
                        "usedCapacity": 0, 
                        "maxCapacity": 90, 
                        "absoluteCapacity": 1, 
                        "absoluteMaxCapacity": 90, 
                        "absoluteUsedCapacity": 0, 
                        "numApplications": 0, 
                        "queueName": "algorithm_aliyun", 
                        "state": "RUNNING", 
                        "resourcesUsed": {
                            "memory": 0, 
                            "vCores": 0
                        }, 
                        "hideReservationQueues": false, 
                        "nodeLabels": [
                            "*"
                        ], 
                        "numActiveApplications": 0, 
                        "numPendingApplications": 0, 
                        "numContainers": 0, 
                        "maxApplications": 100, 
                        "maxApplicationsPerUser": 100, 
                        "userLimit": 100, 
                        "users": null, 
                        "userLimitFactor": 1, 
                        "AMResourceLimit": {
                            "memory": 11776, 
                            "vCores": 7
                        }, 
                        "usedAMResource": {
                            "memory": 0, 
                            "vCores": 0
                        }, 
                        "userAMResourceLimit": {
                            "memory": 160, 
                            "vCores": 1
                        }, 
                        "preemptionDisabled": true
                    }, 
                    {
                        "type": "capacitySchedulerLeafQueueInfo", 
                        "capacity": 1, 
                        "usedCapacity": 0, 
                        "maxCapacity": 90, 
                        "absoluteCapacity": 1, 
                        "absoluteMaxCapacity": 90, 
                        "absoluteUsedCapacity": 0, 
                        "numApplications": 0, 
                        "queueName": "dcps_aliyun", 
                        "state": "RUNNING", 
                        "resourcesUsed": {
                            "memory": 0, 
                            "vCores": 0
                        }, 
                        "hideReservationQueues": false, 
                        "nodeLabels": [
                            "*"
                        ], 
                        "numActiveApplications": 0, 
                        "numPendingApplications": 0, 
                        "numContainers": 0, 
                        "maxApplications": 100, 
                        "maxApplicationsPerUser": 100, 
                        "userLimit": 100, 
                        "users": null, 
                        "userLimitFactor": 1, 
                        "AMResourceLimit": {
                            "memory": 11776, 
                            "vCores": 7
                        }, 
                        "usedAMResource": {
                            "memory": 0, 
                            "vCores": 0
                        }, 
                        "userAMResourceLimit": {
                            "memory": 160, 
                            "vCores": 1
                        }, 
                        "preemptionDisabled": true
                    }, 
                    {
                        "type": "capacitySchedulerLeafQueueInfo", 
                        "capacity": 31, 
                        "usedCapacity": 0, 
                        "maxCapacity": 100, 
                        "absoluteCapacity": 31, 
                        "absoluteMaxCapacity": 100, 
                        "absoluteUsedCapacity": 0, 
                        "numApplications": 0, 
                        "queueName": "default", 
                        "state": "RUNNING", 
                        "resourcesUsed": {
                            "memory": 0, 
                            "vCores": 0
                        }, 
                        "hideReservationQueues": false, 
                        "nodeLabels": [
                            "*"
                        ], 
                        "numActiveApplications": 0, 
                        "numPendingApplications": 0, 
                        "numContainers": 0, 
                        "maxApplications": 3100, 
                        "maxApplicationsPerUser": 3100, 
                        "userLimit": 100, 
                        "users": null, 
                        "userLimitFactor": 1, 
                        "AMResourceLimit": {
                            "memory": 13088, 
                            "vCores": 8
                        }, 
                        "usedAMResource": {
                            "memory": 0, 
                            "vCores": 0
                        }, 
                        "userAMResourceLimit": {
                            "memory": 4064, 
                            "vCores": 3
                        }, 
                        "preemptionDisabled": true
                    }, 
                    {
                        "type": "capacitySchedulerLeafQueueInfo", 
                        "capacity": 15.000001, 
                        "usedCapacity": 0, 
                        "maxCapacity": 100, 
                        "absoluteCapacity": 15.000001, 
                        "absoluteMaxCapacity": 100, 
                        "absoluteUsedCapacity": 0, 
                        "numApplications": 0, 
                        "queueName": "feed_aliyun", 
                        "state": "RUNNING", 
                        "resourcesUsed": {
                            "memory": 0, 
                            "vCores": 0
                        }, 
                        "hideReservationQueues": false, 
                        "nodeLabels": [
                            "*"
                        ], 
                        "numActiveApplications": 0, 
                        "numPendingApplications": 0, 
                        "numContainers": 0, 
                        "maxApplications": 1500, 
                        "maxApplicationsPerUser": 7500, 
                        "userLimit": 100, 
                        "users": null, 
                        "userLimitFactor": 5, 
                        "AMResourceLimit": {
                            "memory": 12320, 
                            "vCores": 8
                        }, 
                        "usedAMResource": {
                            "memory": 0, 
                            "vCores": 0
                        }, 
                        "userAMResourceLimit": {
                            "memory": 9856, 
                            "vCores": 7
                        }, 
                        "preemptionDisabled": true
                    }, 
                    {
                        "type": "capacitySchedulerLeafQueueInfo", 
                        "capacity": 51, 
                        "usedCapacity": 0, 
                        "maxCapacity": 90, 
                        "absoluteCapacity": 51, 
                        "absoluteMaxCapacity": 90, 
                        "absoluteUsedCapacity": 0, 
                        "numApplications": 0, 
                        "queueName": "hot_aliyun", 
                        "state": "RUNNING", 
                        "resourcesUsed": {
                            "memory": 0, 
                            "vCores": 0
                        }, 
                        "hideReservationQueues": false, 
                        "nodeLabels": [
                            "*"
                        ], 
                        "numActiveApplications": 0, 
                        "numPendingApplications": 0, 
                        "numContainers": 0, 
                        "maxApplications": 5100, 
                        "maxApplicationsPerUser": 5100, 
                        "userLimit": 100, 
                        "users": null, 
                        "userLimitFactor": 1, 
                        "AMResourceLimit": {
                            "memory": 11776, 
                            "vCores": 7
                        }, 
                        "usedAMResource": {
                            "memory": 0, 
                            "vCores": 0
                        }, 
                        "userAMResourceLimit": {
                            "memory": 6688, 
                            "vCores": 5
                        }, 
                        "preemptionDisabled": true
                    }, 
                    {
                        "type": "capacitySchedulerLeafQueueInfo", 
                        "capacity": 1, 
                        "usedCapacity": 0, 
                        "maxCapacity": 90, 
                        "absoluteCapacity": 1, 
                        "absoluteMaxCapacity": 90, 
                        "absoluteUsedCapacity": 0, 
                        "numApplications": 0, 
                        "queueName": "push_aliyun", 
                        "state": "RUNNING", 
                        "resourcesUsed": {
                            "memory": 0, 
                            "vCores": 0
                        }, 
                        "hideReservationQueues": false, 
                        "nodeLabels": [
                            "*"
                        ], 
                        "numActiveApplications": 0, 
                        "numPendingApplications": 0, 
                        "numContainers": 0, 
                        "maxApplications": 100, 
                        "maxApplicationsPerUser": 100, 
                        "userLimit": 100, 
                        "users": null, 
                        "userLimitFactor": 1, 
                        "AMResourceLimit": {
                            "memory": 11776, 
                            "vCores": 7
                        }, 
                        "usedAMResource": {
                            "memory": 0, 
                            "vCores": 0
                        }, 
                        "userAMResourceLimit": {
                            "memory": 160, 
                            "vCores": 1
                        }, 
                        "preemptionDisabled": true
                    }
                ]
            }
        }
    }
}

4. 查看实时的作业列表。列表信息中也包含了作业运行的详情信息,包括作业名称、id、运行状态、起止时间、资源使用情况。

  • URL

http://emr-header-1:8088/ws/v1/cluster/apps

  • 返回结果
{
  "apps":
  {
    "app":
    [
       {
          "finishedTime" : 1326815598530,
          "amContainerLogs" : "http://host.domain.com:8042/node/containerlogs/container_1326815542473_0001_01_000001",
          "trackingUI" : "History",
          "state" : "FINISHED",
          "user" : "user1",
          "id" : "application_1326815542473_0001",
          "clusterId" : 1326815542473,
          "finalStatus" : "SUCCEEDED",
          "amHostHttpAddress" : "host.domain.com:8042",
          "progress" : 100,
          "name" : "word count",
          "startedTime" : 1326815573334,
          "elapsedTime" : 25196,
          "diagnostics" : "",
          "trackingUrl" : "http://host.domain.com:8088/proxy/application_1326815542473_0001/jobhistory/job/job_1326815542473_1_1",
          "queue" : "default",
          "allocatedMB" : 0,
          "allocatedVCores" : 0,
          "runningContainers" : 0,
          "memorySeconds" : 151730,
          "vcoreSeconds" : 103
       },
       {
          "finishedTime" : 1326815789546,
          "amContainerLogs" : "http://host.domain.com:8042/node/containerlogs/container_1326815542473_0002_01_000001",
          "trackingUI" : "History",
          "state" : "FINISHED",
          "user" : "user1",
          "id" : "application_1326815542473_0002",
          "clusterId" : 1326815542473,
          "finalStatus" : "SUCCEEDED",
          "amHostHttpAddress" : "host.domain.com:8042",
          "progress" : 100,
          "name" : "Sleep job",
          "startedTime" : 1326815641380,
          "elapsedTime" : 148166,
          "diagnostics" : "",
          "trackingUrl" : "http://host.domain.com:8088/proxy/application_1326815542473_0002/jobhistory/job/job_1326815542473_2_2",
          "queue" : "default",
          "allocatedMB" : 0,
          "allocatedVCores" : 0,
          "runningContainers" : 1,
          "memorySeconds" : 640064,
          "vcoreSeconds" : 442
       } 
    ]
  }
}

5. 统计作业扫描的数据量情况

job扫描的数据量,需要通过History Server的RESTful API查询,MapReduce的和Spark的又有一些差异。

5.1 MapReduce job扫描数据量

  • URL

http://emr-header-1:19888/ws/v1/history/mapreduce/jobs/job_1495123166259_0962/counters

  • 返回结果(示例数据,其中的job id与上面URL中的job id不一致)
{
   "jobCounters" : {
      "id" : "job_1326381300833_2_2",
      "counterGroup" : [
         {
            "counterGroupName" : "Shuffle Errors",
            "counter" : [
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 0,
                  "name" : "BAD_ID"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 0,
                  "name" : "CONNECTION"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 0,
                  "name" : "IO_ERROR"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 0,
                  "name" : "WRONG_LENGTH"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 0,
                  "name" : "WRONG_MAP"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 0,
                  "name" : "WRONG_REDUCE"
               }
            ]
          },
         {
            "counterGroupName" : "org.apache.hadoop.mapreduce.FileSystemCounter",
            "counter" : [
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 2483,
                  "name" : "FILE_BYTES_READ"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 108525,
                  "name" : "FILE_BYTES_WRITTEN"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 0,
                  "name" : "FILE_READ_OPS"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 0,
                  "name" : "FILE_LARGE_READ_OPS"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 0,
                  "name" : "FILE_WRITE_OPS"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 48,
                  "name" : "HDFS_BYTES_READ"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 0,
                  "name" : "HDFS_BYTES_WRITTEN"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 1,
                  "name" : "HDFS_READ_OPS"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 0,
                  "name" : "HDFS_LARGE_READ_OPS"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 0,
                  "name" : "HDFS_WRITE_OPS"
               }
            ]
         },
         {
            "counterGroupName" : "org.apache.hadoop.mapreduce.TaskCounter",
            "counter" : [
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 1,
                  "name" : "MAP_INPUT_RECORDS"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 1200,
                  "name" : "MAP_OUTPUT_RECORDS"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 4800,
                  "name" : "MAP_OUTPUT_BYTES"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 2235,
                  "name" : "MAP_OUTPUT_MATERIALIZED_BYTES"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 48,
                  "name" : "SPLIT_RAW_BYTES"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 0,
                  "name" : "COMBINE_INPUT_RECORDS"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 0,
                  "name" : "COMBINE_OUTPUT_RECORDS"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 1200,
                  "name" : "REDUCE_INPUT_GROUPS"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 2235,
                  "name" : "REDUCE_SHUFFLE_BYTES"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 1200,
                  "name" : "REDUCE_INPUT_RECORDS"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 0,
                  "name" : "REDUCE_OUTPUT_RECORDS"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 2400,
                  "name" : "SPILLED_RECORDS"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 1,
                  "name" : "SHUFFLED_MAPS"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 0,
                  "name" : "FAILED_SHUFFLE"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 1,
                  "name" : "MERGED_MAP_OUTPUTS"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 113,
                  "name" : "GC_TIME_MILLIS"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 1830,
                  "name" : "CPU_MILLISECONDS"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 478068736,
                  "name" : "PHYSICAL_MEMORY_BYTES"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 2159284224,
                  "name" : "VIRTUAL_MEMORY_BYTES"
               },
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 378863616,
                  "name" : "COMMITTED_HEAP_BYTES"
               }
            ]
         },
         {
            "counterGroupName" : "org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter",
            "counter" : [
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 0,
                  "name" : "BYTES_READ"
               }
            ]
         },
         {
            "counterGroupName" : "org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter",
            "counter" : [
               {
                  "reduceCounterValue" : 0,
                  "mapCounterValue" : 0,
                  "totalCounterValue" : 0,
                  "name" : "BYTES_WRITTEN"
               }
            ]
         }
      ]
   }
}

其中,org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter分组下BYTES_READ的totalCounterValue即为job扫描的数据量。
具体参数说明参见:https://hadoop.apache.org/docs/stable/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/HistoryServerRest.html#Job_Counters_API

5.2 Spark job扫描数据量

  • URL

http://emr-header-1:18080/api/v1/applications/application_1495123166259_1050/executors

每个executor的totalInputBytes总和为整个job的数据扫描量。
更多参考:http://spark.apache.org/docs/latest/monitoring.html

上一篇:Java 中 String 对 null 对象的容错处理!


下一篇:The Apache Software Foundation Announces Apache® Zeppelin™ as a Top-Level Project