使用Python获取yarn任务状态

使用Python获取yarn任务状态

yarn history

通过 YARN History Server 的 REST API 获取 MapReduce 任务信息，参考文档：http://hadoop.apache.org/docs/r2.6.0/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/HistoryServerRest.html

示例代码如下：
# Query the MapReduce history server REST API and print one tab-separated
# line (submit time, user, job name, duration) per succeeded job.
import json
import time

try:  # Python 2 module names
    import httplib
    from urllib import urlencode
except ImportError:  # Python 3 moved both modules
    import http.client as httplib
    from urllib.parse import urlencode

yarnHistoryServer = "172.31.103.124"
yarnHistoryPort = 19888


def parse_yarn_jobs(data):
    """Return the list of job dicts contained in a history-server reply.

    data is the raw response body (bytes or text).  An empty list is
    returned when the reply has no "jobs"/"job" entry.
    """
    if isinstance(data, bytes):
        data = data.decode("utf-8")
    payload = json.loads(data)
    # Tolerate a missing or null "jobs" entry instead of raising.
    return (payload.get("jobs") or {}).get("job") or []


def format_yarn_job(job):
    """Return 'submit time<TAB>user<TAB>name<TAB>duration' for one job dict.

    submitTime is an epoch value in milliseconds and is rendered in local
    time.  The duration is finishTime - startTime, left in the unit the
    server reports (milliseconds per the history server REST docs).
    """
    submitted = time.strftime('%Y-%m-%d %H:%M:%S',
                              time.localtime(job['submitTime'] // 1000))
    duration = job['finishTime'] - job['startTime']
    return '%s\t%s\t%s\t%s' % (submitted, job['user'], job['name'], duration)


def fetch_yarn_jobs(host=yarnHistoryServer, port=yarnHistoryPort,
                    user="chenlong", state="SUCCEEDED", limit=100):
    """Fetch job dicts for the given user and state from the history server.

    Raises RuntimeError when the server does not answer with HTTP 200.
    """
    query = {"limit": limit, "user": user, "state": state}
    url = "/ws/v1/history/mapreduce/jobs?%s" % urlencode(query)
    conn = httplib.HTTPConnection(host, port=port)
    try:
        conn.request("GET", url, headers={"accept": "application/json"})
        response = conn.getresponse()
        status = response.status
        data = response.read()
    finally:
        # Release the socket even when the request itself fails.
        conn.close()
    if status != 200:
        raise RuntimeError("history server returned HTTP %s for %s"
                           % (status, url))
    return parse_yarn_jobs(data)


def print_yarn_jobs():
    """Print one line per succeeded job returned by fetch_yarn_jobs()."""
    for job in fetch_yarn_jobs():
        if job['state'] == 'SUCCEEDED':
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(format_yarn_job(job))


if __name__ == "__main__":
    print_yarn_jobs()

spark history

通过 Spark History Server 的 REST API 获取应用信息，参考文档：https://github.com/apache/spark/blob/master/docs/monitoring.md

示例代码如下：
# Query the Spark history server REST API and print one tab-separated line
# (start time, user, application name, duration in seconds) for each
# completed application that matches the user / name filter.
import datetime
import json

try:  # Python 2 module names
    import httplib
    from urllib import urlencode
except ImportError:  # Python 3 moved both modules
    import http.client as httplib
    from urllib.parse import urlencode

sparkHistoryServer = "172.31.103.128"
sparkHistoryPort = 18088

# Timestamp layout once the trailing zone name has been stripped.
_SPARK_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%f"


def parse_spark_time(stamp):
    """Parse a timestamp such as '2016-06-01T10:00:00.250GMT'.

    Returns a naive datetime that keeps the fractional seconds.  A trailing
    'GMT' suffix is dropped before parsing; any other suffix makes
    strptime raise ValueError.
    """
    if stamp.endswith("GMT"):
        stamp = stamp[:-3]
    return datetime.datetime.strptime(stamp, _SPARK_TIME_FORMAT)


def spark_duration_seconds(start, end):
    """Return end - start in seconds (float) for two timestamp strings."""
    return (parse_spark_time(end) - parse_spark_time(start)).total_seconds()


def spark_app_row(app, user="chenb", name_fragment="App C"):
    """Return the output line for one application dict, or None.

    None is returned when the application has no attempts, when the first
    attempt's sparkUser differs from user, or when name_fragment does not
    occur in the application name.
    """
    attempts = app.get('attempts') or []
    if not attempts:
        return None
    first_attempt = attempts[0]
    spark_user = first_attempt['sparkUser']
    name = app['name']
    if spark_user != user or name_fragment not in name:
        return None
    start = first_attempt['startTime']
    duration = spark_duration_seconds(start, first_attempt['endTime'])
    return '%s\t%s\t%s\t%s' % (start, spark_user, name, duration)


def fetch_spark_apps(host=sparkHistoryServer, port=sparkHistoryPort,
                     min_date="2016-06-01", status="completed"):
    """Fetch the list of application dicts from the history server.

    Raises RuntimeError when the server does not answer with HTTP 200.
    """
    query = {"minDate": min_date, "status": status}
    url = "/api/v1/applications?%s" % urlencode(query)
    conn = httplib.HTTPConnection(host, port=port)
    try:
        conn.request("GET", url)
        response = conn.getresponse()
        http_status = response.status
        data = response.read()
    finally:
        # Release the socket even when the request itself fails.
        conn.close()
    if http_status != 200:
        raise RuntimeError("history server returned HTTP %s for %s"
                           % (http_status, url))
    if isinstance(data, bytes):
        data = data.decode("utf-8")
    return json.loads(data) or []


def print_spark_apps():
    """Print one line per matching application from fetch_spark_apps()."""
    for app in fetch_spark_apps():
        row = spark_app_row(app)
        if row is not None:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(row)


if __name__ == "__main__":
    print_spark_apps()