#python解析远程xml数据的一种方法
##需要解析的xml 需要将updated时间为2018-03-01的节点数据解析出来
<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>****</title>
<link rel="alternate" type="text/html" href="***"/>
<updated>2018-03-01T07:00:00Z</updated>
<author>
<name>Jenkins Server</name>
</author>
<id>urn:uuid:903deee0-7bfa-11db-9fe1-0800200c9a66</id>
<entry>
<title>V7.18.10_AMF_Hours_Smoke #242 (stable)</title>
<link rel="alternate" type="text/html" href="http://job_address/242/"/>
<id>tag:hudson.dev.java.net,***:242</id>
<published>2018-03-01T04:00:00Z</published>
<updated>2018-03-01T04:00:00Z</updated>
</entry>
<entry>
<title>V7.18.10_AMF_Hours_Smoke #241 (back to normal)</title>
<link rel="alternate" type="text/html" href="http://job_address/241/"/>
<id>tag:hudson.dev.java.net,***:241</id>
<published>2018-03-01T03:00:00Z</published>
<updated>2018-03-01T03:00:00Z</updated>
</entry>
<entry>
<title>V7.18.10_AMF_Hours_Smoke #240 (broken since this build)</title>
<link rel="alternate" type="text/html" href="http://job_address/240/"/>
<id>tag:hudson.dev.java.net,***:240</id>
<published>2018-03-01T02:00:00Z</published>
<updated>2018-03-01T02:00:00Z</updated>
</entry>
<entry>
<title>V7.18.10_AMF_Hours_Smoke #239 (stable)</title>
<link rel="alternate" type="text/html" href="http://job_address/239/"/>
<id>tag:hudson.dev.java.net,***:239</id>
<published>2018-03-01T01:00:00Z</published>
<updated>2018-03-01T01:00:00Z</updated>
</entry>
<entry>
<title>V7.18.10_AMF_Hours_Smoke #238 (stable)</title>
<link rel="alternate" type="text/html" href="http://job_address/238/"/>
<id>tag:hudson.dev.java.net,***:238</id>
<published>2018-02-28T13:00:00Z</published>
<updated>2018-02-28T13:00:00Z</updated>
</entry>
<entry>
<title>V7.18.10_AMF_Hours_Smoke #237 (stable)</title>
<link rel="alternate" type="text/html" href="http://job_address/237/"/>
<id>tag:hudson.dev.java.net,***:237</id>
<published>2018-02-28T12:00:00Z</published>
<updated>2018-02-28T12:00:00Z</updated>
</entry>
</feed>
##所需模块
import urllib3
import re
from xml.dom.minidom import parseString
##请求数据 urllib3需要PoolManager实例发出请求,这个实例能处理连接池的细节和保证安全!
# NOTE(review): the original snippet used bare top-level `return` statements,
# which are a SyntaxError outside a function -- it was clearly a function body
# torn from context. Wrapped as two functions: a pure, testable feed parser
# plus a thin HTTP fetch wrapper.

def parse_last_day_entries(xml_text):
    """Parse a Jenkins Atom feed and return the entries from its latest day.

    The feed-level <updated> element gives the most recent day; entries are
    assumed to be sorted newest-first (as in the sample feed above), so the
    scan stops at the first entry from an earlier day.

    :param xml_text: the Atom feed as a decoded XML string
    :return: list of ``{'title': ..., 'updated': ...}`` dicts, one per entry
             whose <updated> date equals the feed's latest day
    """
    from xml.dom.minidom import parseString

    dom = parseString(xml_text)
    root = dom.documentElement
    # The first <updated> in document order is the feed-level one (it precedes
    # every <entry>), e.g. '2018-03-01T07:00:00Z' -> last day '2018-03-01'.
    feed_updated = root.getElementsByTagName('updated')[0].firstChild.data
    last_day = feed_updated.split('T')[0]

    result = []
    for entry in root.getElementsByTagName('entry'):
        entry_updated = entry.getElementsByTagName('updated')[0].firstChild.data
        # str.startswith replaces the original re.match(prefix, ...): the
        # prefix is a plain date with no regex metacharacters, so the two
        # checks agree, and the date is no longer treated as a pattern.
        if not entry_updated.startswith(last_day):
            break  # entries are newest-first; everything after is older
        title_node = entry.getElementsByTagName('title')[0]
        updated_node = entry.getElementsByTagName('updated')[0]
        result.append({title_node.nodeName: title_node.firstChild.data,
                       updated_node.nodeName: updated_node.firstChild.data})
    return result


def fetch_last_day_entries(url):
    """Fetch the Atom feed at *url* and return its latest-day entries.

    Prints a diagnostic and returns an empty list when the server does not
    answer with HTTP 200.

    :param url: address of the Jenkins Atom feed
    :return: same structure as :func:`parse_last_day_entries`
    """
    import urllib3  # local import: only the network path needs it

    http = urllib3.PoolManager()
    res = http.request('GET', url)
    if 200 != res.status:
        print("Can't get data from " + url + ": server return status is " + str(res.status))
        return []
    # res.data is bytes; decode (UTF-8 per the feed's XML declaration) first.
    return parse_last_day_entries(res.data.decode('utf-8'))