# -*- coding:UTF-8 -*-
from __future__ import print_function

import sys
from time import sleep

import win32com.client
from win32com.client import DispatchEx

stdin, stdout, stderr = sys.stdin, sys.stdout, sys.stderr
if sys.version_info[0] == 2:
    # Python 2 only: make UTF-8 the implicit str/unicode codec.  reload(sys)
    # brings back sys.setdefaultencoding; the standard streams are captured
    # first and put back afterwards so the reload does not replace them.
    _saved_streams = (sys.stdin, sys.stdout, sys.stderr)
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding("utf-8")
    sys.stdin, sys.stdout, sys.stderr = _saved_streams


class NodeNotFoundError(AttributeError):
    """Raised when a single-node lookup finds no matching element.

    Derives from AttributeError because the lookups used to fail with an
    AttributeError on ``None``; existing ``except AttributeError`` handlers
    therefore keep working.
    """


class COM_IE(object):
    """Small wrapper that drives an Internet Explorer window through COM.

    Attributes:
        url: the URL passed to the constructor.
        ie: the InternetExplorer COM object in use.
        document: ``body.innerHTML`` captured by the last ``WaitIE``.
        text: ``body.innerText`` captured by the last ``WaitIE``.
        charset: document charset captured by the last ``WaitIE``.
        Visible: visibility flag; assigning it also updates the window.
    """

    # CLSID used to enumerate the currently open shell/browser windows.
    _SHELL_WINDOWS_CLSID = '{9BA05972-F6A8-11CF-A442-00A0C90A8F39}'

    # Attributes collected by NodesByAttr when no attribute name is given.
    _DEFAULT_ATTRS = ("id", "nodeName", "nodeType", "nodeValue", "className",
                      "innerHTML", "innerText", "href", "name", "title",
                      "type", "value")

    # Class-level default so the property is readable before __init__ runs.
    _visible = 1

    def __init__(self, url=None):
        """Attach to a window already showing *url*, or open a new one."""
        self.url = url
        self.Visible = 1
        self.ie = self.openIE(url)
        self.document = ""
        self.text = ""
        self.charset = None

    @property
    def Visible(self):
        """Visibility flag applied to windows created by ``NewIE``."""
        return self._visible

    @Visible.setter
    def Visible(self, value):
        # Previously a method with this name was shadowed by the instance
        # attribute and could never be called; assignment now does its job.
        self._visible = value
        ie = getattr(self, "ie", None)  # not set yet while __init__ runs
        if ie is not None:
            ie.Visible = value

    def ExistIE(self, url):
        """Return the open window whose LocationURL equals *url*, else None."""
        for ie in DispatchEx(self._SHELL_WINDOWS_CLSID):
            if ie.LocationURL == url:
                return ie
        return None

    def NewIE(self, url):
        """Start a new Internet Explorer instance and navigate it to *url*."""
        ie = DispatchEx("InternetExplorer.Application")
        ie.Visible = self.Visible
        ie.Navigate(url)
        return ie

    def openIE(self, url):
        """Reuse a window already at *url* if there is one, else open one."""
        ie = self.ExistIE(url)
        if ie is None:
            ie = self.NewIE(url)
        return ie

    def WaitIE(self, timeout=None, interval=1):
        """Block until the page is loaded, then cache its content.

        Polls ``ReadyState`` until it equals 4 and then stores the document
        charset, ``body.innerHTML`` and ``body.innerText`` on the instance.

        Args:
            timeout: maximum number of seconds to spend sleeping between
                polls; ``None`` (the default) waits indefinitely.
            interval: seconds to sleep between two polls.

        Raises:
            RuntimeError: if *timeout* is given and the page is still not
                loaded once that much time has been slept.
        """
        waited = 0
        while True:
            if self.ie.ReadyState == 4:
                document = self.ie.Document
                self.charset = document.charset
                self.document = document.body.innerHTML
                self.text = document.body.innerText
                return
            if timeout is not None and waited >= timeout:
                raise RuntimeError(
                    "page not loaded after %s second(s)" % (timeout,))
            sleep(interval)
            waited += interval

    def GetBody(self):
        """Wait for the page to load and return the document body element."""
        self.WaitIE()
        return self.ie.Document.body

    def GetNodes(self, parentNode, tag):
        """Return the descendants of *parentNode* with tag *tag* as a list.

        Example: ``coldiv = ie.GetNodes(body, "div")``
        """
        return [node for node in parentNode.getElementsByTagName(tag)]

    def NodeByAttr(self, Nodes, nodeattr, nodeval):
        """Return the first node whose *nodeattr* equals *nodeval*, or None.

        The attribute value is converted with ``str`` before comparing.
        Nodes that do not have the attribute never match (previously they
        matched the literal string ``"None"``).

        Example: ``editor = ie.NodeByAttr(coldiv, "id", "editor_ifr")``
        """
        for node in Nodes:
            value = node.getAttribute(nodeattr)
            if value is not None and str(value) == nodeval:
                return node
        return None

    def _require_node(self, body, node_type, node_attr, node_attr_val):
        """Find exactly like NodeByAttr, but raise instead of returning None."""
        tags = self.GetNodes(body, node_type)
        node = self.NodeByAttr(tags, node_attr, node_attr_val)
        if node is None:
            raise NodeNotFoundError(
                "no <%s> node with %s=%r"
                % (node_type, node_attr, node_attr_val))
        return node

    def SetNodeHtml(self, body, node_type, node_attr, node_attr_val,
                    node_inner_html):
        """Set ``innerHTML`` of the matching node; raise if none matches."""
        node = self._require_node(body, node_type, node_attr, node_attr_val)
        node.innerHTML = node_inner_html

    def SetNodeVal(self, body, node_type, node_attr, node_attr_val,
                   node_value):
        """Set ``value`` of the matching node; raise if none matches."""
        node = self._require_node(body, node_type, node_attr, node_attr_val)
        node.value = node_value

    def NodeClick(self, body, node_type, node_attr, node_attr_val):
        """Click the matching node; raise if none matches."""
        self._require_node(body, node_type, node_attr, node_attr_val).click()

    def GetNodeHtml(self, body, node_type, node_attr, node_attr_val):
        """Return ``innerHTML`` of the matching node; raise if none matches."""
        return self._require_node(
            body, node_type, node_attr, node_attr_val).innerHTML

    def GetNodeVal(self, body, node_type, node_attr, node_attr_val):
        """Return ``value`` of the matching node; raise if none matches."""
        return self._require_node(
            body, node_type, node_attr, node_attr_val).value

    # multiple nodes
    def NodesByAttr(self, Nodes, nodeattr=None, nodeval=None):
        """Collect attribute values from every node in *Nodes*.

        * no *nodeattr*: one dict per node holding a fixed set of common
          attributes (id, nodeName, innerText, href, ...);
        * *nodeattr* only: one ``{nodeattr: value}`` dict per node;
        * *nodeattr* and *nodeval*: the same, restricted to nodes whose
          attribute value (converted with ``str``) equals *nodeval*.

        Returns:
            A list of dicts, in node order.
        """
        value_list = []
        for node in Nodes:
            if not nodeattr:
                value_list.append(
                    {attr: node.getAttribute(attr)
                     for attr in self._DEFAULT_ATTRS})
                continue
            value = node.getAttribute(nodeattr)
            # A missing attribute (None) must not match the text "None".
            if nodeval and (value is None or str(value) != nodeval):
                continue
            value_list.append({nodeattr: value})
        return value_list

    # multiple nodes
    def GetNodesVal(self, body, node_type, node_attr=None, node_val=None):
        """Run NodesByAttr over every *node_type* element below *body*."""
        tags = self.GetNodes(body, node_type)
        return self.NodesByAttr(tags, node_attr, node_val)

    def Quit(self):
        """Close the Internet Explorer window."""
        self.ie.Quit()


if __name__ == "__main__":
    url = "http://blog.csdn.net/agoago_2009/"
if __name__ == "__main__":
    # Demo: open the page stored in `url`, list the text and target of
    # every link, then wait for the user before closing the browser.
    IE = COM_IE(url)
    try:
        BODY = IE.GetBody()
        # a_list = IE.GetNodesVal(BODY, "a", "href")
        a_list = IE.GetNodesVal(BODY, "a")
        for a in a_list:
            print(a.get("innerText"), a.get("href"))

        # Sample interaction, left disabled as in the original script:
        # IE.SetNodeVal(BODY, "input", "id", "inputSearch", "COM")
        # IE.NodeClick(BODY, "input", "id", "btnSubmit")
        # IE.WaitIE()
        # print(IE.document.strip()[:100])
        # print(IE.charset)
        # print(IE.text.strip()[:100])

        try:
            read_line = raw_input  # Python 2
        except NameError:
            read_line = input  # Python 3
        read_line('quit')
    finally:
        # Close the window even when the demo fails half-way through.
        IE.Quit()
随机推荐
-
挂羊头卖狗肉蓄意欺骗读者——谭浩强《C程序设计(第四版)》中所谓的“按照C99”(二)
挂羊头卖狗肉蓄意欺骗读者——谭浩强<C程序设计(第四版)>中所谓的“按照C99”(二) 在<谭C>p4:“本书的叙述以C99标准为依据”,下面从C89到C99的主要变化方面来看 ...
-
ipod中,写计时器倒计时界面倒计时没有更改
innerText 改为textContent. IE.Safari.Opera和Chrome支持innerText属性.Firefox虽然不支持innerText,但支持作用类似的textConte ...
-
C++初学者 const使用详解
关于C++中的const关键字的用法非常灵活,而使用const将大大改善程序的健壮性,参考了康建东兄的const使用详解一文,对其中进行了一些补充,写下了本文. 1. const常量 如: cons ...
-
BestCoder Round #1
逃生 反向拓扑+优先队列+逆序输出 这里要注意,题中要求的不是输出字典序,而是要编号小的尽量靠前(首先1尽量靠前,然后2尽量靠前..). 比如说 约束是 4->1,3->2,字典序答案就是 ...
-
ActionScript 设置元件色彩属性
var clr:Color = new Color(mc); var ct:Object = {ra:100, rb:0, ga:100, gb:0, ba:100, bb:0, aa ...
-
AVL的旋转
转自http://blog.csdn.net/gabriel1026/article/details/6311339 平衡二叉树在进行插入操作的时候可能出现不平衡的情况,AVL树即是一种自平衡的二叉树 ...
-
JSOI2015 分组赛记
分组赛结束了,虽然跟我关系不大,但是去了还是学到了不少东西 day1 上午报到,在宾馆遇到大神wzy,orz 好像没有参赛证发了,于是给我发了一个[工作证],233我是工作人员了,高贵冷艳 下午是常中 ...
-
验证码识别image/pdf to string 开源工具
http://blog.csdn.net/jollyjumper/article/details/18748003
-
1.Nginx服务应用
Nginx服务应用 Nginx的优点和作用 Nginx是一款高性能的HTTP和反向代理的服务器软件,还是一个IMAP/POP3/SMTP(邮件)代理服务器! Nginx在功能实现上都采用模块化结构设计 ...
-
CentOS下将Python的版本升级为3.x
本文主要介绍在Linux(CentOS)下将Python的版本升级为3.x的方法 众所周知,在2020年python官方将不再支持2.7版本的python,所以使用3.x版本的python是必要的,但 ...