WebDriver初始化时做了什么?这里通过查看源码来说明(备注:着重看中文注解部分!!!)
from selenium.webdriver import Chrome

# Initialize the WebDriver for the Chrome browser. This statement must sit on
# its own line: when it shares a line with the comment it becomes part of the
# comment and the driver is never created.
driver = Chrome()
查看Chrome的源码:发现Chrome其实就是WebDriver类
class WebDriver(RemoteWebDriver):
    """
    Controls the ChromeDriver and allows you to drive the browser.

    You will need to download the ChromeDriver executable from
    http://chromedriver.storage.googleapis.com/index.html
    (note: this is the download page for the chromedriver binary).
    """

    def __init__(self, executable_path="chromedriver", port=0,
                 options=None, service_args=None,
                 desired_capabilities=None, service_log_path=None,
                 chrome_options=None, keep_alive=True):
        """
        Creates a new instance of the chrome driver.

        Starts the service and then creates new instance of chrome driver.

        :Args:
         - executable_path - path to the executable. If the default is used it
           assumes the executable is in the $PATH
         - port - port you would like the service to run, if left as 0, a free
           port will be found.
         - options - this takes an instance of ChromeOptions
         - service_args - List of args to pass to the driver service
         - desired_capabilities - Dictionary object with non-browser specific
           capabilities only, such as "proxy" or "loggingPref" (i.e.
           capabilities that are not tied to one particular browser).
         - service_log_path - Where to log information from the driver.
         - chrome_options - Deprecated argument for options
         - keep_alive - Whether to configure ChromeRemoteConnection to use
           HTTP keep-alive (a persistent connection).
        """
        # Deprecated alias: warn, then treat chrome_options as options.
        if chrome_options:
            warnings.warn('use options instead of chrome_options',
                          DeprecationWarning, stacklevel=2)
            options = chrome_options

        if options is None:
            # desired_capabilities stays as passed in
            if desired_capabilities is None:
                desired_capabilities = self.create_options().to_capabilities()
        else:
            if desired_capabilities is None:
                desired_capabilities = options.to_capabilities()
            else:
                # NOTE: this updates the dict object passed in by the caller.
                desired_capabilities.update(options.to_capabilities())

        # Step 1: create a Service object, then call start() on it to launch
        # the driver service.
        self.service = Service(
            executable_path,
            port=port,
            service_args=service_args,
            log_path=service_log_path)
        self.service.start()

        try:
            # Step 2: call the parent RemoteWebDriver initializer, which sends
            # an HTTP request to create a new session.
            RemoteWebDriver.__init__(
                self,
                command_executor=ChromeRemoteConnection(
                    remote_server_addr=self.service.service_url,
                    keep_alive=keep_alive),
                desired_capabilities=desired_capabilities)
        except Exception:
            # Clean up via quit() before re-raising the original error.
            self.quit()
            raise
        self._is_remote = False
着重看上面初始化方法中的两个部分:
1)Service对象.start()开启服务
class Service(service.Service):
    """
    Object that manages the starting and stopping of the ChromeDriver
    """

    # Overrides the initializer of the parent class service.Service.
    def __init__(self, executable_path, port=0, service_args=None,
                 log_path=None, env=None):
        """
        Creates a new instance of the Service

        :Args:
         - executable_path : Path to the ChromeDriver
         - port : Port the service is running on
         - service_args : List of args to pass to the chromedriver service
         - log_path : Path for the chromedriver service to log to"""

        self.service_args = service_args or []
        # A log path, when given, is appended as a --log-path argument.
        if log_path:
            self.service_args.append('--log-path=%s' % log_path)

        # Delegate to the parent initializer.
        service.Service.__init__(self, executable_path, port=port, env=env,
                                 start_error_message="Please see https://sites.google.com/a/chromium.org/chromedriver/home")
父类service.Service
class Service(object):
    # Base service class: stores the driver executable path, port, log target
    # and environment, and launches the executable as a child process.
    # (Helper methods used below, such as command_line_args,
    # assert_process_still_running and is_connectable, are not shown in this
    # excerpt.)

    def __init__(self, executable, port=0, log_file=DEVNULL, env=None, start_error_message=""):
        # self.path is the path to the driver executable (chromedriver.exe in
        # the article's example).
        self.path = executable

        self.port = port
        # Port 0 means "pick a free port".
        if self.port == 0:
            self.port = utils.free_port()

        if not _HAS_NATIVE_DEVNULL and log_file == DEVNULL:
            log_file = open(os.devnull, 'wb')

        self.start_error_message = start_error_message
        self.log_file = log_file
        self.env = env or os.environ

    @property
    def service_url(self):
        """
        Gets the url of the Service
        """
        # Yields a URL of the form http://localhost:<port>
        return "http://%s" % utils.join_host_port('localhost', self.port)

    def start(self):
        """
        Starts the Service (the service is the driver executable itself,
        e.g. chromedriver.exe).

        :Exceptions:
         - WebDriverException : Raised either when it can't start the service
           or when it can't connect to the service
        """
        try:
            # self.path is set in __init__: the path to the driver executable.
            cmd = [self.path]
            cmd.extend(self.command_line_args())
            # Launch the executable as a child process. On Windows,
            # Popen.__init__ calls _execute_child, which in turn calls
            # _winapi.CreateProcess to create the process.
            self.process = subprocess.Popen(cmd, env=self.env,
                                            close_fds=platform.system() != 'Windows',
                                            stdout=self.log_file,
                                            stderr=self.log_file,
                                            stdin=PIPE)
        except TypeError:
            raise
        except OSError as err:
            if err.errno == errno.ENOENT:
                raise WebDriverException(
                    "'%s' executable needs to be in PATH. %s" % (
                        os.path.basename(self.path), self.start_error_message)
                )
            elif err.errno == errno.EACCES:
                raise WebDriverException(
                    "'%s' executable may have wrong permissions. %s" % (
                        os.path.basename(self.path), self.start_error_message)
                )
            else:
                raise
        except Exception as e:
            raise WebDriverException(
                "The executable %s needs to be available in the path. %s\n%s" %
                (os.path.basename(self.path), self.start_error_message, str(e)))
        # Poll once per second until the service is connectable; give up with
        # a WebDriverException once the counter reaches 30.
        count = 0
        while True:
            self.assert_process_still_running()
            if self.is_connectable():
                break
            count += 1
            time.sleep(1)
            if count == 30:
                raise WebDriverException("Can not connect to the Service %s" % self.path)
from subprocess import Popen

# Launch chromedriver directly as a child process. The Windows path is a raw
# string so that the backslashes are taken literally; in a normal string
# literal, sequences such as "\c" and "\P" are invalid escapes and trigger a
# DeprecationWarning (a SyntaxWarning from Python 3.12 on).
process = Popen(r'D:\cjnsoft\Python\Python37\chromedriver.exe')
执行完后,任务管理器中有chromedriver的进程
相当于打开DOS(cmd)命令行窗口,输入chromedriver.exe的文件路径后回车,从而启动了chromedriver的服务
2)RemoteWebDriver初始化方法创建新会话
Chrome的WebDriver的父类RemoteWebDriver,其实是selenium.webdriver.remote.webdriver模块中的WebDriver类(导入时起了别名RemoteWebDriver),所以它的类名实际上也叫WebDriver
class WebDriver(object):
    """
    Controls a browser by sending commands to a remote server
    (here, the webdriver service).
    This server is expected to be running the WebDriver wire protocol
    as defined at
    https://github.com/SeleniumHQ/selenium/wiki/JsonWireProtocol

    :Attributes:
     - session_id - String ID of the browser session started and controlled
       by this WebDriver.
     - capabilities - Dictionary of effective capabilities of this browser
       session as returned by the remote server. See
       https://github.com/SeleniumHQ/selenium/wiki/DesiredCapabilities
     - command_executor - remote_connection.RemoteConnection object used to
       execute commands.
     - error_handler - errorhandler.ErrorHandler object used to handle errors.
    """

    _web_element_cls = WebElement

    def __init__(self, command_executor='http://127.0.0.1:4444/wd/hub',
                 desired_capabilities=None, browser_profile=None, proxy=None,
                 keep_alive=False, file_detector=None, options=None):
        """
        Create a new driver that will issue commands using the wire protocol.

        :Args:
         - command_executor - Either a string representing URL of the remote
           server or a custom remote_connection.RemoteConnection object.
           Defaults to 'http://127.0.0.1:4444/wd/hub'.
         - desired_capabilities - A dictionary of capabilities to request when
           starting the browser session. Required parameter.
         - browser_profile - A
           selenium.webdriver.firefox.firefox_profile.FirefoxProfile object.
           Only used if Firefox is requested. Optional.
         - proxy - A selenium.webdriver.common.proxy.Proxy object. The browser
           session will be started with given proxy settings, if possible.
           Optional.
         - keep_alive - Whether to configure remote_connection.RemoteConnection
           to use HTTP keep-alive. Defaults to False.
         - file_detector - Pass custom file detector object during
           instantiation. If None, then default LocalFileDetector() will be
           used.
         - options - instance of a driver options.Options class
        """
        # Start from the options' capabilities (if any), then layer the
        # explicitly passed desired_capabilities on top.
        capabilities = {}
        if options is not None:
            capabilities = options.to_capabilities()
        if desired_capabilities is not None:
            if not isinstance(desired_capabilities, dict):
                raise WebDriverException("Desired Capabilities must be a dictionary")
            else:
                capabilities.update(desired_capabilities)
        if proxy is not None:
            warnings.warn("Please use FirefoxOptions to set proxy",
                          DeprecationWarning, stacklevel=2)
            proxy.add_to_capabilities(capabilities)
        self.command_executor = command_executor
        # A URL given as bytes/str is wrapped in a RemoteConnection; any other
        # object is used as the command executor as-is.
        if type(self.command_executor) is bytes or isinstance(self.command_executor, str):
            self.command_executor = RemoteConnection(command_executor, keep_alive=keep_alive)
        self._is_remote = True
        self.session_id = None
        self.capabilities = {}
        self.error_handler = ErrorHandler()
        self.start_client()
        if browser_profile is not None:
            warnings.warn("Please use FirefoxOptions to set browser profile",
                          DeprecationWarning, stacklevel=2)
        # The initializer calls start_session to create the new session.
        self.start_session(capabilities, browser_profile)
        self._switch_to = SwitchTo(self)
        self._mobile = Mobile(self)
        self.file_detector = file_detector or LocalFileDetector()

    def start_session(self, capabilities, browser_profile=None):
        """
        Creates a new session with the desired capabilities.

        :Args:
         - capabilities - Dictionary of capabilities to request for the new
           session; InvalidArgumentException is raised if it is not a dict.
         - browser_profile - A
           selenium.webdriver.firefox.firefox_profile.FirefoxProfile object.
           Only used if Firefox is requested.
        """
        if not isinstance(capabilities, dict):
            raise InvalidArgumentException("Capabilities must be a dictionary")
        if browser_profile:
            if "moz:firefoxOptions" in capabilities:
                capabilities["moz:firefoxOptions"]["profile"] = browser_profile.encoded
            else:
                capabilities.update({'firefox_profile': browser_profile.encoded})
        w3c_caps = _make_w3c_caps(capabilities)
        parameters = {"capabilities": w3c_caps,
                      "desiredCapabilities": capabilities}
        # start_session delegates to execute(), which sends the HTTP request
        # that creates the new session.
        response = self.execute(Command.NEW_SESSION, parameters)
        if 'sessionId' not in response:
            response = response['value']
        # Take the session id from the response.
        self.session_id = response['sessionId']
        self.capabilities = response.get('value')

        # if capabilities is none we are probably speaking to
        # a W3C endpoint
        if self.capabilities is None:
            self.capabilities = response.get('capabilities')

        # Double check to see if we have a W3C Compliant browser
        self.w3c = response.get('status') is None
        self.command_executor.w3c = self.w3c
ChromeRemoteConnection继承于RemoteConnection
class ChromeRemoteConnection(RemoteConnection):
    # RemoteConnection subclass that registers extra Chrome-specific commands.

    # Overrides the parent initializer.
    def __init__(self, remote_server_addr, keep_alive=True):
        # Delegate to the parent initializer first.
        RemoteConnection.__init__(self, remote_server_addr, keep_alive)
        # Each entry maps a command name to an (HTTP method, URL template)
        # pair.
        self._commands["launchApp"] = ('POST', '/session/$sessionId/chromium/launch_app')
        self._commands["setNetworkConditions"] = ('POST', '/session/$sessionId/chromium/network_conditions')
        self._commands["getNetworkConditions"] = ('GET', '/session/$sessionId/chromium/network_conditions')
        self._commands['executeCdpCommand'] = ('POST', '/session/$sessionId/goog/cdp/execute')
父类RemoteConnection
class RemoteConnection(object):
    """A connection with the Remote WebDriver server (the webdriver service).

    Communicates with the server using the WebDriver wire protocol:
    https://github.com/SeleniumHQ/selenium/wiki/JsonWireProtocol"""

    def __init__(self, remote_server_addr, keep_alive=False, resolve_ip=True):
        # Attempt to resolve the hostname and get an IP address.
        self.keep_alive = keep_alive
        parsed_url = parse.urlparse(remote_server_addr)
        if parsed_url.hostname and resolve_ip:
            port = parsed_url.port or None
            if parsed_url.scheme == "https":
                # For https the hostname is kept as-is (no IP lookup).
                ip = parsed_url.hostname
            elif port and not common_utils.is_connectable(port, parsed_url.hostname):
                ip = None
                LOGGER.info('Could not connect to port {} on host '
                            '{}'.format(port, parsed_url.hostname))
            else:
                ip = common_utils.find_connectable_ip(parsed_url.hostname,
                                                      port=port)
            if ip:
                # Rebuild the URL with the resolved address, re-attaching the
                # port and any user[:password] credentials.
                netloc = ip
                if parsed_url.port:
                    netloc = common_utils.join_host_port(netloc,
                                                         parsed_url.port)
                if parsed_url.username:
                    auth = parsed_url.username
                    if parsed_url.password:
                        auth += ':%s' % parsed_url.password
                    netloc = '%s@%s' % (auth, netloc)
                remote_server_addr = parse.urlunparse(
                    (parsed_url.scheme, netloc, parsed_url.path,
                     parsed_url.params, parsed_url.query, parsed_url.fragment))
            else:
                LOGGER.info('Could not get IP address for host: %s' %
                            parsed_url.hostname)

        self._url = remote_server_addr
        if keep_alive:
            # NOTE(review): self._timeout is not defined in this excerpt;
            # presumably a class attribute declared elsewhere - confirm.
            self._conn = urllib3.PoolManager(timeout=self._timeout)
总结
WebDriver类的初始化方法中,
1.使用Service类的start方法开启webdriver服务(具体是使用subprocess模块的Popen类来启动服务的,类似于在Windows的命令行(cmd)窗口中输入chromedriver.exe的绝对路径并回车,然后就启动了chromedriver服务)
Service类的作用:管理webdriver服务的开启和停止
2. RemoteWebDriver初始化时新建session,连接到服务:
调用父类RemoteWebDriver的初始化方法,RemoteWebDriver初始化方法中调用start_session方法,start_session中调用了execute方法,其实就是发送了一个http请求用于创建session