selenium源码分析:WebDriver初始化时做了什么?

WebDriver初始化时做了什么?这里通过查看源码来说明(备注:着重看中文注解部分!!!)

from selenium.webdriver import Chrome

# 初始化谷歌浏览器的WebDriver
driver = Chrome()

 

查看Chrome的源码:发现Chrome其实就是selenium.webdriver.chrome.webdriver模块中的WebDriver类(在selenium/webdriver/__init__.py中以别名Chrome导入)

class WebDriver(RemoteWebDriver):
    """
    Controls the ChromeDriver and allows you to drive the browser.
    翻译:控制ChromeDriver来驱动浏览器

    You will need to download the ChromeDriver executable from
    http://chromedriver.storage.googleapis.com/index.html
    备注:提供了chromedriver驱动的下载网页
    """

    def __init__(self, executable_path="chromedriver", port=0,
                 options=None, service_args=None,
                 desired_capabilities=None, service_log_path=None,
                 chrome_options=None, keep_alive=True):
        """
        Creates a new instance of the chrome driver.
        翻译:创建一个chrome driver的实例

        Starts the service and then creates new instance of chrome driver.
        翻译:开启服务,然后创建一个chrome driver的实例

        :Args:
         - executable_path - path to the executable. If the default is used it assumes the executable is in the $PATH
翻译:executable_path为chromedriver可执行文件的路径;如果使用默认值"chromedriver",则假定该可执行文件位于环境变量$PATH包含的目录中
         - port - port you would like the service to run, if left as 0, a free port will be found.
翻译:port为服务的端口;如果使用默认值0,那么将使用一个空闲的端口
         - options - this takes an instance of ChromeOptions
         - service_args - List of args to pass to the driver service
         - desired_capabilities - Dictionary object with non-browser specific
           capabilities only, such as "proxy" or "loggingPref".
翻译:desired_capabilities为字典类型,只应包含与具体浏览器无关的capabilities,例如"proxy"或"loggingPref"
         - service_log_path - Where to log information from the driver.
         - chrome_options - Deprecated argument for options
         - keep_alive - Whether to configure ChromeRemoteConnection to use HTTP keep-alive. 
翻译:keep_alive表示是否让ChromeRemoteConnection使用HTTP keep-alive(持久连接)
        """
        if chrome_options:
            warnings.warn('use options instead of chrome_options',
                          DeprecationWarning, stacklevel=2)
            options = chrome_options

        if options is None:
            # desired_capabilities stays as passed in
            if desired_capabilities is None:
                desired_capabilities = self.create_options().to_capabilities()
        else:
            if desired_capabilities is None:
                desired_capabilities = options.to_capabilities()
            else:
                desired_capabilities.update(options.to_capabilities())

        self.service = Service(
            executable_path,
            port=port,
            service_args=service_args,
            log_path=service_log_path)
        self.service.start()
        注解:创建一个Service对象,然后使用start方法开启服务

        try:
注解:调用父类RemoteWebDriver的初始化方法,其中发送了一个http请求来创建一个新会话
            RemoteWebDriver.__init__(
                self,
                command_executor=ChromeRemoteConnection(
                    remote_server_addr=self.service.service_url,
                    keep_alive=keep_alive),
                desired_capabilities=desired_capabilities)
        except Exception:
            self.quit()
            raise
        self._is_remote = False

 

着重看上面初始化方法中的两个部分:

1)Service对象.start()开启服务

class Service(service.Service):
    """
    Object that manages the starting and stopping of the ChromeDriver
翻译:管理ChromeDriver开启、停止的对象
    """

注解:重写父类service.Service的初始化方法
    def __init__(self, executable_path, port=0, service_args=None,
                 log_path=None, env=None):
        """
        Creates a new instance of the Service

        :Args:
         - executable_path : Path to the ChromeDriver
         - port : Port the service is running on
         - service_args : List of args to pass to the chromedriver service
         - log_path : Path for the chromedriver service to log to"""

        self.service_args = service_args or []
        if log_path:
            self.service_args.append('--log-path=%s' % log_path)

        service.Service.__init__(self, executable_path, port=port, env=env,
                                 start_error_message="Please see https://sites.google.com/a/chromium.org/chromedriver/home")
    注解:调用父类的初始化方法

 父类service.Service

class Service(object):

    def __init__(self, executable, port=0, log_file=DEVNULL, env=None, start_error_message=""):
        self.path = executable
        注解:self.path为chromedriver.exe的路径

        self.port = port
        if self.port == 0:
            self.port = utils.free_port()
        注解:如果port为0,那么使用一个空闲的端口

        if not _HAS_NATIVE_DEVNULL and log_file == DEVNULL:
            log_file = open(os.devnull, 'wb')

        self.start_error_message = start_error_message
        self.log_file = log_file
        self.env = env or os.environ

@property
    def service_url(self):
        """
        Gets the url of the Service
        翻译:webdriver服务的url
        """
        return "http://%s" % utils.join_host_port('localhost', self.port)
        注解:http://localhost:端口号

    def start(self):
        """
        Starts the Service.
        翻译:开启服务,这个服务就是chromedriver.exe

        :Exceptions:
         - WebDriverException : Raised either when it can't start the service
           or when it can't connect to the service
        """
        try:
            cmd = [self.path]
注解:self.path定义在初始化方法中,为chromedriver.exe的路径
            cmd.extend(self.command_line_args())
            self.process = subprocess.Popen(cmd, env=self.env,
                                           close_fds=platform.system() != 'Windows',
                                            stdout=self.log_file,
                                            stderr=self.log_file,
                                            stdin=PIPE)
注解:创建了一个Popen对象,Popen初始化方法中调用了_execute_child方法,_execute_child调用了_winapi.CreateProcess方法(windows中创建进程)
        except TypeError:
            raise
        except OSError as err:
            if err.errno == errno.ENOENT:
                raise WebDriverException(
                    "'%s' executable needs to be in PATH. %s" % (
                        os.path.basename(self.path), self.start_error_message)
                )
            elif err.errno == errno.EACCES:
                raise WebDriverException(
                    "'%s' executable may have wrong permissions. %s" % (
                        os.path.basename(self.path), self.start_error_message)
                )
            else:
                raise
        except Exception as e:
            raise WebDriverException(
                "The executable %s needs to be available in the path. %s\n%s" %
                (os.path.basename(self.path), self.start_error_message, str(e)))
        count = 0
        while True:
            self.assert_process_still_running()
            if self.is_connectable():
                break
            count += 1
            time.sleep(1)
            if count == 30:
                raise WebDriverException("Can not connect to the Service %s" % self.path)

from subprocess import Popen

# Launch chromedriver directly, the same way Service.start() does via subprocess.
# A raw string keeps the Windows path backslashes literal; the non-raw form relied
# on invalid escape sequences such as "\c" and "\P", which newer Python warns about.
process = Popen(r'D:\cjnsoft\Python\Python37\chromedriver.exe')

执行完后,任务管理器中有chromedriver的进程

相当于打开DOS(cmd)命令行窗口,输入chromedriver.exe的文件路径,回车,启动了chromedriver的服务

selenium源码分析:WebDriver初始化时做了什么?

 

 

2)RemoteWebDriver初始化方法创建新会话

WebDriver的父类RemoteWebDriver,实际上类名也叫WebDriver(定义在selenium.webdriver.remote.webdriver模块中,导入时取了别名RemoteWebDriver)

class WebDriver(object):
    """
    Controls a browser by sending commands to a remote server.
翻译:通过向远程服务器(即webdriver服务)发送命令来控制浏览器
    This server is expected to be running the WebDriver wire protocol
翻译:此服务应运行WebDriver wire协议
    as defined at
    https://github.com/SeleniumHQ/selenium/wiki/JsonWireProtocol

    :Attributes:
     - session_id - String ID of the browser session started and controlled by this WebDriver.
     - capabilities - Dictionaty of effective capabilities of this browser session as returned
         by the remote server. See https://github.com/SeleniumHQ/selenium/wiki/DesiredCapabilities
     - command_executor - remote_connection.RemoteConnection object used to execute commands. 翻译:command_executor为执行命令的RemoteConnection
     - error_handler - errorhandler.ErrorHandler object used to handle errors.
    """

    _web_element_cls = WebElement

    def __init__(self, command_executor='http://127.0.0.1:4444/wd/hub',
                 desired_capabilities=None, browser_profile=None, proxy=None,
                 keep_alive=False, file_detector=None, options=None):
        """
        Create a new driver that will issue commands using the wire protocol.
        翻译:创建一个新的驱动程序,它将使用wire协议发出命令

        :Args:
         - command_executor - Either a string representing URL of the remote server or a custom
             remote_connection.RemoteConnection object. Defaults to 'http://127.0.0.1:4444/wd/hub'.
         - desired_capabilities - A dictionary of capabilities to request when
             starting the browser session. Required parameter.
         - browser_profile - A selenium.webdriver.firefox.firefox_profile.FirefoxProfile object.
             Only used if Firefox is requested. Optional.
         - proxy - A selenium.webdriver.common.proxy.Proxy object. The browser session will
             be started with given proxy settings, if possible. Optional.
         - keep_alive - Whether to configure remote_connection.RemoteConnection to use
             HTTP keep-alive. Defaults to False.
         - file_detector - Pass custom file detector object during instantiation. If None,
             then default LocalFileDetector() will be used.
         - options - instance of a driver options.Options class
        """
        capabilities = {}
        if options is not None:
            capabilities = options.to_capabilities()
        if desired_capabilities is not None:
            if not isinstance(desired_capabilities, dict):
                raise WebDriverException("Desired Capabilities must be a dictionary")
            else:
                capabilities.update(desired_capabilities)
        if proxy is not None:
            warnings.warn("Please use FirefoxOptions to set proxy",
                          DeprecationWarning, stacklevel=2)
            proxy.add_to_capabilities(capabilities)
        self.command_executor = command_executor
        if type(self.command_executor) is bytes or isinstance(self.command_executor, str):
            self.command_executor = RemoteConnection(command_executor, keep_alive=keep_alive)
        self._is_remote = True
        self.session_id = None
        self.capabilities = {}
        self.error_handler = ErrorHandler()
        self.start_client()
        if browser_profile is not None:
            warnings.warn("Please use FirefoxOptions to set browser profile",
                          DeprecationWarning, stacklevel=2)
        self.start_session(capabilities, browser_profile)
注解:初始化方法中调用start_session方法
        self._switch_to = SwitchTo(self)
        self._mobile = Mobile(self)
        self.file_detector = file_detector or LocalFileDetector()

    def start_session(self, capabilities, browser_profile=None):
        """
        Creates a new session with the desired capabilities.

        :Args:
         - browser_name - The name of the browser to request.
         - version - Which browser version to request.
         - platform - Which platform to request the browser on.
         - javascript_enabled - Whether the new session should support JavaScript.
         - browser_profile - A selenium.webdriver.firefox.firefox_profile.FirefoxProfile object. Only used if Firefox is requested.
        """
        if not isinstance(capabilities, dict):
            raise InvalidArgumentException("Capabilities must be a dictionary")
        if browser_profile:
            if "moz:firefoxOptions" in capabilities:
                capabilities["moz:firefoxOptions"]["profile"] = browser_profile.encoded
            else:
                capabilities.update({'firefox_profile': browser_profile.encoded})
        w3c_caps = _make_w3c_caps(capabilities)
        parameters = {"capabilities": w3c_caps,
                      "desiredCapabilities": capabilities}
        response = self.execute(Command.NEW_SESSION, parameters)
 注解:start_session实际上调用的是execute方法,发送http请求,创建新会话
        if 'sessionId' not in response:
            response = response['value']
        self.session_id = response['sessionId']
注解:从响应结果中获取session_id
        self.capabilities = response.get('value')

        # if capabilities is none we are probably speaking to
        # a W3C endpoint
        if self.capabilities is None:
            self.capabilities = response.get('capabilities')

        # Double check to see if we have a W3C Compliant browser
        self.w3c = response.get('status') is None
        self.command_executor.w3c = self.w3c

 

ChromeRemoteConnection继承于RemoteConnection

class ChromeRemoteConnection(RemoteConnection):
注解:重写父类的初始化方法
    def __init__(self, remote_server_addr, keep_alive=True):
        RemoteConnection.__init__(self, remote_server_addr, keep_alive)
注解:调用父类的初始化方法
        self._commands["launchApp"] = ('POST', '/session/$sessionId/chromium/launch_app')
        self._commands["setNetworkConditions"] = ('POST', '/session/$sessionId/chromium/network_conditions')
        self._commands["getNetworkConditions"] = ('GET', '/session/$sessionId/chromium/network_conditions')
        self._commands['executeCdpCommand'] = ('POST', '/session/$sessionId/goog/cdp/execute')

 父类RemoteConnection

class RemoteConnection(object):
    """A connection with the Remote WebDriver server.
    翻译:与webdriver服务的连接
    Communicates with the server using the WebDriver wire protocol:
翻译:使用the WebDriver wire协议和服务进行通信
    https://github.com/SeleniumHQ/selenium/wiki/JsonWireProtocol"""

    def __init__(self, remote_server_addr, keep_alive=False, resolve_ip=True):
        # Attempt to resolve the hostname and get an IP address.
        翻译:尝试解析主机名并获取IP地址
        self.keep_alive = keep_alive
        parsed_url = parse.urlparse(remote_server_addr)
        if parsed_url.hostname and resolve_ip:
            port = parsed_url.port or None
            if parsed_url.scheme == "https":
                ip = parsed_url.hostname
            elif port and not common_utils.is_connectable(port, parsed_url.hostname):
                ip = None
                LOGGER.info('Could not connect to port {} on host '
                            '{}'.format(port, parsed_url.hostname))
            else:
                ip = common_utils.find_connectable_ip(parsed_url.hostname,
                                                      port=port)
            if ip:
                netloc = ip
                if parsed_url.port:
                    netloc = common_utils.join_host_port(netloc,
                                                         parsed_url.port)
                if parsed_url.username:
                    auth = parsed_url.username
                    if parsed_url.password:
                        auth += ':%s' % parsed_url.password
                    netloc = '%s@%s' % (auth, netloc)
                remote_server_addr = parse.urlunparse(
                    (parsed_url.scheme, netloc, parsed_url.path,
                     parsed_url.params, parsed_url.query, parsed_url.fragment))
            else:
                LOGGER.info('Could not get IP address for host: %s' %
                            parsed_url.hostname)

        self._url = remote_server_addr
        if keep_alive:
            self._conn = urllib3.PoolManager(timeout=self._timeout)

 

总结

WebDriver类的初始化方法中,

1.使用Service类的start方法开启webdriver服务(具体是使用subprocess模块的Popen类启动服务的,类似在windows的DOS(cmd)命令行窗口中输入chromedriver.exe的绝对路径,然后就启动了chromedriver服务)

Service类的作用:管理webdriver服务的打开和关闭

2. RemoteWebDriver初始化时新建session,连接到服务:

调用父类RemoteWebDriver的初始化方法,RemoteWebDriver初始化方法中调用start_session方法,start_session中调用了execute方法,其实就是发送了一个http请求用于创建session

 

上一篇:PostgreSQL磁盘使用监控


下一篇:Spark Streaming