This is an old revision of the document!
Table of Contents
Selenium
Nos bajamos la imagen de selenium con los drivers de chrome y de firefox:
Chrome: http://chromedriver.chromium.org/downloads
docker run -ti iwanttobefreak/selenium
Entramos en modo interactivo para probarlo:
ipython
Python 2.7.13 (default, Sep 26 2018, 18:42:22) Type "copyright", "credits" or "license" for more information. IPython 5.1.0 -- An enhanced Interactive Python. ? -> Introduction and overview of IPython's features. %quickref -> Quick reference. help -> Python's own help system. object? -> Details about 'object', use 'object??' for extra details. In [1]:
Ahora podemos lanzar comandos:
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
options = Options()
options.add_argument("--headless")
driver = webdriver.Firefox(options=options)
Por ejemplo vamos a buscar un tren en la web de Renfe:
url = 'http://www.renfe.com' driver.get(url)
Y guardamos una captura de pantalla de la página en un fichero:
driver.save_screenshot('/tmp/selenium/renfe.png')
Ejemplo headless firefox con timeout en llamada get
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
options = Options()
options.add_argument("--headless")
url1 = 'http://10.255.255.1'
url1_timeout = 5
driver = webdriver.Firefox(firefox_options = options)
driver.set_page_load_timeout(url1_timeout)
driver.get(url1)
Ejemplo headless chrome con timeout en llamada get
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
options = Options()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
url1 = 'http://10.255.255.1'
url1_timeout = 5
driver = webdriver.Chrome(desired_capabilities = options.to_capabilities())
driver.set_page_load_timeout(url1_timeout)
driver.get(url1)
Python firefox remote headless
Por definición siempre que es remote es headless
from selenium.webdriver import Firefox, FirefoxProfile, Remote
host = '172.30.10.18'
port = '4444'
host = selenium_hub_host
port = selenium_hub_port
url = f'{host}:{port}/wd/hub'
d = Remote(command_executor = url, desired_capabilities = desired_capabilities)
# MUY importante para evitar error:
# selenium.common.exceptions.ElementClickInterceptedException: Message: Element <input id="projects_" name="projects[]" type="checkbox"> is not clickable at point (179,16) because another element <a href="/"> obscures it
d.set_window_size(1920, 1080)
Python firefox remote
1. Arrancar el standalone
version: '3.7' services: hub: container_name: hub image: selenium/standalone-firefox
2. Obtener la IP de ese contenedor
3. Probar
from selenium import webdriver
host = "192.168.3.44"
port = "4444"
desired_capabilities = {
                                        'browserName': 'firefox',
                                        'javascriptEnabled': True,
                                       }
self.driver = webdriver.Remote(command_executor =
                                      host + ':' + port + '/wd/hub',
                                      desired_capabilities =
                                                          desired_capabilities)
Python chrome headless local
IMPORTANTE: si no se especifica el tamaño de la pantalla puede no encontrar objetos en el DOM.
Ejemplo:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
CHROMEDRIVER_PATH = '/usr/local/bin/chromedriver'
options = Options()
options.add_argument('--headless')
options.add_argument("window-size=1920,1080")
driver = webdriver.Chrome(CHROMEDRIVER_PATH, chrome_options=options)
Python firefox headless local
Sin profile
IMPORTANTE: si no se especifica el tamaño de la pantalla puede no encontrar objetos en el DOM.
Ejemplo:
from selenium.webdriver.firefox.options import Options from selenium.webdriver import Firefox # Local headless options = Options() options.headless = True driver = Firefox(options = options)
Con profile
from selenium.webdriver.firefox.options import Options from selenium.webdriver import Firefox, FirefoxProfile # Local headless options = Options() options.headless = True driver = Firefox(firefox_profile = profile, options = options)
Python firefox headless local con profile y options
https://stackoverflow.com/a/52898225/11436137
from selenium import webdriver;
from selenium.webdriver.firefox.options import Options
cProfile = webdriver.FirefoxProfile();
dwnd_path = os.getcwd();
cProfile.add_preference('browser.download.folderList', '2');
cProfile.add_preference('browser.download.manager.showWhenStarting', 'false');
cProfile.add_preference('browser.download.dir', 'dwnd_path');
cProfile.add_preference('browser.helperApps.neverAsk.saveToDisk', 'application/octet-stream,application/vnd.ms-excel');
options = Options()
options.headless = True
driver = webdriver.Firefox(firefox_profile=cProfile, firefox_options=options, executable_path=r'C:\path\to\geckodriver.exe')
Errores
selenium.common.exceptions.ElementClickInterceptedException: Message: Element <input id="projects_" name="projects[]" type="checkbox"> is not clickable at point (179,16) because another element <a href="/"> obscures it
Causa: se ha iniciado un webdriver remoto sin especificar las dimensiones de la ventana
Solución:
driver.set_window_size(1920, 1080)
selenium.common.exceptions.WebDriverException: Message: Failed to decode response from marionette
Causa:
* Se ha especificado un tamaño de ventana con set_window_size() (probablemente es irrelevante)
* La instancia de Firefox se ha quedado sin memoria compartida (shm)
Solución: especificar variable “shm_size”. Ejemplo docker-compose:
 easyredmine-backup-selenium:
  container_name: easyredmine-backup-selenium
  image: selenium/standalone-firefox
  #restart: unless-stopped
  # Mandatory, to avoid "Message: Failed to decode response from marionette" error
  shm_size: ${SHM_SIZE}
  #environment:
  # - START_XVFB=False
  networks:
   network-easyredmine-backup:
    aliases:
     - easyredmine-backup-selenium
  volumes:
   - ${DOCKER_HOST_DOWNLOAD_DIR}:${DOCKER_CONTAINER_DOWNLOAD_DIR}
PROXY
Para firefox: webdriver.DesiredCapabilities.FIREFOX['proxy']
Para chrome: webdriver.DesiredCapabilities.CHROME['proxy']
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.proxy import *
PROXY = "172.17.0.1:3128"
webdriver.DesiredCapabilities.FIREFOX['proxy'] = {
    "httpProxy":PROXY,
    "ftpProxy":PROXY,
    "sslProxy":PROXY,
    "proxyType":"MANUAL"
}
options = Options()
options.add_argument("--headless")
driver = webdriver.Firefox(options=options)
Sesiones
Firefox
Abrimos Firefox en local. Escribimos en la barra about:profiles. Creamos un nuevo profile y le asignamos un directorio. Lanzamos el profile y navegamos para que nos guarde información, por ejemplo, el login de WhatsApp.
Lanzamos selenium con la siguiente propiedad:
myprofile = webdriver.FirefoxProfile("<ruta a mi directorio de perfil>")
driver = webdriver.Firefox(myprofile)
Chrome
Simplemente le tenemos que indicar un directorio y ya lo graba ahí. Funciona con Chromedriver 70 a 73. Lo podemos descargar de:
https://chromedriver.storage.googleapis.com/index.html
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
CHROMEDRIVER_PATH = '/chromedriver70/chromedriver'
options = Options()
options.add_argument('user-data-dir=/selenium/session')
options.add_argument("window-size=1920,1080")
driver = webdriver.Chrome(CHROMEDRIVER_PATH, options=options)
Enviar mensaje
xpath = './/span[contains(@title, "Armando Bronca")]'
o = driver.find_element_by_xpath(xpath)
o.click()
xpath = './/div[contains(@class, "_3u328 copyable-text selectable-text")]'
o = driver.find_element_by_xpath(xpath)
o.send_keys('Mensaje enviado desde selenium')
xpath = './/button[contains(@class, "_3M-N-")]'
o = driver.find_element_by_xpath(xpath)
o.click()
Adjuntar fichero
xpath = './/span[contains(@title, "Armando Bronca")]'
o = driver.find_element_by_xpath(xpath)
o.click()
xpath = './/div[contains(@title, "Adjuntar")]'
o = driver.find_element_by_xpath(xpath)
o.click()
o = driver.find_element_by_xpath("//input[@type='file']")
o.send_keys(os.getcwd()+"/tmp/caron.png")
xpath = './/span[contains(@data-icon, "send-light")]'
o = driver.find_element_by_xpath(xpath)
o.click()
Padres e hijos
l
Padres e hijos
l
Padres e hijos
l
