我正在使用 selenium 和 Python 从网页 http://www.oceanenergyireland.com/testfacility/corkharbour/observations 下载某些文件。我之前一直通过设置首选项来创建 Firefox 配置文件,效果一直很好。但在这种情况下,尽管已经设置了这些首选项,页面上仍然会弹出下载对话框(保存/打开)要求操作。有人可以帮我绕过这个弹窗,让文件自动下载吗?
首选项设置为:
# Build a Firefox profile that saves downloads silently instead of prompting.
fp = webdriver.FirefoxProfile()
# 2 = download into the custom directory given in browser.download.dir
fp.set_preference("browser.download.folderList", 2)
fp.set_preference("browser.download.manager.showWhenStarting", False)
# downloadDir must be an absolute path; an empty value re-enables the prompt.
fp.set_preference("browser.download.dir", downloadDir)
# The save/open dialog is suppressed only when the server's Content-Type
# response header EXACTLY matches an entry below — servers often label CSV
# exports as application/octet-stream, so list every likely type.
# NOTE: the list is comma-separated WITHOUT spaces around the commas.
fp.set_preference("browser.helperApps.neverAsk.saveToDisk",
                  "text/csv,application/csv,application/octet-stream,"
                  "text/comma-separated-values")
self.driver = webdriver.Firefox(firefox_profile=fp)
The file I am trying to download:
![File to be downloaded](https://i.stack.imgur.com/cM2VY.png)
谢谢,此外,如果需要完整代码,如下所述:
# -*- coding: utf-8 -*-
from selenium.webdriver.firefox.options import Options
from selenium import webdriver
import time
import os
import shutil
import uuid
class crawlOcean():
    """Crawler for the Cork Harbour observations page.

    Opens http://www.oceanenergyireland.com/testfacility/corkharbour/observations
    in headless Firefox and drives the Highcharts export menu of the
    CorkTideHeight chart to download the data as CSV without a save/open prompt.
    """

    def __init__(self):
        """Configure a headless Firefox that auto-saves CSV downloads."""
        global downloadDir
        # BUG FIX: the original used "" — an empty download directory makes
        # Firefox ignore folderList=2 and fall back to the prompt/default dir.
        downloadDir = os.getcwd()
        fp = webdriver.FirefoxProfile()
        # 2 = download into the custom directory set in browser.download.dir
        fp.set_preference("browser.download.folderList", 2)
        fp.set_preference("browser.download.manager.showWhenStarting", False)
        fp.set_preference("browser.download.dir", downloadDir)
        # The dialog is only suppressed when the server's Content-Type header
        # EXACTLY matches an entry here. BUG FIX: the list must be
        # comma-separated WITHOUT spaces — entries with a leading space
        # (" application/octet-stream") never match, which is why the
        # popup kept appearing.
        fp.set_preference(
            "browser.helperApps.neverAsk.saveToDisk",
            "text/plain,application/octet-stream,application/binary,"
            "text/csv,application/csv,application/excel,"
            "text/comma-separated-values,text/xml,application/xml")
        # Keep PDFs from opening in the built-in viewer instead of saving.
        fp.set_preference("pdfjs.disabled", True)
        options = Options()
        options.add_argument("--headless")
        # BUG FIX: `options` was created but never passed to Firefox, so
        # --headless was silently ignored.
        self.driver = webdriver.Firefox(firefox_profile=fp, options=options)
        self.driver.implicitly_wait(15)
        self.driver.get("http://www.oceanenergyireland.com/testfacility/corkharbour/observations")
        self.verificationErrors = []
        self.accept_next_alert = True

    def crawl(self):
        """Open the chart's export menu and click the CSV entry (5th item)."""
        driver = self.driver
        driver.execute_script("window.scrollTo(0, 600)")
        # The chart is rendered inside the first iframe on the page.
        driver.switch_to.frame(0)
        driver.find_element_by_xpath(
            "//div[@id='CorkTideHeight']/div[3]/button[2]").click()
        time.sleep(3)
        driver.find_element_by_xpath(
            "//div[@id='CorkTideHeight']/div[3]/div/ul/li[5]").click()
        # Give Firefox time to finish writing the download before exiting.
        time.sleep(5)
if __name__ == '__main__':
    # Script entry point: build the crawler (opens the page) and run it.
    crawler = crawlOcean()
    crawler.crawl()