初步分析:网站内容是动态加载显示的,
故采用 selenium 进行爬取。
第一步:安装 ChromeDriver、PhantomJS,并执行 pip install selenium。
1.xpath查找“切换”节点
element01=browser.find_element(by=By.XPATH, value="//h1//span[2]")
2.模拟点击
3.xpath寻找文本框节点
element02 = browser.find_element(By.ID, 'cityInput')
4.文本框输入信息
element03=browser.find_element(By.ID,"xxxxxxxxx")
5.模拟点击
6.xpath寻找所需数据即可
import selenium
import smtplib
from selenium import webdriver
import time
from email.mime.text import MIMEText
from email.header import Header
from email.utils import formataddr
from selenium.webdriver.common.by import By
# Scrape the "life index" entries for one city from weather.com.cn with
# Selenium, then mail them as plain text through QQ Mail's SMTP-over-SSL
# endpoint.  Run as a script; importing the module has no side effects.

wait_time = 180  # not referenced by this script; retained from the original
url = 'http://www.weather.com.cn/life/'

# Placeholders from the original script: replace with the text to type into
# the city search box and the id of the suggestion entry to click.
CITY_KEYWORD = 'xxxxxxxxx'
CITY_ELEMENT_ID = 'xxxxxxxxx'


def build_report(dataray, dataclothes):
    """Return the plain-text mail body built from the scraped strings.

    Args:
        dataray: texts of the life-index nodes; the first three are used.
        dataclothes: texts of the visible clothing-advice nodes; the entry at
            index 1 is used (NOTE(review): why index 1 rather than 0 is not
            visible in this file -- confirm against the live page).

    Raises:
        ValueError: if fewer entries were scraped than the report needs, so
            the failure is explicit instead of a bare IndexError.
    """
    if len(dataray) < 3:
        raise ValueError(
            "expected at least 3 life-index entries, got %d" % len(dataray)
        )
    if len(dataclothes) < 2:
        raise ValueError(
            "expected at least 2 clothing entries, got %d" % len(dataclothes)
        )
    return '\n'.join(["life:", *dataray[:3], "clothes:", dataclothes[1]])


def scrape_indices(page_url=url, city_keyword=CITY_KEYWORD,
                   city_element_id=CITY_ELEMENT_ID):
    """Open the page in Chrome, select the city and collect the node texts.

    Returns:
        A ``(dataray, dataclothes)`` tuple of string lists.

    The browser is always shut down, even when an element lookup fails.
    """
    browser = webdriver.Chrome()
    try:
        browser.maximize_window()
        browser.get(page_url)
        time.sleep(5)
        browser.implicitly_wait(10)

        # Clicks go through JavaScript rather than WebElement.click().
        switch_link = browser.find_element(by=By.XPATH, value="//h1//span[2]")
        browser.execute_script("arguments[0].click();", switch_link)

        city_input = browser.find_element(By.ID, 'cityInput')
        city_input.send_keys(city_keyword)

        city_entry = browser.find_element(By.ID, city_element_id)
        browser.execute_script("arguments[0].click();", city_entry)

        # Fixed pause before reading the page after the city click.
        time.sleep(5)

        life_nodes = browser.find_elements(
            by=By.XPATH, value="//div[@class='second']//a//dl[1]"
        )
        clothes_nodes = browser.find_elements(
            by=By.XPATH,
            value="//div[@class='you']//div[contains(@style, 'display: block;')]",
        )
        # Read .text while the session is still alive.
        dataray = [node.text for node in life_nodes]
        dataclothes = [node.text for node in clothes_nodes]
        return dataray, dataclothes
    finally:
        browser.quit()


def build_message(my_sender, my_user, body):
    """Return the UTF-8 plain-text MIME message for the report."""
    msg = MIMEText(body, 'plain', 'utf-8')
    msg['From'] = formataddr(["your name", my_sender])
    # The original never set a recipient header.
    msg['To'] = my_user
    msg['Subject'] = '简洁的标题'
    return msg


def send_report(my_sender, my_pass, my_user, body):
    """Send the report via smtp.qq.com:465, closing the connection on exit."""
    msg = build_message(my_sender, my_user, body)
    with smtplib.SMTP_SSL("smtp.qq.com", 465) as server:
        server.login(my_sender, my_pass)
        server.sendmail(my_sender, my_user, msg.as_string())


def main():
    """Prompt for the mail credentials, scrape the page and send the mail."""
    my_sender = input("输入你的QQ邮箱账号:")
    my_pass = input("输入你自己QQ邮箱开启smtp后生成的一串代码:")
    my_user = input("接受者的邮箱账号:")

    dataray, dataclothes = scrape_indices()
    # Validate and build the body before opening any SMTP connection.
    body = build_report(dataray, dataclothes)
    send_report(my_sender, my_pass, my_user, body)
    print('over')


if __name__ == "__main__":
    main()
本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)