import json
import os
from time import sleep
from urllib import parse
import schedule
import time
import requests
import re
class Pusher(object):
    """Push sitemap URLs to search-engine submission APIs (Baidu/Shenma/Sogou).

    Relies on the module-level endpoint globals ``push_baidu``,
    ``push_shenma`` and ``push_sogou`` (only ``push_baidu`` is defined in
    this file; the other two must be supplied elsewhere -- TODO confirm).
    Already-pushed URLs are tracked in plain-text log files next to the
    script so repeated runs skip them.
    """

    # Compiled once instead of per call; re.S lets <loc> values span lines.
    _LOC_RE = re.compile(r'<loc>(.*?)</loc>', re.S)

    def __init__(self):
        print('init_')

    def getSitemapIndex(self, sitemapIndexUrl):
        """Download a sitemap index and return the sitemap URLs it lists.

        :param sitemapIndexUrl: URL of the sitemap-index XML.
        :return: list of ``<loc>`` values (sitemap URLs).
        """
        print('getSitemapIndex:' + sitemapIndexUrl)
        result = requests.get(sitemapIndexUrl)
        print(result)
        sitemapUrls = self._LOC_RE.findall(result.content.decode('utf-8'))
        print(sitemapUrls)
        return sitemapUrls

    def getSitemap(self, sitemapUrl):
        """Download one sitemap and store its page URLs on ``self.urls``.

        :param sitemapUrl: URL of a single sitemap XML.
        """
        print("getSitemap:" + sitemapUrl)
        result = requests.get(sitemapUrl)
        self.urls = self._LOC_RE.findall(result.content.decode('utf-8'))
        print(self.urls)

    def postBaidu(self):
        """Push each not-yet-pushed URL in ``self.urls`` to Baidu, one per request.

        Successful URLs are appended to ``push_baidu.txt`` so later runs skip
        them; pushing stops early once the daily quota is exhausted.
        """
        print("postBaidu:=================================")
        headers = {
            "User-Agent": "curl/7.12.1",
            "Host": "data.zz.baidu.com",
            "Content-Type": "text/plain",
            "Content-Length": "83",
        }
        # 'a+' creates the log on first run; seek to the start so read() sees
        # the existing content (an a+ handle starts positioned at EOF).
        with open('push_baidu.txt', 'a+') as file:
            file.seek(0, 0)
            content = file.read()
            print("content:" + content)
            for url in self.urls:
                if url in content:
                    print("已经推送过:" + url)
                    continue
                try:
                    result = requests.post(push_baidu, data=url, headers=headers)
                    body = result.content.decode('utf-8')  # decode once, reuse
                    print(url + body)
                    if '"success":1' in body:
                        file.write(url + '\n')
                        file.flush()
                    if '{"remain":0' in body:
                        break  # daily quota exhausted -- no point continuing
                except Exception as e:
                    print(e)
                sleep(1)  # be polite to the endpoint

    def postShenma(self):
        """Push all not-yet-pushed URLs in ``self.urls`` to Shenma as one batch.

        Fixes vs. the original: the parsed JSON no longer overwrites the
        dedup log text (which broke the "already pushed" check), the batch is
        posted once instead of re-posted cumulatively per URL, and the log
        records each URL only once.
        """
        print("postShenma:=================================")
        headers = {
            "User-Agent": "curl/7.12.1",
            "Host": "data.zhanzhang.sm.cn",
            "Content-Type": "text/plain",
        }
        with open('push_shenma.txt', 'a+') as file:
            file.seek(0, 0)  # a+ handle starts at EOF; rewind before reading
            content = file.read()
            print("content:" + content)
            newUrls = []
            for url in self.urls:
                if url in content:
                    print("已经推送过:" + url)
                else:
                    newUrls.append(url)
            if not newUrls:
                return
            data = '\n'.join(newUrls) + '\n'  # API expects one URL per line
            try:
                result = requests.post(push_shenma, data=data, headers=headers)
                print("status_code:" + str(result.status_code))
                body = result.content.decode('utf-8')
                print("content:" + body)
                if result.status_code == 200:
                    reply = json.loads(body)  # distinct name: don't clobber the log text
                    print("returnCode " + str(reply['returnCode']))
                    if str(reply['returnCode']) == "200":
                        file.write(data)
                        file.flush()
            except Exception as e:
                print(e)
            sleep(1)

    def postSougou(self):
        """Submit each not-yet-pushed URL to Sogou's inclusion form.

        NOTE(review): the response is never checked for success, so nothing
        is ever written to ``push_sogou.txt`` and every URL is re-submitted
        on each run -- confirm whether that is intended.
        """
        print("postSougou:=================================")
        headers = {
            "User-Agent": "Opera/9.80 (Windows NT 6.2; Win64; x64) Presto/2.12.388 Version/12.15",
            "Host": "sogou.com",
            "Content-Type": "application/x-www-form-urlencoded",
        }
        with open('push_sogou.txt', 'a+') as file:
            file.seek(0, 0)  # rewind the a+ handle so the dedup read works
            content = file.read()
            print("content:" + content)
            for url in self.urls:
                if url in content:
                    print("已经推送过:" + url)
                    continue
                try:
                    result = requests.post(
                        push_sogou,
                        data={"source": "1", "site_type": "1",
                              "Shoulu": {"webAdr": url, "email": "932888148@qq.com",
                                         "reason": "网站收录不正常,恳请收录!"}},
                        headers=headers)
                    print(url + result.content.decode('utf-8'))
                except Exception as e:
                    print(e)
                sleep(1)

    def get360Token(self, url, key):
        """Port of 360 search's PHP token routine.

        Interleaves the first 16 chars of ``key`` with the reversed ``url``.
        Positions past the end of ``url`` contribute the key char alone --
        the original ``n[s] is None`` test could never be true for a list of
        characters, so URLs shorter than 16 chars raised IndexError.

        :param url: URL to tokenize.
        :param key: secret key (must be at least 16 chars).
        :return: the interleaved token string.
        """
        reversedUrl = list(reversed(url))
        keyChars = list(key)
        parts = []
        for s in range(16):
            if s < len(reversedUrl):
                parts.append(keyChars[s] + reversedUrl[s])
            else:
                parts.append(keyChars[s])  # url exhausted: key char only
        return ''.join(parts)

    def run(self, sitemapUrl):
        """Fetch one sitemap and push its URLs to Baidu."""
        self.getSitemap(sitemapUrl)
        self.postBaidu()
# Endpoint configuration. NOTE(review): the Baidu URL below has an empty
# token, and Pusher also references push_shenma / push_sogou which are not
# defined anywhere in this file -- confirm they are supplied elsewhere.
urlSitemap = '' # Your sitemap url, like 'http://blog.kxrr.us/index.php/sitemap'
urlPost = '' # Your Baidu API, like 'http://data.zz.baidu.com/urls?site=blog.kxrr.us&token=xxxxxxxxxxxx'
push_baidu = 'http://data.zz.baidu.com/urls?site=meishih.com&token='  # append your Baidu token
# The function to run on each scheduled cycle.
def job():
    """Fetch the sitemap index and push every listed sitemap's URLs."""
    print("I'm working...")
    pusher = Pusher()
    for sitemap_url in pusher.getSitemapIndex("http://meishih.com/sitemap_index.xml"):
        pusher.run(sitemap_url)


schedule.every().day.at("17:32").do(job)  # register job to run daily at 17:32
if __name__ == '__main__':
    # One-shot push. For the scheduled variant, replace the call below with:
    #   while True:
    #       schedule.run_pending()
    #       time.sleep(10)
    job()