接上一篇
Python 实现输入积分题目(latex)得到对应答案并将结果存入json
对其进行改进
首先在 MongoDB 中创建如下结构的表
{
"导数": [
{
"一阶导数": [
{"题目": "答案"}
],
"二阶导数": [
{"题目": "答案"}
]
}
],
"积分": [
{
"定积分": [
{"题目": "答案"}
],
"不定积分": [
{"题目": "答案"}
]
}
]
}
使用普通的爬虫即可实现
import requests
import json
import matplotlib.pyplot as plt
import urllib
from lxml import etree
import pymongo
# Build an empty knowledge-point skeleton from the page's ``ul.m2u`` list and
# store it in MongoDB as one document shaped like:
#   {subject: [{problem_type: [{"题目": "答案"}], ...}], ...}
base_url = 'https://zs.symbolab.com/solver/derivative-calculator'
headers = {
    'authority': 'zs.symbolab.com',
    # The two literals are concatenated; the trailing space keeps
    # "... like Gecko)" and "Chrome/..." from running together.
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/62.0.3202.94 Safari/537.36',
    'cookie': '',
}

# A timeout prevents the script from hanging forever on a stalled connection,
# and raise_for_status() stops an HTTP error page from being parsed (and an
# empty skeleton from being stored).
res = requests.get(base_url, headers=headers, timeout=30)
res.raise_for_status()
html = etree.HTML(res.text)
ii_list = html.xpath('//ul[@class="m2u"]/li')

aa = {}
for i in ii_list:
    subject_text = i.xpath('./a/text()')
    if not subject_text:
        # An <li> without link text cannot name a subject; skip it instead of
        # raising IndexError.
        continue
    ke_mu = subject_text[0].strip()
    aa[ke_mu] = []

    # One dict per subject: every problem type starts with the placeholder
    # entry {"题目": "答案"}.
    c = {}
    for j in i.xpath('./ul/li'):
        type_text = j.xpath('./a/text()')
        if type_text:
            c[type_text[0].strip()] = [{"题目": "答案"}]
    aa[ke_mu].append(c)

print(aa)

myclient = pymongo.MongoClient('mongodb://localhost:27017/')
mydb = myclient['Question_bank']  # question bank database
col = mydb['Knowledge_points']  # knowledge-point collection
# Collection.insert() was deprecated in PyMongo 3 and removed in PyMongo 4;
# insert_one() is the supported call for a single document.
col.insert_one(aa)
接下来,爬取 题目 和 答案,并存入 MongoDB
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2021-02-03 08:46:07
# Project: daan
from pyspider.libs.base_handler import *
import json
import pymongo
import urllib
class Handler(BaseHandler):
    """pyspider handler that fetches one solution and files it in MongoDB.

    The handler requests the ``pub_api/steps`` endpoint for a single LaTeX
    query, extracts the question text and its full result from the JSON reply,
    and pushes them as ``{question: answer}`` into the ``Knowledge_points``
    collection under ``ke_mu`` -> ``ti_xing`` unless that pair is already
    stored.
    """

    headers = {
        'authority': 'zs.symbolab.com',
        # The two literals are concatenated; the trailing space keeps
        # "... like Gecko)" and "Chrome/..." from running together.
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/62.0.3202.94 Safari/537.36',
        'cookie': '',
    }
    crawl_config = {
        'headers': headers,
    }

    def __init__(self):
        """Open the MongoDB connection and build the request URL."""
        # Imported here because a plain ``import urllib`` does not guarantee
        # that the ``urllib.parse`` submodule has been loaded.
        from urllib.parse import quote

        # Database configuration (MongoDB).
        self.myclient = pymongo.MongoClient(host='localhost', port=27017)
        self.mydb = self.myclient['Question_bank']  # question bank database
        # Subject and problem type under which the answer is filed.
        # NOTE(review): the query below is an integral but is filed under
        # "导数"/"一阶导数" — confirm this is intended.
        self.ke_mu = "导数"
        self.ti_xing = "一阶导数"
        # Raw string: "\i" is not a valid escape sequence in a normal literal.
        self.latex_math = r'\int 5xdx'
        self.url_code_latex_math = quote(self.latex_math)
        self.base_url = (
            'https://zs.symbolab.com/pub_api/steps?subscribed=false&language=zs&query='
            + self.url_code_latex_math
            + '&plotRequest=PlotOptional&page=calculus-calculator'
        )

    @every(minutes=24 * 60)
    def on_start(self):
        """Schedule the single API request; the reply goes to detail_page."""
        self.crawl(self.base_url, callback=self.detail_page)

    @config(priority=2)
    def detail_page(self, response):
        """Extract the question and answer from the JSON reply.

        Returns:
            dict with keys "题目" (the ``step_input`` of the first solution)
            and "答案" (its ``entire_result``).

        Raises:
            KeyError / IndexError: if the reply has no ``solutions`` entry or
            the first solution lacks either field. Left unhandled on purpose
            so the failure is not silently turned into "no result".
        """
        solutions = json.loads(response.text)
        first = solutions['solutions'][0]
        return {
            "题目": first['step_input'],
            "答案": first['entire_result'],
        }

    def on_result(self, result):
        """Persist ``result`` through save_to_mongo when it is truthy."""
        if result:
            self.save_to_mongo(result)

    def save_to_mongo(self, result):
        """Insert ``{question: answer}`` unless that pair is already stored.

        Args:
            result: dict with the keys "题目" and "答案", as returned by
                detail_page.
        """
        timu = result['题目']
        daan = result['答案']
        an = {timu: daan}
        print(an)

        col = self.mydb['Knowledge_points']
        type_path = self.ke_mu + "." + self.ti_xing

        # Check whether this question/answer pair already exists.
        # NOTE(review): the question text is used as a field name inside a
        # dotted path; a question containing "." would be read as a nested
        # path — verify against the real inputs.
        count = col.count_documents({type_path + "." + timu: daan})
        if count != 0:
            print("数据已存在")
            return

        # Locate the document through the {"题目": "答案"} placeholder entry and
        # push the new pair into the problem-type array of the matched element.
        # NOTE(review): the filter traverses nested arrays; confirm that the
        # positional "$" resolves to the intended element.
        outcome = col.update_one(
            {type_path + "." + "题目": "答案"},
            {"$push": {self.ke_mu + ".$." + self.ti_xing: an}},
        )
        if outcome.matched_count:
            print('save to mongo', result)
        else:
            # Nothing matched, so nothing was written; do not claim success.
            print('save to mongo skipped: placeholder entry not found', result)
在存入数据库之前,需要判断数据是否已经存在
最终效果