import json
import requests
import re
from requests import RequestException
def get_page(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without a timeout a stalled
            server would block this call indefinitely.

    Returns:
        The response text when the status code is 200; ``None`` for any
        other status code or when the request raises ``RequestException``.
    """
    # Keep the try body to the one call that can raise RequestException.
    try:
        response = requests.get(url, timeout=timeout)
    except RequestException:
        # This progress marker originally sat after the ``return`` and was
        # therefore unreachable; it now reports the failure path.
        print("状态4:")
        return None

    if response.status_code == 200:
        print("状态:")
        return response.text

    print("状态3:")
    return None
def parse_page(html):
    """Yield one record per regex match found in ``html``.

    Args:
        html: Text to scan.

    Yields:
        dict: The three capture groups of a match, stored in order under the
        drug-name, off-label-indication and approved-indication keys.
    """
    # Earlier four-group variant, kept for reference. Its second line was left
    # uncommented in the original, which made the module a syntax error:
    # pattern = re.compile('.*?(.*?)<.*?(.*?)<.*?(.*?).*?>(.*?)', re.S)

    # Disease classification
    # NOTE(review): no literal text surrounds the capture groups, so each lazy
    # group can only match the empty string and every record holds three empty
    # values. The tag literals that anchored the groups appear to be missing --
    # restore them from the page markup before relying on this output.
    pattern = re.compile(r'.*?(.*?).*?>(.*?).*?>(.*?)', re.S)

    # finditer streams matches instead of building the full list up front.
    for match in pattern.finditer(html):
        drug_name, off_label, approved = match.groups()
        yield {
            '药品名称': drug_name,
            '超说明书适用症': off_label,
            '批准适应症': approved,
        }
def write_to_file(content):
    """Append ``content`` to the output file as one JSON document per line.

    Args:
        content: JSON-serialisable object; non-ASCII characters are written
            as-is (``ensure_ascii=False``) in UTF-8.

    Raises:
        TypeError: If ``content`` cannot be serialised by ``json.dumps``.
        OSError: If the output file cannot be opened or written.
    """
    # Serialise first so a failing dumps() never creates or touches the file.
    record = json.dumps(content, ensure_ascii=False)

    # NOTE(review): the file name has no '.' before 'txt', unlike the input
    # path used by main(); left unchanged here in case something depends on it.
    with open('超用药说明txt', 'a', encoding='utf-8') as f:
        # The file is opened in append mode on every call; without a trailing
        # newline successive records run together and cannot be parsed back.
        f.write(record + '\n')
def main():
    """Read the local input text, then print and save every parsed record."""
    # A URL ("https://db.yaozh.com/icd?") was sketched here earlier but is
    # commented out in the original; input comes from a local file instead.
    source_path = 'data/超用药说明.txt'
    with open(source_path, mode="r", encoding='utf-8') as handle:
        text = handle.read()

    # Each record is echoed to stdout and then appended to the output file.
    for record in parse_page(text):
        print(record)
        write_to_file(record)
# Script entry point: runs only when the file is executed directly, and
# brackets the run with start/end markers on stdout.
if __name__ == "__main__":
    print("开始")
    main()
    print("结束")