"""Extract off-label drug-use records from a saved page and store them as JSON lines."""

import json
import re

import requests
from requests import RequestException

# Alternative pattern kept from the original source (disease classification):
# '.*?(.*?)<.*?(.*?)<.*?(.*?).*?>(.*?)'
#
# NOTE(review): the active pattern below contains no HTML tag literals, which
# suggests markup may have been stripped from it at some point -- confirm
# against the page structure before relying on the extracted fields.
_ITEM_PATTERN = re.compile('.*?(.*?).*?>(.*?).*?>(.*?)', re.S)


def get_page(url, timeout=10):
    """Fetch *url* with HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The response text when the status code is 200; ``None`` for any other
        status code or when a ``RequestException`` (including a timeout) is
        raised.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except RequestException:
        # This message used to sit after the ``return`` and was unreachable.
        print("状态4:")
        return None

    if response.status_code == 200:
        print("状态:")
        return response.text

    print("状态3:")
    return None


def parse_page(html):
    """Yield one record dict per regex match found in *html*.

    Args:
        html: Page text to scan. ``None`` or an empty string yields nothing
            (``get_page`` returns ``None`` on failure).

    Yields:
        Dicts with the keys '药品名称', '超说明书适用症' and '批准适应症',
        filled from the three capture groups of the pattern, in order.
    """
    if not html:
        return

    for name, off_label, approved in _ITEM_PATTERN.findall(html):
        yield {
            '药品名称': name,
            '超说明书适用症': off_label,
            '批准适应症': approved,
        }


def write_to_file(content):
    """Append *content* to the output file as a single JSON line.

    Args:
        content: JSON-serialisable object (here: a record dict).
    """
    # NOTE(review): the file name has no dot before "txt" -- looks like a typo
    # for '超用药说明.txt'; left unchanged so existing output keeps its location.
    with open('超用药说明txt', 'a', encoding='utf-8') as f:
        # The trailing newline separates records; without it consecutive
        # appends run together and the file cannot be parsed back.
        f.write(json.dumps(content, ensure_ascii=False) + '\n')


def main():
    """Read the saved page from disk, then print and store every parsed record."""
    # url = "https://db.yaozh.com/icd?"
    # print(url)
    with open('data/超用药说明.txt', "r", encoding='utf-8') as f:
        page_text = f.read()

    for item in parse_page(page_text):
        print(item)
        write_to_file(item)


if __name__ == '__main__':
    print("开始")
    main()
    print("结束")