pc超用药.py 1.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. import json
  2. import requests
  3. import re
  4. from requests import RequestException
  5. def get_page(url):
  6. try:
  7. response = requests.get(url)
  8. if response.status_code == 200:
  9. print("状态:")
  10. return response.text
  11. print("状态3:")
  12. return None
  13. except RequestException:
  14. return None
  15. print("状态4:")
  16. def parse_page(html):
  17. # pattern = re.compile('<tr>.*?<th.*?>(.*?)<.*?<td.*?>(.*?)<.*?<td.*?>(.*?)</td>.*?>(.*?)</td>', re.S)
  18. # 疾病分类
  19. pattern = re.compile('<tr>.*?<th.*?>(.*?)</th>.*?>(.*?)</td>.*?>(.*?)</td>', re.S)
  20. items = re.findall(pattern,html)
  21. for item in items:
  22. yield {
  23. '药品名称': item[0],
  24. '超说明书适用症':item[1],
  25. '批准适应症': item[2],
  26. }
  27. def write_to_file(content):
  28. with open('超用药说明txt','a',encoding = 'utf-8')as f:
  29. f.write(json.dumps(content,ensure_ascii=False))
  30. def main():
  31. # url = "https://db.yaozh.com/icd?"
  32. # print(url)
  33. with open('data/超用药说明.txt', "r", encoding='utf-8') as f:
  34. sr = f.read()
  35. for item in parse_page(sr):
  36. print(item)
  37. write_to_file(item)
  38. if __name__ == '__main__':
  39. print("开始")
  40. main()
  41. print("结束")