"""Pick random HTTP proxies and User-Agent strings for scraping requests.

The module offers a hard-coded proxy pool (``getip``), a hard-coded
User-Agent pool (``getagent``) and a scraper that reads ``host:port`` pairs
from the xicidaili listing page (``get_ip_list`` / ``get_random_ip``).
"""
import random

import requests
from bs4 import BeautifulSoup


def getip():
    """Return a ``requests``-style proxies dict for a random built-in proxy.

    The index that was chosen is printed as ``ip:<index>``.

    Returns:
        dict: ``{'http': 'http://<host>:<port>'}``.
    """
    ip = [
        '163.204.241.158:9999', '182.35.87.174:9999', '182.35.87.175:9999', '182.35.87.176:9999',
        '182.35.87.177:9999', '123.163.96.131:9999', '1.197.16.130:9999', '182.35.85.157:9999',
        '163.204.244.212:9999', '163.204.243.252:9999', '117.91.132.170:9999', '58.253.156.1:9999',
        '1.197.16.129:9999', '163.204.246.126:9999', '60.13.42.67:9999', '49.86.182.174:9999',
        '182.35.80.229:9999', '58.253.156.225:9999', '163.204.240.126:9999', '60.13.42.70:9999',
        '123.163.122.108:9999', '175.42.122.46:9999', '123.163.97.234:9999', '182.35.82.130:9999',
        '163.204.245.72:9999', '60.13.42.127:9999', '121.233.206.73:9999', '163.204.246.149:9999',
        '182.35.83.33:9999', '163.204.243.16:9999', '163.204.246.174:9999', '121.233.251.60:9999',
        '123.169.37.250:38677', '120.83.109.29:9999', '163.204.241.164:9999', '163.204.246.42:9999',
        '163.204.241.228:9999', '175.42.68.4:9999', '182.35.84.155:9999', '112.85.130.205:9999',
        '122.193.247.115:9999', '163.204.241.190:9999', '163.204.245.237:9999', '163.204.242.245:9999',
        '115.53.19.82:9999', '112.85.128.146:9999', '163.204.244.40:9999', '182.35.80.5:9999',
        '163.204.242.130:9999', '112.85.129.88:9999', '113.121.20.143:9999', '182.35.83.136:9999',
        '182.35.80.195:9999', '120.83.105.248:9999', '112.85.151.152:9999', '171.11.178.94:9999',
        '171.12.113.6:9999', '112.85.165.194:9999', '123.163.122.254:9999', '58.253.158.174:9999',
        '120.84.101.164:9999', '60.13.42.61:9999', '60.13.42.207:9999', '1.198.72.219:9999',
        '182.35.80.54:9999', '114.230.69.232:9999', '163.204.242.126:9999', '58.253.154.253:9999',
        '180.108.218.242:9999', '112.85.149.238:9999', '114.230.69.109:9999', '60.13.42.28:9999',
        '163.204.244.39:9999', '180.108.218.179:9999', '121.233.251.82:9999', '113.121.20.252:808',
        '120.83.111.43:9999', '182.35.86.234:9999', '182.35.83.200:9999', '60.13.42.142:9999',
        '120.83.98.106:9999', '117.91.130.10:9999', '111.226.188.146:8010', '180.119.68.222:9999',
        '123.163.96.170:9999', '60.13.42.57:9999', '113.121.23.248:9999', '222.189.144.147:9999',
        '60.13.42.172:9999', '183.128.167.248:8118', '182.35.86.217:9999', '60.13.42.38:9999',
        '222.89.32.141:8070', '183.157.84.221:8118', '222.189.191.34:9999', '123.163.122.129:9999',
        '121.233.227.214:9999', '180.119.141.163:9999',
    ]
    # Derive the upper bound from the list so edits to the pool cannot push
    # the index out of range (the original hard-coded 0..97).
    b = random.randrange(len(ip))
    print("ip:" + str(b))
    return {'http': 'http://' + ip[b]}


def getagent():
    """Return a random User-Agent string from the built-in pool.

    The index that was chosen is printed as ``agent:<index>``.

    Returns:
        str: one browser User-Agent header value.
    """
    agents = [
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
        'Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.9.168 Version/11.50',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
        'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; GTB7.0)',
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
        'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; ) AppleWebKit/534.12 (KHTML, like Gecko) Maxthon/3.0 Safari/534.12',
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E)',
    ]
    # Index bound follows the list length instead of the hard-coded 0..9.
    a = random.randrange(len(agents))
    print("agent:" + str(a))
    return agents[a]


def get_ip_list():
    """Scrape ``host:port`` proxy addresses from the xicidaili listing page.

    Fetches ``http://www.xicidaili.com/nn/`` with a random User-Agent and
    reads the second and third cell of every table row after the first.

    Returns:
        list[str]: addresses such as ``42.84.226.65:8888``; empty when the
        page contains no usable rows.

    Raises:
        requests.RequestException: if the page cannot be fetched, including
            when the 10-second timeout expires.
    """
    url = 'http://www.xicidaili.com/nn/'
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Referer': 'https://wuhan.anjuke.com/sale/?from=navigation',
        'User-Agent': getagent(),
    }
    # A timeout keeps the script from hanging forever on a dead host.
    web_data = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(web_data.text, 'html.parser')

    ip_list = []
    # The first <tr> is skipped, as in the original (presumably the table
    # header row - confirm against the live page).
    for row in soup.find_all('tr')[1:]:
        tds = row.find_all('td')
        if len(tds) < 3:
            # Not a proxy row (too few cells); ignore it instead of crashing.
            continue
        ip_list.append(tds[1].text + ':' + tds[2].text)
    return ip_list


def get_random_ip():
    """Return a ``requests``-style proxies dict for one scraped proxy.

    Returns:
        dict: ``{'http': 'http://<host>:<port>'}`` for a randomly chosen
        entry of ``get_ip_list()``.

    Raises:
        IndexError: if the scrape produced no proxies.
        requests.RequestException: propagated from ``get_ip_list()``.
    """
    ip_list = get_ip_list()
    if not ip_list:
        raise IndexError('no proxies were scraped; cannot choose a random proxy')
    return {'http': 'http://' + random.choice(ip_list)}


if __name__ == '__main__':
    proxies = get_random_ip()
    print(proxies)
    headers = {
        'User-Agent': 'Mozilla/5.0 (iPod; U; CPU iPhone OS 4_3 like Mac OS X; wo-SN) AppleWebKit/535.16.1 (KHTML, like Gecko) Version/4.0.5 Mobile/8B114 Safari/6535.16.1'
    }
    # NOTE(review): requests selects a proxy by URL scheme and `proxies` only
    # has an 'http' entry, so this https:// request is probably not routed
    # through the chosen proxy - confirm whether that is intended.
    resp = requests.get('https://www.baidu.com', proxies=proxies, headers=headers, timeout=10)
    print(resp.text)
    print("结束")