forked from jiehua233/ipproxy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawl.py
82 lines (66 loc) · 1.94 KB
/
crawl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# @author: [email protected]
# @site: https://chenjiehua.me
# @date: 2016-08-25
#
import sys
import logging
import os.path
import IP
from lib.source import CZ88, KuaiDaili, XiciDaili, IP66, IP66API, CNProxy
# Python 2 only: make UTF-8 the interpreter-wide default encoding so implicit
# str/unicode conversions of non-ASCII text do not raise UnicodeDecodeError.
# sys must be reloaded first because the interpreter removes
# sys.setdefaultencoding during startup.  Neither `reload` (as a builtin) nor
# `sys.setdefaultencoding` exists on Python 3, so the hack is skipped there
# instead of crashing with NameError as soon as the module is loaded.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding("utf-8")

# Timestamped, level-tagged log lines at INFO and above.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Absolute path of the directory that contains this script.
root = os.path.dirname(os.path.abspath(__file__))
def main():
    """Run the full pipeline: collect proxies, classify them, write CSVs."""
    save2csv(sort_proxyip(get_proxyip()))
def get_proxyip():
    """Gather proxy entries from every configured source.

    Each source class is instantiated without arguments and the result of
    its ``get()`` call is merged into a single set, so an entry reported by
    more than one source is kept only once.  After each source a log line
    reports how many entries that source returned and the running total.

    Returns:
        set: the union of the entries returned by all sources.
    """
    source_classes = (CZ88, KuaiDaili, XiciDaili, IP66, IP66API, CNProxy)
    collected = set()
    for source_cls in source_classes:
        fetched = source_cls().get()
        collected.update(fetched)
        logging.info(
            "Totally got proxy ip: %s of %s", len(fetched), len(collected)
        )
    return collected
def sort_proxyip(pool):
    """Classify proxy entries by location and by anonymity level.

    For every tuple in ``pool`` the first element is looked up with
    ``IP.find``; the result is stripped, its tabs are replaced by ``-`` and
    it is appended to the tuple as an extra trailing field.  The extended
    tuple is then filed under three categories:

    * ``"all"`` - always;
    * ``"china"`` when the location text starts with "中国", otherwise
      ``"foreign"``;
    * ``"high_anonymous"`` when element 2 equals 3, ``"low_anonymous"`` when
      it equals 2, otherwise ``"non_anonymous"``.

    Args:
        pool: iterable of tuples describing proxies.

    Returns:
        dict: category name -> set of extended tuples.
    """
    categories = (
        "all",
        "china",
        "foreign",
        "high_anonymous",
        "low_anonymous",
        "non_anonymous",
    )
    # Anonymity code -> category; any other code counts as non-anonymous.
    anonymity_names = {3: "high_anonymous", 2: "low_anonymous"}

    buckets = {}
    for name in categories:
        buckets[name] = set()

    for entry in pool:
        location = IP.find(entry[0]).strip().replace("\t", "-")
        record = entry + (location,)

        region = "china" if location.startswith("中国") else "foreign"
        level = anonymity_names.get(entry[2], "non_anonymous")

        for name in ("all", region, level):
            buckets[name].add(record)

    return buckets
def save2csv(sort_ip):
    """Write one CSV file per category into the ``data`` directory.

    The ``data`` directory is located under ``root`` and is created when it
    does not exist yet.  For each key of ``sort_ip`` a file ``<key>.csv`` is
    opened in write mode (replacing any previous content), given a fixed
    header line and then one comma-separated line per entry.

    Args:
        sort_ip: mapping of category name -> iterable of 4-tuples; each
            tuple is interpolated into four ``%s`` fields.
    """
    out_dir = os.path.join(root, "data")
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    header = "ip,port,anonymous,info\n"
    row_format = "%s,%s,%s,%s\n"
    for tag in sort_ip:
        csv_path = os.path.join(out_dir, "%s.csv" % tag)
        with open(csv_path, "w") as fw:
            fw.write(header)
            fw.writelines(row_format % row for row in sort_ip[tag])
# Run the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()