root/lang/python/cidr-mobilejp/trunk/scrape.py @ 3901

Revision 3901, 3.4 kB (checked in by tasuku, 5 years ago)

lang/python/cidr-mobilejp: added new project cidr-mobilejp

  • Property svn:executable set to *
Line 
1#!/usr/bin/env python
2
3# -*- coding: utf-8 -*-
4
5import urllib
6import re
7from math import ceil
8
class Docomo(object):
  """Collects the 'docomo' address blocks listed on the page at url()."""

  def name(self):
    """Return the carrier label attached to every scraped block."""
    return 'docomo'

  def url(self):
    """Return the address of the page that lists the carrier's blocks."""
    return 'http://www.nttdocomo.co.jp/service/imode/make/content/ip/'

  def parse(self, content):
    """Extract (cidr, carrier) tuples from already-downloaded HTML.

    Only <li> elements whose whole text is made of digits, dots and
    slashes are picked up. Returns an empty list when nothing matches.
    """
    n = self.name()
    return [(ip, n) for ip in re.findall(r'<li>([\d\./]+)</li>', content)]

  def run(self):
    """Download url() and return the (cidr, carrier) tuples found in it."""
    # Function-scope imports keep this class usable on Python 2 and 3
    # without touching the file-level import block.
    try:
      from urllib.request import urlopen  # Python 3
    except ImportError:
      from urllib import urlopen  # Python 2
    from contextlib import closing

    # closing() releases the HTTP response even if read() raises.
    with closing(urlopen(self.url())) as response:
      content = response.read()
    if not isinstance(content, str):
      # Python 3 returns bytes. latin-1 maps every byte 1:1, so the ASCII
      # markup matched by parse() is unchanged whatever the page encoding.
      content = content.decode('latin-1')
    return self.parse(content)
20
class Ezweb(object):
  """Collects the 'ezweb' address blocks listed on the page at url()."""

  def name(self):
    """Return the carrier label attached to every scraped block."""
    return 'ezweb'

  def url(self):
    """Return the address of the page that lists the carrier's blocks."""
    return 'http://www.au.kddi.com/ezfactory/tec/spec/ezsava_ip.html'

  def parse(self, content):
    """Extract (cidr, carrier) tuples from already-downloaded HTML.

    The pattern expects the address and the '/bits' suffix in two
    consecutive table cells, separated by whitespace; both halves are
    concatenated into one CIDR string. Returns an empty list when nothing
    matches.
    """
    n = self.name()
    pattern = r'<td>\s*<div class="TableText">([\d\.]+)</div>\s*</td>\s+<td>\s*<div class="TableText">(/\d+)</div>\s*</td>'
    return [(ip + sn, n) for ip, sn in re.findall(pattern, content)]

  def run(self):
    """Download url() and return the (cidr, carrier) tuples found in it."""
    # Function-scope imports keep this class usable on Python 2 and 3
    # without touching the file-level import block.
    try:
      from urllib.request import urlopen  # Python 3
    except ImportError:
      from urllib import urlopen  # Python 2
    from contextlib import closing

    # closing() releases the HTTP response even if read() raises.
    with closing(urlopen(self.url())) as response:
      content = response.read()
    if not isinstance(content, str):
      # Python 3 returns bytes. latin-1 maps every byte 1:1, so the ASCII
      # markup matched by parse() is unchanged whatever the page encoding.
      content = content.decode('latin-1')
    return self.parse(content)
33
class Softbank(object):
  """Collects the 'softbank' address blocks listed on the page at url()."""

  def name(self):
    """Return the carrier label attached to every scraped block."""
    return 'softbank'

  def url(self):
    """Return the address of the page that lists the carrier's blocks."""
    return 'http://developers.softbankmobile.co.jp/dp/tech_svc/web/ip.php'

  def parse(self, content):
    """Extract (cidr, carrier) tuples from already-downloaded HTML.

    Blocks are taken from <FONT size="2" class="j10" ...> elements whose
    text is made of digits, dots and slashes only. Returns an empty list
    when nothing matches.
    """
    n = self.name()
    # Raw string: same pattern text as before, without relying on Python
    # passing unknown escapes such as '\d' through unchanged.
    pattern = r'<FONT size="2" class="j10".*?>([\d\./]+)</FONT>'
    return [(ip, n) for ip in re.findall(pattern, content)]

  def run(self):
    """Download url() and return the (cidr, carrier) tuples found in it."""
    # Function-scope imports keep this class usable on Python 2 and 3
    # without touching the file-level import block.
    try:
      from urllib.request import urlopen  # Python 3
    except ImportError:
      from urllib import urlopen  # Python 2
    from contextlib import closing

    # closing() releases the HTTP response even if read() raises.
    with closing(urlopen(self.url())) as response:
      content = response.read()
    if not isinstance(content, str):
      # Python 3 returns bytes. latin-1 maps every byte 1:1, so the ASCII
      # markup matched by parse() is unchanged whatever the page encoding.
      content = content.decode('latin-1')
    return self.parse(content)
46
class AirHPhone(object):
  """Collects the 'airhphone' address blocks listed on the page at url()."""

  def name(self):
    """Return the carrier label attached to every scraped block."""
    return 'airhphone'

  def url(self):
    """Return the address of the page that lists the carrier's blocks."""
    return 'http://www.willcom-inc.com/ja/service/contents_service/club_air_edge/for_phone/ip/'

  def parse(self, content):
    """Extract (cidr, carrier) tuples from already-downloaded HTML.

    Blocks are taken from table cells that carry exactly the attributes
    spelled out in the pattern and whose text is made of digits, dots and
    slashes only. Returns an empty list when nothing matches.
    """
    n = self.name()
    # Raw string: same pattern text as before, without relying on Python
    # passing unknown escapes such as '\d' through unchanged.
    pattern = r'<td align="center" bgcolor="#f5f5f5" width="50%"><font size="2">([\d\./]+)</font></td>'
    return [(ip, n) for ip in re.findall(pattern, content)]

  def run(self):
    """Download url() and return the (cidr, carrier) tuples found in it."""
    # Function-scope imports keep this class usable on Python 2 and 3
    # without touching the file-level import block.
    try:
      from urllib.request import urlopen  # Python 3
    except ImportError:
      from urllib import urlopen  # Python 2
    from contextlib import closing

    # closing() releases the HTTP response even if read() raises.
    with closing(urlopen(self.url())) as response:
      content = response.read()
    if not isinstance(content, str):
      # Python 3 returns bytes. latin-1 maps every byte 1:1, so the ASCII
      # markup matched by parse() is unchanged whatever the page encoding.
      content = content.decode('latin-1')
    return self.parse(content)
59
def get_cidr():
  """Scrape every carrier page and print a PHP ip2mobile() function.

  Each carrier class contributes (cidr, carrier) tuples through run().
  The blocks are converted to inclusive integer ranges, sorted, merged,
  and handed to output_php(), which prints a nested if/else binary search
  between a fixed PHP header and footer. Addresses that fall in no range
  make the generated function return 'pc'.

  With no scraped blocks at all, the generated function consists of the
  header and footer only.

  Raises:
    ValueError: if a scraped entry contains no 'address/bits' part or its
      prefix length exceeds 32.

  NOTE(review): the emitted bounds are unsigned 32-bit values while the PHP
  code compares them against ip2long($ip); confirm the target PHP build
  uses 64-bit integers.
  """
  sources = []
  for carrier in (Docomo, Ezweb, Softbank, AirHPhone):
    sources += carrier().run()

  ranges = sorted(_cidr_to_range(cidr, name) for cidr, name in sources)
  mranges = _merge_ranges(ranges)

  # output php source
  print("""<?php
function ip2mobile($ip) {
  $n = ip2long($ip);
""")
  output_php(mranges, 0, len(mranges) - 1, 2)
  print("""  return 'pc';
}
?>
""")


def _cidr_to_range(cidr, carrier):
  """Convert 'a.b.c.d/bits' into an inclusive (first, last, carrier) range."""
  import socket
  import struct

  match = re.search(r'([\d\.]+)/(\d+)', cidr)
  if match is None:
    raise ValueError('not a CIDR block: %r' % (cidr,))
  bits = int(match.group(2))
  if bits > 32:
    raise ValueError('prefix length out of range: %r' % (cidr,))
  ipnum = struct.unpack('>L', socket.inet_aton(match.group(1)))[0]
  host_mask = (1 << (32 - bits)) - 1
  # Clear the host bits for the first address, set them all for the last.
  first = ipnum & ~host_mask
  return (first, first | host_mask, carrier)


def _merge_ranges(ranges):
  """Fuse consecutive same-carrier ranges that touch or overlap.

  `ranges` must already be sorted. Duplicate and nested blocks are folded
  in as well as exactly adjacent ones, because the binary search printed
  by output_php() only works on non-overlapping ranges. Overlaps between
  different carriers are left untouched.
  """
  merged = []
  for start, end, carrier in ranges:
    if merged:
      prev_start, prev_end, prev_carrier = merged[-1]
      if carrier == prev_carrier and start <= prev_end + 1:
        merged[-1] = (prev_start, max(prev_end, end), prev_carrier)
        continue
    merged.append((start, end, carrier))
  return merged
114
def output_php(range, st, ed, ind):
  """Print a PHP if/else binary search over range[st..ed] to stdout.

  Args:
    range: sequence of (first, last, carrier) tuples, sorted and
      non-overlapping; first/last are inclusive integer bounds. (The name
      shadows the builtin but is kept for existing callers.)
    st: index of the first entry to emit.
    ed: index of the last entry to emit; nothing is printed if st > ed.
    ind: number of spaces to indent the emitted PHP; each nesting level
      adds two more.

  The emitted code tests the PHP variable $n and returns the carrier name
  of the range containing it; it falls through when no range matches.
  """
  if st > ed:
    return
  pad = ' ' * ind
  if st == ed:
    # Leaf: a single range, checked on both bounds.
    print(pad + 'if ($n >= %d && $n <= %d) {' % (range[st][0], range[st][1]))
    print(pad + "  return '%s';" % range[st][2])
    print(pad + '}')
    return
  # Floor division keeps the midpoint an exact integer on Python 2 and 3.
  b = (st + ed) // 2
  print(pad + 'if ($n < %d) {' % range[b][0])
  output_php(range, st, b - 1, ind + 2)
  print(pad + '} else if ($n <= %d) {' % range[b][1])
  print(pad + "  return '%s';" % range[b][2])
  print(pad + '} else {')
  output_php(range, b + 1, ed, ind + 2)
  print(pad + '}')
133
# Script entry point: when run directly, scrape the carrier pages and print
# the generated PHP source to stdout.
if __name__ == '__main__':
  get_cidr()
Note: See TracBrowser for help on using the browser.