| 170 | | def putHtml(self,s): |
| 171 | | import cookielib |
| 172 | | |
| 173 | | #cookie管理オブジェクト作る |
| 174 | | try: |
| 175 | | opener = signIn() |
| 176 | | |
| 177 | | except: |
| 178 | | pass |
| 179 | | |
| 180 | | opener.open("http://twitter.com/sessions","username=&s&password%s" % (self.user['user'],self.user["pass"])) |
| 181 | | postdata = {} |
| 182 | | postdata['status'] = s.encode('utf-8') |
| 183 | | #postdata['source'] = s |
| 184 | | param = urllib.urlencode(postdata) |
| 185 | | data = urllib2.urlopen("http://"+self.url+"/statuses/update",param) |
| 186 | | print data.read()""" |
| 187 | | def getUserPageWithScraping(self,user,num): |
| 188 | | self.setAuthHandler() |
| 189 | | s = "http://"+self.url+"/"+user+"?page="+str(num) |
| 190 | | print "url+" +s |
| 191 | | data = urllib2.urlopen(s) |
| 192 | | urlstring = data.read() |
| | 181 | def getWithScraping(self,user,num=1): |
| | 182 | |
| | 183 | opener = self.singIn("") |
| | 184 | #ログイン必要? |
| | 185 | s = "http://"+self.url+"/home?page="+str(num) |
| | 186 | print "url+" +s |
| | 187 | data = opener.open(s) |
| | 188 | urlstring = data.read() |
| | 189 | print urlstring |
| | 226 | |
| | 227 | |
def scrapeTwit(self, str, flag):
    """Extract the entries of a scraped HTML timeline page.

    Tabs and newlines are stripped from the page, then every
    ``<tr class="hentry hentry_hover" ...>...</tr>`` row is parsed with
    regular expressions (presumably Twitter's legacy timeline markup --
    confirm against the pages the callers fetch).

    Args:
        str: HTML source of the page. The name shadows the builtin; it is
            kept unchanged so existing callers keep working.
        flag: Not used by this method; kept for interface compatibility.

    Returns:
        A list with one ``[user, message, time, image]`` list per row, in
        page order. ``user`` and ``message`` are decoded from UTF-8 when
        they are byte strings; ``time`` is the ``title`` attribute of the
        row's ``<abbr>`` tag and ``image`` is the ``src`` of its ``<img>``
        tag, both returned as found. Rows that lack any of these parts
        are skipped instead of aborting the whole scrape.

    Raises:
        UnicodeDecodeError: if a byte-string user or message is not valid
            UTF-8.
    """
    import re

    # Raw strings keep the regex escapes intact. re.MULTILINE is not
    # passed because none of the patterns uses ^ or $.
    row_re = re.compile(r'<tr class="hentry hentry_hover"([\w\W]*?)</tr>')
    image_re = re.compile(r'<img [\w\W]*? src="([\w\W]*?)"')
    content_re = re.compile(r'<td class="content">([\w\W]*?)</td>')
    user_re = re.compile(r'<a href=.*?>(.*?)</a>')
    message_re = re.compile(r'<span class="entry-content">([\w\W]*?)</span>')
    time_re = re.compile(r'<abbr class="[\w\W]*?" title="([\w\W]*?)">')

    def to_text(value):
        # Decode byte strings only: calling unicode(value, 'utf-8') on a
        # value that is already text raises TypeError, and the unicode
        # builtin does not exist on Python 3.
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    # "." does not match a newline, so the page is flattened before the
    # single-line patterns (user_re) are applied.
    page = str.replace("\t", "").replace("\n", "")

    tweets = []
    for row in row_re.findall(page):
        image = image_re.search(row)
        content = content_re.search(row)
        posted = time_re.search(row)
        if image is None or content is None or posted is None:
            # Malformed row: skip it rather than fail on None.group().
            continue

        cell = content.group(1)
        user = user_re.search(cell)
        message = message_re.search(cell)
        if user is None or message is None:
            continue

        tweets.append([
            to_text(user.group(1)),
            to_text(message.group(1)),
            posted.group(1),
            image.group(1),
        ])

    return tweets