Changeset 9987
- Timestamp:
- 04/20/08 16:52:19 (5 years ago)
- Files:
-
- 1 modified
-
lang/perl/tiarra/trunk/module/Auto/FetchTitle.pm (modified) (13 diffs)
Legend:
- Unmodified
- Added
- Removed
-
lang/perl/tiarra/trunk/module/Auto/FetchTitle.pm
r9012 r9987 1045 1045 my $config = [ 1046 1046 { 1047 # 1. ぷりんと楽譜. 1047 1048 url => 'http://www.print-gakufu.com/*', 1048 1049 recv_limit => 8*1024, … … 1051 1052 }, 1052 1053 { 1054 # 2. zakzak. 1053 1055 url => 'http://www.zakzak.co.jp/*', 1054 1056 recv_limit => 10*1024, … … 1056 1058 }, 1057 1059 { 1060 # 3. nikkei. 1058 1061 url => 'http://www.nikkei.co.jp/*', 1059 1062 recv_limit => 16*1024, … … 1065 1068 }, 1066 1069 { 1070 # 4. nhkニュース. 1067 1071 url => 'http://www*.nhk.or.jp/news/*', 1068 1072 extract => qr{<p class="newstitle">(.*?)</p>}, 1069 1073 }, 1070 1074 { 1075 # 5. creative (timeout). 1071 1076 url => 'http://*.creative.com/*', 1072 1077 timeout => 5, 1073 1078 }, 1074 1079 { 1080 # 6. soundhouse news. 1075 1081 url => 'http://www.soundhouse.co.jp/shop/News.asp?NewsNo=*', 1076 1082 recv_limit => 50*1024, … … 1078 1084 }, 1079 1085 { 1080 # trac changeset.1086 # 7. trac changeset. 1081 1087 url => '*/changeset/*', 1082 1088 extract => qr{<dd class="message" id="searchable"><p>(.*?)</p>}s, 1083 1089 }, 1084 1090 { 1091 # 8a. amazon (page size). 1085 1092 url => 'http://www.amazon.co.jp/*', 1086 1093 recv_limit => 15*1024, 1087 1094 }, 1088 1095 { 1096 # 8b. amazon (page size). 1089 1097 url => 'http://www.amazon.com/*', 1090 1098 recv_limit => 15*1024, 1099 }, 1100 { 1101 # 9. ニコニコ動画 (メンテ画面). 1102 status => 503, 1103 url => 'http://www.nicovideo.jp/*', 1104 extract => sub{ 1105 if( m{<div class="mb16p4 TXT12">\s*<p>現在ニコニコ動画は(メンテナンス中)です。</p>\s*<p>(.*?)<br />}s ) 1106 { 1107 "$1: $2"; 1108 }else 1109 { 1110 return; 1111 } 1112 }, 1113 }, 1114 { 1115 # 10. sanspo. 1116 url => 'http://www.sanspo.com/*', 1117 recv_limit => 5*1024, 1118 extract => qr{<h2>(.*?)</h2>}s, 1119 }, 1120 { 1121 # 11. sakura. 1122 url => 'http://www.sakura.ad.jp/news/archives/*', 1123 recv_limit => 10*1024, 1124 extract => qr{<h3 class="newstitle">(.*?)</h3>}s, 1091 1125 }, 1092 1126 ]; … … 1107 1141 my $type = shift; 1108 1142 1109 my $ conflist = $this->_extract_heading_config();1110 1111 foreach my $conf (@$ conflist)1143 my $extract_list = $this->_extract_heading_config(); 1144 1145 foreach my $conf (@$extract_list) 1112 1146 { 1113 1147 Mask::match($conf->{url}, $req->{url}) or next; … … 1147 1181 return; 1148 1182 } 1149 if( $req->{result}{status_code}!=200 ) 1150 { 1151 $DEBUG and $this->_debug($req, "debug: - - skip/not success:$req->{result}{status_code}"); 1152 return; 1153 } 1154 1155 my $conflist = $this->_extract_heading_config(); 1183 my $status = $req->{result}{status_code}; 1184 1185 my $extract_list = $this->_extract_heading_config(); 1156 1186 1157 1187 my $heading; 1158 1188 1159 foreach my $conf (@$ conflist)1189 foreach my $conf (@$extract_list) 1160 1190 { 1161 1191 Mask::match($conf->{url}, $req->{url}) or next; 1162 1192 $DEBUG and $this->_debug($req, "debug: - $conf->{url}"); 1193 1194 my $extract_status = $conf->{status} || 200; 1195 if( $status != $extract_status ) 1196 { 1197 $DEBUG and $this->_debug($req, "debug: - - status:$status not match with $extract_status"); 1198 next; 1199 } 1163 1200 1164 1201 my $extract_list = $conf->{extract}; … … 1172 1209 my $extract = $_extract; # sharrow-copy. 1173 1210 $extract = ref($extract) ? $extract : qr/\Q$extract/; 1174 my @match = $req->{result}{decoded_content} =~ $extract; 1211 my @match; 1212 if( ref($extract) eq 'CODE' ) 1213 { 1214 local($_) = $req->{result}{decoded_content}; 1215 @match = $extract->($req); 1216 }else 1217 { 1218 @match = $req->{result}{decoded_content} =~ $extract; 1219 } 1175 1220 @match or next; 1221 @match==1 && !defined($match[0]) and next; 1176 1222 $heading = $match[0]; 1177 1223 last; … … 1419 1465 content_length => undef, 1420 1466 decoded_content => undef, 1467 fetch_length => undef, 1421 1468 }; 1422 1469 … … 1445 1492 my $headers = $res->{Header}; # hash-ref. 1446 1493 my $content = $res->{Content}; 1494 $result->{fetch_length} = defined($content) ? length($content) : undef; 1447 1495 defined($content) or $content = ''; 1496 my @opts; 1448 1497 1449 1498 $result->{status_code} = $status_code; … … 1483 1532 if( int($status_code / 100) != 2 && !$result->{redirect} ) 1484 1533 { 1485 my @opts; 1486 $status_msg and push(@opts, $status_msg); 1534 $result->{title} = $status_msg; 1487 1535 push(@opts, "http status $status_code"); 1488 if( $req->{redirected} )1489 {1490 my $redirs = $req->{redirected}==1 ? 'redir' : 'redirs';1491 push(@opts, "$req->{redirected} $redirs");1492 }1493 my $reply = shift @opts;1494 if( @opts )1495 {1496 $reply .= " (".join("; ", @opts).")";1497 }1498 $result->{result} = $reply;1499 return $result;1500 1536 } 1501 1537 … … 1559 1595 $title = $this->_fixup_title($title); 1560 1596 $result->{title} = $title; 1597 }else 1598 { 1599 $title = $result->{title}; 1561 1600 } 1562 1601 … … 1595 1634 } 1596 1635 1597 my @opts;1598 1636 if( $reply eq '' || $ctype !~ /html/ ) 1599 1637 {
![(please configure the [header_logo] section in trac.ini)](/share/chrome/site/your_project_logo.png)