root/lang/perl/HTTP-DetectUserAgent/trunk/lib/HTTP/DetectUserAgent.pm @ 20043

Revision 20043, 14.2 kB (checked in by mizuno_takaaki, 6 years ago)

lang/perl/HTTP-DetectUserAgent?: added some useragent detection logic

Line 
1package HTTP::DetectUserAgent;
2
3use warnings;
4use strict;
5#use Carp;
6our $VERSION = '0.01';
7use base qw(Class::Accessor);
8
9__PACKAGE__->mk_accessors(qw(name version vendor type os));
10
11sub new {
12   my ($class, $user_agent) = @_;
13   my $self = {};
14   bless $self, $class;
15   unless (defined $user_agent) {
16     $user_agent = $ENV{'HTTP_USER_AGENT'};
17   }
18   $self->user_agent($user_agent);
19   return $self;
20}
21
22sub user_agent {
23  my ($self, $user_agent) = @_;
24  if (defined $user_agent) {
25    $self->{user_agent} = $user_agent;
26    $self->_parse();
27  }
28  return $self->{user_agent};
29}
30
31sub _parse {
32    my $self = shift;
33    my $ua = lc $self->{user_agent};
34    $self->_parse_name($ua);
35    if( $self->{type} eq 'Browser' ){
36        $self->_parse_os($ua);
37    }
38}
39
40sub _parse_name {
41    my ( $self, $ua ) = @_;
42    return if( $self->_check_crawler($ua) );
43    if( index($ua,'opera') != -1){
44        $self->_check_opera($ua);
45        return;
46    }
47    my $block = $self->_parse_block($ua);
48    if( $block->{applewebkit} ){
49        $self->_check_webkit( $ua, $block );
50    }elsif( $block->{'_comment'}
51                && index($block->{'_comment'}, 'msie' ) != -1 ){
52        $self->_check_ie($ua, $block);
53    }elsif( $block->{gecko} ){
54        $self->_check_gecko( $ua, $block );
55    }else{
56        $self->_check_mobile( $ua, $block )   ||
57        $self->_check_mobile_pc_viewer( $ua, $block ) ||
58        $self->_check_other_browsers( $ua, $block ) ||
59        $self->_check_webservice($ua, $block ) ||
60        $self->_check_robot( $ua, $block ) ||
61            $self->_check_portable($ua, $block );
62    }
63    if( !$self->{name} ){
64        $self->{name} = 'Unknown';
65        $self->{type} = 'Unknown';
66    }
67}
68
69sub _parse_block {
70    my ( $self, $ua ) = @_;
71
72    return {} unless $ua;
73    my $reg = qr/(\([^\)]+\))|(\S+?)\/(\S+)|(\S+)/;
74    my %block = ();
75    while( $ua =~ /$reg/g ){
76        if( $1 ){
77            $block{_comment} = ($block{_comment}||'').$1;
78        }elsif( $2 ){
79            $block{$2} = $3;
80        }elsif( $4 ){
81            $block{_illigal} = ($block{_illigal}||'').':'.$4;
82        }
83    }
84    return \%block;
85}
86
87sub _check_crawler {
88    my ( $self, $ua ) = @_;
89    if( index($ua,'googlebot') != -1){
90        # http://www.google.com/bot.html
91        if( index($ua,'mobile') != -1 ){
92            $self->{name} = 'Googlebot Mobile';
93        }else{
94            $self->{name} = 'Googlebot';
95        }
96        $self->{vendor} = 'Google';
97    }elsif( index($ua,'mediapartners-google') != -1){
98        $self->{name} = 'Googlebot Mediapartners';
99        $self->{vendor} = 'Google';
100    }elsif( index($ua,'feedfetcher-google') != -1){
101        $self->{name} = 'Googlebot Feedfetcher';
102        $self->{vendor} = 'Google';
103    }elsif( index($ua, 'yahoo') != -1){
104        if( index($ua, 'slurp') != -1){
105            # http://help.yahoo.com/help/us/ysearch/slurp
106            $self->{name} = 'Yahoo! Slurp';
107            $self->{vendor} = 'Yahoo';
108        }elsif( index($ua, 'y!j-srd') != -1 || index($ua, 'y!j-mbs') != -1 ){
109            # http://help.yahoo.co.jp/help/jp/search/indexing/indexing-27.html
110            $self->{name} = 'Yahoo! Japan Mobile Crawler';
111            $self->{vendor} = 'Yahoo';
112        }elsif( index($ua, 'y!j-bsc') != -1){
113            # http://help.yahoo.co.jp/help/jp/blog-search/
114            $self->{name} = 'Yahoo! Japan Blog Crawler';
115            $self->{vendor} = 'Yahoo';
116        }elsif( index($ua, 'y!j-') != -1){
117            # http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html
118            $self->{name} = 'Yahoo! Japan Crawler';
119            $self->{vendor} = 'Yahoo';
120        }
121    }elsif( index($ua, 'msnbot') != -1){
122        # http://search.msn.com/msnbot.htm
123        $self->{name} = 'msnbot';
124        $self->{vendor} = 'Microsoft';
125    }elsif( index($ua, 'twiceler') != -1){
126        # http://www.cuil.com/twiceler/robot.html
127        $self->{name} = 'Twiceler';
128        $self->{vendor} = 'Cuil';
129    }elsif( index($ua, 'baiduspider') != -1){
130        # http://help.baidu.jp/system/05.html
131        $self->{name} = 'Baiduspider';
132        $self->{vendor} = 'Baidu';
133    }elsif( index($ua, 'yeti') != -1 && index($ua, 'naver') != -1){
134        # http://help.naver.com/robots/
135        $self->{name} = 'Yeti';
136        $self->{vendor} = 'Naver';
137    }elsif( index($ua, 'ichiro') != -1){
138        # http://help.goo.ne.jp/door/crawler.html)
139        $self->{name} = 'ichiro';
140        $self->{vendor} = 'goo';
141    }elsif( index($ua, 'moba-crawler') != -1){
142        # http://crawler.dena.jp/
143        $self->{name} = 'moba-crawler';
144        $self->{vendor} = 'DeNA';
145    }elsif( index($ua, 'masagool') != -1){
146        # http://sagool.jp/
147        $self->{name} = 'MaSagool';
148        $self->{vendor} = 'Sagool';
149    }elsif( index($ua, 'ia_archiver') != -1){
150        # http://www.archive.org/
151        $self->{name} = 'Internet Archive';
152        $self->{vendor} = 'Internet Archive';
153
154    }elsif( index($ua, 'spider') != -1 || index($ua, 'crawler') != -1 ){
155        $self->{name} = 'Unknown Crawler';
156    }
157    if( $self->{name} ){
158        $self->{type} = 'Crawler';
159        return 1;
160    }
161    return 0;
162}
163
164sub _check_robot {
165    my ( $self, $ua, $block ) = @_;
166    if( $block->{'libwww-perl'} ){
167        $self->{name} = 'LWP';
168        $self->{version} = $block->{'libwww-perl'};
169    }elsif( $block->{'web::scraper'} ){
170        $self->{name} = 'Web::Scraper';
171        $self->{version} = $block->{'web::scraper'};
172    }elsif( $block->{php} ){
173        $self->{name} = 'PHP';
174        $self->{version} = $block->{php};
175    }elsif( $block->{java} ){
176        $self->{name} = 'Java';
177        $self->{version} = $block->{java};
178    }elsif( $block->{wget} ){
179        $self->{name} = 'Wget';
180        $self->{version} = $block->{wget};
181    }elsif( $block->{curl} ){
182        $self->{name} = 'Curl';
183        $self->{version} = $block->{curl};
184    }elsif( index( $ua, 'h2tconv' ) != -1 ){
185        $self->{name} = 'H2Tconv';
186        $self->{version} = 'Unknown';
187    }elsif( $block->{plagger} ){
188       $self->{name} = 'Plagger';
189       $self->{version} = $block->{plagger};
190    }
191    if( $self->{name} ){
192        $self->{type} = 'Robot';
193        return 1;
194    }
195    return 0;
196}
197
198sub _check_webservice {
199    my ( $self, $ua, $block ) = @_;
200    if( index( $ua, 'hatena bookmark') != -1 ){
201        $self->{name} = 'Hatena Bookmark';
202        $self->{version} = $block->{bookmark};
203        $self->{vendor}  = 'Hatena';
204    }elsif( index( $ua, 'hatena antenna') != -1 ){
205        $self->{name} = 'Hatena Antenna';
206        $self->{version} = $block->{antenna};
207        $self->{vendor}  = 'Hatena';
208    }elsif( $ua =~ /yahoo pipes ([\d\.]+)/ ){
209        $self->{name} = 'Yahoo Pipes';
210        $self->{version} = $1;
211        $self->{vendor}  = 'Yahoo';
212    }elsif( $block->{pathtraq} ){
213        $self->{name} = 'Pathtraq';
214        $self->{version} = $block->{pathtraq};
215        $self->{vendor}  = 'Cybozu Labs';
216    }
217    if( $self->{name} ){
218        $self->{type} = 'Robot';
219        return 1;
220    }
221    return 0;
222}
223
224sub _check_opera {
225    my ( $self, $ua ) = @_;
226    $self->{engine} = 'Opera';
227    $self->{type} = 'Browser';
228    $self->{name} = 'Opera';
229    $self->{vendor} = 'Opera';
230    if( $ua =~ /opera(?:\/|\s+)([\d\.]+)/ ){
231        $self->{version} = $1;
232    }else{
233        $self->{version} = 'Unknown';
234    }
235    return 1;
236}
237
238sub _check_webkit {
239    my ( $self, $ua, $block ) = @_;
240    $self->{engine} = 'WebKit';
241    $self->{type} = 'Browser';
242    if( $block->{chrome} ){
243        $self->{name}    = 'Chrome';
244        $self->{version} = $block->{chrome};
245        $self->{vendor}  = 'Google';
246    }elsif( $block->{omniweb} ){
247        $self->{name}    = 'OmniWeb';
248        $self->{version} = $block->{omniweb};
249        $self->{vendor}  = 'The Omni Group';
250    }elsif( $block->{shiira} ){
251        $self->{name}    = 'Shiira';
252        $self->{version} = $block->{shiira};
253        $self->{vendor}  = 'Shiira Project';
254    }elsif( $block->{safari} ){
255        $self->{name}    = 'Safari';
256        $self->{version} = $block->{version} || $block->{shiira};
257        $self->{vendor}  = 'Apple';
258    }else{
259        $self->{name}    = 'WebKit';
260        $self->{version} = $block->{webkit};
261    }
262}
263
264sub _check_ie {
265    my ( $self, $ua, $block ) = @_;
266    $self->{engine} = 'Internet Explorer';
267    $self->{type} = 'Browser';
268    if( $block->{sleipnir} ){
269        $self->{name}    = 'Sleipnir';
270        $self->{version} = $block->{sleipnir};
271        $self->{vendor}  = 'Fenrir';
272    }elsif( $block->{_comment} =~ /lunascape\s+([\d\.]+)/){
273        $self->{name}    = 'Lunascape';
274        $self->{version} = $1;
275        $self->{vendor}  = 'Lunascape';
276    }elsif( $block->{_comment} =~ /kiki\/([\d\.]+)/){
277        $self->{name}    = 'KIKI';
278        $self->{version} = $1;
279        $self->{vendor}  = 'http://www.din.or.jp/~blmzf/index.html';
280    }elsif( $block->{_comment} =~ /msie\s+([\d\.]+)/){
281        $self->{name}    = 'Internet Explorer';
282        $self->{version} = $1;
283        $self->{vendor}  = 'Microsoft';
284    }
285}
286
287sub _check_gecko {
288    my ( $self, $ua, $block ) = @_;
289    $self->{engine} = 'Gecko';
290    $self->{type} = 'Browser';
291    if( $block->{flock} ){
292        $self->{name}    = 'Flock';
293        $self->{version} = $block->{flock};
294        $self->{vendor}  = 'Flock';
295    }elsif( $block->{firefox} ||
296           $block->{granparadiso} ||
297           $block->{bonecho} ){
298        $self->{name}    = 'Firefox';
299        $self->{version} = $block->{firefox} ||
300                           $block->{granparadiso} ||
301                           $block->{bonecho};
302        if( $self->{version} =~ /(^[^;,]+)/ ){
303            $self->{version} = $1;
304        }
305        $self->{vendor}  = 'Mozilla';
306    }elsif( $block->{netscape} ){
307        $self->{name}    = 'Netscape';
308        $self->{version} = $block->{netscape};
309        $self->{vendor}  = 'Mozilla';
310    }elsif( $block->{iceweasel} ){
311        $self->{name}    = 'Iceweasel';
312        $self->{version} = $block->{iceweasel};
313        $self->{vendor}  = 'Debian Project';
314    }elsif( $block->{seamonkey} ){
315        $self->{name}    = 'SeaMonkey';
316        $self->{version} = $block->{seamonkey};
317        $self->{vendor}  = 'SeaMonkey Council';
318    }elsif( $block->{camino} ){
319        $self->{name}    = 'Camino';
320        $self->{version} = $block->{camino};
321        $self->{vendor}  = 'The Camino Project';
322    }else{
323        $self->{name}    = 'Gecko';
324        $self->{version} = $block->{gecko};
325        $self->{vendor}  = 'Unknown';
326    }
327}
328
329sub _check_mobile {
330    my ( $self, $ua, $block ) = @_;
331    $ua = $self->{user_agent} || $ua;
332    if( $block->{docomo} ){
333        $self->{name}    = 'docomo';
334        if( $ua =~ /DoCoMo\/(\d\.\d)[\/\s]+([A-Za-z0-9]+)/ ){
335            $self->{version} = $2;
336        }else{
337            $self->{version} = "Unknown";
338        }
339        $self->{vendor}  = 'docomo';
340    }elsif( $block->{'up.browser'} && $ua =~ /^KDDI-(\S+)/ ){
341        $self->{name}    = 'au';
342        $self->{version} = $1;
343        $self->{vendor}  = 'KDDI';
344    }elsif( my $softbank =
345                $block->{softbank} ||
346                    $block->{vodafone} ||
347                        $block->{'j-phone'} ){
348        if( $ua =~ /(?:SoftBank|Vodafone|J-PHONE)\/[\d\.]+\/([A-Za-z0-9]+)/ ){
349            $self->{name}    = 'SoftBank';
350            $self->{version} = $1;
351            $self->{vendor}  = 'SoftBank';
352        }
353    }
354    if( $self->{name} ){
355        $self->{type} = 'Mobile';
356        return 1;
357    }
358    return 0;
359}
360
361sub _check_mobile_pc_viewer {
362    my ( $self, $ua, $block ) = @_;
363    $ua = $self->{user_agent} || $ua;
364    if( $ua =~ /jig browser( web)?(?:\D+([\d\.]+))*/ ){
365        $self->{name}    = 'Jig Browser';
366        $self->{version} = $1 || 'Unknown';
367        $self->{vendor}  = 'jig';
368    }elsif( $ua =~ /ibisBrowser/ ){
369        $self->{name}    = 'ibisBrowser';
370        $self->{version} = 'Unknown';
371        $self->{vendor}  = 'ibis';
372    }elsif( $block->{mozilla} && $ua =~ /([A-Za-z0-9]+);\s*FOMA/ ){
373        $self->{name}    = 'FOMA Full Browser';
374        $self->{version} = $1;
375        $self->{vendor}  = 'DoCoMo';
376    }
377    if( $self->{name} ){
378        $self->{type} = 'Browser';
379        return 1;
380    }
381    return 0;
382}
383
384sub _check_other_browsers {
385    my ( $self, $ua, $block ) = @_;
386    if( $block->{lynx} ){
387        $self->{name}    = 'Lynx';
388        $self->{version} = $block->{lynx};
389        $self->{vendor}  = 'The University of Kansas';
390    }elsif( $block->{w3m} ){
391        $self->{name}    = 'w3m';
392        $self->{version} = $block->{w3m};
393        $self->{vendor}  = 'Akinori Ito';
394    }elsif( $ua =~ /konqueror\/([\d\.]+)/ ){
395        $self->{name}    = 'Konqueror';
396        $self->{version} = $1;
397        $self->{vendor}  = 'KDE Team';
398    }
399    if( $self->{name} ){
400        $self->{type} = 'Browser';
401        return 1;
402    }
403    return 0;
404}
405
406sub _check_portable {
407    my ( $self, $ua, $block ) = @_;
408    if( $ua =~ /playstation portable(?:\D+([\d\.]+))*/ ){
409        $self->{name}    = 'PSP';
410        $self->{version} = $1 || 'Unknown';
411        $self->{vendor}  = 'Sony';
412    }elsif( $ua =~ /playstation 3(?:\D+([\d\.]+))*/ ){
413        $self->{name}    = 'Playstation 3';
414        $self->{version} = $1 || 'Unknown';
415        $self->{vendor}  = 'Sony';
416    }
417    if( $self->{name} ){
418        $self->{type} = 'Browser';
419        return 1;
420    }
421}
422
423sub _parse_os {
424    my ( $self, $ua ) = @_;
425    return unless $ua;
426    if( $ua =~ /windows|win(?:9[58]|nt)/ ){
427        $self->{os} = 'Windows';
428    }elsif( $ua =~ /macintosh|mac_(?:powerpc|68000)/ ){
429        $self->{os} = 'Macintosh';
430    }elsif( $ua =~ /x11/ ){
431        $self->{os} = 'X11';
432    }
433}
434
4351; # Magic true value required at end of module
436__END__
437
438=head1 NAME
439
440HTTP::DetectUserAgent - Yet another HTTP useragent string parser.
441
442
443=head1 VERSION
444
445This document describes HTTP::DetectUserAgent version 0.0.1
446
447
448=head1 SYNOPSIS
449
450    use HTTP::DetectUserAgent;
451    my $ua = HTTP::DetectUserAgent->new($useragent_string);
452    my $type    = $ua->type;
453    my $name    = $ua->name;
454    my $version = $ua->version;
455    my $vendor  = $ua->vendor;
456    my $os      = $ua->os;
457
458=head1 DESCRIPTION
459
460HTTP::DetectUserAgent provides the parsing function for HTTP useragent strings. You can use it for determine which browser( or crawler, bot, and so on ) is accessing to your servers or web applications.
461
462=head1 AUTHOR
463
464Takaaki Mizuno, E<lt>module@takaaki.infoE<gt>
465
466=head1 COPYRIGHT AND LICENSE
467
468Copyright (C) 2008 by Takaaki Mizuno
469
470This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
471
472=cut
Note: See TracBrowser for help on using the browser.