Changeset 20745 for lang/php

Show
Ignore:
Timestamp:
10/05/08 02:35:59 (2 months ago)
Author:
sasezaki
Message:

lang/php/Scraper: Adapter_Htmlscraping modify

Location:
lang/php/Scraper
Files:
4 modified

Legend:

Unmodified
Added
Removed
  • lang/php/Scraper/library/Diggin/Scraper/Adapter/Htmlscraping.php

    r20204 r20745  
    2424 */ 
    2525 
     26/** 
     27 * @see Diggin_Scraper_Adapter_Interface 
     28 */ 
    2629require_once 'Diggin/Scraper/Adapter/Interface.php'; 
    2730 
     
    4649            $xhtml = $this->getXhtml($response); 
    4750        } catch (Exception $e) { 
    48             throw $e; 
     51            require_once 'Diggin/Scraper/Adapter/Exception.php'; 
     52            throw new Diggin_Scraper_Adapter_Exception($e); 
    4953        } 
    5054         
     
    6670        $responseBody = str_replace('&', '&amp;', $responseBody); 
    6771        try { 
    68             $xml_object = @new SimpleXMLElement($responseBody); 
     72                //@see http://php.net/libxml.constants 
     73                if (isset($this->config['libxmloptions'])) { 
     74                        $xml_object = @new SimpleXMLElement($responseBody, $this->config['libxmloptions']); 
     75                } else { 
     76                $xml_object = @new SimpleXMLElement($responseBody); 
     77                } 
    6978        } catch (Exception $e) { 
    70             throw $e; 
     79                require_once 'Diggin/Scraper/Adapter/Exception.php'; 
     80            throw new Diggin_Scraper_Adapter_Exception($e); 
    7181        } 
    7282         
     
    280290        $declarations .= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" '; 
    281291        $declarations .= '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'; 
    282         $responseBody = "$declarations$responseBody"; 
    283  
    284         return $responseBody; 
     292         
     293        return "$declarations$responseBody"; 
    285294    } 
    286295 
  • lang/php/Scraper/library/Diggin/Scraper/Process.php

    r17673 r20745  
    2424    public $processes; 
    2525     
     26    /** 
     27     *  
     28     * 
     29     * @return UnTokenize process 
     30     */ 
    2631    public function __toString() 
    2732    { 
    28         return '\''.$this->expression.'\' , '. 
    29                $this->name.' => '. $this->type. '"'; 
     33        if ($this->processes instanceof Diggin_Scraper_Process) { 
     34            return '\''.$this->expression.'\', '. 
     35               "'".$this->name.' => " (Diggin_Scraper_Process)"'; 
     36        } 
     37         
     38        if ($this->filters !== false) { 
     39                return '\''.$this->expression.'\', '. 
     40               "'".$this->name.' => ["'. $this->type. '", "'.$this->filters.'"]\''; 
     41        } 
     42         
     43        return '\''.$this->expression.'\', '. 
     44               "'".$this->name.' => "'. $this->type. '"\''; 
    3045    } 
    3146     
  • lang/php/Scraper/library/Diggin/Uri/Http.php

    r20656 r20745  
    4545                } 
    4646            } 
     47        /* 
     48        } else if(class_exists('Rhaco')) { 
     49                Rhaco::import('network.Url'); 
     50                return Url::parseAbsolute($base_url, $url); 
     51        */ 
    4752        //Net_URL2 ver 0.2.0 
    4853        } else { 
  • lang/php/Scraper/tests/Diggin/Uri/HttpTest.php

    r20533 r20745  
    11<?php 
     2//$rhacopath = '/media/disk-1/work/rhaco2/2_0'; 
     3//set_include_path(get_include_path().PATH_SEPARATOR.$rhacopath); 
     4//require_once 'Rhaco.php'; 
     5//Rhaco::import('network.Url'); 
     6//var_dump(Url::parseAbsolute('http://yahoo.com/test/test.cgi?hoge=bar', '?param=foo')); 
     7//var_dump(Url::parseAbsolute('http://yahoo.com/test/', '../index.html')); 
     8//var_dump(get_include_path());exit; 
     9 
    210require_once 'PHPUnit/Framework.php'; 
    311 
     
    4351    public function testGetAbsoluteUrl() 
    4452    { 
     53         
     54         
    4555        //if  
    4656        $this->assertEquals('http://yahoo.com/test/',