Changeset 15861
- Timestamp: 07/15/08 23:55:39 (5 years ago)
- Location: lang/php/Scraper/library/Diggin
- Files: 4 modified
  - Scraper.php (modified) (1 diff)
  - Scraper/Strategy/Abstract.php (modified) (2 diffs)
  - Scraper/Strategy/Selector.php (modified) (5 diffs)
  - Scraper/Strategy/Xpath.php (modified) (4 diffs)
Legend:
- Unmodified
- Added
- Removed
lang/php/Scraper/library/Diggin/Scraper.php
r15813 r15861 306 306 $context = new Diggin_Scraper_Context($this->getStrategy($resource)); 307 307 foreach (self::$_processes as $process) { 308 $values = self::$_strategy->getValue($context, $process); 309 310 if ($process->filters) { 311 require_once 'Diggin/Scraper/Filter.php'; 312 $values = Diggin_Scraper_Filter::run($values, $process->filters); 313 } 314 308 $values = self::$_strategy->getValues($context, $process); 309 315 310 $this->results[$process->name] = $values; 316 311 } -
lang/php/Scraper/library/Diggin/Scraper/Strategy/Abstract.php
r14732 r15861 39 39 } 40 40 41 /**42 *43 */44 public function getData()45 {46 //if !is_readble($this->getBody)...47 48 return $this->readData($this->getResponse());49 }50 51 41 public function scrapedData($process) 52 42 { … … 61 51 protected abstract function scrape($response, $process); 62 52 63 protected abstract function getValue($context, $process); 53 protected abstract function getValue($values, $process); 54 55 protected abstract static function extract($values, $process); 56 57 public function getValues($context, $process) 58 { 59 if (!isset($process->type)) { 60 return $context->scrape($process); 61 } 62 63 if ($context instanceof Diggin_Scraper_Context) { 64 $values = $context->scrape($process); 65 } else { 66 $values = $this->extract($context, $process); 67 } 68 69 if ($process->type instanceof scraper) { 70 foreach ($values as $count => $val) { 71 foreach ($process->type->processes as $proc) { 72 $returns[$count][$proc->name] = $this->getValues($val, $proc); 73 } 74 75 if (($process->arrayflag === false) && $count === 0) break; 76 } 77 return $returns; 78 } 79 80 $values = $this->getValue($values, $process); 81 82 if ($process->arrayflag === false && strtoupper($process->type) === 'RAW') { 83 $values = array_shift($values); 84 } elseif ($process->arrayflag === false) { 85 $values = (string) array_shift($values); 86 } 87 88 if ($process->filters) { 89 require_once 'Diggin/Scraper/Filter.php'; 90 $values = Diggin_Scraper_Filter::run($values, $process->filters); 91 } 92 93 return $values; 94 } 64 95 } -
lang/php/Scraper/library/Diggin/Scraper/Strategy/Selector.php
r15814 r15861 16 16 17 17 require_once 'Diggin/Scraper/Strategy/Abstract.php'; 18 18 require_once dirname(__FILE__).'/Selector/sfDomCssSelector.class.php'; 19 19 class Diggin_Scraper_Strategy_Selector extends Diggin_Scraper_Strategy_Abstract 20 20 { … … 71 71 $simplexml = $this->getAdapter()->readData($respose); 72 72 73 return self::extract($simplexml, $process); 74 } 75 76 public static function extract($simplexml, $process) 77 { 73 78 $dom = dom_import_simplexml($simplexml); 74 75 require_once dirname(__FILE__).'/Selector/sfDomCssSelector.class.php'; 79 76 80 $selector = new sfDomCssSelector($dom); 77 81 … … 83 87 return $results; 84 88 } 89 85 90 86 91 /** … … 91 96 * @return mixed 92 97 */ 93 public function getValue($context, $process) 94 { 95 96 if (!isset($process->type)) { 97 return $context->scrape($process); 98 } 99 100 if ($context instanceof Diggin_Scraper_Context) { 101 $values = $context->scrape($process); 102 } else { 103 $dom = dom_import_simplexml($context); 104 $selector = new sfDomCssSelector($dom); 105 106 $values = array(); 107 foreach ($selector->getElements($process->expression) as $result) { 108 $values[] = simplexml_import_dom($result); 109 } 110 } 111 112 if ($process->type instanceof scraper) { 113 foreach ($values as $count => $val) { 114 foreach ($process->type->processes as $proc) { 115 $returns[$count][$proc->name] = $this->getValue($val, $proc); 116 } 117 } 118 119 if($process->arrayflag === false) { 120 $returns = array_shift($returns); 121 } 122 123 return $returns; 124 } 125 98 public function getValue($values, $process) 99 { 126 100 //type 127 101 if (strtoupper(($process->type)) === 'RAW'){ … … 161 135 } 162 136 163 if ($process->arrayflag === false && strtoupper($process->type) === 'RAW') {164 $strings = array_shift($strings);165 } elseif ($process->arrayflag === false) {166 $strings = (string) array_shift($strings);167 }168 169 137 return $strings; 170 138 } -
lang/php/Scraper/library/Diggin/Scraper/Strategy/Xpath.php
r15814 r15861 72 72 { 73 73 $simplexml = $this->getAdapter()->readData($respose); 74 75 $results = array(); 76 foreach ($simplexml->xpath($process->expression) as $result) { 74 75 return self::extract($simplexml, $process); 76 } 77 78 public static function extract($values, $process) 79 { 80 $results = array(); 81 foreach ($values->xpath($process->expression) as $result) { 77 82 $results[] = $result; 78 83 } … … 80 85 return $results; 81 86 } 82 87 83 88 /** 84 89 * get value with DSL … … 88 93 * @return mixed 89 94 */ 90 public function getValue($ context, $process)95 public function getValue($values, $process) 91 96 { 92 if (!isset($process->type)) {93 return $context->scrape($process);94 }95 96 if ($context instanceof Diggin_Scraper_Context) {97 $values = $context->scrape($process);98 } else {99 100 $values = array();101 foreach ($context->xpath($process->expression) as $result) {102 $values[] = $result;103 }104 }105 106 if ($process->type instanceof scraper) {107 foreach ($values as $count => $val) {108 foreach ($process->type->processes as $proc) {109 $returns[$count][$proc->name] = $this->getValue($val, $proc);110 }111 }112 113 if($process->arrayflag === false) {114 $returns = array_shift($returns);115 }116 return $returns;117 }118 119 97 //type 120 98 if (strtoupper(($process->type)) === 'RAW') { 121 99 $strings = $values; 122 100 } elseif (strtoupper(($process->type)) === 'TEXT') { 123 124 101 $strings = array(); 125 102 foreach ($values as $value) { … … 153 130 throw new Diggin_Scraper_Strategy_Exception("can not understand type :".$process->type); 154 131 } 155 156 if ($process->arrayflag === false && strtoupper($process->type) === 'RAW') {157 $strings = array_shift($strings);158 } elseif ($process->arrayflag === false) {159 $strings = (string) array_shift($strings);160 }161 132 162 133 return $strings; 163 134 } 135 164 136 } 165 137
![(please configure the [header_logo] section in trac.ini)](/share/chrome/site/your_project_logo.png)