0
I am trying to fetch a specific HTML page with my getWebPage
function (a cURL wrapper). Then I am using the PHP Simple HTML DOM parser (simple_html_dom)
to load the content. However, I cannot fetch the page. Here is my code:
include("simple_html_dom.php");
/**
 * Download a URL with cURL and return the response body.
 *
 * The request is sent once; whenever cURL reports an error it is re-sent,
 * up to $maxRetries additional times. Each failed attempt that is followed
 * by a retry echoes the cURL error number and message.
 *
 * Because CURLOPT_FAILONERROR is enabled, HTTP status codes >= 400 are
 * reported by cURL as errors and therefore also trigger a retry.
 *
 * @param string $url        Address of the page to download.
 * @param int    $maxRetries How many times a failed request is re-sent
 *                           (default 2, i.e. at most 3 attempts in total).
 *
 * @return string|null The response body, or null if the handle could not be
 *                     created or every attempt failed.
 */
function getWebPage($url, $maxRetries = 2){
    $ch = curl_init($url);
    if ($ch === false) {
        // No handle means there is nothing to configure or retry.
        return null;
    }

    $options = array(
        CURLOPT_RETURNTRANSFER => true, // return the body instead of printing it
        CURLOPT_HEADER => false, // don't return headers
        CURLOPT_FOLLOWLOCATION => true, // follow redirects
        CURLOPT_ENCODING => "", // accept every encoding cURL supports
        CURLOPT_USERAGENT => "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.85 Safari/537.36 ", // who am i
        CURLOPT_AUTOREFERER => true, // set referer on redirect
        CURLOPT_CONNECTTIMEOUT => 30, // timeout on connect (seconds)
        CURLOPT_TIMEOUT => 30, // timeout on the whole transfer (seconds)
        CURLOPT_MAXREDIRS => 30, // stop after 30 redirects
        CURLOPT_FAILONERROR => true, // treat HTTP status >= 400 as a cURL error
        // NOTE(review): disabling peer verification accepts any TLS certificate
        // and exposes the request to man-in-the-middle attacks. Prefer a proper
        // CA bundle (CURLOPT_CAINFO) and remove this line.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_COOKIE => "npii=btguid/fc03e0021610ab600201584cffbe2cc05c7fecd1^cguid/fc03e74c1610a993a1036300e76f880a5c7fecd1^; nonsession=CgADLAAFansBaMwDKACBkBLrSZmMwM2UwMDIxNjEwYWI2MDAyMDE1ODRjZmZiZTJjYzDyP9R7; AMCVS_A71B5B5B54F607AB0A4C98A2%40AdobeOrg=1; AMCV_A71B5B5B54F607AB0A4C98A2%40AdobeOrg=-1758798782%7CMCIDTS%7C17597%7CMCMID%7C54020616941805210570237619514598619543%7CMCAAMLH-1520956371%7C6%7CMCAAMB-1520956371%7CRKhpRz8krg2tLO6pguXWp5olkAcUniQYPHaMWWgdJ3xzPWQmdj0y%7CMCCIDH%7C-1464329217%7CMCOPTOUT-1520358771s%7CNONE%7CMCAID%7CNONE; dp1=bu1p/QEBfX0BAX19AQA**5c7fecd2^bl/BG5e612052^pbf/#308000100000000000000100020000005e612054^; ebay=%5Esbf%3D%2340400000000010000000000%5Epsi%3DAA%2BADACA*%5E; ds2=sotr/b7pQ5zQMz5zz^; csegs=segs%3Da12; aam_uuid=54029524289082034840234450335155765290"
    );
    curl_setopt_array($ch, $options);

    $retries = 0; // number of re-sent requests so far
    $img = curl_exec($ch);
    while (curl_errno($ch) !== 0) {
        if ($retries >= $maxRetries) {
            // Out of attempts: report failure to the caller.
            return null;
        }
        echo curl_errno($ch)." - ".curl_error($ch) . "\nSending again request ...\r\n";
        $retries++;
        $img = curl_exec($ch);
    }
    return $img;
}
// Report every error: with error_reporting(0) the fatal errors in this script
// (undefined $dom, misplaced `continue`) were silently hidden.
error_reporting(E_ALL);

$searchUrl = 'https://www.ebay.com/sch/6000/i.html?_from=R40&_nkw=113302336294';

// The parser object must exist before load()/find() can be called on it.
$dom = new simple_html_dom();
$totalItems = null;

$data = getWebPage($searchUrl);
if ($data === null) {
    echo "Couldn't download page"."\n";
} else {
    $dom->load($data);
    $res = $dom->find("span.rcnt", 0);

    if ($res === null) {
        // The result counter was not in the markup: download once more.
        echo "Can't fetch page"."\n";
        $data = getWebPage($searchUrl);
        if ($data !== null) {
            $dom->load($data);
            $res = $dom->find("span.rcnt", 0);
        }
        if ($res === null) {
            print_r("Download failed");
        }
    }

    if ($res !== null) {
        // Read ->plaintext exactly once; the node is an object, the result a string.
        $totalItems = $res->plaintext;

        // Assumes the counter text is a number that may contain thousands
        // separators (e.g. "1,234") -- TODO confirm against the live markup.
        if ((int) str_replace(',', '', trim($totalItems)) === 0) {
            echo "No results "."\n";
        }
    }
}
As you can see, I am loading the data, but then $totalItems = $dom->find("span.rcnt", 0)
cannot find the search results. Here is my output:
I tried it on https://ebay.co.uk and it worked, but on ebay.com it does not. Here is a screenshot with the correct search result, which must be fetched: