I am trying to find and open the first result from a search link in eBay. I am using curl to load this web page: SOA508_206SM | eBay. Here is a screenshot:
I am also using the php_simple_html_dom library to search for DOM elements in the page. Here is my code:
/**
 * Download a URL with cURL and return the response body.
 *
 * The request is sent with a fixed browser-like User-Agent and a fixed cookie
 * string. If a transfer fails, a diagnostic line is echoed and the request is
 * re-sent; after the initial attempt plus two retries the function gives up.
 *
 * @param string $url Address of the page to download.
 *
 * @return string|null The response body, or null when the handle could not be
 *                     created or every attempt failed.
 */
function getWebPage($url){
    // Number of times the request is re-sent after the first failed attempt.
    $maxRetries = 2;

    $ch = curl_init($url);
    if ($ch === false) {
        // Without a handle none of the calls below can work; report the
        // failure the same way as an exhausted retry budget.
        return null;
    }

    $options = [
        CURLOPT_RETURNTRANSFER => true, // return the body instead of printing it
        CURLOPT_HEADER => false, // don't return headers
        CURLOPT_FOLLOWLOCATION => true, // follow redirects
        CURLOPT_ENCODING => "", // handle all encodings
        CURLOPT_USERAGENT => "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.85 Safari/537.36 ", // who am i
        CURLOPT_AUTOREFERER => true, // set referer on redirect
        CURLOPT_CONNECTTIMEOUT => 30, // timeout on connect (seconds)
        CURLOPT_TIMEOUT => 30, // timeout on the whole transfer (seconds)
        CURLOPT_MAXREDIRS => 30, // stop after 30 redirects
        CURLOPT_FAILONERROR => true, // treat HTTP status codes >= 400 as a cURL error
        // NOTE(review): peer verification is switched off, so the server
        // certificate is not validated. Kept to preserve existing behaviour;
        // prefer enabling it with a proper CA bundle.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_COOKIE => "npii=btguid/fc03e0021610ab600201584cffbe2cc05c7fecd1^cguid/fc03e74c1610a993a1036300e76f880a5c7fecd1^; nonsession=CgADLAAFansBaMwDKACBkBLrSZmMwM2UwMDIxNjEwYWI2MDAyMDE1ODRjZmZiZTJjYzDyP9R7; AMCVS_A71B5B5B54F607AB0A4C98A2%40AdobeOrg=1; AMCV_A71B5B5B54F607AB0A4C98A2%40AdobeOrg=-1758798782%7CMCIDTS%7C17597%7CMCMID%7C54020616941805210570237619514598619543%7CMCAAMLH-1520956371%7C6%7CMCAAMB-1520956371%7CRKhpRz8krg2tLO6pguXWp5olkAcUniQYPHaMWWgdJ3xzPWQmdj0y%7CMCCIDH%7C-1464329217%7CMCOPTOUT-1520358771s%7CNONE%7CMCAID%7CNONE; dp1=bu1p/QEBfX0BAX19AQA**5c7fecd2^bl/BG5e612052^pbf/#308000100000000000000100020000005e612054^; ebay=%5Esbf%3D%2340400000000010000000000%5Epsi%3DAA%2BADACA*%5E; ds2=sotr/b7pQ5zQMz5zz^; csegs=segs%3Da12; aam_uuid=54029524289082034840234450335155765290",
    ];
    curl_setopt_array($ch, $options);

    $img = curl_exec($ch);
    for ($n = 0; curl_errno($ch) != 0; $n++) {
        if ($n >= $maxRetries) {
            // Retry budget exhausted: signal failure to the caller.
            return null;
        }
        echo curl_errno($ch)." - ".curl_error($ch) . "\nSending again request ...\r\n";
        $img = curl_exec($ch);
    }

    return $img;
}
$data = getWebPage('https://www.ebay.com/sch/i.html?_from=R40&_trksid=m570.l1313&_nkw=SOA508_206SM&_sacat=0');
https://www.ebay.com/sch/i.html?_from=R40&_trksid=m570.l1313&_nkw=005-4560499D-PS&_sacat=0
if ($data == null) {
echo "Couldn't download page "."\n";
continue;
}
$dom->load($data);
$res=$dom->find(".BOLD", 0);
if($res == null){
fputcsv('download failed'));
}else{
$totalItems = $res->plaintext;//Get the number of the search results
}
$list = $dom->find("li.s-item"); //This finds the class of each listing
if($list == null){
echo "List not found at "."\n";
continue;
}
$max = 5;
if($totalItems < $max){
$max = $totalItems;
}
for ($i=0; $i < $max; $i++) {
$id = $list[$i]->listingid;//Get listing id
$url = $list[$i]->find(".s-item__link", 0); //Get the listing` url
At this point, after debugging, I always find that the total number of items is 4, and so I get the third result from $list[4]. The idea is to always get the first listing, no matter how many results there are. I think the solution lies in the DOM elements and the other attributes shown in the screenshot.