到目前为止,我已成功从外部站点抓取 2 个元素到我的测试页面: http://mt-cloud.co.uk/nhs/
(请在页面上进行测试搜索以查看结果)
// Fetch the NHS GP service-search results page with cURL.
$ch = curl_init('http://www.nhs.uk/service-search/GP/m410ux/Results/4/-2.35167407989502/53.4519462585449/4/0?distance=25');
// Make curl_exec() return the response body as a string instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// Follow HTTP redirects.
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
// NOTE(review): curl_exec() returns false on failure; the result is not checked here.
$output = curl_exec($ch);
curl_close($ch);
$document = new DOMDocument;
// Collect libxml parse errors internally rather than emitting PHP warnings.
libxml_use_internal_errors(true);
$document->loadHTML($output);
// The encoding property is assigned after the markup has already been parsed.
$document->encoding = 'utf-8';
$xpath = new DOMXPath($document);
// <th> elements whose class attribute is exactly "fctitle".
$pnames = $xpath->query("//th[@class='fctitle']");
// <td> elements whose class attribute is exactly "fcdetails fc-first".
$addresses = $xpath->query("//td[@class='fcdetails fc-first']");
我有 2 个 foreach 循环:
// Serialise every matched name node to HTML. $result1 is reassigned on each
// iteration, so after the loop it holds only the last node's markup.
foreach ($pnames as $pname) {
$result1 = $document->saveHTML($pname);
}
// Same for the address nodes: $result2 ends up holding only the last one.
foreach ($addresses as $address) {
$result2 = $document->saveHTML($address);
}
其中 $result1 = 全科医生诊所名称,$result2 = 全科医生诊所地址。
正如您在测试页上看到的,我的结果 1 和 2 是分开的。我如何获得它们,以便将诊所名称和诊所地址放在一起?
更新(@Tri)
// Walk both node lists by index so the i-th name is printed with the i-th address.
// NOTE(review): count() on a DOMNodeList reports the real number of nodes only
// on PHP 7.2+ (where DOMNodeList is Countable); on older versions it returns 1,
// so the body runs once. $pnames->length gives the node count on every version.
for($i = 0; $i < count($pnames); $i++){
$name= $document->saveHTML($pnames[$i]);
// Turn site-relative "/Services/" links into absolute links on www.nhs.uk.
$name=str_replace ('<a href="/Services/', '<a href="http://www.nhs.uk/Services/', $name);
$address = $document->saveHTML($addresses[$i]);
echo $name.'<br>'.$address;
}
仅返回一个结果而不是全部
这是我完整的 php 代码: http://mt-cloud.co.uk/nhs/content/code
我正在尝试抓取的数据的截图: http://mt-cloud.co.uk/nhs/content/results.png
你的第一段代码没问题,你只需要将诊所名称和地址存储在一个二维数组中,然后循环遍历这个数组。
这部分代码和你的完全一样:
// Download the results page; the body is returned as a string and redirects are followed.
$ch = curl_init('http://www.nhs.uk/service-search/GP/m410ux/Results/4/-2.35167407989502/53.4519462585449/4/0?distance=25');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
// NOTE(review): $output is false when the transfer fails; no check is made.
$output = curl_exec($ch);
curl_close($ch);
// Parse the HTML, buffering libxml parse errors instead of raising warnings.
$document = new DOMDocument;
libxml_use_internal_errors(true);
$document->loadHTML($output);
$document->encoding = 'utf-8';
// Query the name cells (th.fctitle) and the address cells
// (td with class exactly "fcdetails fc-first").
$xpath = new DOMXPath($document);
$pnames = $xpath->query("//th[@class='fctitle']");
$addresses = $xpath->query("//td[@class='fcdetails fc-first']");
现在我们将创建一个数组:
// Start with an empty list; each entry will hold one name/address pair.
$results = [];
然后使用循环并将名称和地址成对存储在数组的相同索引中:
// Pair names and addresses by position in a single pass.
// The loop runs to the longer of the two lists; DOMNodeList::item() returns
// null past the end, in which case the missing side is stored as '' so that
// every row always has both the 'name' and the 'address' key.
$pairCount = max($pnames->length, $addresses->length);
for ($iCnt = 0; $iCnt < $pairCount; $iCnt++) {
    $nameNode = $pnames->item($iCnt);
    $addressNode = $addresses->item($iCnt);
    $results[$iCnt] = array(
        'name' => $nameNode === null ? '' : $document->saveHTML($nameNode),
        'address' => $addressNode === null ? '' : $document->saveHTML($addressNode),
    );
}
现在我们有一个包含名称和地址对的数组,如果我们循环遍历它,我们可以看到它们在一起:
// Print every name together with its address.
// A row may lack one of the keys when the two node lists had different
// lengths, so fall back to an empty string instead of raising a notice.
foreach ($results as $row) {
    $name = isset($row['name']) ? $row['name'] : '';
    $address = isset($row['address']) ? $row['address'] : '';
    echo 'Name: '.$name.'<br>';
    echo 'Address: '.$address.'<br>';
}
仅此而已。完整的代码如下所示:
<?php
// Scrape GP practice names and addresses from an NHS service-search results
// page and print each name next to its address.

$url = 'http://www.nhs.uk/service-search/GP/m410ux/Results/4/-2.35167407989502/53.4519462585449/4/0?distance=25';

$ch = curl_init($url);
if ($ch === false) {
    exit('Could not initialise cURL.');
}
// Return the response body as a string and follow redirects.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
$output = curl_exec($ch);
// Read the error text before the handle is closed.
$curlError = curl_error($ch);
curl_close($ch);

// curl_exec() returns false on failure, and loadHTML() cannot parse an empty body.
if ($output === false || $output === '') {
    exit('Could not fetch the results page: '.$curlError);
}

// Buffer libxml parse errors instead of emitting warnings, then discard the
// buffer once parsing is done.
libxml_use_internal_errors(true);
$document = new DOMDocument;
$document->loadHTML($output);
libxml_clear_errors();
$document->encoding = 'utf-8';

$xpath = new DOMXPath($document);
$pnames = $xpath->query("//th[@class='fctitle']");
$addresses = $xpath->query("//td[@class='fcdetails fc-first']");

// Pair the i-th name with the i-th address. item() returns null past the end
// of a list, so a missing side becomes '' and every row has both keys.
$results = array();
$pairCount = max($pnames->length, $addresses->length);
for ($iCnt = 0; $iCnt < $pairCount; $iCnt++) {
    $nameNode = $pnames->item($iCnt);
    $addressNode = $addresses->item($iCnt);
    $results[$iCnt] = array(
        'name' => $nameNode === null ? '' : $document->saveHTML($nameNode),
        'address' => $addressNode === null ? '' : $document->saveHTML($addressNode),
    );
}

// The stored values are HTML fragments from the scraped page and are echoed as markup.
foreach ($results as $result) {
    echo 'Name: '.$result['name'].'<br>';
    echo 'Address: '.$result['address'].'<br>';
}
?>
所以不要使用 foreach,而是改用按索引遍历的 for 循环:
// Iterate both node lists by index so each name is handled with its address.
// Use ->length and ->item(): count() on a DOMNodeList returns 1 before
// PHP 7.2, which would make the loop run only once.
// Stop at the shorter list so item() never returns null for either side.
$pairCount = min($pnames->length, $addresses->length);
for ($i = 0; $i < $pairCount; $i++) {
    $name = $document->saveHTML($pnames->item($i));
    $address = $document->saveHTML($addresses->item($i));
    //do something with your result
}
注意:您必须确保 $addresses 中的节点数量与 $pnames 中的节点数量相等,否则名称和地址将无法一一对应。