我正在尝试从 http://www.elwatannews.com/home/rssfeeds?sectionId=115 获取 RSS feed,该网站使用 Cloudflare 进行保护。我尝试了帖子《curl:无法从网站获取 rss,因为 CloudFlare》中给出的解决方法,但没有得到任何结果。我的代码附在下面:
<?php
/* cloudflare.php by asifpk
* [email protected] */
// Entry point: fetch the feed through the Cloudflare-aware helper and show
// the raw markup as escaped text.
$url = 'http://www.elwatannews.com/home/rssfeeds?sectionId=115';
$data = OpenURLcloudflare($url);
if (!is_string($data) || $data === '') {
    // The helper hands back curl_exec()'s result, which is false when the
    // transfer fails; report that instead of silently printing a blank page.
    echo 'Failed to fetch ' . htmlspecialchars($url);
} else {
    // Escape the response so the browser displays the markup instead of rendering it.
    echo htmlspecialchars($data);
}
/**
 * Fetch $url and, if the response contains a "challenge-form", try to answer it.
 *
 * The first request is made without POST fields. When the body contains the
 * text "challenge-form", three values are scraped from the markup: the
 * arithmetic expression that follows the form, the first action="..."
 * attribute, and the "jschl_vc" token. They are posted back to the same URL
 * as the fields act, jschl_vc and jschl_answer.
 *
 * If the first request fails, the body contains no challenge form, or the
 * scraped pieces are missing or not plain arithmetic, the first response is
 * returned unchanged.
 *
 * @param string $url Address of the page to fetch.
 * @return mixed Result of the last OpenURL() call that was made.
 */
function OpenURLcloudflare($url) {
    $page = OpenURL($url);

    // Nothing to solve: the request failed or no challenge form was served.
    if (!is_string($page) || strpos($page, 'challenge-form') === false) {
        return $page;
    }

    // Text after the first "challenge-form", between the next '=' and the
    // following '=' or ';' -- the same slice the explode() chain always took,
    // but with every index checked.
    $afterForm = cloudflareSegment($page, 'challenge-form', 1);
    $expression = cloudflareSegment(cloudflareSegment($afterForm, '=', 1), ';', 0);
    $action = cloudflareSegment(cloudflareSegment($page, 'action="', 1), '"', 0);
    $token = cloudflareSegment(cloudflareSegment($page, '"jschl_vc" value="', 1), '"', 0);
    if ($expression === null || $action === null || $token === null) {
        return $page;
    }

    // SECURITY: $expression comes from a remote server and is handed to eval().
    // Only digits, arithmetic operators, parentheses, dots and blanks are let
    // through, so no identifiers or function calls can ever be executed.
    // NOTE(review): replacing eval() with a small arithmetic parser would be safer still.
    $expression = trim($expression);
    if (!preg_match('/^[0-9+\-*\/%(). \t]+$/D', $expression)) {
        return $page;
    }
    try {
        $jschl_answer = eval('return ' . $expression . ';');
    } catch (Error $e) {
        // Malformed arithmetic (parse error, modulo by zero, ...): give up on solving.
        return $page;
    }
    if (!is_int($jschl_answer) && !is_float($jschl_answer)) {
        return $page;
    }

    // NOTE(review): the form action is sent as the "act" field and the POST
    // goes to the original URL; whether the server accepts that is not
    // verifiable from this file.
    $post = array(
        'act' => $action,
        'jschl_vc' => $token,
        'jschl_answer' => $jschl_answer,
    );
    return OpenURL($url, $post);
}

/**
 * Split $text on $delimiter and return piece number $index, or null when
 * $text is not a string or that piece does not exist.
 */
function cloudflareSegment($text, $delimiter, $index) {
    if (!is_string($text)) {
        return null;
    }
    $parts = explode($delimiter, $text);
    return isset($parts[$index]) ? $parts[$index] : null;
}
/**
 * Run a single cURL request against $url and return what curl_exec() yields.
 *
 * With an empty $post no POST options are set; with at least one entry the
 * request is switched to POST and $post is sent as its fields. Cookies are
 * read from and written to /tmp/curl.cookie, and cURL's verbose output is
 * switched on.
 *
 * @param string $url  Address to request.
 * @param array  $post Form fields to send; leave empty for a request without a body.
 * @return mixed The value returned by curl_exec() (RETURNTRANSFER is enabled).
 */
function OpenURL($url, $post=array()) {
    $cookieStore = '/tmp/curl.cookie';

    // Request headers, sent exactly as listed.
    $requestHeaders = array(
        'User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:13.0) Gecko/20100101 Firefox/13.0.1',
        'Accept: application/json, text/javascript, */*; q=0.01',
        'Accept-Language: ar,en;q=0.5',
        'Connection: keep-alive',
    );

    // Collect every option first, then apply them in one call.
    $options = array(
        CURLOPT_URL => $url,
        CURLOPT_HTTPHEADER => $requestHeaders,
        CURLOPT_VERBOSE => TRUE,
        CURLOPT_RETURNTRANSFER => TRUE,
    );
    if (count($post) >= 1) {
        $options[CURLOPT_POST] = TRUE;
        $options[CURLOPT_POSTFIELDS] = $post;
    }
    $options[CURLOPT_COOKIEFILE] = $cookieStore;
    $options[CURLOPT_COOKIEJAR] = $cookieStore;

    $handle = curl_init();
    curl_setopt_array($handle, $options);

    return curl_exec($handle);
}
?>
10 年后,我发现绕过 Cloudflare 的唯一方法(而且这个方法甚至也不是万无一失的),就是用脚本通过 FlareSolverr 的 docker 容器来请求网页,这就像用一块 C4 炸药去炸一只苍蝇一样……
如果有人感兴趣,这里有 docker compose 文件和调用网页的 PHP 脚本。调用语法为:httx://domain.tld/unflare.php?url=httx://some-website.tld/whatever。请根据您的实际环境修改 PHP 文件中的 127.0.0.1:8191。
<?php
// Small HTTP front end for a FlareSolverr instance: the target address given
// in ?url= is sent to the FlareSolverr API at 127.0.0.1:8191 via the curl
// command-line tool, and the page it returns is printed.
// NOTE(review): any URL supplied by the client is forwarded as-is; consider
// restricting who may call this script or which hosts may be requested.
if (isset($_GET['url']) && is_string($_GET['url'])) {
    $url = $_GET['url'];

    // Prepend http:// unless the value already STARTS with a scheme. The
    // pattern is anchored so "http://" buried in a query string does not count.
    if (!preg_match('/^https?:\/\//i', $url)) {
        $url = 'http://' . $url;
    }

    // Build the request body with json_encode() so quotes or backslashes in
    // the URL cannot break out of the JSON string. The default flags escape
    // non-ASCII characters as \uXXXX, which keeps the payload pure ASCII and
    // therefore safe from escapeshellarg()'s locale-dependent byte stripping.
    $payload = json_encode(array(
        'cmd' => 'request.get',
        'url' => $url,
        'maxTimeout' => 280000,
    ));

    $json = null;
    if ($payload !== false) {
        // escapeshellarg() quotes the whole payload as one shell argument, so
        // the client-supplied URL can no longer inject shell commands.
        // NOTE(review): assumes a POSIX shell, as the original single-quoted command did.
        $command = 'curl -L -X POST "http://127.0.0.1:8191/v1" -H "Content-Type: application/json" --data-raw '
            . escapeshellarg($payload);
        $out = array();
        exec($command, $out);
        // Join all output lines: the reply is not guaranteed to fit on the first one.
        $json = json_decode(implode("\n", $out), true);
    }

    $status = (is_array($json) && isset($json['status'])) ? $json['status'] : null;

    if ($status === 'ok' && isset($json['solution']['response'])) {
        // The fetched page is relayed unescaped on purpose: this script acts as a proxy.
        print_r($json['solution']['response']);
    } elseif ($status === 'error') {
        print_r(isset($json['message']) ? $json['message'] : '');
    } else {
        // Unexpected or empty reply: dump what was received for debugging. The
        // URL is escaped because it is client input echoed into HTML.
        echo(htmlspecialchars($url) . '<br>');
        var_dump($json);
    }
} else {
    echo 'nothing to see...';
}
?>