注意事项
二进制传输时,数据的首字符不能为符号 [@],否则发送不出去。复现环境:PHP 5.4。
新版
<?php
/**
* Created by PhpStorm.
* User: aoshi
* Date: 2020/12/28
* Time: 15:18
*/
namespace Cron\Controller;
class CrawltestController extends BaseController
{
protected $cookie = array();
protected $referer = '';
/**
 * Log in to the target site and then crawl the first ten list pages.
 *
 * Flow:
 *  1. GET the login URL so that explainHeader() can collect the server's
 *     Set-Cookie values into $this->cookie.
 *  2. POST the login form, sending those cookies back via getHeader('post').
 *  3. Request ten list pages (offset 0, 100, ... 900). Each request uses the
 *     previously requested list URL as Referer (the first one uses $referBase)
 *     and the response body is handed to explanHtml().
 *
 * The method dumps the collected cookies and terminates the script with exit;
 * it never returns to its caller.
 *
 * @return void
 */
public function login() {
    $url = '********';
    $agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36';

    // Step 1: plain GET, only to pick up the initial cookies.
    $res = $this->curlCore($url,'get',array('agent'=>$agent));
    if(!$res) {
        exit('this is over width first curl error');
    }
    $this->explainHeader($res['response_header']);

    // Step 2: submit the login form with the cookies collected above.
    // NOTE(review): the field name 'Sumit' looks like a typo of 'Submit', but it
    // may be the name the remote form really uses - confirm before changing.
    $params = array(
        'content_type'=>'urlencode',
        'data'=>array(
            'account'=>'****.hz.cn',
            'password'=>'*****',
            'Sumit'=>'submit'
        ),
        'referer'=>$url,
        'agent'=>$agent,
        'headers'=>$this->getHeader('post'),
    );
    $resSecond = $this->curlCore($url,'post',$params);
    // Check the result of the POST itself (the first response was already validated).
    if(!$resSecond) {
        exit('this is over width second curl error');
    }
    $this->explainHeader($resSecond['response_header']);

    // Step 3: walk the list pages.
    $baseUrl = '********';
    $referBase = '*******';
    $listUrl = '';
    for($i = 0;$i<10;$i++) {
        // The previous page acts as Referer for the next one.
        $this->referer = $listUrl ? $listUrl : $referBase;
        $listUrl = $i ? $baseUrl . '?offset=' . ($i * 100) : $baseUrl;

        $listRes = $this->curlCore($listUrl,'get',array(
            'referer'=>$this->referer,
            'agent'=>$agent,
            'headers'=>$this->getHeader(),
        ));
        if($listRes) {
            // curlCore() returns an array; explanHtml() needs only the HTML body.
            $this->explanHtml($listRes['response_body']);
        } else {
            echo 'list page request failed: ' . $listUrl . PHP_EOL;
        }
        sleep(2); // throttle requests
    }

    var_export($this->cookie);
    exit;
}
/**
 * Build the request header lines passed to cURL (CURLOPT_HTTPHEADER).
 *
 * A Cookie header is built from the entries in $this->cookie (omitted when
 * there are none). For POST a form-urlencoded Content-Type is added; for any
 * other method JSON Content-Type and Accept headers are added.
 *
 * @param string $method HTTP method name, case-insensitive.
 * @return array List of "Name: value" header lines.
 */
public function getHeader($method = 'get') {
    $method = strtoupper($method);
    $headersMap = array();

    // Join cookie pairs with "; ". Stored entries may carry a trailing ";" or
    // "; " left over from Set-Cookie parsing, so strip that first.
    $cookiePairs = array();
    foreach((array)$this->cookie as $cookie) {
        $cookie = rtrim(trim($cookie), '; ');
        if($cookie !== '') {
            $cookiePairs[] = $cookie;
        }
    }
    if($cookiePairs) {
        $headersMap['Cookie'] = implode('; ', $cookiePairs);
    }

    if($method == 'POST') {
        $headersMap['Content-type'] = 'application/x-www-form-urlencoded';
    } else {
        // The value must not repeat the header name.
        $headersMap['Content-type'] = 'application/json;charset=utf-8';
        $headersMap['Accept'] = 'application/json';
    }

    $headers = array();
    foreach($headersMap as $headerKey => $headerVal) {
        // No whitespace is allowed between the field name and the colon.
        $headers[] = $headerKey . ': ' . $headerVal;
    }
    return $headers;
}
/**
 * Core cURL wrapper: performs one HTTP request and returns headers, body and timing info.
 *
 * @param string $url    Request URL.
 * @param string $method HTTP method, case-insensitive: GET, POST, PUT or DELETE.
 * @param array  $params Optional settings (all keys optional):
 *                       - content_type: 'urlencode' (http_build_query), 'json' (json_encode);
 *                         any other value passes an array body to cURL unchanged
 *                       - data: request body, an array or an already encoded string
 *                       - referer, agent: Referer / User-Agent header values
 *                       - headers: list of raw header lines for CURLOPT_HTTPHEADER
 *                       - time_out: total timeout in seconds, default 10 ('timeout' is accepted as an alias)
 *                       - ssl: when truthy, explicitly enables peer and host verification
 *                       - cacert_pem / cacert_path: CA bundle file / CA directory, used only with ssl
 * @return array|false   On success an array with keys response_header, response_body,
 *                       requestHeader, connectTime, preTransferTime, startTransferTime,
 *                       redirectTime, totalTime, responseContentType. On failure false,
 *                       with a message stored in $this->error.
 */
public function curlCore($url,$method = 'GET',$params = array()) {
    $method = strtoupper($method);

    // Fill in every supported key so later reads never hit an undefined index.
    $defaults = array(
        'content_type' => '',
        'data'         => null,
        'referer'      => '',
        'agent'        => '',
        'headers'      => array(),
        'time_out'     => 0,
        'timeout'      => 0,
        'ssl'          => false,
        'cacert_pem'   => '',
        'cacert_path'  => '',
    );
    $params = array_merge($defaults, is_array($params) ? $params : array());

    if($params['time_out']) {
        $timeOut = $params['time_out'];
    } elseif($params['timeout']) {
        $timeOut = $params['timeout'];
    } else {
        $timeOut = 10;
    }

    $ch = curl_init();
    if($params['ssl']) {
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true); // verify the peer certificate
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);    // 2 = certificate name must match the host (1 is no longer supported)
        if($params['cacert_pem']) {
            curl_setopt($ch, CURLOPT_CAINFO, $params['cacert_pem']);  // CA bundle file
        }
        if($params['cacert_path']) {
            curl_setopt($ch, CURLOPT_CAPATH, $params['cacert_path']); // directory holding CA certificates
        }
    }

    // Encode the request body.
    $data = '';
    if(is_array($params['data'])) {
        switch($params['content_type']) {
            case 'urlencode': // key1=val1&key2=val2, for application/x-www-form-urlencoded
                $data = http_build_query($params['data']);
                break;
            case 'json':
                $data = json_encode($params['data']);
                break;
            default:
                // An array is sent by cURL as multipart/form-data. On old PHP
                // versions a value starting with "@" is treated as a file upload.
                $data = $params['data'];
                break;
        }
    } elseif($params['data'] !== null) {
        $data = $params['data']; // caller supplied an already encoded body
    }

    switch($method) {
        case 'GET':
            curl_setopt($ch, CURLOPT_HTTPGET, true); // GET is the default; set explicitly for clarity
            break;
        case 'POST':
            curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "POST");
            curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
            break;
        case 'PUT':
            curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "PUT");
            curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
            break;
        case 'DELETE':
            curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "DELETE");
            curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
            break;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_AUTOREFERER, true);       // fill in Referer automatically when following redirects
    curl_setopt($ch, CURLOPT_MAXREDIRS, 3);            // follow at most 3 redirects
    curl_setopt($ch, CURLOPT_UNRESTRICTED_AUTH, true); // keep sending credentials across redirects, even to another host
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);    // follow "Location:" redirects
    curl_setopt($ch, CURLOPT_HEADER, true);            // include response headers in the returned stream
    curl_setopt($ch, CURLINFO_HEADER_OUT, true);       // record the request headers for curl_getinfo()
    curl_setopt($ch, CURLOPT_NOBODY, false);           // keep the response body
    if($params['referer']) {
        curl_setopt($ch, CURLOPT_REFERER, $params['referer']);
    }
    if($params['agent']) {
        curl_setopt($ch, CURLOPT_USERAGENT, $params['agent']);
    }
    curl_setopt($ch, CURLOPT_TIMEOUT, $timeOut);       // overall time limit so a request cannot hang forever
    if($params['headers']) {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $params['headers']);
    }
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);    // return the response instead of printing it
    curl_setopt($ch, CURLOPT_VERBOSE, true);           // debugging aid: cURL prints transfer details

    $responseStream = curl_exec($ch);                  // headers + body, or false on failure
    $curlError = curl_error($ch);                      // must be read before the handle is closed
    $responseHeaderSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);      // total size of the response headers
    $requestHeader = curl_getinfo($ch, CURLINFO_HEADER_OUT);            // request headers as sent
    $connectTime = curl_getinfo($ch, CURLINFO_CONNECT_TIME);            // time to establish the connection
    $preTransferTime = curl_getinfo($ch, CURLINFO_PRETRANSFER_TIME);    // time until the transfer was about to begin
    $startTransferTime = curl_getinfo($ch, CURLINFO_STARTTRANSFER_TIME);// time until the first byte was about to be transferred
    $redirectTime = curl_getinfo($ch, CURLINFO_REDIRECT_TIME);          // time spent in redirect steps
    $totalTime = curl_getinfo($ch, CURLINFO_TOTAL_TIME);                // total time of the last transfer
    $responseContentType = curl_getinfo($ch, CURLINFO_CONTENT_TYPE);    // response Content-Type, NULL if none was sent
    curl_close($ch);

    if($responseStream === false) {
        // Prefer the real cURL error; fall back to the original generic message.
        $this->error = $curlError !== '' ? $curlError : '参数错误';
        return false;
    }

    $responseHeader = substr($responseStream, 0, $responseHeaderSize);
    $responseBody = substr($responseStream, $responseHeaderSize);
    if($responseBody === false) {
        $responseBody = ''; // substr() yields false for an empty body on old PHP; a body of "0" must survive
    }
    return array(
        'response_header'=>$responseHeader,
        'response_body'=>$responseBody,
        'requestHeader'=>$requestHeader,
        'connectTime'=>$connectTime,
        'preTransferTime'=>$preTransferTime,
        'startTransferTime'=>$startTransferTime,
        'redirectTime'=>$redirectTime,
        'totalTime'=>$totalTime,
        'responseContentType'=>$responseContentType,
    );
}
/**
 * Send a GET or form POST request, print the request/response headers and
 * record any cookies the server sets.
 *
 * @param string $url        Request URL.
 * @param string $referUrl   Referer header value; skipped when empty.
 * @param array  $headers    Raw header lines for CURLOPT_HTTPHEADER; skipped when empty.
 * @param int    $requestTyp 2 sends a POST with $data url-encoded; any other value sends a GET.
 * @param array  $data       Form fields, used only for POST.
 * @return string Raw response: response headers followed by the body. The script
 *                terminates via exit() when the transfer fails.
 */
protected function curlRequest($url,$referUrl,$headers,$requestTyp = 1,$data = array()) {
    $agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36';
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, true);      // include response headers in the returned string
    curl_setopt($ch, CURLINFO_HEADER_OUT, true); // record the request headers for curl_getinfo()
    if($referUrl){
        curl_setopt($ch, CURLOPT_REFERER, $referUrl);
    }
    curl_setopt($ch, CURLOPT_USERAGENT, $agent);
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);       // time limit so a request cannot hang forever
    if($headers) {
        curl_setopt($ch, CURLOPT_HTTPHEADER,$headers);
    }
    if($requestTyp == 2) {
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($data));
    }
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    $return_str = curl_exec($ch);
    $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    $requestStr = curl_getinfo($ch, CURLINFO_HEADER_OUT);
    curl_close($ch); // close exactly once

    if($return_str === false) {
        exit('error with stop');
    }

    // Slice the headers only after the transfer is known to have succeeded.
    $header = substr($return_str, 0, $header_size);
    var_export($requestStr);
    echo PHP_EOL;
    var_export($header);
    echo PHP_EOL;
    // explainHeader() takes the raw header block as its only argument.
    $this->explainHeader($header);
    return $return_str;
}
/**
 * Send a GET or form POST request, print the request headers and record any
 * cookies the server sets.
 *
 * @param string $url        Request URL.
 * @param string $referUrl   Referer header value; skipped when empty.
 * @param array  $headers    Raw header lines for CURLOPT_HTTPHEADER; skipped when empty.
 * @param int    $requestTyp 2 sends a POST with $data url-encoded; any other value sends a GET.
 * @param array  $data       Form fields, used only for POST.
 * @return string Raw response: response headers followed by the body. The script
 *                terminates via exit() when the transfer fails.
 */
protected function curlRequestNew($url,$referUrl,$headers,$requestTyp = 1,$data = array()) {
    $agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36';
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, true);      // include response headers in the returned string
    curl_setopt($ch, CURLINFO_HEADER_OUT, true); // record the request headers for curl_getinfo()
    if($referUrl){
        curl_setopt($ch, CURLOPT_REFERER, $referUrl);   // "Referer:" request header
    }
    curl_setopt($ch, CURLOPT_USERAGENT, $agent);        // "User-Agent:" request header
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);              // time limit so a request cannot hang forever
    if($headers) {
        curl_setopt($ch, CURLOPT_HTTPHEADER,$headers);  // extra raw header lines
    }
    if($requestTyp == 2) {
        curl_setopt($ch, CURLOPT_POST, true);           // regular application/x-www-form-urlencoded POST
        // A string such as 'para1=val1&para2=val2' is sent url-encoded; passing an
        // array instead would make cURL switch to multipart/form-data.
        curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($data));
    }
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);     // return the response instead of printing it

    $return_str = curl_exec($ch);
    $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE); // size of the response headers
    $requestStr = curl_getinfo($ch, CURLINFO_HEADER_OUT);   // request headers as sent
    curl_close($ch); // close exactly once

    if($return_str === false) {
        exit('error with stop');
    }

    // Slice the headers only after the transfer is known to have succeeded.
    $header = substr($return_str, 0, $header_size);
    var_export($requestStr);
    echo PHP_EOL;
    // explainHeader() takes the raw header block as its only argument.
    $this->explainHeader($header);
    return $return_str;
}
/**
 * Parse a raw response header block into a map and remember the cookies it sets.
 *
 * Header names are lower-cased. A header that occurs more than once is returned
 * as a flat list of its values in order of appearance. Every Set-Cookie line
 * contributes its leading "name=value" pair to $this->cookie; a later cookie
 * with the same name replaces the earlier one. Attributes such as path, domain
 * or expires are dropped because they are not sent back to the server.
 *
 * The split header lines are dumped with var_export() as a debugging aid.
 *
 * @param string $header Raw response headers, lines separated by CRLF.
 * @return array Map of lower-cased header name => value (or list of values).
 *               When the block contained Set-Cookie lines, 'set-cookie' holds
 *               all cookies collected so far on this object.
 */
public function explainHeader($header) {
    $headArr = explode("\r\n",$header);
    var_export($headArr);
    $map = array();
    foreach($headArr as $val) {
        $pos = strpos($val,':');
        if($pos === false || $pos === 0) {
            continue; // status line, blank line, or a line without a header name
        }
        $mapKey = strtolower(trim(substr($val,0,$pos)));
        $mapValue = trim(substr($val,$pos+1));

        if($mapKey == 'set-cookie') {
            // Keep only the "name=value" pair in front of the first ";".
            $semi = strpos($mapValue,';');
            $pair = trim($semi === false ? $mapValue : substr($mapValue,0,$semi));
            if($pair !== '') {
                $eq = strpos($pair,'=');
                $cookieName = $eq === false ? $pair : substr($pair,0,$eq);
                // The trailing ";" keeps entries safe to concatenate into a Cookie header.
                $this->cookie[$cookieName] = $pair . ';';
            }
            $mapValue = $pair;
        }

        if(!isset($map[$mapKey])) {
            $map[$mapKey] = $mapValue;
        } else {
            if(!is_array($map[$mapKey])) {
                $map[$mapKey] = array($map[$mapKey]);
            }
            $map[$mapKey][] = $mapValue;
        }
    }
    if(isset($map['set-cookie'])) {
        $map['set-cookie'] = array_values($this->cookie);
    }
    return $map;
}
/**
 * Fetch ten list pages (offset 0, 100, ... 900) and pass each page to explanHtml().
 *
 * The first request uses $referBase as Referer; every later request uses the
 * URL of the page requested just before it. Terminates the script when done.
 */
public function getList() {
    $baseUrl = '*************';
    $referBase = '***********************';
    $url = '';
    foreach(range(0, 9) as $page) {
        // Referer is the previous page, or the base referer on the first pass.
        $referUrl = $url ? $url : $referBase;
        $url = $page ? $baseUrl . '?offset=' . ($page * 100) : $baseUrl;
        $this->explanHtml($this->curlGet($url,$referUrl));
        sleep(2); // throttle requests
    }
    exit('this is over');
}
/**
 * Perform a GET request with a fixed header set and return the response body.
 *
 * NOTE(review): the Cookie header below is a hard-coded session and TLS
 * certificate verification is switched off - confirm both are intended
 * outside of debugging.
 *
 * @param string $url      Request URL.
 * @param string $referUrl Referer header value; skipped when empty.
 * @return string|false    Response body, or false when curl_exec() fails.
 */
protected function curlGet($url,$referUrl) {
    $options = array(
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => false,
        CURLOPT_URL            => $url,
        CURLOPT_USERAGENT      => 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36',
        CURLOPT_TIMEOUT        => 10, // time limit so a request cannot hang forever
        CURLOPT_HTTPHEADER     => array(
            "Content-type:application/json;charset=utf-8",
            "Accept:application/json",
            "Cookie: ASPSESSIONIDACQSQDSB=GHGNMIJACHNKAHHMJLEGMIDO; ASPSESSIONIDCAQRRDSA=KCDNMALAGDKKMLKLIFECCION; User=UserLocation=HANGZHOU&UserNameEN=Coco+Cao&Logintime=28&UserDept=SALESHEAD&AccountName=cococao%2Ehz%2Ecn"
        ),
        CURLOPT_RETURNTRANSFER => true,
    );
    if($referUrl) {
        $options[CURLOPT_REFERER] = $referUrl;
    }

    $handle = curl_init();
    curl_setopt_array($handle, $options);
    $body = curl_exec($handle);
    curl_close($handle);
    return $body;
}
/**
 * Extract list rows from a page's HTML and (eventually) store them.
 *
 * Each matching <table> row yields link, en_name, zh_name, nature, sales and
 * supervisor, plus an empty content field.
 *
 * NOTE(review): the first extracted row is dumped and the script exits, so the
 * database insert below is currently never reached - looks like a debugging
 * stop; confirm before relying on this method to persist data.
 *
 * @param string $htmlStr HTML of one list page.
 */
protected function explanHtml($htmlStr){
    $rowPattern = '/<table[^>]*?>\s*?<tr>\s*?<td[^>]*?><a href="(.*?)" class="style10">(.*?)<\/a><\/td>\s*?<td[^>]*?>(.*?)<\/td>\s*?<td[^>]*?>(.*?)<\/td>\s*?<td[^>]*?>(.*?)<\/td>\s*?<td[^>]*?>(.*?)<\/td>\s*?<\/tr>\s*?<\/table>/i';
    preg_match_all($rowPattern, $htmlStr, $results);
    foreach($results[1] as $idx => $link) {
        $saveData = array(
            'link'       => $link,
            'en_name'    => $results[2][$idx],
            'zh_name'    => $results[3][$idx],
            'nature'     => $results[4][$idx],
            'sales'      => $results[5][$idx],
            'supervisor' => $results[6][$idx],
            'content'    => '',
        );
        var_export($saveData);
        exit;
        try{
            // With debug mode enabled the framework raises the error directly.
            $inserted = M('PccAsicLlc','wx_',C('ARTICLE_DSN'))->add($saveData);
            if(!$inserted) {
                throw new \Exception($saveData['zh_name']);
            }
        } catch (\Exception $e) {
            echo 'Caught exception: ' . $e->getMessage() . PHP_EOL;
        }
    }
}
/**
 * Download the detail page of each selected record and store its HTML in the
 * record's content field.
 *
 * The Referer for each request is the list page built from the record id
 * rounded down to a multiple of 100. Terminates the script when done.
 *
 * NOTE(review): the query is restricted to id = 1, which looks like a
 * debugging filter - confirm.
 */
public function getInfo() {
    $baseUrl = '***********************';
    $referBase = '****************************';
    $lists = M('PccAsicLlc','wx_',C('ARTICLE_DSN'))->field('id,link')->where('id = 1')->select();
    if(!is_array($lists)) {
        $lists = array(); // query failed or matched nothing: nothing to iterate
    }
    foreach($lists as $key => $val) {
        $url = $baseUrl . $val['link'];
        $offset = floor($val['id'] / 100) * 100;
        if($offset) {
            $referUrl = $referBase . '?offset=' . $offset;
        } else {
            $referUrl = $referBase;
        }
        $htmlUrl = $this->curlGet($url,$referUrl);
        if($htmlUrl === false) {
            // Do not overwrite stored content with a failed download.
            echo 'fetch failed: ' . $url . PHP_EOL;
        } else {
            M('PccAsicLlc','wx_',C('ARTICLE_DSN'))->save(array('id'=>$val['id'],'content'=>$htmlUrl));
        }
        if($key % 100 == 0) {
            sleep(1); // brief pause every 100 records
        }
    }
    exit('this is over');
}
/**
 * Report gaps in the id sequence: prints every id from 1 to 999 for which
 * no record is found, then terminates the script.
 */
public function testId() {
    $id = 1;
    while($id < 1000) {
        $row = M('PccAsicLlc','wx_',C('ARTICLE_DSN'))->find($id);
        if(!$row) {
            echo $id . PHP_EOL;
        }
        $id++;
    }
    exit('this is over');
}
}
<?php
/**
* Created by PhpStorm.
* User: aoshi
* Date: 2020/12/28
* Time: 15:18
*/
namespace Cron\Controller;
class CrawltestController extends BaseController
{
/**
 * Fetch ten list pages (offset 0, 100, ... 900) and pass each page to explanHtml().
 *
 * The first request uses $referBase as Referer; every later request uses the
 * URL of the page requested just before it. Terminates the script when done.
 */
public function login() {
    $baseUrl = '******************************';
    $referBase = '******************************';
    $url = ''; // no previous page yet
    for($i = 0;$i<10;$i++) {
        if($url) {
            $referUrl = $url;
        } else {
            $referUrl = $referBase;
        }
        if($i) {
            $url = $baseUrl . '?offset=' . $i *100;
        } else {
            $url = $baseUrl;
        }
        $htmlStr = $this->curlGet($url,$referUrl);
        $this->explanHtml($htmlStr);
        sleep(2); // throttle requests
    }
    exit('this is over');
}
/**
 * Perform a GET request with a fixed header set and return the response body.
 *
 * NOTE(review): the Cookie header below is a hard-coded session and TLS
 * certificate verification is switched off - confirm both are intended
 * outside of debugging.
 *
 * @param string $url      Request URL.
 * @param string $referUrl Referer header value; skipped when empty.
 * @return string|false    Response body, or false when curl_exec() fails.
 */
protected function curlGet($url,$referUrl) {
    $options = array(
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => false,
        CURLOPT_URL            => $url,
        CURLOPT_USERAGENT      => 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36',
        CURLOPT_TIMEOUT        => 10, // time limit so a request cannot hang forever
        CURLOPT_HTTPHEADER     => array(
            "Content-type:application/json;charset=utf-8",
            "Accept:application/json",
            "Cookie: ASPSESSIONIDACQSQDSB=GHGNMIJACHNKAHHMJLEGMIDO; ASPSESSIONIDCAQRRDSA=KCDNMALAGDKKMLKLIFECCION; User=UserLocation=HANGZHOU&UserNameEN=Coco+Cao&Logintime=28&UserDept=SALESHEAD&AccountName=cococao%2Ehz%2Ecn"
        ),
        CURLOPT_RETURNTRANSFER => true,
    );
    if($referUrl) {
        $options[CURLOPT_REFERER] = $referUrl;
    }

    $handle = curl_init();
    curl_setopt_array($handle, $options);
    $body = curl_exec($handle);
    curl_close($handle);
    return $body;
}
/**
 * Extract list rows from a page's HTML and insert each one into the database.
 *
 * Each matching <table> row yields link, en_name, zh_name, nature, sales and
 * supervisor, plus an empty content field. A failed insert is reported on
 * output (using the row's zh_name) and processing continues with the next row.
 *
 * @param string $htmlStr HTML of one list page.
 */
protected function explanHtml($htmlStr){
    $rowPattern = '/<table[^>]*?>\s*?<tr>\s*?<td[^>]*?><a href="(.*?)" class="style10">(.*?)<\/a><\/td>\s*?<td[^>]*?>(.*?)<\/td>\s*?<td[^>]*?>(.*?)<\/td>\s*?<td[^>]*?>(.*?)<\/td>\s*?<td[^>]*?>(.*?)<\/td>\s*?<\/tr>\s*?<\/table>/i';
    preg_match_all($rowPattern, $htmlStr, $results);
    foreach($results[1] as $idx => $link) {
        $saveData = array(
            'link'       => $link,
            'en_name'    => $results[2][$idx],
            'zh_name'    => $results[3][$idx],
            'nature'     => $results[4][$idx],
            'sales'      => $results[5][$idx],
            'supervisor' => $results[6][$idx],
            'content'    => '',
        );
        try{
            // With debug mode enabled the framework raises the error directly.
            $inserted = M('PccAsicLlc','wx_',C('ARTICLE_DSN'))->add($saveData);
            if(!$inserted) {
                throw new \Exception($saveData['zh_name']);
            }
        } catch (\Exception $e) {
            echo 'Caught exception: ' . $e->getMessage() . PHP_EOL;
        }
    }
}
/**
 * Download the detail page of each selected record and store its HTML in the
 * record's content field.
 *
 * The Referer for each request is the list page built from the record id
 * rounded down to a multiple of 100. Terminates the script when done.
 *
 * NOTE(review): the query is restricted to id = 1, which looks like a
 * debugging filter - confirm.
 */
public function getInfo() {
    $baseUrl = '******************************/';
    $referBase = '******************************';
    $lists = M('PccAsicLlc','wx_',C('ARTICLE_DSN'))->field('id,link')->where('id = 1')->select();
    if(!is_array($lists)) {
        $lists = array(); // query failed or matched nothing: nothing to iterate
    }
    foreach($lists as $key => $val) {
        $url = $baseUrl . $val['link'];
        $offset = floor($val['id'] / 100) * 100;
        if($offset) {
            $referUrl = $referBase . '?offset=' . $offset;
        } else {
            $referUrl = $referBase;
        }
        $htmlUrl = $this->curlGet($url,$referUrl);
        if($htmlUrl === false) {
            // Do not overwrite stored content with a failed download.
            echo 'fetch failed: ' . $url . PHP_EOL;
        } else {
            M('PccAsicLlc','wx_',C('ARTICLE_DSN'))->save(array('id'=>$val['id'],'content'=>$htmlUrl));
        }
        if($key % 100 == 0) {
            sleep(1); // brief pause every 100 records
        }
    }
    exit('this is over');
}
/**
 * Report gaps in the id sequence: prints every id from 1 to 999 for which
 * no record is found, then terminates the script.
 */
public function testId() {
    $id = 1;
    while($id < 1000) {
        $row = M('PccAsicLlc','wx_',C('ARTICLE_DSN'))->find($id);
        if(!$row) {
            echo $id . PHP_EOL;
        }
        $id++;
    }
    exit('this is over');
}
}