file_get_contents 设置代理抓取页面

普通页面获取

  // Fetch a page directly (no proxy) and print every <h1>...</h1> element found in it.
  $url = "http://www.epooll.com/archives/806/";
  $contents = file_get_contents($url);
  if ($contents === false) {
      // file_get_contents() returns false on failure; there is nothing to search in that case.
      echo "Failed to fetch {$url}\n";
  } else {
      // '#' is used as the pattern delimiter because the pattern itself contains '/' (in "</h1>");
      // with '/' as the delimiter PCRE would end the pattern early and reject it.
      // Modifiers: i = case-insensitive, s = '.' also matches newlines.
      preg_match_all('#<h1>(.*?)</h1>#is', $contents, $matchs);
      // Index 0 holds the full matches (including the <h1> tags); index 1 would hold the inner text.
      print_r($matchs[0]);
  }

设置代理IP去采集数据

  // Fetch a page through an HTTP proxy and print the response body.
  $proxyOptions = [
      'http' => [
          // Proxy IP address and port to use.
          'proxy' => 'tcp://192.168.0.2:3128',
          // Send the complete URI in the request line, which some proxy servers require.
          'request_fulluri' => true,
      ],
  ];
  // Wrap the options in a stream context so file_get_contents() routes the request via the proxy.
  $context = stream_context_create($proxyOptions);
  $html = file_get_contents("http://www.epooll.com/archives/806/", false, $context);
  echo $html;

设置需要验证的代理IP去采集数据

  // Fetch a page through an HTTP proxy that requires Basic authentication, then print the response body.

  // LOGIN:PASSWORD - the account name and password for the proxy server, base64-encoded for the header.
  $auth = base64_encode('USER:PASS');

  $httpOptions = [
      // Proxy IP address and port to use.
      'proxy' => 'tcp://192.168.0.2:3128',
      // Send the complete URI in the request line, which some proxy servers require.
      'request_fulluri' => true,
      // Credentials are presented to the proxy via the Proxy-Authorization request header.
      'header' => "Proxy-Authorization: Basic $auth",
  ];

  // Options for the HTTP wrapper live under the 'http' key of the context array.
  $context = stream_context_create(['http' => $httpOptions]);
  $html = file_get_contents("http://www.epooll.com/archives/806/", false, $context);
  echo $html;