php抓取网页指定内容(之前做平台内容发布审核都是自己构建一套违禁词库)

优采云 发布时间: 2021-12-27 00:02

  php抓取网页指定内容(之前做平台内容发布审核都是自己构建一套违禁词库)

  之前平台的内容发布审核,是自己搭建一套违禁词库,在代码中用词库来判断用户发布的内容;现在可以使用百度 AI 的 API 来完成这个功能。接下来简单说一下怎么做:

  首先打开百度ai开发平台注册账号

  

  

  进入控制台

  

  创建自己的应用,获取 API Key 和 Secret Key

  

  进入文档页面,文字审核:

  

  图片审核:

  

  文档很详细,实现对用户发布内容的审核和图片审核非常方便简单。

  我没有使用官方的 SDK,只是简单地集成实践了一下。以下是我用 PHP 实现的简单代码演示:

  

use Nnt\Controller\Application;

/**
 * Thin client for Baidu AI content-moderation endpoints (text, image, avatar).
 *
 * The OAuth access token is cached in Redis under the key "filterToken" via the
 * application's DI container. When a call made with a cached token is rejected
 * as invalid/expired, the token is refreshed once and the call is retried.
 */
class Sentive
{
    /** OAuth token endpoint. */
    protected $accessTokenUrl = 'https://aip.baidubce.com/oauth/2.0/token';

    /** Text moderation endpoint. */
    protected $textUrl = 'https://aip.baidubce.com/rest/2.0/antispam/v2/spam';

    /** Image moderation endpoint. */
    protected $imgUrl = 'https://aip.baidubce.com/api/v1/solution/direct/img_censor';

    /** Avatar moderation endpoint. */
    protected $avatarUrl = 'https://aip.baidubce.com/rest/2.0/solution/v1/face_audit';

    protected $grant_type;

    protected $client_id;

    protected $client_secret;

    public function __construct()
    {
        $this->grant_type = 'client_credentials';
        $this->client_id = 'xxx'; // API Key
        $this->client_secret = 'xxx'; // Secret Key
    }

    /**
     * POST a pre-encoded body to $url and return the raw response.
     *
     * @param string $url   Target URL.
     * @param string $param Request body (already encoded by the caller).
     *
     * @return string|false Response body; false when an argument is empty or
     *                      when cURL fails.
     */
    public static function request($url = '', $param = '')
    {
        if (empty($url) || empty($param)) {
            return false;
        }

        $curl = curl_init();
        curl_setopt($curl, CURLOPT_URL, $url);
        curl_setopt($curl, CURLOPT_HEADER, 0);
        // Return the response as a string instead of printing it.
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($curl, CURLOPT_POST, 1);
        curl_setopt($curl, CURLOPT_POSTFIELDS, $param);

        $data = curl_exec($curl);
        curl_close($curl);

        return $data;
    }

    /**
     * POST $param to $url with a Content-Type chosen by $type.
     *
     * @param string       $url   Target URL.
     * @param string|array $param Request body. An array is encoded to match the
     *                            Content-Type header: URL-encoded for "text",
     *                            JSON otherwise.
     * @param string       $type  "text" sends application/x-www-form-urlencoded;
     *                            any other value sends application/json.
     *
     * @return string|false Response body, or false when an argument is empty.
     *
     * @throws \Exception When the cURL transfer fails.
     */
    public static function request_post($url = '', $param = array(), $type = 'text')
    {
        if (empty($url) || empty($param)) {
            return false;
        }

        $isForm = ($type == "text");

        // Handing an array to CURLOPT_POSTFIELDS makes cURL build a multipart
        // body, which would contradict the explicit Content-Type set below.
        if (is_array($param)) {
            $param = $isForm ? http_build_query($param, '', '&') : json_encode($param);
        }

        $contentType = $isForm
            ? 'Content-Type: application/x-www-form-urlencoded'
            : 'Content-Type: application/json;charset=utf-8';

        $curl = curl_init();
        curl_setopt($curl, CURLOPT_URL, $url);
        curl_setopt($curl, CURLOPT_HEADER, 0);
        // Return the response as a string instead of printing it.
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($curl, CURLOPT_POST, 1);
        // TLS peer verification stays at cURL's default (enabled): the access
        // token travels in the query string of every request.
        curl_setopt($curl, CURLOPT_POSTFIELDS, $param);
        curl_setopt($curl, CURLOPT_HTTPHEADER, array($contentType));

        $data = curl_exec($curl);
        if ($data === false) {
            // Read the error before closing, and close before throwing so the
            // handle is not leaked on the failure path.
            $error = curl_error($curl);
            curl_close($curl);
            throw new \Exception($error);
        }
        curl_close($curl);

        return $data;
    }

    /**
     * Request a fresh access token and cache it in Redis under "filterToken".
     *
     * NOTE(review): the token has a limited lifetime, but the signature of the
     * cache wrapper's setkey() is not visible here, so no expiry is set.
     * Expired tokens are instead handled by the retry in callWithToken().
     *
     * @return string The access token.
     *
     * @throws \RuntimeException When the response contains no access_token.
     */
    public function getToken()
    {
        $query = http_build_query(array(
            'grant_type' => $this->grant_type,
            'client_id' => $this->client_id,
            'client_secret' => $this->client_secret,
        ), '', '&');

        $res = self::request($this->accessTokenUrl, $query);
        $decoded = is_string($res) ? json_decode($res, true) : null;

        if (!is_array($decoded) || empty($decoded['access_token'])) {
            $reason = (is_array($decoded) && isset($decoded['error_description']))
                ? $decoded['error_description']
                : 'no access_token in response';
            // Fail loudly rather than caching and returning null.
            throw new \RuntimeException('Failed to obtain access token: ' . $reason);
        }

        $redis = Application::$shared->di->getRedis();
        $redis->setkey("filterToken", $decoded['access_token']);

        return $decoded['access_token'];
    }

    /**
     * Moderate text content.
     *
     * @param string|array $data Request body for the text endpoint; an array is
     *                           URL-encoded before sending.
     *
     * @return mixed Decoded JSON response (null when it cannot be decoded).
     */
    public function textVerify($data)
    {
        return $this->callWithToken($this->textUrl, $data, "text");
    }

    /**
     * Moderate an image against the fixed list of scenes below.
     *
     * @param mixed $img Value sent as the "image" field of the JSON body.
     *
     * @return mixed Decoded JSON response (null when it cannot be decoded).
     */
    public function imgVerify($img)
    {
        $bodys = json_encode(array(
            'image' => $img,
            'scenes' => array(
                "ocr",
                "face",
                "public",
                "politician",
                "antiporn",
                "terror",
                "webimage",
                "disgust",
                'watermark',
            ),
        ));

        return $this->callWithToken($this->imgUrl, $bodys, "img");
    }

    /**
     * Moderate an avatar image.
     *
     * @param mixed $img Value sent as the "images" form field.
     *
     * @return mixed Decoded JSON response (null when it cannot be decoded).
     */
    public function avatarVerify($img)
    {
        $bodys = array(
            "configId" => "1",
            "images" => $img,
        );

        return $this->callWithToken($this->avatarUrl, $bodys, "text");
    }

    /**
     * Call $endpoint with an access token, refreshing a stale cached token once.
     *
     * @param string       $endpoint Endpoint URL without query string.
     * @param string|array $payload  Request body passed to request_post().
     * @param string       $type     Body type passed to request_post().
     *
     * @return mixed Decoded JSON response (null when it cannot be decoded).
     */
    protected function callWithToken($endpoint, $payload, $type)
    {
        $redis = Application::$shared->di->getRedis();
        $token = $redis->get("filterToken");
        $fromCache = !empty($token);
        if (!$fromCache) {
            $token = $this->getToken();
        }

        $result = json_decode(
            self::request_post($endpoint . "?access_token=" . urlencode($token), $payload, $type),
            true
        );

        // Error codes 110/111 mean the access token is invalid or expired per
        // the Baidu AI error-code table. The cached token carries no expiry, so
        // refresh it and retry exactly once.
        $tokenRejected = is_array($result)
            && isset($result['error_code'])
            && in_array((int) $result['error_code'], array(110, 111), true);

        if ($fromCache && $tokenRejected) {
            $token = $this->getToken();
            $result = json_decode(
                self::request_post($endpoint . "?access_token=" . urlencode($token), $payload, $type),
                true
            );
        }

        return $result;
    }
}

0 个评论

要回复文章请先登录注册


官方客服QQ群

微信人工客服

QQ人工客服


线