DEDE 4.0 自动采集源文件 提供了
更新时间:2007年09月22日 15:27:12 作者:
DEDE 4.0 自动采集源文件
放在网站根目录
WINDOWS系统 建立个 ****.cmd 内容
D:\php\php-cgi E:\wwwroot\**\cj.php
再做个计划任务就可以了(D:\php\php-cgi E:\wwwroot\**\cj.php 知道是什么吧)
LINUX 系统的定时任务偶不会,一样是运行 cj.php,会的可以回复下
cj.php 源文件
<?php
//要放别的目录请改路径
require_once(dirname(__FILE__)."/include/config_base.php");
require_once(dirname(__FILE__)."/include/pub_collection.php");
require_once(dirname(__FILE__)."/include/pub_datalist.php");
require_once(dirname(__FILE__)."/include/inc_functions.php");
require_once(dirname(__FILE__)."/include/pub_dedetag.php");
require_once(dirname(__FILE__)."/include/inc_archives_view.php");
require_once(dirname(__FILE__)."/include/inc_arclist_view.php");
// Collection jobs. One inner array per job, fields in this order:
//   collection-note name, collection-note ID, front-end column name, front-end column ID.
// Add one inner array for every job that should run.
$makearr = array(
    array('采集一',1,'分类一',25),
    array('采集二',2,'分类二',26)
);

// --- Options read by the main loop of this script ---
$makehtml  = 1; // 1 = build article HTML after importing
$islisten  = 0; // extra option: 0 = skip URLs downloaded before; -1 = only download new content; 1 = re-download everything
$sptime    = 0; // seconds to pause between downloads (for sites with anti-refresh protection)
$channelid = 1; // article model to import into
$onlytitle = 1; // (no description given by the author)

// --- Options declared by the author but not read anywhere in the visible script ---
$threadnum = 5;          // author's note: "interval time"
$pagesize  = 5;          // author's note: "thread setting"
$web       = "www-data"; // web user
$webgroup  = "www-data"; // web user group
// Main batch loop. For every job in $makearr:
//   1. download the pages listed for the collection note,
//   2. import the downloaded results into the target table(s) using the
//      export rule attached to the note,
//   3. build HTML for the newly imported articles and rebuild the home page.
foreach($makearr as $makeitem){
    $nid = $makeitem[1];    // collection note ID
    $typeid = $makeitem[3]; // front-end column ID

    // ---- 1. Collect ----
    $co = new DedeCollection();
    $co->Init();
    $co->LoadFromDB($nid);
    $co->GetSourceUrl();
    $co->dsql->SetQuery("Update #@__conote set lasttime='".mytime()."' where nid=$nid");
    $co->dsql->ExecuteNoneQuery();
    $co->dsql->SetQuery("Select aid,url From #@__courl where nid=$nid ");
    $co->dsql->Execute(99);
    while($row = $co->dsql->GetObject(99))
    {
        $safeurl = addslashes($row->url);
        $lrow = $co->dsql->GetOne("Select * From #@__co_listenurl where url like '".$safeurl."'");
        if(is_array($lrow)){
            // URL already recorded; only $islisten==0 is checked here, and it means "skip it".
            if($islisten==0) continue;
        }
        else{
            // First time this URL is seen: record it (the statement is also echoed).
            echo $inquery = "INSERT INTO #@__co_listenurl(nid,url) VALUES ('$nid', '".$safeurl."');";
            $co->dsql->ExecuteNoneQuery($inquery);
        }
        $co->DownUrl($row->aid,$row->url);
        if($sptime>0) sleep($sptime);
    }
    $co->Close();

    // ---- 2. Import into the current database ----
    $dsql = new DedeSql(false);
    // Remember the highest article ID in the column before importing, so that
    // step 3 can select only the rows added by this run.
    $maxidrow = $dsql->GetOne("Select max(ID) as maxid From #@__archives where typeid=$typeid ;");
    $maxart_id = $maxidrow['maxid'];

    $rrow = $dsql->GetOne("Select typeid From #@__conote where nid='$nid'");
    $ruleid = is_array($rrow) ? $rrow['typeid'] : '';

    // One query supplies both the rule text and the channel ID; the same
    // connection is reused instead of opening a second one.
    $row = $dsql->GetOne("Select * From #@__co_exrule where aid='$ruleid'");
    if(!is_array($row)){
        echo "找不到导入规则,无法完成操作!";
        $dsql->Close();
        continue;
    }
    $channelid = isset($row['channelid']) ? $row['channelid'] : 0;
    if(!isset($makehtml)) $makehtml = 0;
    if(!isset($onlytitle)) $onlytitle = 1;

    // Parse the rule and build INSERT templates with @#table.field#@ placeholders.
    $dtp = new DedeTagParse();
    $dtp->LoadString($row['ruleset']);
    $noteinfo = $dtp->GetTagByName('note');
    $tablenames = explode(",",$noteinfo->GetAtt('tablename'));
    $autofield = $noteinfo->GetAtt('autofield');
    $synfield = $noteinfo->GetAtt('synfield');

    $tablename1 = $tablenames[0];
    $tb1SqlKey = "Insert Into $tablename1(";
    $tb1SqlValue = " Values(";
    if(count($tablenames)>=2){
        $tablename2 = $tablenames[1];
        $tb2SqlKey = "Insert Into $tablename2(";
        $tb2SqlValue = " Values(";
        if($synfield!=''){
            // The sync field receives the ID generated by the first insert.
            $tb2SqlKey .= $synfield;
            $tb2SqlValue .= "'@$synfield@'";
        }
    }
    else{
        $tablename2 = "";
        $tb2SqlKey = "";
        $tb2SqlValue = "";
    }

    foreach($dtp->CTags as $ctag)
    {
        if($ctag->GetName()!='field') continue;
        $fieldname = $ctag->GetAtt('name');
        $tbname = $ctag->GetAtt('intable');
        if($tbname==$tablename1){
            $tb1SqlKey .= ",$fieldname";
            if($ctag->GetAtt('source')!='value'){
                $tb1SqlValue .= ",'@#{$tbname}.{$fieldname}#@'";
            }else{
                // Fixed value taken from the rule, with its macros expanded.
                $nvalue = str_replace('{tid}',$typeid,$ctag->GetInnerText());
                $nvalue = str_replace('{cid}',$channelid,$nvalue);
                $nvalue = str_replace('{rank}',0,$nvalue);
                $nvalue = str_replace('{admin}',"1",$nvalue);
                $tb1SqlValue .= ",'$nvalue'";
            }
        }
        else if($tbname==$tablename2){
            $tb2SqlKey .= ",$fieldname";
            if($ctag->GetAtt('source')!='value'){
                $tb2SqlValue .= ",'@#{$tbname}.{$fieldname}#@'";
            }else{
                // As above, except that {admin} is not expanded for the second table.
                $nvalue = str_replace('{tid}',$typeid,$ctag->GetInnerText());
                $nvalue = str_replace('{cid}',$channelid,$nvalue);
                $nvalue = str_replace('{rank}',0,$nvalue);
                $tb2SqlValue .= ",'$nvalue'";
            }
        }
    }

    // Drop the comma left after "(" when the first column was appended.
    $tb1SqlKey = str_replace('(,','(',$tb1SqlKey).")";
    $tb1SqlValue = str_replace('(,','(',$tb1SqlValue).");";
    $tb1Sql = $tb1SqlKey.$tb1SqlValue;
    // Always defined, so the per-row copy below never reads an unset variable
    // when the rule names a single table.
    $tb2Sql = "";
    if($tablename2!="")
    {
        $tb2SqlKey = str_replace("(,","(",$tb2SqlKey).")";
        $tb2SqlValue = str_replace("(,","(",$tb2SqlValue).");";
        $tb2Sql = $tb2SqlKey.$tb2SqlValue;
    }

    // Export every collected row of this note.
    $dsql->SetQuery("Select * From #@__courl where nid='$nid' order by aid asc ");
    $dsql->Execute();
    while($row = $dsql->GetObject())
    {
        $tmpSql1 = $tb1Sql;
        $tmpSql2 = $tb2Sql;
        $dtp->LoadString($row->result);
        $aid = $row->aid;
        if(!is_array($dtp->CTags)){ continue; }

        if($onlytitle){
            // Skip the row when an article with a matching title already exists.
            // NOTE(review): the check is a substring LIKE '%title%', so an empty or
            // very short title matches many rows - confirm this is intended.
            $titletag = '';
            foreach ($dtp->CTags as $ctag){
                $tvalue = $ctag->GetAtt("name");
                if($tvalue == '#@__archives.title' || $tvalue == $cfg_dbprefix.'archives.title'){
                    $titletag = $ctag;
                    break;
                }
            }
            if(is_object($titletag)){
                $title = trim(addslashes($titletag->GetInnerText()));
                $testrow = $dsql->GetOne("Select count(ID) as dd From #@__archives where title like '%$title%'");
                if($testrow['dd']>0){
                    echo "数据库已存在标题为: {$title} 的文档,程序阻止了此内容导入<br/>";
                    continue;
                }
            }
        }

        // Fill the placeholders with the escaped field contents.
        foreach($dtp->CTags as $ctag)
        {
            if($ctag->GetName()!="field") continue;
            $tvalue = $ctag->GetAtt("name");
            $fieldvalue = addslashes($ctag->GetInnerText());
            $tmpSql1 = str_replace('@#'.$tvalue.'#@',$fieldvalue,$tmpSql1);
            if($tablename2!=""){
                $tmpSql2 = str_replace('@#'.$tvalue.'#@',$fieldvalue,$tmpSql2);
            }
        }

        // Blank out whatever placeholder text is left. preg_replace replaces
        // ereg_replace (removed in PHP 7); the pattern stays greedy and the /s
        // flag lets "." cross newlines, as the POSIX version did.
        $cleanSql = preg_replace('/@#(.*)#@/s','',$tmpSql1);
        if($cleanSql !== null) $tmpSql1 = $cleanSql; // null means a PCRE error: keep the SQL as is

        $rs = $dsql->ExecuteNoneQuery($tmpSql1);
        if($rs && $tablename2!=""){
            if($synfield!=""){
                $lid = $dsql->GetLastID();
                $tmpSql2 = str_replace("@$synfield@",$lid,$tmpSql2);
                $rs = $dsql->ExecuteNoneQuery($tmpSql2);
                // Second insert failed: remove the row just added to the first table.
                if(!$rs) $dsql->ExecuteNoneQuery("Delete From $tablename1 where $autofield='$lid'");
            }
            else $dsql->ExecuteNoneQuery($tmpSql2);
        }
        $dsql->ExecuteNoneQuery("update #@__courl set isex=1 where aid='$aid'");
    }
    $dsql->Close();

    // ---- 3. Build HTML for the articles added above ----
    if($channelid>0 && $makehtml==1){
        if(!$maxart_id) $maxart_id =0;
        $dsql = new DedeSql(false);
        $dsql->SetQuery("Select ID From #@__archives where typeid=$typeid and ID >$maxart_id ;");
        $dsql->Execute();
        while($row=$dsql->GetObject())
        {
            $ID = $row->ID;
            $ac = new Archives($ID);
            $rurl = $ac->MakeHtml();
            $ac->Close();
        }
        $dsql->Close();
    }

    // Rebuild the home page from the default style's index template.
    $templet = "{style}/index.htm";
    $templet = str_replace("{style}",$cfg_df_style,$templet);
    $homeFile = dirname(__FILE__)."/index.html";
    $homeFile = str_replace("\\","/",$homeFile);
    $homeFile = str_replace("//","/",$homeFile);
    // Opening for write only checks that the file can be created.
    $fp = fopen($homeFile,"w") or die("你指定的文件名有问题,无法创建文件");
    fclose($fp);
    $pv = new PartView();
    $pv->SetTemplet($cfg_basedir.$cfg_templets_dir."/".$templet);
    $pv->SaveToHtml($homeFile);
    $pv->Close();
}
// After all jobs have run: rebuild the list pages of every column in #@__arctype.
$dsql = new DedeSql(false);
$dsql->SetQuery("Select ID,typedir From #@__arctype;");
$dsql->Execute();
while($row=$dsql->GetObject())
{
// Generate the list HTML for this column
$lv = new ListView($row->ID);
$lv->MakeHtml();
$lv->Close();
}
$dsql->Close();
?>
放在网站根目录
WINDOWS系统 建立个 ****.cmd 内容
D:\php\php-cgi E:\wwwroot\**\cj.php
再做个计划任务就可以了(D:\php\php-cgi E:\wwwroot\**\cj.php 知道是什么吧)
LINUX 系统的定时任务偶不会,一样是运行 cj.php,会的可以回复下
cj.php 源文件
复制代码 代码如下:
<?php
//要放别的目录请改路径
require_once(dirname(__FILE__)."/include/config_base.php");
require_once(dirname(__FILE__)."/include/pub_collection.php");
require_once(dirname(__FILE__)."/include/pub_datalist.php");
require_once(dirname(__FILE__)."/include/inc_functions.php");
require_once(dirname(__FILE__)."/include/pub_dedetag.php");
require_once(dirname(__FILE__)."/include/inc_archives_view.php");
require_once(dirname(__FILE__)."/include/inc_arclist_view.php");
// Collection jobs. One inner array per job, fields in this order:
//   collection-note name, collection-note ID, front-end column name, front-end column ID.
// Add one inner array for every job that should run.
$makearr = array(
    array('采集一',1,'分类一',25),
    array('采集二',2,'分类二',26)
);

// --- Options read by the main loop of this script ---
$makehtml  = 1; // 1 = build article HTML after importing
$islisten  = 0; // extra option: 0 = skip URLs downloaded before; -1 = only download new content; 1 = re-download everything
$sptime    = 0; // seconds to pause between downloads (for sites with anti-refresh protection)
$channelid = 1; // article model to import into
$onlytitle = 1; // (no description given by the author)

// --- Options declared by the author but not read anywhere in the visible script ---
$threadnum = 5;          // author's note: "interval time"
$pagesize  = 5;          // author's note: "thread setting"
$web       = "www-data"; // web user
$webgroup  = "www-data"; // web user group
// Main batch loop. For every job in $makearr:
//   1. download the pages listed for the collection note,
//   2. import the downloaded results into the target table(s) using the
//      export rule attached to the note,
//   3. build HTML for the newly imported articles and rebuild the home page.
foreach($makearr as $makeitem){
    $nid = $makeitem[1];    // collection note ID
    $typeid = $makeitem[3]; // front-end column ID

    // ---- 1. Collect ----
    $co = new DedeCollection();
    $co->Init();
    $co->LoadFromDB($nid);
    $co->GetSourceUrl();
    $co->dsql->SetQuery("Update #@__conote set lasttime='".mytime()."' where nid=$nid");
    $co->dsql->ExecuteNoneQuery();
    $co->dsql->SetQuery("Select aid,url From #@__courl where nid=$nid ");
    $co->dsql->Execute(99);
    while($row = $co->dsql->GetObject(99))
    {
        $safeurl = addslashes($row->url);
        $lrow = $co->dsql->GetOne("Select * From #@__co_listenurl where url like '".$safeurl."'");
        if(is_array($lrow)){
            // URL already recorded; only $islisten==0 is checked here, and it means "skip it".
            if($islisten==0) continue;
        }
        else{
            // First time this URL is seen: record it (the statement is also echoed).
            echo $inquery = "INSERT INTO #@__co_listenurl(nid,url) VALUES ('$nid', '".$safeurl."');";
            $co->dsql->ExecuteNoneQuery($inquery);
        }
        $co->DownUrl($row->aid,$row->url);
        if($sptime>0) sleep($sptime);
    }
    $co->Close();

    // ---- 2. Import into the current database ----
    $dsql = new DedeSql(false);
    // Remember the highest article ID in the column before importing, so that
    // step 3 can select only the rows added by this run.
    $maxidrow = $dsql->GetOne("Select max(ID) as maxid From #@__archives where typeid=$typeid ;");
    $maxart_id = $maxidrow['maxid'];

    $rrow = $dsql->GetOne("Select typeid From #@__conote where nid='$nid'");
    $ruleid = is_array($rrow) ? $rrow['typeid'] : '';

    // One query supplies both the rule text and the channel ID; the same
    // connection is reused instead of opening a second one.
    $row = $dsql->GetOne("Select * From #@__co_exrule where aid='$ruleid'");
    if(!is_array($row)){
        echo "找不到导入规则,无法完成操作!";
        $dsql->Close();
        continue;
    }
    $channelid = isset($row['channelid']) ? $row['channelid'] : 0;
    if(!isset($makehtml)) $makehtml = 0;
    if(!isset($onlytitle)) $onlytitle = 1;

    // Parse the rule and build INSERT templates with @#table.field#@ placeholders.
    $dtp = new DedeTagParse();
    $dtp->LoadString($row['ruleset']);
    $noteinfo = $dtp->GetTagByName('note');
    $tablenames = explode(",",$noteinfo->GetAtt('tablename'));
    $autofield = $noteinfo->GetAtt('autofield');
    $synfield = $noteinfo->GetAtt('synfield');

    $tablename1 = $tablenames[0];
    $tb1SqlKey = "Insert Into $tablename1(";
    $tb1SqlValue = " Values(";
    if(count($tablenames)>=2){
        $tablename2 = $tablenames[1];
        $tb2SqlKey = "Insert Into $tablename2(";
        $tb2SqlValue = " Values(";
        if($synfield!=''){
            // The sync field receives the ID generated by the first insert.
            $tb2SqlKey .= $synfield;
            $tb2SqlValue .= "'@$synfield@'";
        }
    }
    else{
        $tablename2 = "";
        $tb2SqlKey = "";
        $tb2SqlValue = "";
    }

    foreach($dtp->CTags as $ctag)
    {
        if($ctag->GetName()!='field') continue;
        $fieldname = $ctag->GetAtt('name');
        $tbname = $ctag->GetAtt('intable');
        if($tbname==$tablename1){
            $tb1SqlKey .= ",$fieldname";
            if($ctag->GetAtt('source')!='value'){
                $tb1SqlValue .= ",'@#{$tbname}.{$fieldname}#@'";
            }else{
                // Fixed value taken from the rule, with its macros expanded.
                $nvalue = str_replace('{tid}',$typeid,$ctag->GetInnerText());
                $nvalue = str_replace('{cid}',$channelid,$nvalue);
                $nvalue = str_replace('{rank}',0,$nvalue);
                $nvalue = str_replace('{admin}',"1",$nvalue);
                $tb1SqlValue .= ",'$nvalue'";
            }
        }
        else if($tbname==$tablename2){
            $tb2SqlKey .= ",$fieldname";
            if($ctag->GetAtt('source')!='value'){
                $tb2SqlValue .= ",'@#{$tbname}.{$fieldname}#@'";
            }else{
                // As above, except that {admin} is not expanded for the second table.
                $nvalue = str_replace('{tid}',$typeid,$ctag->GetInnerText());
                $nvalue = str_replace('{cid}',$channelid,$nvalue);
                $nvalue = str_replace('{rank}',0,$nvalue);
                $tb2SqlValue .= ",'$nvalue'";
            }
        }
    }

    // Drop the comma left after "(" when the first column was appended.
    $tb1SqlKey = str_replace('(,','(',$tb1SqlKey).")";
    $tb1SqlValue = str_replace('(,','(',$tb1SqlValue).");";
    $tb1Sql = $tb1SqlKey.$tb1SqlValue;
    // Always defined, so the per-row copy below never reads an unset variable
    // when the rule names a single table.
    $tb2Sql = "";
    if($tablename2!="")
    {
        $tb2SqlKey = str_replace("(,","(",$tb2SqlKey).")";
        $tb2SqlValue = str_replace("(,","(",$tb2SqlValue).");";
        $tb2Sql = $tb2SqlKey.$tb2SqlValue;
    }

    // Export every collected row of this note.
    $dsql->SetQuery("Select * From #@__courl where nid='$nid' order by aid asc ");
    $dsql->Execute();
    while($row = $dsql->GetObject())
    {
        $tmpSql1 = $tb1Sql;
        $tmpSql2 = $tb2Sql;
        $dtp->LoadString($row->result);
        $aid = $row->aid;
        if(!is_array($dtp->CTags)){ continue; }

        if($onlytitle){
            // Skip the row when an article with a matching title already exists.
            // NOTE(review): the check is a substring LIKE '%title%', so an empty or
            // very short title matches many rows - confirm this is intended.
            $titletag = '';
            foreach ($dtp->CTags as $ctag){
                $tvalue = $ctag->GetAtt("name");
                if($tvalue == '#@__archives.title' || $tvalue == $cfg_dbprefix.'archives.title'){
                    $titletag = $ctag;
                    break;
                }
            }
            if(is_object($titletag)){
                $title = trim(addslashes($titletag->GetInnerText()));
                $testrow = $dsql->GetOne("Select count(ID) as dd From #@__archives where title like '%$title%'");
                if($testrow['dd']>0){
                    echo "数据库已存在标题为: {$title} 的文档,程序阻止了此内容导入<br/>";
                    continue;
                }
            }
        }

        // Fill the placeholders with the escaped field contents.
        foreach($dtp->CTags as $ctag)
        {
            if($ctag->GetName()!="field") continue;
            $tvalue = $ctag->GetAtt("name");
            $fieldvalue = addslashes($ctag->GetInnerText());
            $tmpSql1 = str_replace('@#'.$tvalue.'#@',$fieldvalue,$tmpSql1);
            if($tablename2!=""){
                $tmpSql2 = str_replace('@#'.$tvalue.'#@',$fieldvalue,$tmpSql2);
            }
        }

        // Blank out whatever placeholder text is left. preg_replace replaces
        // ereg_replace (removed in PHP 7); the pattern stays greedy and the /s
        // flag lets "." cross newlines, as the POSIX version did.
        $cleanSql = preg_replace('/@#(.*)#@/s','',$tmpSql1);
        if($cleanSql !== null) $tmpSql1 = $cleanSql; // null means a PCRE error: keep the SQL as is

        $rs = $dsql->ExecuteNoneQuery($tmpSql1);
        if($rs && $tablename2!=""){
            if($synfield!=""){
                $lid = $dsql->GetLastID();
                $tmpSql2 = str_replace("@$synfield@",$lid,$tmpSql2);
                $rs = $dsql->ExecuteNoneQuery($tmpSql2);
                // Second insert failed: remove the row just added to the first table.
                if(!$rs) $dsql->ExecuteNoneQuery("Delete From $tablename1 where $autofield='$lid'");
            }
            else $dsql->ExecuteNoneQuery($tmpSql2);
        }
        $dsql->ExecuteNoneQuery("update #@__courl set isex=1 where aid='$aid'");
    }
    $dsql->Close();

    // ---- 3. Build HTML for the articles added above ----
    if($channelid>0 && $makehtml==1){
        if(!$maxart_id) $maxart_id =0;
        $dsql = new DedeSql(false);
        $dsql->SetQuery("Select ID From #@__archives where typeid=$typeid and ID >$maxart_id ;");
        $dsql->Execute();
        while($row=$dsql->GetObject())
        {
            $ID = $row->ID;
            $ac = new Archives($ID);
            $rurl = $ac->MakeHtml();
            $ac->Close();
        }
        $dsql->Close();
    }

    // Rebuild the home page from the default style's index template.
    $templet = "{style}/index.htm";
    $templet = str_replace("{style}",$cfg_df_style,$templet);
    $homeFile = dirname(__FILE__)."/index.html";
    $homeFile = str_replace("\\","/",$homeFile);
    $homeFile = str_replace("//","/",$homeFile);
    // Opening for write only checks that the file can be created.
    $fp = fopen($homeFile,"w") or die("你指定的文件名有问题,无法创建文件");
    fclose($fp);
    $pv = new PartView();
    $pv->SetTemplet($cfg_basedir.$cfg_templets_dir."/".$templet);
    $pv->SaveToHtml($homeFile);
    $pv->Close();
}
// After all jobs have run: rebuild the list pages of every column in #@__arctype.
$dsql = new DedeSql(false);
$dsql->SetQuery("Select ID,typedir From #@__arctype;");
$dsql->Execute();
while($row=$dsql->GetObject())
{
// Generate the list HTML for this column
$lv = new ListView($row->ID);
$lv->MakeHtml();
$lv->Close();
}
$dsql->Close();
?>
相关文章
解决安装wordpress时出现ERR_TOO_MANY_REDIRECTS重定向次数过多问题
本文主要介绍了如何解决安装wordpress时出现ERR_TOO_MANY_REDIRECTS重定向次数过多导致无法安装的问题 2017-12-12
动易开源PowerEasy® SiteWeaver™ 6.5系列产品发布免费下载了
动易开源CMS 6.5正式版今日发布2008-01-01
最新评论