DEDE 4.0 自动采集源文件 提供了

 更新时间:2007年09月22日 15:27:12   作者:  
DEDE 4.0 自动采集源文件
放在网站根目录。
Windows 系统:新建一个 ****.cmd 文件,内容如下:
D:\php\php-cgi E:\wwwroot\**\cj.php
再建立一个计划任务,定时运行该 .cmd 文件即可(也就是定时执行 D:\php\php-cgi E:\wwwroot\**\cj.php)。
Linux 系统的定时任务我不会设置,做法同样是定时运行 cj.php,会的朋友可以回复一下。

cj.php  源文件

复制代码 代码如下:

<?php
// Unattended DedeCMS collection script: includes and job configuration.
// The include paths assume this file sits next to the "include" directory;
// adjust them if the script is placed elsewhere.
require_once(dirname(__FILE__)."/include/config_base.php");
require_once(dirname(__FILE__)."/include/pub_collection.php");
require_once(dirname(__FILE__)."/include/pub_datalist.php");
require_once(dirname(__FILE__)."/include/inc_functions.php");
require_once(dirname(__FILE__)."/include/pub_dedetag.php");
require_once(dirname(__FILE__)."/include/inc_archives_view.php");
require_once(dirname(__FILE__)."/include/inc_arclist_view.php");

// Job list, one entry per collection node:
//   array(collection node name, collection node ID, front-end column name, front-end column ID)
// Only the two IDs (indexes 1 and 3) are read by the processing loop below.
$makearr = array(
    array('采集一',1,'分类一',25),
    array('采集二',2,'分类二',26),
);

// --- Options read by the processing loop ---
$makehtml  = 1; // 1 = generate the article HTML after importing
$islisten  = 0; // 0 = skip URLs that were downloaded before;
                // -1 = only download content not downloaded yet; 1 = re-download everything
$sptime    = 0; // seconds to sleep between downloads (for sites with anti-refresh protection)
$onlytitle = 1; // non-zero enables the duplicate-title check before importing a document
$channelid = 1; // article model to import into; the loop below overwrites it from the export rule

// --- Options not read anywhere in this script ---
// NOTE(review): the original comments said "interval time" for $threadnum and
// "thread setting" for $pagesize, which look swapped; confirm before relying on them.
$threadnum = 5;
$pagesize  = 5;
$web       = "www-data"; // web server user
$webgroup  = "www-data"; // web server group
// Process every configured collection job. For each job:
//   1. download the pending URLs of the collection node,
//   2. import the collected results into the archive table(s) described by the export rule,
//   3. generate the HTML of the newly imported articles,
//   4. rebuild the site home page.
// Reads $makearr, $islisten, $sptime, $makehtml, $onlytitle and the $cfg_* settings
// loaded from the DedeCMS configuration.
foreach($makearr as $job){
    $nid = intval($job[1]);    // collection node ID
    $typeid = intval($job[3]); // front-end column ID that receives the articles

    // ---- 1. Collect ----
    $co = new DedeCollection();
    $co->Init();
    $co->LoadFromDB($nid);
    $co->GetSourceUrl();
    $co->dsql->SetQuery("Update #@__conote set lasttime='".mytime()."' where nid=$nid");
    $co->dsql->ExecuteNoneQuery();
    $co->dsql->SetQuery("Select aid,url From #@__courl where nid=$nid ");
    $co->dsql->Execute(99);
    while($row = $co->dsql->GetObject(99))
    {
        $safeUrl = addslashes($row->url);
        $lrow = $co->dsql->GetOne("Select * From #@__co_listenurl where url like '".$safeUrl."'");
        if(is_array($lrow)){
            // Already seen: skip unless a re-download mode is selected.
            if($islisten==0) continue;
        }
        else{
            // First time this URL is seen: remember it (the statement is echoed as a progress log).
            echo $inquery = "INSERT INTO #@__co_listenurl(nid,url) VALUES ('$nid', '".$safeUrl."');";
            $co->dsql->ExecuteNoneQuery($inquery);
        }
        $co->DownUrl($row->aid,$row->url);
        if($sptime>0) sleep($sptime);
    }
    $co->Close();

    // ---- 2. Import into the current database ----
    if(!isset($makehtml)) $makehtml = 0;
    if(!isset($onlytitle)) $onlytitle = 1;

    // A single connection is used for the whole import step.
    $dsql = new DedeSql(false);

    // Highest archive ID before the import; anything above it afterwards is new.
    $maxidrow = $dsql->GetOne("Select max(ID) as maxid From #@__archives where typeid=$typeid ;");
    $maxart_id = is_array($maxidrow) ? intval($maxidrow['maxid']) : 0;

    $rrow = $dsql->GetOne("Select typeid From #@__conote where nid='$nid'");
    $ruleid = is_array($rrow) ? $rrow['typeid'] : 0;
    $row = $dsql->GetOne("Select * From #@__co_exrule where aid='$ruleid'");
    if(!is_array($row)){
        echo "找不到导入规则,无法完成操作!";
        $dsql->Close();
        continue;
    }
    $channelid = isset($row['channelid']) ? $row['channelid'] : 0;

    // Parse the rule and build the INSERT templates. Collected fields are left as
    // '@#table.field#@' placeholders, constant fields are substituted right away.
    $dtp = new DedeTagParse();
    $dtp->LoadString($row['ruleset']);
    $noteinfo = $dtp->GetTagByName('note');
    $tablenames = explode(",",$noteinfo->GetAtt('tablename'));
    $autofield = $noteinfo->GetAtt('autofield');
    $synfield = $noteinfo->GetAtt('synfield');

    $tablename1 = $tablenames[0];
    $tb1SqlKey = "Insert Into $tablename1(";
    $tb1SqlValue = " Values(";

    $tablename2 = "";
    $tb2SqlKey = "";
    $tb2SqlValue = "";
    $tb2Sql = ""; // stays empty when the rule only targets one table
    if(count($tablenames)>=2){
        $tablename2 = $tablenames[1];
        $tb2SqlKey = "Insert Into $tablename2(";
        $tb2SqlValue = " Values(";
        if($synfield!=''){
            // The linking column is filled with the ID generated by the first insert.
            $tb2SqlKey .= $synfield;
            $tb2SqlValue .= "'@$synfield@'";
        }
    }

    foreach($dtp->CTags as $ctag)
    {
        if($ctag->GetName()!='field') continue;
        $fieldname = $ctag->GetAtt('name');
        $tbname = $ctag->GetAtt('intable');
        $isFirst = ($tbname==$tablename1);
        if(!$isFirst && $tbname!=$tablename2) continue;

        if($ctag->GetAtt('source')!='value'){
            $value = "@#{$tbname}.{$fieldname}#@";
        }else{
            $value = str_replace('{tid}',$typeid,$ctag->GetInnerText());
            $value = str_replace('{cid}',$channelid,$value);
            $value = str_replace('{rank}','0',$value);
            // {admin} is only substituted for the first table, as in the original rule handling.
            if($isFirst) $value = str_replace('{admin}',"1",$value);
        }

        if($isFirst){
            $tb1SqlKey .= ",$fieldname";
            $tb1SqlValue .= ",'$value'";
        }else{
            $tb2SqlKey .= ",$fieldname";
            $tb2SqlValue .= ",'$value'";
        }
    }
    // Drop the comma left right after the opening parenthesis and close both lists.
    $tb1Sql = str_replace('(,','(',$tb1SqlKey).")".str_replace('(,','(',$tb1SqlValue).");";
    if($tablename2!=""){
        $tb2Sql = str_replace("(,","(",$tb2SqlKey).")".str_replace("(,","(",$tb2SqlValue).");";
    }

    // Export every collected URL of this node.
    $dsql->SetQuery("Select * From #@__courl where nid='$nid' order by aid asc ");
    $dsql->Execute();
    while($row = $dsql->GetObject())
    {
        $tmpSql1 = $tb1Sql;
        $tmpSql2 = $tb2Sql;
        $dtp->LoadString($row->result);
        $aid = $row->aid;
        if(!is_array($dtp->CTags)){ continue; }

        // Optional duplicate check: refuse documents whose title already exists.
        if($onlytitle){
            $titletag = '';
            foreach($dtp->CTags as $ctag){
                $tvalue = $ctag->GetAtt("name");
                if($tvalue == '#@__archives.title' || $tvalue == $cfg_dbprefix.'archives.title'){
                    $titletag = $ctag;
                    break;
                }
            }
            if(is_object($titletag)){
                $title = trim(addslashes($titletag->GetInnerText()));
                $testrow = $dsql->GetOne("Select count(ID) as dd From #@__archives where title like '%$title%'");
                if($testrow['dd']>0){
                    echo "数据库已存在标题为: {$title} 的文档,程序阻止了此内容导入<br/>";
                    continue;
                }
            }
        }

        // Fill the placeholders with the collected field values.
        foreach($dtp->CTags as $ctag)
        {
            if($ctag->GetName()!="field") continue;
            $placeholder = '@#'.$ctag->GetAtt("name").'#@';
            $fieldValue = addslashes($ctag->GetInnerText());
            $tmpSql1 = str_replace($placeholder,$fieldValue,$tmpSql1);
            if($tablename2!=""){
                $tmpSql2 = str_replace($placeholder,$fieldValue,$tmpSql2);
            }
        }
        // Blank out placeholders that received no value. preg_replace replaces ereg_replace,
        // which no longer exists in PHP 7+; the /s flag keeps '.' matching newlines as before.
        // NOTE(review): the match is greedy, exactly like the original pattern.
        $tmpSql1 = preg_replace('/@#(.*)#@/s','',$tmpSql1);

        $rs = $dsql->ExecuteNoneQuery($tmpSql1);
        if($rs && $tablename2!=""){
            if($synfield!=""){
                $lid = $dsql->GetLastID();
                $tmpSql2 = str_replace("@$synfield@",$lid,$tmpSql2);
                $rs = $dsql->ExecuteNoneQuery($tmpSql2);
                // Roll back the first insert when the second one fails.
                if(!$rs) $dsql->ExecuteNoneQuery("Delete From $tablename1 where $autofield='$lid'");
            }
            else $dsql->ExecuteNoneQuery($tmpSql2);
        }
        $dsql->ExecuteNoneQuery("update #@__courl set isex=1 where aid='$aid'");
    }
    $dsql->Close();

    // ---- 3. Generate the HTML of the newly imported articles ----
    if($channelid>0 && $makehtml==1){
        $dsql = new DedeSql(false);
        $dsql->SetQuery("Select ID From #@__archives where typeid=$typeid and ID >$maxart_id ;");
        $dsql->Execute();
        while($row=$dsql->GetObject())
        {
            $ac = new Archives($row->ID);
            $ac->MakeHtml();
            $ac->Close();
        }
        $dsql->Close();
    }

    // ---- 4. Rebuild the home page ----
    $templet = str_replace("{style}",$cfg_df_style,"{style}/index.htm");
    $homeFile = dirname(__FILE__)."/index.html";
    $homeFile = str_replace("\\","/",$homeFile);
    $homeFile = str_replace("//","/",$homeFile);
    // Make sure the target can be created before rendering into it.
    $fp = fopen($homeFile,"w") or die("你指定的文件名有问题,无法创建文件");
    fclose($fp);
    $pv = new PartView();
    $pv->SetTemplet($cfg_basedir.$cfg_templets_dir."/".$templet);
    $pv->SaveToHtml($homeFile);
    $pv->Close();
}


// Regenerate the static list pages of every column in #@__arctype.
$dsql = new DedeSql(false);
$dsql->SetQuery("Select ID,typedir From #@__arctype;");
$dsql->Execute();
// The braces are required: without them only the ListView construction is looped,
// and MakeHtml()/Close() run a single time on the last object.
while($row=$dsql->GetObject())
{
    $lv = new ListView($row->ID);
    $lv->MakeHtml();
    $lv->Close();
}
$dsql->Close();
?>

相关文章

最新评论