selfhtml-rss-feeds-erstellen/beispiele/rss-feed-creator.php
<?php
/*
 * Restrict error reporting to E_USER_ERROR, i.e. errors raised through
 * trigger_error(); warnings and notices are not reported.
 */
error_reporting(E_USER_ERROR);

// Script-wide state shared with getTeaser() through "global":
// $debug collects the URLs that were fetched, $c numbers the page dumps.
$debug = '';
$c = 1;
/**
 * Fetch an article page and extract its teaser text.
 *
 * The page is cleaned up with Tidy, loaded into a DOM document and the <p>
 * elements below <div class="contentbox"> are inspected: the second paragraph
 * is returned unless its trimmed text is exactly "Weiter", in which case the
 * first paragraph is returned instead.
 *
 * Side effects: appends the URL to the global $debug string, writes the
 * tidied markup to "page-<c>.html" in the working directory and increments
 * the global counter $c.
 *
 * @param string $url Address of the article page; surrounding whitespace is stripped.
 * @return string The teaser text, or an empty string when the page cannot be
 *                fetched or has fewer than two matching paragraphs.
 */
function getTeaser ($url) {
    global $c, $debug ;

    $url = trim($url);
    $config = array(
        'indent' => true,
        'output-xhtml' => true,
        'wrap' => 200);
    $debug .= $url ;

    // The second parameter of file_get_contents() is the use_include_path
    // flag, not an fopen() mode, so it is left at its default here.
    $html = file_get_contents($url);
    if ($html === false) {
        // Nothing to parse; the caller gets an empty description.
        return '';
    }

    $tidy = new tidy;
    $tidy->parseString($html, $config, 'ascii');
    $tidy->cleanRepair();
    $markup = $tidy->html()->value;

    $doc = new DOMDocument();
    $doc->loadHTML($markup);

    // Debug dump of the tidied page, numbered by the global counter.
    file_put_contents("page-$c.html", "$url\n" . $markup);
    $c++ ;

    $xp = new DOMXPath($doc);
    $cnt = $xp->query('//div[@class="contentbox"]//p');

    // item() returns null when the index does not exist; avoid reading a
    // property of null when the page has fewer than two paragraphs.
    $second = $cnt->item(1);
    if ($second === null) {
        return '';
    }

    $res = $second->textContent;
    if (trim($res) === "Weiter") {
        // The second paragraph is only a "continue" link; use the first one.
        $res = $cnt->item(0)->textContent;
    }
    return $res;
}
// CHECK START
/**
 * Decide whether the cached magazine page needs to be (re)processed.
 *
 * Reads "magazin.md5" and "magazin.html" from the working directory.
 *
 * @return bool true when the checksum file is missing, was last modified on
 *              a different day than today, the cached page is missing or
 *              unreadable, or the MD5 of the cached page differs from the
 *              stored checksum; false otherwise.
 */
function checkChanges() {
    $md5file = "magazin.md5";
    $htmlfile = "magazin.html" ;

    if (!file_exists($md5file)) {
        return true ;
    }
    // A checksum written on another day is treated as stale.
    if (date("Y-m-d") !== date("Y-m-d", filemtime($md5file))) {
        return true ;
    }
    if (!file_exists($htmlfile)) {
        return true ;
    }

    // The second parameter of file_get_contents() is the use_include_path
    // flag, not an fopen() mode, so it is left at its default here.
    $html = file_get_contents($htmlfile);
    $stored = file_get_contents($md5file);
    if ($html === false || $stored === false) {
        return true ;
    }

    // Strict comparison: with "!=" two digests of the form "0e<digits>" are
    // compared as numbers and would wrongly be considered equal.
    return md5($html) !== trim($stored);
}
// CHECK END
/**
 * Append a child element named $name to $parent and fill it with $text.
 *
 * The text is added as a text node so that characters such as "&" are
 * escaped when the document is serialised; the value argument of
 * DOMDocument::createElement() is not escaped.
 *
 * @param DOMDocument $dom    Document that owns the new nodes.
 * @param DOMNode     $parent Node the new element is appended to.
 * @param string      $name   Tag name of the new element.
 * @param string      $text   Text content; an empty value yields an empty element.
 * @return DOMElement The element that was appended.
 */
function appendRssTextElement ($dom, $parent, $name, $text) {
    $element = $dom->createElement($name);
    $text = (string) $text;
    if ($text !== '') {
        $element->appendChild($dom->createTextNode($text));
    }
    $parent->appendChild($element);
    return $element;
}

/**
 * Rebuild the cached RSS feed "magazin.rss" when checkChanges() asks for it.
 *
 * Steps: download the magazine index page into "magazin.html", store its MD5
 * in "magazin.md5", tidy the markup (also dumped to "magazin-tidy2.html"),
 * walk all nested table cells (XPath //td//td) and turn every cell that
 * contains a link into an RSS <item>. Cells without a link are read as a
 * "day.month.year" date that becomes the pubDate of the items that follow.
 *
 * Nothing is written when checkChanges() reports no change or when the
 * download fails, so an existing cache is left intact.
 *
 * @return void
 */
function getTitles () {
    // START FETCH
    if (!checkChanges()) {
        return;
    }

    $html = file_get_contents("http://www.selfhtml.de:80/magazin/index.php");
    if ($html === false) {
        // Do not overwrite the cached page with an empty file.
        return;
    }
    if (file_put_contents('magazin.html', $html) === false) {
        return;
    }

    // Check again now that the cached page holds the fresh download.
    if (!checkChanges()) {
        return;
    }
    file_put_contents('magazin.md5', md5($html));
    // END FETCH

    // START DOM: feed skeleton <rss><channel>...</channel></rss>
    $rssdom = new DOMDocument('1.0', 'iso-8859-1');
    $rss = $rssdom->createElement("rss");
    $rss->setAttribute("version", "2.0");
    $rssdom->appendChild($rss);
    $channel = $rssdom->createElement("channel");
    $rss->appendChild($channel);
    appendRssTextElement($rssdom, $channel, "title", "www.selfhtml.de Magazin");
    appendRssTextElement($rssdom, $channel, "link", "http://www.selfhtml.de/magazin/");
    appendRssTextElement($rssdom, $channel, "description", "News Feed fuer das Magazin von www.selfhtml.de");
    // END DOM

    // START Tidy: repair the downloaded markup so that it can be parsed
    $config = array(
        'indent' => true,
        'output-xhtml' => true,
        'wrap' => 200);
    $tidy = new tidy;
    $tidy->parseString($html, $config, 'ascii');
    $tidy->cleanRepair();
    $markup = $tidy->html()->value;
    // END Tidy

    // Debug dump of the tidied index page.
    file_put_contents('magazin-tidy2.html', $markup);

    // START XPATH
    $doc = new DOMDocument();
    $doc->loadHTML($markup);
    $xp = new DOMXPath($doc);
    $titles = $xp->query('//td//td');
    // END XPATH

    // START DOM2
    $date = "" ;
    foreach ($titles as $node) {
        $links = $node->getElementsByTagName("a");
        if ($links->length > 0) {
            $link = $links->item(0);
            $href = $link->getAttribute("href");
            $descr = getTeaser($href);

            $item = $rssdom->createElement("item");
            $channel->appendChild($item);
            appendRssTextElement($rssdom, $item, "title", $link->textContent);
            appendRssTextElement($rssdom, $item, "description", $descr);
            appendRssTextElement($rssdom, $item, "pubDate", $date);
            appendRssTextElement($rssdom, $item, "link", $href);
        } else {
            // Convert "day.month.year" into an RSS-conformant date. split()
            // no longer exists in PHP 7+, explode() does the same job here.
            $parts = array_map('trim', explode('.', trim($node->textContent)));
            if (count($parts) === 3
                && is_numeric($parts[0])
                && is_numeric($parts[1])
                && is_numeric($parts[2])) {
                $date = date(
                    DATE_RSS,
                    mktime(0, 0, 0, (int) $parts[1], (int) $parts[0], (int) $parts[2])
                );
            }
            // Cells that are not a date keep the previously parsed date.
        }
    }
    file_put_contents("magazin.rss", $rssdom->saveXML());
    // END DOM2
}
// Refresh the cached feed if necessary, then deliver it to the client.
getTitles();

header("Content-Type: application/rss+xml");
$feed = file_get_contents("magazin.rss");
echo $feed;
?>
Generated by GNU enscript 1.6.3.