selfhtml-rss-feeds-erstellen/beispiele/rss-feed-creator.php

<?php
/*
 * Report errors only, no warnings, notices or deprecations.
 * E_USER_ERROR on its own covers only errors raised with trigger_error();
 * it would hide the engine's own fatal errors as well, so the fatal error
 * classes are listed explicitly.
 */
error_reporting(E_ERROR | E_PARSE | E_CORE_ERROR | E_COMPILE_ERROR | E_USER_ERROR | E_RECOVERABLE_ERROR);
// Running number that getTeaser() uses to name its "page-N.html" dump files.
$c = 1;
// Collects the URLs that getTeaser() has been called with.
$debug = "" ;
/**
 * Fetches an article page and extracts its teaser text.
 *
 * The page is cleaned up with Tidy, dumped to "page-N.html" (N being the
 * global counter $c) and searched for the paragraphs inside
 * <div class="contentbox">. The second paragraph is returned; if that
 * paragraph only holds the text "Weiter", the first one is returned instead.
 *
 * Side effects: appends the URL to the global $debug, writes the dump file
 * and increments the global $c.
 *
 * @param string $url Address of the article page; surrounding whitespace is ignored.
 * @return string Teaser text, or "" when the page cannot be loaded or does
 *                not contain at least two matching paragraphs.
 */
function getTeaser ($url) {
  global $c, $debug ;
  $url = trim($url);
  $config = array(
                  'indent'         => true,
                  'output-xhtml'   => true,
                  'wrap'           => 200);

  $debug .= $url ;
  // The second parameter of file_get_contents() is the use_include_path
  // flag, not an fopen() mode, so no 'r' is passed here.
  $html = file_get_contents($url);
  if ($html === false) {
    // Nothing was downloaded, so there is nothing to parse or dump.
    return "" ;
  }
  $tidy = new tidy;
  $tidy->parseString($html, $config, 'ascii');
  $tidy->cleanRepair();
  // tidy::html() yields null when the parse tree has no <html> node.
  $root = $tidy->html();
  $clean = ($root === null) ? "" : $root->value;
  file_put_contents("page-$c.html","$url\n".$clean);
  $c++ ;
  if ($clean === "") {
    // DOMDocument::loadHTML() rejects an empty string.
    return "" ;
  }
  $doc = new DOMDocument();
  $doc->loadHTML($clean);
  $xp = new DOMXPath($doc);
  $cnt = $xp->query('//div[@class="contentbox"]//p');
  if ($cnt === false || $cnt->length < 2) {
    // The expected second paragraph does not exist.
    return "" ;
  }
  $res = $cnt->item(1)->textContent;
  if (trim($res) === "Weiter") {
    $res = $cnt->item(0)->textContent;
  }
  return $res;

}
// CHECK START
/**
 * Tells whether the cached magazine page has to be treated as changed.
 *
 * A change is reported when
 *  - "magazin.md5" does not exist,
 *  - "magazin.md5" was last modified on a day other than today,
 *  - "magazin.html" or "magazin.md5" cannot be read, or
 *  - the md5 hash of "magazin.html" differs from the hash stored in
 *    "magazin.md5".
 *
 * @return bool true if a change is reported, false otherwise.
 */
function checkChanges() {
  $md5file = "magazin.md5";
  $htmlfile = "magazin.html" ;
  if (!file_exists($md5file)) {
    return true ;
  }
  if (date("Y-m-d") !== date("Y-m-d",filemtime($md5file))) {
    return true ;
  }
  if (!is_readable($htmlfile)) {
    return true ;
  }
  // The second parameter of file_get_contents() is the use_include_path
  // flag, not an fopen() mode, so no 'r' is passed here.
  $html = file_get_contents($htmlfile);
  $stored = file_get_contents($md5file);
  if ($html === false || $stored === false) {
    return true ;
  }
  // Strict comparison is required: with "!=" two different hashes that both
  // look like "0e<digits>" are compared as numbers and count as equal.
  return md5($html) !== trim($stored);
}
// CHECK END
/**
 * Appends an element that contains plain text to a parent node.
 *
 * The text is attached as a text node so that DOM escapes "&" and "<";
 * the value argument of DOMDocument::createElement() does not escape "&".
 *
 * @param DOMDocument $dom    Document that owns the new element.
 * @param DOMNode     $parent Node the new element is appended to.
 * @param string      $name   Tag name of the new element.
 * @param string|null $text   Text content; null is treated as "".
 * @return DOMElement The element that was appended.
 */
function appendTextElement ($dom, $parent, $name, $text) {
  $element = $dom->createElement($name);
  $element->appendChild($dom->createTextNode((string) $text));
  $parent->appendChild($element);
  return $element;
}

/**
 * Regenerates the cached feed "magazin.rss" from the magazine index page.
 *
 * When checkChanges() reports a change, the index page is downloaded and
 * stored as "magazin.html". checkChanges() is then asked again; if it still
 * reports a change, the hash in "magazin.md5" is updated, the page is cleaned
 * up with Tidy (the result is also dumped to "magazin-tidy2.html") and every
 * nested table cell is inspected: a cell without a link is read as a date in
 * the form day.month.year, a cell with a link becomes a feed item whose
 * description comes from getTeaser() and whose pubDate is the most recently
 * seen date.
 *
 * If the download fails, the cached files are left untouched.
 *
 * @return void
 */
function getTitles () {
  // START FETCH
  # check for file existence
  if (checkChanges()) {
    $html = file_get_contents("http://www.selfhtml.de:80/magazin/index.php");
    if ($html === false) {
      // Do not overwrite the cached page and feed with an empty download.
      return ;
    }
    file_put_contents('magazin.html',$html);
    # now check for md5-hash
    if (checkChanges()) {
      file_put_contents('magazin.md5',md5($html));
      // END FETCH
      // START DOM
      $rssdom = new DOMDocument('1.0', 'iso-8859-1');
      $rss = $rssdom->createElement("rss");
      $rss->setAttribute("version","2.0");
      $rssdom->appendChild($rss);
      $channel = $rssdom->createElement("channel");
      $rss->appendChild($channel);
      appendTextElement($rssdom, $channel, "title", "www.selfhtml.de Magazin");
      appendTextElement($rssdom, $channel, "link", "http://www.selfhtml.de/magazin/");
      appendTextElement($rssdom, $channel, "description", "News Feed fuer das Magazin von www.selfhtml.de");
      // END DOM
      // START Tidy
      $config = array(
                      'indent'         => true,
                      'output-xhtml'   => true,
                      'wrap'           => 200);
      $tidy = new tidy;
      $tidy->parseString($html, $config, 'ascii');
      $tidy->cleanRepair();
      // tidy::html() yields null when the parse tree has no <html> node.
      $root = $tidy->html();
      $clean = ($root === null) ? "" : $root->value;
      // END Tidy
      // Output
      file_put_contents('magazin-tidy2.html',$clean);

      // START XPATH
      $titles = array();
      if ($clean !== "") {
        // DOMDocument::loadHTML() rejects an empty string.
        $doc = new DOMDocument();
        $doc->loadHTML($clean);
        $xp = new DOMXPath($doc);
        $found = $xp->query('//td//td');
        if ($found !== false) {
          $titles = $found;
        }
      }
      // END XPATH
      // START DOM2
      $date =  "" ;
      foreach ($titles as $node) {
        $a = $node->getElementsByTagName("a");
        if ($a->length > 0) {
          $link = $a->item(0);
          $href = $link->getAttribute("href");
          $descr = getTeaser($href);
          $item = $rssdom->createElement("item");
          $channel->appendChild($item);
          appendTextElement($rssdom, $item, "title", $link->textContent);
          appendTextElement($rssdom, $item, "description", $descr);
          if ($date !== "") {
            // An empty pubDate is not a valid RSS date, so it is left out.
            appendTextElement($rssdom, $item, "pubDate", $date);
          }
          appendTextElement($rssdom, $item, "link", $href);
        } else {
          // get rss conform date
          // Only text that starts with day.month.year replaces the current
          // date; any other link-less cell is ignored.
          if (preg_match('/^(\d{1,2})\.(\d{1,2})\.(\d{2,4})/', trim($node->textContent), $parts)) {
            $date = date(DATE_RSS,mktime(0,0,0,(int) $parts[2],(int) $parts[1],(int) $parts[3]));
          }
        }
      }
      file_put_contents("magazin.rss",$rssdom->saveXML());
      // END DOM2
    }

  }
}
// Rebuild the cached feed if it is out of date.
getTitles();

// Deliver the cached feed. Without a readable feed file an empty response
// labelled as RSS would be sent, so answer with an error status instead.
if (is_readable("magazin.rss")) {
  header("Content-Type: application/rss+xml");
  readfile("magazin.rss");
} else {
  header("HTTP/1.1 503 Service Unavailable");
  header("Content-Type: text/plain");
  echo "Feed is currently not available.";
}
?> 

Generated by GNU enscript 1.6.3.