This data access object (DAO) parses the information out of the RSS feed. It also determines if the cache file is stale, in which case it will download a fresh version from Blogger.

(double-click the code to select all)
<?php
namespace blog;
use \DateTime;
use \DateTimeZone;

/**
 * This class is used to access the blog posts.
 *
 * The posts are parsed out of the blog's RSS feed. The feed can optionally
 * be cached in a local file so that it is not downloaded on every request.
 * @author Michael Angstadt
 */
class BlogDao {
	/**
	 * The namespace URI of the feed threading extension, which supplies the
	 * comment count ("thr:total") of each post.
	 */
	const THREAD_NS = 'http://purl.org/syndication/thread/1.0';

	/**
	 * The XML document of the RSS feed.
	 */
	private $xml;
	
	/**
	 * True to add highslide support to all images in the blog post,
	 * false to leave them alone.
	 * @var boolean
	 */
	private $hsImages = true;

	/**
	 * @param $feedUrl the URL to the RSS feed.
	 * @param $cacheFile (optional) the path to the cache file or null to
	 * not use a cache
	 * @param $cacheRefresh (optional) the number of seconds old the cache
	 * is allowed to be before it is refreshed
	 * @throws \Exception if the feed cannot be loaded or parsed and there is
	 * no usable cache file to fall back on
	 */
	public function __construct($feedUrl, $cacheFile = null, $cacheRefresh = 3600){
		//a loose comparison is used on purpose so that an empty string also means "no cache"
		if ($cacheFile == null){
			$xml = simplexml_load_file($feedUrl);
		} else {
			$xml = $this->loadWithCache($feedUrl, $cacheFile, $cacheRefresh);
		}

		//simplexml returns false when the XML cannot be read or parsed
		//fail with a catchable exception instead of a fatal error further down
		if ($xml === false){
			throw new \Exception("Error parsing RSS feed: $feedUrl");
		}

		$this->xml = $xml;
		$this->xml->registerXPathNamespace('thr', self::THREAD_NS);
	}

	/**
	 * Loads the feed, using the cache file if it is fresh enough and
	 * refreshing the cache file if it is stale or missing.
	 * @param string $feedUrl the URL to the RSS feed
	 * @param string $cacheFile the path to the cache file
	 * @param integer $cacheRefresh the max age of the cache in seconds
	 * @return SimpleXMLElement|boolean the parsed feed or false if the cache
	 * file could not be parsed
	 * @throws \Exception if the feed could not be downloaded and there is no
	 * cache file to fall back on
	 */
	private function loadWithCache($feedUrl, $cacheFile, $cacheRefresh){
		$cacheExists = file_exists($cacheFile);
		if ($cacheExists && (time() - filemtime($cacheFile)) <= $cacheRefresh){
			return simplexml_load_file($cacheFile);
		}

		//the cache is stale or missing, so download a fresh copy
		$feed = file_get_contents($feedUrl);
		$xml = ($feed === false) ? false : simplexml_load_string($feed);
		if ($xml !== false){
			//only overwrite the cache once the download is known to be valid XML
			file_put_contents($cacheFile, $feed);
			return $xml;
		}

		if ($cacheExists){
			//a stale cache is better than nothing
			error_log("Error loading RSS feed: $feedUrl");
			return simplexml_load_file($cacheFile);
		}

		//fully-qualified: a bare "Exception" would resolve to a class inside this file's namespace
		throw new \Exception("Error loading RSS feed: $feedUrl");
	}

	/**
	 * Gets the most recent posts from the blog.
	 * @param $num (optional) the max number of posts to retrieve, or null
	 * to retrieve all of them
	 * @return array(BlogPost) the posts
	 */
	public function getPosts($num = null){
		$posts = array();

		$items = $this->xml->xpath('/rss/channel/item');
		if (!is_array($items)){
			return $posts;
		}

		//only null means "no limit"; a limit of 0 yields no posts
		$total = count($items);
		$limit = ($num === null) ? $total : min((int)$num, $total);

		for ($i = 0; $i < $limit; $i++){
			$item = $items[$i];
			$post = new BlogPost();
			
			//get the comment URL (left unset if the guid is not in the expected format)
			$guid = (string)$item->guid;
			if (preg_match('/blog-(\\d+)\\.post-(\\d+)/', $guid, $matches) === 1){
				$blogId = $matches[1];
				$postId = $matches[2];
				$post->commentsUrl = "http://www.blogger.com/comment.g?blogID=$blogId&postID=$postId";
			}

			//get the published date, converted to the local timezone
			$post->date = new DateTime((string)$item->pubDate);
			$post->date->setTimezone(new DateTimeZone(date_default_timezone_get()));
			
			//get the title
			$post->title = (string)$item->title;
			
			//get the blog post
			$content = (string)$item->description;
			$content = $this->fixCodeSamples($content); //replace "<br />" tags with newlines in the code samples
			if ($this->hsImages){ //add highslide support to images
				$content = $this->addHighslideSupport($content);
			}
			$post->content = $content;
			
			//get the URL to the blog post
			$post->url = (string)$item->link;
			
			//get the number of comments (0 if the feed does not say)
			$item->registerXPathNamespace('thr', self::THREAD_NS);
			$totals = $item->xpath('thr:total');
			$post->numComments = empty($totals) ? 0 : (int)$totals[0];
				
			$posts[] = $post;
		}

		return $posts;
	}

	/**
	 * Gets the URL of the blog.
	 * @return string the blog URL
	 */
	public function getUrl(){
		return (string)$this->xml->channel->link;
	}
	
	/**
	 * Sets whether highslide support should be added to all images in the blog post.
	 * @param boolean $enabled true to enable this, false to disable
	 */
	public function setHighslideImagesEnabled($enabled){
		$this->hsImages = $enabled;
	}
	
	/**
	 * Sets whether highslide support should be added to all images in the blog post.
	 * The method name contains a typo; it is kept so that existing callers
	 * continue to work.
	 * @param boolean $enabled true to enable this, false to disable
	 * @see setHighslideImagesEnabled()
	 */
	public function setHideSlideImagesEnabled($enabled){
		$this->setHighslideImagesEnabled($enabled);
	}
	
	/**
	 * Replaces "&lt;br /&gt;" tags in the code samples with newlines.
	 * @param string $content the blog post
	 * @return string the fixed blog post
	 */
	private function fixCodeSamples($content){
		//the "s" modifier lets "." match newlines, so code samples that
		//already contain line breaks are fixed too
		$fixed = preg_replace_callback('~(<pre\\s+class="brush:.*?">)(.*?)(</pre>)~s', function($matches){
			$code = $matches[2];
			$code = str_replace('<br />', "\n", $code);
			return $matches[1] . $code . $matches[3];
		}, $content);

		//null means the regex engine failed; keep the post as it was instead of losing it
		return ($fixed === null) ? $content : $fixed;
	}
	
	/**
	 * Adds Highslide support to all images in the blog post.
	 * @param string $content the blog post
	 * @return string the blog post with highslide support added or the
	 * original blog post if there was a problem parsing the blog post as XML
	 */
	private function addHighslideSupport($content){
		//an empty post would be serialized as "<div/>", which the root tag
		//removal below does not recognize
		if ($content === ''){
			return $content;
		}

		//XML doesn't like "&nbsp;", so replace it with the proper XML equivalent
		//see: http://techtrouts.com/webkit-entity-nbsp-not-defined-convert-html-entities-to-xml/
		$content = str_replace("&nbsp;", "&#160;", $content);
			
		//load the text into a DOM
		//add a root tag in case there isn't one
		//posts that are not well-formed XML are expected here, so keep libxml
		//from emitting a warning for each of them
		$previous = libxml_use_internal_errors(true);
		$xml = simplexml_load_string('<div>' . $content . '</div>');
		libxml_clear_errors();
		libxml_use_internal_errors($previous);
			
		//if there's a problem loading the XML, skip the highslide stuff
		if ($xml !== false){
			//get all links that contain an image
			$links = $xml->xpath('//a[img]');
			
			//add the highslide stuff to each link
			foreach ($links as $link){
				$link['class'] = 'highslide';
				$link['onclick'] = 'return hs.expand(this)';
			}
			
			//marshal XML to a string
			$content = $xml->asXML();
			
			//remove the XML declaration at the top
			$content = preg_replace('~^<\\?xml.*?\\?>~', '', $content);
			
			//trim whitespace
			$content = trim($content);
			
			//remove the root tag that we added
			$content = preg_replace('~(^<div>)|(</div>$)~', '', $content);
		}
		
		return $content;
	}
}

This is a DTO (data transfer object) that contains information on a blog post.

(double-click the code to select all)
<?php
namespace blog;

/**
 * Represents a blog post.
 *
 * This is a plain data holder: it has no behavior of its own and every
 * property starts out as null until it is assigned.
 * @author Michael Angstadt
 */
class BlogPost {
	/**
	 * The URL of the post.
	 * @var string
	 */
	public $url;

	/**
	 * The date the post was made.
	 * @var \DateTime
	 */
	public $date;

	/**
	 * The title of the post.
	 * @var string
	 */
	public $title;

	/**
	 * The body of the post (contains unencoded HTML, so it must not be
	 * HTML-escaped again before being displayed).
	 * @var string
	 */
	public $content;

	/**
	 * The number of comments the post has.
	 * @var int
	 */
	public $numComments;

	/**
	 * The URL for viewing and posting comments.
	 * @var string
	 */
	public $commentsUrl;
}

This is the unit test class for the BlogDao class. It uses a locally saved copy of the RSS feed from my blog, which I modified to improve the quality of the test. The first blog post in the feed checks that the HTML code of the blog post is unescaped. The second blog post checks that <br /> tags are replaced with newline characters in all code samples. The third post checks that Highslide support is correctly added to the images.

(double-click the code to select all)
<?php
namespace blog;
use \DateTime;
use utils\TestWrapper;

/**
 * Tests the BlogDao class against a locally-saved copy of the RSS feed.
 */
class BlogDaoTest extends TestWrapper {
	/**
	 * A sample RSS file used for testing.
	 */
	private $rssFile;
	
	public function __construct(){
		$this->rssFile = __DIR__ . '/blog.xml';
	}

	/**
	 * Tests the getPosts() method.
	 */
	public function testGetPosts(){
		$expectedPosts = $this->buildExpectedPosts();
		$dao = new BlogDao($this->rssFile);

		//no max specified
		$posts = $dao->getPosts();
		$this->assertEquals(3, count($posts));
		$this->assertPostsMatch($expectedPosts, $posts);

		//specify a max less than the total number of posts
		$posts = $dao->getPosts(1);
		$this->assertEquals(1, count($posts));
		$this->assertPostsMatch($expectedPosts, $posts);

		//specify a max higher than the total number of posts
		$posts = $dao->getPosts(10);
		$this->assertEquals(3, count($posts));
		$this->assertPostsMatch($expectedPosts, $posts);
	}

	/**
	 * Tests the getUrl() method.
	 */
	public function testGetUrl(){
		$dao = new BlogDao($this->rssFile);
		$expected = 'http://mangstacular.blogspot.com/';
		$actual = $dao->getUrl();
		$this->assertEquals($expected, $actual);
	}

	/**
	 * Tests to make sure the caching functionality works.
	 */
	public function testCache(){
		$orig = $this->rssFile;
		$cache = __DIR__ . '/blog.cache.rss';
		$this->tempFiles[] = $cache;

		//a cache file left behind by an aborted run would be read instead of created
		if (file_exists($cache)){
			unlink($cache);
		}

		$dao = new BlogDao($orig, $cache);

		//make sure it created the cache file
		$this->assertTrue(file_exists($cache));

		//make sure the contents of the cache file are the same as the contents of the original file
		$expected = file_get_contents($orig);
		$actual = file_get_contents($cache);
		$this->assertEquals($expected, $actual);

		//make sure the cache is read from by modifying the cache
		$cacheContents = file_get_contents($cache);
		$cacheContents = str_replace('<link>http://mangstacular.blogspot.com/</link>', '<link>http://changed.com</link>', $cacheContents);
		file_put_contents($cache, $cacheContents);
		$dao = new BlogDao($orig, $cache, 5);
		$expected = 'http://changed.com';
		$actual = $dao->getUrl();
		$this->assertEquals($expected, $actual);

		//make sure that if the cache is stale, it should be refreshed
		//back-date the cache file instead of sleeping so the test is fast and deterministic
		touch($cache, time() - 10);
		clearstatcache();
		$dao = new BlogDao($orig, $cache, 1);
		$expected = 'http://mangstacular.blogspot.com/';
		$actual = $dao->getUrl();
		$this->assertEquals($expected, $actual);
	}

	/**
	 * Builds the posts that the sample RSS file is expected to produce, in
	 * the order they appear in the feed.
	 * @return array(BlogPost) the expected posts
	 */
	private function buildExpectedPosts(){
		$expectedPosts = array();

		$post = new BlogPost();
		$post->url = 'http://mangstacular.blogspot.com/2011/12/post1.html';
		$post->date = $this->localDate('2011-12-20 02:30:00');
		$post->title = 'Post 1';
		//make sure HTML is un-escaped
		$post->content = 'The blog <b>content</b>.';
		$post->numComments = 0;
		$post->commentsUrl = 'http://www.blogger.com/comment.g?blogID=5682413770770674096&postID=2489557499927389722';
		$expectedPosts[] = $post;

		$post = new BlogPost();
		$post->url = 'http://mangstacular.blogspot.com/2011/12/post2.html';
		$post->date = $this->localDate('2011-12-16 00:01:00');
		$post->title = 'Post 2';
		//make sure it replaces the "<br />" tags in the code samples with newlines
		$post->content = '<pre class="brush: xml">&lt;script
 type="text/javascript"
 src="path/to/script.js"&gt;
&lt;/script&gt;
</pre>';
		$post->numComments = 1;
		$post->commentsUrl = 'http://www.blogger.com/comment.g?blogID=5682413770770674096&postID=3997914456363482410';
		$expectedPosts[] = $post;

		$post = new BlogPost();
		$post->url = 'http://mangstacular.blogspot.com/2011/12/post3.html';
		$post->date = $this->localDate('2011-12-11 17:23:00');
		$post->title = 'Post 3';
		//make sure it adds highslide support to all links
		$post->content = '<a href="foobar.png" class="highslide" onclick="return hs.expand(this)"><img src="foobar.thumb.png"/></a><a href="http://www.google.com">Regular link</a>';
		$post->numComments = 5;
		$post->commentsUrl = 'http://www.blogger.com/comment.g?blogID=5682413770770674096&postID=1634330854796202492';
		$expectedPosts[] = $post;

		return $expectedPosts;
	}

	/**
	 * Creates a date from a UTC timestamp (as found in the feed) and converts
	 * it to the default timezone, the same way BlogDao does. This keeps the
	 * test independent of the timezone of the machine it runs on.
	 * @param string $utc the date and time in UTC
	 * @return DateTime the date in the default timezone
	 */
	private function localDate($utc){
		$date = new DateTime($utc, new \DateTimeZone('UTC'));
		$date->setTimezone(new \DateTimeZone(date_default_timezone_get()));
		return $date;
	}

	/**
	 * Asserts that each returned post equals the expected post at the same
	 * position.
	 * @param array(BlogPost) $expectedPosts the expected posts
	 * @param array(BlogPost) $actualPosts the posts returned by the DAO
	 */
	private function assertPostsMatch($expectedPosts, $actualPosts){
		for ($i = 0; $i < count($actualPosts); $i++){
			$this->assertEquals($expectedPosts[$i], $actualPosts[$i]);
		}
	}
}

The sample RSS feed used in BlogDaoTest.

(double-click the code to select all)
<?xml version='1.0' encoding='UTF-8'?>
<rss xmlns:atom='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/'
	xmlns:georss='http://www.georss.org/georss' xmlns:thr='http://purl.org/syndication/thread/1.0'
	version='2.0'>
	<channel>
		<atom:id>tag:blogger.com,1999:blog-5682413770770674096</atom:id>
		<lastBuildDate>Fri, 23 Dec 2011 02:31:21 +0000</lastBuildDate>
		<category>java</category>
		<category>php</category>
		<category>programming</category>
		<title>Mike's Software Development Blog</title>
		<description>A computer blog with a focus on everything software
			development related.</description>
		<link>http://mangstacular.blogspot.com/</link>
		<managingEditor>noreply@blogger.com (Michael Angstadt)</managingEditor>
		<generator>Blogger</generator>
		<openSearch:totalResults>65</openSearch:totalResults>
		<openSearch:startIndex>1</openSearch:startIndex>
		<openSearch:itemsPerPage>25</openSearch:itemsPerPage>
		<item>
			<!-- Post 1: the description holds escaped HTML; BlogDaoTest expects it back un-escaped -->
			<guid isPermaLink='false'>tag:blogger.com,1999:blog-5682413770770674096.post-2489557499927389722</guid>
			<pubDate>Tue, 20 Dec 2011 02:30:00 +0000</pubDate>
			<atom:updated>2011-12-20T19:27:46.194-05:00</atom:updated>
			<title>Post 1</title>
			<description>The blog &lt;b&gt;content&lt;/b&gt;.</description>
			<link>http://mangstacular.blogspot.com/2011/12/post1.html</link>
			<author>noreply@blogger.com (Michael Angstadt)</author>
			<media:thumbnail xmlns:media='http://search.yahoo.com/mrss/'
				url='http://4.bp.blogspot.com/-5NaIcZFxhTA/Tu_u5KSe2iI/AAAAAAAAAWI/mwP5lSfTtIY/s72-c/topup-logo.png'
				height='72' width='72' />
			<thr:total>0</thr:total>
		</item>
		<item>
			<!-- Post 2: a code sample whose "br" tags BlogDaoTest expects to be replaced with newlines -->
			<guid isPermaLink='false'>tag:blogger.com,1999:blog-5682413770770674096.post-3997914456363482410</guid>
			<pubDate>Fri, 16 Dec 2011 00:01:00 +0000</pubDate>
			<atom:updated>2011-12-15T19:01:31.041-05:00</atom:updated>
			<title>Post 2</title>
			<description>&lt;pre class="brush: xml"&gt;&amp;lt;script&lt;br /&gt; type="text/javascript"&lt;br /&gt; src="path/to/script.js"&amp;gt;&lt;br /&gt;&amp;lt;/script&amp;gt;&lt;br /&gt;&lt;/pre&gt;</description>
			<link>http://mangstacular.blogspot.com/2011/12/post2.html</link>
			<author>noreply@blogger.com (Michael Angstadt)</author>
			<media:thumbnail xmlns:media='http://search.yahoo.com/mrss/'
				url='http://2.bp.blogspot.com/-NhyR9uNz2kw/TuqJEwy2KiI/AAAAAAAAAVY/yRJpAseWG-o/s72-c/Abe-Lincoln-gwt.png'
				height='72' width='72' />
			<thr:total>1</thr:total>
		</item>
		<item>
			<!-- Post 3: an image link that should get Highslide attributes, next to a plain link that should not -->
			<guid isPermaLink='false'>tag:blogger.com,1999:blog-5682413770770674096.post-1634330854796202492</guid>
			<pubDate>Sun, 11 Dec 2011 17:23:00 +0000</pubDate>
			<atom:updated>2011-12-12T19:37:53.394-05:00</atom:updated>
			<title>Post 3</title>
			<description>&lt;a href="foobar.png"&gt;&lt;img src="foobar.thumb.png" /&gt;&lt;/a&gt;&lt;a href="http://www.google.com"&gt;Regular link&lt;/a&gt;</description>
			<link>http://mangstacular.blogspot.com/2011/12/post3.html</link>
			<author>noreply@blogger.com (Michael Angstadt)</author>
			<media:thumbnail xmlns:media='http://search.yahoo.com/mrss/'
				url='http://1.bp.blogspot.com/-MAbahhdlVmo/TuFmcnXf-qI/AAAAAAAAAUQ/BqKZ30E4isQ/s72-c/gwt-logo.png'
				height='72' width='72' />
			<thr:total>5</thr:total>
		</item>
	</channel>
</rss>