Tuesday, December 7, 2010

Parse an RSS feed using C#

Here is a quick example of how to parse an RSS feed in C# using LINQ to XML (.NET 3.5 and higher). It is a basic example implemented as a static method in a console application; for a real project, however, you may want to create a set of classes that provide a more object-oriented approach.

For this example I am using two objects to hold the RSS item’s data (code in Listings 1 and 2).

Listing 1 – RssEnclosure Object
I use this object to hold RSS enclosures (<enclosure />), which provide a way of attaching multimedia content to RSS feeds (commonly found in podcast feeds).

/// <summary>
    /// Holds the data read from an RSS &lt;enclosure /&gt; element:
    /// its url, its length and its item type.
    /// </summary>
    public class RssEnclosure
    {
        /// <summary>
        /// Gets or sets the url of the enclosure.
        /// </summary>
        public string Url { get; set; }

        /// <summary>
        /// Gets or sets the length of the enclosure.
        /// </summary>
        public double Length { get; set; }

        /// <summary>
        /// Gets or sets the item type of the enclosure.
        /// </summary>
        public string ItemType { get; set; }
    }



Listing 2 – RssItem Object
This object holds your basic RSS item information.

    /// <summary>
    /// Generic RssItem
    /// </summary>
    public class RssItem
    {
        private string _title;
        private string _link;
        private string _description;
        private string _pubDate;
        private string _guid;

        private string _comments;
        private string _author;
        private RssEnclosure _enclosure;

        /// <summary>
        /// Default constructor
        /// </summary>
        public RssItem()
        {
            this.Initialize("", "", "", "", "","","",new RssEnclosure());
        }

        /// <summary>
        /// Second constructor
        /// </summary>
        /// <param name="title">title</param>
        /// <param name="link">link</param>
        /// <param name="description">description</param>
        /// <param name="pubDate">pubDate</param>
        /// <param name="guid">guid</param>
        /// <param name="author">author</param>
        /// <param name="comment">comment</param>
        /// <param name="enclosure">enclosure</param>
 public RssItem(string title, string link, string description,
  string pubDate, string guid,
  string comment, string author,
  RssEnclosure enclosure)
        {
            this.Initialize(title, link, description, pubDate,
 guid,comment,author,enclosure);
        }




        /// <summary>
        /// Initialize private fields
        /// </summary>
        /// <param name="title">title</param>
        /// <param name="link">link</param>
        /// <param name="description">description</param>
        /// <param name="pubDate">pubDate</param>
        /// <param name="guid">guid</param>
        /// <param name="author">author</param>
        /// <param name="comment">comment</param>
        /// <param name="enclosure">enclosure</param>
        private void Initialize(string title, string link, string description,
 string pubDate, string guid,
 string comment, string author,
 RssEnclosure enclosure)
        {
            _title = title;
            _link = link;
            _description = description;
            _pubDate = pubDate;
            _guid = guid;
            _comments = "";
            _author = "";
            _enclosure = new RssEnclosure();
        }


        #region "Public properties"

        ///<summary>
        ///Set/Get title
        ///</summary>
        public string Title
        {
            set { this._title = value; }
            get { return this._title; }
        }


        ///<summary>
        ///Set/Get link
        ///</summary>
        public string Link
        {
            set { this._link = value; }
            get { return this._link; }
        }


        ///<summary>
        ///Set/Get description
        ///</summary>
        public string Description
        {
            set { this._description = value; }
            get { return this._description; }
        }


        ///<summary>
        ///Set/Get pubDate
        ///</summary>
        public string PubDate
        {
            set { this._pubDate = value; }
            get { return this._pubDate; }
        }


        ///<summary>
        ///Set/Get guid
        ///</summary>
        public string Guid
        {
            set { this._guid = value; }
            get { return this._guid; }
        }

        /// <summary>
        /// Set/Get comment
        /// </summary>
        public string Comments
        {
            set { _comments = value; }
            get { return _comments; }
        }

        /// <summary>
        /// Set/Get author
        /// </summary>
        public string Author
        {
            set { _author = value; }
            get { return _author; }
        }

        /// <summary>
        /// Set/Get enclosure
        /// </summary>
        public RssEnclosure Enclosure
        {
            set { _enclosure = value; }
            get { return _enclosure; }
        }


        #endregion

    }







Listing 3 - The RSS Parsing Method

/// <summary>
/// Loads an RSS feed, builds an RssItem for every item element found under
/// each channel element, and prints each item's title and link to the console.
/// </summary>
/// <param name="feed">URI of the feed; passed directly to XElement.Load.</param>
public static void ParseRss(string feed)
{
    // Fully qualified so the listing needs no extra using directive.
    System.Collections.Generic.List<RssItem> items =
        new System.Collections.Generic.List<RssItem>();

    foreach (var channel in XElement.Load(feed).Elements("channel"))
    {
        foreach (var node in channel.Elements())
        {
            // Only non-empty <item> elements carry data worth collecting.
            if (node.Name.LocalName != "item" || !node.HasElements)
            {
                continue;
            }

            RssItem rssItem = new RssItem();

            foreach (var field in node.Elements())
            {
                // The name is lower-cased (culture-invariant) before matching,
                // so every case label below must itself be all lower case.
                switch (field.Name.LocalName.ToLowerInvariant())
                {
                    case "title":
                        rssItem.Title = field.Value;
                        break;
                    case "link":
                        rssItem.Link = field.Value;
                        break;
                    case "description":
                        rssItem.Description = field.Value;
                        break;
                    case "author":
                        rssItem.Author = field.Value;
                        break;
                    case "pubdate":
                        rssItem.PubDate = field.Value;
                        break;
                    case "comments":
                        rssItem.Comments = field.Value;
                        break;
                    case "guid":
                        rssItem.Guid = field.Value;
                        break;
                    case "enclosure":
                        {
                            RssEnclosure enclosure = new RssEnclosure();

                            // Casting an XAttribute to string yields null when the
                            // attribute is absent, instead of throwing like .Value.
                            enclosure.Url = (string)field.Attribute("url");
                            enclosure.ItemType = (string)field.Attribute("type");

                            // TryParse leaves 0 when the attribute is missing or not numeric.
                            double length;
                            double.TryParse((string)field.Attribute("length"), out length);
                            enclosure.Length = length;

                            rssItem.Enclosure = enclosure;
                            break;
                        }
                }
            }

            items.Add(rssItem);
        }
    }

    //Print content in the console
    foreach (RssItem item in items)
    {
        Console.WriteLine(item.Title);
        Console.WriteLine(item.Link);
        Console.WriteLine();
        Console.WriteLine();
    }
}


Usage:

Just call the method, passing the URL or file path of an RSS feed, and it will print each item's title and link to the console.

// "rss feed" is a placeholder: replace it with the URI of the feed to load.
ParseRss("rss feed");


No comments:

Post a Comment