#! /usr/bin/env ruby

require "rubygems"

require "cgi"
require "iconv"
require "open-uri"
require "rss/maker"
require "webrick"

require "nokogiri"

# One blog article, stored as a Hash. to_xhtml reads two keys:
#   :images_urls - list of image URLs
#   :texts       - list of plain-text paragraphs
class SkyrockblogArticle < Hash
  # Renders the article as an XHTML fragment: an outer <div> holding an
  # optional <div> of <img> tags followed by an optional <div> of <p> tags.
  #
  # Every URL and paragraph is HTML-escaped, because the values are plain
  # text and may contain "<", "&" or quotes that would otherwise corrupt the
  # markup. A missing key is treated as an empty list and a nil entry as an
  # empty string.
  #
  # Returns the fragment as a String.
  def to_xhtml
    sections = []

    images = Array(self[:images_urls])
    unless images.empty?
      tags = images.map { |url| "<img src=\"#{CGI.escapeHTML(url.to_s)}\" />" }
      sections << "<div>#{tags.join}</div>"
    end

    texts = Array(self[:texts])
    unless texts.empty?
      paragraphs = texts.map { |text| "<p>#{CGI.escapeHTML(text.to_s)}</p>" }
      sections << "<div>#{paragraphs.join}</div>"
    end

    "<div>#{sections.join}</div>"
  end
end

# Array subclass used as the container type for the articles of one page.
# It adds no behaviour of its own.
class SkyrockblogPage < Array
end

# Scraper for one Skyrock blog. Downloads the blog's HTML pages, extracts the
# articles and can render the most recent ones as an RSS 1.0 feed.
class Skyrockblog
  # French month names, as matched in the "Post... le" date line of an
  # article, mapped to their month number.
  FRENCH_MONTHS = {
    "janvier" => 1, "février" => 2, "mars" => 3, "avril" => 4, "mai" => 5,
    "juin" => 6, "juillet" => 7, "août" => 8, "septembre" => 9,
    "octobre" => 10, "novembre" => 11, "décembre" => 12
  }.freeze

  # URL of the first page of the blog.
  attr_reader :base_url

  public

  # user_name - the blog owner's pseudo; it is URL-escaped so that characters
  #             such as "&", "#" or spaces cannot alter the query string.
  def initialize(user_name)
    @base_url = "http://www.skyrock.com/blog/blog.php" +
          "?pseudo=#{CGI.escape(user_name.to_s)}&__FORCE_LANG=fr_FX"
    @doc = nil      # parsed document of the page fetched last
    @page = nil     # number of the page held in @doc
    @user_id = nil  # memoized result of #user_id
  end

  # Fetches page number +page+ and extracts its articles.
  #
  # Only ".bloc" elements whose id starts with "a-" and which contain both an
  # <h2> and a ".created_on" element are kept; the others are skipped.
  #
  # Returns a SkyrockblogPage of SkyrockblogArticle objects with the keys
  # :id, :title, :images_urls, :texts, :created_on (Time) and :permalink.
  # Raises ArgumentError when an article's date line cannot be parsed.
  def parse_page(page = 1)
    fetch_page(page)
    skyrockblog_page = SkyrockblogPage.new
    @doc.css(".bloc").each do |article|
      aid = article["id"]
      next unless aid && aid =~ /^a-/
      title_container = article.at("h2")
      next unless title_container
      created_on_container = article.at(".created_on")
      next unless created_on_container

      entry = SkyrockblogArticle.new
      entry[:id] = aid.sub(/^a-/, "")
      entry[:title] = title_container.inner_text
      # <img> tags without a src attribute are dropped.
      entry[:images_urls] = article.css("img").map { |image| image["src"] }.compact
      # Exact comparison on purpose: only divs whose class attribute is
      # precisely "text-container" are text paragraphs.
      entry[:texts] = article.css("div").
        select { |div| div["class"] == 'text-container' }.
        map { |div| div.inner_text }
      entry[:created_on] = parse_skyrockblog_date(created_on_container.inner_text)
      entry[:permalink] = permalink(entry[:id])
      skyrockblog_page << entry
    end
    skyrockblog_page
  end

  # Page numbers advertised by the "ul.pagination" element of the current
  # page.
  #
  # Returns the Range 1..last, where last is the highest "<n>.html" link
  # found, or the Integer 1 when the page has no pagination element. Both
  # forms expand to a list of page numbers with [*pagination].
  def pagination
    fetch_page(false)
    list = @doc.at("ul.pagination")
    return 1 unless list
    numbers = list.css("a").map { |link| link["href"].to_s[/(\d+)\.html$/, 1].to_i }
    1..([1] + numbers).max
  end

  # Numeric id of the blog owner, read from the "id_skynaute = ..." assignment
  # found in the page text. The value is memoized because #permalink needs it
  # for every article and scanning the whole document text is expensive.
  #
  # Returns the id as a String of digits.
  # Raises RuntimeError when the page does not contain the id.
  def user_id
    return @user_id if @user_id
    fetch_page(false)
    matches = /id_skynaute\s*=\s*"?(\d+)"?/.match(@doc.text)
    raise "id_skynaute not found in the blog page" unless matches
    @user_id = matches[1]
  end

  # Text of the <title> element of the current page.
  def title
    fetch_page(false)
    @doc.at("title").text
  end

  # Text of the first ".description" element of the current page.
  def description
    fetch_page(false)
    @doc.at(".description").text
  end

  # Collects articles page by page until at least +nb_max+ have been gathered
  # or every page has been read. Whole pages are appended, so the result may
  # hold more than +nb_max+ articles.
  #
  # Pages are walked from the last one down when the first page lists its
  # articles oldest-first (or holds fewer than two articles, in which case
  # the order cannot be determined).
  #
  # Returns an Array of SkyrockblogArticle.
  def fetch_articles(nb_max)
    first_page = parse_page(1)
    pages = [ *pagination ]
    if first_page.size < 2 ||
       first_page[0][:created_on] < first_page[1][:created_on]
      pages.reverse!
    end

    articles = [ ]
    pages.each do |page|
      # Page 1 is already parsed; reuse it instead of downloading it again.
      articles.concat(page == 1 ? first_page : parse_page(page))
      break if articles.size >= nb_max
    end
    articles
  end

  # Builds an RSS 1.0 feed from the articles returned by fetch_articles(15).
  #
  # Returns whatever RSS::Maker.make returns for the feed.
  def rss
    articles = fetch_articles(15)

    RSS::Maker.make("1.0") do |r|
      r.encoding = "UTF-8"
      r.channel.title = title
      r.channel.link = base_url
      r.channel.description = description
      # rdf:about must be a URI identifying the channel, not free text.
      r.channel.about = base_url
      r.items.do_sort = true

      articles.each do |article|
        i = r.items.new_item
        i.title = article[:title]
        i.date = article[:created_on]
        i.link = article[:permalink]
        i.description = article[:texts].first
        i.content_encoded = article.to_xhtml
      end
    end
  end

  protected

  # Converts the date line of an article into a local Time.
  #
  # skyrockblog_date - text containing
  #                    "Post<x> le <weekday> <day> <month> <year> <hh>:<mm>",
  #                    with <month> one of the FRENCH_MONTHS keys.
  #
  # The text is matched as-is. Transcoding it to ISO-8859-1 first made the
  # accented month names differ from the FRENCH_MONTHS keys.
  # NOTE(review): assumes the scraped text and this file's string literals
  # share one encoding (UTF-8) - confirm.
  #
  # Raises ArgumentError when the text does not match or the month is unknown.
  def parse_skyrockblog_date(skyrockblog_date)
    # \S+ after "Post" accepts the accented letter whether it is seen as one
    # character or as several bytes.
    matches = %r{Post\S+ le \S+ (\d+) (\S+) (\d+)\s+(\d+):(\d+)}.
      match(skyrockblog_date.to_s)
    unless matches
      raise ArgumentError, "unrecognized date: #{skyrockblog_date.inspect}"
    end
    month = FRENCH_MONTHS[matches[2]]
    unless month
      raise ArgumentError, "unknown month name: #{matches[2].inspect}"
    end
    Time.local(matches[3].to_i, month, matches[1].to_i,
               matches[4].to_i, matches[5].to_i)
  end

  # URL of page number +page+; pages <= 1 map to the base URL.
  def page_url(page)
    return @base_url if page <= 1
    "#{@base_url}&page=#{page}"
  end

  # Makes sure @doc holds a parsed page.
  #
  # page - page number to load, or false/nil for "whatever page is already
  #        loaded" (page 1 is downloaded if nothing has been loaded yet).
  #
  # Nothing is downloaded when the requested page is already in @doc.
  def fetch_page(page)
    return if @doc && (!page || @page == page)
    target = page || 1
    # URI#open (from open-uri) is used instead of Kernel#open, which no
    # longer opens URLs on Ruby 3. The block form closes the IO afterwards.
    @doc = URI.parse(page_url(target)).open { |io| Nokogiri::HTML(io) }
    # Record the real page number so a later request for it is a cache hit.
    @page = target
  end

  # Permanent link of the article with the given id.
  def permalink(id)
    "http://www.skyrock.com/direct.php/#{id}:#{user_id}"
  end
end

# WEBrick servlet that answers GET requests with the RSS feed of the blog
# named by the "u" query parameter.
class RSSServlet < WEBrick::HTTPServlet::AbstractServlet
  # Responds with:
  #   412 - when the "u" parameter is missing or empty
  #   404 - when fetching the blog fails with an HTTP error
  #   200 - otherwise, with the feed XML as body and a text/xml content type
  def do_GET(req, res)
    user_name = req.query['u']
    if user_name.nil? || user_name.empty?
      res.status = 412
      return
    end

    begin
      res.body = Skyrockblog.new(user_name).rss.to_xml
      res['Content-Type'] = "text/xml"
    rescue OpenURI::HTTPError
      # Skyrockblog.new never returns nil, so a missing blog can only be
      # noticed here, when the download fails.
      # NOTE(review): every upstream HTTP error is reported as 404 - confirm
      # that is acceptable for upstream 5xx responses.
      res.status = 404
    end
  end
end

# Start an HTTP server on port 2000 with the feed servlet mounted at /rss.
# An INT signal (Ctrl-C) asks the server to shut down.
server = WEBrick::HTTPServer.new(:Port => 2000)
server.mount("/rss", RSSServlet)
Signal.trap("INT") { server.shutdown }
server.start
