From e9ce55afeabf34e4e689b10b71be481279b7df8d Mon Sep 17 00:00:00 2001
From: Jessica Lynn Suttles
Date: Mon, 4 Feb 2013 14:55:06 -0800
Subject: [PATCH] walks dom tree looking for root microformat classes

---
 lib/microformats2.rb           | 32 +++++++++++++++++++++++++++++++-
 spec/lib/microformats2_spec.rb |  6 ++++++
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/lib/microformats2.rb b/lib/microformats2.rb
index 03e1ac8..0c60890 100644
--- a/lib/microformats2.rb
+++ b/lib/microformats2.rb
@@ -5,7 +5,10 @@ require "microformats2/version"
 module Microformats2
+  # Runs +html+ through read_html, parses the result with Nokogiri and returns
+  # a flat, nil-free array with one entry per root microformat that was found.
   def self.parse(html)
     html = read_html(html)
-    Nokogiri::HTML(html)
+    document = Nokogiri::HTML(html)
+    parse_nodeset(document.children).flatten.compact
   end
 
   def self.read_html(html)
@@ -13,4 +16,31 @@ module Microformats2
   rescue Errno::ENOENT => e
     html
   end
+
+  # Maps every node of +nodeset+ through parse_node. The result may contain
+  # nested arrays and nils; parse flattens and compacts it.
+  def self.parse_nodeset(nodeset)
+    nodeset.map { |node| parse_node(node) }
+  end
+
+  # Dispatches on the node type: node sets and elements are walked, any other
+  # kind of node yields nil.
+  def self.parse_node(node)
+    case node
+    when Nokogiri::XML::NodeSet then parse_nodeset(node)
+    when Nokogiri::XML::Element then parse_element(node)
+    end
+  end
+
+  # Returns a placeholder string when +element+ carries a root microformat
+  # class (a class token starting with "h-"); otherwise walks its children.
+  def self.parse_element(element)
+    html_classes = element.attribute("class").to_s.split
+
+    if html_classes.any? { |html_class| html_class.start_with?("h-") }
+      "YAY MICROFORMAT"
+    else
+      parse_nodeset(element.children)
+    end
+  end
 end
diff --git a/spec/lib/microformats2_spec.rb b/spec/lib/microformats2_spec.rb
index 8e54335..903b6b6 100644
--- a/spec/lib/microformats2_spec.rb
+++ b/spec/lib/microformats2_spec.rb
@@ -2,6 +2,12 @@ require "spec_helper"
 require "microformats2"
 
 describe Microformats2 do
+  describe "::parse" do
+    it "returns an array of found microformats" do
+      html = "spec/support/simple.html"
+      Microformats2.parse(html).should == ["YAY MICROFORMAT"]
+    end
+  end
   describe "::read_html" do
     before do
       @html = <<-HTML.strip