Walks the DOM tree looking for root microformat classes

This commit is contained in:
Jessica Lynn Suttles 2013-02-04 14:55:06 -08:00
parent 07d510ab7d
commit e9ce55afea
2 changed files with 39 additions and 1 deletions

View file

@ -5,7 +5,9 @@ require "microformats2/version"
module Microformats2 module Microformats2
def self.parse(html) def self.parse(html)
html = read_html(html) html = read_html(html)
Nokogiri::HTML(html) document = Nokogiri::HTML(html)
parsed_document = parse_nodeset(document.children)
parsed_document.flatten.compact
end end
def self.read_html(html) def self.read_html(html)
@ -13,4 +15,34 @@ module Microformats2
rescue Errno::ENOENT => e rescue Errno::ENOENT => e
html html
end end
# Runs parse_node over every member of the given collection.
#
# nodeset - any collection responding to map (called with document/element
#           children elsewhere in this module).
#
# Returns an Array holding one parse_node result per member, in order.
def self.parse_nodeset(nodeset)
  nodeset.map { |child| parse_node(child) }
end
# Dispatches a single node to the parser that matches its class.
#
# node - a Nokogiri::XML::NodeSet, a Nokogiri::XML::Element, or anything else.
#
# Returns the parse_nodeset result for a NodeSet, the parse_element result
# for an Element, and nil for every other kind of node.
def self.parse_node(node)
  case node
  when Nokogiri::XML::NodeSet
    parse_nodeset(node)
  when Nokogiri::XML::Element
    parse_element(node)
  end
end
# Checks one element for a root microformat class (a class name starting
# with "h-") and either reports it or keeps walking down the tree.
#
# element - object responding to attribute("class") and children.
#
# Returns the placeholder String "YAY MICROFORMAT" when any of the element's
# class names starts with "h-"; otherwise returns whatever parse_nodeset
# yields for the element's children.
def self.parse_element(element)
  # to_s turns a nil (missing) class attribute into "", which splits to []
  html_classes = element.attribute("class").to_s.split

  # any? stops at the first root class instead of scanning the whole list
  if html_classes.any? { |html_class| html_class.start_with?("h-") }
    # found root microformat, yay
    "YAY MICROFORMAT"
  else
    # no root microformat on this element, so look at its children
    parse_nodeset(element.children)
  end
end
end end

View file

@ -2,6 +2,12 @@ require "spec_helper"
require "microformats2" require "microformats2"
describe Microformats2 do describe Microformats2 do
# Exercises Microformats2.parse, handing it the path string of a fixture.
describe "::parse" do
it "returns an array of found microformats" do
# NOTE(review): presumably this fixture holds exactly one root "h-*" element;
# confirm against spec/support/simple.html.
html = "spec/support/simple.html"
# "YAY MICROFORMAT" is the placeholder parse_element returns for a root microformat.
Microformats2.parse(html).should == ["YAY MICROFORMAT"]
end
end
describe "::read_html" do describe "::read_html" do
before do before do
@html = <<-HTML.strip @html = <<-HTML.strip