walks dom tree looking for root microformat classes
This commit is contained in:
parent
07d510ab7d
commit
e9ce55afea
2 changed files with 39 additions and 1 deletions
|
@ -5,7 +5,9 @@ require "microformats2/version"
|
|||
module Microformats2
|
||||
# Parses +html+ and returns the root microformats found in it.
#
# The argument is first handed to `read_html` (not fully visible here;
# presumably it resolves a file path to its contents -- confirm against its
# definition), then parsed into a Nokogiri document exactly once.
#
# The document's children are walked with `parse_nodeset`, which yields a
# nested structure; it is flattened, and the `nil` entries produced for nodes
# that matched nothing are removed.
#
# Returns a flat Array of the values produced for each root microformat.
def self.parse(html)
  html = read_html(html)
  document = Nokogiri::HTML(html)
  parse_nodeset(document.children).flatten.compact
end
|
||||
|
||||
def self.read_html(html)
|
||||
|
@ -13,4 +15,34 @@ module Microformats2
|
|||
rescue Errno::ENOENT => e
|
||||
html
|
||||
end
|
||||
|
||||
# Runs `parse_node` over every member of +nodeset+.
#
# +nodeset+ only needs to respond to `map` (a Nokogiri node set or a plain
# Array both work).
#
# Returns the collection produced by `map`, holding one `parse_node` result
# per node, in the original order.
def self.parse_nodeset(nodeset)
  nodeset.map { |node| parse_node(node) }
end
|
||||
|
||||
# Dispatches a single node to the parser that matches its type.
#
# * a Nokogiri::XML::NodeSet is handed to `parse_nodeset`
# * a Nokogiri::XML::Element is handed to `parse_element`
#
# Any other kind of node falls through the `case` and yields `nil`.
def self.parse_node(node)
  case node
  when Nokogiri::XML::NodeSet then parse_nodeset(node)
  when Nokogiri::XML::Element then parse_element(node)
  end
end
|
||||
|
||||
# Checks whether +element+ is a root microformat and, if not, descends into
# its children.
#
# An element counts as a root microformat when any of the whitespace-separated
# tokens in its "class" attribute begins with "h-". A missing class attribute
# is treated as an empty class list (nil.to_s is "").
#
# Returns the placeholder String "YAY MICROFORMAT" for a root microformat,
# otherwise the result of `parse_nodeset` on the element's children.
def self.parse_element(element)
  html_classes = element.attribute("class").to_s.split

  # look for a root microformat class
  if html_classes.any? { |html_class| html_class.start_with?("h-") }
    # found a root microformat, yay
    "YAY MICROFORMAT"
  else
    # no root microformat here, look at the children
    parse_nodeset(element.children)
  end
end
|
||||
end
|
||||
|
|
|
@ -2,6 +2,12 @@ require "spec_helper"
|
|||
require "microformats2"
|
||||
|
||||
describe Microformats2 do
|
||||
# Exercises Microformats2.parse end to end using a fixture path.
# NOTE(review): the expectation implies spec/support/simple.html contains
# exactly one root microformat -- confirm against the fixture.
describe "::parse" do
  it "returns an array of found microformats" do
    html = "spec/support/simple.html"
    Microformats2.parse(html).should == ["YAY MICROFORMAT"]
  end
end
|
||||
describe "::read_html" do
|
||||
before do
|
||||
@html = <<-HTML.strip
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue