adds support for nested formats

This commit is contained in:
Jessica Lynn Suttles 2013-02-06 12:15:07 -08:00
parent d3018451c8
commit 85071b8e60
7 changed files with 181 additions and 102 deletions

View file

@ -1,12 +1,5 @@
module Microformats2 module Microformats2
class Collection < Parser class Collection < Parser
attr_accessor :formats
# Starts with an empty, ordered list of parsed formats, then hands off to
# the parent class initializer.
def initialize
@formats = []
super
end
def to_hash def to_hash
hash = { items: [] } hash = { items: [] }
@formats.each do |format| @formats.each do |format|
@ -18,38 +11,5 @@ module Microformats2
def to_json def to_json
to_hash.to_json to_hash.to_json
end end
# Regex selecting the HTML classes this parser acts on: any class name that
# begins with "h-".
def html_class_regex
/^h-/
end
private
# Builds (or reuses) a Ruby class named after the element's first format
# class, parses the element with it, and records the result.
#
# microformat  - the element to parse
# html_classes - the matching class names found on that element; only the
#                first one is used for now
#
# Returns whatever define_method_and_set_value returns.
def parse_microformat(microformat, html_classes)
  primary_class = html_classes.first

  # class-name -> class_name
  method_name = primary_class.downcase.tr("-", "_")

  # class_name -> Class_name -> ClassName
  constant_name = method_name
    .gsub(/^([a-z])/) { Regexp.last_match(1).upcase }
    .gsub(/_(.)/) { Regexp.last_match(1).upcase }

  # Reuse an existing top-level constant, otherwise register a fresh
  # Microformats2::Format subclass under that name.
  klass =
    if Object.const_defined?(constant_name)
      Object.const_get(constant_name)
    else
      Object.const_set(constant_name, Class.new(Microformats2::Format))
    end

  parsed_format = klass.new.parse(microformat)

  # keep document order, and also expose the format under its own method
  @formats << parsed_format
  define_method_and_set_value(method_name, parsed_format)
end
end end
end end

View file

@ -1,14 +1,19 @@
module Microformats2 module Microformats2
class Format < Parser class Format < Parser
def type attr_reader :format_types
# ClassName -> className -> class-name
self.class.name.gsub(/^([A-Z])/){$1.downcase}.gsub(/([A-Z])/){"-" + $1.downcase} def parse(element)
html_classes = element.attribute("class").to_s.split
@format_types = html_classes.select { |html_class| html_class =~ /^(h-)/ }
super
end end
def to_hash def to_hash
hash = { type: [type], properties: {} } hash = { type: @format_types, properties: {} }
@added_methods.each do |method_name| @added_methods.each do |method_name|
hash[:properties][method_name.to_sym] = send(method_name).to_s value = send(method_name)
value = value.is_a?(Array) ? value : [value]
hash[:properties][method_name.to_sym] = value.map(&:to_hash)
end end
hash hash
end end
@ -17,23 +22,27 @@ module Microformats2
to_hash.to_json to_hash.to_json
end end
# Regex selecting the HTML classes this parser acts on; it reuses the
# module-level property-prefix pattern.
def html_class_regex
Microformats2::PropertyPrefixesRegEx
end
private private
# Matches format classes ("h-") as well as property classes
# ("p-", "u-", "dt-", "e-"), so elements carrying either kind are selected.
def html_class_regex
/^(h-|p-|u-|dt-|e-)/
end
def parse_microformat(element, html_classes) def parse_microformat(element, html_classes)
html_classes.each do |html_class| format_classes = html_classes.select { |html_class| html_class =~ /^(h-)/ }
property_classes = html_classes.select { |html_class| html_class =~ Microformats2::Property::PrefixesRegEx }
property_classes.each do |property_class|
# p-class-name -> p # p-class-name -> p
prefix = html_class.split("-").first prefix = property_class.split("-").first
# p-class-name -> class_name # p-class-name -> class_name
method_name = html_class.split("-")[1..-1].join("_") method_name = property_class.split("-")[1..-1].join("_")
# avoid overriding Object#class # avoid overriding Object#class
method_name = "klass" if method_name == "class" method_name = "klass" if method_name == "class"
# parse property # parse property
value = Microformats2::PropertyPrefixes[prefix].parse(element) value = Microformats2::Property::Parsers[prefix].new.parse(element, format_classes)
# save property under custom method # save property under custom method
define_method_and_set_value(method_name, value) define_method_and_set_value(method_name, value)

View file

@ -1,11 +1,13 @@
module Microformats2 module Microformats2
class Parser class Parser
attr_accessor :added_methods attr_accessor :formats, :added_methods
def initialize def initialize
@formats = []
@added_methods = [] @added_methods = []
end end
# override and do interesting things here
def parse(element) def parse(element)
parse_nodeset(element.children) parse_nodeset(element.children)
self self
@ -15,14 +17,36 @@ module Microformats2
# override with regex to match before parsing microformat # override with regex to match before parsing microformat
def html_class_regex def html_class_regex
// /^(h-)/
end end
# override and do interesting things here # override and do interesting things here
def parse_microformat(element, html_classes) def parse_microformat(microformat, html_classes)
element # only worry about the first format for now
html_class = html_classes.first
# class-name -> class_name
method_name = html_class.downcase.gsub("-","_")
# class_name -> Class_name -> ClassName
constant_name = method_name.gsub(/^([a-z])/){$1.upcase}.gsub(/_(.)/){$1.upcase}
# get ruby class for microformat
if Object.const_defined?(constant_name)
klass = Object.const_get(constant_name)
else
klass = Class.new(Microformats2::Format)
Object.const_set constant_name, klass
end end
# parse microformat
value = klass.new.parse(microformat)
# save microformat in array in order
@formats << value
# save microformat under custom method
define_method_and_set_value(method_name, value)
end
def parse_nodeset(nodeset) def parse_nodeset(nodeset)
nodeset.map { |node| parse_node(node) } nodeset.map { |node| parse_node(node) }
@ -37,7 +61,7 @@ module Microformats2
def parse_element(element) def parse_element(element)
html_classes = element.attribute("class").to_s.split html_classes = element.attribute("class").to_s.split
html_classes.keep_if { |html_class| html_class =~ html_class_regex } html_classes = html_classes.select { |html_class| html_class =~ html_class_regex }
if html_classes.length >= 1 if html_classes.length >= 1
parse_microformat(element, html_classes) parse_microformat(element, html_classes)

View file

@ -1,32 +1,64 @@
module Microformats2 module Microformats2
class TextProperty module Property
def parse(element) class Parser < Microformats2::Parser
element.text.gsub(/\n+/, " ").gsub(/\s+/, " ").strip attr_accessor :value
# Parses a property element.
#
# If the element also carries format classes, it is first parsed as a
# nested microformat; the flat value is extracted in every case.
#
# element        - the node handed to parse_microformat / parse_flat_element
# format_classes - format class names found on the element (default: none)
#
# Returns self.
def parse(element, format_classes=[])
  parse_microformat(element, format_classes) unless format_classes.empty?
  @value = parse_flat_element(element)
  self
end
def to_hash
if @formats.empty?
hash_safe_value
else
{ value: hash_safe_value }.merge @formats.first.to_hash
end end
end end
class UrlProperty
def parse(element) def hash_safe_value
(element.attribute("href") || property.text).to_s @value
end end
end end
class DateTimeProperty
def parse(element) class Text < Property::Parser
DateTime.parse(element.attribute("datetime") || property.text) def parse_flat_element(element)
rescue ArgumentError => e
element.attribute("datetime") || property.text
end
end
class EmbeddedProperty
def parse(element)
element.text.gsub(/\n+/, " ").gsub(/\s+/, " ").strip element.text.gsub(/\n+/, " ").gsub(/\s+/, " ").strip
end end
end end
PropertyPrefixes = { class Url < Property::Parser
"p" => TextProperty.new, def parse_flat_element(element)
"u" => UrlProperty.new, (element.attribute("href") || property.text).to_s
"dt" => DateTimeProperty.new, end
"e" => EmbeddedProperty.new end
}
PropertyPrefixesRegEx = /^(p-|u-|dt-|e-)/ class DateTime < Property::Parser
# Extracts the property's date/time value from the element.
#
# Prefers the "datetime" attribute and falls back to the element's own text.
#
# element - node responding to #attribute(name) and #text
#           (presumably a Nokogiri element -- confirm against the caller)
#
# Returns a ::DateTime when the raw value parses, otherwise the raw value
# unchanged.
def parse_flat_element(element)
  # The fallback must read from `element`: there is no `property` in this
  # scope, so referencing it raised NameError whenever the attribute was
  # missing, and `rescue ArgumentError` does not catch that.
  raw_value = element.attribute("datetime") || element.text
  ::DateTime.parse(raw_value)
rescue ArgumentError
  # Unparseable input is passed through as-is rather than failing the parse.
  raw_value
end
# Value that to_hash places in the hash: the stored value converted with
# to_s (the base parser returns @value as-is).
def hash_safe_value
@value.to_s
end
end
# Parser registered for the "e" property prefix.
class Embedded < Property::Parser
  # Returns the element's text with every run of whitespace (newlines
  # included) collapsed to a single space and both ends trimmed.
  def parse_flat_element(element)
    element.text.gsub(/\s+/, " ").strip
  end
end
# Maps a property class prefix to the parser class that handles it.
# Frozen so the shared lookup table cannot be mutated by accident at runtime.
Parsers = {
  "p" => Text,
  "u" => Url,
  "dt" => DateTime,
  "e" => Embedded
}.freeze

# Matches HTML class names that start with one of the property prefixes above.
PrefixesRegEx = /^(p-|u-|dt-|e-)/
end
end end

View file

@ -2,7 +2,7 @@ require "spec_helper"
require "microformats2" require "microformats2"
describe Microformats2::Collection do describe Microformats2::Collection do
describe "with simple h-card" do describe "with simple .h-card" do
before do before do
html = "spec/support/simple_hcard.html" html = "spec/support/simple_hcard.html"
@collection = Microformats2.parse(html) @collection = Microformats2.parse(html)
@ -13,29 +13,68 @@ describe Microformats2::Collection do
@collection.h_card.should be_kind_of HCard @collection.h_card.should be_kind_of HCard
end end
it "assigns .h-card .p-name to HCard#name" do it "assigns .h-card .p-name to HCard#name" do
@collection.h_card.name.should == "Jessica Lynn Suttles" @collection.h_card.name.value.should == "Jessica Lynn Suttles"
end end
it "assigns .h-card .u-url to HCard#url" do it "assigns both .h-card .u-url to HCard#url" do
@collection.h_card.url.should == "http://twitter.com/jlsuttles" urls = ["http://flickr.com/jlsuttles", "http://twitter.com/jlsuttles"]
@collection.h_card.url.map(&:value).should == urls
end end
it "assings .h-card .dt-bday to HCard#bday" do it "assings .h-card .dt-bday to HCard#bday" do
@collection.h_card.bday.should be_kind_of DateTime @collection.h_card.bday.value.should be_kind_of DateTime
@collection.h_card.bday.to_s.should == "1990-10-15T20:45:33-08:00" @collection.h_card.bday.value.to_s.should == "1990-10-15T20:45:33-08:00"
end end
it "assigns .h-card .e-content to HCard#content" do it "assigns .h-card .e-content to HCard#content" do
@collection.h_card.content.should == "Vegan. Cat lover. Coder." @collection.h_card.content.value.should == "Vegan. Cat lover. Coder."
end end
end end
describe "#to_hash" do describe "#to_hash" do
it "returns the correct Hash" do it "returns the correct Hash" do
hash = { hash = {
:items => [{ :type => ["h-card"], :items => [{
:type => ["h-card"],
:properties => { :properties => {
:url => "http://twitter.com/jlsuttles", :url => ["http://flickr.com/jlsuttles", "http://twitter.com/jlsuttles"],
:name => "Jessica Lynn Suttles", :name => ["Jessica Lynn Suttles"],
:bday => "1990-10-15T20:45:33-08:00", :bday => ["1990-10-15T20:45:33-08:00"],
:content => "Vegan. Cat lover. Coder." :content => ["Vegan. Cat lover. Coder."]
}
}]
}
@collection.to_hash.should == hash
end
end
end
describe "with .h-entry .p-author.h-card nested" do
before do
html = "spec/support/nested_hentry.html"
@collection = Microformats2.parse(html)
end
describe "#parse" do
it "creates ruby class HEntry" do
@collection.h_entry.should be_kind_of HEntry
end
it "assigns .h-entry .p-author to HEntry#author" do
@collection.h_entry.author.value.should == "Jessica Lynn Suttles"
end
end
describe "#to_hash" do
it "returns the correct Hash" do
hash = {
:items => [{
:type => ["h-entry"],
:properties => {
:author => [{
:value => "Jessica Lynn Suttles",
:type => ["h-card", "h-org"],
:properties => {
:url => ["http://twitter.com/jlsuttles"],
:name => ["Jessica Lynn Suttles"]
}
}]
} }
}] }]
} }

View file

@ -0,0 +1,12 @@
<!DOCTYPE html>
<html>
<body>
<div class="h-entry">
<div class="p-author h-card h-org">
<a href="http://twitter.com/jlsuttles" class="u-url p-name">
Jessica Lynn Suttles
</a>
</div>
</div>
</body>
</html>

View file

@ -1,10 +1,13 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd"> <!DOCTYPE html>
<html> <html>
<body> <body>
<div class="h-card"> <div class="h-card">
<a href="http://twitter.com/jlsuttles" class="u-url p-name"> <a href="http://flickr.com/jlsuttles" class="u-url p-name">
Jessica Lynn Suttles Jessica Lynn Suttles
</a> </a>
<a href="http://twitter.com/jlsuttles" class="u-url">
@jlsuttles
</a>
<time class="dt-bday" datetime="1990-10-15T20:45:33-08:00"> <time class="dt-bday" datetime="1990-10-15T20:45:33-08:00">
October 15, 1990 October 15, 1990
</time> </time>