adds support for nested formats
This commit is contained in:
parent
9fae5c7248
commit
c013f48a03
7 changed files with 181 additions and 102 deletions
|
@ -1,12 +1,5 @@
|
|||
module Microformats2
|
||||
class Collection < Parser
|
||||
attr_accessor :formats
|
||||
|
||||
def initialize
|
||||
@formats = []
|
||||
super
|
||||
end
|
||||
|
||||
def to_hash
|
||||
hash = { items: [] }
|
||||
@formats.each do |format|
|
||||
|
@ -18,38 +11,5 @@ module Microformats2
|
|||
def to_json
|
||||
to_hash.to_json
|
||||
end
|
||||
|
||||
def html_class_regex
|
||||
/^h-/
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def parse_microformat(microformat, html_classes)
|
||||
# only worry about the first format for now
|
||||
html_class = html_classes.first
|
||||
|
||||
# class-name -> class_name
|
||||
method_name = html_class.downcase.gsub("-","_")
|
||||
# class_name -> Class_name -> ClassName
|
||||
constant_name = method_name.gsub(/^([a-z])/){$1.upcase}.gsub(/_(.)/){$1.upcase}
|
||||
|
||||
# get ruby class for microformat
|
||||
if Object.const_defined?(constant_name)
|
||||
klass = Object.const_get(constant_name)
|
||||
else
|
||||
klass = Class.new(Microformats2::Format)
|
||||
Object.const_set constant_name, klass
|
||||
end
|
||||
|
||||
# parse microformat
|
||||
value = klass.new.parse(microformat)
|
||||
|
||||
# save microformat in array in order
|
||||
@formats << value
|
||||
|
||||
# save microformat under custom method
|
||||
define_method_and_set_value(method_name, value)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,14 +1,19 @@
|
|||
module Microformats2
|
||||
class Format < Parser
|
||||
def type
|
||||
# ClassName -> className -> class-name
|
||||
self.class.name.gsub(/^([A-Z])/){$1.downcase}.gsub(/([A-Z])/){"-" + $1.downcase}
|
||||
attr_reader :format_types
|
||||
|
||||
def parse(element)
|
||||
html_classes = element.attribute("class").to_s.split
|
||||
@format_types = html_classes.select { |html_class| html_class =~ /^(h-)/ }
|
||||
super
|
||||
end
|
||||
|
||||
def to_hash
|
||||
hash = { type: [type], properties: {} }
|
||||
hash = { type: @format_types, properties: {} }
|
||||
@added_methods.each do |method_name|
|
||||
hash[:properties][method_name.to_sym] = send(method_name).to_s
|
||||
value = send(method_name)
|
||||
value = value.is_a?(Array) ? value : [value]
|
||||
hash[:properties][method_name.to_sym] = value.map(&:to_hash)
|
||||
end
|
||||
hash
|
||||
end
|
||||
|
@ -17,23 +22,27 @@ module Microformats2
|
|||
to_hash.to_json
|
||||
end
|
||||
|
||||
def html_class_regex
|
||||
Microformats2::PropertyPrefixesRegEx
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# look for both formats and properties
|
||||
def html_class_regex
|
||||
/^(h-|p-|u-|dt-|e-)/
|
||||
end
|
||||
|
||||
def parse_microformat(element, html_classes)
|
||||
html_classes.each do |html_class|
|
||||
format_classes = html_classes.select { |html_class| html_class =~ /^(h-)/ }
|
||||
property_classes = html_classes.select { |html_class| html_class =~ Microformats2::Property::PrefixesRegEx }
|
||||
|
||||
property_classes.each do |property_class|
|
||||
# p-class-name -> p
|
||||
prefix = html_class.split("-").first
|
||||
prefix = property_class.split("-").first
|
||||
# p-class-name -> class_name
|
||||
method_name = html_class.split("-")[1..-1].join("_")
|
||||
method_name = property_class.split("-")[1..-1].join("_")
|
||||
# avoid overriding Object#class
|
||||
method_name = "klass" if method_name == "class"
|
||||
|
||||
# parse property
|
||||
value = Microformats2::PropertyPrefixes[prefix].parse(element)
|
||||
value = Microformats2::Property::Parsers[prefix].new.parse(element, format_classes)
|
||||
|
||||
# save property under custom method
|
||||
define_method_and_set_value(method_name, value)
|
||||
|
|
|
@ -1,11 +1,13 @@
|
|||
module Microformats2
|
||||
class Parser
|
||||
attr_accessor :added_methods
|
||||
attr_accessor :formats, :added_methods
|
||||
|
||||
def initialize
|
||||
@formats = []
|
||||
@added_methods = []
|
||||
end
|
||||
|
||||
# override and do interesting things here
|
||||
def parse(element)
|
||||
parse_nodeset(element.children)
|
||||
self
|
||||
|
@ -15,14 +17,36 @@ module Microformats2
|
|||
|
||||
# override with regex to match before parsing microformat
|
||||
def html_class_regex
|
||||
//
|
||||
/^(h-)/
|
||||
end
|
||||
|
||||
# override and do interesting things here
|
||||
def parse_microformat(element, html_classes)
|
||||
element
|
||||
end
|
||||
# Builds a format object for an element carrying microformat classes and
# records it on this parser.
#
# microformat  - the element to parse; it is passed to klass.new.parse.
# html_classes - the matching class names; only the first one is used.
#
# The class name is turned into a method name ("h-card" -> "h_card") and a
# constant name ("HCard"). The constant is looked up on Object and, when it is
# missing, defined there as a new subclass of Microformats2::Format. The parsed
# value is appended to @formats and also handed to
# define_method_and_set_value under the derived method name.
#
# NOTE(review): constant_name is derived from the markup's class attribute and
# is resolved/defined on Object; a class name that does not map to a valid Ruby
# constant name would presumably make const_defined? raise - confirm against
# callers and inputs.
def parse_microformat(microformat, html_classes)
|
||||
# only worry about the first format for now
|
||||
html_class = html_classes.first
|
||||
|
||||
# class-name -> class_name
|
||||
method_name = html_class.downcase.gsub("-","_")
|
||||
# class_name -> Class_name -> ClassName
|
||||
constant_name = method_name.gsub(/^([a-z])/){$1.upcase}.gsub(/_(.)/){$1.upcase}
|
||||
|
||||
# get ruby class for microformat
|
||||
if Object.const_defined?(constant_name)
|
||||
klass = Object.const_get(constant_name)
|
||||
else
|
||||
klass = Class.new(Microformats2::Format)
|
||||
Object.const_set constant_name, klass
|
||||
end
|
||||
|
||||
# parse microformat
|
||||
value = klass.new.parse(microformat)
|
||||
|
||||
# save microformat in array in order
|
||||
@formats << value
|
||||
|
||||
# save microformat under custom method
|
||||
define_method_and_set_value(method_name, value)
|
||||
end
|
||||
|
||||
def parse_nodeset(nodeset)
|
||||
nodeset.map { |node| parse_node(node) }
|
||||
|
@ -37,7 +61,7 @@ module Microformats2
|
|||
|
||||
def parse_element(element)
|
||||
html_classes = element.attribute("class").to_s.split
|
||||
html_classes.keep_if { |html_class| html_class =~ html_class_regex }
|
||||
html_classes = html_classes.select { |html_class| html_class =~ html_class_regex }
|
||||
|
||||
if html_classes.length >= 1
|
||||
parse_microformat(element, html_classes)
|
||||
|
|
|
@ -1,32 +1,64 @@
|
|||
module Microformats2
|
||||
class TextProperty
|
||||
def parse(element)
|
||||
element.text.gsub(/\n+/, " ").gsub(/\s+/, " ").strip
|
||||
end
|
||||
end
|
||||
class UrlProperty
|
||||
def parse(element)
|
||||
(element.attribute("href") || property.text).to_s
|
||||
end
|
||||
end
|
||||
class DateTimeProperty
|
||||
def parse(element)
|
||||
DateTime.parse(element.attribute("datetime") || property.text)
|
||||
rescue ArgumentError => e
|
||||
element.attribute("datetime") || property.text
|
||||
end
|
||||
end
|
||||
class EmbeddedProperty
|
||||
def parse(element)
|
||||
element.text.gsub(/\n+/, " ").gsub(/\s+/, " ").strip
|
||||
end
|
||||
end
|
||||
module Property
|
||||
class Parser < Microformats2::Parser
|
||||
attr_accessor :value
|
||||
|
||||
PropertyPrefixes = {
|
||||
"p" => TextProperty.new,
|
||||
"u" => UrlProperty.new,
|
||||
"dt" => DateTimeProperty.new,
|
||||
"e" => EmbeddedProperty.new
|
||||
}
|
||||
PropertyPrefixesRegEx = /^(p-|u-|dt-|e-)/
|
||||
# Parses a property element and returns self.
#
# element        - the node carrying the property class.
# format_classes - class names passed through to parse_microformat; defaults
#                  to an empty list, in which case that step is skipped.
#
# When at least one format class is given, the element is first handed to
# parse_microformat. The flat value computed by parse_flat_element (which is
# not defined in this class; the subclasses visible nearby each define it) is
# always stored in @value afterwards.
def parse(element, format_classes=[])
|
||||
if format_classes.length >= 1
|
||||
parse_microformat(element, format_classes)
|
||||
end
|
||||
@value = parse_flat_element(element)
|
||||
self
|
||||
end
|
||||
|
||||
# Hash representation of this property.
#
# With no entries in @formats this is just hash_safe_value. Otherwise it is a
# Hash holding hash_safe_value under :value, merged with the to_hash of the
# first entry in @formats; any further entries are ignored, and keys coming
# from that entry win over :value on collision (Hash#merge semantics).
def to_hash
|
||||
if @formats.empty?
|
||||
hash_safe_value
|
||||
else
|
||||
{ value: hash_safe_value }.merge @formats.first.to_hash
|
||||
end
|
||||
end
|
||||
|
||||
def hash_safe_value
|
||||
@value
|
||||
end
|
||||
end
|
||||
|
||||
class Text < Property::Parser
|
||||
# Returns the element's text with every run of whitespace collapsed to a
# single space and leading/trailing whitespace removed.
#
# element - any object responding to #text.
def parse_flat_element(element)
  # \s+ already matches newlines, so a separate newline pass is unnecessary.
  element.text.gsub(/\s+/, " ").strip
end
|
||||
end
|
||||
|
||||
class Url < Property::Parser
|
||||
# Returns the URL value of a u-* property as a String.
#
# element - any object responding to #attribute(name) and #text.
#
# The href attribute is used when present; otherwise the element's own text
# is returned.
def parse_flat_element(element)
  # The fallback must read from `element`: the earlier `property.text`
  # referenced an undefined name and raised NameError whenever href was absent.
  (element.attribute("href") || element.text).to_s
end
|
||||
end
|
||||
|
||||
class DateTime < Property::Parser
|
||||
# Parses the value of a dt-* property.
#
# element - any object responding to #attribute(name) and #text.
#
# The datetime attribute is preferred; when it is absent the element's text
# is used. Returns a ::DateTime when the raw value parses, otherwise the raw
# value itself as a String.
def parse_flat_element(element)
  # Read from `element` (the earlier `property.text` was an undefined name) and
  # coerce to String so ::DateTime.parse is never handed an attribute node.
  raw = (element.attribute("datetime") || element.text).to_s
  ::DateTime.parse(raw)
rescue ArgumentError
  # Unparseable input: keep the raw text rather than failing the whole parse.
  raw
end
|
||||
def hash_safe_value
|
||||
@value.to_s
|
||||
end
|
||||
end
|
||||
|
||||
class Embedded < Property::Parser
|
||||
# Returns the element's text with every run of whitespace collapsed to a
# single space and leading/trailing whitespace removed.
#
# element - any object responding to #text.
def parse_flat_element(element)
  # \s+ already matches newlines, so a separate newline pass is unnecessary.
  element.text.gsub(/\s+/, " ").strip
end
|
||||
end
|
||||
|
||||
Parsers = {
|
||||
"p" => Text,
|
||||
"u" => Url,
|
||||
"dt" => DateTime,
|
||||
"e" => Embedded
|
||||
}
|
||||
PrefixesRegEx = /^(p-|u-|dt-|e-)/
|
||||
end
|
||||
end
|
||||
|
|
|
@ -2,7 +2,7 @@ require "spec_helper"
|
|||
require "microformats2"
|
||||
|
||||
describe Microformats2::Collection do
|
||||
describe "with simple h-card" do
|
||||
describe "with simple .h-card" do
|
||||
before do
|
||||
html = "spec/support/simple_hcard.html"
|
||||
@collection = Microformats2.parse(html)
|
||||
|
@ -13,29 +13,68 @@ describe Microformats2::Collection do
|
|||
@collection.h_card.should be_kind_of HCard
|
||||
end
|
||||
it "assigns .h-card .p-name to HCard#name" do
|
||||
@collection.h_card.name.should == "Jessica Lynn Suttles"
|
||||
@collection.h_card.name.value.should == "Jessica Lynn Suttles"
|
||||
end
|
||||
it "assigns .h-card .u-url to HCard#url" do
|
||||
@collection.h_card.url.should == "http://twitter.com/jlsuttles"
|
||||
it "assigns both .h-card .u-url to HCard#url" do
|
||||
urls = ["http://flickr.com/jlsuttles", "http://twitter.com/jlsuttles"]
|
||||
@collection.h_card.url.map(&:value).should == urls
|
||||
end
|
||||
it "assings .h-card .dt-bday to HCard#bday" do
|
||||
@collection.h_card.bday.should be_kind_of DateTime
|
||||
@collection.h_card.bday.to_s.should == "1990-10-15T20:45:33-08:00"
|
||||
@collection.h_card.bday.value.should be_kind_of DateTime
|
||||
@collection.h_card.bday.value.to_s.should == "1990-10-15T20:45:33-08:00"
|
||||
end
|
||||
it "assigns .h-card .e-content to HCard#content" do
|
||||
@collection.h_card.content.should == "Vegan. Cat lover. Coder."
|
||||
@collection.h_card.content.value.should == "Vegan. Cat lover. Coder."
|
||||
end
|
||||
end
|
||||
|
||||
describe "#to_hash" do
|
||||
it "returns the correct Hash" do
|
||||
hash = {
|
||||
:items => [{ :type => ["h-card"],
|
||||
:items => [{
|
||||
:type => ["h-card"],
|
||||
:properties => {
|
||||
:url => "http://twitter.com/jlsuttles",
|
||||
:name => "Jessica Lynn Suttles",
|
||||
:bday => "1990-10-15T20:45:33-08:00",
|
||||
:content => "Vegan. Cat lover. Coder."
|
||||
:url => ["http://flickr.com/jlsuttles", "http://twitter.com/jlsuttles"],
|
||||
:name => ["Jessica Lynn Suttles"],
|
||||
:bday => ["1990-10-15T20:45:33-08:00"],
|
||||
:content => ["Vegan. Cat lover. Coder."]
|
||||
}
|
||||
}]
|
||||
}
|
||||
@collection.to_hash.should == hash
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe "with .h-entry .p-author.h-card nested" do
|
||||
before do
|
||||
html = "spec/support/nested_hentry.html"
|
||||
@collection = Microformats2.parse(html)
|
||||
end
|
||||
|
||||
describe "#parse" do
|
||||
it "creates ruby class HEntry" do
|
||||
@collection.h_entry.should be_kind_of HEntry
|
||||
end
|
||||
it "assigns .h-entry .p-author to HEntry#author" do
|
||||
@collection.h_entry.author.value.should == "Jessica Lynn Suttles"
|
||||
end
|
||||
end
|
||||
|
||||
describe "#to_hash" do
|
||||
it "returns the correct Hash" do
|
||||
hash = {
|
||||
:items => [{
|
||||
:type => ["h-entry"],
|
||||
:properties => {
|
||||
:author => [{
|
||||
:value => "Jessica Lynn Suttles",
|
||||
:type => ["h-card", "h-org"],
|
||||
:properties => {
|
||||
:url => ["http://twitter.com/jlsuttles"],
|
||||
:name => ["Jessica Lynn Suttles"]
|
||||
}
|
||||
}]
|
||||
}
|
||||
}]
|
||||
}
|
||||
|
|
12
spec/support/nested_hentry.html
Normal file
12
spec/support/nested_hentry.html
Normal file
|
@ -0,0 +1,12 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<body>
|
||||
<div class="h-entry">
|
||||
<div class="p-author h-card h-org">
|
||||
<a href="http://twitter.com/jlsuttles" class="u-url p-name">
|
||||
Jessica Lynn Suttles
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
|
@ -1,10 +1,13 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<body>
|
||||
<div class="h-card">
|
||||
<a href="http://twitter.com/jlsuttles" class="u-url p-name">
|
||||
<a href="http://flickr.com/jlsuttles" class="u-url p-name">
|
||||
Jessica Lynn Suttles
|
||||
</a>
|
||||
<a href="http://twitter.com/jlsuttles" class="u-url">
|
||||
@jlsuttles
|
||||
</a>
|
||||
<time class="dt-bday" datetime="1990-10-15T20:45:33-08:00">
|
||||
October 15, 1990
|
||||
</time>
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue