diff --git a/lib/microformats2/collection.rb b/lib/microformats2/collection.rb index 4037011..9fef9f4 100644 --- a/lib/microformats2/collection.rb +++ b/lib/microformats2/collection.rb @@ -1,12 +1,5 @@ module Microformats2 class Collection < Parser - attr_accessor :formats - - def initialize - @formats = [] - super - end - def to_hash hash = { items: [] } @formats.each do |format| @@ -18,38 +11,5 @@ module Microformats2 def to_json to_hash.to_json end - - def html_class_regex - /^h-/ - end - - private - - def parse_microformat(microformat, html_classes) - # only worry about the first format for now - html_class = html_classes.first - - # class-name -> class_name - method_name = html_class.downcase.gsub("-","_") - # class_name -> Class_name -> ClassName - constant_name = method_name.gsub(/^([a-z])/){$1.upcase}.gsub(/_(.)/){$1.upcase} - - # get ruby class for microformat - if Object.const_defined?(constant_name) - klass = Object.const_get(constant_name) - else - klass = Class.new(Microformats2::Format) - Object.const_set constant_name, klass - end - - # parse microformat - value = klass.new.parse(microformat) - - # save microformat in array in order - @formats << value - - # save microformat under custom method - define_method_and_set_value(method_name, value) - end end end diff --git a/lib/microformats2/format.rb b/lib/microformats2/format.rb index 74b618c..21a8734 100644 --- a/lib/microformats2/format.rb +++ b/lib/microformats2/format.rb @@ -1,14 +1,19 @@ module Microformats2 class Format < Parser - def type - # ClassName -> className -> class-name - self.class.name.gsub(/^([A-Z])/){$1.downcase}.gsub(/([A-Z])/){"-" + $1.downcase} + attr_reader :format_types + + def parse(element) + html_classes = element.attribute("class").to_s.split + @format_types = html_classes.select { |html_class| html_class =~ /^(h-)/ } + super end def to_hash - hash = { type: [type], properties: {} } + hash = { type: @format_types, properties: {} } @added_methods.each do |method_name| - hash[:properties][method_name.to_sym] = send(method_name).to_s + value = send(method_name) + value = value.is_a?(Array) ? value : [value] + hash[:properties][method_name.to_sym] = value.map(&:to_hash) end hash end @@ -17,23 +22,27 @@ module Microformats2 to_hash.to_json end - def html_class_regex - Microformats2::PropertyPrefixesRegEx - end - private + # look for both formats and properties + def html_class_regex + /^(h-|p-|u-|dt-|e-)/ + end + def parse_microformat(element, html_classes) - html_classes.each do |html_class| + format_classes = html_classes.select { |html_class| html_class =~ /^(h-)/ } + property_classes = html_classes.select { |html_class| html_class =~ Microformats2::Property::PrefixesRegEx } + + property_classes.each do |property_class| # p-class-name -> p - prefix = html_class.split("-").first + prefix = property_class.split("-").first # p-class-name -> class_name - method_name = html_class.split("-")[1..-1].join("_") + method_name = property_class.split("-")[1..-1].join("_") # avoid overriding Object#class method_name = "klass" if method_name == "class" # parse property - value = Microformats2::PropertyPrefixes[prefix].parse(element) + value = Microformats2::Property::Parsers[prefix].new.parse(element, format_classes) # save property under custom method define_method_and_set_value(method_name, value) diff --git a/lib/microformats2/parser.rb b/lib/microformats2/parser.rb index 44401ef..ffb5b48 100644 --- a/lib/microformats2/parser.rb +++ b/lib/microformats2/parser.rb @@ -1,11 +1,13 @@ module Microformats2 class Parser - attr_accessor :added_methods + attr_accessor :formats, :added_methods def initialize + @formats = [] @added_methods = [] end + # override and do interesting things here def parse(element) parse_nodeset(element.children) self @@ -15,14 +17,36 @@ module Microformats2 # override with regex to match before parsing microformat def html_class_regex - // + /^(h-)/ end # override and do interesting things here - def parse_microformat(element, html_classes) - element - end + def parse_microformat(microformat, html_classes) + # only worry about the first format for now + html_class = html_classes.first + # class-name -> class_name + method_name = html_class.downcase.gsub("-","_") + # class_name -> Class_name -> ClassName + constant_name = method_name.gsub(/^([a-z])/){$1.upcase}.gsub(/_(.)/){$1.upcase} + + # get ruby class for microformat + if Object.const_defined?(constant_name) + klass = Object.const_get(constant_name) + else + klass = Class.new(Microformats2::Format) + Object.const_set constant_name, klass + end + + # parse microformat + value = klass.new.parse(microformat) + + # save microformat in array in order + @formats << value + + # save microformat under custom method + define_method_and_set_value(method_name, value) + end def parse_nodeset(nodeset) nodeset.map { |node| parse_node(node) } @@ -37,7 +61,7 @@ module Microformats2 def parse_element(element) html_classes = element.attribute("class").to_s.split - html_classes.keep_if { |html_class| html_class =~ html_class_regex } + html_classes = html_classes.select { |html_class| html_class =~ html_class_regex } if html_classes.length >= 1 parse_microformat(element, html_classes) diff --git a/lib/microformats2/property.rb b/lib/microformats2/property.rb index b6c5438..62821c0 100644 --- a/lib/microformats2/property.rb +++ b/lib/microformats2/property.rb @@ -1,32 +1,64 @@ module Microformats2 - class TextProperty - def parse(element) - element.text.gsub(/\n+/, " ").gsub(/\s+/, " ").strip - end - end - class UrlProperty - def parse(element) - (element.attribute("href") || property.text).to_s - end - end - class DateTimeProperty - def parse(element) - DateTime.parse(element.attribute("datetime") || property.text) - rescue ArgumentError => e - element.attribute("datetime") || property.text - end - end - class EmbeddedProperty - def parse(element) - element.text.gsub(/\n+/, " ").gsub(/\s+/, " ").strip - end - end + module Property + class Parser < Microformats2::Parser + attr_accessor :value - PropertyPrefixes = { - "p" => TextProperty.new, - "u" => UrlProperty.new, - "dt" => DateTimeProperty.new, - "e" => EmbeddedProperty.new - } - PropertyPrefixesRegEx = /^(p-|u-|dt-|e-)/ + def parse(element, format_classes=[]) + if format_classes.length >= 1 + parse_microformat(element, format_classes) + end + @value = parse_flat_element(element) + self + end + + def to_hash + if @formats.empty? + hash_safe_value + else + { value: hash_safe_value }.merge @formats.first.to_hash + end + end + + def hash_safe_value + @value + end + end + + class Text < Property::Parser + def parse_flat_element(element) + element.text.gsub(/\n+/, " ").gsub(/\s+/, " ").strip + end + end + + class Url < Property::Parser + def parse_flat_element(element) + (element.attribute("href") || property.text).to_s + end + end + + class DateTime < Property::Parser + def parse_flat_element(element) + ::DateTime.parse(element.attribute("datetime") || property.text) + rescue ArgumentError => e + element.attribute("datetime") || property.text + end + def hash_safe_value + @value.to_s + end + end + + class Embedded < Property::Parser + def parse_flat_element(element) + element.text.gsub(/\n+/, " ").gsub(/\s+/, " ").strip + end + end + + Parsers = { + "p" => Text, + "u" => Url, + "dt" => DateTime, + "e" => Embedded + } + PrefixesRegEx = /^(p-|u-|dt-|e-)/ + end end diff --git a/spec/lib/microformats2/collection_spec.rb b/spec/lib/microformats2/collection_spec.rb index c6d062c..7a9b9fc 100644 --- a/spec/lib/microformats2/collection_spec.rb +++ b/spec/lib/microformats2/collection_spec.rb @@ -2,7 +2,7 @@ require "spec_helper" require "microformats2" describe Microformats2::Collection do - describe "with simple h-card" do + describe "with simple .h-card" do before do html = "spec/support/simple_hcard.html" @collection = Microformats2.parse(html) @@ -13,29 +13,68 @@ describe Microformats2::Collection do @collection.h_card.should be_kind_of HCard end it "assigns .h-card .p-name to HCard#name" do - @collection.h_card.name.should == "Jessica Lynn Suttles" + @collection.h_card.name.value.should == "Jessica Lynn Suttles" end - it "assigns .h-card .u-url to HCard#url" do - @collection.h_card.url.should == "http://twitter.com/jlsuttles" + it "assigns both .h-card .u-url to HCard#url" do + urls = ["http://flickr.com/jlsuttles", "http://twitter.com/jlsuttles"] + @collection.h_card.url.map(&:value).should == urls end it "assings .h-card .dt-bday to HCard#bday" do - @collection.h_card.bday.should be_kind_of DateTime - @collection.h_card.bday.to_s.should == "1990-10-15T20:45:33-08:00" + @collection.h_card.bday.value.should be_kind_of DateTime + @collection.h_card.bday.value.to_s.should == "1990-10-15T20:45:33-08:00" end it "assigns .h-card .e-content to HCard#content" do - @collection.h_card.content.should == "Vegan. Cat lover. Coder." + @collection.h_card.content.value.should == "Vegan. Cat lover. Coder." end end describe "#to_hash" do it "returns the correct Hash" do hash = { - :items => [{ :type => ["h-card"], + :items => [{ + :type => ["h-card"], :properties => { - :url => "http://twitter.com/jlsuttles", - :name => "Jessica Lynn Suttles", - :bday => "1990-10-15T20:45:33-08:00", - :content => "Vegan. Cat lover. Coder." + :url => ["http://flickr.com/jlsuttles", "http://twitter.com/jlsuttles"], + :name => ["Jessica Lynn Suttles"], + :bday => ["1990-10-15T20:45:33-08:00"], + :content => ["Vegan. Cat lover. Coder."] + } + }] + } + @collection.to_hash.should == hash + end + end + end + + describe "with .h-entry .p-author.h-card nested" do + before do + html = "spec/support/nested_hentry.html" + @collection = Microformats2.parse(html) + end + + describe "#parse" do + it "creates ruby class HEntry" do + @collection.h_entry.should be_kind_of HEntry + end + it "assigns .h-entry .p-author to HEntry#author" do + @collection.h_entry.author.value.should == "Jessica Lynn Suttles" + end + end + + describe "#to_hash" do + it "returns the correct Hash" do + hash = { + :items => [{ + :type => ["h-entry"], + :properties => { + :author => [{ + :value => "Jessica Lynn Suttles", + :type => ["h-card", "h-org"], + :properties => { + :url => ["http://twitter.com/jlsuttles"], + :name => ["Jessica Lynn Suttles"] + } + }] } }] } diff --git a/spec/support/nested_hentry.html b/spec/support/nested_hentry.html new file mode 100644 index 0000000..bdfcccd --- /dev/null +++ b/spec/support/nested_hentry.html @@ -0,0 +1,12 @@ + + +
+