From 081d0e5000c82f0cbd2df30cfce09c94243a66fa Mon Sep 17 00:00:00 2001 From: Jessica Lynn Suttles Date: Tue, 12 Feb 2013 18:46:35 -0800 Subject: [PATCH] organizing broke the world --- lib/microformats2.rb | 5 +- lib/microformats2/collection.rb | 10 +- lib/microformats2/format.rb | 54 ++++----- lib/microformats2/format_parser.rb | 54 +++++++++ lib/microformats2/parser.rb | 103 ------------------ lib/microformats2/property.rb | 4 +- lib/microformats2/property/date_time.rb | 2 +- lib/microformats2/property/embedded.rb | 2 +- .../property/{parser.rb => foundation.rb} | 23 ++-- lib/microformats2/property/text.rb | 2 +- lib/microformats2/property/url.rb | 2 +- lib/microformats2/property_parser.rb | 51 +++++++++ spec/lib/microformats2/collection_spec.rb | 16 +-- spec/support/hcard-hcard-nested.html | 13 +++ .../support/hcard-pname-pnickname-nested.html | 12 ++ 15 files changed, 189 insertions(+), 164 deletions(-) create mode 100644 lib/microformats2/format_parser.rb delete mode 100644 lib/microformats2/parser.rb rename lib/microformats2/property/{parser.rb => foundation.rb} (60%) create mode 100644 lib/microformats2/property_parser.rb create mode 100644 spec/support/hcard-hcard-nested.html create mode 100644 spec/support/hcard-pname-pnickname-nested.html diff --git a/lib/microformats2.rb b/lib/microformats2.rb index a693420..7f2f751 100644 --- a/lib/microformats2.rb +++ b/lib/microformats2.rb @@ -2,10 +2,11 @@ require "nokogiri" require "open-uri" require "json" require "microformats2/version" -require "microformats2/parser" +require "microformats2/format_parser" +require "microformats2/property_parser" require "microformats2/collection" require "microformats2/format" -require "microformats2/property/parser" +require "microformats2/property/foundation" require "microformats2/property/text" require "microformats2/property/url" require "microformats2/property/date_time" diff --git a/lib/microformats2/collection.rb b/lib/microformats2/collection.rb index 9fef9f4..c9b4bf2 100644 --- a/lib/microformats2/collection.rb +++ b/lib/microformats2/collection.rb @@ -1,8 +1,14 @@ module Microformats2 - class Collection < Parser + class Collection + attr_accessor :formats + + def parse(document) + formats = FormatParser.parse(document) + end + def to_hash hash = { items: [] } - @formats.each do |format| + formats.each do |format| hash[:items] << format.to_hash end hash diff --git a/lib/microformats2/format.rb b/lib/microformats2/format.rb index dce5646..72f08d2 100644 --- a/lib/microformats2/format.rb +++ b/lib/microformats2/format.rb @@ -1,16 +1,30 @@ module Microformats2 - class Format < Parser - attr_reader :format_types + class Format + CLASS_REG_EXP = /^(h-)/ - def parse(element) - html_classes = element.attribute("class").to_s.split - @format_types = html_classes.select { |html_class| html_class =~ /^(h-)/ } - super + attr_reader :element, :properties, :format_types + + def initialize(element) + @element = element + @format_types = [] + @properties = [] + end + + def parse + properties << PropertyParser.parse(element) + format_types + self + end + + def format_types + @format_types ||= element.attribute("class").to_s.split.select do |html_class| + html_class =~ Format::CLASS_REG_EXP + end end def to_hash hash = { type: @format_types, properties: {} } - @added_methods.each do |method_name| + properties.each do |method_name| value = send(method_name) value = value.is_a?(Array) ? value : [value] hash[:properties][method_name.to_sym] = value.map(&:to_hash) @@ -21,31 +35,5 @@ module Microformats2 def to_json to_hash.to_json end - - private - - # look for both formats and properties - def html_class_regex - /^(h-|p-|u-|dt-|e-)/ - end - - def parse_microformat(element, html_classes) - property_classes = html_classes.select { |html_class| html_class =~ Microformats2::Property::PrefixesRegEx } - - property_classes.each do |property_class| - # p-class-name -> p - prefix = property_class.split("-").first - # p-class-name -> class_name - method_name = property_class.split("-")[1..-1].join("_") - # avoid overriding Object#class - method_name = "klass" if method_name == "class" - - # parse property - value = Microformats2::Property::Parsers[prefix].new(element).parse - - # save property under custom method - define_method_and_set_value(method_name, value) - end - end end end diff --git a/lib/microformats2/format_parser.rb b/lib/microformats2/format_parser.rb new file mode 100644 index 0000000..7dc096c --- /dev/null +++ b/lib/microformats2/format_parser.rb @@ -0,0 +1,54 @@ +module Microformats2 + class FormatParser + class << self + def parse(element) + parse_node(element) + end + + def parse_node(node) + case + when node.is_a?(Nokogiri::XML::NodeSet) then parse_nodeset(node) + when node.is_a?(Nokogiri::XML::Element) then parse_for_microformats(node) + end + end + + def parse_nodeset(nodeset) + nodeset.map { |node| parse_node(node) } + end + + def parse_for_microformats(element) + if format_classes(element).length >= 1 + parse_microformat(element) + else + parse_nodeset(element.children) + end + end + + def parse_microformat(element) + # only worry about the first format for now + html_class = format_classes(element).first + # class-name -> class_name + method_name = html_class.downcase.gsub("-","_") + # class_name -> Class_name -> ClassName + constant_name = method_name.gsub(/^([a-z])/){$1.upcase}.gsub(/_(.)/){$1.upcase} + + # find or create ruby class for microformat + if Object.const_defined?(constant_name) + klass = Object.const_get(constant_name) + else + klass = Class.new(Microformats2::Format) + Object.const_set constant_name, klass + end + + # parse microformat + klass.new(element).parse + end + + def format_classes(element) + element.attribute("class").to_s.split.select do |html_class| + html_class =~ Format::CLASS_REG_EXP + end + end + end + end +end diff --git a/lib/microformats2/parser.rb b/lib/microformats2/parser.rb deleted file mode 100644 index 0ed9e9f..0000000 --- a/lib/microformats2/parser.rb +++ /dev/null @@ -1,103 +0,0 @@ -module Microformats2 - class Parser - attr_accessor :formats, :added_methods - - def initialize - @formats = [] - @added_methods = [] - end - - # override and do interesting things here - def parse(element) - parse_nodeset(element.children) - self - end - - protected - - # override with regex to match before parsing microformat - def html_class_regex - /^(h-)/ - end - - # override and do interesting things here - def parse_microformat(microformat, html_classes) - # only worry about the first format for now - html_class = html_classes.first - - # class-name -> class_name - method_name = html_class.downcase.gsub("-","_") - # class_name -> Class_name -> ClassName - constant_name = method_name.gsub(/^([a-z])/){$1.upcase}.gsub(/_(.)/){$1.upcase} - - # get ruby class for microformat - if Object.const_defined?(constant_name) - klass = Object.const_get(constant_name) - else - klass = Class.new(Microformats2::Format) - Object.const_set constant_name, klass - end - - # parse microformat - value = klass.new.parse(microformat) - - # save microformat in array in order - formats << value - - # save microformat under custom method - define_method_and_set_value(method_name, value) - end - - def parse_nodeset(nodeset) - nodeset.map { |node| parse_node(node) } - end - - def parse_node(node) - case - when node.is_a?(Nokogiri::XML::NodeSet) then parse_nodeset(node) - when node.is_a?(Nokogiri::XML::Element) then parse_element(node) - end - end - - def parse_element(element) - html_classes = element.attribute("class").to_s.split - html_classes = html_classes.select { |html_class| html_class =~ html_class_regex } - - if html_classes.length >= 1 - parse_microformat(element, html_classes) - else - parse_nodeset(element.children) - end - end - - def define_method_and_set_value(method_name, value) - save_method_name(method_name) - define_method(method_name) - set_value(method_name, value) - end - - def save_method_name(method_name) - unless added_methods.include?(method_name) - added_methods << method_name - end - end - - def define_method(method_name) - unless respond_to?(method_name) - self.class.class_eval { attr_accessor method_name } - end - end - - def set_value(method_name, value) - if current = send(method_name) - if current.kind_of? Array - current << value - else - send("#{method_name}=", [current, value]) - end - else - send("#{method_name}=", value) - end - end - end -end diff --git a/lib/microformats2/property.rb b/lib/microformats2/property.rb index a217631..8be740c 100644 --- a/lib/microformats2/property.rb +++ b/lib/microformats2/property.rb @@ -1,11 +1,11 @@ module Microformats2 module Property - Parsers = { + CLASS_REG_EXP = /^(p-|u-|dt-|e-)/ + PREFIX_CLASS_MAP = { "p" => Text, "u" => Url, "dt" => DateTime, "e" => Embedded } - PrefixesRegEx = /^(p-|u-|dt-|e-)/ end end diff --git a/lib/microformats2/property/date_time.rb b/lib/microformats2/property/date_time.rb index 6e4d43e..ce6775b 100644 --- a/lib/microformats2/property/date_time.rb +++ b/lib/microformats2/property/date_time.rb @@ -1,6 +1,6 @@ module Microformats2 module Property - class DateTime < Property::Parser + class DateTime < Foundation def value ::DateTime.parse(super) rescue ArgumentError => e diff --git a/lib/microformats2/property/embedded.rb b/lib/microformats2/property/embedded.rb index 6efdd6f..b938735 100644 --- a/lib/microformats2/property/embedded.rb +++ b/lib/microformats2/property/embedded.rb @@ -1,6 +1,6 @@ module Microformats2 module Property - class Embedded < Property::Parser + class Embedded < Foundation def value @value ||= @element.inner_html.strip end diff --git a/lib/microformats2/property/parser.rb b/lib/microformats2/property/foundation.rb similarity index 60% rename from lib/microformats2/property/parser.rb rename to lib/microformats2/property/foundation.rb index 97816ee..e82ccd4 100644 --- a/lib/microformats2/property/parser.rb +++ b/lib/microformats2/property/foundation.rb @@ -1,22 +1,25 @@ module Microformats2 module Property - class Parser < Microformats2::Parser - attr_accessor :value, :element + class Foundation + attr_accessor :element, :value, :formats def initialize(element) @element = element - super() + @formats = [] end def parse - html_classes = element.attribute("class").to_s.split - format_classes = html_classes.select { |html_class| html_class =~ /^(h-)/ } - if format_classes.length >= 1 - parse_microformat(element, format_classes) - end - self + formats << FormatParser.parse(element) if format_classes.length >=1 + value + self end + def format_classes + element.attribute("class").to_s.split.select do |html_class| + html_class =~ Format::CLASS_REG_EXP + end + end + def value @value ||= value_class_pattern || element_value || text_value end @@ -45,7 +48,7 @@ module Microformats2 if formats.empty? value.to_s else - { value: value.to_s }.merge formats.first.to_hash + { value: value.to_s }.merge(formats.first.to_hash) end end end diff --git a/lib/microformats2/property/text.rb b/lib/microformats2/property/text.rb index 1a264e8..ebecc59 100644 --- a/lib/microformats2/property/text.rb +++ b/lib/microformats2/property/text.rb @@ -1,6 +1,6 @@ module Microformats2 module Property - class Text < Property::Parser + class Text < Foundation def attr_map @attr_map = { "abbr" => "title", diff --git a/lib/microformats2/property/url.rb b/lib/microformats2/property/url.rb index 5d100da..0771e1a 100644 --- a/lib/microformats2/property/url.rb +++ b/lib/microformats2/property/url.rb @@ -1,6 +1,6 @@ module Microformats2 module Property - class Url < Property::Parser + class Url < Foundation def attr_map @attr_map = { "a" => "href", diff --git a/lib/microformats2/property_parser.rb b/lib/microformats2/property_parser.rb new file mode 100644 index 0000000..ab731ab --- /dev/null +++ b/lib/microformats2/property_parser.rb @@ -0,0 +1,51 @@ +module Microformats2 + class PropertyParser + class << self + def parse(element) + parse_node(element) + end + + def parse_node(node) + case + when node.is_a?(Nokogiri::XML::NodeSet) then parse_nodeset(node) + when node.is_a?(Nokogiri::XML::Element) then parse_for_properties(node) + end + end + + def parse_nodeset(nodeset) + nodeset.map { |node| parse_node(node) } + end + + def parse_for_properties(element) + if property_classes(element).length >= 1 + parse_property(element) + else + parse_nodeset(element.children) + end + end + + def parse_property(element, html_classes) + property_classes(element).each do |property_class| + # p-class-name -> p + prefix = property_class.split("-").first + # p-class-name -> class_name + method_name = property_class.split("-")[1..-1].join("_") + # avoid overriding Object#class + method_name = "klass" if method_name == "class" + + # find ruby class for kind of property + klass = Microformats2::Property::PREFIX_CLASS_MAP[prefix] + + # parse property + klass.new(element).parse + end + end + + def property_classes(element, regexp) + element.attribute("class").to_s.split.select do |html_class| + html_class =~ Property::CLASS_REG_EXP + end + end + end + end +end diff --git a/spec/lib/microformats2/collection_spec.rb b/spec/lib/microformats2/collection_spec.rb index 1a7f96b..2aed1fc 100644 --- a/spec/lib/microformats2/collection_spec.rb +++ b/spec/lib/microformats2/collection_spec.rb @@ -25,21 +25,21 @@ describe Microformats2::Collection do describe "#parse" do it "creates ruby class HCard" do - @collection.h_card.should be_kind_of HCard + @collection.first.should be_kind_of HCard end it "assigns .h-card .p-name to HCard#name" do - @collection.h_card.name.value.should == "Jessica Lynn Suttles" + @collection.first.name.value.should == "Jessica Lynn Suttles" end it "assigns both .h-card .u-url to HCard#url" do urls = ["http://flickr.com/jlsuttles", "http://twitter.com/jlsuttles"] - @collection.h_card.url.map(&:value).should == urls + @collection.first.url.map(&:value).should == urls end it "assings .h-card .dt-bday to HCard#bday" do - @collection.h_card.bday.value.should be_kind_of DateTime - @collection.h_card.bday.value.to_s.should == "1990-10-15T20:45:33-08:00" + @collection.first.bday.value.should be_kind_of DateTime + @collection.first.bday.value.to_s.should == "1990-10-15T20:45:33-08:00" end it "assigns .h-card .e-content to HCard#content" do - @collection.h_card.content.value.should == "Vegan. Cat lover. Coder." + @collection.first.content.value.should == "Vegan. Cat lover. Coder." end end @@ -69,10 +69,10 @@ describe Microformats2::Collection do describe "#parse" do it "creates ruby class HEntry" do - @collection.h_entry.should be_kind_of HEntry + @collection.first.should be_kind_of HEntry end it "assigns .h-entry .p-author to HEntry#author" do - @collection.h_entry.author.value.should == "Jessica Lynn Suttles" + @collection.first.author.value.should == "Jessica Lynn Suttles" end end diff --git a/spec/support/hcard-hcard-nested.html b/spec/support/hcard-hcard-nested.html new file mode 100644 index 0000000..6d41077 --- /dev/null +++ b/spec/support/hcard-hcard-nested.html @@ -0,0 +1,13 @@ + + + +
+
Jessica Lynn Suttles
+ +
+ + diff --git a/spec/support/hcard-pname-pnickname-nested.html b/spec/support/hcard-pname-pnickname-nested.html new file mode 100644 index 0000000..c0da6e0 --- /dev/null +++ b/spec/support/hcard-pname-pnickname-nested.html @@ -0,0 +1,12 @@ + + + +
+
+
+ jlsuttles +
+
+
+ +