diff --git a/lib/microformats2.rb b/lib/microformats2.rb index 475a458..4f34caa 100644 --- a/lib/microformats2.rb +++ b/lib/microformats2.rb @@ -2,6 +2,7 @@ require "nokogiri" require "open-uri" require "json" require "microformats2/version" +require "microformats2/parser" require "microformats2/collection" require "microformats2/format" require "microformats2/property" diff --git a/lib/microformats2/collection.rb b/lib/microformats2/collection.rb index 7ffbc64..4037011 100644 --- a/lib/microformats2/collection.rb +++ b/lib/microformats2/collection.rb @@ -1,15 +1,10 @@ module Microformats2 - class Collection - attr_accessor :added_methods, :formats + class Collection < Parser + attr_accessor :formats def initialize - @added_methods = [] @formats = [] - end - - def parse(document) - parse_nodeset(document.children) - self + super end def to_hash @@ -24,34 +19,12 @@ module Microformats2 to_hash.to_json end + def html_class_regex + /^h-/ + end + private - def parse_nodeset(nodeset) - nodeset.map { |node| parse_node(node) } - end - - def parse_node(node) - case - when node.is_a?(Nokogiri::XML::NodeSet) then parse_nodeset(node) - when node.is_a?(Nokogiri::XML::Element) then parse_element(node) - end - end - - def parse_element(element) - # look for root microformat class - html_classes = element.attribute("class").to_s.split - html_classes.keep_if { |html_class| html_class =~ /^h-/ } - - # if found root microformat, yay parse it - if html_classes.length >= 1 - parse_microformat(element, html_classes) - - # if no root microformat found, look at children - else - parse_nodeset(element.children) - end - end - def parse_microformat(microformat, html_classes) # only worry about the first format for now html_class = html_classes.first @@ -69,38 +42,14 @@ module Microformats2 Object.const_set constant_name, klass end - # get a new instance of the ruby class - format = klass.new.parse(microformat) + # parse microformat + value = klass.new.parse(microformat) - @formats << format + # save microformat in array in order + @formats << value - save_method_name(method_name) - add_method(method_name) - populate_method(method_name, format) - end - - def save_method_name(method_name) - unless @added_methods.include?(method_name) - @added_methods << method_name - end - end - - def add_method(method_name) - unless respond_to?(method_name) - self.class.class_eval { attr_accessor method_name } - end - end - - def populate_method(method_name, value) - if current = send(method_name) - if current.kind_of? Array - current << value - else - send("#{method_name}=", [current, value]) - end - else - send("#{method_name}=", value) - end + # save microformat under custom method + define_method_and_set_value(method_name, value) end end end diff --git a/lib/microformats2/format.rb b/lib/microformats2/format.rb index 20f3617..7b56681 100644 --- a/lib/microformats2/format.rb +++ b/lib/microformats2/format.rb @@ -1,16 +1,5 @@ module Microformats2 - class Format - attr_accessor :added_method_names - - def initialize - @added_method_names = [] - end - - def parse(element) - parse_nodeset(element.children) - self - end - + class Format < Parser def type # ClassName -> className -> class-name self.class.name.gsub(/^([A-Z])/){$1.downcase}.gsub(/([A-Z])/){"-" + $1.downcase} @@ -18,7 +7,7 @@ module Microformats2 def to_hash hash = { type: [type], properties: {} } - @added_method_names.each do |method_name| + @added_methods.each do |method_name| hash[:properties][method_name.to_sym] = send(method_name) end hash @@ -28,74 +17,26 @@ module Microformats2 to_hash.to_json end + def html_class_regex + Microformats2::PropertyPrefixesRegEx + end + private - def parse_nodeset(nodeset) - nodeset.map { |node| parse_node(node) } - end - - def parse_node(node) - case - when node.is_a?(Nokogiri::XML::NodeSet) then parse_nodeset(node) - when node.is_a?(Nokogiri::XML::Element) then parse_element(node) - end - end - - def parse_element(element) - # look for microformat property class - html_classes = element.attribute("class").to_s.split - html_classes.keep_if { |html_class| html_class =~ Microformats2::PropertyPrefixesRegEx } - - # if found microformat property, yay parse it - if html_classes.length >= 1 - parse_property(element, html_classes) - - # if no microformat property found, look at children - else - parse_nodeset(element.children) - end - end - - def parse_property(element, html_classes) + def parse_microformat(element, html_classes) html_classes.each do |html_class| # p-class-name -> p prefix = html_class.split("-").first # p-class-name -> class_name method_name = html_class.split("-")[1..-1].join("_") + # avoid overriding Object#class + method_name = "klass" if method_name == "class" + + # parse property value = Microformats2::PropertyPrefixes[prefix].parse(element) - # avoid overriding Object#class - if method_name == "class" - method_name = "klass" - end - - save_method_name(method_name) - add_method(method_name) - populate_method(method_name, value) - end - end - - def save_method_name(method_name) - unless @added_method_names.include?(method_name) - @added_method_names << method_name - end - end - - def add_method(method_name) - unless respond_to?(method_name) - self.class.class_eval { attr_accessor method_name } - end - end - - def populate_method(method_name, value) - if current = send(method_name) - if current.kind_of? Array - current << value - else - send("#{method_name}=", [current, value]) - end - else - send("#{method_name}=", value) + # save property under custom method + define_method_and_set_value(method_name, value) end end end diff --git a/lib/microformats2/parser.rb b/lib/microformats2/parser.rb new file mode 100644 index 0000000..44401ef --- /dev/null +++ b/lib/microformats2/parser.rb @@ -0,0 +1,79 @@ +module Microformats2 + class Parser + attr_accessor :added_methods + + def initialize + @added_methods = [] + end + + def parse(element) + parse_nodeset(element.children) + self + end + + protected + + # override with regex to match before parsing microformat + def html_class_regex + // + end + + # override and do interesting things here + def parse_microformat(element, html_classes) + element + end + + + def parse_nodeset(nodeset) + nodeset.map { |node| parse_node(node) } + end + + def parse_node(node) + case + when node.is_a?(Nokogiri::XML::NodeSet) then parse_nodeset(node) + when node.is_a?(Nokogiri::XML::Element) then parse_element(node) + end + end + + def parse_element(element) + html_classes = element.attribute("class").to_s.split + html_classes.keep_if { |html_class| html_class =~ html_class_regex } + + if html_classes.length >= 1 + parse_microformat(element, html_classes) + else + parse_nodeset(element.children) + end + end + + def define_method_and_set_value(method_name, value) + save_method_name(method_name) + define_method(method_name) + set_value(method_name, value) + end + + def save_method_name(method_name) + unless @added_methods.include?(method_name) + @added_methods << method_name + end + end + + def define_method(method_name) + unless respond_to?(method_name) + self.class.class_eval { attr_accessor method_name } + end + end + + def set_value(method_name, value) + if current = send(method_name) + if current.kind_of? Array + current << value + else + send("#{method_name}=", [current, value]) + end + else + send("#{method_name}=", value) + end + end + end +end