From d54e2015a38ef2f2b4399c34528cb32b3261e711 Mon Sep 17 00:00:00 2001 From: Jessica Lynn Suttles Date: Mon, 4 Feb 2013 18:24:21 -0800 Subject: [PATCH] adds to_hash and to_json for collection --- lib/microformats2.rb | 191 ++-------------------- lib/microformats2/collection.rb | 106 ++++++++++++ lib/microformats2/format.rb | 102 ++++++++++++ lib/microformats2/property.rb | 30 ++++ spec/lib/microformats2/collection_spec.rb | 29 ++++ spec/lib/microformats2_spec.rb | 11 +- 6 files changed, 286 insertions(+), 183 deletions(-) create mode 100644 lib/microformats2/collection.rb create mode 100644 lib/microformats2/format.rb create mode 100644 lib/microformats2/property.rb create mode 100644 spec/lib/microformats2/collection_spec.rb diff --git a/lib/microformats2.rb b/lib/microformats2.rb index ce227a7..475a458 100644 --- a/lib/microformats2.rb +++ b/lib/microformats2.rb @@ -2,187 +2,22 @@ require "nokogiri" require "open-uri" require "json" require "microformats2/version" +require "microformats2/collection" +require "microformats2/format" +require "microformats2/property" module Microformats2 - class TextProperty - def parse(element) - element.text.gsub(/\n+/, " ").gsub(/\s+/, " ").strip - end - end - class UrlProperty - def parse(element) - (element.attribute("href") || property.text).to_s - end - end - class DateTimeProperty - def parse(element) - DateTime.parse(element.attribute("datetime") || property.text) - end - end - class EmbeddedProperty - def parse(element) - element.text - end - end - - Prefixes = { - "p" => TextProperty.new, - "u" => UrlProperty.new, - "dt" => DateTimeProperty.new, - "e" => EmbeddedProperty.new - } - PrefixesRegEx = /^(p-|u-|dt-|e-)/ - - class Root - attr_accessor :properties - - def initialize(element) - @properties = [] - parse_nodeset(element.children) + class << self + def parse(html) + html = read_html(html) + document = Nokogiri::HTML(html) + Collection.new.parse(document) end - def type - # ClassName -> className -> class-name - self.class.name.gsub(/^([A-Z])/){$1.downcase}.gsub(/([A-Z])/){"-" + $1.downcase} + def read_html(html) + open(html).read + rescue Errno::ENOENT => e + html end - - def to_hash - hash = { type: [type], properties: {} } - @properties.each do |method_name| - hash[:properties][method_name] = send(method_name) - end - hash - end - - def to_json(*a) - to_hash.to_json(a) - end - - def parse_nodeset(nodeset) - nodeset.map { |node| parse_node(node) } - end - - def parse_node(node) - case - when node.is_a?(Nokogiri::XML::NodeSet) then parse_nodeset(node) - when node.is_a?(Nokogiri::XML::Element) then parse_element(node) - end - end - - def parse_element(element) - # look for microformat property class - html_classes = element.attribute("class").to_s.split - html_classes.keep_if { |html_class| html_class =~ Microformats2::PrefixesRegEx } - - # if found microformat property, yay parse it - if html_classes.length >= 1 - parse_property(element, html_classes) - - # if no microformat property found, look at children - else - parse_nodeset(element.children) - end - end - - def parse_property(element, html_classes) - html_classes.each do |html_class| - # p-class-name -> p - prefix = html_class.split("-").first - # p-class-name -> class_name - method_name = html_class.split("-")[1..-1].join("_") - value = Microformats2::Prefixes[prefix].parse(element) - - # avoid overriding Object#class - if method_name == "class" - method_name = "klass" - end - - add_property(method_name) - add_method(method_name) - populate_method(method_name, value) - end - end - - def add_property(method_name) - unless @properties.include?(method_name) - @properties << method_name - end - end - - def add_method(method_name) - unless respond_to?(method_name) - self.class.class_eval { attr_accessor method_name } - end - end - - def populate_method(method_name, value) - if cur = send(method_name) - if cur.kind_of? Array - cur << value - else - send("#{method_name}=", [cur, value]) - end - else - send("#{method_name}=", value) - end - end - end - - def self.parse(html) - html = read_html(html) - document = Nokogiri::HTML(html) - parsed_document = parse_nodeset(document.children) - parsed_document.flatten.compact - end - - def self.read_html(html) - open(html).read - rescue Errno::ENOENT => e - html - end - - def self.parse_nodeset(nodeset) - nodeset.map { |node| parse_node(node) } - end - - def self.parse_node(node) - case - when node.is_a?(Nokogiri::XML::NodeSet) then parse_nodeset(node) - when node.is_a?(Nokogiri::XML::Element) then parse_element(node) - end - end - - def self.parse_element(element) - # look for root microformat class - html_classes = element.attribute("class").to_s.split - html_classes.keep_if { |html_class| html_class =~ /^h-/ } - - # if found root microformat, yay parse it - if html_classes.length >= 1 - parse_microformat(element, html_classes) - - # if no root microformat found, look at children - else - parse_nodeset(element.children) - end - end - - def self.parse_microformat(microformat, html_classes) - # only worry about the first format for now - html_class = html_classes.first - - # class_name -> class-name -> Class-name -> ClassName - constant_name = html_class.gsub("-","_").gsub(/^([a-z])/){$1.upcase}.gsub(/_(.)/){$1.upcase} - - # get ruby class for microformat - if Object.const_defined?(constant_name) - klass = Object.const_get(constant_name) - else - klass = Class.new(Microformats2::Root) - Object.const_set constant_name, klass - end - - # get a new instance of the ruby class - klass.new(microformat) - end + end # class << self end diff --git a/lib/microformats2/collection.rb b/lib/microformats2/collection.rb new file mode 100644 index 0000000..7ffbc64 --- /dev/null +++ b/lib/microformats2/collection.rb @@ -0,0 +1,106 @@ +module Microformats2 + class Collection + attr_accessor :added_methods, :formats + + def initialize + @added_methods = [] + @formats = [] + end + + def parse(document) + parse_nodeset(document.children) + self + end + + def to_hash + hash = { items: [] } + @formats.each do |format| + hash[:items] << format.to_hash + end + hash + end + + def to_json + to_hash.to_json + end + + private + + def parse_nodeset(nodeset) + nodeset.map { |node| parse_node(node) } + end + + def parse_node(node) + case + when node.is_a?(Nokogiri::XML::NodeSet) then parse_nodeset(node) + when node.is_a?(Nokogiri::XML::Element) then parse_element(node) + end + end + + def parse_element(element) + # look for root microformat class + html_classes = element.attribute("class").to_s.split + html_classes.keep_if { |html_class| html_class =~ /^h-/ } + + # if found root microformat, yay parse it + if html_classes.length >= 1 + parse_microformat(element, html_classes) + + # if no root microformat found, look at children + else + parse_nodeset(element.children) + end + end + + def parse_microformat(microformat, html_classes) + # only worry about the first format for now + html_class = html_classes.first + + # class-name -> class_name + method_name = html_class.downcase.gsub("-","_") + # class_name -> Class_name -> ClassName + constant_name = method_name.gsub(/^([a-z])/){$1.upcase}.gsub(/_(.)/){$1.upcase} + + # get ruby class for microformat + if Object.const_defined?(constant_name) + klass = Object.const_get(constant_name) + else + klass = Class.new(Microformats2::Format) + Object.const_set constant_name, klass + end + + # get a new instance of the ruby class + format = klass.new.parse(microformat) + + @formats << format + + save_method_name(method_name) + add_method(method_name) + populate_method(method_name, format) + end + + def save_method_name(method_name) + unless @added_methods.include?(method_name) + @added_methods << method_name + end + end + + def add_method(method_name) + unless respond_to?(method_name) + self.class.class_eval { attr_accessor method_name } + end + end + + def populate_method(method_name, value) + if current = send(method_name) + if current.kind_of? Array + current << value + else + send("#{method_name}=", [current, value]) + end + else + send("#{method_name}=", value) + end + end + end +end diff --git a/lib/microformats2/format.rb b/lib/microformats2/format.rb new file mode 100644 index 0000000..20f3617 --- /dev/null +++ b/lib/microformats2/format.rb @@ -0,0 +1,102 @@ +module Microformats2 + class Format + attr_accessor :added_method_names + + def initialize + @added_method_names = [] + end + + def parse(element) + parse_nodeset(element.children) + self + end + + def type + # ClassName -> className -> class-name + self.class.name.gsub(/^([A-Z])/){$1.downcase}.gsub(/([A-Z])/){"-" + $1.downcase} + end + + def to_hash + hash = { type: [type], properties: {} } + @added_method_names.each do |method_name| + hash[:properties][method_name.to_sym] = send(method_name) + end + hash + end + + def to_json + to_hash.to_json + end + + private + + def parse_nodeset(nodeset) + nodeset.map { |node| parse_node(node) } + end + + def parse_node(node) + case + when node.is_a?(Nokogiri::XML::NodeSet) then parse_nodeset(node) + when node.is_a?(Nokogiri::XML::Element) then parse_element(node) + end + end + + def parse_element(element) + # look for microformat property class + html_classes = element.attribute("class").to_s.split + html_classes.keep_if { |html_class| html_class =~ Microformats2::PropertyPrefixesRegEx } + + # if found microformat property, yay parse it + if html_classes.length >= 1 + parse_property(element, html_classes) + + # if no microformat property found, look at children + else + parse_nodeset(element.children) + end + end + + def parse_property(element, html_classes) + html_classes.each do |html_class| + # p-class-name -> p + prefix = html_class.split("-").first + # p-class-name -> class_name + method_name = html_class.split("-")[1..-1].join("_") + value = Microformats2::PropertyPrefixes[prefix].parse(element) + + # avoid overriding Object#class + if method_name == "class" + method_name = "klass" + end + + save_method_name(method_name) + add_method(method_name) + populate_method(method_name, value) + end + end + + def save_method_name(method_name) + unless @added_method_names.include?(method_name) + @added_method_names << method_name + end + end + + def add_method(method_name) + unless respond_to?(method_name) + self.class.class_eval { attr_accessor method_name } + end + end + + def populate_method(method_name, value) + if current = send(method_name) + if current.kind_of? Array + current << value + else + send("#{method_name}=", [current, value]) + end + else + send("#{method_name}=", value) + end + end + end +end diff --git a/lib/microformats2/property.rb b/lib/microformats2/property.rb new file mode 100644 index 0000000..19f74db --- /dev/null +++ b/lib/microformats2/property.rb @@ -0,0 +1,30 @@ +module Microformats2 + class TextProperty + def parse(element) + element.text.gsub(/\n+/, " ").gsub(/\s+/, " ").strip + end + end + class UrlProperty + def parse(element) + (element.attribute("href") || property.text).to_s + end + end + class DateTimeProperty + def parse(element) + DateTime.parse(element.attribute("datetime") || property.text) + end + end + class EmbeddedProperty + def parse(element) + element.text + end + end + + PropertyPrefixes = { + "p" => TextProperty.new, + "u" => UrlProperty.new, + "dt" => DateTimeProperty.new, + "e" => EmbeddedProperty.new + } + PropertyPrefixesRegEx = /^(p-|u-|dt-|e-)/ +end diff --git a/spec/lib/microformats2/collection_spec.rb b/spec/lib/microformats2/collection_spec.rb new file mode 100644 index 0000000..ebb015f --- /dev/null +++ b/spec/lib/microformats2/collection_spec.rb @@ -0,0 +1,29 @@ +require "spec_helper" +require "microformats2" + +describe Microformats2::Collection do + before do + @html = <<-HTML.strip +

Jessica Lynn Suttles

+ HTML + @collection = Microformats2::Collection.new.parse(Nokogiri::HTML(@html)) + end + + describe "#to_hash" do + it "returns the correct Hash" do + hash = {items: [ + {type: ["h-card"], properties: {name: "Jessica Lynn Suttles"}} + ]} + @collection.to_hash.should == hash + end + end + + describe "#to_json" do + it "returns the correct JSON" do + json = {items: [ + {type: ["h-card"], properties: {name: "Jessica Lynn Suttles"}} + ]}.to_json + @collection.to_json.should == json + end + end +end diff --git a/spec/lib/microformats2_spec.rb b/spec/lib/microformats2_spec.rb index 6a4b385..57a3634 100644 --- a/spec/lib/microformats2_spec.rb +++ b/spec/lib/microformats2_spec.rb @@ -10,15 +10,16 @@ describe Microformats2 do describe "::parse" do before do - html = "spec/support/simple.html" @microformats2 = Microformats2.parse(@html) end - it "returns an array of found root microformats" do - @microformats2.first.should be_kind_of HCard + it "returns a collection" do + @microformats2.should be_kind_of Microformats2::Collection + end + it "assigns root formats to collection" do + @microformats2.h_card.should be_kind_of HCard end it "assigns properties to found root microformats" do - puts @microformats2.first.to_hash - @microformats2.first.name.should == "Jessica Lynn Suttles" + @microformats2.h_card.name.should == "Jessica Lynn Suttles" end end