From ce2ea993a859e81d04b6f651e28aa9b977b056d3 Mon Sep 17 00:00:00 2001 From: Shane Becker Date: Wed, 15 Jun 2011 00:35:24 -0700 Subject: [PATCH] shit works --- Rakefile | 6 +- lib/microformats2.rb | 164 ++++++++++++++++++++++++++++++++++- test/hcard.html | 30 +++++++ test/test_microformats2.rb | 171 ++++++++++++++++++++++++++++++++++++- 4 files changed, 364 insertions(+), 7 deletions(-) create mode 100644 test/hcard.html diff --git a/Rakefile b/Rakefile index 12180b3..e396157 100644 --- a/Rakefile +++ b/Rakefile @@ -4,9 +4,9 @@ require 'rubygems' require 'hoe' Hoe.spec 'microformats2' do - # developer('FIX', 'FIX@example.com') - - # self.rubyforge_name = 'microformats2x' # if different than 'microformats2' + developer('Shane Becker', 'veganstraightedge@gmail.com') + extra_deps << ['nokogiri', ">= 0"] end + # vim: syntax=ruby diff --git a/lib/microformats2.rb b/lib/microformats2.rb index a2071f7..47a6aac 100644 --- a/lib/microformats2.rb +++ b/lib/microformats2.rb @@ -1,3 +1,163 @@ -class Microformats2 - VERSION = '1.0.0' +require 'nokogiri' +require 'time' +require 'date' + +module Microformats2 + VERSION = "1.0.0" + + def self.parse(html) + raise LoadError unless html.is_a?(String) + doc = Nokogiri::HTML(html) + microformats = Hash.new{|hash, key| hash[key] = Array.new} + doc.css("*[class^=h-]").each do |microformat| + constant_name = classify(microformat.attribute("class").to_s.gsub("-","_")) + + if Object.const_defined?(constant_name) + klass = Object.const_get(constant_name) + else + klass = Class.new + Object.const_set constant_name, klass + end + + obj = klass.new + + # Add any properties to the object + self.add_properties(microformat, obj) + self.add_urls(microformat, obj) + self.add_dates(microformat, obj) + self.add_times(microformat, obj) + #letters = %w(p u d n e i t) + + microformats[constant_name.downcase.to_sym] << obj + end + + return microformats + end + + def self.add_properties(mf, obj) + %w(p n e i).each do |letter| + 
mf.css("*[class|=#{letter}]").each do |property| + property.attribute("class").to_s.split.each do |css_class| + if css_class =~ /^[pnei]/ + css_class = css_class[2..-1].gsub("-","_") + method_name = css_class.gsub("-","_") + value = property.text.strip_whitespace + + obj.class.class_eval { attr_accessor method_name } + + if cur = obj.send(method_name) + if cur.kind_of? Array + cur << value + else + obj.send("#{method_name}=", [cur, value]) + end + else + obj.send("#{method_name}=", value) + end + end + end + end + end + end + + def self.add_urls(mf, obj) + mf.css("*[class*=u-]").each do |property| + property.attribute("class").to_s.split.each do |css_class| + if css_class =~ /^u/ + css_class = css_class[2..-1].gsub("-","_") + method_name = css_class.gsub("-","_") + value = property.attribute("href").to_s + + obj.class.class_eval { attr_accessor method_name } + + if cur = obj.send(method_name) + if cur.kind_of? Array + cur << value + else + obj.send("#{method_name}=", [cur, value]) + end + else + obj.send("#{method_name}=", value) + end + end + end + end + end + + def self.add_dates(mf, obj) + mf.css("*[class*=d-]").each do |property| + property.attribute("class").to_s.split.each do |css_class| + if css_class =~ /^d/ + css_class = css_class[2..-1].gsub("-","_") + method_name = css_class.gsub("-","_") + value = DateTime.parse((property.attribute("title") || property.text).to_s) + + obj.class.class_eval { attr_accessor method_name } + + if cur = obj.send(method_name) + if cur.kind_of? 
Array + cur << value + else + obj.send("#{method_name}=", [cur, value]) + end + else + obj.send("#{method_name}=", value) + end + end + end + end + end + + def self.add_times(mf, obj) + mf.css("*[class*=t-]").each do |property| + property.attribute("class").to_s.split.each do |css_class| + if css_class =~ /^t/ + css_class = css_class[2..-1].gsub("-","_") + method_name = css_class.gsub("-","_") + value = Time.parse((property.attribute("title") || property.text).to_s) + + obj.class.class_eval { attr_accessor method_name } + + if cur = obj.send(method_name) + if cur.kind_of? Array + cur << value + else + obj.send("#{method_name}=", [cur, value]) + end + else + obj.send("#{method_name}=", value) + end + end + end + end + end + + class LoadError < StandardError; end + + # Thank you Rails Developers for your unintentional contribution to this project + # File activesupport/lib/active_support/inflector/inflections.rb, line 206 + def self.classify(str) + # strip out any leading schema name + camelize(singularize(str.to_s.sub(/.*\./, ''))) + end + + # File activesupport/lib/active_support/inflector/inflections.rb, line 148 + def self.singularize(word) + result = word.to_s.dup + end + + # File activesupport/lib/active_support/inflector/methods.rb, line 28 + def self.camelize(lower_case_and_underscored_word, first_letter_in_uppercase = true) + if first_letter_in_uppercase + lower_case_and_underscored_word.to_s.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase } + else + lower_case_and_underscored_word.to_s[0].chr.downcase + camelize(lower_case_and_underscored_word)[1..-1] + end + end +end + +class String + def strip_whitespace + self.gsub(/\n+/, " ").gsub(/\s+/, " ").strip + end end diff --git a/test/hcard.html b/test/hcard.html new file mode 100644 index 0000000..bb82de4 --- /dev/null +++ b/test/hcard.html @@ -0,0 +1,30 @@ + + + Simple hCard + + + +

+ + Chris + R. + Messina + +

+ +

+ + Shane + B + Becker + +

+ +

+ + Breakfast + Cereal + +

+ + diff --git a/test/test_microformats2.rb b/test/test_microformats2.rb index fbdc10d..5da3cb1 100644 --- a/test/test_microformats2.rb +++ b/test/test_microformats2.rb @@ -2,7 +2,174 @@ require "test/unit" require "microformats2" class TestMicroformats2 < Test::Unit::TestCase - def test_sanity - flunk "write tests or I will kneecap you" + def test_acceptence_of_string + assert_nothing_raised Microformats2::LoadError do + Microformats2.parse("A String") + end + end + + def test_throw_exception_on_non_string_params + assert_raise Microformats2::LoadError do + Microformats2.parse(nil) + end + end + + def test_returns_array_of_microformat_objects + result = Microformats2.parse("A String") + assert_equal Array, result.class + end + + def test_only_parse_microformats + result = Microformats2.parse("

Something

") + assert_equal 0, result.size + end + + def test_extracts_hcard_from_html + hcard = <<-END + + + Simple hCard + + + +

+ + Chris + R. + Messina + +

+ + + END + result = Microformats2.parse(hcard) + assert_equal HCard, result.first.class + end + + def test_constructs_properties_from_hcard + hcard = <<-END + + + Simple hCard + + + +

+ + Chris + R. + Messina + +

+ + + END + result = Microformats2.parse(hcard) + mycard = result.first + + assert_equal "Chris", mycard.given_name + assert_equal "R.", mycard.additional_name + assert_equal "Messina", mycard.family_name + assert_equal "Chris R. Messina", mycard.fn + end + + def test_constructs_dates + hcard = <<-END + + + Simple hCard + + + +

+ + Chris + R. + Messina + + + 1979-09-18 + EPOCH! +

+ + + END + result = Microformats2.parse(hcard) + mycard = result.first + + assert_equal DateTime.parse("1979-09-18"), mycard.bday + assert_equal DateTime.parse("1970-01-01"), mycard.epoch + end + + def test_constructs_times + hcard = <<-END + + + Simple hCard + + + +

+ + Chris + R. + Messina + + + 09:30 + Leaving time +

+ + + END + result = Microformats2.parse(hcard) + mycard = result.first + + assert_equal Time.parse("09:30"), mycard.start + assert_equal Time.parse("06:00"), mycard.end + end + + def test_ignores_pattern_matches_not_at_the_beginning_of_class + hcard = <<-END + + + Simple hCard + + + +

+ Chris +

+ + + END + result = Microformats2.parse(hcard) + mycard = result.first + + assert_equal "Chris", mycard.n_x + assert mycard.n_x.is_a?(String) + end + + def test_constructs_urls_from_hcard + hcard = <<-END + + + Simple hCard + + + +

+ + Chris + R. + Messina + +

+ + + END + result = Microformats2.parse(hcard) + mycard = result.first + assert_equal "http://factoryjoe.com/", mycard.url end end