organizing broke the world
This commit is contained in:
parent
5ebeed6c90
commit
6f6a6418f1
15 changed files with 189 additions and 164 deletions
|
@ -2,10 +2,11 @@ require "nokogiri"
|
||||||
require "open-uri"
|
require "open-uri"
|
||||||
require "json"
|
require "json"
|
||||||
require "microformats2/version"
|
require "microformats2/version"
|
||||||
require "microformats2/parser"
|
require "microformats2/format_parser"
|
||||||
|
require "microformats2/property_parser"
|
||||||
require "microformats2/collection"
|
require "microformats2/collection"
|
||||||
require "microformats2/format"
|
require "microformats2/format"
|
||||||
require "microformats2/property/parser"
|
require "microformats2/property/foundation"
|
||||||
require "microformats2/property/text"
|
require "microformats2/property/text"
|
||||||
require "microformats2/property/url"
|
require "microformats2/property/url"
|
||||||
require "microformats2/property/date_time"
|
require "microformats2/property/date_time"
|
||||||
|
|
|
@ -1,8 +1,14 @@
|
||||||
module Microformats2
|
module Microformats2
|
||||||
class Collection < Parser
|
class Collection
|
||||||
|
attr_accessor :formats
|
||||||
|
|
||||||
|
# Parses +document+ for microformats and stores the result on this collection.
#
# The result is assigned through the +formats+ writer (declared by
# +attr_accessor :formats+). A bare +formats = ...+ would only create a
# method-local variable and the parsed formats would be thrown away, leaving
# nothing for +to_hash+ to iterate over.
#
# @param document the node(s) handed to FormatParser.parse
# @return whatever FormatParser.parse returned (also available via #formats)
def parse(document)
  self.formats = FormatParser.parse(document)
end
|
||||||
|
|
||||||
def to_hash
|
def to_hash
|
||||||
hash = { items: [] }
|
hash = { items: [] }
|
||||||
@formats.each do |format|
|
formats.each do |format|
|
||||||
hash[:items] << format.to_hash
|
hash[:items] << format.to_hash
|
||||||
end
|
end
|
||||||
hash
|
hash
|
||||||
|
|
|
@ -1,16 +1,30 @@
|
||||||
module Microformats2
|
module Microformats2
|
||||||
class Format < Parser
|
class Format
|
||||||
attr_reader :format_types
|
CLASS_REG_EXP = /^(h-)/
|
||||||
|
|
||||||
def parse(element)
|
attr_reader :element, :properties, :format_types
|
||||||
html_classes = element.attribute("class").to_s.split
|
|
||||||
@format_types = html_classes.select { |html_class| html_class =~ /^(h-)/ }
|
def initialize(element)
|
||||||
super
|
@element = element
|
||||||
|
@format_types = []
|
||||||
|
@properties = []
|
||||||
|
end
|
||||||
|
|
||||||
|
def parse
|
||||||
|
properties << PropertyParser.parse(element)
|
||||||
|
format_types
|
||||||
|
self
|
||||||
|
end
|
||||||
|
|
||||||
|
def format_types
|
||||||
|
@format_types ||= element.attribute("class").to_s.split.select do |html_class|
|
||||||
|
html_class =~ Format::CLASS_REG_EXP
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def to_hash
|
def to_hash
|
||||||
hash = { type: @format_types, properties: {} }
|
hash = { type: @format_types, properties: {} }
|
||||||
@added_methods.each do |method_name|
|
properties.each do |method_name|
|
||||||
value = send(method_name)
|
value = send(method_name)
|
||||||
value = value.is_a?(Array) ? value : [value]
|
value = value.is_a?(Array) ? value : [value]
|
||||||
hash[:properties][method_name.to_sym] = value.map(&:to_hash)
|
hash[:properties][method_name.to_sym] = value.map(&:to_hash)
|
||||||
|
@ -21,31 +35,5 @@ module Microformats2
|
||||||
def to_json
|
def to_json
|
||||||
to_hash.to_json
|
to_hash.to_json
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
|
||||||
|
|
||||||
# look for both formats and properties
|
|
||||||
def html_class_regex
|
|
||||||
/^(h-|p-|u-|dt-|e-)/
|
|
||||||
end
|
|
||||||
|
|
||||||
def parse_microformat(element, html_classes)
|
|
||||||
property_classes = html_classes.select { |html_class| html_class =~ Microformats2::Property::PrefixesRegEx }
|
|
||||||
|
|
||||||
property_classes.each do |property_class|
|
|
||||||
# p-class-name -> p
|
|
||||||
prefix = property_class.split("-").first
|
|
||||||
# p-class-name -> class_name
|
|
||||||
method_name = property_class.split("-")[1..-1].join("_")
|
|
||||||
# avoid overriding Object#class
|
|
||||||
method_name = "klass" if method_name == "class"
|
|
||||||
|
|
||||||
# parse property
|
|
||||||
value = Microformats2::Property::Parsers[prefix].new(element).parse
|
|
||||||
|
|
||||||
# save property under custom method
|
|
||||||
define_method_and_set_value(method_name, value)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
54
lib/microformats2/format_parser.rb
Normal file
54
lib/microformats2/format_parser.rb
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
module Microformats2
  # Walks Nokogiri nodes looking for elements whose class attribute contains a
  # microformats2 root class (matched by Format::CLASS_REG_EXP) and turns each
  # one into an instance of a Microformats2::Format subclass.
  class FormatParser
    class << self
      # Entry point: parses a node set or a single element.
      #
      # @param element [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
      # @return [Microformats2::Format] when +element+ itself carries a format class
      # @return [Array<Microformats2::Format>] flat list of formats found when
      #   +element+ is a node set or an element without a format class
      # @return [nil] for any other kind of node
      def parse(element)
        parse_node(element)
      end

      # Dispatches on the node type; anything that is neither a node set nor
      # an element (text nodes, comments, ...) yields nil.
      def parse_node(node)
        case node
        when Nokogiri::XML::NodeSet then parse_nodeset(node)
        when Nokogiri::XML::Element then parse_for_microformats(node)
        end
      end

      # Parses every node of the set and returns one flat array of formats.
      #
      # Child results may be nil (non-element nodes) or nested arrays (elements
      # without a format class recurse into their own children), so the mapped
      # result is flattened and stripped of nils before being returned.
      def parse_nodeset(nodeset)
        nodeset.map { |node| parse_node(node) }.flatten.compact
      end

      # Parses the element itself when it has at least one format class,
      # otherwise keeps searching through its children.
      def parse_for_microformats(element)
        if format_classes(element).empty?
          parse_nodeset(element.children)
        else
          parse_microformat(element)
        end
      end

      # Builds the format object for +element+, creating the Ruby class for
      # the format on the fly when it does not exist yet.
      def parse_microformat(element)
        # only worry about the first format for now
        html_class = format_classes(element).first
        # class-name -> class_name
        method_name = html_class.downcase.gsub("-", "_")
        # class_name -> Class_name -> ClassName
        constant_name = method_name
                        .gsub(/^([a-z])/) { Regexp.last_match(1).upcase }
                        .gsub(/_(.)/) { Regexp.last_match(1).upcase }

        # find or create ruby class for microformat
        klass =
          if Object.const_defined?(constant_name)
            Object.const_get(constant_name)
          else
            Object.const_set(constant_name, Class.new(Microformats2::Format))
          end

        # parse microformat
        klass.new(element).parse
      end

      # Returns the entries of the element's class attribute that match
      # Format::CLASS_REG_EXP, in document order.
      def format_classes(element)
        element.attribute("class").to_s.split.select do |html_class|
          html_class =~ Format::CLASS_REG_EXP
        end
      end
    end
  end
end
|
|
@ -1,103 +0,0 @@
|
||||||
module Microformats2
|
|
||||||
class Parser
|
|
||||||
attr_accessor :formats, :added_methods
|
|
||||||
|
|
||||||
def initialize
|
|
||||||
@formats = []
|
|
||||||
@added_methods = []
|
|
||||||
end
|
|
||||||
|
|
||||||
# override and do interesting things here
|
|
||||||
def parse(element)
|
|
||||||
parse_nodeset(element.children)
|
|
||||||
self
|
|
||||||
end
|
|
||||||
|
|
||||||
protected
|
|
||||||
|
|
||||||
# override with regex to match before parsing microformat
|
|
||||||
def html_class_regex
|
|
||||||
/^(h-)/
|
|
||||||
end
|
|
||||||
|
|
||||||
# override and do interesting things here
|
|
||||||
def parse_microformat(microformat, html_classes)
|
|
||||||
# only worry about the first format for now
|
|
||||||
html_class = html_classes.first
|
|
||||||
|
|
||||||
# class-name -> class_name
|
|
||||||
method_name = html_class.downcase.gsub("-","_")
|
|
||||||
# class_name -> Class_name -> ClassName
|
|
||||||
constant_name = method_name.gsub(/^([a-z])/){$1.upcase}.gsub(/_(.)/){$1.upcase}
|
|
||||||
|
|
||||||
# get ruby class for microformat
|
|
||||||
if Object.const_defined?(constant_name)
|
|
||||||
klass = Object.const_get(constant_name)
|
|
||||||
else
|
|
||||||
klass = Class.new(Microformats2::Format)
|
|
||||||
Object.const_set constant_name, klass
|
|
||||||
end
|
|
||||||
|
|
||||||
# parse microformat
|
|
||||||
value = klass.new.parse(microformat)
|
|
||||||
|
|
||||||
# save microformat in array in order
|
|
||||||
formats << value
|
|
||||||
|
|
||||||
# save microformat under custom method
|
|
||||||
define_method_and_set_value(method_name, value)
|
|
||||||
end
|
|
||||||
|
|
||||||
def parse_nodeset(nodeset)
|
|
||||||
nodeset.map { |node| parse_node(node) }
|
|
||||||
end
|
|
||||||
|
|
||||||
def parse_node(node)
|
|
||||||
case
|
|
||||||
when node.is_a?(Nokogiri::XML::NodeSet) then parse_nodeset(node)
|
|
||||||
when node.is_a?(Nokogiri::XML::Element) then parse_element(node)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def parse_element(element)
|
|
||||||
html_classes = element.attribute("class").to_s.split
|
|
||||||
html_classes = html_classes.select { |html_class| html_class =~ html_class_regex }
|
|
||||||
|
|
||||||
if html_classes.length >= 1
|
|
||||||
parse_microformat(element, html_classes)
|
|
||||||
else
|
|
||||||
parse_nodeset(element.children)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def define_method_and_set_value(method_name, value)
|
|
||||||
save_method_name(method_name)
|
|
||||||
define_method(method_name)
|
|
||||||
set_value(method_name, value)
|
|
||||||
end
|
|
||||||
|
|
||||||
def save_method_name(method_name)
|
|
||||||
unless added_methods.include?(method_name)
|
|
||||||
added_methods << method_name
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def define_method(method_name)
|
|
||||||
unless respond_to?(method_name)
|
|
||||||
self.class.class_eval { attr_accessor method_name }
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def set_value(method_name, value)
|
|
||||||
if current = send(method_name)
|
|
||||||
if current.kind_of? Array
|
|
||||||
current << value
|
|
||||||
else
|
|
||||||
send("#{method_name}=", [current, value])
|
|
||||||
end
|
|
||||||
else
|
|
||||||
send("#{method_name}=", value)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
|
@ -1,11 +1,11 @@
|
||||||
module Microformats2
|
module Microformats2
|
||||||
module Property
|
module Property
|
||||||
Parsers = {
|
CLASS_REG_EXP = /^(p-|u-|dt-|e-)/
|
||||||
|
PREFIX_CLASS_MAP = {
|
||||||
"p" => Text,
|
"p" => Text,
|
||||||
"u" => Url,
|
"u" => Url,
|
||||||
"dt" => DateTime,
|
"dt" => DateTime,
|
||||||
"e" => Embedded
|
"e" => Embedded
|
||||||
}
|
}
|
||||||
PrefixesRegEx = /^(p-|u-|dt-|e-)/
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
module Microformats2
|
module Microformats2
|
||||||
module Property
|
module Property
|
||||||
class DateTime < Property::Parser
|
class DateTime < Foundation
|
||||||
def value
|
def value
|
||||||
::DateTime.parse(super)
|
::DateTime.parse(super)
|
||||||
rescue ArgumentError => e
|
rescue ArgumentError => e
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
module Microformats2
|
module Microformats2
|
||||||
module Property
|
module Property
|
||||||
class Embedded < Property::Parser
|
class Embedded < Foundation
|
||||||
def value
|
def value
|
||||||
@value ||= @element.inner_html.strip
|
@value ||= @element.inner_html.strip
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,22 +1,25 @@
|
||||||
module Microformats2
|
module Microformats2
|
||||||
module Property
|
module Property
|
||||||
class Parser < Microformats2::Parser
|
class Foundation
|
||||||
attr_accessor :value, :element
|
attr_accessor :element, :value, :formats
|
||||||
|
|
||||||
def initialize(element)
|
def initialize(element)
|
||||||
@element = element
|
@element = element
|
||||||
super()
|
@formats = []
|
||||||
end
|
end
|
||||||
|
|
||||||
def parse
|
def parse
|
||||||
html_classes = element.attribute("class").to_s.split
|
formats << FormatParser.parse(element) if format_classes.length >=1
|
||||||
format_classes = html_classes.select { |html_class| html_class =~ /^(h-)/ }
|
value
|
||||||
if format_classes.length >= 1
|
self
|
||||||
parse_microformat(element, format_classes)
|
|
||||||
end
|
|
||||||
self
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def format_classes
|
||||||
|
element.attribute("class").to_s.split.select do |html_class|
|
||||||
|
html_class =~ Format::CLASS_REG_EXP
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
def value
|
def value
|
||||||
@value ||= value_class_pattern || element_value || text_value
|
@value ||= value_class_pattern || element_value || text_value
|
||||||
end
|
end
|
||||||
|
@ -45,7 +48,7 @@ module Microformats2
|
||||||
if formats.empty?
|
if formats.empty?
|
||||||
value.to_s
|
value.to_s
|
||||||
else
|
else
|
||||||
{ value: value.to_s }.merge formats.first.to_hash
|
{ value: value.to_s }.merge(formats.first.to_hash)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
|
@ -1,6 +1,6 @@
|
||||||
module Microformats2
|
module Microformats2
|
||||||
module Property
|
module Property
|
||||||
class Text < Property::Parser
|
class Text < Foundation
|
||||||
def attr_map
|
def attr_map
|
||||||
@attr_map = {
|
@attr_map = {
|
||||||
"abbr" => "title",
|
"abbr" => "title",
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
module Microformats2
|
module Microformats2
|
||||||
module Property
|
module Property
|
||||||
class Url < Property::Parser
|
class Url < Foundation
|
||||||
def attr_map
|
def attr_map
|
||||||
@attr_map = {
|
@attr_map = {
|
||||||
"a" => "href",
|
"a" => "href",
|
||||||
|
|
51
lib/microformats2/property_parser.rb
Normal file
51
lib/microformats2/property_parser.rb
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
module Microformats2
  # Walks Nokogiri nodes looking for elements whose class attribute contains a
  # microformats2 property class (matched by Property::CLASS_REG_EXP) and
  # builds a property object for each matching class.
  class PropertyParser
    class << self
      # Entry point: parses a node set or a single element.
      #
      # @param element [Nokogiri::XML::NodeSet, Nokogiri::XML::Element]
      # @return [Array] flat list of parsed property objects
      # @return [nil] when +element+ is neither a node set nor an element
      def parse(element)
        parse_node(element)
      end

      # Dispatches on the node type; anything that is neither a node set nor
      # an element (text nodes, comments, ...) yields nil.
      def parse_node(node)
        case node
        when Nokogiri::XML::NodeSet then parse_nodeset(node)
        when Nokogiri::XML::Element then parse_for_properties(node)
        end
      end

      # Parses every node of the set and returns one flat array of properties.
      # Nil results (non-element nodes) and nested arrays (from recursion and
      # from elements carrying several property classes) are flattened away.
      def parse_nodeset(nodeset)
        nodeset.map { |node| parse_node(node) }.flatten.compact
      end

      # Parses the element itself when it has at least one property class,
      # otherwise keeps searching through its children.
      def parse_for_properties(element)
        if property_classes(element).empty?
          parse_nodeset(element.children)
        else
          parse_property(element)
        end
      end

      # Builds one property object per property class found on +element+.
      #
      # @param element [Nokogiri::XML::Element]
      # @param html_classes unused; optional so that the one-argument call in
      #   parse_for_properties no longer raises ArgumentError, while
      #   two-argument calls keep working
      # @return [Array] the parsed property objects, in class-attribute order
      def parse_property(element, html_classes = nil)
        # map (not each) so the parsed properties are returned rather than
        # the raw class-name strings
        property_classes(element).map do |property_class|
          # p-class-name -> p
          prefix = property_class.split("-").first

          # find ruby class for kind of property
          klass = Microformats2::Property::PREFIX_CLASS_MAP[prefix]

          # parse property
          klass.new(element).parse
        end
      end

      # Returns the entries of the element's class attribute matching +regexp+.
      #
      # @param element [Nokogiri::XML::Element]
      # @param regexp [Regexp] pattern to select by; defaults to
      #   Property::CLASS_REG_EXP so callers may omit it
      # @return [Array<String>]
      def property_classes(element, regexp = Property::CLASS_REG_EXP)
        element.attribute("class").to_s.split.select do |html_class|
          html_class =~ regexp
        end
      end
    end
  end
end
|
|
@ -25,21 +25,21 @@ describe Microformats2::Collection do
|
||||||
|
|
||||||
describe "#parse" do
|
describe "#parse" do
|
||||||
it "creates ruby class HCard" do
|
it "creates ruby class HCard" do
|
||||||
@collection.h_card.should be_kind_of HCard
|
@collection.first.should be_kind_of HCard
|
||||||
end
|
end
|
||||||
it "assigns .h-card .p-name to HCard#name" do
|
it "assigns .h-card .p-name to HCard#name" do
|
||||||
@collection.h_card.name.value.should == "Jessica Lynn Suttles"
|
@collection.first.name.value.should == "Jessica Lynn Suttles"
|
||||||
end
|
end
|
||||||
it "assigns both .h-card .u-url to HCard#url" do
|
it "assigns both .h-card .u-url to HCard#url" do
|
||||||
urls = ["http://flickr.com/jlsuttles", "http://twitter.com/jlsuttles"]
|
urls = ["http://flickr.com/jlsuttles", "http://twitter.com/jlsuttles"]
|
||||||
@collection.h_card.url.map(&:value).should == urls
|
@collection.first.url.map(&:value).should == urls
|
||||||
end
|
end
|
||||||
it "assings .h-card .dt-bday to HCard#bday" do
|
it "assings .h-card .dt-bday to HCard#bday" do
|
||||||
@collection.h_card.bday.value.should be_kind_of DateTime
|
@collection.first.bday.value.should be_kind_of DateTime
|
||||||
@collection.h_card.bday.value.to_s.should == "1990-10-15T20:45:33-08:00"
|
@collection.first.bday.value.to_s.should == "1990-10-15T20:45:33-08:00"
|
||||||
end
|
end
|
||||||
it "assigns .h-card .e-content to HCard#content" do
|
it "assigns .h-card .e-content to HCard#content" do
|
||||||
@collection.h_card.content.value.should == "Vegan. Cat lover. Coder."
|
@collection.first.content.value.should == "Vegan. Cat lover. Coder."
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -69,10 +69,10 @@ describe Microformats2::Collection do
|
||||||
|
|
||||||
describe "#parse" do
|
describe "#parse" do
|
||||||
it "creates ruby class HEntry" do
|
it "creates ruby class HEntry" do
|
||||||
@collection.h_entry.should be_kind_of HEntry
|
@collection.first.should be_kind_of HEntry
|
||||||
end
|
end
|
||||||
it "assigns .h-entry .p-author to HEntry#author" do
|
it "assigns .h-entry .p-author to HEntry#author" do
|
||||||
@collection.h_entry.author.value.should == "Jessica Lynn Suttles"
|
@collection.first.author.value.should == "Jessica Lynn Suttles"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
13
spec/support/hcard-hcard-nested.html
Normal file
13
spec/support/hcard-hcard-nested.html
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<body>
|
||||||
|
<div class="h-card">
|
||||||
|
<div class="p-author">Jessica Lynn Suttles</div>
|
||||||
|
<div class="h-card h-org">
|
||||||
|
<a class="p-name">
|
||||||
|
G5
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
12
spec/support/hcard-pname-pnickname-nested.html
Normal file
12
spec/support/hcard-pname-pnickname-nested.html
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<body>
|
||||||
|
<div class="h-card">
|
||||||
|
<div class="p-name">
|
||||||
|
<div class="p-nickname">
|
||||||
|
jlsuttles
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
Loading…
Add table
Add a link
Reference in a new issue