organizing broke the world

This commit is contained in:
Jessica Lynn Suttles 2013-02-12 18:46:35 -08:00
parent 113d95af17
commit 081d0e5000
15 changed files with 189 additions and 164 deletions

View file

@ -2,10 +2,11 @@ require "nokogiri"
require "open-uri"
require "json"
require "microformats2/version"
require "microformats2/parser"
require "microformats2/format_parser"
require "microformats2/property_parser"
require "microformats2/collection"
require "microformats2/format"
require "microformats2/property/parser"
require "microformats2/property/foundation"
require "microformats2/property/text"
require "microformats2/property/url"
require "microformats2/property/date_time"

View file

@ -1,8 +1,14 @@
module Microformats2
class Collection < Parser
class Collection
attr_accessor :formats
# Parses +document+ with FormatParser and stores the result in #formats.
#
# The assignment has to go through the writer (`self.formats =`): a bare
# `formats = ...` only creates a local variable, leaving the attribute nil
# for #to_hash.
#
# Returns whatever FormatParser.parse returned.
def parse(document)
  self.formats = FormatParser.parse(document)
end
def to_hash
hash = { items: [] }
@formats.each do |format|
formats.each do |format|
hash[:items] << format.to_hash
end
hash

View file

@ -1,16 +1,30 @@
module Microformats2
class Format < Parser
attr_reader :format_types
class Format
CLASS_REG_EXP = /^(h-)/
def parse(element)
html_classes = element.attribute("class").to_s.split
@format_types = html_classes.select { |html_class| html_class =~ /^(h-)/ }
super
attr_reader :element, :properties, :format_types
def initialize(element)
@element = element
@format_types = []
@properties = []
end
def parse
properties << PropertyParser.parse(element)
format_types
self
end
# Root ("h-*") class names found on the element's class attribute.
#
# The value is computed on first use. An empty array is treated as "not
# computed yet": the constructor seeds @format_types with [], which is
# truthy, so a plain `||=` would never run the lookup.
#
# Returns an Array of Strings (empty when the element has no format class).
def format_types
  if @format_types.nil? || @format_types.empty?
    html_classes = element.attribute("class").to_s.split
    @format_types = html_classes.select { |html_class| html_class =~ Format::CLASS_REG_EXP }
  end
  @format_types
end
def to_hash
hash = { type: @format_types, properties: {} }
@added_methods.each do |method_name|
properties.each do |method_name|
value = send(method_name)
value = value.is_a?(Array) ? value : [value]
hash[:properties][method_name.to_sym] = value.map(&:to_hash)
@ -21,31 +35,5 @@ module Microformats2
def to_json
to_hash.to_json
end
private
# look for both formats and properties
def html_class_regex
/^(h-|p-|u-|dt-|e-)/
end
def parse_microformat(element, html_classes)
property_classes = html_classes.select { |html_class| html_class =~ Microformats2::Property::PrefixesRegEx }
property_classes.each do |property_class|
# p-class-name -> p
prefix = property_class.split("-").first
# p-class-name -> class_name
method_name = property_class.split("-")[1..-1].join("_")
# avoid overriding Object#class
method_name = "klass" if method_name == "class"
# parse property
value = Microformats2::Property::Parsers[prefix].new(element).parse
# save property under custom method
define_method_and_set_value(method_name, value)
end
end
end
end

View file

@ -0,0 +1,54 @@
module Microformats2
  # Finds microformat roots (elements carrying an "h-*" class) in a Nokogiri
  # tree and turns each one into an instance of a Microformats2::Format
  # subclass named after that class (e.g. "h-card" -> HCard).
  class FormatParser
    class << self
      # Entry point; see parse_node for what each kind of input yields.
      def parse(element)
        parse_node(element)
      end

      # Dispatches on the kind of node.
      #
      # Returns:
      # * Document or NodeSet         -> flat Array of formats (possibly empty)
      # * Element with a format class -> the parsed format itself
      # * Element without one         -> flat Array of formats found below it
      # * anything else (text, ...)   -> nil
      def parse_node(node)
        case node
        when Nokogiri::XML::NodeSet then parse_nodeset(node)
        when Nokogiri::XML::Element then parse_for_microformats(node)
        # A document is neither a NodeSet nor an Element, so without this
        # branch parsing a whole document silently produced nil.
        when Nokogiri::XML::Document then parse_nodeset(node.children)
        end
      end

      # Parses every node of the set and returns one flat list of formats.
      # Non-element nodes yield nil and nested elements yield arrays, so the
      # raw map result is flattened and compacted before it is handed back.
      def parse_nodeset(nodeset)
        nodeset.map { |node| parse_node(node) }.flatten.compact
      end

      # Parses +element+ as a microformat when it has a format class,
      # otherwise keeps searching its children.
      def parse_for_microformats(element)
        if format_classes(element).empty?
          parse_nodeset(element.children)
        else
          parse_microformat(element)
        end
      end

      # Builds the format object for +element+.
      #
      # The Ruby class is looked up on Object by name and, when missing,
      # created as a subclass of Microformats2::Format and registered there.
      def parse_microformat(element)
        # only worry about the first format for now
        html_class = format_classes(element).first
        # class-name -> class_name
        method_name = html_class.downcase.gsub("-", "_")
        # class_name -> Class_name -> ClassName
        constant_name = method_name.gsub(/^([a-z])/) { $1.upcase }.gsub(/_(.)/) { $1.upcase }

        # find or create ruby class for microformat
        if Object.const_defined?(constant_name)
          klass = Object.const_get(constant_name)
        else
          klass = Class.new(Microformats2::Format)
          Object.const_set(constant_name, klass)
        end

        # parse microformat
        klass.new(element).parse
      end

      # Class names of +element+ that match Format::CLASS_REG_EXP.
      def format_classes(element)
        element.attribute("class").to_s.split.select do |html_class|
          html_class =~ Format::CLASS_REG_EXP
        end
      end
    end
  end
end

View file

@ -1,103 +0,0 @@
module Microformats2
# Base parser: walks a Nokogiri tree, builds a Microformats2::Format subclass
# instance for each element whose class attribute matches html_class_regex,
# and exposes every result both in #formats and through an accessor that is
# defined on the fly.
class Parser
# formats       - parsed values, in the order they were encountered
# added_methods - names of the accessors created while parsing
attr_accessor :formats, :added_methods
def initialize
@formats = []
@added_methods = []
end
# override and do interesting things here
# Default behaviour: parse the children of +element+ (the element itself is
# not inspected) and return self.
def parse(element)
parse_nodeset(element.children)
self
end
protected
# override with regex to match before parsing microformat
def html_class_regex
/^(h-)/
end
# override and do interesting things here
# Default behaviour: instantiate the class named after the first entry of
# +html_classes+, let it parse +microformat+, append the result to #formats
# and store it under an accessor named after the html class.
def parse_microformat(microformat, html_classes)
# only worry about the first format for now
html_class = html_classes.first
# class-name -> class_name
method_name = html_class.downcase.gsub("-","_")
# class_name -> Class_name -> ClassName
constant_name = method_name.gsub(/^([a-z])/){$1.upcase}.gsub(/_(.)/){$1.upcase}
# get ruby class for microformat
# (looked up on Object; when absent, a new Microformats2::Format subclass is
# created and registered on Object under that name)
if Object.const_defined?(constant_name)
klass = Object.const_get(constant_name)
else
klass = Class.new(Microformats2::Format)
Object.const_set constant_name, klass
end
# parse microformat
value = klass.new.parse(microformat)
# save microformat in array in order
formats << value
# save microformat under custom method
define_method_and_set_value(method_name, value)
end
# Maps parse_node over every node of +nodeset+ and returns the raw results.
def parse_nodeset(nodeset)
nodeset.map { |node| parse_node(node) }
end
# Dispatches NodeSets and Elements; every other kind of node falls through
# the case and yields nil.
def parse_node(node)
case
when node.is_a?(Nokogiri::XML::NodeSet) then parse_nodeset(node)
when node.is_a?(Nokogiri::XML::Element) then parse_element(node)
end
end
# Hands +element+ to parse_microformat when at least one of its class names
# matches html_class_regex; otherwise keeps searching its children.
def parse_element(element)
html_classes = element.attribute("class").to_s.split
html_classes = html_classes.select { |html_class| html_class =~ html_class_regex }
if html_classes.length >= 1
parse_microformat(element, html_classes)
else
parse_nodeset(element.children)
end
end
# Records the name, makes sure the accessor exists, then stores the value.
def define_method_and_set_value(method_name, value)
save_method_name(method_name)
define_method(method_name)
set_value(method_name, value)
end
# Appends +method_name+ to added_methods unless it is already listed.
def save_method_name(method_name)
unless added_methods.include?(method_name)
added_methods << method_name
end
end
# Adds an attr_accessor named +method_name+ unless this object already
# responds to it.
# NOTE(review): the accessor is defined on self.class, so every instance of
# that class gains it, not just this one - confirm that is intended.
def define_method(method_name)
unless respond_to?(method_name)
self.class.class_eval { attr_accessor method_name }
end
end
# Stores +value+ under +method_name+: the first value is stored as-is, a
# second value turns the slot into [current, value], and later values are
# appended to that array.
def set_value(method_name, value)
if current = send(method_name)
if current.kind_of? Array
current << value
else
send("#{method_name}=", [current, value])
end
else
send("#{method_name}=", value)
end
end
end
end

View file

@ -1,11 +1,11 @@
module Microformats2
module Property
Parsers = {
CLASS_REG_EXP = /^(p-|u-|dt-|e-)/
PREFIX_CLASS_MAP = {
"p" => Text,
"u" => Url,
"dt" => DateTime,
"e" => Embedded
}
PrefixesRegEx = /^(p-|u-|dt-|e-)/
end
end

View file

@ -1,6 +1,6 @@
module Microformats2
module Property
class DateTime < Property::Parser
class DateTime < Foundation
def value
::DateTime.parse(super)
rescue ArgumentError => e

View file

@ -1,6 +1,6 @@
module Microformats2
module Property
class Embedded < Property::Parser
class Embedded < Foundation
def value
@value ||= @element.inner_html.strip
end

View file

@ -1,22 +1,25 @@
module Microformats2
module Property
class Parser < Microformats2::Parser
attr_accessor :value, :element
class Foundation
attr_accessor :element, :value, :formats
def initialize(element)
@element = element
super()
@formats = []
end
def parse
html_classes = element.attribute("class").to_s.split
format_classes = html_classes.select { |html_class| html_class =~ /^(h-)/ }
if format_classes.length >= 1
parse_microformat(element, format_classes)
end
self
formats << FormatParser.parse(element) if format_classes.length >=1
value
self
end
def format_classes
element.attribute("class").to_s.split.select do |html_class|
html_class =~ Format::CLASS_REG_EXP
end
end
def value
@value ||= value_class_pattern || element_value || text_value
end
@ -45,7 +48,7 @@ module Microformats2
if formats.empty?
value.to_s
else
{ value: value.to_s }.merge formats.first.to_hash
{ value: value.to_s }.merge(formats.first.to_hash)
end
end
end

View file

@ -1,6 +1,6 @@
module Microformats2
module Property
class Text < Property::Parser
class Text < Foundation
def attr_map
@attr_map = {
"abbr" => "title",

View file

@ -1,6 +1,6 @@
module Microformats2
module Property
class Url < Property::Parser
class Url < Foundation
def attr_map
@attr_map = {
"a" => "href",

View file

@ -0,0 +1,51 @@
module Microformats2
  # Finds property elements (class names matching Property::CLASS_REG_EXP)
  # in a Nokogiri tree and builds, for each matching class name, the property
  # object registered for its prefix in Property::PREFIX_CLASS_MAP.
  class PropertyParser
    class << self
      # Entry point; see parse_node for what each kind of input yields.
      def parse(element)
        parse_node(element)
      end

      # Dispatches on the kind of node.
      #
      # Returns a flat Array of parsed properties for a NodeSet or an
      # Element, and nil for any other kind of node.
      def parse_node(node)
        case node
        when Nokogiri::XML::NodeSet then parse_nodeset(node)
        when Nokogiri::XML::Element then parse_for_properties(node)
        end
      end

      # Parses every node of the set and returns one flat list of properties.
      # Non-element nodes yield nil and elements yield arrays, so the raw map
      # result is flattened and compacted.
      def parse_nodeset(nodeset)
        nodeset.map { |node| parse_node(node) }.flatten.compact
      end

      # Parses +element+ as a property when it has property classes,
      # otherwise keeps searching its children.
      def parse_for_properties(element)
        html_classes = property_classes(element)
        if html_classes.empty?
          parse_nodeset(element.children)
        else
          parse_property(element, html_classes)
        end
      end

      # Builds one property object per property class name.
      #
      # element      - the node carrying the property classes
      # html_classes - class names to build properties for; defaults to the
      #                property classes found on +element+, so the method can
      #                be called with the element alone
      #
      # Returns an Array of parsed properties (`map`, not `each`, so the
      # parsed objects are returned rather than the class-name strings).
      def parse_property(element, html_classes = property_classes(element))
        html_classes.map do |property_class|
          # p-class-name -> p
          prefix = property_class.split("-").first
          # find ruby class for kind of property
          klass = Microformats2::Property::PREFIX_CLASS_MAP[prefix]
          # parse property
          klass.new(element).parse
        end
      end

      # Class names of +element+ that match +regexp+
      # (Property::CLASS_REG_EXP unless another pattern is given).
      def property_classes(element, regexp = Property::CLASS_REG_EXP)
        element.attribute("class").to_s.split.select do |html_class|
          html_class =~ regexp
        end
      end
    end
  end
end