adds to_hash and to_json for collection
This commit is contained in:
parent
0826659f1e
commit
d54e2015a3
6 changed files with 286 additions and 183 deletions
|
@ -2,187 +2,22 @@ require "nokogiri"
|
|||
require "open-uri"
|
||||
require "json"
|
||||
require "microformats2/version"
|
||||
require "microformats2/collection"
|
||||
require "microformats2/format"
|
||||
require "microformats2/property"
|
||||
|
||||
module Microformats2
|
||||
# Property parser for the "p-" (plain text) prefix.
class TextProperty
  # Returns element.text with every run of whitespace (newlines included)
  # collapsed to a single space and both ends trimmed.
  #
  # element - any object whose #text returns a String.
  def parse(element)
    # /\s+/ already matches newlines, so a single pass gives the same result
    # as the former gsub(/\n+/) + gsub(/\s+/) chain.
    element.text.gsub(/\s+/, " ").strip
  end
end
|
||||
# Property parser for the "u-" (URL) prefix.
class UrlProperty
  # Returns the element's "href" attribute as a String, falling back to the
  # element's text when the attribute lookup returns nil/false.
  #
  # element - any object responding to #attribute(name) and #text.
  def parse(element)
    # The fallback previously referenced an undefined local (`property`),
    # which raised NameError for any element without an href.
    (element.attribute("href") || element.text).to_s
  end
end
|
||||
# Property parser for the "dt-" (date/time) prefix.
class DateTimeProperty
  # Returns a DateTime parsed from the element's "datetime" attribute, or
  # from the element's text when that attribute lookup returns nil/false.
  #
  # element - any object responding to #attribute(name) and #text.
  #
  # Raises whatever DateTime.parse raises for an unparseable string.
  def parse(element)
    # Two fixes: the fallback used an undefined local (`property`), and the
    # attribute lookup can return a node object rather than a String, which
    # DateTime.parse does not accept - so coerce with to_s first, as
    # UrlProperty does.
    DateTime.parse((element.attribute("datetime") || element.text).to_s)
  end
end
|
||||
# Property parser registered under the "e" prefix in Prefixes.
class EmbeddedProperty
|
||||
# Returns element.text exactly as the element reports it (no whitespace
# normalisation, unlike TextProperty).
def parse(element)
|
||||
element.text
|
||||
end
|
||||
end
|
||||
|
||||
# Maps a property class prefix ("p", "u", "dt", "e") to the parser instance
# that extracts its value. Frozen so the shared lookup table cannot be
# mutated by accident at runtime.
Prefixes = {
  "p" => TextProperty.new,
  "u" => UrlProperty.new,
  "dt" => DateTimeProperty.new,
  "e" => EmbeddedProperty.new
}.freeze

# Matches an HTML class name that starts with one of the property prefixes.
PrefixesRegEx = /^(p-|u-|dt-|e-)/
|
||||
|
||||
class Root
|
||||
attr_accessor :properties
|
||||
|
||||
def initialize(element)
|
||||
@properties = []
|
||||
parse_nodeset(element.children)
|
||||
class << self
|
||||
# Resolves +html+ through read_html, builds a Nokogiri HTML document from
# the result and returns the Collection produced by parsing that document.
def parse(html)
  markup = read_html(html)
  Collection.new.parse(Nokogiri::HTML(markup))
end
|
||||
|
||||
def type
|
||||
# ClassName -> className -> class-name
|
||||
self.class.name.gsub(/^([A-Z])/){$1.downcase}.gsub(/([A-Z])/){"-" + $1.downcase}
|
||||
# Treats +html+ as something Kernel#open can open and returns its contents;
# when it cannot be opened as a path, the argument is assumed to already be
# markup and is returned unchanged.
#
# NOTE(review): Kernel#open interprets a leading "|" as a command to run, so
# untrusted strings should not be passed here without validation.
def read_html(html)
  # Block form closes the handle as soon as the content has been read.
  open(html) { |io| io.read }
rescue Errno::ENOENT, Errno::ENAMETOOLONG, Errno::EINVAL
  # Raw markup is not a valid path: short strings fail with ENOENT, long
  # documents with ENAMETOOLONG, and some platforms report EINVAL.
  html
end
|
||||
|
||||
# Builds { type: [type], properties: {...} } where each entry of @properties
# is used both as the key and as the name of the reader whose value is stored.
def to_hash
  result = { type: [type], properties: {} }
  @properties.each_with_object(result[:properties]) do |name, props|
    props[name] = send(name)
  end
  result
end
|
||||
|
||||
# Serialises to_hash as JSON.
#
# a - optional arguments (e.g. generator state/options) forwarded untouched
#     to the hash's own to_json.
def to_json(*a)
  # Re-splat: passing `a` itself would hand the callee one Array argument
  # instead of the arguments the caller supplied.
  to_hash.to_json(*a)
end
|
||||
|
||||
# Runs parse_node over every member of +nodeset+ and returns the results in
# order.
def parse_nodeset(nodeset)
  nodeset.map(&method(:parse_node))
end
|
||||
|
||||
# Dispatches on the node's class: node sets go to parse_nodeset, elements to
# parse_element. Any other kind of node yields nil.
def parse_node(node)
  case node
  when Nokogiri::XML::NodeSet then parse_nodeset(node)
  when Nokogiri::XML::Element then parse_element(node)
  end
end
|
||||
|
||||
# Looks for microformat property classes on +element+. When at least one
# class matches Microformats2::PrefixesRegEx the element is parsed as a
# property; otherwise the search continues in the element's children.
def parse_element(element)
  property_classes = element.attribute("class").to_s.split.select do |name|
    name =~ Microformats2::PrefixesRegEx
  end

  # no property class here: descend into the children instead
  return parse_nodeset(element.children) if property_classes.empty?

  parse_property(element, property_classes)
end
|
||||
|
||||
# For each prefixed class (e.g. "p-class-name") extracts the value with the
# parser registered for its prefix and stores it under an accessor named
# after the remainder of the class ("class_name").
def parse_property(element, html_classes)
  html_classes.each do |html_class|
    parts = html_class.split("-")
    prefix = parts.first                  # p-class-name -> p
    method_name = parts[1..-1].join("_")  # p-class-name -> class_name
    value = Microformats2::Prefixes[prefix].parse(element)

    # avoid overriding Object#class
    method_name = "klass" if method_name == "class"

    add_property(method_name)
    add_method(method_name)
    populate_method(method_name, value)
  end
end
|
||||
|
||||
# Records +method_name+ in @properties, once.
def add_property(method_name)
  @properties << method_name unless @properties.include?(method_name)
end
|
||||
|
||||
# Defines a reader/writer pair named +method_name+ on this object's class,
# unless the object already responds to that name.
def add_method(method_name)
  return if respond_to?(method_name)

  self.class.class_eval { attr_accessor method_name }
end
|
||||
|
||||
# Stores +value+ through the accessor +method_name+. The first value is
# stored as-is; a second value turns the slot into [first, second]; later
# values are appended to that array.
def populate_method(method_name, value)
  existing = send(method_name)
  setter = "#{method_name}="

  if !existing
    send(setter, value)
  elsif existing.kind_of?(Array)
    existing << value
  else
    send(setter, [existing, value])
  end
end
|
||||
end
|
||||
|
||||
# Resolves +html+ through read_html, parses it with Nokogiri and returns the
# flattened, nil-free list of whatever parse_nodeset produced for the
# document's children.
def self.parse(html)
  document = Nokogiri::HTML(read_html(html))
  parse_nodeset(document.children).flatten.compact
end
|
||||
|
||||
# Treats +html+ as something Kernel#open can open and returns its contents;
# when it cannot be opened as a path, the argument is assumed to already be
# markup and is returned unchanged.
#
# NOTE(review): Kernel#open interprets a leading "|" as a command to run, so
# untrusted strings should not be passed here without validation.
def self.read_html(html)
  # Block form closes the handle as soon as the content has been read.
  open(html) { |io| io.read }
rescue Errno::ENOENT, Errno::ENAMETOOLONG, Errno::EINVAL
  # Raw markup is not a valid path: short strings fail with ENOENT, long
  # documents with ENAMETOOLONG, and some platforms report EINVAL.
  html
end
|
||||
|
||||
# Runs parse_node over every member of +nodeset+ and returns the results in
# order.
def self.parse_nodeset(nodeset)
  nodeset.map(&method(:parse_node))
end
|
||||
|
||||
# Dispatches on the node's class: node sets go to parse_nodeset, elements to
# parse_element. Any other kind of node yields nil.
def self.parse_node(node)
  case node
  when Nokogiri::XML::NodeSet then parse_nodeset(node)
  when Nokogiri::XML::Element then parse_element(node)
  end
end
|
||||
|
||||
# Looks for root microformat classes ("h-*") on +element+. When at least one
# is present the element is handed to parse_microformat together with the
# matching class names; otherwise the search continues in its children.
def self.parse_element(element)
  root_classes = element.attribute("class").to_s.split.select do |name|
    name =~ /^h-/
  end

  # no root microformat here: descend into the children instead
  return parse_nodeset(element.children) if root_classes.empty?

  parse_microformat(element, root_classes)
end
|
||||
|
||||
# Instantiates the Ruby class that represents a root microformat element.
# Only the first entry of +html_classes+ is considered. Its name is turned
# into a constant name (class-name -> class_name -> ClassName); an existing
# top-level constant of that name is reused, otherwise a new subclass of
# Microformats2::Root is created and registered under it.
def self.parse_microformat(microformat, html_classes)
  constant_name = html_classes.first
                              .gsub("-", "_")
                              .gsub(/^([a-z])/) { $1.upcase }
                              .gsub(/_(.)/) { $1.upcase }

  klass =
    if Object.const_defined?(constant_name)
      Object.const_get(constant_name)
    else
      Object.const_set(constant_name, Class.new(Microformats2::Root))
    end

  klass.new(microformat)
end
|
||||
end # class << self
|
||||
end
|
||||
|
|
106
lib/microformats2/collection.rb
Normal file
106
lib/microformats2/collection.rb
Normal file
|
@ -0,0 +1,106 @@
|
|||
module Microformats2
  # The set of root microformats ("h-*" elements) found in a document.
  #
  # Every parsed format is appended to #formats and is also reachable through
  # an accessor named after its root class (h-card -> #h_card). The names of
  # those accessors are listed in #added_methods.
  class Collection
    attr_accessor :added_methods, :formats

    def initialize
      @added_methods = []
      @formats = []
    end

    # Walks document.children looking for root microformats.
    #
    # document - an object exposing #children (the spec passes the result of
    #            Nokogiri::HTML).
    #
    # Returns self so calls can be chained.
    def parse(document)
      parse_nodeset(document.children)
      self
    end

    # Returns { items: [...] } holding each format's own #to_hash, in the
    # order the formats were found.
    def to_hash
      { items: @formats.map(&:to_hash) }
    end

    # Serialises #to_hash as JSON.
    #
    # args - optional generator state/options. They must be accepted because
    #        the JSON library passes its state to #to_json when a collection
    #        is nested inside another structure being generated; a zero-arity
    #        #to_json raises ArgumentError there.
    def to_json(*args)
      to_hash.to_json(*args)
    end

    private

    # Runs parse_node over every member of +nodeset+.
    def parse_nodeset(nodeset)
      nodeset.map { |node| parse_node(node) }
    end

    # Node sets recurse, elements are inspected, anything else is ignored.
    def parse_node(node)
      case node
      when Nokogiri::XML::NodeSet then parse_nodeset(node)
      when Nokogiri::XML::Element then parse_element(node)
      end
    end

    # Parses +element+ as a root microformat when it carries an "h-*" class;
    # otherwise keeps searching in its children.
    def parse_element(element)
      root_classes = element.attribute("class").to_s.split.select do |html_class|
        html_class =~ /^h-/
      end

      if root_classes.empty?
        parse_nodeset(element.children)
      else
        parse_microformat(element, root_classes)
      end
    end

    # Builds the format object for a root element and registers it both in
    # @formats and under its dynamic accessor.
    def parse_microformat(microformat, html_classes)
      # only worry about the first format for now
      html_class = html_classes.first

      # class-name -> class_name
      method_name = html_class.downcase.gsub("-", "_")
      # class_name -> Class_name -> ClassName
      constant_name = method_name.gsub(/^([a-z])/) { $1.upcase }.gsub(/_(.)/) { $1.upcase }

      # reuse an existing top-level constant, else create a Format subclass
      klass =
        if Object.const_defined?(constant_name)
          Object.const_get(constant_name)
        else
          Object.const_set(constant_name, Class.new(Microformats2::Format))
        end

      format = klass.new.parse(microformat)
      @formats << format

      save_method_name(method_name)
      add_method(method_name)
      populate_method(method_name, format)
    end

    # Records +method_name+ in @added_methods, once.
    def save_method_name(method_name)
      @added_methods << method_name unless @added_methods.include?(method_name)
    end

    # Defines an accessor pair on this object's class unless the object
    # already responds to +method_name+.
    def add_method(method_name)
      return if respond_to?(method_name)

      self.class.class_eval { attr_accessor method_name }
    end

    # First value is stored as-is; a second value turns the slot into
    # [first, second]; later values are appended to that array.
    def populate_method(method_name, value)
      current = send(method_name)

      if !current
        send("#{method_name}=", value)
      elsif current.kind_of?(Array)
        current << value
      else
        send("#{method_name}=", [current, value])
      end
    end
  end
end
|
102
lib/microformats2/format.rb
Normal file
102
lib/microformats2/format.rb
Normal file
|
@ -0,0 +1,102 @@
|
|||
module Microformats2
  # Base class for one root microformat (e.g. an h-card).
  #
  # Parsing an element discovers its prefixed property classes (p-*, u-*,
  # dt-*, e-*), defines an accessor for each on the format's class and stores
  # the extracted value there. #added_method_names lists those accessors.
  class Format
    attr_accessor :added_method_names

    def initialize
      @added_method_names = []
    end

    # Walks element.children collecting properties.
    #
    # Returns self so calls can be chained.
    def parse(element)
      parse_nodeset(element.children)
      self
    end

    # Derives the microformat type from the Ruby class name:
    # ClassName -> className -> class-name
    def type
      self.class.name.gsub(/^([A-Z])/) { $1.downcase }.gsub(/([A-Z])/) { "-" + $1.downcase }
    end

    # Returns { type: [type], properties: {...} } with one symbol key per
    # recorded accessor name.
    def to_hash
      hash = { type: [type], properties: {} }
      @added_method_names.each_with_object(hash[:properties]) do |method_name, props|
        props[method_name.to_sym] = send(method_name)
      end
      hash
    end

    # Serialises #to_hash as JSON.
    #
    # args - optional generator state/options. They must be accepted because
    #        the JSON library passes its state to #to_json when a format is
    #        nested inside another structure being generated; a zero-arity
    #        #to_json raises ArgumentError there.
    def to_json(*args)
      to_hash.to_json(*args)
    end

    private

    # Runs parse_node over every member of +nodeset+.
    def parse_nodeset(nodeset)
      nodeset.map { |node| parse_node(node) }
    end

    # Node sets recurse, elements are inspected, anything else is ignored.
    def parse_node(node)
      case node
      when Nokogiri::XML::NodeSet then parse_nodeset(node)
      when Nokogiri::XML::Element then parse_element(node)
      end
    end

    # Parses +element+ as a property when it carries a prefixed property
    # class; otherwise keeps searching in its children.
    def parse_element(element)
      property_classes = element.attribute("class").to_s.split.select do |html_class|
        html_class =~ Microformats2::PropertyPrefixesRegEx
      end

      if property_classes.empty?
        parse_nodeset(element.children)
      else
        parse_property(element, property_classes)
      end
    end

    # Extracts and stores one value per prefixed class on +element+.
    def parse_property(element, html_classes)
      html_classes.each do |html_class|
        parts = html_class.split("-")
        prefix = parts.first                  # p-class-name -> p
        method_name = parts[1..-1].join("_")  # p-class-name -> class_name
        value = Microformats2::PropertyPrefixes[prefix].parse(element)

        # avoid overriding Object#class
        method_name = "klass" if method_name == "class"

        save_method_name(method_name)
        add_method(method_name)
        populate_method(method_name, value)
      end
    end

    # Records +method_name+ in @added_method_names, once.
    def save_method_name(method_name)
      @added_method_names << method_name unless @added_method_names.include?(method_name)
    end

    # Defines an accessor pair on this object's class unless the object
    # already responds to +method_name+.
    #
    # NOTE(review): a property whose name matches an existing public method
    # other than "class" (e.g. p-type -> "type") is skipped here, and
    # populate_method will then call a "type=" writer that was never
    # defined. Only "class" is remapped by parse_property.
    def add_method(method_name)
      return if respond_to?(method_name)

      self.class.class_eval { attr_accessor method_name }
    end

    # First value is stored as-is; a second value turns the slot into
    # [first, second]; later values are appended to that array.
    def populate_method(method_name, value)
      current = send(method_name)

      if !current
        send("#{method_name}=", value)
      elsif current.kind_of?(Array)
        current << value
      else
        send("#{method_name}=", [current, value])
      end
    end
  end
end
|
30
lib/microformats2/property.rb
Normal file
30
lib/microformats2/property.rb
Normal file
|
@ -0,0 +1,30 @@
|
|||
module Microformats2
  # Property parser for the "p-" (plain text) prefix.
  class TextProperty
    # Returns element.text with every run of whitespace (newlines included)
    # collapsed to a single space and both ends trimmed.
    def parse(element)
      # /\s+/ already matches newlines, so one pass is enough.
      element.text.gsub(/\s+/, " ").strip
    end
  end

  # Property parser for the "u-" (URL) prefix.
  class UrlProperty
    # Returns the element's "href" attribute as a String, falling back to
    # the element's text when the attribute lookup returns nil/false.
    def parse(element)
      # The fallback previously referenced an undefined local (`property`),
      # which raised NameError for any element without an href.
      (element.attribute("href") || element.text).to_s
    end
  end

  # Property parser for the "dt-" (date/time) prefix.
  class DateTimeProperty
    # Returns a DateTime parsed from the element's "datetime" attribute, or
    # from the element's text when that attribute lookup returns nil/false.
    #
    # Raises whatever DateTime.parse raises for an unparseable string.
    def parse(element)
      # Same undefined-local fix as UrlProperty; to_s is required because
      # the attribute lookup can return a node object rather than a String,
      # which DateTime.parse does not accept.
      DateTime.parse((element.attribute("datetime") || element.text).to_s)
    end
  end

  # Property parser for the "e-" (embedded) prefix.
  class EmbeddedProperty
    # Returns element.text unchanged.
    def parse(element)
      element.text
    end
  end

  # Maps a property class prefix to the parser instance that extracts its
  # value. Frozen so the shared lookup table cannot be mutated by accident.
  PropertyPrefixes = {
    "p" => TextProperty.new,
    "u" => UrlProperty.new,
    "dt" => DateTimeProperty.new,
    "e" => EmbeddedProperty.new
  }.freeze

  # Matches an HTML class name that starts with one of the property prefixes.
  PropertyPrefixesRegEx = /^(p-|u-|dt-|e-)/
end
|
29
spec/lib/microformats2/collection_spec.rb
Normal file
29
spec/lib/microformats2/collection_spec.rb
Normal file
|
@ -0,0 +1,29 @@
|
|||
require "spec_helper"
|
||||
require "microformats2"
|
||||
|
||||
describe Microformats2::Collection do
  # Hash expected from the single h-card fixture parsed in the before hook.
  def expected_items
    {
      items: [
        { type: ["h-card"], properties: { name: "Jessica Lynn Suttles" } }
      ]
    }
  end

  before do
    @html = <<-HTML.strip
      <div class="h-card"><p class="p-name">Jessica Lynn Suttles</p></div>
    HTML
    document = Nokogiri::HTML(@html)
    @collection = Microformats2::Collection.new.parse(document)
  end

  describe "#to_hash" do
    it "returns the correct Hash" do
      @collection.to_hash.should == expected_items
    end
  end

  describe "#to_json" do
    it "returns the correct JSON" do
      @collection.to_json.should == expected_items.to_json
    end
  end
end
|
|
@ -10,15 +10,16 @@ describe Microformats2 do
|
|||
|
||||
describe "::parse" do
|
||||
before do
|
||||
html = "spec/support/simple.html"
|
||||
@microformats2 = Microformats2.parse(@html)
|
||||
end
|
||||
it "returns an array of found root microformats" do
|
||||
@microformats2.first.should be_kind_of HCard
|
||||
it "returns a collection" do
|
||||
@microformats2.should be_kind_of Microformats2::Collection
|
||||
end
|
||||
it "assigns root formats to collection" do
|
||||
@microformats2.h_card.should be_kind_of HCard
|
||||
end
|
||||
it "assigns properties to found root microformats" do
|
||||
puts @microformats2.first.to_hash
|
||||
@microformats2.first.name.should == "Jessica Lynn Suttles"
|
||||
@microformats2.h_card.name.should == "Jessica Lynn Suttles"
|
||||
end
|
||||
end
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue