allows for parsing of html string, file path, or url

This commit is contained in:
Jessica Lynn Suttles 2013-02-04 13:40:33 -08:00
parent c2ac2b53ae
commit 07d510ab7d
3 changed files with 41 additions and 1 deletions

View file

@ -1,5 +1,16 @@
require "nokogiri"
require "open-uri"
require "microformats2/version"
module Microformats2
  # Matches strings that begin with a URI scheme followed by "://"
  # (e.g. "http://", "https://"). Same shape open-uri uses to recognise URLs.
  URL_PATTERN = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}

  # Parses HTML into a Nokogiri document.
  #
  # @param html [String] a string of HTML, a path to an HTML file, or a URL
  # @return [Nokogiri::HTML::Document] the parsed document
  def self.parse(html)
    Nokogiri::HTML(read_html(html))
  end

  # Resolves +html+ to HTML text.
  #
  # * If +html+ looks like a URL that open-uri can fetch, the response body
  #   is returned.
  # * Otherwise, if +html+ names a readable file, the file contents are
  #   returned.
  # * Otherwise +html+ is assumed to already be markup and is returned as-is.
  #
  # @param html [String] a string of HTML, a path to an HTML file, or a URL
  # @return [String] the HTML text
  def self.read_html(html)
    if html.respond_to?(:to_str) && URL_PATTERN =~ html
      uri = URI.parse(html)
      # Only URI classes extended by open-uri (http, https, ftp) respond to
      # #read; calling it directly avoids depending on the Kernel#open patch
      # that newer Rubies no longer provide.
      return uri.read if uri.respond_to?(:read)
    end

    begin
      # File.open (unlike Kernel#open) never treats a leading "|" as a
      # command to run, and the block form closes the handle after reading.
      File.open(html) { |file| file.read }
    rescue Errno::ENOENT, Errno::ENAMETOOLONG, Errno::ENOTDIR, Errno::EINVAL
      # Not a usable path (missing, too long for a file name, or otherwise
      # malformed as a path), so treat the argument as raw HTML.
      html
    end
  end
end

View file

@ -0,0 +1,23 @@
require "spec_helper"
require "microformats2"
describe Microformats2 do
  describe "::read_html" do
    # The h-card markup every example looks for; surrounding whitespace from
    # the heredoc is stripped.
    let(:markup) do
      <<-HTML.strip
        <div class="h-card"><p class="p-name">Jessica Lynn Suttles</p></div>
      HTML
    end

    it "can be a string of html" do
      result = Microformats2.read_html(markup)
      result.should include markup
    end

    it "can be a file path to html" do
      path = "spec/support/simple.html"
      result = Microformats2.read_html(path)
      result.should include markup
    end

    # NOTE: this example fetches a live page, so it needs network access.
    it "can be a url to html" do
      url = "http://google.com"
      result = Microformats2.read_html(url)
      result.should include "google"
    end
  end
end

6
spec/support/simple.html Normal file
View file

@ -0,0 +1,6 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html>
<body>
<div class="h-card"><p class="p-name">Jessica Lynn Suttles</p></div>
</body>
</html>