Allow parsing of an HTML string, a file path, or a URL
This commit is contained in:
parent
c2ac2b53ae
commit
07d510ab7d
3 changed files with 41 additions and 1 deletions
|
@ -1,5 +1,16 @@
|
|||
require "nokogiri"
|
||||
require "open-uri"
|
||||
require "microformats2/version"
|
||||
|
||||
module Microformats2
  # Parses HTML into a Nokogiri document.
  #
  # @param html [String, #to_s] raw HTML markup, a path to a local HTML file,
  #   or an http/https/ftp URL
  # @return [Nokogiri::HTML::Document] the parsed document
  def self.parse(html)
    Nokogiri::HTML(read_html(html))
  end

  # Resolves +html+ to HTML text.
  #
  # * A whitespace-free http/https/ftp URL is fetched through open-uri.
  # * A path naming an existing regular file is read from disk.
  # * Anything else is assumed to already be markup and is returned untouched.
  #
  # Kernel#open is deliberately avoided: it treats a leading "|" as a command
  # to execute, and on Ruby 3+ it no longer opens URLs even with open-uri
  # loaded.
  #
  # @param html [String, #to_s] markup, file path, or URL
  # @return [String, Object] the fetched/read HTML, or +html+ itself when it
  #   is neither a URL nor an existing file
  # @raise [OpenURI::HTTPError, SocketError, SystemCallError] when a URL
  #   cannot be fetched or a file cannot be read
  def self.read_html(html)
    source = html.to_s

    if source =~ %r{\A(?:https?|ftp)://\S+\z}i
      # URI#open comes from open-uri; the block form closes the handle.
      URI.parse(source).open(&:read)
    elsif !source.include?("\0") && File.file?(source)
      # File.file? is false (rather than raising) for over-long or missing
      # paths; the NUL guard avoids its ArgumentError on embedded "\0".
      File.read(source)
    else
      html
    end
  end
end
|
||||
|
|
23
spec/lib/microformats2_spec.rb
Normal file
23
spec/lib/microformats2_spec.rb
Normal file
|
@ -0,0 +1,23 @@
|
|||
require "spec_helper"
|
||||
require "microformats2"
|
||||
|
||||
describe Microformats2 do
  describe "::read_html" do
    # The h-card snippet each local example expects to find in the result.
    let(:card_markup) do
      %(<div class="h-card"><p class="p-name">Jessica Lynn Suttles</p></div>)
    end

    it "can be a string of html" do
      result = Microformats2.read_html(card_markup)
      result.should include(card_markup)
    end

    it "can be a file path to html" do
      fixture_path = "spec/support/simple.html"
      result = Microformats2.read_html(fixture_path)
      result.should include(card_markup)
    end

    it "can be a url to html" do
      # NOTE(review): this example performs a live HTTP request.
      page_url = "http://google.com"
      result = Microformats2.read_html(page_url)
      result.should include("google")
    end
  end
end
|
6
spec/support/simple.html
Normal file
6
spec/support/simple.html
Normal file
|
@ -0,0 +1,6 @@
|
|||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html>
  <body>
    <div class="h-card"><p class="p-name">Jessica Lynn Suttles</p></div>
  </body>
</html>
|
Loading…
Add table
Add a link
Reference in a new issue