adds rake task to scrape sites for spec examples

This commit is contained in:
Jessica Lynn Suttles 2013-02-07 14:09:25 -08:00
parent dfa6047eb9
commit 506ae94adb
29 changed files with 368 additions and 0 deletions

View file

@ -1 +1,44 @@
require "bundler/gem_tasks"
require "fileutils"
require "nokogiri"
require "open-uri"
require "pp"
# Regenerates the spec fixtures by scraping example pages.
#
# For every configured page, the nodes matching the HTML selector and the
# nodes matching the JSON selector are collected and paired by position.
# Pair N of a page is written to:
#
#   spec/support/cases/<page-name>-<N>.html  (markup, prefixed with the URL as an HTML comment)
#   spec/support/cases/<page-name>-<N>.js    (JSON text, prefixed with the URL as a // comment)
#
# Existing files with the same names are overwritten. Requires network access.
desc "Scrape example pages and write HTML/JSON spec cases to spec/support/cases"
task :update_spec_cases do
  # Per source: the pages to fetch, the CSS selectors that locate the HTML and
  # JSON samples, and the name of the Nokogiri node method used to extract the
  # HTML sample from each matched node.
  sources = [
    {
      urls: ["http://microformats.org/wiki/microformats-2"],
      html_selector: ".source-html4strict",
      json_selector: ".source-javascript",
      html_method: "inner_text"
    },
    {
      urls: ["http://microformat2-node.jit.su/h-adr.html"],
      html_selector: ".e-x-microformat",
      json_selector: ".language-json",
      html_method: "inner_html"
    }
  ]

  output_dir = "spec/support/cases"
  # Make the task usable on a checkout where the fixture directory is missing.
  FileUtils.mkdir_p(output_dir)

  sources.each do |source|
    source[:urls].each do |url|
      # URI.open (Ruby 2.5+) replaces the Kernel#open URL support that open-uri
      # dropped in Ruby 3.0; the block form closes the handle after reading.
      document = Nokogiri::HTML(URI.open(url, &:read))

      # public_send keeps the configured extractor limited to public methods.
      html = document.css(source[:html_selector]).map { |node| node.public_send(source[:html_method]) }
      json = document.css(source[:json_selector]).map(&:inner_text)

      # Last path segment of the URL with any ".ext" parts removed.
      filename = url.split("/").last.gsub(/[.]\w+/, "")

      # Samples are matched up by position; if the page yields more of one
      # kind than the other, the unmatched trailing samples are skipped.
      pair_count = [html.length, json.length].min
      html.zip(json).first(pair_count).each_with_index do |(markup, expected), index|
        base = File.join(output_dir, "#{filename}-#{index}")
        File.write("#{base}.html", "<!-- #{url} -->\n#{markup}")
        File.write("#{base}.js", "// #{url}\n#{expected}")
      end
    end
  end
end

View file

@ -0,0 +1,3 @@
<!-- http://microformat2-node.jit.su/h-adr.html -->
<p class="h-adr">665 3rd St. Suite 207 San Francisco, CA 94107 U.S.A.</p>

View file

@ -0,0 +1,10 @@
// http://microformat2-node.jit.su/h-adr.html
{
"items": [{
"type": ["h-adr"],
"properties": {
"name": ["665 3rd St. Suite 207 San Francisco, CA 94107 U.S.A."]
}
}]
}

View file

@ -0,0 +1,10 @@
<!-- http://microformat2-node.jit.su/h-adr.html -->
<p class="h-adr">
<span class="p-street-address">665 3rd St.</span>
<span class="p-extended-address">Suite 207</span>
<span class="p-locality">San Francisco</span>,
<span class="p-region">CA</span>
<span class="p-postal-code">94107</span>
<span class="p-country-name">U.S.A.</span>
</p>

View file

@ -0,0 +1,16 @@
// http://microformat2-node.jit.su/h-adr.html
{
"items": [{
"type": ["h-adr"],
"properties": {
"street-address": ["665 3rd St."],
"extended-address": ["Suite 207"],
"locality": ["San Francisco"],
"region": ["CA"],
"postal-code": ["94107"],
"country-name": ["U.S.A."],
"name": ["665 3rd St. Suite 207 San Francisco, CA 94107 U.S.A."]
}
}]
}

View file

@ -0,0 +1,12 @@
<!-- http://microformat2-node.jit.su/h-adr.html -->
<p class="h-adr">
<span class="p-name">Bricklayer's Arms</span>
<span class="p-label">
<span class="p-street-address">3 Charlotte Road</span>,
<span class="p-locality">City of London</span>,
<span class="p-postal-code">EC2A 3PE</span>,
<span class="p-country-name">UK</span>
</span>
Geo:(<span class="p-geo">51.526421;-0.081067</span>)
</p>

View file

@ -0,0 +1,16 @@
// http://microformat2-node.jit.su/h-adr.html
{
"items": [{
"type": ["h-adr"],
"properties": {
"name": ["Bricklayer's Arms"],
"label": ["3 Charlotte Road, City of London, EC2A 3PE, UK"],
"street-address": ["3 Charlotte Road"],
"locality": ["City of London"],
"postal-code": ["EC2A 3PE"],
"country-name": ["UK"],
"geo": ["51.526421;-0.081067"]
}
}]
}

View file

@ -0,0 +1,6 @@
<!-- http://microformat2-node.jit.su/h-adr.html -->
<p class="h-adr">
<a class="p-name u-geo" href="geo:51.526421;-0.081067;crs=wgs84;u=40">Bricklayer's Arms</a>,
<span class="p-locality">London</span>
</p>

View file

@ -0,0 +1,12 @@
// http://microformat2-node.jit.su/h-adr.html
{
"items": [{
"type": ["h-adr"],
"properties": {
"name": ["Bricklayer's Arms"],
"geo": ["geo:51.526421;-0.081067;crs=wgs84;u=40"],
"locality": ["London"]
}
}]
}

View file

@ -0,0 +1,2 @@
<!-- http://microformats.org/wiki/microformats-2 -->
<span class="h-card">Frances Berriman</span>

View file

@ -0,0 +1,9 @@
// http://microformats.org/wiki/microformats-2
{
"items": [{
"type": ["h-card"],
"properties": {
"name": ["Frances Berriman"]
}
}]
}

View file

@ -0,0 +1,2 @@
<!-- http://microformats.org/wiki/microformats-2 -->
<a class="h-card" href="http://benward.me">Ben Ward</a>

View file

@ -0,0 +1,10 @@
// http://microformats.org/wiki/microformats-2
{
"items": [{
"type": ["h-card"],
"properties": {
"name": ["Ben Ward"],
"url": ["http://benward.me"]
}
}]
}

View file

@ -0,0 +1,5 @@
<!-- http://microformats.org/wiki/microformats-2 -->
<a class="h-card" href="http://rohit.khare.org/">
<img alt="Rohit Khare"
src="https://s3.amazonaws.com/twitter_production/profile_images/53307499/180px-Rohit-sq_bigger.jpg" />
</a>

View file

@ -0,0 +1,11 @@
// http://microformats.org/wiki/microformats-2
{
"items": [{
"type": ["h-card"],
"properties": {
"name": ["Rohit Khare"],
"url": ["http://rohit.khare.org"],
"photo": ["https://s3.amazonaws.com/twitter_production/profile_images/53307499/180px-Rohit-sq_bigger.jpg"]
}
}]
}

View file

@ -0,0 +1,17 @@
<!-- http://microformats.org/wiki/microformats-2 -->
<div class="h-card">
<img class="u-photo" alt="photo of Mitchell"
src="https://webfwd.org/content/about-experts/300.mitchellbaker/mentor_mbaker.jpg"/>
<a class="p-name u-url"
href="http://blog.lizardwrangler.com/"
>Mitchell Baker</a>
(<a class="u-url"
href="https://twitter.com/MitchellBaker"
>@MitchellBaker</a>)
<span class="p-org">Mozilla Foundation</span>
<p class="p-note">
Mitchell is responsible for setting the direction and scope of the Mozilla Foundation and its activities.
</p>
<span class="p-category">Strategy</span>
<span class="p-category">Leadership</span>
</div>

View file

@ -0,0 +1,20 @@
// http://microformats.org/wiki/microformats-2
{
"items": [{
"type": ["h-card"],
"properties": {
"photo": ["https://webfwd.org/content/about-experts/300.mitchellbaker/mentor_mbaker.jpg"],
"name": ["Mitchell Baker"],
"url": [
"http://blog.lizardwrangler.com/",
"https://twitter.com/MitchellBaker"
],
"org": ["Mozilla Foundation"],
"note": ["Mitchell is responsible for setting the direction and scope of the Mozilla Foundation and its activities."],
"category": [
"Strategy",
"Leadership"
]
}
}]
}

View file

@ -0,0 +1,16 @@
<!-- http://microformats.org/wiki/microformats-2 -->
<div class="h-event">
<a class="p-name u-url" href="http://indiewebcamp.com/2012">
IndieWebCamp 2012
</a>
from <time class="dt-start">2012-06-30</time>
to <time class="dt-end">2012-07-01</time> at
<span class="p-location h-card">
<a class="p-name p-org u-url" href="http://geoloqi.com/">
Geoloqi
</a>,
<span class="p-street-address">920 SW 3rd Ave. Suite 400</span>,
<span class="p-locality">Portland</span>,
<abbr class="p-region" title="Oregon">OR</abbr>
</span>
</div>

View file

@ -0,0 +1,24 @@
// http://microformats.org/wiki/microformats-2
{
"items": [{
"type": ["h-event"],
"properties": {
"name": ["IndieWebCamp 2012"],
"url": ["http://indiewebcamp.com/2012"],
"start": ["2012-06-30"],
"end": ["2012-07-01"],
"location": [{
"value": "Geoloqi, 920 SW 3rd Ave. Suite 400, Portland, OR",
"type": ["h-card"],
"properties": {
"name": ["Geoloqi"],
"org": ["Geoloqi"],
"url": ["http://geoloqi.com/"],
"street-address": ["920 SW 3rd Ave. Suite 400"],
"locality": ["Portland"],
"region": ["Oregon"]
}
}]
}
}]
}

View file

@ -0,0 +1,7 @@
<!-- http://microformats.org/wiki/microformats-2 -->
<div class="h-card">
<a class="p-name u-url"
href="http://blog.lizardwrangler.com/"
>Mitchell Baker</a>
(<span class="p-org">Mozilla Foundation</span>)
</div>

View file

@ -0,0 +1,11 @@
// http://microformats.org/wiki/microformats-2
{
"items": [{
"type": ["h-card"],
"properties": {
"name": ["Mitchell Baker"],
"url": ["http://blog.lizardwrangler.com/"],
"org": ["Mozilla Foundation"]
}
}]
}

View file

@ -0,0 +1,9 @@
<!-- http://microformats.org/wiki/microformats-2 -->
<div class="h-card">
<a class="p-name u-url"
href="http://blog.lizardwrangler.com/"
>Mitchell Baker</a>
(<a class="p-org h-card"
href="http://mozilla.org/"
>Mozilla Foundation</a>)
</div>

View file

@ -0,0 +1,18 @@
// http://microformats.org/wiki/microformats-2
{
"items": [{
"type": ["h-card"],
"properties": {
"name": ["Mitchell Baker"],
"url": ["http://blog.lizardwrangler.com/"],
"org": [{
"value": "Mozilla Foundation",
"type": ["h-card"],
"properties": {
"name": ["Mozilla Foundation"],
"url": ["http://mozilla.org/"]
}
}]
}
}]
}

View file

@ -0,0 +1,9 @@
<!-- http://microformats.org/wiki/microformats-2 -->
<div class="h-card">
<a class="p-name u-url"
href="http://blog.lizardwrangler.com/"
>Mitchell Baker</a>
(<a class="p-org h-card h-org"
href="http://mozilla.org/"
>Mozilla Foundation</a>)
</div>

View file

@ -0,0 +1,18 @@
// http://microformats.org/wiki/microformats-2
{
"items": [{
"type": ["h-card"],
"properties": {
"name": ["Mitchell Baker"],
"url": ["http://blog.lizardwrangler.com/"],
"org": [{
"value": "Mozilla Foundation",
"type": ["h-card", "h-org"],
"properties": {
"name": ["Mozilla Foundation"],
"url": ["http://mozilla.org/"]
}
}]
}
}]
}

View file

@ -0,0 +1,9 @@
<!-- http://microformats.org/wiki/microformats-2 -->
<div class="h-card">
<a class="p-name u-url"
href="http://blog.lizardwrangler.com/"
>Mitchell Baker</a>
(<a class="h-org h-card"
href="http://mozilla.org/"
>Mozilla Foundation</a>)
</div>

View file

@ -0,0 +1,17 @@
// http://microformats.org/wiki/microformats-2
{
"items": [{
"type": ["h-card"],
"properties": {
"name": ["Mitchell Baker"],
"url": ["http://blog.lizardwrangler.com/"]
},
"children": [{
"type": ["h-card","h-org"],
"properties": {
"name": ["Mozilla Foundation"],
"url": ["http://mozilla.org/"]
}
}]
}]
}

View file

@ -0,0 +1,9 @@
<!-- http://microformats.org/wiki/microformats-2 -->
<div class="h-card">
<a class="p-name u-url"
href="http://blog.lizardwrangler.com/"
>Mitchell Baker</a>
(<a class="h-card"
href="http://mozilla.org/"
>Mozilla Foundation</a>)
</div>

View file

@ -0,0 +1,17 @@
// http://microformats.org/wiki/microformats-2
{
"items": [{
"type": ["h-card"],
"properties": {
"name": ["Mitchell Baker"],
"url": ["http://blog.lizardwrangler.com/"]
},
"children": [{
"type": ["h-card"],
"properties": {
"name": ["Mozilla Foundation"],
"url": ["http://mozilla.org/"]
}
}]
}]
}