mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-07 16:15:32 +02:00
add new test
This commit is contained in:
parent
fa63d297f8
commit
d9c92ea42c
3 changed files with 182 additions and 0 deletions
26
resources/tests/readability/daringfireball-1/expected.html
Normal file
26
resources/tests/readability/daringfireball-1/expected.html
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
<article><div id="readability-page-1">
|
||||||
|
<h2>About This Site</h2>
|
||||||
|
<p>Daring Fireball is written and produced by John Gruber.</p>
|
||||||
|
<p><a href="http://fakehost/graphics/author/addison-bw.jpg" target="_blank"><img src="http://fakehost/graphics/author/addison-bw-425.jpg" alt="Photograph of the author."></a><br><em>Portrait by <a href="http://superbiate.com/inquiries/" target="_blank">George Del Barrio</a></em></p>
|
||||||
|
<h2>Mac Apps</h2>
|
||||||
|
<ul>
|
||||||
|
<li><a href="http://www.barebones.com/products/bbedit/" target="_blank">BBEdit</a></li>
|
||||||
|
<li><a href="http://www.flyingmeat.com/acorn/" target="_blank">Acorn</a></li>
|
||||||
|
<li><a href="http://www.red-sweater.com/marsedit/" target="_blank">MarsEdit</a></li>
|
||||||
|
<li><a href="http://aged-and-distilled.com/napkin/" target="_blank">Napkin</a></li>
|
||||||
|
<li><a href="http://www.barebones.com/products/Yojimbo/" target="_blank">Yojimbo</a></li>
|
||||||
|
<li><a href="http://www.panic.com/transmit/" target="_blank">Transmit</a></li>
|
||||||
|
<li><a href="http://latenightsw.com/sd4/index.html" target="_blank">Script Debugger</a></li>
|
||||||
|
<li><a href="http://www.ambrosiasw.com/utilities/snapzprox/" target="_blank">Snapz Pro X</a></li>
|
||||||
|
<li><a href="http://nightly.webkit.org/" target="_blank">WebKit</a></li>
|
||||||
|
</ul>
|
||||||
|
<h2>iPhone Apps</h2>
|
||||||
|
<ul><li><a href="http://vesperapp.co/" target="_blank">Vesper</a></li></ul>
|
||||||
|
<h2>Server Software</h2>
|
||||||
|
<p>The Daring Fireball website is hosted by <a href="http://joyent.com/" target="_blank">Joyent</a>.</p>
|
||||||
|
<p>Articles and links are published through <a href="http://movabletype.org/" target="_blank">Movable Type</a>. In addition to my own SmartyPants and Markdown plug-ins, Daring Fireball uses several excellent Movable Type plug-ins, including Brad Choate’s <a href="http://bradchoate.com/weblog/2003/06/24/regular-expressions" target="_blank">MT-Regex</a> and <a href="http://bradchoate.com/weblog/2004/10/20/mtifempty" target="_blank">MT-IfEmpty</a>, and <a href="http://bumppo.net/projects/amputator/" target="_blank">Nat Irons’s Amputator</a>.</p>
|
||||||
|
<p>Stats are tracked using <a href="http://haveamint.com/" target="_blank">Mint</a>. Additional web nerdery, including the membership system, is fueled by <a href="http://perl.org/" target="_blank">Perl</a>, <a href="http://www.php.net/" target="_blank">PHP</a>, and <a href="http://www.mysql.com/" target="_blank">MySQL</a>.</p>
|
||||||
|
<h2>Web Standards</h2>
|
||||||
|
<p>Web standards are important, and Daring Fireball adheres to them. Specifically, Daring Fireball’s HTML markup should validate as either <a href="http://www.whatwg.org/specs/web-apps/current-work/" target="_blank">HTML 5</a> or XHTML 4.01 Transitional, its layout is constructed using <a href="http://jigsaw.w3.org/css-validator/validator?uri=http://daringfireball.net/css/fireball_screen.css" target="_blank">valid CSS</a>, and its syndicated feed is <a href="http://feedvalidator.org/check?url=http%3A%2F%2Fdaringfireball.net%2Findex.xml" target="_blank">valid Atom</a>.</p>
|
||||||
|
<p>If Daring Fireball looks goofy in your browser, you’re likely using a shitty browser that doesn’t support web standards. Internet Explorer, I’m looking in your direction. If you complain about this, I will laugh at you, because I do not care. If, however, you are using a modern, standards-compliant browser and have trouble viewing or reading Daring Fireball, please do let me know.</p>
|
||||||
|
</div></article>
|
151
resources/tests/readability/daringfireball-1/source.html
Normal file
151
resources/tests/readability/daringfireball-1/source.html
Normal file
|
@ -0,0 +1,151 @@
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
||||||
|
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="content-type" content="text/html; charset=utf-8"/>
|
||||||
|
<title>Daring Fireball: Colophon</title>
|
||||||
|
<meta name="viewport" content="width=600, initial-scale=0.5, minimum-scale=0.45"/>
|
||||||
|
<link rel="apple-touch-icon-precomposed" href="/graphics/apple-touch-icon.png"/>
|
||||||
|
<link rel="shortcut icon" href="/graphics/favicon.ico?v=005"/>
|
||||||
|
<link rel="stylesheet" type="text/css" media="screen" href="/css/fireball_screen.css?v1.6"/>
|
||||||
|
<link rel="stylesheet" type="text/css" media="screen" href="/css/ie_sucks.php"/>
|
||||||
|
<link rel="stylesheet" type="text/css" media="print" href="/css/fireball_print.css?v01"/>
|
||||||
|
<link rel="alternate" type="application/atom+xml" href="/feeds/main"/>
|
||||||
|
<script src="/js/js-global/FancyZoom.js" type="text/javascript"></script>
|
||||||
|
<script src="/js/js-global/FancyZoomHTML.js" type="text/javascript"></script>
|
||||||
|
<link rel="shortcut icon" href="/favicon.ico"/> </head>
|
||||||
|
|
||||||
|
<body onload="setupZoom()">
|
||||||
|
<div id="Box">
|
||||||
|
<div id="Banner">
|
||||||
|
<a href="/" title="Daring Fireball: Home"><img src="/graphics/logos/" alt="Daring Fireball" height="56"/></a>
|
||||||
|
</div>
|
||||||
|
<div id="Sidebar">
|
||||||
|
<p>By <strong>John Gruber</strong></p>
|
||||||
|
<ul>
|
||||||
|
<!--★-->
|
||||||
|
<li><a href="/archive/" title="Previous articles.">Archive</a></li>
|
||||||
|
<li>
|
||||||
|
<script type="text/javascript">
|
||||||
|
// <![CDATA[
|
||||||
|
function ReadCookie(name) {
|
||||||
|
var nameEQ = name + "=";
|
||||||
|
var ca = document.cookie.split(';');
|
||||||
|
for (var i = 0; i < ca.length; i++) {
|
||||||
|
var c = ca[i];
|
||||||
|
while (c.charAt(0) == ' ') c = c.substring(1, c.length);
|
||||||
|
if (c.indexOf(nameEQ) == 0) return c.substring(nameEQ.length, c.length);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
var display_linked_list = ReadCookie('displayLinkedList');
|
||||||
|
var li_linked = '<a href="/linked/" title="The Linked List.">Linked List<\/a>';
|
||||||
|
if (display_linked_list == "hide") {
|
||||||
|
// Linked List is off on home page, so show it in menu:
|
||||||
|
document.write(li_linked + "<\/li>\n<li>");
|
||||||
|
} else {
|
||||||
|
// Default to not putting separate LL item in sidebar:
|
||||||
|
}
|
||||||
|
// ]]>
|
||||||
|
</script>
|
||||||
|
</li>
|
||||||
|
<li><a href="/thetalkshow/" title="The world’s most popular podcast.">The Talk Show</a></li>
|
||||||
|
<li><a href="http://vesperapp.co/" title="A simple, elegant iOS app for collecting notes, ideas, thoughts, and images.">Vesper: <em>Now for iPad</em></a></li>
|
||||||
|
<li><a href="/projects/" title="Software projects, including SmartyPants and Markdown.">Projects</a></li>
|
||||||
|
<li><a href="/contact/" title="How to send email regarding Daring Fireball.">Contact</a></li>
|
||||||
|
<li><a href="/colophon/" title="About this site and the tools used to produce it.">Colophon</a></li>
|
||||||
|
<li><a href="/feeds/">RSS Feed</a></li>
|
||||||
|
<li><a href="https://twitter.com/daringfireball">Twitter</a></li>
|
||||||
|
<li><a href="/feeds/sponsors/">Sponsorship</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<!-- Sidebar -->
|
||||||
|
<div id="Main">
|
||||||
|
<div class="article">
|
||||||
|
<h1>About This Site</h1>
|
||||||
|
<p>Daring Fireball is written and produced by John Gruber.</p>
|
||||||
|
<p>
|
||||||
|
<a href="/graphics/author/addison-bw.jpg"> <img src="/graphics/author/addison-bw-425.jpg" alt="Photograph of the author." style="border: 0;"/></a>
|
||||||
|
<br/><em>Portrait by <a href="http://superbiate.com/inquiries/">George Del Barrio</a></em> </p>
|
||||||
|
<h2>Mac Apps</h2>
|
||||||
|
<ul>
|
||||||
|
<li><a href="http://www.barebones.com/products/bbedit/">BBEdit</a></li>
|
||||||
|
<li><a href="http://www.flyingmeat.com/acorn/">Acorn</a></li>
|
||||||
|
<li><a href="http://www.red-sweater.com/marsedit/">MarsEdit</a></li>
|
||||||
|
<li><a href="http://aged-and-distilled.com/napkin/">Napkin</a></li>
|
||||||
|
<li><a href="http://www.barebones.com/products/Yojimbo/">Yojimbo</a></li>
|
||||||
|
<li><a href="http://www.panic.com/transmit/">Transmit</a></li>
|
||||||
|
<li><a href="http://latenightsw.com/sd4/index.html">Script Debugger</a></li>
|
||||||
|
<li><a href="http://www.ambrosiasw.com/utilities/snapzprox/">Snapz Pro X</a></li>
|
||||||
|
<li><a href="http://nightly.webkit.org/">WebKit</a></li>
|
||||||
|
</ul>
|
||||||
|
<h2>iPhone Apps</h2>
|
||||||
|
<ul>
|
||||||
|
<li><a href="http://vesperapp.co/">Vesper</a></li>
|
||||||
|
</ul>
|
||||||
|
<h2>Server Software</h2>
|
||||||
|
<p>The Daring Fireball website is hosted by <a href="http://joyent.com/">Joyent</a>.</p>
|
||||||
|
<p>Articles and links are published through <a href="http://movabletype.org/">Movable Type</a>. In addition to my own SmartyPants and Markdown plug-ins, Daring Fireball uses several excellent Movable Type plug-ins, including Brad Choate’s <a href="http://bradchoate.com/weblog/2003/06/24/regular-expressions">MT-Regex</a> and <a href="http://bradchoate.com/weblog/2004/10/20/mtifempty">MT-IfEmpty</a>, and <a href="http://bumppo.net/projects/amputator/">Nat Irons’s Amputator</a>.</p>
|
||||||
|
<p>Stats are tracked using <a href="http://haveamint.com/">Mint</a>. Additional web nerdery, including the membership system, is fueled by <a href="http://perl.org/">Perl</a>, <a href="http://www.php.net/">PHP</a>, and <a href="http://www.mysql.com/">MySQL</a>.</p>
|
||||||
|
<h2>Web Standards</h2>
|
||||||
|
<p>Web standards are important, and Daring Fireball adheres to them. Specifically, Daring Fireball’s HTML markup should validate as either <a href="http://www.whatwg.org/specs/web-apps/current-work/">HTML 5</a> or XHTML 4.01 Transitional, its layout is constructed using <a href="http://jigsaw.w3.org/css-validator/validator?uri=http://daringfireball.net/css/fireball_screen.css">valid CSS</a>, and its syndicated feed is <a href="http://feedvalidator.org/check?url=http%3A%2F%2Fdaringfireball.net%2Findex.xml">valid Atom</a>.</p>
|
||||||
|
<p>If Daring Fireball looks goofy in your browser, you’re likely using a shitty browser that doesn’t support web standards. Internet Explorer, I’m looking in your direction. If you complain about this, I will laugh at you, because I do not care. If, however, you are using a modern, standards-compliant browser and have trouble viewing or reading Daring Fireball, please do let me know.</p>
|
||||||
|
</div>
|
||||||
|
<!-- article -->
|
||||||
|
<div id="Footer">
|
||||||
|
<form id="SiteSearch" action="https://daringfireball.net/search" method="get" style="margin-bottom: 2.5em;">
|
||||||
|
<div>
|
||||||
|
<input name="q" type="text" value="" style="margin-right: 8px; width: 66%;"/>
|
||||||
|
<input type="submit" value="Search"/> </div>
|
||||||
|
</form>
|
||||||
|
<p class="smallprint"> <a href="/preferences/" title="Customize the font size and presentation options for this web site.">Display Preferences</a>
|
||||||
|
<br/>
|
||||||
|
<br/> Copyright © 2002–2015 The Daring Fireball Company LLC. </p>
|
||||||
|
</div>
|
||||||
|
<div id="SidebarTheDeck">
|
||||||
|
<script type="text/javascript">
|
||||||
|
// <![CDATA[
|
||||||
|
(function(id) {
|
||||||
|
document.write('<script type="text/javascript" src="' + '//connect.decknetwork.net/deck' + id + '_js.php?' + (new Date().getTime()) + '"></' + 'script>');
|
||||||
|
})("DF");
|
||||||
|
var deckDiv = document.getElementById("SidebarTheDeck");
|
||||||
|
var sidebarDiv = document.getElementById("Sidebar");
|
||||||
|
sidebarDiv.appendChild(deckDiv);
|
||||||
|
// ]]>
|
||||||
|
</script>
|
||||||
|
<p id="ViaTheDeck">
|
||||||
|
<a href="http://decknetwork.net/"> <img src="//daringfireball.net/graphics/madison/via_the_deck.png" alt="Ads via The Deck" class="the_deck_promo" width="70"/> <span>Ads via The Deck</span> </a>
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<!-- Google Analytics -->
|
||||||
|
<script type="text/javascript">
|
||||||
|
var _gaq = _gaq || [];
|
||||||
|
_gaq.push(['_setAccount', 'UA-593949-1']);
|
||||||
|
_gaq.push(['_trackPageview']);
|
||||||
|
(function() {
|
||||||
|
var ga = document.createElement('script');
|
||||||
|
ga.type = 'text/javascript';
|
||||||
|
ga.async = true;
|
||||||
|
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
|
||||||
|
var s = document.getElementsByTagName('script')[0];
|
||||||
|
s.parentNode.insertBefore(ga, s);
|
||||||
|
})();
|
||||||
|
</script>
|
||||||
|
<!-- Asynchronously load Mint -->
|
||||||
|
<script type="text/javascript">
|
||||||
|
(function() {
|
||||||
|
var ma = document.createElement('script');
|
||||||
|
ma.type = 'text/javascript';
|
||||||
|
ma.src = '/mint/?js';
|
||||||
|
ma.async = true;
|
||||||
|
var s = document.getElementsByTagName('script')[0];
|
||||||
|
s.parentNode.insertBefore(ma, s);
|
||||||
|
})();
|
||||||
|
</script>
|
||||||
|
</div>
|
||||||
|
<!-- Main -->
|
||||||
|
</div>
|
||||||
|
<!-- box -->
|
||||||
|
</body>
|
||||||
|
|
||||||
|
</html>
|
|
@ -151,6 +151,11 @@ async fn comment_inside_script_parsing() {
|
||||||
run_test("comment-inside-script-parsing").await
|
run_test("comment-inside-script-parsing").await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn daringfireball_1() {
|
||||||
|
run_test("daringfireball-1").await
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn webmd_1() {
|
async fn webmd_1() {
|
||||||
run_test("webmd-1").await
|
run_test("webmd-1").await
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue