mirror of
https://gitlab.com/news-flash/article_scraper.git
synced 2025-07-07 16:15:32 +02:00
6 more tags & make seattletimes test consistent
This commit is contained in:
parent
0d6db710e8
commit
cc6ff6d7e2
16 changed files with 3931 additions and 3 deletions
File diff suppressed because one or more lines are too long
|
@ -72,7 +72,7 @@
|
||||||
</p>
|
</p>
|
||||||
<div>
|
<div>
|
||||||
<figure id="image-11519494">
|
<figure id="image-11519494">
|
||||||
<img data-ratio="1.5" data-caption="Amazon-owned Whole Foods touted a price cut on halibut as part of an announcement recently about lower prices on hundreds of items. (Ellen M. Banner / The Seattle Times)" alt="Amazon-owned Whole Foods touted a price cut on halibut as part of an announcement recently about lower prices on hundreds of items. (Ellen M. Banner / The Seattle Times)" src="https://static.seattletimes.com/wp-content/uploads/2019/04/120109-1020x680.jpg" srcset="https://static.seattletimes.com/wp-content/uploads/2019/04/120109-300x200.jpg 300w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-768x512.jpg 768w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-1024x683.jpg 1024w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-780x520.jpg 780w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-1020x680.jpg 1020w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-1560x1040.jpg 1560w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-375x250.jpg 375w" sizes="(max-width: 767px) calc(100vw - 20px), (max-width: 1019px) calc(100vw - 30px), (max-width: 1044px) calc(100vw - 60px), 970px" data-sizes="(max-width: 767px) calc(100vw - 20px), (max-width: 1019px) calc(100vw - 30px), (max-width: 1044px) calc(100vw - 60px), 970px" data-src="https://static.seattletimes.com/wp-content/uploads/2019/04/120109-1020x680.jpg" data-old-src="https://www.seattletimes.com/wp-content/themes/st_refresh/img/lazy-loading-14x9.png" data-srcset="https://static.seattletimes.com/wp-content/uploads/2019/04/120109-300x200.jpg 300w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-768x512.jpg 768w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-1024x683.jpg 1024w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-780x520.jpg 780w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-1020x680.jpg 1020w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-1560x1040.jpg 1560w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-375x250.jpg 375w">
|
<img src="https://static.seattletimes.com/wp-content/uploads/2019/04/120109-1020x680.jpg" data-old-src="https://www.seattletimes.com/wp-content/themes/st_refresh/img/lazy-loading-14x9.png">
|
||||||
<figcaption>
|
<figcaption>
|
||||||
<span>Amazon-owned Whole Foods touted a price cut on halibut as part of an announcement recently about lower prices on hundreds of items. (Ellen M. Banner / The Seattle Times)</span>
|
<span>Amazon-owned Whole Foods touted a price cut on halibut as part of an announcement recently about lower prices on hundreds of items. (Ellen M. Banner / The Seattle Times)</span>
|
||||||
</figcaption>
|
</figcaption>
|
||||||
|
|
|
@ -1161,7 +1161,7 @@
|
||||||
</p>
|
</p>
|
||||||
<div class="image-single-wrapper image-11519494-499029239 layout-column-width">
|
<div class="image-single-wrapper image-11519494-499029239 layout-column-width">
|
||||||
<figure id="image-11519494" class="article-figure align-none">
|
<figure id="image-11519494" class="article-figure align-none">
|
||||||
<img data-ratio="1.5" data-caption="Amazon-owned Whole Foods touted a price cut on halibut as part of an announcement recently about lower prices on hundreds of items. (Ellen M. Banner / The Seattle Times)" class="lazy" alt="Amazon-owned Whole Foods touted a price cut on halibut as part of an announcement recently about lower prices on hundreds of items. (Ellen M. Banner / The Seattle Times)" src="https://www.seattletimes.com/wp-content/themes/st_refresh/img/lazy-loading-14x9.png" data-src="https://static.seattletimes.com/wp-content/uploads/2019/04/120109-1020x680.jpg" data-srcset="https://static.seattletimes.com/wp-content/uploads/2019/04/120109-300x200.jpg 300w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-768x512.jpg 768w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-1024x683.jpg 1024w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-780x520.jpg 780w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-1020x680.jpg 1020w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-1560x1040.jpg 1560w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-375x250.jpg 375w" data-sizes="(max-width: 767px) calc(100vw - 20px), (max-width: 1019px) calc(100vw - 30px), (max-width: 1044px) calc(100vw - 60px), 970px" /><noscript><img data-ratio="1.5" data-caption="Amazon-owned Whole Foods touted a price cut on halibut as part of an announcement recently about lower prices on hundreds of items. (Ellen M. Banner / The Seattle Times)" class='' alt='Amazon-owned Whole Foods touted a price cut on halibut as part of an announcement recently about lower prices on hundreds of items. (Ellen M. Banner / The Seattle Times)' src='https://static.seattletimes.com/wp-content/uploads/2019/04/120109-1020x680.jpg' srcset='https://static.seattletimes.com/wp-content/uploads/2019/04/120109-300x200.jpg 300w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-768x512.jpg 768w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-1024x683.jpg 1024w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-780x520.jpg 780w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-1020x680.jpg 1020w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-1560x1040.jpg 1560w, https://static.seattletimes.com/wp-content/uploads/2019/04/120109-375x250.jpg 375w' sizes='(max-width: 767px) calc(100vw - 20px), (max-width: 1019px) calc(100vw - 30px), (max-width: 1044px) calc(100vw - 60px), 970px' data-sizes='(max-width: 767px) calc(100vw - 20px), (max-width: 1019px) calc(100vw - 30px), (max-width: 1044px) calc(100vw - 60px), 970px' /></noscript>
|
<img data-ratio="1.5" class="lazy" src="https://www.seattletimes.com/wp-content/themes/st_refresh/img/lazy-loading-14x9.png"/><noscript><img src='https://static.seattletimes.com/wp-content/uploads/2019/04/120109-1020x680.jpg'/></noscript>
|
||||||
<figcaption class="article-figure-caption">
|
<figcaption class="article-figure-caption">
|
||||||
<span class="caption">Amazon-owned Whole Foods touted a price cut on halibut as part of an announcement recently about lower prices on hundreds of items. (Ellen M. Banner / The Seattle Times)</span>
|
<span class="caption">Amazon-owned Whole Foods touted a price cut on halibut as part of an announcement recently about lower prices on hundreds of items. (Ellen M. Banner / The Seattle Times)</span>
|
||||||
</figcaption>
|
</figcaption>
|
||||||
|
|
21
resources/tests/readability/style-tags-removal/expected.html
Normal file
21
resources/tests/readability/style-tags-removal/expected.html
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
<article><DIV id="readability-page-1"><article>
|
||||||
|
<h2>Lorem</h2>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<h2>Foo</h2>
|
||||||
|
<p>
|
||||||
|
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</p>
|
||||||
|
</article></DIV></article>
|
42
resources/tests/readability/style-tags-removal/source.html
Normal file
42
resources/tests/readability/style-tags-removal/source.html
Normal file
|
@ -0,0 +1,42 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8"/>
|
||||||
|
<title>Style tags removal</title>
|
||||||
|
<style>h1{font-weight:normal}</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<article>
|
||||||
|
<h1>Lorem</h1>
|
||||||
|
<style>
|
||||||
|
div{font-weight:bold}
|
||||||
|
</style>
|
||||||
|
<div>
|
||||||
|
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</div>
|
||||||
|
<style>
|
||||||
|
h2 {
|
||||||
|
color: red;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
<h2>Foo</h2>
|
||||||
|
<div>
|
||||||
|
Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</div>
|
||||||
|
</article>
|
||||||
|
<style>
|
||||||
|
* {
|
||||||
|
color: yellow;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</body>
|
||||||
|
</html>
|
38
resources/tests/readability/svg-parsing/expected.html
Normal file
38
resources/tests/readability/svg-parsing/expected.html
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
<article><DIV id="readability-page-1">
|
||||||
|
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
<svg version="1.1" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://www.w3.org/2000/svg" viewbox="0 0 50 50" height="50" width="50">
|
||||||
|
<g>
|
||||||
|
<clippath id="hex-mask-large"><polygon points="15,35 10,35 10,0 10,0 45,0 45,35 45,35 25,35 15,43"></polygon></clippath>
|
||||||
|
<clippath id="hex-mask-small"><polygon points="5,1 5,16 3,23 10,20 24,20 24,1"></polygon></clippath>
|
||||||
|
</g>
|
||||||
|
</svg>
|
||||||
|
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</DIV></article>
|
44
resources/tests/readability/svg-parsing/source.html
Normal file
44
resources/tests/readability/svg-parsing/source.html
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>SVG parsing</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
<svg version="1.1" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 50 50" height="50" width="50" style="position: absolute;">
|
||||||
|
<g>
|
||||||
|
<clipPath id="hex-mask-large"><polygon points="15,35 10,35 10,0 10,0 45,0 45,35 45,35 25,35 15,43"/></clipPath>
|
||||||
|
<clipPath id="hex-mask-small"><polygon points="5,1 5,16 3,23 10,20 24,20 24,1"/></clipPath>
|
||||||
|
</g>
|
||||||
|
</svg>
|
||||||
|
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
122
resources/tests/readability/table-style-attributes/expected.html
Normal file
122
resources/tests/readability/table-style-attributes/expected.html
Normal file
|
@ -0,0 +1,122 @@
|
||||||
|
<article><DIV id="readability-page-1">
|
||||||
|
<p>
|
||||||
|
<span size="+3"><b>linux usability
|
||||||
|
<span size="4"><br>...or, why do I bother.</span></b></span><br> © 2002, 2003
|
||||||
|
<a href="mailto:/dev/null@jwz.org?subject=Please%20delete%20this%20message%20without%20reading%20it." target="_blank">Jamie Zawinski</a> </p>
|
||||||
|
|
||||||
|
|
||||||
|
<div>
|
||||||
|
<table>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>
|
||||||
|
<p> In December 2002, I tried to install some software on my computer. The experience was, shall we say, less than pleasant. On many levels. I wrote about my experience, as I so often do. </p>
|
||||||
|
<p> Then in January, the jackasses over at Slashdot <a href="http://slashdot.org/article.pl?sid=03/01/24/1440207" target="_blank">posted</a> a link to it, calling it a "review" of Linux video software. I guess you could consider it a review, if you were to squint at it just right. But really what it is is a <i>rant</i> about how I had an evening stolen from me by crap software design. It is a flame about the pathetic state of Linux usability in general, and the handful of video players I tried out in particular. It makes no attempt to be balanced or objective or exhaustive. It is a description of my experience. Perhaps your experience was different. Good for you. </p>
|
||||||
|
<p> So of course that day I got hundreds of emails about it. Every Linux apologist in the world wanted to make sure I was fully informed of their opinion. The replies were roughly in the following groups: </p>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<li> "Right on! I had exactly the same experience! Thank you for putting it into words." <i>(This was about 1/3 of the replies.)</i>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
<li> "You're clearly an idiot, Linux is too sophisticated for you, you clearly are incapable of understanding anything, you should go back to kindergarten and/or use a Mac." <i>(Oddly, all of these messages used the word</i> `clearly' <i>repeatedly.)</i>
|
||||||
|
|
||||||
|
</li>
|
||||||
|
<li> "If you don't like it, fix it yourself."
|
||||||
|
|
||||||
|
</li>
|
||||||
|
<li> "Netscape sucks! XEmacs sucks! You suck! I never liked you anyway! And you swear too much!"
|
||||||
|
|
||||||
|
</li>
|
||||||
|
<li> "How dare you criticize someone else's work! You got it for free! You should be on your knees thanking them for wasting your time!"
|
||||||
|
|
||||||
|
</li>
|
||||||
|
<li> "While you have some valid complaints, I'm going to focus on this one inconsequential error you made in your characterization of one of the many roadblocks you encountered. You suck!"
|
||||||
|
|
||||||
|
</li>
|
||||||
|
<li> "It's your fault for using Red Hat! You should be using Debian/<wbr></wbr>Mandrake/<wbr></wbr>Gentoo instead!"
|
||||||
|
|
||||||
|
</li>
|
||||||
|
<li> "Red Hat 7.2 is totally obsolete! It's almost 14 months old! What were you expecting!" </li>
|
||||||
|
</ul>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<p> While I am flattered that so many logorrheic Linux fanboys are sufficiently interested in my opinions and experiences to share their deeply heartfelt views with me, you can all rest assured that: </p>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<ul type="A">
|
||||||
|
<li> I've heard it before; and </li>
|
||||||
|
<li> I didn't care the first time. </li>
|
||||||
|
</ul>
|
||||||
|
</ul>
|
||||||
|
<p> So please. Don't bother sending me any more mail about this. It's a near certainty that I will just delete it unread, so you might as well not waste your time. Feel free to call me names on your own web page if you feel the need to get it out of your system. But kindly stay out of my inbox.
|
||||||
|
|
||||||
|
|
||||||
|
</p>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
<span size="+3"><b>that said...</b></span>
|
||||||
|
</p>
|
||||||
|
<p> I understand that one can play videos on one's computer. I understand these videos come in many different formats. Every now and then I try to figure out what the Done Thing is, as far as playing movies on one's Linux machine. </p>
|
||||||
|
|
||||||
|
<ul> (Really my eventual goal is to be able to <i>create</i> video on Linux, but I figured I'd start small, and see if I could just get <i>playback</i> working before trying something that is undoubtedly ten thousand times harder.) </ul>
|
||||||
|
<p> I finally found <a href="http://mirrors.sctpc.com/dominik/linux/pkgs/mplayer/" target="_blank">RPMs</a> of <a href="http://www.mplayerhq.hu/" target="_blank">mplayer</a> that would consent to install themselves on a Red Hat 7.2 machine, and actually got it to play some videos. Amazing. But it's a total pain in the ass to use due to rampant "themeing." <i>Why do people do this?</i> They map this stupid shaped window with no titlebar (oh, sorry, your choice of a dozen stupidly-shaped windows without titlebars) all of which use fonts that are way too small to read. But, here's the best part, there's no way to raise the window to the top. So if another window ever gets on top of it, well, sorry, you're out of luck. And half of the themes always map the window at the very bottom of the
|
||||||
|
<nobr>screen --</nobr> conveniently <i>under</i> my panel where I can't reach it. </p>
|
||||||
|
<p> Resizing the window changes the aspect ratio of the video! Yeah, I'm sure someone has <i>ever</i> wanted that. </p>
|
||||||
|
<p> It moves the mouse to the upper left corner of every dialog box it creates! Which is great, because that means that when it gets into this cute little state of popping up a blank dialog that says "Error" five times a second, you can't even move the mouse over to another window to kill the program, you have to log in from another machine. </p>
|
||||||
|
<p> Fucking morons. </p>
|
||||||
|
<p> So I gave up on that, and tried to install <a href="http://gstreamer.net/" target="_blank">gstreamer</a>. Get this. Their propose ``solution'' for distributing binaries on Red Hat systems? They point you at an RPM that installs <b>apt</b>, the Debian package system! Yeah, that's a <i>good</i> idea, I want to struggle with two competing packaging systems on my machine just to install a single app. Well, I found some <a href="http://gstreamer.net/releases/redhat/redhat-72-i386/RPMS.ximian/" target="_blank">
|
||||||
|
RPMs</a> for Red Hat 7.2, but apparently they expect you to have already rectally inserted <a href="http://www.gnome.org/" target="_blank">Gnome2</a> on that 7.2 system first. Uh, no. I've seen the horror of Red Hat 8.0, and there's no fucking way I'm putting Gnome2 on any more of my machines for at least another six months, maybe a year. </p>
|
||||||
|
<p> Ok, no gstreamer. Let's try <a href="http://xinehq.de/" target="_blank">Xine</a>. I found <a href="http://rpmfind.net/linux/redhat/7.3/en/os/i386/RedHat/RPMS/" target="_blank">
|
||||||
|
RPMs</a>, and it sucks about the same as mplayer, and in about the same ways, though slightly less bad: it doesn't screw the aspect ratio when you resize the window; and at least its stupidly-shaped window is always forced to be on top. I don't like that either, but it's better than <i>never</i> being on top. It took me ten minutes to figure out where the "Open File" dialog was. It's on the button labeled <b><tt>"://"</tt></b> whose tooltip says "MRL Browser". Then you get to select file names from an oh-so-cute window that I guess is supposed to look like a tty, or maybe an LCD screen. It conveniently <i>centers</i> the file names in the list, and truncates them at about 30 characters. The scrollbar is also composed of "characters": it's an underscore. </p>
|
||||||
|
<p> <i> What are these fucktards <b><u>thinking???</u></b></i> </p>
|
||||||
|
<p> Then I checked out <a href="http://www.dtek.chalmers.se/groups/dvd/" target="_blank">Ogle</a> again, and it hasn't been updated since the last time I tried, six months ago. It's a pretty decent DVD player, if you have the physical DVD. It does on-screen menus, and you can click on them with the mouse. But I don't need a DVD player (I have a hardware DVD player that works just fine.) It can't, as far as I can tell, play anything but actual discs. </p>
|
||||||
|
<p> Oh, and even though I have libdvdcss installed (as evidenced by the fact that Ogle actually works) Xine won't play the same disc that Ogle will play. It seems to be claiming that the CSS stuff isn't installed, which it clearly is. </p>
|
||||||
|
<p> An idiocy that all of these programs have in common is that, in addition to opening a window for the movie, and a window for the control panel, they <i>also</i> spray a constant spatter of curses crud on the terminal they were started from. I imagine at some point, there was some user who said, ``this program is pretty nice, but you know what it's missing? It's missing a lot of pointless chatter about what plugins and fonts have been loaded!'' </p>
|
||||||
|
|
||||||
|
<hr> <b>And here's the Random Commentary section:</b>
|
||||||
|
|
||||||
|
<blockquote> <b><a href="http://www.lazycat.org/" target="_blank">Makali</a> wrote:</b>
|
||||||
|
<ul><i>
|
||||||
|
Whenever a programmer thinks, "Hey, skins, what a cool idea", their
|
||||||
|
computer's speakers should create some sort of cock-shaped soundwave
|
||||||
|
and plunge it repeatedly through their skulls.
|
||||||
|
</i></ul>
|
||||||
|
<p> I am fully in support of this proposed audio-cock technology. </p>
|
||||||
|
<p><b>Various people wrote:</b> </p>
|
||||||
|
<ul><i>
|
||||||
|
You shouldn't even bother compiling the GUI into mplayer!
|
||||||
|
</i></ul>
|
||||||
|
<p> So I should solve the problem of ``crappy GUI'' by replacing it with ``no GUI at all?'' I should use the program only from the command line, or by memorizing magic keystrokes? Awesome idea. </p>
|
||||||
|
<p><b>Various other people wrote:</b> </p>
|
||||||
|
<ul><i>
|
||||||
|
You didn't try <a href="http://www.videolan.org/vlc/" target="_blank">vlc</a>!
|
||||||
|
</i></ul>
|
||||||
|
<p> True, I hadn't. Now I have. It has an overly-complicated UI, (the Preferences panel is a festival of overkill) but at least it uses standard menus and buttons, so it doesn't make you want to claw your eyes out immediately. But, it can only play a miniscule number of video formats, so it's mostly useless. <i>*plonk*</i> </p>
|
||||||
|
<p><b>Someone else wrote:</b> </p>
|
||||||
|
<ul><i>
|
||||||
|
Have you considered changing distributions?
|
||||||
|
</i></ul>
|
||||||
|
<p> Yes, every single time I try something like this, I very seriously consider <a href="http://fakehost/test/gruntle/bittybox.html" target="_blank">getting a Mac</a>. </p>
|
||||||
|
<p> Really the only thing that's stopping me is that I fear the <a href="http://www.xemacs.org/" target="_blank">Emacs situation</a>. </p>
|
||||||
|
<p> (By which I mean, ``Lack of a usable version thereof.'' No, running RMSmacs inside a terminal window doesn't qualify. Nor does running an X server on the Mac: if I were going to switch, why in the world would I continue inflicting the X Windows Disaster on myself? Wouldn't getting away from that be the <i>whole
|
||||||
|
point?</i>) </p>
|
||||||
|
|
||||||
|
<ul>
|
||||||
|
<span size="-1"> (I understand there is an almost-functional Aqua version of <a href="http://fakehost/test/hacks/why-cooperation-with-rms-is-impossible.mp3" target="_blank">
|
||||||
|
RMSmacs</a> now. I'll probably check it out at some point, but the problem with <i><a href="http://fakehost/test/base/lemacs.html" target="_blank">me</a></i> switching from XEmacs to RMSmacs is that it would probably result in another <a href="http://slashdot.org/article.pl?sid=03/01/24/1440207" target="_blank">
|
||||||
|
Slashdork</a> post, meaning I'd wake up to another 150+ poorly spelled flames in my inbox... I'm hoping for a Aquafied XEmacs, but I know that's not likely to happen any time soon.)</span>
|
||||||
|
</ul>
|
||||||
|
<p> By the way, the suggestion to switch Linux distrubutions in order to get a single app to work might sound absurd at first. And that's because <a href="http://fakehost/test/base/linux.html" target="_blank">it is</a>. But I've been saturated with Unix-peanut-gallery effluvia for so long that it no longer even surprises me when every
|
||||||
|
<nobr>question --</nobr> no matter how
|
||||||
|
<nobr>simple --</nobr> results in someone suggesting that you either A) patch your kernel or B) change distros. It's inevitable and inescapable, like Hitler. </p>
|
||||||
|
</blockquote>
|
||||||
|
|
||||||
|
<hr>
|
||||||
|
|
||||||
|
<p> <a href="http://fakehost/test/" target="_blank"><img alt="[ up ]" src="http://fakehost/test/compass1.gif" onmouseover='this.src="../compass2.gif"' onmouseout='this.src="../compass1.gif"'></a> </p>
|
||||||
|
</DIV></article>
|
167
resources/tests/readability/table-style-attributes/source.html
Normal file
167
resources/tests/readability/table-style-attributes/source.html
Normal file
|
@ -0,0 +1,167 @@
|
||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||||
|
<html>
|
||||||
|
|
||||||
|
<head>
|
||||||
|
<title>linux video</title>
|
||||||
|
<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon"/>
|
||||||
|
<link rel="stylesheet" type="text/css" href="/jwz.css"/>
|
||||||
|
<script type="text/javascript" src="/jwz.js"></script>
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body class="jwzw">
|
||||||
|
<p align="CENTER">
|
||||||
|
<font size="+3"><b>linux usability
|
||||||
|
<font size="4"><br/>...or, why do I bother.</font></b></font><br/> © 2002, 2003
|
||||||
|
<!--
|
||||||
|
<A HREF="../">Jamie Zawinski</A>
|
||||||
|
<A HREF="../about.html"><jwz@jwz.org></A>
|
||||||
|
--><a href="mailto:/dev/null@jwz.org?subject=Please%20delete%20this%20message%20without%20reading%20it.">Jamie Zawinski</a> </p>
|
||||||
|
<!--
|
||||||
|
<P><BR>
|
||||||
|
<DIV ALIGN=CENTER>
|
||||||
|
<TABLE WIDTH="50%" CELLPADDING=20 CELLSPACING=0 BORDER BGCOLOR="#FEFFE6">
|
||||||
|
<TR>
|
||||||
|
<TD BGCOLOR="#FEFFE6">
|
||||||
|
|
||||||
|
<P> <B>Welcome,
|
||||||
|
<A HREF="http://slashdot.org/article.pl?sid=03/01/24/1440207">
|
||||||
|
Slashdot</A> readers!</B>
|
||||||
|
|
||||||
|
<P> If you have any comments on this article, please feel free to
|
||||||
|
mail them to me <A HREF="mailto:malda@slashdot.org"><B>here!</B></A>
|
||||||
|
|
||||||
|
</TD></TR></TABLE></DIV>
|
||||||
|
-->
|
||||||
|
<p><br/> </p>
|
||||||
|
<div align="CENTER">
|
||||||
|
<table width="90%" cellpadding="20" cellspacing="0" border="" bgcolor="#FEFFE6">
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td bgcolor="#FEFFE6">
|
||||||
|
<p> In December 2002, I tried to install some software on my computer. The experience was, shall we say, less than pleasant. On many levels. I wrote about my experience, as I so often do. </p>
|
||||||
|
<p> Then in January, the jackasses over at Slashdot <a href="http://slashdot.org/article.pl?sid=03/01/24/1440207">posted</a> a link to it, calling it a "review" of Linux video software. I guess you could consider it a review, if you were to squint at it just right. But really what it is is a <i>rant</i> about how I had an evening stolen from me by crap software design. It is a flame about the pathetic state of Linux usability in general, and the handful of video players I tried out in particular. It makes no attempt to be balanced or objective or exhaustive. It is a description of my experience. Perhaps your experience was different. Good for you. </p>
|
||||||
|
<p> So of course that day I got hundreds of emails about it. Every Linux apologist in the world wanted to make sure I was fully informed of their opinion. The replies were roughly in the following groups: </p>
|
||||||
|
<p> </p>
|
||||||
|
<ul>
|
||||||
|
<li> "Right on! I had exactly the same experience! Thank you for putting it into words." <i>(This was about 1/3 of the replies.)</i>
|
||||||
|
<p></p>
|
||||||
|
</li>
|
||||||
|
<li> "You're clearly an idiot, Linux is too sophisticated for you, you clearly are incapable of understanding anything, you should go back to kindergarten and/or use a Mac." <i>(Oddly, all of these messages used the word</i> `clearly' <i>repeatedly.)</i>
|
||||||
|
<p></p>
|
||||||
|
</li>
|
||||||
|
<li> "If you don't like it, fix it yourself."
|
||||||
|
<p></p>
|
||||||
|
</li>
|
||||||
|
<li> "Netscape sucks! XEmacs sucks! You suck! I never liked you anyway! And you swear too much!"
|
||||||
|
<p></p>
|
||||||
|
</li>
|
||||||
|
<li> "How dare you criticize someone else's work! You got it for free! You should be on your knees thanking them for wasting your time!"
|
||||||
|
<p></p>
|
||||||
|
</li>
|
||||||
|
<li> "While you have some valid complaints, I'm going to focus on this one inconsequential error you made in your characterization of one of the many roadblocks you encountered. You suck!"
|
||||||
|
<p></p>
|
||||||
|
</li>
|
||||||
|
<li> "It's your fault for using Red Hat! You should be using Debian/<wbr/>Mandrake/<wbr/>Gentoo instead!"
|
||||||
|
<p></p>
|
||||||
|
</li>
|
||||||
|
<li> "Red Hat 7.2 is totally obsolete! It's almost 14 months old! What were you expecting!" </li>
|
||||||
|
</ul>
|
||||||
|
</td>
|
||||||
|
<td bgcolor="#FEFFE6">
|
||||||
|
<p> While I am flattered that so many logorrheic Linux fanboys are sufficiently interested in my opinions and experiences to share their deeply heartfelt views with me, you can all rest assured that: </p>
|
||||||
|
<p></p>
|
||||||
|
<ul>
|
||||||
|
<ul type="A">
|
||||||
|
<li> I've heard it before; and </li>
|
||||||
|
<li> I didn't care the first time. </li>
|
||||||
|
</ul>
|
||||||
|
</ul>
|
||||||
|
<p> So please. Don't bother sending me any more mail about this. It's a near certainty that I will just delete it unread, so you might as well not waste your time. Feel free to call me names on your own web page if you feel the need to get it out of your system. But kindly stay out of my inbox.
|
||||||
|
<!--
|
||||||
|
<P> If you <I>must</I> send me mail you can send it to me
|
||||||
|
<A HREF="mailto:malda@slashdot.org"><B>here.</B></A>
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<PRE><FONT SIZE="1">
|
||||||
|
Rob Malda wrote:
|
||||||
|
>
|
||||||
|
> com'on man. take my email address off your page. I'm deleting
|
||||||
|
> tons of these messages.
|
||||||
|
|
||||||
|
Jamie Zawinski wrote:
|
||||||
|
>
|
||||||
|
> Sure, how about you take the story off slashdot so that I don't
|
||||||
|
> have to delete them either?
|
||||||
|
</FONT></PRE>
|
||||||
|
-->
|
||||||
|
</p>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
<p><br/> </p>
|
||||||
|
<p align="CENTER">
|
||||||
|
<font size="+3"><b>that said...</b></font>
|
||||||
|
</p>
|
||||||
|
<p> I understand that one can play videos on one's computer. I understand these videos come in many different formats. Every now and then I try to figure out what the Done Thing is, as far as playing movies on one's Linux machine. </p>
|
||||||
|
<p></p>
|
||||||
|
<ul> (Really my eventual goal is to be able to <i>create</i> video on Linux, but I figured I'd start small, and see if I could just get <i>playback</i> working before trying something that is undoubtedly ten thousand times harder.) </ul>
|
||||||
|
<p> I finally found <a href="http://mirrors.sctpc.com/dominik/linux/pkgs/mplayer/">RPMs</a> of <a href="http://www.mplayerhq.hu/">mplayer</a> that would consent to install themselves on a Red Hat 7.2 machine, and actually got it to play some videos. Amazing. But it's a total pain in the ass to use due to rampant "themeing." <i>Why do people do this?</i> They map this stupid shaped window with no titlebar (oh, sorry, your choice of a dozen stupidly-shaped windows without titlebars) all of which use fonts that are way too small to read. But, here's the best part, there's no way to raise the window to the top. So if another window ever gets on top of it, well, sorry, you're out of luck. And half of the themes always map the window at the very bottom of the
|
||||||
|
<nobr>screen --</nobr> conveniently <i>under</i> my panel where I can't reach it. </p>
|
||||||
|
<p> Resizing the window changes the aspect ratio of the video! Yeah, I'm sure someone has <i>ever</i> wanted that. </p>
|
||||||
|
<p> It moves the mouse to the upper left corner of every dialog box it creates! Which is great, because that means that when it gets into this cute little state of popping up a blank dialog that says "Error" five times a second, you can't even move the mouse over to another window to kill the program, you have to log in from another machine. </p>
|
||||||
|
<p> Fucking morons. </p>
|
||||||
|
<p> So I gave up on that, and tried to install <a href="http://gstreamer.net/">gstreamer</a>. Get this. Their propose ``solution'' for distributing binaries on Red Hat systems? They point you at an RPM that installs <b>apt</b>, the Debian package system! Yeah, that's a <i>good</i> idea, I want to struggle with two competing packaging systems on my machine just to install a single app. Well, I found some <a href="http://gstreamer.net/releases/redhat/redhat-72-i386/RPMS.ximian/">
|
||||||
|
RPMs</a> for Red Hat 7.2, but apparently they expect you to have already rectally inserted <a href="http://www.gnome.org/">Gnome2</a> on that 7.2 system first. Uh, no. I've seen the horror of Red Hat 8.0, and there's no fucking way I'm putting Gnome2 on any more of my machines for at least another six months, maybe a year. </p>
|
||||||
|
<p> Ok, no gstreamer. Let's try <a href="http://xinehq.de/">Xine</a>. I found <a href="http://rpmfind.net/linux/redhat/7.3/en/os/i386/RedHat/RPMS/">
|
||||||
|
RPMs</a>, and it sucks about the same as mplayer, and in about the same ways, though slightly less bad: it doesn't screw the aspect ratio when you resize the window; and at least its stupidly-shaped window is always forced to be on top. I don't like that either, but it's better than <i>never</i> being on top. It took me ten minutes to figure out where the "Open File" dialog was. It's on the button labeled <b><tt>"://"</tt></b> whose tooltip says "MRL Browser". Then you get to select file names from an oh-so-cute window that I guess is supposed to look like a tty, or maybe an LCD screen. It conveniently <i>centers</i> the file names in the list, and truncates them at about 30 characters. The scrollbar is also composed of "characters": it's an underscore. </p>
|
||||||
|
<p> <i> What are these fucktards <b><u>thinking???</u></b></i> </p>
|
||||||
|
<p> Then I checked out <a href="http://www.dtek.chalmers.se/groups/dvd/">Ogle</a> again, and it hasn't been updated since the last time I tried, six months ago. It's a pretty decent DVD player, if you have the physical DVD. It does on-screen menus, and you can click on them with the mouse. But I don't need a DVD player (I have a hardware DVD player that works just fine.) It can't, as far as I can tell, play anything but actual discs. </p>
|
||||||
|
<p> Oh, and even though I have libdvdcss installed (as evidenced by the fact that Ogle actually works) Xine won't play the same disc that Ogle will play. It seems to be claiming that the CSS stuff isn't installed, which it clearly is. </p>
|
||||||
|
<p> An idiocy that all of these programs have in common is that, in addition to opening a window for the movie, and a window for the control panel, they <i>also</i> spray a constant spatter of curses crud on the terminal they were started from. I imagine at some point, there was some user who said, ``this program is pretty nice, but you know what it's missing? It's missing a lot of pointless chatter about what plugins and fonts have been loaded!'' </p>
|
||||||
|
<p> </p>
|
||||||
|
<hr width="50%"/> <b>And here's the Random Commentary section:</b>
|
||||||
|
<p></p>
|
||||||
|
<blockquote> <b><a href="http://www.lazycat.org/">Makali</a> wrote:</b>
|
||||||
|
<ul><i>
|
||||||
|
Whenever a programmer thinks, "Hey, skins, what a cool idea", their
|
||||||
|
computer's speakers should create some sort of cock-shaped soundwave
|
||||||
|
and plunge it repeatedly through their skulls.
|
||||||
|
</i></ul>
|
||||||
|
<p> I am fully in support of this proposed audio-cock technology. </p>
|
||||||
|
<p><b>Various people wrote:</b> </p>
|
||||||
|
<ul><i>
|
||||||
|
You shouldn't even bother compiling the GUI into mplayer!
|
||||||
|
</i></ul>
|
||||||
|
<p> So I should solve the problem of ``crappy GUI'' by replacing it with ``no GUI at all?'' I should use the program only from the command line, or by memorizing magic keystrokes? Awesome idea. </p>
|
||||||
|
<p><b>Various other people wrote:</b> </p>
|
||||||
|
<ul><i>
|
||||||
|
You didn't try <a href="http://www.videolan.org/vlc/">vlc</a>!
|
||||||
|
</i></ul>
|
||||||
|
<p> True, I hadn't. Now I have. It has an overly-complicated UI, (the Preferences panel is a festival of overkill) but at least it uses standard menus and buttons, so it doesn't make you want to claw your eyes out immediately. But, it can only play a miniscule number of video formats, so it's mostly useless. <i>*plonk*</i> </p>
|
||||||
|
<p><b>Someone else wrote:</b> </p>
|
||||||
|
<ul><i>
|
||||||
|
Have you considered changing distributions?
|
||||||
|
</i></ul>
|
||||||
|
<p> Yes, every single time I try something like this, I very seriously consider <a href="../gruntle/bittybox.html">getting a Mac</a>. </p>
|
||||||
|
<p> Really the only thing that's stopping me is that I fear the <a href="http://www.xemacs.org/">Emacs situation</a>. </p>
|
||||||
|
<p> (By which I mean, ``Lack of a usable version thereof.'' No, running RMSmacs inside a terminal window doesn't qualify. Nor does running an X server on the Mac: if I were going to switch, why in the world would I continue inflicting the X Windows Disaster on myself? Wouldn't getting away from that be the <i>whole
|
||||||
|
point?</i>) </p>
|
||||||
|
<p></p>
|
||||||
|
<ul>
|
||||||
|
<font size="-1"> (I understand there is an almost-functional Aqua version of <a href="../hacks/why-cooperation-with-rms-is-impossible.mp3">
|
||||||
|
RMSmacs</a> now. I'll probably check it out at some point, but the problem with <i><a href="lemacs.html">me</a></i> switching from XEmacs to RMSmacs is that it would probably result in another <a href="http://slashdot.org/article.pl?sid=03/01/24/1440207">
|
||||||
|
Slashdork</a> post, meaning I'd wake up to another 150+ poorly spelled flames in my inbox... I'm hoping for a Aquafied XEmacs, but I know that's not likely to happen any time soon.)</font>
|
||||||
|
</ul>
|
||||||
|
<p> By the way, the suggestion to switch Linux distrubutions in order to get a single app to work might sound absurd at first. And that's because <a href="linux.html">it is</a>. But I've been saturated with Unix-peanut-gallery effluvia for so long that it no longer even surprises me when every
|
||||||
|
<nobr>question --</nobr> no matter how
|
||||||
|
<nobr>simple --</nobr> results in someone suggesting that you either A) patch your kernel or B) change distros. It's inevitable and inescapable, like Hitler. </p>
|
||||||
|
</blockquote>
|
||||||
|
<p></p>
|
||||||
|
<hr/>
|
||||||
|
<p> </p>
|
||||||
|
<p align="CENTER"> <a href="../"><img alt="[ up ]" class="compass" src="../compass1.gif" onmouseover="this.src="../compass2.gif"" onmouseout="this.src="../compass1.gif""/></a> </p>
|
||||||
|
</body>
|
||||||
|
|
||||||
|
</html>
|
27
resources/tests/readability/telegraph/expected.html
Normal file
27
resources/tests/readability/telegraph/expected.html
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
<article><DIV id="readability-page-1">
|
||||||
|
<div>
|
||||||
|
<p><span>Z</span>imbabwe President <a href="http://www.telegraph.co.uk/news/2017/11/17/zimbabwes-ruling-party-drafting-motion-fire-robert-mugabe-sunday/" target="_blank">Robert Mugabe</a>, his wife Grace and two key figures from her G40 political faction are under house arrest at Mugabe's "Blue House" compound in Harare and are insisting the 93 year-old finishes his presidential term, a source said.</p>
|
||||||
|
<p>The G40 figures are cabinet ministers Jonathan Moyo and Saviour Kasukuwere, who fled to the compound after their homes were attacked by troops in Tuesday night's coup, the source, who said he had spoken to people inside the compound, told Reuters.</p>
|
||||||
|
<p>Mr Mugabe is resisting mediation by a Catholic priest to allow the former guerrilla a graceful exit after the military takeover.</p>
|
||||||
|
<p>The priest, Fidelis Mukonori, is acting as a middle-man between Mr Mugabe and the generals, <a href="http://www.telegraph.co.uk/news/2017/11/15/zimbabwe-crisis-have-spent-long-careful-really-change/" target="_blank">who seized power in a targeted operation against "criminals" in his entourage</a>, a senior political source told Reuters.</p>
|
||||||
|
<p>The source could not provide details of the talks, which appear to be aimed at a smooth and bloodless transition after the departure of Mr Mugabe, who has led Zimbabwe since independence in 1980.</p>
|
||||||
|
<p>Mr Mugabe, still seen by many Africans as a liberation hero, is reviled in the West as a despot whose disastrous handling of the economy and willingness to resort to violence to maintain power destroyed one of Africa's most promising states.</p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<p><span>Z</span>imbabwean intelligence reports seen by Reuters suggest that former security chief Emmerson Mnangagwa, who was ousted as vice-president this month, has been mapping out a post-Mugabe vision with the military and opposition for more than a year.</p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<p><span>F</span>uelling speculation that Mnangagwa's plan might be rolling into action, opposition leader Morgan Tsvangirai, who has been receiving cancer treatment in Britain and South Africa, returned to Harare late on Wednesday, his spokesman said.</p>
|
||||||
|
<p>South Africa said Mr Mugabe had told President Jacob Zuma by telephone on Wednesday that he was confined to his home but was otherwise fine and the military said it was keeping him and his family, including wife Grace, safe.</p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<p><span>D</span>espite the lingering admiration for Mr Mugabe, there is little public affection for 52-year-old Grace, a former government typist who started having an affair with Mr Mugabe in the early 1990s as his first wife, Sally, was dying of kidney disease.</p>
|
||||||
|
<p>Dubbed "DisGrace" or "Gucci Grace" on account of her reputed love of shopping, she enjoyed a meteoric rise through the ranks of Mugabe's ruling Zanu-PF in the last two years, culminating in Mnangagwa's removal a week ago - a move seen as clearing the way for her to succeed her husband.</p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<p><span>I</span>n contrast to the high political drama unfolding behind closed doors, the streets of the capital remained calm, with people going about their daily business, albeit under the watch of soldiers on armoured vehicles at strategic locations.</p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<p><span>W</span>hatever the final outcome, the events could signal a once-in-a-generation change for the former British colony, a regional breadbasket reduced to destitution by economic policies Mr Mugabe's critics have long blamed on him.</p>
|
||||||
|
</div>
|
||||||
|
</DIV></article>
|
1821
resources/tests/readability/telegraph/source.html
Normal file
1821
resources/tests/readability/telegraph/source.html
Normal file
File diff suppressed because it is too large
Load diff
|
@ -0,0 +1,21 @@
|
||||||
|
<article><DIV id="readability-page-1"><article>
|
||||||
|
<h2>This is a long title with a colon: But the final text here is different</h2>
|
||||||
|
<p>
|
||||||
|
Lorem
|
||||||
|
ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
Lorem
|
||||||
|
ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</p>
|
||||||
|
</article></DIV></article>
|
|
@ -0,0 +1,30 @@
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8"/>
|
||||||
|
<title>This is a long title with a colon: Hello there</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<article>
|
||||||
|
<h1>This is a long title with a colon: But the final text here is different</h1>
|
||||||
|
<div>
|
||||||
|
Lorem
|
||||||
|
ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
Lorem
|
||||||
|
ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod
|
||||||
|
incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam,
|
||||||
|
quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
||||||
|
consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse
|
||||||
|
cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non
|
||||||
|
proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
||||||
|
</div>
|
||||||
|
</article>
|
||||||
|
</body>
|
||||||
|
</html>
|
37
resources/tests/readability/tmz-1/expected.html
Normal file
37
resources/tests/readability/tmz-1/expected.html
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
<article><div id="readability-page-1">
|
||||||
|
<P>
|
||||||
|
|
||||||
|
|
||||||
|
<h4>$150K Pearl Oscar Dress ... STOLEN!!!!</h4>
|
||||||
|
|
||||||
|
</P>
|
||||||
|
<h5>
|
||||||
|
2/26/2015 7:11 AM PST BY TMZ STAFF
|
||||||
|
</h5>
|
||||||
|
|
||||||
|
<div itemprop="articleBody">
|
||||||
|
<p><span>EXCLUSIVE</span>
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
<img alt="0225-lupita-nyongo-getty-01" src="http://ll-media.tmz.com/2015/02/26/0225-lupita-nyongo-getty-4.jpg"><strong>Lupita Nyong</strong>'<strong>o</strong>'s now-famous Oscar dress
|
||||||
|
-- adorned in pearls -- was stolen right out of her hotel room ... TMZ
|
||||||
|
has learned.</p>
|
||||||
|
<p>Law enforcement sources tell TMZ ... the dress was taken out of Lupita's
|
||||||
|
room at The London West Hollywood. The dress is made of pearls ... 6,000
|
||||||
|
white Akoya pearls. It's valued at $150,000.</p>
|
||||||
|
<p>Our sources say Lupita told cops it was taken from her room sometime between
|
||||||
|
8 AM and 9 PM Wednesday ... while she was gone. </p>
|
||||||
|
<p>We're told there is security footage that cops are looking at that could
|
||||||
|
catch the culprit right in the act. </p>
|
||||||
|
<p>
|
||||||
|
<img alt="update_graphic_red_bar" src="http://ll-media.tmz.com/2013/11/20/update-graphic-red-bar.jpg"><strong>12:00 PM PT</strong> -- Sheriff's deputies were at The London Thursday
|
||||||
|
morning. We know they were in the manager's office and we're told
|
||||||
|
they have looked at security footage to determine if they can ID the culprit.</p>
|
||||||
|
<p>
|
||||||
|
<img alt="0226-SUB-london-hotel-swipe-tmz-02" src="http://ll-media.tmz.com/2015/02/26/0226-sub-london-hotel-swipe-tmz-11.jpg">
|
||||||
|
</p>
|
||||||
|
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div></article>
|
1528
resources/tests/readability/tmz-1/source.html
Normal file
1528
resources/tests/readability/tmz-1/source.html
Normal file
File diff suppressed because it is too large
Load diff
|
@ -458,6 +458,36 @@ async fn social_buttons() {
|
||||||
run_test("social-buttons").await
|
run_test("social-buttons").await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn style_tags_removal() {
|
||||||
|
run_test("style-tags-removal").await
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn svg_parsing() {
|
||||||
|
run_test("svg-parsing").await
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn table_style_attributes() {
|
||||||
|
run_test("table-style-attributes").await
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn title_and_h1_discrepancy() {
|
||||||
|
run_test("title-and-h1-discrepancy").await
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn tmz_1() {
|
||||||
|
run_test("tmz-1").await
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn telegraph() {
|
||||||
|
run_test("telegraph").await
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn webmd_1() {
|
async fn webmd_1() {
|
||||||
run_test("webmd-1").await
|
run_test("webmd-1").await
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue