From f737ab27fde9b1293083bd9009ed836d7fc9faa1 Mon Sep 17 00:00:00 2001 From: Jan Lukas Gernert Date: Wed, 26 Apr 2023 21:04:35 +0200 Subject: [PATCH] update readability test results --- .../tests/readability/001/expected.html | 10 +- .../tests/readability/002/expected.html | 149 +- .../tests/readability/003/expected.html | 2 +- .../tests/readability/aclu/expected.html | 2 +- .../tests/readability/aktualne/expected.html | 12 +- .../archive-of-our-own/expected.html | 4 +- .../tests/readability/ars-1/expected.html | 12 +- .../expected.html | 12 +- .../basic-tags-cleaning/expected.html | 9 +- .../tests/readability/bbc-1/expected.html | 44 +- .../tests/readability/blogger/expected.html | 132 +- .../tests/readability/breitbart/expected.html | 13 +- .../readability/bug-1255978/expected.html | 29 +- .../readability/buzzfeed-1/expected.html | 8 +- .../tests/readability/citylab-1/expected.html | 29 +- .../readability/clean-links/expected.html | 13 +- .../cnet-svg-classes/expected.html | 15 +- .../tests/readability/cnet/expected.html | 8 +- .../tests/readability/cnn/expected.html | 4 +- .../expected.html | 9 +- .../daringfireball-1/expected.html | 6 +- .../readability/data-url-image/expected.html | 12 +- .../tests/readability/dev418/expected.html | 26 +- .../readability/dropbox-blog/expected.html | 80 +- .../tests/readability/ebb-org/expected.html | 18 +- .../tests/readability/ehow-1/expected.html | 63 +- .../tests/readability/ehow-2/expected.html | 34 +- .../readability/embedded-videos/expected.html | 12 +- .../tests/readability/engadget/expected.html | 42 +- .../firefox-nightly-blog/expected.html | 11 +- .../tests/readability/folha/expected.html | 2 +- .../tests/readability/gmw/expected.html | 18 +- .../google-sre-book-1/expected.html | 2 +- .../readability/guardian-1/expected.html | 197 +-- .../tests/readability/heise/expected.html | 6 +- .../readability/herald-sun-1/expected.html | 24 +- .../readability/hidden-nodes/expected.html | 2 +- 
.../readability/hukumusume/expected.html | 144 +- .../tests/readability/iab-1/expected.html | 4 +- .../tests/readability/ietf-1/expected.html | 76 +- .../js-link-replacement/expected.html | 2 +- .../readability/keep-images/expected.html | 82 +- .../keep-tabular-data/expected.html | 458 +++-- .../tests/readability/la-nacion/expected.html | 4 +- .../readability/lazy-image-1/expected.html | 50 +- .../readability/lazy-image-2/expected.html | 232 ++- .../readability/lazy-image-3/expected.html | 6 +- .../tests/readability/lemonde-1/expected.html | 8 +- .../readability/liberation-1/expected.html | 6 +- .../expected.html | 67 +- .../lifehacker-working/expected.html | 67 +- .../readability/links-in-tables/expected.html | 172 +- .../tests/readability/lwn-1/expected.html | 31 +- .../medicalnewstoday/expected.html | 204 +-- .../tests/readability/medium-1/expected.html | 90 +- .../tests/readability/medium-2/expected.html | 26 +- .../tests/readability/medium-3/expected.html | 28 +- .../tests/readability/mercurial/expected.html | 1476 ++++++++--------- .../metadata-content-missing/expected.html | 38 +- .../missing-paragraphs/expected.html | 100 +- .../tests/readability/mozilla-1/expected.html | 201 ++- .../tests/readability/mozilla-2/expected.html | 18 +- .../tests/readability/msn/expected.html | 8 +- .../normalize-spaces/expected.html | 2 +- .../tests/readability/nytimes-1/expected.html | 30 +- .../tests/readability/nytimes-2/expected.html | 33 +- .../tests/readability/nytimes-3/expected.html | 22 +- .../tests/readability/nytimes-4/expected.html | 8 +- .../tests/readability/nytimes-5/expected.html | 78 +- .../tests/readability/pixnet/expected.html | 114 +- .../tests/readability/qq/expected.html | 14 +- .../tests/readability/quanta-1/expected.html | 12 +- .../remove-aria-hidden/expected.html | 2 +- .../remove-extra-paragraphs/expected.html | 9 +- .../remove-script-tags/expected.html | 9 +- .../reordering-paragraphs/expected.html | 6 +- .../replace-font-tags/expected.html | 2 +- 
.../tests/readability/salon-1/expected.html | 94 +- .../readability/seattletimes-1/expected.html | 4 +- .../readability/simplyfound-1/expected.html | 4 +- .../readability/social-buttons/expected.html | 2 +- .../style-tags-removal/expected.html | 2 +- .../readability/svg-parsing/expected.html | 9 +- .../table-style-attributes/expected.html | 12 +- .../tests/readability/telegraph/expected.html | 21 +- .../title-and-h1-discrepancy/expected.html | 2 +- .../tests/readability/tmz-1/expected.html | 8 +- .../readability/toc-missing/expected.html | 44 +- .../readability/topicseed-1/expected.html | 10 +- .../tests/readability/tumblr/expected.html | 4 +- .../tests/readability/v8-blog/expected.html | 14 +- .../tests/readability/videos-1/expected.html | 86 +- .../tests/readability/videos-2/expected.html | 36 +- .../tests/readability/wapo-1/expected.html | 9 +- .../tests/readability/wapo-2/expected.html | 15 +- .../tests/readability/webmd-1/expected.html | 2 +- .../tests/readability/webmd-2/expected.html | 2 +- .../tests/readability/wikia/expected.html | 10 +- .../readability/wikipedia-2/expected.html | 286 ++-- .../readability/wikipedia-3/expected.html | 80 +- .../tests/readability/wikipedia/expected.html | 377 ++--- .../tests/readability/wordpress/expected.html | 8 +- .../tests/readability/yahoo-1/expected.html | 26 +- .../tests/readability/yahoo-2/expected.html | 6 +- .../tests/readability/yahoo-3/expected.html | 22 +- .../tests/readability/yahoo-4/expected.html | 32 +- .../tests/readability/youth/expected.html | 8 +- .../src/full_text_parser/readability/tests.rs | 3 +- 108 files changed, 2709 insertions(+), 3539 deletions(-) diff --git a/article_scraper/resources/tests/readability/001/expected.html b/article_scraper/resources/tests/readability/001/expected.html index 741c0aa..eaf8ef6 100644 --- a/article_scraper/resources/tests/readability/001/expected.html +++ b/article_scraper/resources/tests/readability/001/expected.html @@ -16,9 +16,9 @@ help. actually works…

Drinking game for web devs: -
(1) Think of a noun -
(2) Google "<noun>.js" -
(3) If a library with that name exists - drink

— Shay Friedman (@ironshay) +
(1) Think of a noun +
(2) Google "<noun>.js" +
(3) If a library with that name exists - drink

— Shay Friedman (@ironshay) August 22, 2013

Blanket.js is an easy to install, easy to configure, @@ -114,7 +114,7 @@ describe("Cow", function() {

Running the tests now gives us something like this:

- screenshot + screenshot

As you can see, the report at the bottom highlights that we haven't actually tested the case where an error is raised in case a target name is missing. @@ -129,4 +129,4 @@ sessions but that's another story.

So is code coverage silver bullet? No. Is it useful? Definitely. Happy testing!

- + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/002/expected.html b/article_scraper/resources/tests/readability/002/expected.html index 1932918..09a607e 100644 --- a/article_scraper/resources/tests/readability/002/expected.html +++ b/article_scraper/resources/tests/readability/002/expected.html @@ -33,8 +33,7 @@

The most useful, high-level part of the Fetch API is the fetch() function. In its simplest form it takes a URL and returns a promise that resolves to the response. The response is captured as a Response object.

-
-
fetch("/data.json").then(function(res) {
+                        
fetch("/data.json").then(function(res) {
   // res instanceof Response == true.
   if (res.ok) {
     res.json().then(function(data) {
@@ -48,8 +47,7 @@
 });

Submitting some parameters, it would look like this:

-
-
fetch("http://www.example.org/submit.php", {
+            
fetch("http://www.example.org/submit.php", {
   method: "POST",
   headers: {
     "Content-Type": "application/x-www-form-urlencoded"
@@ -67,23 +65,22 @@
                             

The fetch() function’s arguments are the same as those passed to the -
+
Request() constructor, so you may directly pass arbitrarily complex requests to fetch() as discussed below.

Headers

Fetch introduces 3 interfaces. These are Headers, Request and -
+
Response. They map directly to the underlying HTTP concepts, but have -
certain visibility filters in place for privacy and security reasons, +
certain visibility filters in place for privacy and security reasons, such as -
supporting CORS rules and ensuring cookies aren’t readable by third parties.

+
supporting CORS rules and ensuring cookies aren’t readable by third parties.

The Headers interface is a simple multi-map of names to values:

-
-
var content = "Hello World";
+            
var content = "Hello World";
 var reqHeaders = new Headers();
 reqHeaders.append("Content-Type", "text/plain"
 reqHeaders.append("Content-Length", content.length.toString());
@@ -91,17 +88,15 @@ reqHeaders.append("X-Custom-Header"
                             

The same can be achieved by passing an array of arrays or a JS object literal -
to the constructor:

-
-
reqHeaders = new Headers({
+                
to the constructor:

+
reqHeaders = new Headers({
   "Content-Type": "text/plain",
   "Content-Length": content.length.toString(),
   "X-Custom-Header": "ProcessThisImmediately",
 });

The contents can be queried and retrieved:

-
-
console.log(reqHeaders.has("Content-Type")); // true
+            
console.log(reqHeaders.has("Content-Type")); // true
 console.log(reqHeaders.has("Set-Cookie")); // false
 reqHeaders.set("Content-Type", "text/html");
 reqHeaders.append("X-Custom-Header", "AnotherValue");
@@ -113,33 +108,32 @@ reqHeaders.delete("X-Custom-Header"
 console.log(reqHeaders.getAll("X-Custom-Header")); // []

Some of these operations are only useful in ServiceWorkers, but they provide -
a much nicer API to Headers.

+
a much nicer API to Headers.

Since Headers can be sent in requests, or received in responses, and have various limitations about what information can and should be mutable, Headers objects have a guard property. This is not exposed to the Web, but it affects which mutation operations are allowed on the Headers object. -
Possible values are:

+
Possible values are:

  • “none”: default.
  • “request”: guard for a Headers object obtained from a Request (Request.headers).
  • “request-no-cors”: guard for a Headers object obtained from a Request created -
    with mode “no-cors”.
  • +
    with mode “no-cors”.
  • “response”: naturally, for Headers obtained from Response (Response.headers).
  • “immutable”: Mostly used for ServiceWorkers, renders a Headers object -
    read-only.
  • +
    read-only.

The details of how each guard affects the behaviors of the Headers object are -
in the specification. For example, +
in the specification. For example, you may not append or set a “request” guarded Headers’ “Content-Length” header. Similarly, inserting “Set-Cookie” into a Response header is not allowed so that ServiceWorkers may not set cookies via synthesized Responses.

All of the Headers methods throw TypeError if name is not a valid HTTP Header name. The mutation operations will throw TypeError if there is an immutable guard. Otherwise they fail silently. For example:

-
-
var res = Response.error();
+            
var res = Response.error();
 try {
   res.headers.set("Origin", "http://mybank.com");
 } catch(e) {
@@ -154,27 +148,24 @@ console.log(reqHeaders.getAll(
         

The simplest Request is of course, just a URL, as you may do to GET a resource.

-
-
var req = new Request("/index.html");
+        
var req = new Request("/index.html");
 console.log(req.method); // "GET"
 console.log(req.url); // "http://example.com/index.html"

You may also pass a Request to the Request() constructor to create a copy. -
(This is not the same as calling the clone() method, which +
(This is not the same as calling the clone() method, which is covered in -
the “Reading bodies” section.).

-
-
var copy = new Request(req);
+            
the “Reading bodies” section.).

+
var copy = new Request(req);
 console.log(copy.method); // "GET"
 console.log(copy.url); // "http://example.com/index.html"

Again, this form is probably only useful in ServiceWorkers.

The non-URL attributes of the Request can only be set by passing initial -
values as a second argument to the constructor. This argument is a dictionary.

-
-
var uploadReq = new Request("/uploadImage", {
+            
values as a second argument to the constructor. This argument is a dictionary.

+
var uploadReq = new Request("/uploadImage", {
   method: "POST",
   headers: {
     "Content-Type": "image/png",
@@ -189,9 +180,8 @@ console.log(copy.url);
             

The "same-origin" mode is simple, if a request is made to another origin with this mode set, the result is simply an error. You could use this to ensure that -
a request is always being made to your origin.

-
-
var arbitraryUrl = document.getElementById("url-input").value;
+                
a request is always being made to your origin.

+
var arbitraryUrl = document.getElementById("url-input").value;
 fetch(arbitraryUrl, { mode: "same-origin" }).then(function(res) {
   console.log("Response succeeded?", res.ok);
 }, function(e) {
@@ -210,13 +200,12 @@ fetch(arbitraryUrl, { mode:
             

"cors" mode is what you’ll usually use to make known cross-origin requests to access various APIs offered by other vendors. These are expected to adhere to -
the CORS protocol. +
the CORS protocol. Only a limited set of headers is exposed in the Response, but the body is readable. For example, you could get a list of Flickr’s most interesting photos today like this:

-
-
var u = new URLSearchParams();
+            
var u = new URLSearchParams();
 u.append('method', 'flickr.interestingness.getList');
 u.append('api_key', '<insert api key here>');
 u.append('format', 'json');
@@ -237,23 +226,22 @@ apiCall.then(function(respon
                             

You may not read out the “Date” header since Flickr does not allow it via -
+
Access-Control-Expose-Headers.

-
-
response.headers.get("Date"); // null
+
response.headers.get("Date"); // null

The credentials enumeration determines if cookies for the other domain are -
sent to cross-origin requests. This is similar to XHR’s withCredentials -
flag, but tri-valued as "omit" (default), "same-origin" and "include".

+
sent to cross-origin requests. This is similar to XHR’s withCredentials +
flag, but tri-valued as "omit" (default), "same-origin" and "include".

The Request object will also give the ability to offer caching hints to the user-agent. This is currently undergoing some security review. Firefox exposes the attribute, but it has no effect.

Requests have two read-only attributes that are relevant to ServiceWorkers -
intercepting them. There is the string referrer, which is +
intercepting them. There is the string referrer, which is set by the UA to be -
the referrer of the Request. This may be an empty string. The other is -
+
the referrer of the Request. This may be an empty string. The other is +
context which is a rather large enumeration defining what sort of resource is being fetched. This could be “image” if the request is from an @@ -275,22 +263,18 @@ apiCall.then(function(respon The url attribute reflects the URL of the corresponding request.

Response also has a type, which is “basic”, “cors”, “default”, “error” or -
“opaque”.

+
“opaque”.

    -
  • -"basic": normal, same origin response, with all headers exposed +
  • "basic": normal, same origin response, with all headers exposed except -
    “Set-Cookie” and “Set-Cookie2″.
  • -
  • -"cors": response was received from a valid cross-origin request. +
    “Set-Cookie” and “Set-Cookie2″.
  • +
  • "cors": response was received from a valid cross-origin request. Certain headers and the bodymay be accessed.
  • -
  • -"error": network error. No useful information describing +
  • "error": network error. No useful information describing the error is available. The Response’s status is 0, headers are empty and immutable. This is the type for a Response obtained from Response.error().
  • -
  • -"opaque": response for “no-cors” request to cross-origin - resource. Severely
    +
  • "opaque": response for “no-cors” request to cross-origin + resource. Severely
    restricted
@@ -298,10 +282,9 @@ apiCall.then(function(respon TypeError.

There are certain attributes that are useful only in a ServiceWorker scope. The -
idiomatic way to return a Response to an intercepted request in ServiceWorkers +
idiomatic way to return a Response to an intercepted request in ServiceWorkers is:

-
-
addEventListener('fetch', function(event) {
+            
addEventListener('fetch', function(event) {
   event.respondWith(new Response("Response body", {
     headers: { "Content-Type" : "text/plain" }
   });
@@ -313,7 +296,7 @@ apiCall.then(function(respon
             

The static method Response.error() simply returns an error response. Similarly, Response.redirect(url, status) returns a Response resulting in -
a redirect to url.

+
a redirect to url.

Dealing with bodies

@@ -322,22 +305,17 @@ apiCall.then(function(respon cover it in detail now.

A body is an instance of any of the following types.

@@ -345,35 +323,28 @@ apiCall.then(function(respon extract their body. These all return a Promise that is eventually resolved with the actual content.

    -
  • -arrayBuffer() +
  • arrayBuffer()
  • -
  • -blob() +
  • blob()
  • -
  • -json() +
  • json()
  • -
  • -text() +
  • text()
  • -
  • -formData() +
  • formData()

This is a significant improvement over XHR in terms of ease of use of non-text data!

Request bodies can be set by passing body parameters:

-
-
var form = new FormData(document.getElementById('login-form'));
+            
var form = new FormData(document.getElementById('login-form'));
 fetch("/login", {
   method: "POST",
   body: form
 })

Responses take the first argument as the body.

-
-
var res = new Response(new File(["chunk", "chunk"], "archive.zip",
+                
var res = new Response(new File(["chunk", "chunk"], "archive.zip",
                        { type: "application/zip" }));

Both Request and Response (and by extension the fetch() function), @@ -386,8 +357,7 @@ fetch("/login", {

It is important to realise that Request and Response bodies can only be read once! Both interfaces have a boolean attribute bodyUsed to determine if it is safe to read or not.

-
-
var res = new Response("one time use");
+                
var res = new Response("one time use");
 console.log(res.bodyUsed); // false
 res.text().then(function(v) {
   console.log(res.bodyUsed); // true
@@ -411,8 +381,7 @@ res.text().catch(clone() MUST
                     be called before the body of the corresponding object has been used. That
                     is, clone() first, read later.

-
-
addEventListener('fetch', function(evt) {
+                
addEventListener('fetch', function(evt) {
   var sheep = new Response("Dolly");
   console.log(sheep.bodyUsed); // false
   var clone = sheep.clone();
@@ -439,11 +408,11 @@ res.text().catch(Fetch and
                     ServiceWorkerspecifications.

For a better web!

-

The author would like to thank Andrea Marchesini, Anne van Kesteren and Ben
+

The author would like to thank Andrea Marchesini, Anne van Kesteren and Ben
Kelly for helping with the specification and implementation.

-
+
\ No newline at end of file diff --git a/article_scraper/resources/tests/readability/003/expected.html b/article_scraper/resources/tests/readability/003/expected.html index 2013a9d..91cddbc 100644 --- a/article_scraper/resources/tests/readability/003/expected.html +++ b/article_scraper/resources/tests/readability/003/expected.html @@ -16,4 +16,4 @@ cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

-
+
\ No newline at end of file diff --git a/article_scraper/resources/tests/readability/aclu/expected.html b/article_scraper/resources/tests/readability/aclu/expected.html index a0b80a9..c420933 100644 --- a/article_scraper/resources/tests/readability/aclu/expected.html +++ b/article_scraper/resources/tests/readability/aclu/expected.html @@ -121,4 +121,4 @@

As we work to address the fallout from the current storm around Facebook and Cambridge Analytica, we can't afford to lose sight of these larger mechanisms at play. Cambridge Analytica's failures and mistakes are inherent to Facebook's business model. We need to seriously challenge the social structures that encourage people to opt in to this kind of surveillance. At the same time, we also need to protect those of us who manage to opt out.

-
+
\ No newline at end of file diff --git a/article_scraper/resources/tests/readability/aktualne/expected.html b/article_scraper/resources/tests/readability/aktualne/expected.html index 3b5f940..4b62035 100644 --- a/article_scraper/resources/tests/readability/aktualne/expected.html +++ b/article_scraper/resources/tests/readability/aktualne/expected.html @@ -1,8 +1,6 @@ -
-

+

Zázrak jedné sezony? West Ham dává pochybovačům stále pádnější odpovědi a fotbalový svět si začíná uvědomovat, že se absolutní anglická fotbalová elita rozrůstá o nového člena. Tým manažera Davida Moyese prohání giganty i v aktuálním ročníku Premier League. -

-
+

Pět vítězných soutěžních duelů v řadě, během nich jediný inkasovaný gól. Čtvrté místo v lize, stejný bodový zisk jako loňský šampion Manchester City a nadšené ohlasy z tábora těch nejrenomovanějších komentátorů ostrovního fotbalu.

@@ -71,8 +69,6 @@

"Jsme na děleném třetím místě. Lidé se před sezonou hodně ptali, zda to můžeme dokázat znovu. Ukázali jsme, že ano. Ale musíme pokračovat. Tohle musí být náš standard. Nesmíme polevit, pokud chceme být velkým týmem," zdůrazňuje Rice.

-
-

+

Pokud jste v článku zaznamenali chybu nebo překlep, dejte nám, prosím, vědět prostřednictvím kontaktního formuláře. Děkujeme! -

-
+

\ No newline at end of file diff --git a/article_scraper/resources/tests/readability/archive-of-our-own/expected.html b/article_scraper/resources/tests/readability/archive-of-our-own/expected.html index 422e8af..ab00777 100644 --- a/article_scraper/resources/tests/readability/archive-of-our-own/expected.html +++ b/article_scraper/resources/tests/readability/archive-of-our-own/expected.html @@ -95,7 +95,7 @@ It might have been curiosity or it might have been the nagging sensation that chewed at his brain for the three weeks that he researched the subject of the conversation. All For One was a cryptid. Mystical in more ways than one, he was only a rumour on a network that was two-hundred years old. There were whispers of a shadowy figure who once ruled Japan, intermingled with a string of conspiracies and fragmented events.

- Izuku had even braved the dark web, poking and prodding at some of the seedier elements of the world wide web. The internet had rumours, but the dark web had stories.
+ Izuku had even braved the dark web, poking and prodding at some of the seedier elements of the world wide web. The internet had rumours, but the dark web had stories.

An implied yakuza wrote about his grandfather who lost a fire manipulation Quirk and his sanity without any reason. His grandfather had been institutionalised, crying and repeating “he took it, he took it” until his dying days. No one could console him. @@ -314,4 +314,4 @@

And he’d be dealing with him again in another week.

-
+
\ No newline at end of file diff --git a/article_scraper/resources/tests/readability/ars-1/expected.html b/article_scraper/resources/tests/readability/ars-1/expected.html index 22847c1..59d4415 100644 --- a/article_scraper/resources/tests/readability/ars-1/expected.html +++ b/article_scraper/resources/tests/readability/ars-1/expected.html @@ -7,16 +7,12 @@

Two-year-old bug exposes thousands of servers to crippling attack.

-
- -
+
- Just-released Minecraft exploit makes it easy to crash game servers -
- -
+ Just-released Minecraft exploit makes it easy to crash game servers +

@@ -90,4 +86,4 @@

-
+
\ No newline at end of file diff --git a/article_scraper/resources/tests/readability/base-url-base-element-relative/expected.html b/article_scraper/resources/tests/readability/base-url-base-element-relative/expected.html index b8214f2..3799684 100644 --- a/article_scraper/resources/tests/readability/base-url-base-element-relative/expected.html +++ b/article_scraper/resources/tests/readability/base-url-base-element-relative/expected.html @@ -18,11 +18,11 @@

link

link

Images

-

-

-

-

-

+

+

+

+

+

Foo

Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, @@ -31,4 +31,4 @@ cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

-
+
\ No newline at end of file diff --git a/article_scraper/resources/tests/readability/basic-tags-cleaning/expected.html b/article_scraper/resources/tests/readability/basic-tags-cleaning/expected.html index 6bf8114..522de9d 100644 --- a/article_scraper/resources/tests/readability/basic-tags-cleaning/expected.html +++ b/article_scraper/resources/tests/readability/basic-tags-cleaning/expected.html @@ -1,5 +1,4 @@ -
-
+

Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.

Ut enim ad minim veniam, @@ -9,8 +8,7 @@

Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

-
-
+

Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

@@ -19,5 +17,4 @@

Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

-
-
+
\ No newline at end of file diff --git a/article_scraper/resources/tests/readability/bbc-1/expected.html b/article_scraper/resources/tests/readability/bbc-1/expected.html index 2c031c8..5eeda3e 100644 --- a/article_scraper/resources/tests/readability/bbc-1/expected.html +++ b/article_scraper/resources/tests/readability/bbc-1/expected.html @@ -1,11 +1,5 @@
-

President Barack Obama has admitted that his failure to pass "common sense gun safety laws" in the US is the greatest frustration of his presidency.

-

In an interview with the BBC, Mr Obama said it was "distressing" not to have made progress on the issue "even in the face of repeated mass killings".

-

He vowed to keep trying, but the BBC's North America editor Jon Sopel said the president did not sound very confident.

-

However, Mr Obama said race relations had improved during his presidency.

-

Hours after the interview, a gunman opened fire at a cinema in the US state of Louisiana, killing two people and injuring several others before shooting himself.

-

In a wide-ranging interview, President Obama also said:

-
    +

    President Barack Obama has admitted that his failure to pass "common sense gun safety laws" in the US is the greatest frustration of his presidency.

    In an interview with the BBC, Mr Obama said it was "distressing" not to have made progress on the issue "even in the face of repeated mass killings".

    He vowed to keep trying, but the BBC's North America editor Jon Sopel said the president did not sound very confident.

    However, Mr Obama said race relations had improved during his presidency.

    Hours after the interview, a gunman opened fire at a cinema in the US state of Louisiana, killing two people and injuring several others before shooting himself.

    In a wide-ranging interview, President Obama also said:

    • The UK must stay in the EU to have influence on the world stage
    • He is confident the Iran nuclear deal will be passed by Congress
    • @@ -14,43 +8,15 @@
    • He would defend his advocacy of gay rights following protests in Kenya
    • Despite racial tensions, the US is becoming more diverse and more tolerant
    • -
    -

    Read the full transcript of his interview

    -

    Mr Obama lands in Kenya later on Friday for his first visit since becoming president.

    -

    But with just 18 months left in power, he said gun control was the area where he has been "most frustrated and most stymied" since coming to power in 2009.

    -

    "If you look at the number of Americans killed since 9/11 by terrorism, it's less than 100. If you look at the number that have been killed by gun violence, it's in the tens of thousands," Mr Obama said.

    -
    Gun control campaigners protest in McPhearson Square in Washington DC - 25 April 2013 +

Read the full transcript of his interview

Mr Obama lands in Kenya later on Friday for his first visit since becoming president.

But with just 18 months left in power, he said gun control was the area where he has been "most frustrated and most stymied" since coming to power in 2009.

"If you look at the number of Americans killed since 9/11 by terrorism, it's less than 100. If you look at the number that have been killed by gun violence, it's in the tens of thousands," Mr Obama said.

Gun control campaigners protest in McPhearson Square in Washington DC - 25 April 2013
The president said he would continue fighting for greater gun control laws -

"For us not to be able to resolve that issue has been something that is distressing," he added.

-

Mr Obama has pushed for stricter gun control throughout his presidency but has been unable to secure any significant changes to the laws.

-

After nine African-American churchgoers were killed in South Carolina in June, he admitted "politics in this town" meant there were few options available.

-
line

Analysis: Jon Sopel, BBC News, Washington

-
President Barack Obama participates in an interview with Jon Sopel of BBC in the Roosevelt Room of the White House - 23 July 2015

Nine months ago, the president seemed like a spent force, after taking a beating in the midterm elections, during which members of his own party were reluctant to campaign on his record.

-

But the man sat before me today was relaxed and confident, buoyed by a string of "wins" on healthcare, Cuba and Iran, after bitter and ongoing battles with his many critics.

-

The only body swerve the president performed was when I asked him how many minds he had changed on the Iran nuclear deal after an intense sell aimed at Gulf allies and members of US Congress who remain implacably opposed.

-

There was a momentary flicker across the president's face as if to say "You think you got me?" before his smile returned and he proceeded to talk about how Congress would come round.

-

But notably, he did not give a direct answer to that question, which leaves me with the impression that he has persuaded precisely zero.

-

Five things we learned from Obama interview

-

The presidential body swerve

-
line

On race relations, Mr Obama said recent concerns around policing and mass incarcerations were "legitimate and deserve intense attention" but insisted progress had been made.

-

Children growing up during the eight years of his presidency "will have a different view of race relations in this country and what's possible," he said.

-

"There are going to be tensions that arise. But if you look at my daughters' generation, they have an attitude about race that's entirely different than even my generation."

-

Talking about how he was feeling after his recent successes, he said "every president, every leader has strengths and weaknesses".

-

"One of my strengths is I have a pretty even temperament. I don't get too high when it's high and I don't get too low when it's low," he said.

-
Customer looks at Obama shirts at a stall in Nairobi's Kibera slums, 23 July 2015 +

"For us not to be able to resolve that issue has been something that is distressing," he added.

Mr Obama has pushed for stricter gun control throughout his presidency but has been unable to secure any significant changes to the laws.

After nine African-American churchgoers were killed in South Carolina in June, he admitted "politics in this town" meant there were few options available.

line

Analysis: Jon Sopel, BBC News, Washington

President Barack Obama participates in an interview with Jon Sopel of BBC in the Roosevelt Room of the White House - 23 July 2015

Nine months ago, the president seemed like a spent force, after taking a beating in the midterm elections, during which members of his own party were reluctant to campaign on his record.

But the man sat before me today was relaxed and confident, buoyed by a string of "wins" on healthcare, Cuba and Iran, after bitter and ongoing battles with his many critics.

The only body swerve the president performed was when I asked him how many minds he had changed on the Iran nuclear deal after an intense sell aimed at Gulf allies and members of US Congress who remain implacably opposed.

There was a momentary flicker across the president's face as if to say "You think you got me?" before his smile returned and he proceeded to talk about how Congress would come round.

But notably, he did not give a direct answer to that question, which leaves me with the impression that he has persuaded precisely zero.

Five things we learned from Obama interview

The presidential body swerve

line

On race relations, Mr Obama said recent concerns around policing and mass incarcerations were "legitimate and deserve intense attention" but insisted progress had been made.

Children growing up during the eight years of his presidency "will have a different view of race relations in this country and what's possible," he said.

"There are going to be tensions that arise. But if you look at my daughters' generation, they have an attitude about race that's entirely different than even my generation."

Talking about how he was feeling after his recent successes, he said "every president, every leader has strengths and weaknesses".

"One of my strengths is I have a pretty even temperament. I don't get too high when it's high and I don't get too low when it's low," he said.

Customer looks at Obama shirts at a stall in Nairobi's Kibera slums, 23 July 2015
Kenya is getting ready to welcome the US president -

Kenya trip

-

Mr Obama was speaking to the BBC at the White House before departing for Kenya.

-

His father was Kenyan and the president is expected to meet relatives in Nairobi.

-

Mr Obama has faced criticism in the country after the US legalised gay marriage. However, in his interview, the president said he would not fall silent on the issue.

-

"I am not a fan of discrimination and bullying of anybody on the basis of race, on the basis of religion, on the basis of sexual orientation or gender," he said.

-

The president also admitted that some African governments, including Kenya's, needed to improve their records on human rights and democracy. However, he defended his decision to engage with and visit those governments.

-

"Well, they're not ideal institutions. But what we found is, is that when we combined blunt talk with engagement, that gives us the best opportunity to influence and open up space for civil society."

-

Mr Obama will become the first US president to address the African Union when he travels on to Ethiopia on Sunday.

-
+

Kenya trip

Mr Obama was speaking to the BBC at the White House before departing for Kenya.

His father was Kenyan and the president is expected to meet relatives in Nairobi.

Mr Obama has faced criticism in the country after the US legalised gay marriage. However, in his interview, the president said he would not fall silent on the issue.

"I am not a fan of discrimination and bullying of anybody on the basis of race, on the basis of religion, on the basis of sexual orientation or gender," he said.

The president also admitted that some African governments, including Kenya's, needed to improve their records on human rights and democracy. However, he defended his decision to engage with and visit those governments.

"Well, they're not ideal institutions. But what we found is, is that when we combined blunt talk with engagement, that gives us the best opportunity to influence and open up space for civil society."

Mr Obama will become the first US president to address the African Union when he travels on to Ethiopia on Sunday.

+
\ No newline at end of file diff --git a/article_scraper/resources/tests/readability/blogger/expected.html b/article_scraper/resources/tests/readability/blogger/expected.html index 182a508..0149e1e 100644 --- a/article_scraper/resources/tests/readability/blogger/expected.html +++ b/article_scraper/resources/tests/readability/blogger/expected.html @@ -1,73 +1,61 @@ -
-

- I've written a couple of posts in the past few months but they were all for the blog at work so I figured I'm long overdue for one on Silicon Exposed.

-

+

+ I've written a couple of posts in the past few months but they were all for the blog at work so I figured I'm long overdue for one on Silicon Exposed.

So what's a GreenPak?

-

Silego Technology is a fabless semiconductor company located in the SF Bay area, which makes (among other things) a line of programmable logic devices known as GreenPak. Their 5th generation parts were just announced, but I started this project before that happened so I'm still targeting the 4th generation.
-
GreenPak devices are kind of like itty bitty PSoCs - they have a mixed signal fabric with an ADC, DACs, comparators, voltage references, plus a digital LUT/FF fabric and some typical digital MCU peripherals like counters and oscillators (but no CPU).
-
It's actually an interesting architecture - FPGAs (including some devices marketed as CPLDs) are a 2D array of LUTs connected via wires to adjacent cells, and true (product term) CPLDs are a star topology of AND-OR arrays connected by a crossbar. GreenPak, on the other hand, is a star topology of LUTs, flipflops, and analog/digital hard IP connected to a crossbar.
-
Without further ado, here's a block diagram showing all the cool stuff you get in the SLG46620V:

- +

Silego Technology is a fabless semiconductor company located in the SF Bay area, which makes (among other things) a line of programmable logic devices known as GreenPak. Their 5th generation parts were just announced, but I started this project before that happened so I'm still targeting the 4th generation.
+
GreenPak devices are kind of like itty bitty PSoCs - they have a mixed signal fabric with an ADC, DACs, comparators, voltage references, plus a digital LUT/FF fabric and some typical digital MCU peripherals like counters and oscillators (but no CPU).
+
It's actually an interesting architecture - FPGAs (including some devices marketed as CPLDs) are a 2D array of LUTs connected via wires to adjacent cells, and true (product term) CPLDs are a star topology of AND-OR arrays connected by a crossbar. GreenPak, on the other hand, is a star topology of LUTs, flipflops, and analog/digital hard IP connected to a crossbar.
+
Without further ado, here's a block diagram showing all the cool stuff you get in the SLG46620V:

-
- +
SLG46620V block diagram (from device datasheet)
-

- They're also tiny (the SLG46620V is a 20-pin 0.4mm pitch STQFN measuring 2x3 mm, and the lower gate count SLG46140V is a mere 1.6x2 mm) and probably the cheapest programmable logic device on the market - $0.50 in low volume and less than $0.40 in larger quantities.
-
The Vdd range of GreenPak4 is huge, more like what you'd expect from an MCU than an FPGA! It can run on anything from 1.8 to 5V, although performance is only specified at 1.8, 3.3, and 5V nominal voltages. There's also a dual-rail version that trades one of the GPIO pins for a second power supply pin, allowing you to interface to logic at two different voltage levels.
-
To support low-cost/space-constrained applications, they even have the configuration memory on die. It's one-time programmable and needs external Vpp to program (presumably Silego didn't want to waste die area on charge pumps that would only be used once) but has a SRAM programming mode for prototyping.
-
The best part is that the development software (GreenPak Designer) is free of charge and provided for all major operating systems including Linux! Unfortunately, the only supported design entry method is schematic entry and there's no way to write your design in a HDL.
-
While schematics may be fine for quick tinkering on really simple designs, they quickly get unwieldy. The nightmare of a circuit shown below is just a bunch of counters hooked up to LEDs that blink at various rates.

- +

+ They're also tiny (the SLG46620V is a 20-pin 0.4mm pitch STQFN measuring 2x3 mm, and the lower gate count SLG46140V is a mere 1.6x2 mm) and probably the cheapest programmable logic device on the market - $0.50 in low volume and less than $0.40 in larger quantities.
+
The Vdd range of GreenPak4 is huge, more like what you'd expect from an MCU than an FPGA! It can run on anything from 1.8 to 5V, although performance is only specified at 1.8, 3.3, and 5V nominal voltages. There's also a dual-rail version that trades one of the GPIO pins for a second power supply pin, allowing you to interface to logic at two different voltage levels.
+
To support low-cost/space-constrained applications, they even have the configuration memory on die. It's one-time programmable and needs external Vpp to program (presumably Silego didn't want to waste die area on charge pumps that would only be used once) but has a SRAM programming mode for prototyping.
+
The best part is that the development software (GreenPak Designer) is free of charge and provided for all major operating systems including Linux! Unfortunately, the only supported design entry method is schematic entry and there's no way to write your design in a HDL.
+
While schematics may be fine for quick tinkering on really simple designs, they quickly get unwieldy. The nightmare of a circuit shown below is just a bunch of counters hooked up to LEDs that blink at various rates.

-
- +
Schematic from hell!
-

- As if this wasn't enough of a problem, the largest GreenPak4 device (the SLG46620V) is split into two halves with limited routing between them, and the GUI doesn't help the user manage this complexity at all - you have to draw your schematic in two halves and add "cross connections" between them.
-
The icing on the cake is that schematics are a pain to diff and collaborate on. Although GreenPak schematics are XML based, which is a touch better than binary, who wants to read a giant XML diff and try to figure out what's going on in the circuit?
-
This isn't going to be a post on the quirks of Silego's software, though - that would be boring. As it turns out, there's one more exciting feature of these chips that I didn't mention earlier: the configuration bitstream is 100% documented in the device datasheet! This is unheard of in the programmable logic world. As Nick of Arachnid Labs says, the chip is "just dying for someone to write a VHDL or Verilog compiler for it". As you can probably guess by from the title of this post, I've been busy doing exactly that.

-

+

+ As if this wasn't enough of a problem, the largest GreenPak4 device (the SLG46620V) is split into two halves with limited routing between them, and the GUI doesn't help the user manage this complexity at all - you have to draw your schematic in two halves and add "cross connections" between them.
+
The icing on the cake is that schematics are a pain to diff and collaborate on. Although GreenPak schematics are XML based, which is a touch better than binary, who wants to read a giant XML diff and try to figure out what's going on in the circuit?
+
This isn't going to be a post on the quirks of Silego's software, though - that would be boring. As it turns out, there's one more exciting feature of these chips that I didn't mention earlier: the configuration bitstream is 100% documented in the device datasheet! This is unheard of in the programmable logic world. As Nick of Arachnid Labs says, the chip is "just dying for someone to write a VHDL or Verilog compiler for it". As you can probably guess by from the title of this post, I've been busy doing exactly that.

Great! How does it work?

-

Rather than wasting time writing a synthesizer, I decided to write a GreenPak technology library for Clifford Wolf's excellent open source synthesis tool, Yosys, and then make a place-and-route tool to turn that into a final netlist. The post-PAR netlist can then be loaded into GreenPak Designer in order to program the device.
-
The first step of the process is to run the "synth_greenpak4" Yosys flow on the Verilog source. This runs a generic RTL synthesis pass, then some coarse-grained extraction passes to infer shift register and counter cells from behavioral logic, and finally maps the remaining logic to LUT/FF cells and outputs a JSON-formatted netlist.
-
Once the design has been synthesized, my tool (named, surprisingly, gp4par) is then launched on the netlist. It begins by parsing the JSON and constructing a directed graph of cell objects in memory. A second graph, containing all of the primitives in the device and the legal connections between them, is then created based on the device specified on the command line. (As of now only the SLG46620V is supported; the SLG46621V can be added fairly easily but the SLG46140V has a slightly different microarchitecture which will require a bit more work to support.)
-
After the graphs are generated, each node in the netlist graph is assigned a numeric label identifying the type of cell and each node in the device graph is assigned a list of legal labels: for example, an I/O buffer site is legal for an input buffer, output buffer, or bidirectional buffer.

- +

Rather than wasting time writing a synthesizer, I decided to write a GreenPak technology library for Clifford Wolf's excellent open source synthesis tool, Yosys, and then make a place-and-route tool to turn that into a final netlist. The post-PAR netlist can then be loaded into GreenPak Designer in order to program the device.
+
The first step of the process is to run the "synth_greenpak4" Yosys flow on the Verilog source. This runs a generic RTL synthesis pass, then some coarse-grained extraction passes to infer shift register and counter cells from behavioral logic, and finally maps the remaining logic to LUT/FF cells and outputs a JSON-formatted netlist.
+
Once the design has been synthesized, my tool (named, surprisingly, gp4par) is then launched on the netlist. It begins by parsing the JSON and constructing a directed graph of cell objects in memory. A second graph, containing all of the primitives in the device and the legal connections between them, is then created based on the device specified on the command line. (As of now only the SLG46620V is supported; the SLG46621V can be added fairly easily but the SLG46140V has a slightly different microarchitecture which will require a bit more work to support.)
+
After the graphs are generated, each node in the netlist graph is assigned a numeric label identifying the type of cell and each node in the device graph is assigned a list of legal labels: for example, an I/O buffer site is legal for an input buffer, output buffer, or bidirectional buffer.

-
- +
Example labeling for a subset of the netlist and device graphs
-

- The labeled nodes now need to be placed. The initial placement uses a simple greedy algorithm to create a valid (although not necessarily optimal or even routable) placement:

-
    +

    + The labeled nodes now need to be placed. The initial placement uses a simple greedy algorithm to create a valid (although not necessarily optimal or even routable) placement:

    1. Loop over the cells in the netlist. If any cell has a LOC constraint, which locks the cell to a specific physical site, attempt to assign the node to the specified site. If the specified node is the wrong type, doesn't exist, or is already used by another constrained node, the constraint is invalid so fail with an error.
    2. Loop over all of the unconstrained cells in the netlist and assign them to the first unused site with the right label. If none are available, the design is too big for the device so fail with an error.
    3. -
    -

    - Once the design is placed, the placement optimizer then loops over the design and attempts to improve it. A simulated annealing algorithm is used, where changes to the design are accepted unconditionally if they make the placement better, and with a random, gradually decreasing probability if they make it worse. The optimizer terminates when the design receives a perfect score (indicating an optimal placement) or if it stops making progress for several iterations. Each iteration does the following:

    -
      +

    + Once the design is placed, the placement optimizer then loops over the design and attempts to improve it. A simulated annealing algorithm is used, where changes to the design are accepted unconditionally if they make the placement better, and with a random, gradually decreasing probability if they make it worse. The optimizer terminates when the design receives a perfect score (indicating an optimal placement) or if it stops making progress for several iterations. Each iteration does the following:

    1. Compute a score for the current design based on the number of unroutable nets, the amount of routing congestion (number of nets crossing between halves of the device), and static timing analysis (not yet implemented, always zero).
    2. Make a list of nodes that contributed to this score in some way (having some attached nets unroutable, crossing to the other half of the device, or failing timing).
    3. Remove nodes from the list that are LOC'd to a specific location since we're not allowed to move them.
    4. @@ -82,52 +70,40 @@
    5. Pick one of the candidates at random and move the pivot to that location. If another cell in the netlist is already there, put it in the vacant site left by the pivot.
    6. Re-compute the score for the design. If it's better, accept this change and start the next iteration.
    7. If the score is worse, accept it with a random probability which decreases as the iteration number goes up. If the change is not accepted, restore the previous placement.
    8. -
    -

    - After optimization, the design is checked for routability. If any edges in the netlist graph don't correspond to edges in the device graph, the user probably asked for something impossible (for example, trying to hook a flipflop's output to a comparator's reference voltage input) so fail with an error.
    -
    The design is then routed. This is quite simple due to the crossbar structure of the device. For each edge in the netlist:

    -
      +

    + After optimization, the design is checked for routability. If any edges in the netlist graph don't correspond to edges in the device graph, the user probably asked for something impossible (for example, trying to hook a flipflop's output to a comparator's reference voltage input) so fail with an error.
    +
    The design is then routed. This is quite simple due to the crossbar structure of the device. For each edge in the netlist:

    1. If dedicated (non-fabric) routing is used for this path, configure the destination's input mux appropriately and stop.
    2. If the source and destination are in the same half of the device, configure the destination's input mux appropriately and stop.
    3. A cross-connection must be used. Check if we already used one to bring the source signal to the other half of the device. If found, configure the destination to route from that cross-connection and stop.
    4. Check if we have any cross-connections left going in this direction. If they're all used, the design is unroutable due to congestion so fail with an error.
    5. Pick the next unused cross-connection and configure it to route from the source. Configure the destination to route from the cross-connection and stop.
    6. -
    -

    - Once routing is finished, run a series of post-PAR design rule checks. These currently include the following:

    -
      +

+ Once routing is finished, run a series of post-PAR design rule checks. These currently include the following:

  • If any node has no loads, generate a warning
  • If an I/O buffer is connected to analog hard IP, fail with an error if it's not configured in analog mode.
  • Some signals (such as comparator inputs and oscillator power-down controls) are generated by a shared mux and fed to many loads. If different loads require conflicting settings for the shared mux, fail with an error.
  • -
-

- If DRC passes with no errors, configure all of the individual cells in the netlist based on the HDL parameters. Fail with an error if an invalid configuration was requested.
-
Finally, generate the bitstream from all of the per-cell configuration and write it to a file.

-

- Great, let's get started!

-

- If you don't already have one, you'll need to buy a GreenPak4 development kit. The kit includes samples of the SLG46620V (among other devices) and a programmer/emulation board. While you're waiting for it to arrive, install GreenPak Designer.
-
Download and install Yosys. Although Clifford is pretty good at merging my pull requests, only my fork on Github is guaranteed to have the most up-to-date support for GreenPak devices so don't be surprised if you can't use a bleeding-edge feature with mainline Yosys.
-
Download and install gp4par. You can get it from the Github repository.
-
Write your HDL, compile with Yosys, P&R with gp4par, and import the bitstream into GreenPak Designer to program the target device. The most current gp4par manual is included in LaTeX source form in the source tree and is automatically built as part of the compile process. If you're just browsing, there's a relatively recent PDF version on my web server.
-
If you'd like to see the Verilog that produced the nightmare of a schematic I showed above, here it is.
-
Be advised that this project is still very much a work in progress and there are still a number of SLG46620V features I don't support (see the manual for exact details).

-

- I love it / it segfaulted / there's a problem in the manual!

-

- Hop in our IRC channel (##openfpga on Freenode) and let me know. Feedback is great, pull requests are even better,

-

- You're competing with Silego's IDE. Have they found out and sued you yet?

-

- Nope. They're fully aware of what I'm doing and are rolling out the red carpet for me. They love the idea of a HDL flow as an alternative to schematic entry and are pretty amazed at how fast it's coming together.
-
After I reported a few bugs in their datasheets they decided to skip the middleman and give me direct access to the engineer who writes their documentation so that I can get faster responses. The last time I found a problem (two different parts of the datasheet contradicted each other) an updated datasheet was in my inbox and on their website by the next day. I only wish Xilinx gave me that kind of treatment!
-
They've even offered me free hardware to help me add support for their latest product family, although I plan to get GreenPak4 support to a more stable state before taking them up on the offer.

-

+

+ If DRC passes with no errors, configure all of the individual cells in the netlist based on the HDL parameters. Fail with an error if an invalid configuration was requested.
+
Finally, generate the bitstream from all of the per-cell configuration and write it to a file.

+ Great, let's get started!

+ If you don't already have one, you'll need to buy a GreenPak4 development kit. The kit includes samples of the SLG46620V (among other devices) and a programmer/emulation board. While you're waiting for it to arrive, install GreenPak Designer.
+
Download and install Yosys. Although Clifford is pretty good at merging my pull requests, only my fork on Github is guaranteed to have the most up-to-date support for GreenPak devices so don't be surprised if you can't use a bleeding-edge feature with mainline Yosys.
+
Download and install gp4par. You can get it from the Github repository.
+
Write your HDL, compile with Yosys, P&R with gp4par, and import the bitstream into GreenPak Designer to program the target device. The most current gp4par manual is included in LaTeX source form in the source tree and is automatically built as part of the compile process. If you're just browsing, there's a relatively recent PDF version on my web server.
+
If you'd like to see the Verilog that produced the nightmare of a schematic I showed above, here it is.
+
Be advised that this project is still very much a work in progress and there are still a number of SLG46620V features I don't support (see the manual for exact details).

+ I love it / it segfaulted / there's a problem in the manual!

+ Hop in our IRC channel (##openfpga on Freenode) and let me know. Feedback is great, pull requests are even better,

+ You're competing with Silego's IDE. Have they found out and sued you yet?

+ Nope. They're fully aware of what I'm doing and are rolling out the red carpet for me. They love the idea of a HDL flow as an alternative to schematic entry and are pretty amazed at how fast it's coming together.
+
After I reported a few bugs in their datasheets they decided to skip the middleman and give me direct access to the engineer who writes their documentation so that I can get faster responses. The last time I found a problem (two different parts of the datasheet contradicted each other) an updated datasheet was in my inbox and on their website by the next day. I only wish Xilinx gave me that kind of treatment!
+
They've even offered me free hardware to help me add support for their latest product family, although I plan to get GreenPak4 support to a more stable state before taking them up on the offer.

So what's next?

-

Better testing, for starters. I have to verify functionality by hand with a DMM and oscilloscope, which is time consuming.
-
My contact at Silego says they're going to be giving me documentation on the SRAM emulation interface soon, so I'm going to make a hardware-in-loop test platform that connects to my desktop and the Silego ZIF socket, and lets me load new bitstreams via a scriptable interface. It'll have FPGA-based digital I/O as well as an ADC and DAC on every device pin, plus an adjustable voltage regulator for power, so I can feed in arbitrary mixed-signal test waveforms and write PC-based unit tests to verify correct behavior.
-
Other than that, I want to finish support for the SLG46620V in the next month or two. The SLG46621V will be an easy addition since only one pin and the relevant configuration bits have changed from the 46620 (I suspect they're the same die, just bonded out differently).
-
Once that's done I'll have to do some more extensive work to add the SLG46140V since the architecture is a bit different (a lot of the combinatorial logic is merged into multi-function blocks). Luckily, the 46140 has a lot in common architecturally with the GreenPak5 family, so once that's done GreenPak5 will probably be a lot easier to add support for.
-
My thanks go out to Clifford Wolf, whitequark, the IRC users in ##openfpga, and everyone at Silego I've worked with to help make this possible. I hope that one day this project will become mature enough that Silego will ship it as an officially supported extension to GreenPak Designer, making history by becoming the first modern programmable logic vendor to ship a fully open source synthesis and P&R suite. +

Better testing, for starters. I have to verify functionality by hand with a DMM and oscilloscope, which is time consuming.
+
My contact at Silego says they're going to be giving me documentation on the SRAM emulation interface soon, so I'm going to make a hardware-in-loop test platform that connects to my desktop and the Silego ZIF socket, and lets me load new bitstreams via a scriptable interface. It'll have FPGA-based digital I/O as well as an ADC and DAC on every device pin, plus an adjustable voltage regulator for power, so I can feed in arbitrary mixed-signal test waveforms and write PC-based unit tests to verify correct behavior.
+
Other than that, I want to finish support for the SLG46620V in the next month or two. The SLG46621V will be an easy addition since only one pin and the relevant configuration bits have changed from the 46620 (I suspect they're the same die, just bonded out differently).
+
Once that's done I'll have to do some more extensive work to add the SLG46140V since the architecture is a bit different (a lot of the combinatorial logic is merged into multi-function blocks). Luckily, the 46140 has a lot in common architecturally with the GreenPak5 family, so once that's done GreenPak5 will probably be a lot easier to add support for.
+
My thanks go out to Clifford Wolf, whitequark, the IRC users in ##openfpga, and everyone at Silego I've worked with to help make this possible. I hope that one day this project will become mature enough that Silego will ship it as an officially supported extension to GreenPak Designer, making history by becoming the first modern programmable logic vendor to ship a fully open source synthesis and P&R suite.

-
+

\ No newline at end of file diff --git a/article_scraper/resources/tests/readability/breitbart/expected.html b/article_scraper/resources/tests/readability/breitbart/expected.html index f787dab..9e3b06a 100644 --- a/article_scraper/resources/tests/readability/breitbart/expected.html +++ b/article_scraper/resources/tests/readability/breitbart/expected.html @@ -1,18 +1,14 @@ -
-
+
-
-

Supporters of Republican presidential nominee Donald Trump cheer during election night at the New York Hilton Midtown in New York on November 9, 2016.  / AFP / JIM WATSON        (Photo credit should read JIM WATSON/AFP/Getty Images)

-

JIM WATSON/AFP/Getty Images

+

Supporters of Republican presidential nominee Donald Trump cheer during election night at the New York Hilton Midtown in New York on November 9, 2016.  / AFP / JIM WATSON        (Photo credit should read JIM WATSON/AFP/Getty Images)

JIM WATSON/AFP/Getty Images

-
-
+
@@ -48,5 +44,4 @@ -
-
+
\ No newline at end of file diff --git a/article_scraper/resources/tests/readability/bug-1255978/expected.html b/article_scraper/resources/tests/readability/bug-1255978/expected.html index 715f747..12043fb 100644 --- a/article_scraper/resources/tests/readability/bug-1255978/expected.html +++ b/article_scraper/resources/tests/readability/bug-1255978/expected.html @@ -9,12 +9,12 @@

Here are some of the secrets that the receptionist will never tell you when you check in, according to answers posted on Quora.

-

+
-

bandb2.jpg

+

bandb2.jpg

Even posh hotels might not wash a blanket in between stays @@ -31,12 +31,12 @@

Video shows bed bug infestation at New York hotel

-

+
-

hotel-door-getty.jpg

+

hotel-door-getty.jpg

Forrest Jones advised stuffing the peep hole with a strip of rolled up notepaper when not in use. @@ -48,12 +48,12 @@

This is not common, but can happen, Forrest Jones said. He advised stuffing the peep hole with a strip of rolled up notepaper when not in use. When someone knocks on the door, the paper can be removed to check who is there. If no one is visible, he recommends calling the front desk immediately. “I look forward to the day when I can tell you to choose only hotels where every employee who has access to guestroom keys is subjected to a complete public records background check, prior to hire, and every year or two thereafter. But for now, I can't,” he said.

-

+
-

luggage-3.jpg

+

luggage-3.jpg

Put luggage on the floor @@ -65,12 +65,12 @@

Bedbugs love wood. Even though a wooden luggage rack might look nicer and more expensive than a metal one, it’s a breeding ground for bugs. Forrest Jones says guests should put the items they plan to take from bags on other pieces of furniture and leave the bag on the floor.

-

+
-

Lifestyle-hotels.jpg

+

Lifestyle-hotels.jpg

The old rule of thumb is that for every 00 invested in a room, the hotel should charge in average daily rate @@ -88,7 +88,7 @@

It contains the most germs of anything in the room. Other studies have said the TV remote and bedside lamp switches are the most unhygienic. “Perhaps because it's something that's easy for the housekeepers to forget to check or to squirt down with disinfectant,” Forrest Jones said.

-

+ @@ -96,12 +96,12 @@

Despite the snacks in the minibar seeming like the most overpriced food you have ever seen, hotel owners are still struggling to make a profit from those snacks. "Minibars almost always lose money, even when they charge $10 for a Diet Coke,” Sharon said.

-

+
-

agenda7.jpg

+

agenda7.jpg

Towels should always be cleaned between stays @@ -112,7 +112,7 @@

7. Always made sure the hand towels are clean when you arrive

Forrest Jones made a discovery when he was helping out with the housekeepers. “You know where you almost always find a hand towel in any recently-vacated hotel room that was occupied by a guy? On the floor, next to the bed, about halfway down, maybe a little toward the foot of the bed. Same spot in the floor, next to almost every bed occupied by a man, in every room. I'll leave the rest to your imagination,” he said.

- +
  • More about: @@ -121,6 +121,5 @@
  • Hygiene
-

Reuse content -

-
+

Reuse content +

\ No newline at end of file diff --git a/article_scraper/resources/tests/readability/buzzfeed-1/expected.html b/article_scraper/resources/tests/readability/buzzfeed-1/expected.html index 5ca2fd1..46f2872 100644 --- a/article_scraper/resources/tests/readability/buzzfeed-1/expected.html +++ b/article_scraper/resources/tests/readability/buzzfeed-1/expected.html @@ -14,13 +14,13 @@
-

+

Facebook

-

+

Facebook

@@ -34,8 +34,8 @@

Fiona Parry issued a plea for people to stay away from pills containing the chemical.

-
+

“[Eloise] just never really understood how dangerous the tablets that she took were,” she said. “Most of us don’t believe that a slimming tablet could possibly kill us.

“DNP is not a miracle slimming pill. It is a deadly toxin.”

-
+
\ No newline at end of file diff --git a/article_scraper/resources/tests/readability/citylab-1/expected.html b/article_scraper/resources/tests/readability/citylab-1/expected.html index 1b6362e..1756561 100644 --- a/article_scraper/resources/tests/readability/citylab-1/expected.html +++ b/article_scraper/resources/tests/readability/citylab-1/expected.html @@ -1,19 +1,19 @@
- - - + + +
- - + + - - - + + + - - + +
@@ -117,7 +117,7 @@

- +
Workers remove a hammer and sickle from a neon sign that reads “Glory to Communism,” @@ -150,8 +150,7 @@

Cities are changing fast. Keep up with the CityLab Daily newsletter. -

-
@@ -167,7 +166,7 @@

- +
Martin Wartman, a student at Northern Kentucky University, works on a neon sign at @@ -224,4 +223,4 @@

-
+
\ No newline at end of file diff --git a/article_scraper/resources/tests/readability/clean-links/expected.html b/article_scraper/resources/tests/readability/clean-links/expected.html index c0a82ff..2ae2958 100644 --- a/article_scraper/resources/tests/readability/clean-links/expected.html +++ b/article_scraper/resources/tests/readability/clean-links/expected.html @@ -1,12 +1,8 @@

Study Webtext

-

-"Bartleby the Scrivener: A Story of Wall-Street " (1853) 
+

"Bartleby the Scrivener: A Story of Wall-Street " (1853) 
Herman Melville

-

-To the story text without notes
-        +

To the story text without notes

Prepared by Ann Woodlief, Virginia Commonwealth University

@@ -33,8 +29,7 @@ I make some mention of myself, my employees, my business, my chambers, and general surroundings; because some such description is indispensable to an adequate understanding of the chief character about to be presented. -

-

Imprimis: I am a man who, from his youth upwards, has been +

Imprimis: I am a man who, from his youth upwards, has been filled with a profound conviction that the easiest way of life is the best.. Hence, though I belong to a profession proverbially energetic and nervous, even to turbulence, at times, yet nothing of that sort have I ever suffered to invade @@ -1387,4 +1382,4 @@

Ah Bartleby! Ah humanity!

-
+
\ No newline at end of file diff --git a/article_scraper/resources/tests/readability/cnet-svg-classes/expected.html b/article_scraper/resources/tests/readability/cnet-svg-classes/expected.html index 0ac4eb7..1976383 100644 --- a/article_scraper/resources/tests/readability/cnet-svg-classes/expected.html +++ b/article_scraper/resources/tests/readability/cnet-svg-classes/expected.html @@ -1,7 +1,5 @@
-
twitter-lite - - +
twitter-lite

Twitter Lite estará disponible en Google Play Store en 11 países de América Latina.

Twitter
@@ -15,16 +13,15 @@

Además, el app ocupa menos espacio en tu teléfono móvil, al reducir a 3MB su peso.

Twitter dio a conocer Twitter Lite en abril en India, y desde entonces ha estado trabajando para llevarlo a más países. La empresa en los últimos meses también se ha involucrado de forma definitiva en la eliminación de los abusos en la red social, tomando medidas incluso en la verificación de cuentas.

- -
-
-
+
+
+

Reproduciendo: Mira esto: Google Assistant mejora, hay más cambios en Twitter y...

- 8:09

+ 8:09

-
+
\ No newline at end of file diff --git a/article_scraper/resources/tests/readability/cnet/expected.html b/article_scraper/resources/tests/readability/cnet/expected.html index 4c34c23..9158f2e 100644 --- a/article_scraper/resources/tests/readability/cnet/expected.html +++ b/article_scraper/resources/tests/readability/cnet/expected.html @@ -3,9 +3,7 @@ -
- - +

Facebook CEO Mark Zuckerberg, the man with the acquisition plan.

Photo by James Martin/CNET
@@ -36,7 +34,7 @@ -

Tech Enabled: CNET chronicles tech's role in providing new kinds of accessibility. Check it out here.

+

Tech Enabled: CNET chronicles tech's role in providing new kinds of accessibility. Check it out here.

Technically Literate: Original works of short fiction with unique perspectives on tech, exclusively on CNET. Here.

@@ -45,4 +43,4 @@ -
+
\ No newline at end of file diff --git a/article_scraper/resources/tests/readability/cnn/expected.html b/article_scraper/resources/tests/readability/cnn/expected.html index ac1ad81..27ef685 100644 --- a/article_scraper/resources/tests/readability/cnn/expected.html +++ b/article_scraper/resources/tests/readability/cnn/expected.html @@ -32,7 +32,7 @@

The report also suggested the U.S. might not be the "jobs machine" it thinks it is, when compared to other countries.

It ranked near the bottom of the pack based on the levels of unemployment among men and women of prime working age. The study determined this by taking the ratio of employed men and women between the ages of 25 and 54 compared to the total population of each country.

-

The overall rankings of the countries were as follows:
1. Finland
2. Norway
3. Australia
4. Canada
5. Germany
6. France
7. United Kingdom
8. Italy
9. Spain
10. United States
+

The overall rankings of the countries were as follows:
1. Finland
2. Norway
3. Australia
4. Canada
5. Germany
6. France
7. United Kingdom
8. Italy
9. Spain
10. United States
@@ -48,4 +48,4 @@

CNNMoney (New York) First published February 1, 2016: 1:28 AM ET

-
+
\ No newline at end of file diff --git a/article_scraper/resources/tests/readability/comment-inside-script-parsing/expected.html b/article_scraper/resources/tests/readability/comment-inside-script-parsing/expected.html index adcc161..c265c13 100644 --- a/article_scraper/resources/tests/readability/comment-inside-script-parsing/expected.html +++ b/article_scraper/resources/tests/readability/comment-inside-script-parsing/expected.html @@ -1,5 +1,4 @@ -
-
+

Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.

Ut enim ad minim veniam, @@ -8,8 +7,7 @@

Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

-
-
+

Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

@@ -17,5 +15,4 @@ cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

-
-
+
\ No newline at end of file diff --git a/article_scraper/resources/tests/readability/daringfireball-1/expected.html b/article_scraper/resources/tests/readability/daringfireball-1/expected.html index 3e30a20..5db2c0e 100644 --- a/article_scraper/resources/tests/readability/daringfireball-1/expected.html +++ b/article_scraper/resources/tests/readability/daringfireball-1/expected.html @@ -2,8 +2,8 @@

About This Site

Daring Fireball is written and produced by John Gruber.

- Photograph of the author. -
Portrait by George Del Barrio

+ Photograph of the author. +
Portrait by George Del Barrio

Mac Apps

  • BBEdit
  • @@ -27,4 +27,4 @@

    Web Standards

    Web standards are important, and Daring Fireball adheres to them. Specifically, Daring Fireball’s HTML markup should validate as either HTML 5 or XHTML 4.01 Transitional, its layout is constructed using valid CSS, and its syndicated feed is valid Atom.

    If Daring Fireball looks goofy in your browser, you’re likely using a shitty browser that doesn’t support web standards. Internet Explorer, I’m looking in your direction. If you complain about this, I will laugh at you, because I do not care. If, however, you are using a modern, standards-compliant browser and have trouble viewing or reading Daring Fireball, please do let me know.

    -
+
\ No newline at end of file diff --git a/article_scraper/resources/tests/readability/data-url-image/expected.html b/article_scraper/resources/tests/readability/data-url-image/expected.html index 6884102..8e8daa6 100644 --- a/article_scraper/resources/tests/readability/data-url-image/expected.html +++ b/article_scraper/resources/tests/readability/data-url-image/expected.html @@ -1,11 +1,11 @@
- Illustration for article titled The Spectacular Story Of Metroid + Illustration for article titled The Spectacular Story Of Metroid

Lorem ipsum dolor sit amet consectetur adipisicing elit. Natus eaque totam provident obcaecati nisi praesentium iusto velit fuga debitis quidem ut repellat corrupti, eligendi inventore quibusdam perspiciatis delectus omnis pariatur excepturi quasi fugit? A adipisci natus nostrum, qui aperiam, at culpa corrupti autem enim earum vitae. Nostrum et officiis facere ex recusandae tenetur, delectus odit provident soluta id perferendis ducimus quibusdam corporis rerum voluptatem architecto sequi beatae quod mollitia voluptatibus earum tempora inventore ut. Deserunt reprehenderit recusandae nostrum, eaque fuga cum, repellat, perspiciatis ducimus in non consequatur ratione. Sint rerum necessitatibus deleniti odio earum voluptatum eos modi ab dolor minus.

- Illustration for article titled The Spectacular Story Of Metroid + Illustration for article titled The Spectacular Story Of Metroid

Quae veniam recusandae vel cupiditate doloribus pariatur, dolorum saepe hic quos mollitia harum nihil molestias magni modi maiores? Ea accusamus velit vel doloremque pariatur voluptate? Consequuntur dolorum fugit facere architecto eveniet beatae eligendi rerum nisi maxime, voluptatum a repellendus perferendis dolores eius repudiandae accusamus animi eum esse adipisci voluptatibus incidunt necessitatibus! Adipisci id, quos libero dolor odit dignissimos et, cum explicabo est facere aliquid mollitia! Iure nihil nobis beatae quibusdam, dolor saepe reiciendis, exercitationem hic accusamus quasi commodi modi molestiae ipsum, maxime non nesciunt deserunt itaque quo cupiditate rerum ipsa ad minus vel quod. Earum dolorum velit aut minus hic?

- +

Quidem nam tempore ex harum repellat modi expedita quia rerum est ut ullam, quisquam repudiandae at dolorum. Sint atque fugit facere id blanditiis dolor omnis rem repudiandae, laboriosam unde delectus illo voluptate dolorem consectetur rerum expedita quas debitis deleniti iure amet. At sequi distinctio facere eaque, esse molestiae tenetur, unde sapiente fugit minus eos ex aperiam atque? Animi soluta, dolorem sequi doloremque pariatur in. Impedit, voluptates dolorem natus perspiciatis pariatur eligendi labore rerum facilis est error. Illum eveniet nostrum atque nobis molestiae expedita perferendis, esse ullam corrupti sit explicabo beatae. Sapiente aliquid nesciunt reiciendis incidunt cumque et officiis minus aliquam?

- +

Veritatis sit nostrum officiis alias ea quam nulla veniam eos, quidem nisi, magni molestias ut odio. Ea, mollitia nesciunt. Maiores iste quos quas sapiente sit at esse quidem? Laboriosam fugiat nobis voluptatum reiciendis aliquam excepturi ipsum? Perspiciatis expedita sit quod, optio assumenda veritatis culpa, neque explicabo distinctio facere incidunt magnam accusamus, quidem animi delectus doloremque vel molestiae a quae dolorem. Sit, at? Numquam possimus animi esse incidunt quos quibusdam. Ab sit eligendi laborum beatae maxime suscipit, obcaecati enim cum nam autem, dicta nobis corrupti ut aut nostrum ex excepturi perferendis, soluta veniam. Excepturi beatae suscipit enim dignissimos! Quam, esse praesentium!

- -
+ + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/dev418/expected.html b/article_scraper/resources/tests/readability/dev418/expected.html index d9b9bfd..735d599 100644 --- a/article_scraper/resources/tests/readability/dev418/expected.html +++ b/article_scraper/resources/tests/readability/dev418/expected.html @@ -3,22 +3,22 @@

Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

-
+

Single <img>

- An image + An image

Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

-
+

Single <figure>

- An image + An image
Caption of the figure
@@ -26,32 +26,32 @@

Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

-
+

<ul> List of <img>

  • - An image + An image
  • - An image + An image
  • - An image + An image

Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

-
+

<ul> List of <figure>

  • - An image + An image
    Caption of the figure
    @@ -59,7 +59,7 @@
  • - An image + An image
    Caption of the figure
    @@ -67,7 +67,7 @@
  • - An image + An image
    Caption of the figure
    @@ -77,4 +77,4 @@

    Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

    - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/dropbox-blog/expected.html b/article_scraper/resources/tests/readability/dropbox-blog/expected.html index 1fd339c..be9355e 100644 --- a/article_scraper/resources/tests/readability/dropbox-blog/expected.html +++ b/article_scraper/resources/tests/readability/dropbox-blog/expected.html @@ -64,19 +64,19 @@

    - Task scheduling
    + Task scheduling
    Clients can schedule tasks to execute at a specified time. Tasks can be scheduled for immediate execution, or delayed to fit the use case.

    - Priority based execution
    + Priority based execution
    Tasks should be associated with a priority. Tasks with higher priority should get executed before tasks with a lower priority once they are ready for execution.

    - Task gating
    + Task gating
    ATF enables the the gating of tasks based on lambda, or a subset of tasks on a lambda based on collection. Tasks can be gated to be completely dropped or paused until a suitable time for execution.

    - Track task status
    + Track task status
    Clients can query the status of a scheduled task.

    @@ -89,19 +89,19 @@

    - At-least once task execution
    The ATF system guarantees that a task is executed at least once after being scheduled. Execution is said to be complete once the user-defined callback signals task completion to the ATF system. + At-least once task execution
    The ATF system guarantees that a task is executed at least once after being scheduled. Execution is said to be complete once the user-defined callback signals task completion to the ATF system.

    - No concurrent task execution
    The ATF system guarantees that at most one instance of a task will be actively executing at any given in point. This helps users write their callbacks without designing for concurrent execution of the same task from different locations. + No concurrent task execution
    The ATF system guarantees that at most one instance of a task will be actively executing at any given in point. This helps users write their callbacks without designing for concurrent execution of the same task from different locations.

    - Isolation
    Tasks in a given lambda are isolated from the tasks in other lambdas. This isolation spans across several dimensions, including worker capacity for task execution and resource use for task scheduling. Tasks on the same lambda but different priority levels are also isolated in their resource use for task scheduling. + Isolation
    Tasks in a given lambda are isolated from the tasks in other lambdas. This isolation spans across several dimensions, including worker capacity for task execution and resource use for task scheduling. Tasks on the same lambda but different priority levels are also isolated in their resource use for task scheduling.

    - Delivery latency
    95% of tasks begin execution within five seconds from their scheduled execution time. + Delivery latency
    95% of tasks begin execution within five seconds from their scheduled execution time.

    - High availability for task scheduling
    The ATF service is 99.9% available to accept task scheduling requests from any client. + High availability for task scheduling
    The ATF service is 99.9% available to accept task scheduling requests from any client.

    @@ -116,15 +116,15 @@ Following are some restrictions we place on the callback logic (lambda):

    - Idempotence
    + Idempotence
    A single task on a lambda can be executed multiple times within the ATF system. Developers should ensure that their lambda logic and correctness of task execution in clients are not affected by this.

    - Resiliency
    + Resiliency
    Worker processes which execute tasks might die at any point during task execution. ATF retries abruptly interrupted tasks, which could also be retried on different hosts. Lambda owners must design their lambdas such that retries on different hosts do not affect lambda correctness.

    - Terminal state handling
    ATF retries tasks until they are signaled to be complete from the lambda logic. Client code can mark a task as successfully completed, fatally terminated, or retriable. It is critical that lambda owners design clients to signal task completion appropriately to avoid misbehavior such as infinite retries.  + Terminal state handling
    ATF retries tasks until they are signaled to be complete from the lambda logic. Client code can mark a task as successfully completed, fatally terminated, or retriable. It is critical that lambda owners design clients to signal task completion appropriately to avoid misbehavior such as infinite retries. 

    @@ -136,7 +136,7 @@
    - Async Task Framework (ATF) [Fig 1] + Async Task Framework (ATF) [Fig 1]
    Async Task Framework (ATF) [Fig 1]
    @@ -162,18 +162,18 @@
  • Executor
  • -
  • Heartbeat and Status Controller (HSC)
    +
  • Heartbeat and Status Controller (HSC)

- Frontend
- This is the service that schedules requests via an RPC interface. The frontend accepts RPC requests from clients and schedules tasks by interacting with ATF’s task store described below.

+ Frontend
+ This is the service that schedules requests via an RPC interface. The frontend accepts RPC requests from clients and schedules tasks by interacting with ATF’s task store described below.

- Task Store
ATF tasks are stored in and triggered from the task store. The task store could be any generic data store with indexed querying capability. In ATF’s case, We use our in-house metadata store Edgestore to power the task store. More details can be found in the Data Model section below. + Task Store
ATF tasks are stored in and triggered from the task store. The task store could be any generic data store with indexed querying capability. In ATF’s case, We use our in-house metadata store Edgestore to power the task store. More details can be found in the Data Model section below.

- Store Consumer
The Store Consumer is a service that periodically polls the task store to find tasks that are ready for execution and pushes them onto the right queues, as described in the queue section below. These could be tasks that are newly ready for execution, or older tasks that are ready for execution again because they either failed in a retriable way on execution, or were dropped elsewhere within the ATF system.  + Store Consumer
The Store Consumer is a service that periodically polls the task store to find tasks that are ready for execution and pushes them onto the right queues, as described in the queue section below. These could be tasks that are newly ready for execution, or older tasks that are ready for execution again because they either failed in a retriable way on execution, or were dropped elsewhere within the ATF system. 

Below is a simple walkthrough of the Store Consumer’s function:  @@ -191,16 +191,16 @@ The Store Consumer polls tasks that failed in earlier execution attempts. This helps with the at-least-once guarantee that the ATF system provides. More details on how the Store Consumer polls new and previously failed tasks is presented in the Lifecycle of a task section below.

- Queue
ATF uses AWS Simple Queue Service (SQS) to queue tasks internally. These queues act as a buffer between the Store Consumer and Controllers (described below). Each <lambda, priority>  pair gets a dedicated SQS queue. The total number of SQS queues used by ATF is #lambdas x #priorities. + Queue
ATF uses AWS Simple Queue Service (SQS) to queue tasks internally. These queues act as a buffer between the Store Consumer and Controllers (described below). Each <lambda, priority>  pair gets a dedicated SQS queue. The total number of SQS queues used by ATF is #lambdas x #priorities.

- Controller
Worker hosts are physical hosts dedicated for task execution. Each worker host has one controller process responsible for polling tasks from SQS queues in a background thread, and then pushing them onto process local buffered queues. The Controller is only aware of the lambdas it is serving and thus polls only the limited set of necessary queues.  + Controller
Worker hosts are physical hosts dedicated for task execution. Each worker host has one controller process responsible for polling tasks from SQS queues in a background thread, and then pushing them onto process local buffered queues. The Controller is only aware of the lambdas it is serving and thus polls only the limited set of necessary queues. 

The Controller serves tasks from its process local queue as a response to NextWork RPCs. This is the layer where execution level task prioritization occurs. The Controller has different process level queues for tasks of different priorities and can thus prioritize tasks in response to NextWork RPCs.

- Executor
The Executor is a process with multiple threads, responsible for the actual task execution. Each thread within an Executor process follows this simple loop: + Executor
The Executor is a process with multiple threads, responsible for the actual task execution. Each thread within an Executor process follows this simple loop:

@@ -214,7 +214,7 @@ Each worker host has a single Controller process and multiple executor processes. Both the Controller and Executors work in a “pull” model, in which active loops continuously long-poll for new work to be done.

- Heartbeat and Status Controller (HSC)
+ Heartbeat and Status Controller (HSC)
The HSC serves RPCs for claiming a task for execution (ClaimTask), setting task status after execution (SetResults) and heartbeats during task execution (Heartbeat). ClaimTask requests originate from the Controllers in response to NextWork requests. Heartbeat and SetResults requests originate from executor processes during and after task execution. The HSC interacts with the task store to update the task status on the kind of request it receives.

@@ -413,9 +413,7 @@ N/A

- - - + @@ -433,9 +431,7 @@ N/A

- - - + @@ -443,15 +439,15 @@ The store consumer polls for tasks based on the following query:

- assoc_status= && next_timestamp<=time.now()
+ assoc_status= && next_timestamp<=time.now()

- Below is the state machine that defines task state transitions: 
+ Below is the state machine that defines task state transitions: 

- Task State Transitions [Fig 2] + Task State Transitions [Fig 2]
@@ -463,22 +459,22 @@

- At-least-once task execution
At-least-once execution is guaranteed in ATF by retrying a task until it completes execution (which is signaled by a Success or a FatalFailure state). All ATF system errors are implicitly considered retriable failures, and lambda owners have an option of marking tasks with a RetriableFailure state. Tasks might be dropped from the ATF execution pipeline in different parts of the system through transient RPC failures and failures on dependencies like Edgestore or SQS. These transient failures at different parts of the system do not affect the at-least-once guarantee, though, because of the system of timeouts and re-polling from Store Consumer. + At-least-once task execution
At-least-once execution is guaranteed in ATF by retrying a task until it completes execution (which is signaled by a Success or a FatalFailure state). All ATF system errors are implicitly considered retriable failures, and lambda owners have an option of marking tasks with a RetriableFailure state. Tasks might be dropped from the ATF execution pipeline in different parts of the system through transient RPC failures and failures on dependencies like Edgestore or SQS. These transient failures at different parts of the system do not affect the at-least-once guarantee, though, because of the system of timeouts and re-polling from Store Consumer.

- No concurrent task execution
Concurrent task execution is avoided through a combination of two methods in ATF. First, tasks are explicitly claimed through an exclusive task state (Claimed) before starting execution. Once the task execution is complete, the task status is updated to one of Success, FatalFailure or RetriableFailure. A task can be claimed only if its existing task state is Enqueued (retried tasks go to the Enqueued state as well once they are re-pushed onto SQS). + No concurrent task execution
Concurrent task execution is avoided through a combination of two methods in ATF. First, tasks are explicitly claimed through an exclusive task state (Claimed) before starting execution. Once the task execution is complete, the task status is updated to one of Success, FatalFailure or RetriableFailure. A task can be claimed only if its existing task state is Enqueued (retried tasks go to the Enqueued state as well once they are re-pushed onto SQS).

- However, there might be situations where once a long running task starts execution, its heartbeats might fail repeatedly yet the task execution continues. ATF would retry this task by polling it from the store consumer because the heartbeat timeouts would’ve expired. This task can then be claimed by another worker and lead to concurrent execution. 
+ However, there might be situations where once a long running task starts execution, its heartbeats might fail repeatedly yet the task execution continues. ATF would retry this task by polling it from the store consumer because the heartbeat timeouts would’ve expired. This task can then be claimed by another worker and lead to concurrent execution. 

To avoid this situation, there is a termination logic in the Executor processes whereby an Executor process terminates itself as soon as three consecutive heartbeat calls fail. Each heartbeat timeout is large enough to eclipse three consecutive heartbeat failures. This ensures that the Store Consumer cannot pull such tasks before the termination logic ends them—the second method that helps achieve this guarantee.

- Isolation
Isolation of lambdas is achieved through dedicated worker clusters, dedicated queues, and dedicated per-lambda scheduling quotas. In addition, isolation across different priorities within the same lambda is likewise achieved through dedicated queues and scheduling bandwidth. + Isolation
Isolation of lambdas is achieved through dedicated worker clusters, dedicated queues, and dedicated per-lambda scheduling quotas. In addition, isolation across different priorities within the same lambda is likewise achieved through dedicated queues and scheduling bandwidth.

- Delivery latency
ATF use cases do not require ultra-low task delivery latencies. Task delivery latencies on the order of a couple of seconds are acceptable. Tasks ready for execution are periodically polled by the Store Consumer and this period of polling largely controls the task delivery latency. Using this as a tuning lever, ATF can achieve different delivery latencies as required. Increasing poll frequency reduces task delivery latency and vice versa. Currently, we have calibrated ATF to poll for ready tasks once every two seconds. + Delivery latency
ATF use cases do not require ultra-low task delivery latencies. Task delivery latencies on the order of a couple of seconds are acceptable. Tasks ready for execution are periodically polled by the Store Consumer and this period of polling largely controls the task delivery latency. Using this as a tuning lever, ATF can achieve different delivery latencies as required. Increasing poll frequency reduces task delivery latency and vice versa. Currently, we have calibrated ATF to poll for ready tasks once every two seconds.

@@ -503,16 +499,16 @@ As described above, ATF provides an infrastructural building block for scheduling asynchronous tasks. With this foundation established, ATF can be extended to support more generic use cases and provide more features as a framework. Following are some examples of what could be built as an extension to ATF. 

- Periodic task execution
Currently, ATF is a system for one-time task scheduling. Building support for periodic task execution as an extension to this framework would be useful in unlocking new capabilities for our clients. + Periodic task execution
Currently, ATF is a system for one-time task scheduling. Building support for periodic task execution as an extension to this framework would be useful in unlocking new capabilities for our clients.

- Better support for task chaining
Currently, it is possible to chain tasks on ATF by scheduling a task onto ATF that then schedules other tasks onto ATF during its execution. Although it is possible to do this in the current ATF setup, visibility and control on this chaining is absent at the framework level. Another natural extension here would be to better support task chaining through framework-level visibility and control, to make this use case a first class concept in the ATF model. + Better support for task chaining
Currently, it is possible to chain tasks on ATF by scheduling a task onto ATF that then schedules other tasks onto ATF during its execution. Although it is possible to do this in the current ATF setup, visibility and control on this chaining is absent at the framework level. Another natural extension here would be to better support task chaining through framework-level visibility and control, to make this use case a first class concept in the ATF model.

- Dead letter queues for misbehaving tasks
One common source of maintenance overhead we observe on ATF is that some tasks get stuck in infinite retry loops due to occasional bugs in lambda logic. This requires manual intervention from the ATF framework owners in some cases where there are a large number of tasks stuck in such loops, occupying a lot of the scheduling bandwidth in the system. Typical manual actions in response to such a situation include pausing execution of the lambdas with misbehaving tasks, or dropping them outright. + Dead letter queues for misbehaving tasks
One common source of maintenance overhead we observe on ATF is that some tasks get stuck in infinite retry loops due to occasional bugs in lambda logic. This requires manual intervention from the ATF framework owners in some cases where there are a large number of tasks stuck in such loops, occupying a lot of the scheduling bandwidth in the system. Typical manual actions in response to such a situation include pausing execution of the lambdas with misbehaving tasks, or dropping them outright.

- One way to reduce this operational overhead and provide an easy interface for lambda owners to recover from such incidents would be to create dead letter queues filled with such misbehaving tasks. The ATF framework could impose a maximum number of retries before tasks are pushed onto the dead letter queue. We could create and expose tools that make it easy to reschedule tasks from the dead letter queue back into the ATF system, once the associated lambda bugs are fixed.
+ One way to reduce this operational overhead and provide an easy interface for lambda owners to recover from such incidents would be to create dead letter queues filled with such misbehaving tasks. The ATF framework could impose a maximum number of retries before tasks are pushed onto the dead letter queue. We could create and expose tools that make it easy to reschedule tasks from the dead letter queue back into the ATF system, once the associated lambda bugs are fixed.

@@ -523,7 +519,7 @@

- We hope this post helps engineers elsewhere to develop better async task frameworks of their own. Many thanks to everyone who worked on this project: Anirudh Jayakumar, Deepak Gupta, Dmitry Kopytkov, Koundinya Muppalla, Peng Kang, Rajiv Desai, Ryan Armstrong, Steve Rodrigues, Thomissa Comellas, Xiaonan Zhang and Yuhuan Du.
+ We hope this post helps engineers elsewhere to develop better async task frameworks of their own. Many thanks to everyone who worked on this project: Anirudh Jayakumar, Deepak Gupta, Dmitry Kopytkov, Koundinya Muppalla, Peng Kang, Rajiv Desai, Ryan Armstrong, Steve Rodrigues, Thomissa Comellas, Xiaonan Zhang and Yuhuan Du.
 

- + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/ebb-org/expected.html b/article_scraper/resources/tests/readability/ebb-org/expected.html index 02e230e..fa425b8 100644 --- a/article_scraper/resources/tests/readability/ebb-org/expected.html +++ b/article_scraper/resources/tests/readability/ebb-org/expected.html @@ -1,5 +1,4 @@ -
-
+

Tuesday 15 October 2019 by Bradley M. Kuhn @@ -45,14 +44,11 @@

-
-

- #include <std/disclaimer.h>
- use Standard::Disclaimer;
- from standard import disclaimer
+

+ #include <std/disclaimer.h>
+ use Standard::Disclaimer;
+ from standard import disclaimer
SELECT full_text FROM standard WHERE type = 'disclaimer'; -

-

+

Both previously and presently, I have been employed by and/or done work for various organizations that also have views on Free, Libre, and Open Source Software. As should be blatantly obvious, this is my website, not theirs, so please do not assume views and opinions here belong to any such organization. Since I do co-own ebb.org with my wife, it may not be so obvious that these aren't her views and opinions, either. -

-
+

\ No newline at end of file diff --git a/article_scraper/resources/tests/readability/ehow-1/expected.html b/article_scraper/resources/tests/readability/ehow-1/expected.html index d02bb05..2272723 100644 --- a/article_scraper/resources/tests/readability/ehow-1/expected.html +++ b/article_scraper/resources/tests/readability/ehow-1/expected.html @@ -1,12 +1,8 @@
-
- - - -
+

Glass cloche terrariums are not only appealing to the eye, but they also preserve a bit of nature in your home and serve as a simple, yet beautiful, piece of art. Closed terrariums are easy to care for, as they retain much of their own moisture and provide a warm environment with a consistent level of humidity. You won’t have to water the terrariums unless you see that the walls are not misting up. Small growing plants that don’t require a lot of light work best such as succulents, ferns, moss, even orchids.

-
Glass cloche terrariums
+
Glass cloche terrariums
Glass cloche terrariums (Lucy Akins)
@@ -14,8 +10,7 @@

Other People Are Reading

-

What You'll Need:

-
    +

    What You'll Need:

    • Cloche
    • Planter saucer, small shallow dish or desired platform
    • Floral foam oasis
    • @@ -29,75 +24,65 @@
    -

    Step 1

    -

    Measure the circumference of your cloche and cut the foam oasis about 3/4 inch (2 cm) smaller. Place the foam oasis into a container full of water and allow to soak until it sinks to the bottom. Dig out a hole on the oasis large enough to fit your plant, being careful not to pierce all the way through to the bottom.

    +

    Step 1

    Measure the circumference of your cloche and cut the foam oasis about 3/4 inch (2 cm) smaller. Place the foam oasis into a container full of water and allow to soak until it sinks to the bottom. Dig out a hole on the oasis large enough to fit your plant, being careful not to pierce all the way through to the bottom.

    -
    Dig a hole in the oasis.
    +
    Dig a hole in the oasis.
    Dig a hole in the oasis. (Lucy Akins)
    -

    Step 2

    -

    Insert your plant into the hole.

    +

    Step 2

    Insert your plant into the hole.

    -
    Orchid in foam oasis
    +
    Orchid in foam oasis
    Orchid in foam oasis (Lucy Akins)
    -

    Step 3

    -

    You can add various plants if you wish.

    +

    Step 3

    You can add various plants if you wish.

    -
    Various foliage
    +
    Various foliage
    Various foliage (Lucy Akins)
    -

    Step 4

    -

    Using floral pins, attach enough moss around the oasis to cover it.

    +

    Step 4

    Using floral pins, attach enough moss around the oasis to cover it.

    -
    Attach moss.
    +
    Attach moss.
    Attach moss. (Lucy Akins)
    -

    Step 5

    -

    Gently place the cloche over the oasis. The glass may push some of the moss upward, exposing some of the foam.

    +

    Step 5

    Gently place the cloche over the oasis. The glass may push some of the moss upward, exposing some of the foam.

    -
    Place cloche over oasis.
    +
    Place cloche over oasis.
    Place cloche over oasis. (Lucy Akins)
    -

    Step 6

    -

    Simply pull down the moss with tweezers or insert more moss to fill in the empty spaces.

    +

    Step 6

    Simply pull down the moss with tweezers or insert more moss to fill in the empty spaces.

    -
    Rearrange moss.
    +
    Rearrange moss.
    Rearrange moss. (Lucy Akins)
    -

    Step 7

    -

    You can use any platform you wish. In this case, a small saucer was used.

    +

    Step 7

    You can use any platform you wish. In this case, a small saucer was used.

    -
    Place cloche on a platform to sit on.
    +
    Place cloche on a platform to sit on.
    Place cloche on a platform to sit on. (Lucy Akins)
    -

    Step 8

    -

    This particular terrarium rests on a planter saucer and features a small white pumpkin.

    +

    Step 8

    This particular terrarium rests on a planter saucer and features a small white pumpkin.

    -
    Cloche placed on a terracotta saucer
    +
    Cloche placed on a terracotta saucer
    Cloche placed on a terracotta saucer (Lucy Akins)
    -

    Step 9

    -

    This particular terrarium was placed on a wood slice and a little toy squirrel was placed inside to add a little whimsy.

    +

    Step 9

    This particular terrarium was placed on a wood slice and a little toy squirrel was placed inside to add a little whimsy.

    -
    Placed on a wooden slice
    +
    Placed on a wooden slice
    Placed on a wooden slice (Lucy Akins)
    -

    Finished Terrarium

    -

    Displayed alone or in a group, these pretty arrangements allow you to add a little nature to your decor or tablescape.

    +

    Finished Terrarium

    Displayed alone or in a group, these pretty arrangements allow you to add a little nature to your decor or tablescape.

    -
    Cloche terrarium
    +
    Cloche terrarium
    Cloche terrarium (Lucy Akins)
    @@ -110,4 +95,4 @@

    Featured

    -
+ \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/ehow-2/expected.html b/article_scraper/resources/tests/readability/ehow-2/expected.html index d17cda0..82d9a10 100644 --- a/article_scraper/resources/tests/readability/ehow-2/expected.html +++ b/article_scraper/resources/tests/readability/ehow-2/expected.html @@ -1,20 +1,13 @@ -
-
+

-

+

-
-
+
@@ -23,7 +16,7 @@

Thankfully, there are plenty of creative ways to trim a little grad party fat without sacrificing any of the fun or celebratory spirit.

- Graduation + Graduation
(Mike Watson Images/Moodboard/Getty) @@ -36,7 +29,7 @@

Parties hosted at restaurants, clubhouses and country clubs eliminate the need to spend hours cleaning up once party guests have gone home. But that convenience comes with a price tag. A country club may charge as much as $2,000 for room rental and restaurant food and beverage will almost always cost more than food prepped and served at home.

- Save money hosting the party at home. + Save money hosting the party at home.
Thomas Jackson/Digital Vision/Getty Images
@@ -49,7 +42,7 @@

Instead of hiring a DJ, use your iPod or Smartphone to spin the tunes. Both easily hook up to most speakers or mp3 compatible docks to play music from your music library. Or download Pandora, the free online radio app, and play hours of music for free.

Personalize the music with a playlist of the grad’s favorite songs or songs that were big hits during his or her years in school.

- Online radio can take the place of a hired DJ. + Online radio can take the place of a hired DJ.
Spencer Platt/Getty Images News/Getty Images
@@ -61,7 +54,7 @@

Avoid canned drinks, which guests often open, but don't finish. Serve pitchers of tap water with lemon and cucumber slices or sliced strawberries for an interesting and refreshing flavor. Opt for punches and non-alcoholic drinks for high school graduates that allow guests to dole out the exact amount they want to drink.

- Serve drinks in pitchers, not in cans. + Serve drinks in pitchers, not in cans.
evgenyb/iStock/Getty Images
@@ -74,7 +67,7 @@

Instead of inviting everyone you – and the graduate – know or ever knew, scale back the guest list. Forgo inviting guests that you or your grad haven't seen for eons. There is no reason to provide provisions for people who are essentially out of your lives. Sticking to a small, but personal, guest list allows more time to mingle with loved ones during the party, too.

- Limit guests to those close to the graduate. + Limit guests to those close to the graduate.
Kane Skennar/Photodisc/Getty Images
@@ -86,7 +79,7 @@

See if your grad and his best friend, girlfriend or close family member would consider hosting a joint party. You can split some of the expenses, especially when the two graduates share mutual friends. You'll also have another parent to bounce ideas off of and to help you stick to your budget when you're tempted to splurge.

- Throw a joint bash for big savings. + Throw a joint bash for big savings.
Mike Watson Images/Moodboard/Getty
@@ -99,7 +92,7 @@

Skip carving stations of prime rib and jumbo shrimp as appetizers, especially for high school graduation parties. Instead, serve some of the graduate's favorite side dishes that are cost effective, like a big pot of spaghetti with breadsticks. Opt for easy and simple food such as pizza, finger food and mini appetizers.

Avoid pre-packaged foods and pre-made deli platters. These can be quite costly. Instead, make your own cheese and deli platters for less than half the cost of pre-made.

- Cost effective appetizers are just as satisfying as pre-made deli platters. + Cost effective appetizers are just as satisfying as pre-made deli platters.
Mark Stout/iStock/Getty Images
@@ -111,7 +104,7 @@

Instead of an evening dinner party, host a grad lunch or all appetizers party. Brunch and lunch fare or finger food costs less than dinner. Guests also tend to consume less alcohol in the middle of the day, which keeps cost down.

- A brunch gathering will cost less than a dinner party. + A brunch gathering will cost less than a dinner party.
Mark Stout/iStock/Getty Images
@@ -130,7 +123,7 @@

Decorate your party in the graduate's current school colors or the colors of the school he or she will be headed to next. Décor that is not specifically graduation-themed may cost a bit less, and any leftovers can be re-used for future parties, picnics and events.

- Theme the party by color without graduation-specific decor. + Theme the party by color without graduation-specific decor.
jethuynh/iStock/Getty Images
@@ -147,5 +140,4 @@

Promoted By Zergnet

-
-
+
\ No newline at end of file diff --git a/article_scraper/resources/tests/readability/embedded-videos/expected.html b/article_scraper/resources/tests/readability/embedded-videos/expected.html index 33e6650..690b431 100644 --- a/article_scraper/resources/tests/readability/embedded-videos/expected.html +++ b/article_scraper/resources/tests/readability/embedded-videos/expected.html @@ -8,13 +8,13 @@ proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

At root

- - - + + +

In a paragraph

-

+

In a div

-
+

Foo

Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, @@ -23,4 +23,4 @@ cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

- + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/engadget/expected.html b/article_scraper/resources/tests/readability/engadget/expected.html index 628a9b0..a112ae2 100644 --- a/article_scraper/resources/tests/readability/engadget/expected.html +++ b/article_scraper/resources/tests/readability/engadget/expected.html @@ -1,5 +1,4 @@ -
-
+

The Xbox One X is the ultimate video game system. It sports more horsepower than any system ever. And it plays more @@ -14,13 +13,11 @@ Everyone else might be better off waiting, or opting for the $279 Xbox One S.

-
-
+

Gallery: Xbox One X | 14 Photos

-
-

- +

@@ -64,12 +61,11 @@ PlayStation 4 Pro. 4K/HDR enhanced games look great, but it’s lack of VR is disappointing in 2017.

-
-
+

Hardware

-

+

Despite all the power inside, the One X is Microsoft's smallest console to date. It looks similar to the Xbox One S, except it has an entirely @@ -87,7 +83,7 @@ That additional horsepower means the Xbox One X can run more games in full native 4K than the Sony's console.

-

+

Along the front, there's the slot-loading 4K Blu-ray drive, a physical power button, a single USB port and a controller pairing button. And around back, @@ -99,7 +95,7 @@ plug it in.

-
+
Devindra Hardawar/AOL
@@ -119,7 +115,7 @@

In use

-
+
Devindra Hardawar/AOL
@@ -158,7 +154,7 @@
-
+
@@ -192,9 +188,8 @@

Gallery: Xbox One X screenshots | 9 Photos

-
-

- +

@@ -234,7 +229,7 @@
-
+
@@ -255,9 +250,7 @@ capable HDR 10 standard. That makes sense since it's more widely supported, but it would have been nice to see Dolby's, too.

-

- -

+

And speaking of Dolby technology, Microsoft is also highlighting Atmos support on the One X, just like it did with the One S. The company's app lets you @@ -283,7 +276,7 @@

Pricing and the competition

-
+
Devindra Hardawar/AOL
@@ -320,7 +313,7 @@ PC, you won't be missing out on much by ditching consoles.

Wrap-up

-

+

Ultimately, the Xbox One X offers some major performance upgrades that gamers will notice -- especially if you're coming from an original Xbox @@ -332,5 +325,4 @@

-
-
+ \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/firefox-nightly-blog/expected.html b/article_scraper/resources/tests/readability/firefox-nightly-blog/expected.html index f085ff0..77e6e10 100644 --- a/article_scraper/resources/tests/readability/firefox-nightly-blog/expected.html +++ b/article_scraper/resources/tests/readability/firefox-nightly-blog/expected.html @@ -16,8 +16,7 @@
  • -

    The about:restartrequired error page, saying "Sorry. We just need to do one small thing to keep going. Nightly has just been updated in the background. Click Restart Nightly to complete the update. We will restore all your pages, windows and tabs afterwards, so you can be on your way quickly.", followed by a button to restart Nightly.

    -

    +

    The about:restartrequired error page, saying "Sorry. We just need to do one small thing to keep going. Nightly has just been updated in the background. Click Restart Nightly to complete the update. We will restore all your pages, windows and tabs afterwards, so you can be on your way quickly.", followed by a button to restart Nightly.

    Users who run multiple user profiles concurrently will probably see this less!

    @@ -119,8 +118,7 @@
    • -

      A table showing the total number of remaining bugs for the MVP to make the DevTools Fission-compatible.

      -

      +

      A table showing the total number of remaining bugs for the MVP to make the DevTools Fission-compatible.

      Our DevTools are ready for Fission (out-of-process iframes)!

      @@ -132,8 +130,7 @@
      • -

        A table showing the total number of remaining bugs for the MVP to make Marionette Fission-compatible.

        -

        +

        A table showing the total number of remaining bugs for the MVP to make Marionette Fission-compatible.

        Marionette, the framework that allows Firefox to be tested with automation, is now Fission compatible too!

        @@ -292,4 +289,4 @@ - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/folha/expected.html b/article_scraper/resources/tests/readability/folha/expected.html index 65947c1..b779d68 100644 --- a/article_scraper/resources/tests/readability/folha/expected.html +++ b/article_scraper/resources/tests/readability/folha/expected.html @@ -21,4 +21,4 @@

        "Em 2012 eu fiz e errei. O protocolo e a situação gerada no jogo do Palmeiras são fatos de opinião pessoal. CBF e Palmeiras, enquanto instituições têm a opinião. Errei lá atrás, não faria com o presidente antes da Copa e nem agora porque entendo que misturar esporte e política não é legal. Fiz errado lá atrás? Sim. Faria de novo? Não", acrescentou o comandante.

        - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/gmw/expected.html b/article_scraper/resources/tests/readability/gmw/expected.html index 6fe4c9c..1e0831a 100644 --- a/article_scraper/resources/tests/readability/gmw/expected.html +++ b/article_scraper/resources/tests/readability/gmw/expected.html @@ -5,9 +5,9 @@

          翱翔于距地球数千公里的太空中,进入广袤漆黑的未知领域,是一项艰苦卓绝的工作。这让人感到巨大压力和极度恐慌。那么,为什么不能让宇航员来一杯“地球末日”鸡尾酒来放松一下?

          不幸的是,对于希望能喝上一杯的太空探险者,那些将他们送上太空的政府机构普遍禁止他们染指包括酒在内的含酒精饮料。

          但是,很快普通人都会有机会向人类“最终的边疆”出发——以平民化旅行的形式,去探索和殖民火星。确实,火星之旅将是一次令人感到痛苦的旅行,可能一去不复返并要几年时间才能完成,但是否应该允许参与者在旅程中痛饮一番?或至少携带能在火星上发酵自制酒精饮料的设备?

        -

        (Credit: Nasa)

        +

        (Credit: Nasa)

        -   图注:巴兹?奥尔德林(Buzz Aldrin)可能是第二个在月球上行走的人,但他是第一个在月球上喝酒的人 +   图注:巴兹?奥尔德林(Buzz Aldrin)可能是第二个在月球上行走的人,但他是第一个在月球上喝酒的人

          事实是,历史上酒与太空探险有一种复杂的关系。让我们来看看喝了酒的航天员究竟会发生什么—— 如果我们开始给予进入太空的人类更大的自由度,又可能会发生什么。

          人们普遍认为,当一个人所处的海拔越高,喝醉后会越容易感到头昏。因此,人们自然地想到,当人身处地球轨道上时,饮酒会对人体有更强烈的致眩作用。但这种说法可能不是正确的。

        @@ -20,9 +20,9 @@

          所以,如果酒精对人体的物理效应与海拔高度无关,那么在国际空间站上睡前小饮一杯不应该是一个大问题,对吧?错了。

          美国宇航局约翰逊航天中心发言人丹尼尔·霍特(Daniel Huot)表示:“国际空间站上的宇航员不允许喝酒。在国际空间站上,酒精和其它挥发性化合物的使用受到控制,因为它们的挥发物可能对该站的水回收系统产生影响。”

          为此,国际空间站上的宇航员甚至没有被提供含有酒精的产品,例如漱口水、香水或须后水。如果在国际空间站上饮酒狂欢,溢出的啤酒也可能存在损坏设备的风险。

        -

        (Credit: iStock)

        +

        (Credit: iStock)

        -   图注:测试表明,有关人在高空中喝酒更容易醉的传言是不正确的 +   图注:测试表明,有关人在高空中喝酒更容易醉的传言是不正确的

          然后是责任的问题。我们不允许汽车司机或飞机飞行员喝醉后驾驶,所以并不奇怪同样的规则适用于国际空间站上的宇航员。毕竟国际空间站的造价高达1500亿美元,而且在接近真空的太空中其运行速度达到了每小时27680公里。

          然而,2007年,美国宇航局(NASA)成立了一个负责调查宇航员健康状况的独立小组,称历史上该机构至少有两名宇航员在即将飞行前喝了大量的酒,但仍然被允许飞行。Nasa安全负责人随后的审查发现并没有证据支持这一指控。宇航员在飞行前12小时是严禁饮酒的,因为他们需要充分的思维能力和清醒的意识。

        @@ -39,19 +39,19 @@

          因此,即使宇航员自己被禁止在地球轨道上饮酒,但他们正在做的工作可以提高在地上消费的酒的质量。

          相比之下,执行登陆火星任务的人将远离家乡几年,而不是几个月,因此可能会有人提出有关禁止饮酒的规定可以放松一些。

          然而,像戴夫?汉森这样的专家认为,继续禁止饮酒并没有什么害处。除了实际的安全问题,饮酒还可能有其它挑战。汉森认为,地球人存在许多社会文化方面的差异,而且人连续几年时间呆在一个狭小的空间里,很容易突然发怒,这些因素都使饮酒问题变得很棘手。

        -

        (Credit: David Frohman/Peachstate Historical Consulting Inc)

        +

        (Credit: David Frohman/Peachstate Historical Consulting Inc)

        -   图注:奥尔德林的圣餐杯回到了地球上 +   图注:奥尔德林的圣餐杯回到了地球上

          他说:“这是一个政治问题,也是一个文化方面的问题,但不是一个科学上的问题。这将是未来一个可能产生冲突领域,因为人们具有不同的文化背景,他们对饮酒的态度不同。”他进一步指出,如果你与穆斯林、摩门教徒或禁酒主义者分配在同一间宿舍怎么办?面对未来人们可能在一个没有期限的时间内呆在一个有限的空间里,需要“尽早解决”如何协调不同文化观点的问题。

        -

          所以,当宇航员在地球轨道上时,将还不得不满足于通过欣赏外面的景色来振作精神,而不要指望沉溺于烈酒中。我们留在地球上的人,则可以准备好适量的香槟酒,以迎接他们的归来。

        +

          所以,当宇航员在地球轨道上时,将还不得不满足于通过欣赏外面的景色来振作精神,而不要指望沉溺于烈酒中。我们留在地球上的人,则可以准备好适量的香槟酒,以迎接他们的归来。

          原标题:他晚于阿姆斯特朗登月 却是首个敢在月球喝酒的人

          出品︱网易科学人栏目组 胖胖

          作者︱春春 - +

        [责任编辑:肖春芳]

        - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/google-sre-book-1/expected.html b/article_scraper/resources/tests/readability/google-sre-book-1/expected.html index 2b9ce5b..b0dc0ea 100644 --- a/article_scraper/resources/tests/readability/google-sre-book-1/expected.html +++ b/article_scraper/resources/tests/readability/google-sre-book-1/expected.html @@ -455,4 +455,4 @@

        - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/guardian-1/expected.html b/article_scraper/resources/tests/readability/guardian-1/expected.html index 0d4a648..8be1273 100644 --- a/article_scraper/resources/tests/readability/guardian-1/expected.html +++ b/article_scraper/resources/tests/readability/guardian-1/expected.html @@ -9,17 +9,14 @@ “Man’s greed in the ocean is hurting the whales,” says Parata, a fierce and uncompromising elder of the Ngātiwai tribe of eastern Northland.

        - - - - + + +
        - - Hori Parata at his Pātaua farm, the place where he was born and grew up. -
        - -
        + + Hori Parata at his Pātaua farm, the place where he was born and grew up. +
        • @@ -35,17 +32,14 @@ Whale experts regard New Zealand – or Aotearoa as it is called by Māori – as the whale stranding capital of the world, with more than 5,000 incidents recorded since 1840, and an average of 300 individual animals beaching themselves each year.

          - - - - + + +
          - - Kauri (Tekaurinui Robert) Parata, watched by his father Hori Parata, carves a traditional Maōri design at their home in Whangārei. Kauri is a member of the Manu Taupunga group that is the organising arm of the whale-body recovery operation started by his father, Hori Parata -
          - -
          + + Kauri (Tekaurinui Robert) Parata, watched by his father Hori Parata, carves a traditional Maōri design at their home in Whangārei. Kauri is a member of the Manu Taupunga group that is the organising arm of the whale-body recovery operation started by his father, Hori Parata +
          • @@ -61,43 +55,34 @@ Climate change is to blame too, scientists think, with warming ocean temperatures moving whales’ prey closer to the shore and forcing them to pursue their food into shallow waters.

            - - - - + + +
            - - A bin of small whale bones. -
            - -
            + + A bin of small whale bones. +
            - - - - + + +
            - - The baleen recovered from a stranded Pygmy Right Whale. -
            - -
            + + The baleen recovered from a stranded Pygmy Right Whale. +
            - - - - + + +
            - - Squid beaks, from the stomach of a Sperm Whale. -
            - -
            + + Squid beaks, from the stomach of a Sperm Whale. +
            • @@ -134,17 +119,14 @@ Then the tribe moves in en masse and holds a karakia (prayer), names each animal and sets to work removing their bones, blubber, eyes and teeth for cultural purposes.

              - - - - + + +
              - - Buck Cullen with his daughter Kaiarahi (10 months) in his back yard where he is storing a pair of massive Sperm Whale jawbones. Buck is a integral member of the whale recovery team, alongside Hori Parata. -
              - -
              + + Buck Cullen with his daughter Kaiarahi (10 months) in his back yard where he is storing a pair of massive Sperm Whale jawbones. Buck is a integral member of the whale recovery team, alongside Hori Parata. +
              • @@ -160,17 +142,14 @@ Earlier this year in South Taranaki, a mass stranding that was described as “unprecedented” left the local Māori tribe scrambling. Security was brought in when thieves attacked a sperm whale with an axe, trying to remove valuable teeth from its jaw.

                - - - - + + +
                - - 12 Parāoa Whales (Sperm Whales) recently stranded on the South Taranaki coast of Kaupokonui, on a scale not seen on their coast in recent memory. -
                - -
                + + 12 Parāoa Whales (Sperm Whales) recently stranded on the South Taranaki coast of Kaupokonui, on a scale not seen on their coast in recent memory. +
                • @@ -186,17 +165,14 @@ He says mass strandings are getting more local and international attention and money from donations, but traditional knowledge is being dismissed as overly spiritual.

                  - - - - + + +
                  - - Kauri (Tekaurinui Robert) Parata, of the New Zealand Māori tribe Ngāti Wai, in front of the carving shed at Hihiaua Cultural Centre in Whangarei -
                  - -
                  + + Kauri (Tekaurinui Robert) Parata, of the New Zealand Māori tribe Ngāti Wai, in front of the carving shed at Hihiaua Cultural Centre in Whangarei +
                  • @@ -215,30 +191,24 @@ “Our own ancestors wouldn’t say to go down there and hug the whales. That’s a modern thing,” says Te Kaurinui.

                    - - - - + + +
                    - - The Pou in front of the carving shed at Hihiaua Cultural centre -
                    - -
                    + + The Pou in front of the carving shed at Hihiaua Cultural centre +
                    - - - - + + +
                    - - Kauri (Tekaurinui Robert) Parata, holds three whale teeth recovered from a beached whale. The middle tooth shows the marks where a poacher had attempted to hack it out with an axe before the recovery group arrived. Kauri is a member of the Manu Taupunga group that is the organising arm of the whale-body recovery operation started by his father, Hori Parata. -
                    - -
                    + + Kauri (Tekaurinui Robert) Parata, holds three whale teeth recovered from a beached whale. The middle tooth shows the marks where a poacher had attempted to hack it out with an axe before the recovery group arrived. Kauri is a member of the Manu Taupunga group that is the organising arm of the whale-body recovery operation started by his father, Hori Parata. +

                    The Ngātiwai are investigating a possible link between the crisis of the dieback disease killing New Zealand’s native kauri trees – and threatening the giant Tāne Mahuta, which may be 2,000 years old – and the increase in whale strandings. @@ -250,17 +220,14 @@ “People dismiss us when we tell them our spiritual understanding of whales – why they are beaching, why they are hurting,” says Te Kaurinui.

                    - - - - + + +
                    - - Whangārei Harbour from Tamaterau, looking south through Mangrove sprouts coming up through the harbourside silt. -
                    - -
                    + + Whangārei Harbour from Tamaterau, looking south through Mangrove sprouts coming up through the harbourside silt. +
                    • @@ -279,17 +246,14 @@ “I arrived at the beach and we leapfrogged between the animals. They were calling out to each other and reassuring each other,” says Werner. “It was a shock. We’re working to adapt but the ocean is changing so fast.”

                      - - - - + + +
                      - - The skull of a Brydes whale, in the storage container at Hihiaua Cultural Centre, Whangārei. -
                      - -
                      + + The skull of a Brydes whale, in the storage container at Hihiaua Cultural Centre, Whangārei. +
                      • @@ -299,17 +263,14 @@
                      - - - - + + +
                      - - A large calibre bullet of the type that the New Zealand Department of Conservation (DOC) uses for euthanasing stranded whales that are beyond rescue. -
                      - -
                      + + A large calibre bullet of the type that the New Zealand Department of Conservation (DOC) uses for euthanasing stranded whales that are beyond rescue. +

                      The recent spate of mass strandings has been described as “heartbreaking” by the conservation department. @@ -320,4 +281,4 @@

                      “It’s very emotional. Our ancestors tell us the strandings are a sign from the sea. So what is the sea telling us? We need to listen.”

                      - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/heise/expected.html b/article_scraper/resources/tests/readability/heise/expected.html index 66eba45..5b6cbe8 100644 --- a/article_scraper/resources/tests/readability/heise/expected.html +++ b/article_scraper/resources/tests/readability/heise/expected.html @@ -2,7 +2,7 @@
                      - +
                      @@ -26,9 +26,9 @@ (bsc) -
                      +

                      - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/herald-sun-1/expected.html b/article_scraper/resources/tests/readability/herald-sun-1/expected.html index 789cef4..b699ba5 100644 --- a/article_scraper/resources/tests/readability/herald-sun-1/expected.html +++ b/article_scraper/resources/tests/readability/herald-sun-1/expected.html @@ -1,6 +1,6 @@
                      -

                      A new Bill would require telecommunications service providers to store so-called ‘metadat +

                      A new Bill would require telecommunications service providers to store so-called ‘metadat

                      A new Bill would require telecommunications service providers to store so-called ‘metadata’ for two years. @@ -11,27 +11,11 @@

                      A HIGH-powered federal government team has been doing the rounds of media organisations in the past few days in an attempt to allay concerns about the impact of new surveillance legislation on press freedom. It failed.

                      -

                      The roadshow featured the Prime Minister’s national security adviser, Andrew Shearer, Justin Bassi, who advises Attorney-General George Brandis on crime and security matters, and Australian Federal Police Commissioner Andrew Colvin. Staffers from the office of Communications Minister Malcolm Turnbull also took part.

                      -

                      They held meetings with executives from News Corporation and Fairfax, representatives of the TV networks, the ABC top brass and a group from the media union and the Walkley journalism foundation. I was involved as a member of the Walkley board.

                      -

                      The initiative, from Tony Abbott’s office, is evidence that the Government has been alarmed by the strength of criticism from media of the Data Retention Bill it wants passed before Parliament rises in a fortnight. Bosses, journalists, even the Press Council, are up in arms, not only over this measure, but also over aspects of two earlier pieces of national security legislation that interfere with the ability of the media to hold government to account.

                      +

                      The roadshow featured the Prime Minister’s national security adviser, Andrew Shearer, Justin Bassi, who advises Attorney-General George Brandis on crime and security matters, and Australian Federal Police Commissioner Andrew Colvin. Staffers from the office of Communications Minister Malcolm Turnbull also took part.

                      They held meetings with executives from News Corporation and Fairfax, representatives of the TV networks, the ABC top brass and a group from the media union and the Walkley journalism foundation. I was involved as a member of the Walkley board.

                      The initiative, from Tony Abbott’s office, is evidence that the Government has been alarmed by the strength of criticism from media of the Data Retention Bill it wants passed before Parliament rises in a fortnight. Bosses, journalists, even the Press Council, are up in arms, not only over this measure, but also over aspects of two earlier pieces of national security legislation that interfere with the ability of the media to hold government to account.

                      -

                      The Bill would require telecommunications service providers to store so-called “metadata” — the who, where, when and how of a communication, but not its content — for two years so security and law enforcement agencies can access it without warrant. Few would argue against the use of such material to catch criminals or terrorists. But, as Parliament’s Joint Committee on Intelligence and Security has pointed out, it would also be used “for the purpose of determining the identity of a journalist’s sources”.

                      -

                      And that should ring warning bells for anyone genuinely concerned with the health of our democracy. Without the ability to protect the identity of sources, journalists would be greatly handicapped in exposing corruption, dishonesty, waste, incompetence and misbehaviour by public officials.

                      -

                      The Press Council is concerned the laws would crush investigative journalism.

                      -

                      “These legitimate concerns cannot be addressed effectively short of exempting journalists and media organisations,” says president David Weisbrot.

                      -

                      The media union is adamant journalists’ metadata must be exempted from the law. That’s what media bosses want, too, though they have a fallback position based on new safeguards being implemented in Britain.

                      -

                      That would prevent access to the metadata of journalists or media organisations without a judicial warrant. There would be a code including — according to the explanatory notes of the British Bill — “provision to protect the public interest in the confidentiality of journalistic sources”.

                      -

                      In their meetings this week, the government team boasted of concessions in the new Data Retention Bill. The number of agencies able to access metadata will be reduced by excluding such organisations as the RSPCA and local councils. And whenever an authorisation is issued for access to information about a journalist’s sources, the Ombudsman (or, where ASIO is involved, the Inspector-General of Intelligence and Security) will receive a copy.

                      -

                      That does nothing to solve the problem. The Government has effectively admitted as much by agreeing that the parliamentary committee should conduct a separate review of how to deal with the issue of journalists’ sources.

                      -

                      But another inquiry would be a waste of time — the committee has already received and considered dozens of submissions on the subject. The bottom line is that the Government does not deny that the legislation is flawed, but is demanding it be passed anyway with the possibility left open of a repair job down the track. That is a ridiculous approach.

                      -

                      Claims that immediate action is imperative do not stand up. These are measures that won’t come into full effect for two years. Anyway, amending the Bill to either exempt journalists or adopt the UK model could be done quickly, without any risk to national security.

                      -

                      AS Opposition Leader Bill Shorten said in a letter to Abbott last month: “Press freedom concerns about mandatory data retention would ideally be addressed in this Bill to avoid the need for future additional amendments or procedures to be put in place in the future.”

                      -

                      The Data Retention Bill will be debated in the House of Representatives this week. Then, on Friday, CEOs from leading media organisations will front the parliamentary committee to air their concerns before the legislation goes to the Senate.

                      -

                      Those CEOs should make it clear they are just as angry about this as they were about Stephen Conroy’s attempt to impinge on press freedom through media regulation under the previous Labor government.

                      -

                      Memories of the grief Conroy brought down on his head would undoubtedly make Abbott sit up and take notice.

                      -

                      LAURIE OAKES IS THE NINE NETWORK POLITICAL EDITOR

                      +

                      The Bill would require telecommunications service providers to store so-called “metadata” — the who, where, when and how of a communication, but not its content — for two years so security and law enforcement agencies can access it without warrant. Few would argue against the use of such material to catch criminals or terrorists. But, as Parliament’s Joint Committee on Intelligence and Security has pointed out, it would also be used “for the purpose of determining the identity of a journalist’s sources”.

                      And that should ring warning bells for anyone genuinely concerned with the health of our democracy. Without the ability to protect the identity of sources, journalists would be greatly handicapped in exposing corruption, dishonesty, waste, incompetence and misbehaviour by public officials.

                      The Press Council is concerned the laws would crush investigative journalism.

                      “These legitimate concerns cannot be addressed effectively short of exempting journalists and media organisations,” says president David Weisbrot.

                      The media union is adamant journalists’ metadata must be exempted from the law. That’s what media bosses want, too, though they have a fallback position based on new safeguards being implemented in Britain.

                      That would prevent access to the metadata of journalists or media organisations without a judicial warrant. There would be a code including — according to the explanatory notes of the British Bill — “provision to protect the public interest in the confidentiality of journalistic sources”.

                      In their meetings this week, the government team boasted of concessions in the new Data Retention Bill. The number of agencies able to access metadata will be reduced by excluding such organisations as the RSPCA and local councils. And whenever an authorisation is issued for access to information about a journalist’s sources, the Ombudsman (or, where ASIO is involved, the Inspector-General of Intelligence and Security) will receive a copy.

                      That does nothing to solve the problem. The Government has effectively admitted as much by agreeing that the parliamentary committee should conduct a separate review of how to deal with the issue of journalists’ sources.

                      But another inquiry would be a waste of time — the committee has already received and considered dozens of submissions on the subject. The bottom line is that the Government does not deny that the legislation is flawed, but is demanding it be passed anyway with the possibility left open of a repair job down the track. That is a ridiculous approach.

                      Claims that immediate action is imperative do not stand up. These are measures that won’t come into full effect for two years. Anyway, amending the Bill to either exempt journalists or adopt the UK model could be done quickly, without any risk to national security.

                      AS Opposition Leader Bill Shorten said in a letter to Abbott last month: “Press freedom concerns about mandatory data retention would ideally be addressed in this Bill to avoid the need for future additional amendments or procedures to be put in place in the future.”

                      The Data Retention Bill will be debated in the House of Representatives this week. Then, on Friday, CEOs from leading media organisations will front the parliamentary committee to air their concerns before the legislation goes to the Senate.

                      Those CEOs should make it clear they are just as angry about this as they were about Stephen Conroy’s attempt to impinge on press freedom through media regulation under the previous Labor government.

                      Memories of the grief Conroy brought down on his head would undoubtedly make Abbott sit up and take notice.

                      LAURIE OAKES IS THE NINE NETWORK POLITICAL EDITOR

                      -
                      + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/hidden-nodes/expected.html b/article_scraper/resources/tests/readability/hidden-nodes/expected.html index 88da507..67f2020 100644 --- a/article_scraper/resources/tests/readability/hidden-nodes/expected.html +++ b/article_scraper/resources/tests/readability/hidden-nodes/expected.html @@ -11,4 +11,4 @@ Third header - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/hukumusume/expected.html b/article_scraper/resources/tests/readability/hukumusume/expected.html index 8d57aa9..90da1ee 100644 --- a/article_scraper/resources/tests/readability/hukumusume/expected.html +++ b/article_scraper/resources/tests/readability/hukumusume/expected.html @@ -4,11 +4,11 @@ - + - + - + @@ -34,18 +34,18 @@ 福娘童話集 > きょうのイソップ童話 > 1月のイソップ童話 > 欲張りなイヌ

                      - 元旦のイソップ童話
                      -
                      -
                      -
                      - よくばりなイヌ
                      -
                      -
                      -
                      - 欲張りなイヌ
                      -
                      -
                      -
                      + 元旦のイソップ童話
                      +
                      +
                      +
                      + よくばりなイヌ
                      +
                      +
                      +
                      + 欲張りなイヌ
                      +
                      +
                      +
                      ひらがな ←→ 日本語・英語 ←→ English

                      @@ -53,7 +53,7 @@ - + おりがみをつくろう @@ -62,7 +62,7 @@ ( おりがみくらぶ より) - + @@ -71,7 +71,7 @@ - 犬の顔の折り紙いぬのかお   犬の顔の紙いぬ + 犬の顔の折り紙いぬのかお   犬の顔の紙いぬ @@ -90,7 +90,7 @@ - +
                      + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/iab-1/expected.html b/article_scraper/resources/tests/readability/iab-1/expected.html index 4b043d5..824b9d7 100644 --- a/article_scraper/resources/tests/readability/iab-1/expected.html +++ b/article_scraper/resources/tests/readability/iab-1/expected.html @@ -8,7 +8,7 @@

                      Through our pursuit of further automation and maximization of margins during the industrial age of media technology, we built advertising technology to optimize publishers’ yield of marketing budgets that had eroded after the last recession. Looking back now, our scraping of dimes may have cost us dollars in consumer loyalty. The fast, scalable systems of targeting users with ever-heftier advertisements have slowed down the public internet and drained more than a few batteries. We were so clever and so good at it that we over-engineered the capabilities of the plumbing laid down by, well, ourselves. This steamrolled the users, depleted their devices, and tried their patience.

                      The rise of ad blocking poses a threat to the internet and could potentially drive users to an enclosed platform world dominated by a few companies. We have let the fine equilibrium of content, commerce, and technology get out of balance in the open web. We had, and still do have, a responsibility to educate the business side, and in some cases to push back. We lost sight of our social and ethical responsibility to provide a safe, usable experience for anyone and everyone wanting to consume the content of their choice.

                      We need to bring that back into alignment, starting right now.

                      -

                      Getting LEAN with Digital Ad UXToday, the IAB Tech Lab is launching the L.E.A.N. Ads program. Supported by the Executive Committee of the IAB Tech Lab Board, IABs around the world, and hundreds of member companies, L.E.A.N. stands for Light, Encrypted, Ad choice supported, Non-invasive ads. These are principles that will help guide the next phases of advertising technical standards for the global digital advertising supply chain.

                      +

                      Getting LEAN with Digital Ad UXToday, the IAB Tech Lab is launching the L.E.A.N. Ads program. Supported by the Executive Committee of the IAB Tech Lab Board, IABs around the world, and hundreds of member companies, L.E.A.N. stands for Light, Encrypted, Ad choice supported, Non-invasive ads. These are principles that will help guide the next phases of advertising technical standards for the global digital advertising supply chain.

                      As with any other industry, standards should be created by non-profit standards-setting bodies, with many diverse voices providing input. We will invite all parties for public comment, and make sure consumer interest groups have the opportunity to provide input.

                      L.E.A.N. Ads do not replace the current advertising standards many consumers still enjoy and engage with while consuming content on our sites across all IP enabled devices. Rather, these principles will guide an alternative set of standards that provide choice for marketers, content providers, and consumers.

                      Among the many areas of concentration, we must also address frequency capping on retargeting in Ad Tech and make sure a user is targeted appropriately before, but never AFTER they make a purchase. If we are so good at reach and scale, we can be just as good, if not better, at moderation. Additionally, we must address volume of ads per page as well as continue on the path to viewability. The dependencies here are critical to an optimized user experience.

                      @@ -19,4 +19,4 @@

                      IAB Tech Lab Members can join the IAB Tech Lab Ad Blocking Working Group, please email adblocking@iab.com for more information.

                      Read more about ad blocking here.

                      - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/ietf-1/expected.html b/article_scraper/resources/tests/readability/ietf-1/expected.html index 5c9f350..261b51a 100644 --- a/article_scraper/resources/tests/readability/ietf-1/expected.html +++ b/article_scraper/resources/tests/readability/ietf-1/expected.html @@ -1,9 +1,9 @@
                      -[Docs] [txt|pdf] [Tracker] [Email] [Diff1] [Diff2] [Nits]
                      -
                      -Versions: 00 01 02 03 04
                      -
                      +[Docs] [txt|pdf] [Tracker] [Email] [Diff1] [Diff2] [Nits]
                      +
                      +Versions: 00 01 02 03 04
                      +
                      INTERNET DRAFT                                      Michiel B. de Jong
                       Document: draft-dejong-remotestorage-04                   IndieHosters
                                                                                    F. Kooman
                      @@ -57,8 +57,7 @@ Copyright Notice
                       
                       
                       de Jong                                                         [Page 1]
                      -
                      -
                       
                      +
                       Internet-Draft              remoteStorage                  December 2014
                       
                       
                      @@ -108,8 +107,7 @@ Table of Contents
                       
                       
                       de Jong                                                         [Page 2]
                      -
                      -
                       
                      +
                       Internet-Draft              remoteStorage                  December 2014
                       
                       
                      @@ -159,8 +157,7 @@ Table of Contents
                       
                       
                       de Jong                                                         [Page 3]
                      -
                      -
                       
                      +
                       Internet-Draft              remoteStorage                  December 2014
                       
                       
                      @@ -210,8 +207,7 @@ Table of Contents
                       
                       
                       de Jong                                                         [Page 4]
                      -
                      -
                       
                      +
                       Internet-Draft              remoteStorage                  December 2014
                       
                       
                      @@ -261,8 +257,7 @@ Table of Contents
                       
                       
                       de Jong                                                         [Page 5]
                      -
                      -
                       
                      +
                       Internet-Draft              remoteStorage                  December 2014
                       
                       
                      @@ -312,8 +307,7 @@ Table of Contents
                       
                       
                       de Jong                                                         [Page 6]
                      -
                      -
                       
                      +
                       Internet-Draft              remoteStorage                  December 2014
                       
                       
                      @@ -363,8 +357,7 @@ Table of Contents
                       
                       
                       de Jong                                                         [Page 7]
                      -
                      -
                       
                      +
                       Internet-Draft              remoteStorage                  December 2014
                       
                       
                      @@ -414,8 +407,7 @@ Table of Contents
                       
                       
                       de Jong                                                         [Page 8]
                      -
                      -
                       
                      +
                       Internet-Draft              remoteStorage                  December 2014
                       
                       
                      @@ -465,8 +457,7 @@ Table of Contents
                       
                       
                       de Jong                                                         [Page 9]
                      -
                      -
                       
                      +
                       Internet-Draft              remoteStorage                  December 2014
                       
                       
                      @@ -516,8 +507,7 @@ Table of Contents
                       
                       
                       de Jong                                                        [Page 10]
                      -
                      -
                       
                      +
                       Internet-Draft              remoteStorage                  December 2014
                       
                       
                      @@ -567,8 +557,7 @@ Table of Contents
                       
                       
                       de Jong                                                        [Page 11]
                      -
                      -
                       
                      +
                       Internet-Draft              remoteStorage                  December 2014
                       
                       
                      @@ -618,8 +607,7 @@ Table of Contents
                       
                       
                       de Jong                                                        [Page 12]
                      -
                      -
                       
                      +
                       Internet-Draft              remoteStorage                  December 2014
                       
                       
                      @@ -669,8 +657,7 @@ motestorage-04",
                       
                       
                       de Jong                                                        [Page 13]
                      -
                      -
                       
                      +
                       Internet-Draft              remoteStorage                  December 2014
                       
                       
                      @@ -720,8 +707,7 @@ XjzzzHNjkd1CJxoQubA1o%3D&token_type=bearer&state=
                       
                       
                       de Jong                                                        [Page 14]
                      -
                      -
                       
                      +
                       Internet-Draft              remoteStorage                  December 2014
                       
                       
                      @@ -771,8 +757,7 @@ ntent-Type, Origin, X-Requested-With, If-Match, If-None-Match
                       
                       
                       de Jong                                                        [Page 15]
                      -
                      -
                       
                      +
                       Internet-Draft              remoteStorage                  December 2014
                       
                       
                      @@ -822,8 +807,7 @@ e.io/spec/modules/myfavoritedrinks/drink"}
                       
                       
                       de Jong                                                        [Page 16]
                      -
                      -
                       
                      +
                       Internet-Draft              remoteStorage                  December 2014
                       
                       
                      @@ -873,8 +857,7 @@ charset=UTF-8","Content-Length":106}}}
                       
                       
                       de Jong                                                        [Page 17]
                      -
                      -
                       
                      +
                       Internet-Draft              remoteStorage                  December 2014
                       
                       
                      @@ -924,8 +907,7 @@ charset=UTF-8","Content-Length":106}}}
                       
                       
                       de Jong                                                        [Page 18]
                      -
                      -
                       
                      +
                       Internet-Draft              remoteStorage                  December 2014
                       
                       
                      @@ -975,8 +957,7 @@ charset=UTF-8","Content-Length":106}}}
                       
                       
                       de Jong                                                        [Page 19]
                      -
                      -
                       
                      +
                       Internet-Draft              remoteStorage                  December 2014
                       
                       
                      @@ -1026,8 +1007,7 @@ charset=UTF-8","Content-Length":106}}}
                       
                       
                       de Jong                                                        [Page 20]
                      -
                      -
                       
                      +
                       Internet-Draft              remoteStorage                  December 2014
                       
                       
                      @@ -1077,8 +1057,7 @@ charset=UTF-8","Content-Length":106}}}
                       
                       
                       de Jong                                                        [Page 21]
                      -
                      -
                       
                      +
                       Internet-Draft              remoteStorage                  December 2014
                       
                       
                      @@ -1129,9 +1108,8 @@ charset=UTF-8","Content-Length":106}}}
                       
                       de Jong                                                        [Page 22]
                       
                      -
                      -
                      +
                      Html markup produced by rfcmarkup 1.111, available from https://tools.ietf.org/tools/rfcmarkup/ -
                      + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/js-link-replacement/expected.html b/article_scraper/resources/tests/readability/js-link-replacement/expected.html index 037242e..419afe8 100644 --- a/article_scraper/resources/tests/readability/js-link-replacement/expected.html +++ b/article_scraper/resources/tests/readability/js-link-replacement/expected.html @@ -2,4 +2,4 @@

                      abc

                      def

                      ghi - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/keep-images/expected.html b/article_scraper/resources/tests/readability/keep-images/expected.html index 1bc6901..751a41d 100644 --- a/article_scraper/resources/tests/readability/keep-images/expected.html +++ b/article_scraper/resources/tests/readability/keep-images/expected.html @@ -3,9 +3,8 @@
                      -

                      -

                      -
                      +

                      +

                      @@ -16,9 +15,8 @@
                      -

                      -

                      -
                      +

                      +

                      Standing at a table in a chemistry lab in Barcelona, Cristina Gil Lladanosa tears open a silver, smell-proof protective envelope. She slides out a @@ -38,9 +36,8 @@

                      -

                      -

                      -
                      +

                      +

                      Cristina Gil Lladanosa, at the Barcelona testing lab | photo by Joan Bardeletti
                      @@ -73,9 +70,8 @@
                      -

                      -

                      -
                      +

                      +

                      Photo by Joan Bardeletti
                      @@ -93,9 +89,8 @@
                      -

                      -

                      -
                      +

                      +

                      Photo by Joan Bardeletti
                      @@ -123,9 +118,8 @@
                      -

                      -

                      -
                      +

                      +

                      The deep web drug lab is the brainchild of Fernando Caudevilla, a Spanish physician who is better known as “DoctorX” on the deep web, a nickname @@ -140,9 +134,8 @@

                      -

                      -

                      -
                      +

                      +

                      Fernando Caudevilla, AKA DoctorX. Photo: Joseph Cox
                      @@ -189,9 +182,8 @@
                      -

                      -

                      -
                      +

                      +

                      Photo: Joseph Cox
                      @@ -199,9 +191,8 @@
                      -

                      -

                      -
                      +

                      +

                      While the Energy Control lab in Madrid lab only tests Spanish drugs from various sources, it is the Barcelona location which vets the substances @@ -230,9 +221,8 @@

                      -

                      -

                      -
                      +

                      +

                      Photo by Joan Bardeletti
                      @@ -274,9 +264,8 @@
                      -

                      -

                      -
                      +

                      +

                      Photo by Joan Bardeletti
                      @@ -284,9 +273,8 @@
                      -

                      -

                      -
                      +

                      +

                      Despite the prevalence of people using the service to gauge the quality of what goes up their nose, many users send samples to Energy Control in @@ -327,9 +315,8 @@

                      -

                      -

                      -
                      +

                      +

                      Photo by Joan Bardeletti
                      @@ -357,9 +344,8 @@
                      -

                      -

                      -
                      +

                      +

                      Photo by Joan Bardeletti
                      @@ -419,9 +405,8 @@
                      -

                      -

                      -
                      +

                      +

                      Photo by Joan Bardeletti
                      @@ -437,9 +422,8 @@
                      -

                      -

                      -
                      +

                      +

                      Top photo by Joan Bardeletti

                      @@ -447,4 +431,4 @@ |Facebook

                      - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/keep-tabular-data/expected.html b/article_scraper/resources/tests/readability/keep-tabular-data/expected.html index da5ed21..14b5942 100644 --- a/article_scraper/resources/tests/readability/keep-tabular-data/expected.html +++ b/article_scraper/resources/tests/readability/keep-tabular-data/expected.html @@ -35,7 +35,7 @@ - + @@ -47,276 +47,260 @@ - - - - - - - - + + + + + + + + - - - - - - - - + + + + + + + + - - - - - - - - + + + + + + + + - - - - - - - - + + + + + + + + - - - - - - - - + + + + + + + + - - - - - - - - + + + + + + + + - - - - - - - - + + + + + + + + - - - - - - - - + + + + + + + + - - - - - - - - - + + + + + + + + + - - - - - - - - - + + + + + + + + + - - - - - - - - - + + + + + + + + + - - - - - - - - - + + + + + + + + + - - - - - - - - - + + + + + + + + + - - - - - - - - - + + + + + + + + + - - - - - - - - - + + + + + + + + + - - - - - - - - - + + + + + + + + + - - - - - - - - - + + + + + + + + + - - - - - - - - - + + + + + + + + + - - - - - - - - - + + + + + + + + + - - - - - - - - - + + + + + + + + + - - - - - - - - - + + + + + + + + + - - - - - - - - - + + + + + + + + + - - - - - - - - - + + + + + + + + +
                      General UX UX draft UX review
                      Load map
                      Save map
                      Graphics settings
                      Control settings
                      Sound settings
                      Interface settings
                      Other settings
                      Map generator
                      Quick bar Twinsen -Quick bar Twinsen
                      Train GUI kovarex -Train GUI kovarex
                      Technology GUI Oxyd -Technology GUI Oxyd
                      Technology tooltip Oxyd -Technology tooltip Oxyd
                      Blueprint library kovarex -Blueprint library kovarex
                      Shortcut bar Oxyd -Shortcut bar Oxyd
                      Character screen Dominik -Character screen Dominik
                      Help overlay kovarex -Help overlay kovarex
                      Manage/Install mods Rseding -Manage/Install mods Rseding
                      Recipe/item/Entity tooltip Twinsen -Recipe/item/Entity tooltip Twinsen
                      Chat icon selector ? -Chat icon selector ?
                      New game ? -New game ?
                      Menu structure ? -Menu structure ?
                      Main screen chat ? -Main screen chat ?
                      Recipe explorer ? -Recipe explorer ?
                      -

                      * Newly finished things since the last update in FFF-277.

                      -

                      Blueprint library

                      +

                      * Newly finished things since the last update in FFF-277.

                      Blueprint library

                      The blueprint library changes have been split into several steps. The reason is, that there was a big motivation to do the integration with the new quickbar (final version introduced in FFF-278) in time for 0.17.0, while the other changes can be done after. The thing with the quickbar is, that it is quite a big change to one of the most used tools in the game and people generally don't like change even when it is for the better. To minimize the hate of the change, we need to "sell it properly". By that, we should provide as many of the positive aspects of the new quickbar at the time of its introduction.

                      @@ -327,19 +311,19 @@ In addition to this, other changes related to the blueprint library will follow soon after 0.17.0. The first thing is the change of how the GUI looks:

                      - +

                      We will also allow to switch between grid and list view. It mainly provides a way to nicely see the longer names of the blueprint. We noticed that players try to put a large amount of info about a blueprint in its name, so we are planning to add a possibility to write a textual description of the blueprint.

                      - +

                      The last big change is to allow to put blueprint books into blueprint books, allowing better organisation. Basically like a directory structure. Whenever a blueprint/book is opened, we plan to show its current location, so the player knows exactly what is going on.

                      - +

                      The hand

                      @@ -349,7 +333,7 @@ This was annoying in 0.16 from time to time, but with the new quickbar, it started to happen even more, as now, you have only one inventory, and no reserved slots in the quickbar. To solve that, we just extended the "principal" of the hand. When you pick something from the inventory, the hand icon appears on the slot. As long as you hold the thing in your cursor, the hand stays there, and prevents other things from being inserted there. This way, you should always be able to return the currently selected item into your inventory as long as you didn't get it from external source like a chest.

                      -
                      +
                      The hand is protecting the slot from the robots.

                      Terrain generation updates (TOGoS) @@ -372,8 +356,8 @@ For 0.17 we've reworked biter placement using a system similar to that with which we got resource placement under control. The size and frequency controls now act more like most people would expect, with frequency increasing the number of bases, and size changing the size of each base.

                      - -
                      New preview UI showing the effects of enemy base controls. + +
                      New preview UI showing the effects of enemy base controls. In reality the preview takes a couple seconds to regenerate after every change, but the regeneration part is skipped in this animation to clearly show the effects of the controls.

                      @@ -392,8 +376,8 @@ Or you can turn it way down to make cliffs very rare or be completely absent.

                      - -
                      Changing cliff frequency and continuity. Since cliffs are based on elevation, + +
                      Changing cliff frequency and continuity. Since cliffs are based on elevation, you'll have to turn frequency way up if you want lots of layers even near the starting lake.

                      @@ -414,8 +398,8 @@ and with overlap in some cases.

                      - -
                      Rectangles. + +
                      Rectangles.

                      Having the humidity-aux-tile chart is all well and good, but doesn't tell the whole story, @@ -427,8 +411,8 @@ linearly from north-south and west-east, respectively.

                      - -
                      Using 'debug-moisture' and 'debug-aux' generators to drive moisture and aux, respectively. + +
                      Using 'debug-moisture' and 'debug-aux' generators to drive moisture and aux, respectively.

                      This map helped us realize that, rather than having controls @@ -437,8 +421,8 @@ because 'aux' doesn't mean anything).

                      - -
                      Sliding the moisture and aux bias sliders to make the world more or less grassy or red-deserty. + +
                      Sliding the moisture and aux bias sliders to make the world more or less grassy or red-deserty.

                      A pet project of mine has been to @@ -495,14 +479,14 @@ There's a slider to let you change the size of the island(s).

                      - +

                      Maps with multiple starting points will have multiple islands.

                      - -
                      PvP islands! + +
                      PvP islands!

                      And speaking of scale sliders, we're expanding their range from ± a factor of 2 (the old 'very low' to 'very high' settings) @@ -521,7 +505,7 @@

                      High-res accumulators (Ernestas, Albert)

                      - +

                      The design of the accumulator has been always good. The 4 very visible cylinders, looking like giant batteries, Tesla poles and the electric beams perfectly telegraphed its function in terms of style and readability. That’s why for the high-res conversion we were very careful about keeping this entity as it was. @@ -530,11 +514,11 @@ The only thing that was a bit disturbing (for some) are the poles crossing to each other when more than one accumulator is placed in a row. So we decided to fix it (or break it). The rest of the work was making the entity compatible for the actual look of the game. But in essence accumulators are still the same.

                      - +

                      As always, let us know what you think on our forum.

                      - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/la-nacion/expected.html b/article_scraper/resources/tests/readability/la-nacion/expected.html index 6e2f466..7d6b778 100644 --- a/article_scraper/resources/tests/readability/la-nacion/expected.html +++ b/article_scraper/resources/tests/readability/la-nacion/expected.html @@ -25,7 +25,7 @@ informe ubicó "con domicilios en Palermo y en el centro porteño", y aseguraba incluso que había sido visto "en Neuquén, Río Negro y Chubut durante el juicio a Jones Huala".

                      -

                      +

                      Foto: LA NACION
                      @@ -93,4 +93,4 @@ administrativista y analista internacional

                      - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/lazy-image-1/expected.html b/article_scraper/resources/tests/readability/lazy-image-1/expected.html index d1e9a2d..f4cc5f7 100644 --- a/article_scraper/resources/tests/readability/lazy-image-1/expected.html +++ b/article_scraper/resources/tests/readability/lazy-image-1/expected.html @@ -1,7 +1,6 @@ -
                      -
                      +
                      -

                      Vincent Vallet +

                      Vincent Vallet

                      @@ -25,7 +24,7 @@

                      -

                      +

                      @@ -48,7 +47,7 @@

                      -

                      +

                      @@ -87,7 +86,7 @@
                      node --prof app.js
                      -

                      +

                      @@ -103,7 +102,7 @@
                      node --prof-process isolate-0xnnnnn-v8.log > processed.txt
                      -

                      +

                      @@ -121,7 +120,7 @@

                      -

                      +

                      @@ -173,9 +172,7 @@

                      And here is how to make a CPU profiling with this module:

                      -
                      - -
                      +

                      As you can see, all the data is returned in variable “profile”. Basically, it’s a simple JSON object representing all the call stack and the CPU consumption for each function. And if you want to use an Async/await syntax you can install the module “inspector-api”.

                      @@ -183,15 +180,11 @@

                      It also comes with a built-in exporter to send data to S3, with this method you don’t write anything on the disk!

                      -
                      - -
                      +

                      If you use another storage system you can just collect the data and export it by yourself.

                      -
                      - -
                      +

                      And now, CPU profiling on-demand!

                      @@ -200,7 +193,7 @@

                      -

                      +

                      @@ -210,18 +203,14 @@

                      Here is a simple example of a server using the “ws” module to send a message to a unique instance.

                      -
                      - -
                      +

                      Of course, it only works with one instance, but it’s a fake project to demonstrate the principle ;)

                      Now we can request our server to ask it to send a message to our instance and start/stop a CPU profiling. In your instance, you can handle the CPU profiling like this:

                      -
                      - -
                      +

                      To sum up: we are able to trigger a CPU profiling, on-demand, in real-time, without interruption or connection to the server. Data can be collected on the disk (and extracted later) or can be sent to S3 (or any other system, PR are welcomed on the inspector-api project).

                      @@ -230,8 +219,7 @@ And because the profiler is a part of V8 itself, the format of the generated JSON file is compatible with the Chrome dev tools.

                      -
                      -
                      +

                      How can we identify an issue?

                      @@ -258,7 +246,7 @@

                      -

                      +

                      @@ -268,14 +256,13 @@

                      As you can notice, we have to zoom to the profile if we want to see the call stack, because after optimizations the API was able to take a lot more traffic. Now every function in the call stack looks like a microtask.

                      -
                      -
                      +

                      And now our application is able to serve more than 200,000 requests in 20 seconds; we increased the performance by a factor of 100k!

                      -

                      +

                      @@ -343,5 +330,4 @@ https://www.npmjs.com/package/inspector-api

                    - - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/lazy-image-2/expected.html b/article_scraper/resources/tests/readability/lazy-image-2/expected.html index eec6097..d19b93a 100644 --- a/article_scraper/resources/tests/readability/lazy-image-2/expected.html +++ b/article_scraper/resources/tests/readability/lazy-image-2/expected.html @@ -1,7 +1,7 @@
                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    @@ -9,7 +9,7 @@ Nothing beats the passion of a true fan writing about something they love. That's what you're about to see here: one of the richest, most amazing tributes to a great gaming series that we've ever run on Kotaku. Warning #1: this one might make your browser chug, so close your other tabs. Warning #2: This piece might make it hurt a little more than there are no new Metroid games from Nintendo on the horizon.

                    - Please note that this is the first half of Mama Robotnik's massive Metroid story. The second half can be found here. The entire post is a greatly-expanded version of a post that Mama Robotnik originally published on the NeoGAF forum before revising and reworking it for Kotaku. Take it away, MR... + Please note that this is the first half of Mama Robotnik's massive Metroid story. The second half can be found here. The entire post is a greatly-expanded version of a post that Mama Robotnik originally published on the NeoGAF forum before revising and reworking it for Kotaku. Take it away, MR...

                    @@ -73,9 +73,7 @@

                    Each story section includes one or more of the below superscript annotations, to help inform the reader as to where the lore or speculation comes from. A brief key:

                    -
                    - -
                    +

                    With all that said, let us begin. @@ -88,13 +86,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: 3ihard) + (Artist: 3ihard)

                    On an unknown planet in the universe, a race of avian humanoids evolved. The species that will come to be known as the Chozo possessed great strength, agility and intelligence. The species is peaceful, and is driven by a social/religious value that nature is sacred. [M1 / MP] @@ -102,12 +100,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: 3ihard) + (Artist: 3ihard)

                    Certain blessed individuals were born with a unique gift – the vague comprehension of events set to take place in the distant future. Driven by these prophecies, the race advanced quickly and became space faring. With abstract predictions of a hostile universe, the Chozo developed powered armour and armaments to defend themselves. Prepared for whatever hostility awaited them, the Chozo explored the stars. [M1 / MP / MP SP] @@ -115,7 +113,7 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    @@ -128,12 +126,12 @@
                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Slapshoft) + (Artist: Slapshoft)

                    Peace reigned through the cosmos. The alliance was a great universal renaissance, and lasted for a millennium. [MPH SP / MP2 SP / MP3 SP] @@ -141,12 +139,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: 3ihard) + (Artist: 3ihard)

                    During this calm, the Chozo prophets continued to receive increasingly severe visions of chaos. They foresaw a universe consumed by war, horrors evolving on distant worlds, and a great toxicity waiting to be unleashed. As the visions became more precise, the species isolated itself from its allies. The Chozo civilisation became intensely driven to fight this unclear threat. [MP / MP3 SP / M2 SP /MF SP] @@ -154,12 +152,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: DanilLovesFood) + (Artist: DanilLovesFood)

                    The Chozo needed more potent tools to locate this unseen and distant danger. They expanded their SkyTown colony on the gas giant Elysia and remade it into a vast interstellar observatory powered by the planet’s endless storms. The facility was of such scale that an entire species of artificial life became necessary to maintain it. The Chozo created their first species – the mechanical Elysians. [MP3 / MP3 SP] @@ -170,13 +168,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Mechanical-Hand) + (Artist: Mechanical-Hand)

                    The data received was terrifying. The blue planet registered as an organism, somehow existing as both mineral and flesh. Impossible radiation pulsed from the surface, which overwhelmed the Chozo satellite and rendered it inert. The location of the planet was immediately lost, and only a broad region of space could be established. [MP3] @@ -194,13 +192,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: 3ihard) + (Artist: 3ihard)

                    Finding the exact location of the deadly planet becomes a priority for the Chozo civilisation. A gargantuan ship was assembled on the holy planet of Tallon IV, and dispatched to the dark corner of the universe where the Elysian satellite had been lost. The greatest Chozo warriors, scientists and prophets commenced a crusade for the hostile world, knowing that they would likely never make it back home. During their long journey, they conceive a name for their target: Phaaze. [MP3 SP] @@ -208,12 +206,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: SesakaTH) + (Artist: SesakaTH)

                    Generations passed, and the Chozo expedition finally located the blue planet. As they approached, they witnessed the living world as it endlessly pulsed with blue and white energies. There was nothing like this place elsewhere in the universe. [MP3 SP] @@ -224,13 +222,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: SamusMMX) + (Artist: SamusMMX)

                    For billions of years, Phaaze had mutated and irradiated life that evolved on its surface. The strongest creatures had survived to thrive in an ecosystem of beautiful poison. It was then that the Chozo understood: They had arrived at the home of the most devastating and deranged creatures in the known universe. [MP3 SP] @@ -248,13 +246,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Adoublea) + (Artist: Adoublea)

                    Chozo Warriors in power suits fought the planet’s creatures as they swarmed the ship. The soldiers battled, watching their kin die around them, in a desperate mission to buy time. [MP3 SP] @@ -265,13 +263,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Methuselah3000) + (Artist: Methuselah3000)

                    The Metroid creature was unleashed onto the planet, and the radiation caused it to reproduce quickly. The resulting swarm of Metroids began to consume the planet’s monstrosities and established themselves as Phaaze’s apex predator. [MP3 SP] @@ -282,13 +280,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: 3ihard) + (Artist: 3ihard)

                    On Phaaze, the Metroid presence lasted decades as they consumed the planet’s superpredators. The corpses of Chozo warriors were absorbed into the planet, and their battle armour slowly became weathered and scattered. The planet’s slow sentience developed an outrage that seethed under its continents. It had been violated by the Chozo. As the Metroid infestation began to die out, Phaaze developed a very primitive concept of purpose and retribution. [MP3 SP] @@ -303,12 +301,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Methuselah3000) + (Artist: Methuselah3000)

                    As the Tallon IV seed began its centuries of travelling through space, the lone Metroid within absorbed vast amounts of Phazon and radiation. It became self-aware, and grew in size, intelligence and strength. It used the ruined pieces of Chozo armour to construct itself an exoskeleton, and descended into madness. The exoskeleton failed to protect the creature from the endless radiation, and the Metroid became as exotic as Phaaze’s extinct superpredators: An undying tortured genius. [MP / MP2 / MP3 / MP3 SP] @@ -326,12 +324,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: PeaceFistArtist) + (Artist: PeaceFistArtist)

                    The Chozo detected strange readings coming from a world in a desolate part of the galaxy. The planet had been previously considered so obscure and unimportant that it didn’t have a name, merely catalogued with the codename SR388 and left to its obscurity. A detailed analysis picked up some extremely strange observations; though seemingly mineral, the caverns and liquids beneath the surface shifted with metabolic rhythm – as if the whole planet was somehow a living thing. A ship was dispatched, and the strongest Chozo warriors braved the caverns beneath the surface. [M2 / M2 SP] @@ -342,13 +340,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: LightningArts) + (Artist: LightningArts)

                    Beneath that planet, evolution had been won by an abomination that could steal the flesh, abilities, memories and strengths of all of its prey. The creature was a fusion of energy and plasma that parasitized on life itself. With no word suitable for the nightmare they had discovered, the Chozo simply called it X. If these X-Parasites somehow gained access to the wider universe, there would be no force that could contain them. [M2] @@ -363,12 +361,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: 3ihard) + (Artist: 3ihard)

                    The Chozo tried to recreate the plan of their ancestors – the use of Metroids to pacify superpredators too dangerous to exist. Without access to the same planetary radiation and materials the Phaaze expedition had, progress was slow. As the war against the planet was raging around them, the Chozo scientists were able to engineer Metroids, but not a variant strong enough to overcome the X-Parasites. As more and more Chozo died protecting the laboratory, a different approach was needed. [M2 SP] @@ -376,12 +374,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Starshadow76) + (Artist: Starshadow76)

                    The Chozo succeeded in engineering a Metroid Queen, a colossal creature who would lay Metroid Hatchling eggs. When hatched, these resulting Metroids were strong and durable creatures, and finally potent enough to combat the X menace. The Chozo knew that to completely suppress the parasites, the Metroid presence on SR388 had to be permanent. To ensure that the species would not overfeed on the environment and wipe out its food chains, the scientists hardwired an instinct into the Metroid Queen’s feral mind: Only thirty-nine Metroids were to exist on the planet at any one time. This, it was hoped, would keep their numbers high enough to destroy any X re-emergence, but low enough so that they wouldn’t consume the rest of the life on the planet, and starve to death from lack of food. [M2] @@ -399,13 +397,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Hermax669) + (Artist: Hermax669)

                    SR388 had been violated by the Chozo. Though very different to Phaaze, SR388 had its own vague sense of awareness. It perceived the Chozo as a viral infection, and the dead X-Parasites as part of itself. It understood loss, and shook with ancient rage. [MP3 SP / M2 SP / MF SP] @@ -443,13 +441,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Hameed) + (Artist: Hameed)

                    The Leviathan crashed down, and rained poison and death unto the world. The impact survivors watched as their sacred nature succumbed to the mutagens leaking from the seed, and barricaded themselves in their temples as the flora and fauna transformed. Phazon spread beneath the surface of the dying planet, and radiation storms battered the surface. [MP] @@ -457,12 +455,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Source: Riivka) + (Source: Riivka)

                    The Chozos’ punishment for their sins, and the fulfilment of Phaaze’s wrath, reached biblical proportions. The Chozo of Tallon IV did not get to rest in peace. Their life energies suffered from Phazon disruption, and upon death they became mad ghosts who screamed forever as they were torn in and out of the material world. In this purgatory, the undead immaterial Chozo murdered anyone they could find. [MP / MP3 SP] @@ -473,13 +471,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Source: Havoc-DM) + (Source: Havoc-DM)

                    Within the Impact Crater, Metroid Prime remained trapped within the Chozo energy field. In its armour constructed from ancient Chozo power suits, it continued its wait to be unleashed on the universe. [MP / MP3 SP] @@ -490,13 +488,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: 3ihard) + (Artist: 3ihard)

                    On the planet Aether, an ancient race of mystics known as the Luminoth received the horrifying data coming from Tallon IV. In distant times, the Luminoth and the Chozo had been steadfast allies – until the Chozo retreat ended their ties. Desperate to assist, the Luminoth began to organise a rescue mission. [MP2 / MP2 SP] @@ -504,12 +502,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: PugOfDoon) + (Artist: PugOfDoon)

                    A dark transmission was received from Tallon IV. The image showed a screaming, ghostly Chozo figure, flickering in and out of the living universe. In its undead madness, it spoke for its kin. It raged that they would kill anyone who would set foot on their world. The planet was pandemonium, a cursed world on which the dead could not die. As the signal faded, the Luminoth realised that there was no one left alive to rescue. [MP SP / MP2 SP] @@ -524,12 +522,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: SesaKath) + (Artist: SesaKath)

                    The Luminoth used their great Light to engineer a small pocket universe, a dark lifeless echo of existence. The plan was bold: they would use the Light of Aether to surgically open the fabric of reality in the path of the Phazon seed, and allow it to harmlessly enter the pocket universe. If all went well, they would be saved. [MP2 SP] @@ -540,13 +538,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Adriencgd) + (Artist: Adriencgd)

                    Phaaze’s seed was a sum of living materials beyond Luminoth comprehension. It hit the pocket universe with incalculable force, and a tsunami of exotic energy ruptured space and time. The equipment containing the dark reality lost containment within moments, and the Luminoth were helpless as their creation expanded across the entire planet. A wave of dark energy absorbed creatures, structures and land into the dark universe, and what was once a single planet – was now two. [MP2 / MP2 SP] @@ -557,13 +555,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: AzureParagon) + (Artist: AzureParagon)

                    In the dark universe, a grotesque world was being born. Previous inhabitants of Aether, having been absorbed when containment of the pocket universe was lost, found themselves twisted by the corrosive new reality around them. Most perished, and their flesh fed the strange carnivorous fungi that glowed sickly colours. Some survivors were mutated by the Phazon slowly spreading beneath the surface, and adapted to survive in the hostility. [MP2 SP] @@ -574,13 +572,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Xxkiragaxx) + (Artist: Xxkiragaxx)

                    A womb of Phazon mutation and dark energies had birthed a cunning and ferocious horde. The Ing erupted through the cracks between the two worlds, and commenced slaughter. They were fought back by the Luminoth, and a war began between the two parallel worlds. The Ing invaded Aether with regularity, and killed, pillaged and destroyed all that they could find. The Luminoth retaliated and crusaded into Dark Aether in their Light Suits, on suicide missions to exterminate the source of the Ing menace. Both sides suffered colossal casualties as the decades went on. [MP2] @@ -588,12 +586,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: 3ihard) + (Artist: 3ihard)

                    The war was being lost by the Luminoth. The Ing had exterminated most of their race and had stolen too many vital technologies. With the theft of essential energy components from the Light of Aether power network, they had become a defeated people. [MP2] @@ -607,13 +605,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Kihunter) + (Artist: Kihunter)

                    As the Chozo and the Luminoth fell, so too did other ancient races. In a distant part of the universe, the Alimbics were a militaristic society that maintained peace in their galactic cluster. Their order was shattered when a murderous entity, originating from someplace beyond the understood universe, plummeted into one of their worlds. The creature emerged from the devastation as a gaseous entity, and assumed an Alimbic-styled body to begin its onslaught. [MPH] @@ -624,13 +622,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Sesakath) + (Artist: Sesakath)

                    The Alimbics performed an act of supreme sacrifice. They combined the mental energies of their entire race to forge a prison for Gorea. The psychic prison held it bound, and it was transplanted into an organic vessel called The Oubliette. The vessel was launched into the void outside of the universe, a course that would keep its indestructible prisoner in exile forever. The systems of the prison ship were tasked to scan the every molecule of the imprisoned Gorea, and devise an Omega weapon that could be used to kill it. [MPH / MPH SP] @@ -644,13 +642,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Sesakath) + (Artist: Sesakath)

                    As the old races of the universe died around them, the lizard people of Bryyo faced their own challenges. The Bryyonians were an advanced, space-faring race who had learned much from their Chozo allies. Their society was a deeply polarised one, with tensions eternal between the scientific and religious factions.[MP3] @@ -672,12 +670,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Kaiquesilva) + (Artist: Kaiquesilva)

                    On a small, rainy planet called Zebes, the last known Chozo colony had watched the stars with impotence. This small settlement of the nearly-extinct avian race witnessed the end of the great universal renaissance, and the slow beginning of a new chapter in galactic history. Gradually, the younger races were launching their first satellites into space. In time, new empires would rise to take the place of the old. [M1 / M1 SP] @@ -685,12 +683,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: 3ihard) + (Artist: 3ihard)

                    Zebes prophets’ saw the visions the Chozo had always endured: great wars, spreading poison and death. And suddenly, something bold was foreseen. [M1 SP / MP3 SP] @@ -698,12 +696,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Fddt) + (Artist: Fddt)

                    A great hunter, clad in orange, red and green. The Chozo glimpsed a future hero, alone in the darkness beneath worlds, fighting so that good could survive evil. They saw her curing poisoned planets, and ending galactic wars. They saw the universe’s one chance to survive its apocalyptic future. They saw the only one who could defy prophecy. [M1 / MP3 SP] @@ -737,13 +735,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Mr-Corr) + (Artist: Mr-Corr)

                    First contact was brief and furious. On that day, the warning went out to all the worlds of the Federation: Beware the Space Pirates. Though no state of war was officially declared, the empires attacked each other on sight. The Galactic Federation was large enough to repress any meaningful incursions into their space. [M1 SP / MP SP / MP3 SP / SM SP] @@ -761,13 +759,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Methuselah3000) + (Artist: Methuselah3000)

                    Barely out of infancy, the young Samus witnessed her family die. A Space Pirate raiding party overwhelmed her colony and murdered everyone she ever knew. By staying silent while surrounded by horror, Samus survived as the Pirates ransacked the settlement and left. [M1] @@ -782,12 +780,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: R3dFiVe) + (Artist: R3dFiVe)

                    Samus Aran reached maturity amongst the Chozo. She was trained in the combat arts of the great extinct races. She was infused with Chozo genetic material so she could employ their technologies. She was educated to be a scientist, an explorer, and a tactician. Everything that was good about the Chozo civilisation was allowed to live on in Samus. [M1] @@ -795,12 +793,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Pyra) + (Artist: Pyra)

                    Samus became an adult, and the Chozo presented her with their greatest works: a toughened power suit and an agile spacecraft, both more potent than anything their race had ever made. The Chozo leader, decaying and blind, told Samus it was time for her to find her destiny in the universe. Samus Aran departed for the stars, and years pass. [M1 / M1 SP] @@ -811,13 +809,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Phobos-Romulus) + (Artist: Phobos-Romulus)

                    The Chozo hid their technologies throughout the planet, in places that they were certain Samus would find them. They concealed a second Power Suit within the walls of their holy temple, having foreseen that Samus may require it in the future. They then returned to the surface to await the inevitable. [M1 SP] @@ -835,12 +833,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Jaagup) + (Artist: Jaagup)

                    The results went beyond High Command’s most optimistic projections. The Space Pirates had created a leader, a desperately needed figure to unite their fragmented empire. They had created their Mother Brain. The great Space Pirate generals Ridley and Kraid arrived at Zebes, ready to pay tribute to their new master and to plan for the future. Mother Brain delivered to the Space Pirates knowledge and power. She told them of a world referenced in her oldest Chozo databanks, a planet bathed in a mutagenic poison waiting to be farmed. She instructed High Command to prepare an armada of ships and invade the planet Tallon IV. [M1 / MP SP] @@ -858,12 +856,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Fireborn Form) + (Artist: Fireborn Form)

                    A Galactic Federation survey team studied the surface, and soon encountered a gelatinous creature that swam through air. The alien defied gravity and physics as it phased through dense rock with ease. It perceived the survey team, and made a few curious chirps in their direction. It then suddenly changed temperament, aggressively charging to latch itself onto the skull of one of the party. The victim died in agony as the Metroid fed on all the energy within, and could not be removed until its prey had been reduced to a dried husk of collapsing matter. The young Metroid had just killed, in a way that science could not explain. [M1 SP / M2 SP] @@ -891,13 +889,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Ojanpohja) + (Artist: Ojanpohja)

                    In her first mission as a Bounty Hunter, Samus Arran was commissioned by the Galactic Federation to neutralise the stolen Metroids. Through careful investigation, Samus discovered that the Pirates are operating from Zebes – her home. She concluded that the Space Pirates had murdered her second family, as they had done with her first. They have took from her everyone she ever loved, and destroyed her two worlds. [M1] @@ -905,25 +903,25 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Stuart Hughe) + (Artist: Stuart Hughe)

                    Samus stormed Zebes and killed everyone in her path. [M1]

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Immarart) + (Artist: Immarart)

                    As her defences were breached, Mother Brain unleashed the great generals Ridley and Kraid. Both were killled, and, desperate to stop the intruder, Mother Brain released the Metroids. Samus Aran exterminated the creatures, and invaded the inner sanctum. [M1] @@ -931,12 +929,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Twigs) + (Artist: Twigs)

                    Samus confronted the malevolent Mother Brain and blasted apart her body. A power overload was caused, and the Tourian facility shook itself apart. Samus evacuated to her ship and tried to leave Zebes, but a Space Pirate battleship in orbit registered her ascent and opened fire. Samus’ gunship plummeted back towards the Zebes and impacted Chozodia, her former home. [M1] @@ -947,13 +945,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Eyes5) + (Artist: Eyes5)

                    Samus found herself surrounded with murals of the dead Chozo, and accepted she was alone in the universe. Overcoming despair, she solved the trials of the Chozodian temple and a concealed power suit was revealed to her. This shining armour was even more potent than the one she had just lost, and was able to integrate the most exotic Chozo technologies. Samus realised the greater meaning of her find; the Chozo had left her gifts for her in places they had foreseen she would traverse. Her adopted family continued to protect her long after their deaths, and she would find their statues cradling survival equipment in the darkest corners of the cosmos. [M1 / MP / MP3 / M2 / SM] @@ -961,12 +959,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Imachinivid) + (Artist: Imachinivid)

                    With her new armaments, Samus cleansed the Space Pirate presence from Zebes. She came to be known as “The Hunter”, and the Space Pirates learned that they will always be hunted down for what they did to her families. They fled the planet in terror. [M1 / MP / MP2 / MP3] @@ -984,13 +982,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Lightningarts) + (Artist: Lightningarts)

                    Samus lost Ridley in the planet’s stormy atmosphere, and elected to land in a nearby jungle to conceal her presence from the Pirate ground forces. Exploring the surroundings, Samus discovered that the planet was once home to the bulk of the extinct Chozo civilisation. In a great temple Samus studied poetic murals that told of the Phazon comet that had struck their world. The scribblings informed her of a creature trapped deep in the comet that they referred to as “The Worm,” and of the powerful shield they erected to prevent its escape. Samus read their last prophecy; that a hero would traverse fire and ice, jungle and cave, and find twelve sacred keys that would deactivate the barrier and allow passage to the Impact Crater. This saviour from the stars would bring down the ancient shield, and destroy the worm that infected their planet. [MP1] @@ -1001,13 +999,13 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: R-Sraven) + (Artist: R-Sraven)

                    Samus hunted the Pirates and accessed their computer logs. The Empire had found quantities of an intensely potent mutagen called Phazon. Laboratories across the outpost experimented with the substance, and in a short space of time they had created prototypes for the next generation of their races: powerful Phazon-fuelled juggernauts. Should these advances continue, Samus knew that the Space Pirates would be able to conquer the Galactic Federation. [MP1] @@ -1015,12 +1013,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Greenstranger) + (Artist: Greenstranger)

                    In the most secure laboratory, Samus made a devastating discovery. The Space Pirates had used Phazon to create an army of stable clone Metroids and lost containment. The Metroid creatures were roaming the caverns deep in the planet, reproducing and mutating as the Phazon influenced their physiology. [MP1] @@ -1028,12 +1026,12 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Ohimseeinstars) + (Artist: Ohimseeinstars)

                    Samus’ final discovery was the most horrific. The powerful, poisonous Phazon was not a rare material on Tallon IV. Despite the Chozo shield containing the Impact Crater, the substance had spread and consumed the world inside-out. The core of the planet presented the Space Pirates with a vast supply of Phazon, enough to fuel their conquest of the stars. [MP1] @@ -1051,7 +1049,7 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    @@ -1064,12 +1062,12 @@
                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Sabretoontigers) + (Artist: Sabretoontigers)

                    Seemingly dying, Metroid Prime lashed out, grabbing a layer of material from Samus Aran’s armour. The creature melted into a pool of Phazon particles, and the bounty hunter evacuated the Impact Crater. [MP1] @@ -1087,22 +1085,22 @@

                    -

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes +

                    Illustration for article titled The Spectacular Story Of emMetroid/em, One Of Gamings Richest Universes

                    - (Artist: Imachinivid) + (Artist: Imachinivid)

                    Dark Samus clawed its way out of the Impact Crater. It departed Tallon IV to spread its venom across the stars, and to sow the seeds of a great war. [MP1 / MP2 / MP3]

                    - Click here for the second half of this epic story. + Click here for the second half of this epic story.

                    -
                    +

                    - Mama Robotnik is a video game historian living somewhere in the British Empire. He specialises in unearthing lost gaming media, but also enjoys a good long essay about his favourite games every now and then. He drinks a lot of tea, and has a horrendously naughty black and white cat called Blossom. If you would like to contact him, he responds to his private messages over at NeoGAF. + Mama Robotnik is a video game historian living somewhere in the British Empire. He specialises in unearthing lost gaming media, but also enjoys a good long essay about his favourite games every now and then. He drinks a lot of tea, and has a horrendously naughty black and white cat called Blossom. If you would like to contact him, he responds to his private messages over at NeoGAF.

                    -
                    + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/lazy-image-3/expected.html b/article_scraper/resources/tests/readability/lazy-image-3/expected.html index ba34499..874de81 100644 --- a/article_scraper/resources/tests/readability/lazy-image-3/expected.html +++ b/article_scraper/resources/tests/readability/lazy-image-3/expected.html @@ -1,6 +1,6 @@

                    Test Case 1

                    - performance.jpg + performance.jpg

                    Test Case 2

                    - performance.jpg -
                    + performance.jpg + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/lemonde-1/expected.html b/article_scraper/resources/tests/readability/lemonde-1/expected.html index 5d58c38..e870560 100644 --- a/article_scraper/resources/tests/readability/lemonde-1/expected.html +++ b/article_scraper/resources/tests/readability/lemonde-1/expected.html @@ -1,7 +1,5 @@
                    -

                    - -

                    +

                    Les députés ont, sans surprise, adopté à une large majorité (438 contre 86 et 42 abstentions) le projet de loi sur le renseignement défendu par le gouvernement lors d’un vote solennel, mardi 5 mai. Il sera désormais examiné par le Sénat, puis le Conseil constitutionnel, prochainement saisi par 75 députés. Dans un souci d'apaisement, François Hollande avait annoncé par avance qu'il saisirait les Sages.

                    Revivez le direct du vote à l’Assemblée avec vos questions.

                    Ont voté contre : 10 députés socialistes (sur 288), 35 UMP (sur 198), 11 écologistes (sur 18), 11 UDI (sur 30), 12 députés Front de gauche (sur 15) et 7 non-inscrits (sur 9). Le détail est disponible sur le site de l'Assemblée nationale.

                    @@ -18,7 +16,7 @@

                    Les « boîtes noires »

                    Une des dispositions les plus contestées de ce projet de loi prévoit de pouvoir contraindre les fournisseurs d’accès à Internet (FAI) à « détecter une menace terroriste sur la base d’un traitement automatisé ». Ce dispositif – autorisé par le premier ministre par tranche de quatre mois – permettrait de détecter, en temps réel ou quasi réel, les personnes ayant une activité en ligne typique de « schémas » utilisés par les terroristes pour transmettre des informations.

                    En pratique, les services de renseignement pourraient installer chez les FAI une « boîte noire » surveillant le trafic. Le contenu des communications – qui resterait « anonyme » – ne serait pas surveillé, mais uniquement les métadonnées : origine ou destinataire d’un message, adresse IP d’un site visité, durée de la conversation ou de la connexion… Ces données ne seraient pas conservées.

                    -

                    La Commission nationale informatique et libertés (CNIL), qui critique fortement cette disposition. La CNIL soulève notamment que l’anonymat de ces données est très relatif, puisqu’il peut être levé.

                    +

                    La Commission nationale informatique et libertés(CNIL), qui critique fortement cette disposition. La CNIL soulève notamment que l’anonymat de ces données est très relatif, puisqu’il peut être levé.

                    Lire aussi : Les critiques de la CNIL contre le projet de loi sur le renseignement

                    Le dispositif introduit une forme de « pêche au chalut » – un brassage très large des données des Français à la recherche de quelques individus. Le gouvernement se défend de toute similarité avec les dispositifs mis en place par la NSA américaine, arguant notamment que les données ne seront pas conservées et que cette activité sera contrôlée par une toute nouvelle commission aux moyens largement renforcés. Il s’agit cependant d’un dispositif très large, puisqu’il concernera tous les fournisseurs d’accès à Internet, et donc tous les internautes français.

                    L’élargissement de la surveillance électronique pour détecter les « futurs » terroristes

                    @@ -38,4 +36,4 @@

                    La durée de conservation des données collectées – et l’adaptation de cette durée à la technique employée – a par ailleurs été inscrite dans la loi, contrairement au projet initial du gouvernement qui entendait fixer ces limites par décret. Elle pourra aller jusqu’à cinq ans dans le cas des données de connexion.

                    Un dispositif pour les lanceurs d’alerte

                    La loi prévoit également une forme de protection pour les agents qui seraient témoins de surveillance illégale. Ces lanceurs d’alerte pourraient solliciter la CNCTR, voire le premier ministre, et leur fournir toutes les pièces utiles. La CNCTR pourra ensuite aviser le procureur de la République et solliciter la Commission consultative du secret de la défense nationale afin que cette dernière « donne au premier ministre son avis sur la possibilité de déclassifier tout ou partie de ces éléments ». Aucune mesure de rétorsion ne pourra viser l’agent qui aurait dénoncé des actes potentiellement illégaux.

                    -
                    + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/liberation-1/expected.html b/article_scraper/resources/tests/readability/liberation-1/expected.html index af1c912..4b911de 100644 --- a/article_scraper/resources/tests/readability/liberation-1/expected.html +++ b/article_scraper/resources/tests/readability/liberation-1/expected.html @@ -8,9 +8,7 @@

                    L’appareil, mis à disposition par Airbus, était arrivé à Katmandou mercredi matin avec 55 personnels de santé et humanitaires, ainsi que 25 tonnes de matériel (abris, médicaments, aide alimentaire). Un deuxième avion dépêché par Paris, qui était immobilisé aux Emirats depuis mardi avec 20 tonnes de matériel, est arrivé jeudi à Katmandou, dont le petit aéroport est engorgé par le trafic et l’afflux d’aide humanitaire. Il devait lui aussi ramener des Français, «les plus éprouvés» par la catastrophe et les «plus vulnérables (blessés, familles avec enfants)», selon le ministère des Affaires étrangères.

                    2 209 Français ont été localisés sains et saufs tandis que 393 n’ont pas encore pu être joints, selon le Quai d’Orsay. Environ 400 Français ont demandé à être rapatriés dans les vols mis en place par la France.

                    Le séisme a fait près de 5 500 morts et touche huit des 28 millions d’habitants du Népal. Des dizaines de milliers de personnes sont sans abri.

                    -

                    - -

                    + - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/lifehacker-post-comment-load/expected.html b/article_scraper/resources/tests/readability/lifehacker-post-comment-load/expected.html index 45013f9..929daa3 100644 --- a/article_scraper/resources/tests/readability/lifehacker-post-comment-load/expected.html +++ b/article_scraper/resources/tests/readability/lifehacker-post-comment-load/expected.html @@ -1,5 +1,4 @@ -
                    -

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    +

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    @@ -33,7 +32,7 @@ -

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    +

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    @@ -55,16 +54,7 @@ -
                      -
                    • -Color: Stores use color to make products attractive and eye-catching, but they also use color on price labels. Red stands out and can encourage taking action, that's why it's commonly associated with sale signage and advertising. When you see red, remember what they're trying to do to your brain with that color. You don't to buy something just because it's on sale.
                    • -
                    • -Navigation Roadblocks: Stores force you to walk around stuff you don't need to find the stuff you are really after. Have a list of what you need before you go in, go straight to it, and imagine it's the only item in the store.
                    • -
                    • -The Touch Factor: Stores place items they want to sell in easy to reach locations and encourage you to touch them. Don't do it! As soon as you pick something up, you're more likely to buy it because your mind suddenly takes ownership of the object. Don't pick anything up and don't play with display items.
                    • -
                    • -Scents and Sounds: You'll probably hear classic, upbeat tunes when you walk into a store. The upbeat music makes you happy and excited, while playing familiar songs makes you feel comfortable. They also use pleasant smells to put your mind at ease. A happy, comfortable mind at ease is a dangerous combination for your brain when shopping. There's not much you can do to avoid this unless you shop online, but it's good to be aware of it.
                    • -
                    +
                    • Color: Stores use color to make products attractive and eye-catching, but they also use color on price labels. Red stands out and can encourage taking action, that's why it's commonly associated with sale signage and advertising. When you see red, remember what they're trying to do to your brain with that color. You don't to buy something just because it's on sale.
                    • Navigation Roadblocks: Stores force you to walk around stuff you don't need to find the stuff you are really after. Have a list of what you need before you go in, go straight to it, and imagine it's the only item in the store.
                    • The Touch Factor: Stores place items they want to sell in easy to reach locations and encourage you to touch them. Don't do it! As soon as you pick something up, you're more likely to buy it because your mind suddenly takes ownership of the object. Don't pick anything up and don't play with display items.
                    • Scents and Sounds: You'll probably hear classic, upbeat tunes when you walk into a store. The upbeat music makes you happy and excited, while playing familiar songs makes you feel comfortable. They also use pleasant smells to put your mind at ease. A happy, comfortable mind at ease is a dangerous combination for your brain when shopping. There's not much you can do to avoid this unless you shop online, but it's good to be aware of it.
                    @@ -129,9 +119,7 @@ -

                    -Make a List of Everything You Own and Do Some Decluttering -

                    +

                    Make a List of Everything You Own and Do Some Decluttering

                    @@ -142,7 +130,7 @@ -

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    +

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    @@ -207,16 +195,7 @@ -
                      -
                    • -Need: You absolutely need this item to get by on a day to day basis.
                    • -
                    • -Sometimes Need: You don't need this item every day, but you use it on a somewhat regular basis.
                    • -
                    • -Want: You bought this item because you wanted it, not because you needed it.
                    • -
                    • -Crap: You don't have a good reason why you have it and you already know it needs to go (there's probably a few of these items, at least).
                    • -
                    +
                    • Need: You absolutely need this item to get by on a day to day basis.
                    • Sometimes Need: You don't need this item every day, but you use it on a somewhat regular basis.
                    • Want: You bought this item because you wanted it, not because you needed it.
                    • Crap: You don't have a good reason why you have it and you already know it needs to go (there's probably a few of these items, at least).
                    @@ -238,11 +217,7 @@ -
                      -
                    • When was the last time I used this?
                    • -
                    • When will I use this again?
                    • -
                    • Does this item bring you joy?
                    • -
                    +
                    • When was the last time I used this?
                    • When will I use this again?
                    • Does this item bring you joy?
                    @@ -286,7 +261,7 @@ -

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    +

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    @@ -330,7 +305,7 @@ -

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    +

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    @@ -374,7 +349,7 @@ -

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    +

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    @@ -418,7 +393,7 @@ -

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    +

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    @@ -440,15 +415,7 @@ -
                      -
                    • Is this a planned purchase?
                    • -
                    • Will it end up in the "crap" list picture one day?
                    • -
                    • -Where am I going to put it?
                    • -
                    • Have I included this in my budget?
                    • -
                    • -Why do I want/need it?
                    • -
                    +
                    • Is this a planned purchase?
                    • Will it end up in the "crap" list picture one day?
                    • Where am I going to put it?
                    • Have I included this in my budget?
                    • Why do I want/need it?
                    @@ -481,7 +448,7 @@ -

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    +

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    @@ -569,8 +536,7 @@
                    -

                    Whenever I consistently cut quality time for my main interests out of my life, I start to long for them. As you saw in that "typical" day, I do make room for spending time with my family, but my other two main interests are absent. If that happens too many days in a row, I start to really miss reading. I start to really miss playing thoughtful board games with friends. What happens after that? I start to substitute. When I don't have the opportunity to sit down for an hour or even for half an hour and really get lost in a book, I start looking for an alternative way to fill in the tiny slices of time that I do have. I'll spend money.

                    -
                    +

                    Whenever I consistently cut quality time for my main interests out of my life, I start to long for them. As you saw in that "typical" day, I do make room for spending time with my family, but my other two main interests are absent. If that happens too many days in a row, I start to really miss reading. I start to really miss playing thoughtful board games with friends. What happens after that? I start to substitute. When I don't have the opportunity to sit down for an hour or even for half an hour and really get lost in a book, I start looking for an alternative way to fill in the tiny slices of time that I do have. I'll spend money.

                    @@ -603,7 +569,7 @@ -

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    +

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    @@ -690,5 +656,4 @@ -

                    Photos by cmgirl (Shutterstock), Macrovector (Shutterstock), J E Theriot, davidd, George Redgrave, David Amsler, Arup Malakar, J B, jakerome, 401(K) 2012.

                    -
                    +

                    Photos by cmgirl (Shutterstock), Macrovector (Shutterstock), J E Theriot, davidd, George Redgrave, David Amsler, Arup Malakar, J B, jakerome, 401(K) 2012.

                    \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/lifehacker-working/expected.html b/article_scraper/resources/tests/readability/lifehacker-working/expected.html index 45013f9..929daa3 100644 --- a/article_scraper/resources/tests/readability/lifehacker-working/expected.html +++ b/article_scraper/resources/tests/readability/lifehacker-working/expected.html @@ -1,5 +1,4 @@ -
                    -

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    +

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    @@ -33,7 +32,7 @@ -

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    +

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    @@ -55,16 +54,7 @@ -
                      -
                    • -Color: Stores use color to make products attractive and eye-catching, but they also use color on price labels. Red stands out and can encourage taking action, that's why it's commonly associated with sale signage and advertising. When you see red, remember what they're trying to do to your brain with that color. You don't to buy something just because it's on sale.
                    • -
                    • -Navigation Roadblocks: Stores force you to walk around stuff you don't need to find the stuff you are really after. Have a list of what you need before you go in, go straight to it, and imagine it's the only item in the store.
                    • -
                    • -The Touch Factor: Stores place items they want to sell in easy to reach locations and encourage you to touch them. Don't do it! As soon as you pick something up, you're more likely to buy it because your mind suddenly takes ownership of the object. Don't pick anything up and don't play with display items.
                    • -
                    • -Scents and Sounds: You'll probably hear classic, upbeat tunes when you walk into a store. The upbeat music makes you happy and excited, while playing familiar songs makes you feel comfortable. They also use pleasant smells to put your mind at ease. A happy, comfortable mind at ease is a dangerous combination for your brain when shopping. There's not much you can do to avoid this unless you shop online, but it's good to be aware of it.
                    • -
                    +
                    • Color: Stores use color to make products attractive and eye-catching, but they also use color on price labels. Red stands out and can encourage taking action, that's why it's commonly associated with sale signage and advertising. When you see red, remember what they're trying to do to your brain with that color. You don't to buy something just because it's on sale.
                    • Navigation Roadblocks: Stores force you to walk around stuff you don't need to find the stuff you are really after. Have a list of what you need before you go in, go straight to it, and imagine it's the only item in the store.
                    • The Touch Factor: Stores place items they want to sell in easy to reach locations and encourage you to touch them. Don't do it! As soon as you pick something up, you're more likely to buy it because your mind suddenly takes ownership of the object. Don't pick anything up and don't play with display items.
                    • Scents and Sounds: You'll probably hear classic, upbeat tunes when you walk into a store. The upbeat music makes you happy and excited, while playing familiar songs makes you feel comfortable. They also use pleasant smells to put your mind at ease. A happy, comfortable mind at ease is a dangerous combination for your brain when shopping. There's not much you can do to avoid this unless you shop online, but it's good to be aware of it.
                    @@ -129,9 +119,7 @@ -

                    -Make a List of Everything You Own and Do Some Decluttering -

                    +

                    Make a List of Everything You Own and Do Some Decluttering

                    @@ -142,7 +130,7 @@ -

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    +

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    @@ -207,16 +195,7 @@ -
                      -
                    • -Need: You absolutely need this item to get by on a day to day basis.
                    • -
                    • -Sometimes Need: You don't need this item every day, but you use it on a somewhat regular basis.
                    • -
                    • -Want: You bought this item because you wanted it, not because you needed it.
                    • -
                    • -Crap: You don't have a good reason why you have it and you already know it needs to go (there's probably a few of these items, at least).
                    • -
                    +
                    • Need: You absolutely need this item to get by on a day to day basis.
                    • Sometimes Need: You don't need this item every day, but you use it on a somewhat regular basis.
                    • Want: You bought this item because you wanted it, not because you needed it.
                    • Crap: You don't have a good reason why you have it and you already know it needs to go (there's probably a few of these items, at least).
                    @@ -238,11 +217,7 @@ -
                      -
                    • When was the last time I used this?
                    • -
                    • When will I use this again?
                    • -
                    • Does this item bring you joy?
                    • -
                    +
                    • When was the last time I used this?
                    • When will I use this again?
                    • Does this item bring you joy?
                    @@ -286,7 +261,7 @@ -

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    +

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    @@ -330,7 +305,7 @@ -

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    +

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    @@ -374,7 +349,7 @@ -

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    +

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    @@ -418,7 +393,7 @@ -

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    +

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    @@ -440,15 +415,7 @@ -
                      -
                    • Is this a planned purchase?
                    • -
                    • Will it end up in the "crap" list picture one day?
                    • -
                    • -Where am I going to put it?
                    • -
                    • Have I included this in my budget?
                    • -
                    • -Why do I want/need it?
                    • -
                    +
                    • Is this a planned purchase?
                    • Will it end up in the "crap" list picture one day?
                    • Where am I going to put it?
                    • Have I included this in my budget?
                    • Why do I want/need it?
                    @@ -481,7 +448,7 @@ -

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    +

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    @@ -569,8 +536,7 @@
                    -

                    Whenever I consistently cut quality time for my main interests out of my life, I start to long for them. As you saw in that "typical" day, I do make room for spending time with my family, but my other two main interests are absent. If that happens too many days in a row, I start to really miss reading. I start to really miss playing thoughtful board games with friends. What happens after that? I start to substitute. When I don't have the opportunity to sit down for an hour or even for half an hour and really get lost in a book, I start looking for an alternative way to fill in the tiny slices of time that I do have. I'll spend money.

                    -
                    +

                    Whenever I consistently cut quality time for my main interests out of my life, I start to long for them. As you saw in that "typical" day, I do make room for spending time with my family, but my other two main interests are absent. If that happens too many days in a row, I start to really miss reading. I start to really miss playing thoughtful board games with friends. What happens after that? I start to substitute. When I don't have the opportunity to sit down for an hour or even for half an hour and really get lost in a book, I start looking for an alternative way to fill in the tiny slices of time that I do have. I'll spend money.

                    @@ -603,7 +569,7 @@ -

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    +

                    How to Program Your Mind to Stop Buying Crap You Don’t Need

                    @@ -690,5 +656,4 @@ -

                    Photos by cmgirl (Shutterstock), Macrovector (Shutterstock), J E Theriot, davidd, George Redgrave, David Amsler, Arup Malakar, J B, jakerome, 401(K) 2012.

                    -
                    +

                    Photos by cmgirl (Shutterstock), Macrovector (Shutterstock), J E Theriot, davidd, George Redgrave, David Amsler, Arup Malakar, J B, jakerome, 401(K) 2012.

                    \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/links-in-tables/expected.html b/article_scraper/resources/tests/readability/links-in-tables/expected.html index 7b80bfd..99c80b8 100644 --- a/article_scraper/resources/tests/readability/links-in-tables/expected.html +++ b/article_scraper/resources/tests/readability/links-in-tables/expected.html @@ -54,7 +54,7 @@ to inefficient patches. Watch how much the compressed text on the right side changes from a one-letter change in the uncompressed text on the left:

                    -

                    +

                    File-by-File therefore is based on detecting changes in the uncompressed data. To generate a patch, we first decompress both old and new files before computing @@ -105,132 +105,54 @@ Patching? Here are examples of app updates already using File-by-File Patching:

                    - ------ - - - - - + + + + + + +
                    -

                    Application

                    -
                    -

                    Original Size

                    -
                    -

                    Previous (BSDiff) Patch Size

                    + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

                    Application

                    +

                    Original Size

                    +

                    Previous (BSDiff) Patch Size

                    (% vs original)

                    -
                    -

                    File-by-File Patch Size (% vs original)

                    -
                    - - -

                    71.1 MB

                    -
                    -

                    13.4 MB (-81%)

                    -
                    -

                    8.0 MB (-89%)

                    -
                    - - -

                    32.7 MB

                    -
                    -

                    17.5 MB (-46%)

                    -
                    -

                    9.6 MB (-71%)

                    -
                    -
                    -

                    Gmail

                    -
                    -
                    -

                    17.8 MB

                    -
                    -

                    7.6 MB (-57%)

                    -
                    -

                    7.3 MB (-59%)

                    -
                    - - -

                    18.9 MB

                    -
                    -

                    17.2 MB (-9%)

                    -
                    -

                    13.1 MB (-31%)

                    -
                    -
                    -

                    Kindle

                    -
                    -
                    -

                    52.4 MB

                    -
                    -

                    19.1 MB (-64%)

                    -
                    -

                    8.4 MB (-84%)

                    -
                    - - -

                    16.2 MB

                    -
                    -

                    7.7 MB (-52%)

                    -
                    -

                    1.2 MB (-92%)

                    -
                    +

                    File-by-File Patch Size (% vs original)

                    +
                    +

                    71.1 MB

                    +

                    13.4 MB (-81%)

                    +

                    8.0 MB (-89%)

                    +
                    +

                    32.7 MB

                    +

                    17.5 MB (-46%)

                    +

                    9.6 MB (-71%)

                    +
                    +

                    17.8 MB

                    +

                    7.6 MB (-57%)

                    +

                    7.3 MB (-59%)

                    +
                    +

                    18.9 MB

                    +

                    17.2 MB (-9%)

                    +

                    13.1 MB (-31%)

                    +
                    +

                    52.4 MB

                    +

                    19.1 MB (-64%)

                    +

                    8.4 MB (-84%)

                    +
                    +

                    16.2 MB

                    +

                    7.7 MB (-52%)

                    +

                    1.2 MB (-92%)

                    +

                    Disclaimer: if you see different patch sizes when you press "update" manually, that is because we are not currently using File-by-file for -interactive updates, only those done in the background.

                    -

                    +interactive updates, only those done in the background.

                    Saving data and making our users (& developers!) happy

                    @@ -252,6 +174,6 @@ As a developer if you're interested in reducing your APK size still further, here are some general tips on reducing APK size.

                    -

                    +

                    - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/lwn-1/expected.html b/article_scraper/resources/tests/readability/lwn-1/expected.html index a1b0e35..7eb3e2b 100644 --- a/article_scraper/resources/tests/readability/lwn-1/expected.html +++ b/article_scraper/resources/tests/readability/lwn-1/expected.html @@ -26,32 +26,28 @@ program for third-party manufacturers interested in using the "Arduino" bran

                    Mapping and data mining with QGIS 2.8

                    By Nathan Willis -
                    March 25, 2015

                    +
                    March 25, 2015

                    QGIS is a free-software geographic information system (GIS) tool; it provides a unified interface in which users can import, edit, and analyze geographic-oriented information, and it can produce output as varied as printable maps or map-based web services. The project recently made its first update to be designated a long-term release (LTR), and that release is both poised for high-end usage and friendly to newcomers alike.

                    The new release is version 2.8, which was unveiled on March 2. An official change log is available on the QGIS site, while the release itself was announced primarily through blog posts (such as this post by Anita Graser of the project's steering committee). Downloads are available for a variety of platforms, including packages for Ubuntu, Debian, Fedora, openSUSE, and several other distributions.

                    -

                    [QGIS main interface]

                    -

                    As the name might suggest, QGIS is a Qt application; the latest release will, in fact, build on both Qt4 and Qt5, although the binaries released by the project come only in Qt4 form at present. 2.8 has been labeled a long-term release (LTR)—which, in this case, means that the project has committed to providing backported bug fixes for one full calendar year, and that the 2.8.x series is in permanent feature freeze. The goal, according to the change log, is to provide a stable version suitable for businesses and deployments in other large organizations. The change log itself points out that the development of quite a few new features was underwritten by various GIS companies or university groups, which suggests that taking care of these organizations' needs is reaping dividends for the project.

                    +

                    [QGIS main interface]

                    As the name might suggest, QGIS is a Qt application; the latest release will, in fact, build on both Qt4 and Qt5, although the binaries released by the project come only in Qt4 form at present. 2.8 has been labeled a long-term release (LTR)—which, in this case, means that the project has committed to providing backported bug fixes for one full calendar year, and that the 2.8.x series is in permanent feature freeze. The goal, according to the change log, is to provide a stable version suitable for businesses and deployments in other large organizations. The change log itself points out that the development of quite a few new features was underwritten by various GIS companies or university groups, which suggests that taking care of these organizations' needs is reaping dividends for the project.

                    For those new to QGIS (or GIS in general), there is a detailed new-user tutorial that provides a thorough walk-through of the data-manipulation, mapping, and analysis functions. Being a new user, I went through the tutorial; although there are a handful of minor differences between QGIS 2.8 and the version used in the text (primarily whether specific features were accessed through a toolbar or right-click menu), on the whole it is well worth the time.

                    QGIS is designed to make short work of importing spatially oriented data sets, mining information from them, and turning the results into a meaningful visualization. Technically speaking, the visualization output is optional: one could simply extract the needed statistics and results and use them to answer some question or, perhaps, publish the massaged data set as a database for others to use.

                    But well-made maps are often the easiest way to illuminate facts about populations, political regions, geography, and many other topics when human comprehension is the goal. QGIS makes importing data from databases, web-mapping services (WMS), and even unwieldy flat-file data dumps a painless experience. It handles converting between a variety of map-referencing systems more or less automatically, and allows the user to focus on finding the useful attributes of the data sets and rendering them on screen.

                    Here be data

                    The significant changes in QGIS 2.8 fall into several categories. There are updates to how QGIS handles the mathematical expressions and queries users can use to filter information out of a data set, improvements to the tools used to explore the on-screen map canvas, and enhancements to the "map composer" used to produce visual output. This is on top of plenty of other under-the-hood improvements, naturally.

                    -

                    [QGIS query builder]

                    -

                    In the first category are several updates to the filtering tools used to mine a data set. Generally speaking, each independent data set is added to a QGIS project as its own layer, then transformed with filters to focus in on a specific portion of the original data. For instance, the land-usage statistics for a region might be one layer, while roads and buildings for the same region from OpenStreetMap might be two additional layers. Such filters can be created in several ways: there is a "query builder" that lets the user construct and test expressions on a data layer, then save the results, an SQL console for performing similar queries on a database, and spreadsheet-like editing tools for working directly on data tables.

                    +

                    [QGIS query builder]

                    In the first category are several updates to the filtering tools used to mine a data set. Generally speaking, each independent data set is added to a QGIS project as its own layer, then transformed with filters to focus in on a specific portion of the original data. For instance, the land-usage statistics for a region might be one layer, while roads and buildings for the same region from OpenStreetMap might be two additional layers. Such filters can be created in several ways: there is a "query builder" that lets the user construct and test expressions on a data layer, then save the results, an SQL console for performing similar queries on a database, and spreadsheet-like editing tools for working directly on data tables.

                    All three have been improved in this release. New are support for if(condition, true, false) conditional statements, a set of operations for geometry primitives (e.g., to test whether regions overlap or lines intersect), and an "integer divide" operation. Users can also add comments to their queries to annotate their code, and there is a new custom function editor for writing Python functions that can be called in mathematical expressions within the query builder.

                    It is also now possible to select only some rows in a table, then perform calculations just on the selection—previously, users would have to extract the rows of interest into a new table first. Similarly, in the SQL editor, the user can highlight a subset of the SQL query and execute it separately, which is no doubt helpful for debugging.

                    There have also been several improvements to the Python and Processing plugins. Users can now drag-and-drop Python scripts onto QGIS and they will be run automatically. Several new analysis algorithms are now available through the Processing interface that were previously Python-only; they include algorithms for generating grids of points or vectors within a region, splitting layers and lines, generating hypsometric curves, refactoring data sets, and more.

                    Maps in, maps out

                    -

                    [QGIS simplify tool]

                    -

                    The process of working with on-screen map data picked up some improvements in the new release as well. Perhaps the most fundamental is that each map layer added to the canvas is now handled in its own thread, so fewer hangs in the user interface are experienced when re-rendering a layer (as happens whenever the user changes the look of points or shapes in a layer). Since remote databases can also be layers, this multi-threaded approach is more resilient against connectivity problems, too. The interface also now supports temporary "scratch" layers that can be used to merge, filter, or simply experiment with a data set, but are not saved when the current project is saved.

                    +

                    [QGIS simplify tool]

                    The process of working with on-screen map data picked up some improvements in the new release as well. Perhaps the most fundamental is that each map layer added to the canvas is now handled in its own thread, so fewer hangs in the user interface are experienced when re-rendering a layer (as happens whenever the user changes the look of points or shapes in a layer). Since remote databases can also be layers, this multi-threaded approach is more resilient against connectivity problems, too. The interface also now supports temporary "scratch" layers that can be used to merge, filter, or simply experiment with a data set, but are not saved when the current project is saved.

                    For working on the canvas itself, polygonal regions can now use raster images (tiled, if necessary) as fill colors, the map itself can be rotated arbitrarily, and objects can be "snapped" to align with items on any layer (not just the current layer). For working with raster image layers (e.g., aerial photographs) or simply creating new geometric shapes by hand, there is a new digitizing tool that can offer assistance by locking lines to specific angles, automatically keeping borders parallel, and other niceties.

                    There is a completely overhauled "simplify" tool that is used to reduce the number of extraneous vertices of a vector layer (thus reducing its size). The old simplify tool provided only a relative "tolerance" setting that did not correspond directly to any units. With the new tool, users can set a simplification threshold in terms of the underlying map units, layer-specific units, pixels, and more—and, in addition, the tool reports how much the simplify operation has reduced the size of the data.

                    -

                    [QGIS style editing]

                    -

                    There has also been an effort to present a uniform interface to one of the most important features of the map canvas: the ability to change the symbology used for an item based on some data attribute. The simplest example might be to change the line color of a road based on whether its road-type attribute is "highway," "service road," "residential," or so on. But the same feature is used to automatically highlight layer information based on the filtering and querying functionality discussed above. The new release allows many more map attributes to be controlled by these "data definition" settings, and provides a hard-to-miss button next to each attribute, through which a custom data definition can be set.

                    +

                    [QGIS style editing]

                    There has also been an effort to present a uniform interface to one of the most important features of the map canvas: the ability to change the symbology used for an item based on some data attribute. The simplest example might be to change the line color of a road based on whether its road-type attribute is "highway," "service road," "residential," or so on. But the same feature is used to automatically highlight layer information based on the filtering and querying functionality discussed above. The new release allows many more map attributes to be controlled by these "data definition" settings, and provides a hard-to-miss button next to each attribute, through which a custom data definition can be set.

                    QGIS's composer module is the tool used to take project data and generate a map that can be used outside of the application (in print, as a static image, or as a layer for MapServer or some other software tool, for example). Consequently, it is not a simple select-and-click-export tool; composing the output can involve a lot of choices about which data to make visible, how (and where) to label it, and how to make it generally accessible.

                    The updated composer in 2.8 now has a full-screen mode and sports several new options for configuring output. For instance, the user now has full control over how map axes are labeled. In previous releases, the grid coordinates of the map could be turned on or off, but the only options were all or nothing. Now, the user can individually choose whether coordinates are displayed on all four sides, and can even choose in which direction vertical text labels will run (so that they can be correctly justified to the edge of the map, for example).

                    There are, as usual, many more changes than there is room to discuss. Some particularly noteworthy improvements include the ability to save and load bookmarks for frequently used data sources (perhaps most useful for databases, web services, and other non-local data) and improvements to QGIS's server module. This module allows one QGIS instance to serve up data accessible to other QGIS applications (for example, to simply team projects). The server can now be extended with Python plugins and the data layers that it serves can be styled with style rules like those used in the desktop interface.

                    @@ -60,8 +56,7 @@ curves, refactoring data sets, and more.

                    Development activity in LibreOffice and OpenOffice

                    By Jonathan Corbet -
                    March 25, 2015

                    -

                    The LibreOffice project was announced with great fanfare in September 2010. Nearly one year later, the OpenOffice.org project (from which LibreOffice was forked) was +
                    March 25, 2015

                    The LibreOffice project was announced with great fanfare in September 2010. Nearly one year later, the OpenOffice.org project (from which LibreOffice was forked) was cut loose from Oracle and found a new home as an Apache project. It is fair to say that the rivalry between the two projects in the time since then has been strong. Predictions that one project or the other would fail have not been borne out, but that does not mean that the two projects are equally successful. A look at the two projects' development communities reveals some interesting differences.

                    Release histories

                    @@ -582,8 +577,7 @@ cut loose from Oracle and found a new home as an Apache project. It is fair 38.0% - Collabora Multimedia - + Collabora Multimedia 6531 29.5% @@ -642,7 +636,7 @@ bark but the caravan moves on." That may be true, but, in this case, the

                    Comments (74 posted)

                    Page editor: Jonathan Corbet -

                    +

                    Inside this week's LWN.net Weekly Edition

                    • Security: Toward secure package downloads; New vulnerabilities in drupal, mozilla, openssl, python-django ...
                    • @@ -651,12 +645,9 @@ bark but the caravan moves on." That may be true, but, in this case, the
                    • Development: A look at GlusterFS; LibreOffice Online; Open sourcing existing code; Secure Boot in Windows 10; ...
                    • Announcements: A Turing award for Michael Stonebraker, Sébastien Jodogne, ReGlue are Free Software Award winners, Kat Walsh joins FSF board of directors, Cyanogen, ...

                    Next page: Security>> -

                    - +

                    - - - - + + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/medicalnewstoday/expected.html b/article_scraper/resources/tests/readability/medicalnewstoday/expected.html index f4761bf..e04399c 100644 --- a/article_scraper/resources/tests/readability/medicalnewstoday/expected.html +++ b/article_scraper/resources/tests/readability/medicalnewstoday/expected.html @@ -1,102 +1,102 @@ -
                    -
                    - Neuroscience tells us that most of the work done by our brains happens on an unconscious level, but when does that "a-ha!" moment occur? And what happens during it? New research investigates. -
                    -

                    hand holding brain lightbulb
                    - A new study investigates when the 'a-ha!' moment takes place in the brain, and how similar it is to other brain processes. -

                    -

                    - Many of us have noticed that we seem to get our best ideas when we're in the shower, or that we can find the answer to a difficult question when we least think about it. -

                    -

                    - A large body of neuroscientific studies has pointed out that the brain does a lot of work in its spare time, the so-called idle state - wherein the brain does not appear to be thinking about anything at all - and that this is the time when it works at its hardest to find solutions to complex problems. -

                    -

                    - With time and advances in neuroscience, it has become more and more clear to researchers that Freud was right and the mind, as well as the brain, do work unconsciously. In fact, it would be safe to say that what is consciously known to us is just the tip of a much larger iceberg, deeply submerged in unconscious waters. -

                    -

                    - But the exact moment at which information becomes known to us - or when the "tip of the iceberg" pierces through the water, and the unconscious becomes conscious - has been somewhat of a mystery, from a neuroscientific point of view. -

                    -

                    - In other words, we do not yet know when that intellectually satisfying "a-ha!" moment takes place, or what the biology is behind it. This is why a team of researchers at Columbia University in New York City, NY, set out to investigate this moment in more detail. -

                    - -

                    - The scientists were led by Michael Shadlen, Ph.D., of Columbia University's Mortimer B. Zuckerman Mind Brain Behavior Institute, and the findings were published in the journal Current Biology. -

                    - -

                    - The hypothesis -

                    -

                    - Dr. Shadlen and colleagues started out from an interesting hypothesis, one which they derived from previous research on the neurobiological processes involved in decision-making. -

                    -

                    - As the authors explain, research conducted in both monkeys and humans shows that many of our decisions take place at a point when the brain "feels" as though it has gathered enough information, or when a critical level of information has been accumulated. -

                    - -

                    - This process of making a decision once the brain has accumulated enough evidence bears the name of "bounded evidence accumulation." Reaching this threshold is important because, although the brain does not use all of the information available, it uses as much as is necessary to make a speedy yet accurate decision. -

                    -

                    - The researchers wondered whether or not this threshold is also responsible for our "eureka!" moments. -

                    - -

                    - In Dr. Shadlen's words, "Could the moment when the brain believes it has accumulated enough evidence be tied to the person's awareness of having decided - that important 'a-ha!' moment?" -

                    - -

                    - Examining the 'a-ha!' moment -

                    -

                    - To answer this question, the scientists asked five people to perform a "direction discrimination" task. In it, the participants looked at dots on a computer screen. The dots moved randomly, as grains of sand would when blown by the wind. The participants were asked to say in which direction the dots had moved. -

                    -

                    - The moment they "decided" which direction the dots seemed to be taking was considered to be the equivalent of the "a-ha!" moment. -

                    -

                    - In the center of the screen, there was a fixed point and a clock. The display also had two "choice targets" - namely, left or right - and these were the directions in which the participants had to decide that the dots had moved. -

                    -

                    - Shortly after the dots had stopped moving, the participants used an electronic, hand-held stylus to move the cursor in the direction that they thought the dots had moved. -

                    -

                    - To determine when the decision was made, the researchers used the technique called "mental chronometry" - that is, after they made their decision, the participants were asked to move the clock backward to the point when they felt that they had consciously done so. -

                    - -

                    - "The moment in time indicated by the participants - this mental chronometry - was entirely subjective; it relied solely on their own estimation of how long it took them to make that decision," Dr. Shadlen says. "And because it was purely subjective, in principle it ought to be unverifiable." -

                    - -

                    - 'A-ha' moment similar to making a decision -

                    -

                    - However, by applying a mathematical model, the scientists were able to match these subjective decision times to the bounded evidence accumulation process. -

                    -

                    - The subjective decision times fit so well with what the scientists determined as the evidence accumulation threshold that they were able to predict the choices of four of the five participants. -

                    -

                    - "If the time reported to us by the participants was valid, we reasoned that it might be possible to predict the accuracy of the decision," explains Dr. Shadlen. -

                    -

                    - "We incorporated a kind of mathematical trick, based on earlier studies, which showed that the speed and accuracy of decisions were tied together by the same brain function." This "mathematical trick" was the evidence accumulation model. -

                    -
                    -

                    - "Essentially, the act of becoming consciously aware of a decision conforms to the same process that the brain goes through to complete a decision, even a simple one - such as whether to turn left or right." -

                    -

                    - Michael Shadlen, Ph.D. -

                    -
                    -

                    - In other words, the study shows that the conscious awareness of the "a-ha!" moment takes place precisely when the brain has reached that threshold of evidence accumulation. -

                    -

                    - The findings provide unique insights into the biology of consciousness, say the researchers, and they bring us closer to understanding the biological basis of decisions, ethics, and, generally, the human mind. -

                    - -
                    +
                    +
                    + Neuroscience tells us that most of the work done by our brains happens on an unconscious level, but when does that "a-ha!" moment occur? And what happens during it? New research investigates. +
                    +

                    hand holding brain lightbulb
                    + A new study investigates when the 'a-ha!' moment takes place in the brain, and how similar it is to other brain processes. +

                    +

                    + Many of us have noticed that we seem to get our best ideas when we're in the shower, or that we can find the answer to a difficult question when we least think about it. +

                    +

                    + A large body of neuroscientific studies has pointed out that the brain does a lot of work in its spare time, the so-called idle state - wherein the brain does not appear to be thinking about anything at all - and that this is the time when it works at its hardest to find solutions to complex problems. +

                    +

                    + With time and advances in neuroscience, it has become more and more clear to researchers that Freud was right and the mind, as well as the brain, do work unconsciously. In fact, it would be safe to say that what is consciously known to us is just the tip of a much larger iceberg, deeply submerged in unconscious waters. +

                    +

                    + But the exact moment at which information becomes known to us - or when the "tip of the iceberg" pierces through the water, and the unconscious becomes conscious - has been somewhat of a mystery, from a neuroscientific point of view. +

                    +

                    + In other words, we do not yet know when that intellectually satisfying "a-ha!" moment takes place, or what the biology is behind it. This is why a team of researchers at Columbia University in New York City, NY, set out to investigate this moment in more detail. +

                    + +

                    + The scientists were led by Michael Shadlen, Ph.D., of Columbia University's Mortimer B. Zuckerman Mind Brain Behavior Institute, and the findings were published in the journal Current Biology. +

                    + +

                    + The hypothesis +

                    +

                    + Dr. Shadlen and colleagues started out from an interesting hypothesis, one which they derived from previous research on the neurobiological processes involved in decision-making. +

                    +

                    + As the authors explain, research conducted in both monkeys and humans shows that many of our decisions take place at a point when the brain "feels" as though it has gathered enough information, or when a critical level of information has been accumulated. +

                    + +

                    + This process of making a decision once the brain has accumulated enough evidence bears the name of "bounded evidence accumulation." Reaching this threshold is important because, although the brain does not use all of the information available, it uses as much as is necessary to make a speedy yet accurate decision. +

                    +

                    + The researchers wondered whether or not this threshold is also responsible for our "eureka!" moments. +

                    + +

                    + In Dr. Shadlen's words, "Could the moment when the brain believes it has accumulated enough evidence be tied to the person's awareness of having decided - that important 'a-ha!' moment?" +

                    + +

                    + Examining the 'a-ha!' moment +

                    +

                    + To answer this question, the scientists asked five people to perform a "direction discrimination" task. In it, the participants looked at dots on a computer screen. The dots moved randomly, as grains of sand would when blown by the wind. The participants were asked to say in which direction the dots had moved. +

                    +

                    + The moment they "decided" which direction the dots seemed to be taking was considered to be the equivalent of the "a-ha!" moment. +

                    +

                    + In the center of the screen, there was a fixed point and a clock. The display also had two "choice targets" - namely, left or right - and these were the directions in which the participants had to decide that the dots had moved. +

                    +

                    + Shortly after the dots had stopped moving, the participants used an electronic, hand-held stylus to move the cursor in the direction that they thought the dots had moved. +

                    +

                    + To determine when the decision was made, the researchers used the technique called "mental chronometry" - that is, after they made their decision, the participants were asked to move the clock backward to the point when they felt that they had consciously done so. +

                    + +

                    + "The moment in time indicated by the participants - this mental chronometry - was entirely subjective; it relied solely on their own estimation of how long it took them to make that decision," Dr. Shadlen says. "And because it was purely subjective, in principle it ought to be unverifiable." +

                    + +

                    + 'A-ha' moment similar to making a decision +

                    +

                    + However, by applying a mathematical model, the scientists were able to match these subjective decision times to the bounded evidence accumulation process. +

                    +

                    + The subjective decision times fit so well with what the scientists determined as the evidence accumulation threshold that they were able to predict the choices of four of the five participants. +

                    +

                    + "If the time reported to us by the participants was valid, we reasoned that it might be possible to predict the accuracy of the decision," explains Dr. Shadlen. +

                    +

                    + "We incorporated a kind of mathematical trick, based on earlier studies, which showed that the speed and accuracy of decisions were tied together by the same brain function." This "mathematical trick" was the evidence accumulation model. +

                    +
                    +

                    + "Essentially, the act of becoming consciously aware of a decision conforms to the same process that the brain goes through to complete a decision, even a simple one - such as whether to turn left or right." +

                    +

                    + Michael Shadlen, Ph.D. +

                    +
                    +

                    + In other words, the study shows that the conscious awareness of the "a-ha!" moment takes place precisely when the brain has reached that threshold of evidence accumulation. +

                    +

                    + The findings provide unique insights into the biology of consciousness, say the researchers, and they bring us closer to understanding the biological basis of decisions, ethics, and, generally, the human mind. +

                    + +
                    \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/medium-1/expected.html b/article_scraper/resources/tests/readability/medium-1/expected.html index 7331fd8..3c34e1a 100644 --- a/article_scraper/resources/tests/readability/medium-1/expected.html +++ b/article_scraper/resources/tests/readability/medium-1/expected.html @@ -1,5 +1,4 @@ -
                    -
                    +

                    Better Student Journalism

                    @@ -29,9 +28,8 @@
                    -

                    -

                    -
                    +

                    +

                    topleftpixel.com

I started discovering beautiful things the web could do with images: @@ -53,9 +51,8 @@

-

-

-
+

+

We don’t know what we don’t know

We spent much of the rest of the school year asking “what should we be @@ -103,9 +100,8 @@

-

-

-
+

+

From our 2011 research

Common problems in student newsrooms (2013)

@@ -158,9 +154,8 @@
-

-

-
+

+

We designed many of these slides to help explain to ourselves what we were doing
@@ -170,24 +165,21 @@ is a print issue. However…

    -
  1. -The handoff -
    Problems arise because web editors are given roles that absolve the rest +
  2. The handoff +
    Problems arise because web editors are given roles that absolve the rest of the editors from thinking about the web. All editors should be involved in the process of story development for the web. While it’s a good idea to have one specific editor manage the website, contributors and editors should all play with and learn about the web. Instead of “can you make a computer do XYZ for me?”, we should be saying “can you show me how to make a computer do XYZ?”
  3. -
  4. -Not just social media
    A +
  5. Not just social media
    A web editor could do much more than simply being in charge of the social media accounts for the student paper. Their responsibility could include teaching all other editors to be listening to what’s happening online. The web editor can take advantage of live information to change how the student newsroom reports news in real time.
  6. -
  7. -Web (interactive) editor
    The +
  8. Web (interactive) editor
    The goal of having a web editor should be for someone to build and tell stories that take full advantage of the web as their medium. Too often the web’s interactivity is not considered when developing the story. The web then @@ -210,17 +202,15 @@
    -

    -

    -
    +

    +

    The current Open Journalism site was a few years in the making. This was an original launch page we use in 2012

    What we know

      -
    • -New process -
      Our rough research has told us newsrooms need to be reorganized. This +
    • New process +
      Our rough research has told us newsrooms need to be reorganized. This includes every part of the newsroom’s workflow: from where a story and its information comes from, to thinking of every word, pixel, and interaction the reader will have with your stories. If I was a photo editor that wanted @@ -232,18 +222,16 @@ “digital manifestos”, it’s about being curious enough that you’ll want to to continue experimenting with your process until you’ve found one that fits your newsroom’s needs.
    • -
    • -More (remote) mentorship -
      Lack of mentorship is still a big problem. Google’s fellowship program is great. The fact that it +
    • More (remote) mentorship +
      Lack of mentorship is still a big problem. Google’s fellowship program is great. The fact that it only caters to United States students isn’t. There are only a handful of internships in Canada where students interested in journalism can get experience writing code and building interactive stories. We’re OK with this for now, as we expect internships and mentorship over the next 5 years between professional newsrooms and student newsrooms will only increase. It’s worth noting that some of that mentorship will likely be done remotely.
    • -
    • -Changing a newsroom culture -
      Skill diversity needs to change. We encourage every student newsroom we +
    • Changing a newsroom culture +
      Skill diversity needs to change. We encourage every student newsroom we talk to, to start building a partnership with their school’s Computer Science department. It will take some work, but you’ll find there are many CS undergrads that love playing with web technologies, and using data to tell stories. @@ -256,35 +244,30 @@

    What we don’t know

      -
    • -Sharing curiosity for the web -
      We don’t know how to best teach students about the web. It’s not efficient +
    • Sharing curiosity for the web +
      We don’t know how to best teach students about the web. It’s not efficient for us to teach coding classes. We do go into newsrooms and get them running their first code exercises, but if someone wants to learn to program, we can only provide the initial push and curiosity. We will be trying out “labs” with a few schools next school year to hopefully get a better idea of how to teach students about the web.
    • -
    • -Business -
      We don’t know how to convince the business side of student papers that +
    • Business +
      We don’t know how to convince the business side of student papers that they should invest in the web. At the very least we’re able to explain that having students graduate with their current skill set is painful in the current job market.
    • -
    • -The future -
      We don’t know what journalism or the web will be like in 10 years, but +
    • The future +
      We don’t know what journalism or the web will be like in 10 years, but we can start encouraging students to keep an open mind about the skills they’ll need. We’re less interested in preparing students for the current newsroom climate, than we are in teaching students to have the ability to learn new tools quickly as they come and go.
    - -
    +

    What we’re trying to share with others

      -
    • -A concise guide to building stories for the web -
      There are too many options to get started. We hope to provide an opinionated +
    • A concise guide to building stories for the web +
      There are too many options to get started. We hope to provide an opinionated guide that follows both our experiences, research, and observations from trying to teach our peers.
    @@ -300,9 +283,8 @@
    -

    -

    -
    +

    +

    2012

    This is a start

    @@ -323,9 +305,8 @@
    -

    -

    -
    +

    +

    Let’s talk. Let’s listen. @@ -340,5 +321,4 @@ manifesto™© we just think it’s pretty cool to share what we’ve learned so far, and hope you’ll do the same. We’re all in this together.

    - - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/medium-2/expected.html b/article_scraper/resources/tests/readability/medium-2/expected.html index 413bc78..9f06aab 100644 --- a/article_scraper/resources/tests/readability/medium-2/expected.html +++ b/article_scraper/resources/tests/readability/medium-2/expected.html @@ -1,25 +1 @@ -
    -

    -
    Words need defenders.

    On Behalf of “Literally”

    -

    You either are a “literally” abuser or know of one. If you’re anything like me, hearing the word “literally” used incorrectly causes a little piece of your soul to whither and die. Of course I do not mean that literally, I mean that figuratively. An abuser would have said: “Every time a person uses that word, a piece of my soul literally withers and dies.” Which is terribly, horribly wrong.

    -

    For whatever bizarre reason, people feel the need to use literally as a sort of verbal crutch. They use it to emphasize a point, which is silly because they’re already using an analogy or a metaphor to illustrate said point. For example: “Ugh, I literally tore the house apart looking for my remote control!” No, you literally did not tear apart your house, because it’s still standing. If you’d just told me you “tore your house apart” searching for your remote, I would’ve understood what you meant. No need to add “literally” to the sentence.

    -

    Maybe I should define literally.

    -
    Literally means actually. When you say something literally happened, you’re describing the scene or situation as it actually happened.
    -

    So you should only use literally when you mean it. It should not be used in hyperbole. Example: “That was so funny I literally cried.” Which is possible. Some things are funny enough to elicit tears. Note the example stops with “literally cried.” You cannot literally cry your eyes out. The joke wasn’t so funny your eyes popped out of their sockets.

    -

    When in Doubt, Leave it Out

    -

    “I’m so hungry I could eat a horse,” means you’re hungry. You don’t need to say “I’m so hungry I could literally eat a horse.” Because you can’t do that in one sitting, I don’t care how big your stomach is.

    -

    “That play was so funny I laughed my head off,” illustrates the play was amusing. You don’t need to say you literally laughed your head off, because then your head would be on the ground and you wouldn’t be able to speak, much less laugh.

    -

    “I drove so fast my car was flying,” we get your point: you were speeding. But your car is never going fast enough to fly, so don’t say your car was literally flying.

    -

    Insecurities?

    -

    Maybe no one believed a story you told as a child, and you felt the need to prove that it actually happened. No really, mom, I literally climbed the tree. In efforts to prove truth, you used literally to describe something real, however outlandish it seemed. Whatever the reason, now your overuse of literally has become a habit.

    -

    Hard Habit to Break?

    -

    Abusing literally isn’t as bad a smoking, but it’s still an unhealthy habit (I mean that figuratively). Help is required in order to break it.

    -

    This is my version of an intervention for literally abusers. I’m not sure how else to do it other than in writing. I know this makes me sound like a know-it-all, and I accept that. But there’s no excuse other than blatant ignorance to misuse the word “literally.” So just stop it.

    -

    Don’t say “Courtney, this post is so snobbish it literally burned up my computer.” Because nothing is that snobbish that it causes computers to combust. Or: “Courtney, your head is so big it literally cannot get through the door.” Because it can, unless it’s one of those tiny doors from Alice in Wonderland and I need to eat a mushroom to make my whole body smaller.

    -

    No One’s Perfect

    -

    And I’m not saying I am. I’m trying to restore meaning to a word that’s lost meaning. I’m standing up for literally. It’s a good word when used correctly. People are butchering it and destroying it every day (figuratively speaking) and the massacre needs to stop. Just as there’s a coalition of people against the use of certain fonts (like Comic Sans and Papyrus), so should there be a coalition of people against the abuse of literally.

    -

    Saying it to Irritate?

    -

    Do you misuse the word “literally” just to annoy your know-it-all or grammar police friends/acquaintances/total strangers? If so, why? Doing so would be like me going outside when it’s freezing, wearing nothing but a pair of shorts and t-shirt in hopes of making you cold by just looking at me. Who suffers more?

    -

    Graphical Representation

    -

    Matthew Inman of “The Oatmeal” wrote a comic about literally. Abusers and defenders alike should check it out. It’s clear this whole craze about literally is driving a lot of us nuts. You literally abusers are killing off pieces of our souls. You must be stopped, or the world will be lost to meaninglessness forever. Figuratively speaking.

    -
    +

    Words need defenders.

    On Behalf of “Literally”

    You either are a “literally” abuser or know of one. If you’re anything like me, hearing the word “literally” used incorrectly causes a little piece of your soul to whither and die. Of course I do not mean that literally, I mean that figuratively. An abuser would have said: “Every time a person uses that word, a piece of my soul literally withers and dies.” Which is terribly, horribly wrong.

    For whatever bizarre reason, people feel the need to use literally as a sort of verbal crutch. They use it to emphasize a point, which is silly because they’re already using an analogy or a metaphor to illustrate said point. For example: “Ugh, I literally tore the house apart looking for my remote control!” No, you literally did not tear apart your house, because it’s still standing. If you’d just told me you “tore your house apart” searching for your remote, I would’ve understood what you meant. No need to add “literally” to the sentence.

    Maybe I should define literally.

    Literally means actually. When you say something literally happened, you’re describing the scene or situation as it actually happened.

    So you should only use literally when you mean it. It should not be used in hyperbole. Example: “That was so funny I literally cried.” Which is possible. Some things are funny enough to elicit tears. Note the example stops with “literally cried.” You cannot literally cry your eyes out. The joke wasn’t so funny your eyes popped out of their sockets.

    When in Doubt, Leave it Out

    “I’m so hungry I could eat a horse,” means you’re hungry. You don’t need to say “I’m so hungry I could literally eat a horse.” Because you can’t do that in one sitting, I don’t care how big your stomach is.

    “That play was so funny I laughed my head off,” illustrates the play was amusing. You don’t need to say you literally laughed your head off, because then your head would be on the ground and you wouldn’t be able to speak, much less laugh.

    “I drove so fast my car was flying,” we get your point: you were speeding. But your car is never going fast enough to fly, so don’t say your car was literally flying.

    Insecurities?

    Maybe no one believed a story you told as a child, and you felt the need to prove that it actually happened. No really, mom, I literally climbed the tree. In efforts to prove truth, you used literally to describe something real, however outlandish it seemed. Whatever the reason, now your overuse of literally has become a habit.

    Hard Habit to Break?

    Abusing literally isn’t as bad a smoking, but it’s still an unhealthy habit (I mean that figuratively). Help is required in order to break it.

    This is my version of an intervention for literally abusers. I’m not sure how else to do it other than in writing. I know this makes me sound like a know-it-all, and I accept that. But there’s no excuse other than blatant ignorance to misuse the word “literally.” So just stop it.

    Don’t say “Courtney, this post is so snobbish it literally burned up my computer.” Because nothing is that snobbish that it causes computers to combust. Or: “Courtney, your head is so big it literally cannot get through the door.” Because it can, unless it’s one of those tiny doors from Alice in Wonderland and I need to eat a mushroom to make my whole body smaller.

    No One’s Perfect

    And I’m not saying I am. I’m trying to restore meaning to a word that’s lost meaning. I’m standing up for literally. It’s a good word when used correctly. People are butchering it and destroying it every day (figuratively speaking) and the massacre needs to stop. Just as there’s a coalition of people against the use of certain fonts (like Comic Sans and Papyrus), so should there be a coalition of people against the abuse of literally.

    Saying it to Irritate?

    Do you misuse the word “literally” just to annoy your know-it-all or grammar police friends/acquaintances/total strangers? If so, why? Doing so would be like me going outside when it’s freezing, wearing nothing but a pair of shorts and t-shirt in hopes of making you cold by just looking at me. Who suffers more?

    Graphical Representation

    Matthew Inman of “The Oatmeal” wrote a comic about literally. Abusers and defenders alike should check it out. It’s clear this whole craze about literally is driving a lot of us nuts. You literally abusers are killing off pieces of our souls. You must be stopped, or the world will be lost to meaninglessness forever. Figuratively speaking.

    \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/medium-3/expected.html b/article_scraper/resources/tests/readability/medium-3/expected.html index 7b336d9..0ad5996 100644 --- a/article_scraper/resources/tests/readability/medium-3/expected.html +++ b/article_scraper/resources/tests/readability/medium-3/expected.html @@ -1,7 +1,6 @@ -
    -
    +
    -

    John C. Welch +

    John C. Welch

    @@ -10,7 +9,7 @@

    - don’t preach to me
    + don’t preach to me
    Mr. integrity

    @@ -29,8 +28,7 @@

    That, readers, is “The Big Lie”. It is a lie so big that if one ponders the reality of it, as I am going to, one wonders why anyone would believe it. It is a lie and it is one we should stop telling.

    -
    -
    +

    Samantha’s points (I assume you read it, for you are smart people who know the importance of such things) are fairly clear:

    @@ -55,8 +53,7 @@

    If you really believe that, you are the most preciously ignorant person in the world, and can I have your seriously charmed life.

    -
    -
    +

    The response, from all quarters, including Marco, someone who is so sensitive to criticism that the word “useless” is enough to shut him down, who blocked a friend of mine for the high crime of pointing out that his review of podcasting mics centered around higher priced gear and ignored folks without the scratch, who might not be ready for such things, is, in a single word, disgusting. Vomitous even.

    @@ -479,7 +476,7 @@

    -

    Image for post +

    Image for post

    @@ -576,7 +573,7 @@

    -

    Image for post +

    Image for post

    @@ -629,7 +626,7 @@

    -

    Image for post +

    Image for post

    @@ -639,8 +636,7 @@

    Great Feminists are often tools.

    -
    -
    +

    Luckily, I hope, the people who get Samantha’s point also started chiming in (and you get 100% of the women commenting here that I’ve seen):

    @@ -664,8 +660,7 @@ Catching up on the debate, and agreeing with Harry’s remark. (Enjoyed your article, Samantha, and ‘got’ your point.)

    -
    -
    +

    I would like to say I’m surprised at the reaction to Samantha’s article, but I’m not. In spite of his loud declarations of support for The Big Lie, Marco Arment is as bad at any form of criticism that he hasn’t already approved as a very insecure tween. An example from 2011: http://www.businessinsider.com/marco-arment-2011-9

    @@ -690,5 +685,4 @@

    So I hope she stays, but if she goes, I understand. For what it’s worth, I don’t think she’s wrong either way.

    -
    -
    +
    \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/mercurial/expected.html b/article_scraper/resources/tests/readability/mercurial/expected.html index c670ccc..bbfc105 100644 --- a/article_scraper/resources/tests/readability/mercurial/expected.html +++ b/article_scraper/resources/tests/readability/mercurial/expected.html @@ -1,738 +1,738 @@ -
    - - -

    - Once you have mastered the art of mutable history in a single repository (see the user guide), you can move up to the next level: shared mutable history. evolve lets you push and pull draft changesets between repositories along with their obsolescence markers. This opens up a number of interesting possibilities. -

    -

    - The simplest scenario is a single developer working across two computers. Say you’re working on code that must be tested on a remote test server, probably in a rack somewhere, only accessible by SSH, and running an “enterprise-grade” (out-of-date) OS. But you probably prefer to write code locally: everything is setup the way you like it, and you can use your preferred editor, IDE, merge/diff tools, etc. -

    -

    - Traditionally, your options are limited: either -

    -
    -
    -
      -
    • (ab)use your source control system by committing half-working code in order to get it onto the remote test server, or -
    • -
    • go behind source control’s back by using rsync (or similar) to transfer your code back-and-forth until it is ready to commit -
    • -
    -
    -
    -

    - The former is less bad with distributed version control systems like Mercurial, but it’s still far from ideal. (One important version control “best practice” is that every commit should make things just a little bit better, i.e. you should never commit code that is worse than what came before.) The latter, avoiding version control entirely, means that you’re walking a tightrope without a safety net. One accidental rsync in the wrong direction could destroy hours of work. -

    -

    - Using Mercurial with evolve to share mutable history solves these problems. As with single-repository evolve, you can commit whenever the code is demonstrably better, even if all the tests aren’t passing yet—just hg amend when they are. And you can transfer those half-baked changesets between repositories to try things out on your test server before anything is carved in stone. -

    -

    - A less common scenario is multiple developers sharing mutable history, typically for code review. We’ll cover this scenario later. First, we will cover single-user sharing. -

    -
    -

    - Sharing with a single developer -

    -
    -

    - Publishing and non-publishing repositories -

    -

    - The key to shared mutable history is to keep your changesets in draft phase as you pass them around. Recall that by default, hg push promotes changesets from draft to public, and public changesets are immutable. You can change this behaviour by reconfiguring the remote repository so that it is non-publishing. (Short version: set phases.publish to false. Long version follows.) -

    -
    -
    -

    - Setting up -

    -

    - We’ll work through an example with three local repositories, although in the real world they’d most likely be on three different computers. First, the public repository is where tested, polished changesets live, and it is where you synchronize with the rest of your team. -

    - -

    - We’ll need two clones where work gets done, test-repo and dev-repo: -

    -
    -
    $ hg clone public test-repo
    -updating to branch default
    -0 files updated, 0 files merged, 0 files removed, 0 files unresolved
    -$ hg clone test-repo dev-repo
    -updating to branch default
    -0 files updated, 0 files merged, 0 files removed, 0 files unresolved
    -
    -
    -

    - dev-repo is your local machine, with GUI merge tools and IDEs and everything configured just the way you like it. test-repo is the test server in a rack somewhere behind SSH. So for the most part, we’ll develop in dev-repo, push to test-repo, test and polish there, and push to public. -

    -

    - The key to shared mutable history is to make the target repository, in this case test-repo, non-publishing. And, of course, we have to enable the evolve extension in both test-repo and dev-repo. -

    -

    - First, edit the configuration for test-repo: -

    -
    -
    $ hg -R test-repo config --edit --local
    -
    -
    -

    - and add -

    -
    -
    [phases]
    -publish = false
    -
    -[extensions]
    -evolve =
    -
    -
    -

    - Then edit the configuration for dev-repo: -

    -
    -
    $ hg -R dev-repo config --edit --local
    -
    -
    -

    - and add -

    - -

    - Keep in mind that in real life, these repositories would probably be on separate computers, so you’d have to login to each one to configure each repository. -

    -

    - To start things off, let’s make one public, immutable changeset: -

    -
    -
    $ cd test-repo
    -$ echo 'my new project' > file1
    -$ hg add file1
    -$ hg commit -m 'create new project'
    -$ hg push
    -[...]
    -added 1 changesets with 1 changes to 1 files
    -
    -
    -

    - and pull that into the development repository: -

    -
    -
    $ cd ../dev-repo
    -$ hg pull -u
    -[...]
    -added 1 changesets with 1 changes to 1 files
    -1 files updated, 0 files merged, 0 files removed, 0 files unresolved
    -
    -
    -
    - -
    -

    - Example 2: Amend again, locally -

    -

    - This process can repeat. Perhaps you figure out a more elegant fix to the bug, and want to mutate history so nobody ever knows you had a less-than-perfect idea. We’ll implement it locally in dev-repo and push to test-repo: -

    -
    -
    $ echo 'Fix, fix, and fix.' > file1
    -$ hg amend
    -$ hg push
    -
    -
    -

    - This time around, the temporary amend commit is in dev-repo, and it is not transferred to test-repo—the same as before, just in the opposite direction. Figure 4 shows the two repositories after amending in dev-repo and pushing to test-repo. -

    -
    -

    - [figure SG04: each repo has one temporary amend commit, but they’re different in each one] -

    -
    -

    - Let’s hop over to test-repo to test the more elegant fix: -

    -
    -
    $ cd ../test-repo
    -$ hg update
    -1 files updated, 0 files merged, 0 files removed, 0 files unresolved
    -
    -
    -

    - This time, all the tests pass, so no further amending is required. This bug fix is finished, so we push it to the public repository: -

    -
    -
    $ hg push
    -[...]
    -added 1 changesets with 1 changes to 1 files
    -
    -
    -

    - Note that only one changeset—the final version, after two amendments—was actually pushed. Again, Mercurial doesn’t transfer hidden changesets on push and pull. -

    -

    - So the picture in public is much simpler than in either dev-repo or test-repo. Neither of our missteps nor our amendments are publicly visible, just the final, beautifully polished changeset: -

    -
    -

    - [figure SG05: public repo with rev 0:0dc9, 1:de61, both public] -

    -
    -

    - There is one important step left to do. Because we pushed from test-repo to public, the pushed changeset is in public phase in those two repositories. But dev-repo has been out-of-the-loop; changeset de61 is still draft there. If we’re not careful, we might mutate history in dev-repo, obsoleting a changeset that is already public. Let’s avoid that situation for now by pushing up to dev-repo: -

    -
    -
    $ hg push ../dev-repo
    -pushing to ../dev-repo
    -searching for changes
    -no changes found
    -
    -
    -

    - Even though no changesets were pushed, Mercurial still pushed obsolescence markers and phase changes to dev-repo. -

    -

    - A final note: since this fix is now public, it is immutable. It’s no longer possible to amend it: -

    -
    -
    $ hg amend -m 'fix bug 37'
    -abort: cannot amend public changesets
    -
    -
    -

    - This is, after all, the whole point of Mercurial’s phases: to prevent rewriting history that has already been published. -

    -
    -
    -
    -

    - Sharing with multiple developers: code review -

    -

    - Now that you know how to share your own mutable history across multiple computers, you might be wondering if it makes sense to share mutable history with others. It does, but you have to be careful, stay alert, and communicate with your peers. -

    -

    - Code review is a good use case for sharing mutable history across multiple developers: Alice commits a draft changeset, submits it for review, and amends her changeset until her reviewer is satisfied. Meanwhile, Bob is also committing draft changesets for review, amending until his reviewer is satisfied. Once a particular changeset passes review, the respective author (Alice or Bob) pushes it to the public (publishing) repository. -

    -

    - Incidentally, the reviewers here can be anyone: maybe Bob and Alice review each other’s work; maybe the same third party reviews both; or maybe they pick different experts to review their work on different parts of a large codebase. Similarly, it doesn’t matter if reviews are conducted in person, by email, or by carrier pigeon. Code review is outside of the scope of Mercurial, so all we’re looking at here is the mechanics of committing, amending, pushing, and pulling. -

    -
    -

    - Setting up -

    -

    - To demonstrate, let’s start with the public repository as we left it in the last example, with two immutable changesets (figure 5 above). We’ll clone a review repository from it, and then Alice and Bob will both clone from review. -

    -
    -
    $ hg clone public review
    -updating to branch default
    -1 files updated, 0 files merged, 0 files removed, 0 files unresolved
    -$ hg clone review alice
    -updating to branch default
    -1 files updated, 0 files merged, 0 files removed, 0 files unresolved
    -$ hg clone review bob
    -updating to branch default
    -1 files updated, 0 files merged, 0 files removed, 0 files unresolved
    -
    -
    -

    - We need to configure Alice’s and Bob’s working repositories to enable evolve. First, edit Alice’s configuration with -

    -
    -
    $ hg -R alice config --edit --local
    -
    -
    -

    - and add -

    - -

    - Then edit Bob’s repository configuration: -

    -
    -
    $ hg -R bob config --edit --local
    -
    -
    -

    - and add the same text. -

    -
    -
    -

    - Example 3: Alice commits and amends a draft fix -

    -

    - We’ll follow Alice working on a bug fix. We’re going to use bookmarks to make it easier to understand multiple branch heads in the review repository, so Alice starts off by creating a bookmark and committing her first attempt at a fix: -

    -
    -
    $ hg bookmark bug15
    -$ echo 'fix' > file2
    -$ hg commit -A -u alice -m 'fix bug 15 (v1)'
    -adding file2
    -
    -
    -

    - Note the unorthodox “(v1)” in the commit message. We’re just using that to make this tutorial easier to follow; it’s not something we’d recommend in real life. -

    -

    - Of course Alice wouldn’t commit unless her fix worked to her satisfaction, so it must be time to solicit a code review. She does this by pushing to the review repository: -

    -
    -
    $ hg push -B bug15
    -[...]
    -added 1 changesets with 1 changes to 1 files
    -exporting bookmark bug15
    -
    -
    -

    - (The use of -B is important to ensure that we only push the bookmarked head, and that the bookmark itself is pushed. See this guide to bookmarks, especially the Sharing Bookmarks section, if you’re not familiar with bookmarks.) -

    -

    - Some time passes, and Alice receives her code review. As a result, Alice revises her fix and submits it for a second review: -

    -
    -
    $ echo 'Fix.' > file2
    -$ hg amend -m 'fix bug 15 (v2)'
    -$ hg push
    -[...]
    -added 1 changesets with 1 changes to 1 files (+1 heads)
    -updating bookmark bug15
    -
    -
    -

    - Figure 6 shows the state of the review repository at this point. -

    -
    -

    - [figure SG06: rev 2:fn1e is Alice’s obsolete v1, rev 3:cbdf is her v2; both children of rev 1:de61] -

    -
    -

    - After a busy morning of bug fixing, Alice stops for lunch. Let’s see what Bob has been up to. -

    -
    -
    -

    - Example 4: Bob implements and publishes a new feature -

    -

    - Meanwhile, Bob has been working on a new feature. Like Alice, he’ll use a bookmark to track his work, and he’ll push that bookmark to the review repository, so that reviewers know which changesets to review. -

    -
    -
    $ cd ../bob
    -$ echo 'stuff' > file1
    -$ hg bookmark featureX
    -$ hg commit -u bob -m 'implement feature X (v1)'          # rev 4:1636
    -$ hg push -B featureX
    -[...]
    -added 1 changesets with 1 changes to 1 files (+1 heads)
    -exporting bookmark featureX
    -
    -
    -

    - When Bob receives his code review, he improves his implementation a bit, amends, and submits the resulting changeset for review: -

    -
    -
    $ echo 'do stuff' > file1
    -$ hg amend -m 'implement feature X (v2)'                  # rev 5:0eb7
    -$ hg push
    -[...]
    -added 1 changesets with 1 changes to 1 files (+1 heads)
    -updating bookmark featureX
    -
    -
    -

    - Unfortunately, that still doesn’t pass muster. Bob’s reviewer insists on proper capitalization and punctuation. -

    -
    -
    $ echo 'Do stuff.' > file1
    -$ hg amend -m 'implement feature X (v3)'                  # rev 6:540b
    -
    -
    -

    - On the bright side, the second review said, “Go ahead and publish once you fix that.” So Bob immediately publishes his third attempt: -

    -
    -
    $ hg push ../public
    -[...]
    -added 1 changesets with 1 changes to 1 files
    -
    -
    -

    - It’s not enough just to update public, though! Other people also use the review repository, and right now it doesn’t have Bob’s latest amendment (“v3”, revision 6:540b), nor does it know that the precursor of that changeset (“v2”, revision 5:0eb7) is obsolete. Thus, Bob pushes to review as well: -

    -
    -
    $ hg push ../review
    -[...]
    -added 1 changesets with 1 changes to 1 files (+1 heads)
    -updating bookmark featureX
    -
    -
    -

    - Figure 7 shows the result of Bob’s work in both review and public. -

    -
    -

    - [figure SG07: review includes Alice’s draft work on bug 15, as well as Bob’s v1, v2, and v3 changes for feature X: v1 and v2 obsolete, v3 public. public contains only the final, public implementation of feature X] -

    -
    -

    - Incidentally, it’s important that Bob push to public before review. If he pushed to review first, then revision 6:540b would still be in draft phase in review, but it would be public in both Bob’s local repository and the public repository. That could lead to confusion at some point, which is easily avoided by pushing first to public. -

    -
    -
    -

    - Example 5: Alice integrates and publishes -

    -

    - Finally, Alice gets back from lunch and sees that the carrier pigeon with her second review has arrived (or maybe it’s in her email inbox). Alice’s reviewer approved her amended changeset, so she pushes it to public: -

    -
    -
    $ hg push ../public
    -[...]
    -remote has heads on branch 'default' that are not known locally: 540ba8f317e6
    -abort: push creates new remote head cbdfbd5a5db2!
    -(pull and merge or see "hg help push" for details about pushing new heads)
    -
    -
    -

    - Oops! Bob has won the race to push first to public. So Alice needs to integrate with Bob: let’s pull his changeset(s) and see what the branch heads are. -

    -
    -
    $ hg pull ../public
    -[...]
    -added 1 changesets with 1 changes to 1 files (+1 heads)
    -(run 'hg heads' to see heads, 'hg merge' to merge)
    -$ hg log -G -q -r 'head()' --template '{rev}:{node|short}  ({author})\n'
    -o  5:540ba8f317e6  (bob)
    -|
    -| @  4:cbdfbd5a5db2  (alice)
    -|/
    -
    -
    -

    - We’ll assume Alice and Bob are perfectly comfortable with rebasing changesets. (After all, they’re already using mutable history in the form of amend.) So Alice rebases her changeset on top of Bob’s and publishes the result: -

    -
    -
    $ hg rebase -d 5
    -$ hg push ../public
    -[...]
    -added 1 changesets with 1 changes to 1 files
    -$ hg push ../review
    -[...]
    -added 1 changesets with 0 changes to 0 files
    -updating bookmark bug15
    -
    -
    -

    - The result, in both review and public repositories, is shown in figure 8. -

    -
    -

    - [figure SG08: review shows v1 and v2 of Alice’s fix, then v1, v2, v3 of Bob’s feature, finally Alice’s fix rebased onto Bob’s. public just shows the final public version of each changeset] -

    -
    -
    -
    -
    -

    - Getting into trouble with shared mutable history -

    -

    - Mercurial with evolve is a powerful tool, and using powerful tools can have consequences. (You can cut yourself badly with a sharp knife, but every competent chef keeps several around. Ever try to chop onions with a spoon?) -

    -

    - In the user guide, we saw examples of unstable changesets, which are the most common type of troubled changeset. (Recall that a non-obsolete changeset with obsolete ancestors is an orphan.) -

    -

    - Two other types of troubles can happen: divergent and bumped changesets. Both are more likely with shared mutable history, especially mutable history shared by multiple developers. -

    -
    -

    - Setting up -

    -

    - For these examples, we’re going to use a slightly different workflow: as before, Alice and Bob share a public repository. But this time there is no review repository. Instead, Alice and Bob put on their cowboy hats, throw good practice to the wind, and pull directly from each other’s working repositories. -

    -

    - So we throw away everything except public and reclone: -

    -
    -
    $ rm -rf review alice bob
    -$ hg clone public alice
    -updating to branch default
    -2 files updated, 0 files merged, 0 files removed, 0 files unresolved
    -$ hg clone public bob
    -updating to branch default
    -2 files updated, 0 files merged, 0 files removed, 0 files unresolved
    -
    -
    -

    - Once again we have to configure their repositories: enable evolve and (since Alice and Bob will be pulling directly from each other) make their repositories non-publishing. Edit Alice’s configuration: -

    -
    -
    $ hg -R alice config --edit --local
    -
    -
    -

    - and add -

    -
    -
    [extensions]
    -rebase =
    -evolve =
    -
    -[phases]
    -publish = false
    -
    -
    -

    - Then edit Bob’s repository configuration: -

    -
    -
    $ hg -R bob config --edit --local
    -
    -
    -

    - and add the same text. -

    -
    -
    -

    - Example 6: Divergent changesets -

    -

    - When an obsolete changeset has two successors, those successors are divergent. One way to get into such a situation is by failing to communicate with your teammates. Let’s see how that might happen. -

    -

    - First, we’ll have Bob commit a bug fix that could still be improved: -

    -
    -
    $ cd bob
    -$ echo 'pretty good fix' >> file1
    -$ hg commit -u bob -m 'fix bug 24 (v1)'                   # rev 4:2fe6
    -
    -
    -

    - Since Alice and Bob are now in cowboy mode, Alice pulls Bob’s draft changeset and amends it herself. -

    -
    -
    $ cd ../alice
    -$ hg pull -u ../bob
    -[...]
    -added 1 changesets with 1 changes to 1 files
    -$ echo 'better fix (alice)' >> file1
    -$ hg amend -u alice -m 'fix bug 24 (v2 by alice)'
    -
    -
    -

    - But Bob has no idea that Alice just did this. (See how important good communication is?) So he implements a better fix of his own: -

    -
    -
    $ cd ../bob
    -$ echo 'better fix (bob)' >> file1
    -$ hg amend -u bob -m 'fix bug 24 (v2 by bob)'             # rev 6:a360
    -
    -
    -

    - At this point, the divergence exists, but only in theory: Bob’s original changeset, 4:2fe6, is obsolete and has two successors. But those successors are in different repositories, so the trouble is not visible to anyone yet. It will be as soon as Bob pulls from Alice’s repository (or vice-versa). -

    -
    -
    $ hg pull ../alice
    -[...]
    -added 1 changesets with 1 changes to 2 files (+1 heads)
    -(run 'hg heads' to see heads, 'hg merge' to merge)
    -2 new divergent changesets
    -
    -
    -

    - Figure 9 shows the situation in Bob’s repository. -

    -
    -

    - [figure SG09: Bob’s repo with 2 heads for the 2 divergent changesets, 6:a360 and 7:e3f9; wc is at 6:a360; both are successors of obsolete 4:2fe6, hence divergence] -

    -
    -

    - Now we need to get out of trouble. As usual, the answer is to evolve history. -

    -
    -
    $ HGMERGE=internal:other hg evolve
    -merge:[6] fix bug 24 (v2 by bob)
    -with: [7] fix bug 24 (v2 by alice)
    -base: [4] fix bug 24 (v1)
    -0 files updated, 1 files merged, 0 files removed, 0 files unresolved
    -
    -
    -

    - Figure 10 shows how Bob’s repository looks now. -

    -
    -

    - [figure SG10: only one visible head, 9:5ad6, successor to hidden 6:a360 and 7:e3f9] -

    -
    -

    - We carefully dodged a merge conflict by specifying a merge tool (internal:other) that will take Alice’s changes over Bob’s. (You might wonder why Bob wouldn’t prefer his own changes by using internal:local. He’s avoiding a bug in evolve that occurs when evolving divergent changesets using internal:local.) -

    -

    - # XXX this link does not work .. bug: https://bitbucket.org/marmoute/mutable-history/issue/48/ -

    -

    - ** STOP HERE: WORK IN PROGRESS ** -

    -
    -
    -

    - Phase-divergence: when a rewritten changeset is made public -

    -

    - If Alice and Bob are collaborating on some mutable changesets, it’s possible to get into a situation where an otherwise worthwhile changeset cannot be pushed to the public repository; it is phase-divergent with another changeset that was made public first. Let’s demonstrate one way this could happen. -

    -

    - It starts with Alice committing a bug fix. Right now, we don’t yet know if this bug fix is good enough to push to the public repository, but it’s good enough for Alice to commit. -

    -
    -
    $ cd alice
    -$ echo 'fix' > file2
    -$ hg commit -A -m 'fix bug 15'
    -adding file2
    -
    -
    -

    - Now Bob has a bad idea: he decides to pull whatever Alice is working on and tweak her bug fix to his taste: -

    -
    -
    $ cd ../bob
    -$ hg pull -u ../alice
    -[...]
    -added 1 changesets with 1 changes to 1 files
    -1 files updated, 0 files merged, 0 files removed, 0 files unresolved
    -$ echo 'Fix.' > file2
    -$ hg amend -A -m 'fix bug 15 (amended)'
    -
    -
    -

    - (Note the lack of communication between Alice and Bob. Failing to communicate with your colleagues is a good way to get into trouble. Nevertheless, evolve can usually sort things out, as we will see.) -

    -
    -

    - [figure SG06: Bob’s repo with one amendment] -

    -
    -

    - After some testing, Alice realizes her bug fix is just fine as it is: no need for further polishing and amending, this changeset is ready to publish. -

    -
    -
    $ cd ../alice
    -$ hg push
    -[...]
    -added 1 changesets with 1 changes to 1 files
    -
    -
    -

    - This introduces a contradiction: in Bob’s repository, changeset 2:e011 (his copy of Alice’s fix) is obsolete, since Bob amended it. But in Alice’s repository (and the public repository), that changeset is public: it is immutable, carved in stone for all eternity. No changeset can be both obsolete and public, so Bob is in for a surprise the next time he pulls from public: -

    -
    -
    $ cd ../bob
    -$ hg pull -q -u
    -1 new phase-divergent changesets
    -
    -
    -

    - Figure 7 shows what just happened to Bob’s repository: changeset 2:e011 is now public, so it can’t be obsolete. When that changeset was obsolete, it made perfect sense for it to have a successor, namely Bob’s amendment of Alice’s fix (changeset 4:fe88). But it’s illogical for a public changeset to have a successor, so 4:fe88 is troubled: it has become bumped. -

    -
    -

    - [figure SG07: 2:e011 now public not obsolete, 4:fe88 now bumped] -

    -
    -

    - As usual when there’s trouble in your repository, the solution is to evolve it: -

    - -

    - Figure 8 illustrates Bob’s repository after evolving away the bumped changeset. Ignoring the obsolete changesets, Bob now has a nice, clean, simple history. His amendment of Alice’s bug fix lives on, as changeset 5:227d—albeit with a software-generated commit message. (Bob should probably amend that changeset to improve the commit message.) But the important thing is that his repository no longer has any troubled changesets, thanks to evolve. -

    -
    -

    - [figure SG08: 5:227d is new, formerly bumped changeset 4:fe88 now hidden] -

    -
    -
    -
    -
    -

    - Conclusion -

    -

    - Mutable history is a powerful tool. Like a sharp knife, an experienced user can do wonderful things with it, much more wonderful than with a dull knife (never mind a rusty spoon). At the same time, an inattentive or careless user can do harm to himself or others. Mercurial with evolve goes to great lengths to limit the harm you can do by trying to handle all possible types of “troubled” changesets. Nevertheless, having a first-aid kit nearby does not mean you should stop being careful with sharp knives. -

    -

    - Mutable history shared across multiple repositories by a single developer is a natural extension of this model. Once you are used to using a single sharp knife on its own, it’s pretty straightforward to chop onions and mushrooms using the same knife, or to alternate between two chopping boards with different knives. -

    -

    - Mutable history shared by multiple developers is a scary place to go. Imagine a professional kitchen full of expert chefs tossing their favourite knives back and forth, with the occasional axe or chainsaw thrown in to spice things up. If you’re confident that you and your colleagues can do it without losing a limb, go for it. But be sure to practice a lot first before you rely on it! -

    -
    -
    +
    + + +

    + Once you have mastered the art of mutable history in a single repository (see the user guide), you can move up to the next level: shared mutable history. evolve lets you push and pull draft changesets between repositories along with their obsolescence markers. This opens up a number of interesting possibilities. +

    +

    + The simplest scenario is a single developer working across two computers. Say you’re working on code that must be tested on a remote test server, probably in a rack somewhere, only accessible by SSH, and running an “enterprise-grade” (out-of-date) OS. But you probably prefer to write code locally: everything is set up the way you like it, and you can use your preferred editor, IDE, merge/diff tools, etc. +

    +

    + Traditionally, your options are limited: either +

    +
    +
    +
      +
    • (ab)use your source control system by committing half-working code in order to get it onto the remote test server, or +
    • +
    • go behind source control’s back by using rsync (or similar) to transfer your code back-and-forth until it is ready to commit +
    • +
    +
    +
    +

    + The former is less bad with distributed version control systems like Mercurial, but it’s still far from ideal. (One important version control “best practice” is that every commit should make things just a little bit better, i.e. you should never commit code that is worse than what came before.) The latter, avoiding version control entirely, means that you’re walking a tightrope without a safety net. One accidental rsync in the wrong direction could destroy hours of work. +

    +

    + Using Mercurial with evolve to share mutable history solves these problems. As with single-repository evolve, you can commit whenever the code is demonstrably better, even if all the tests aren’t passing yet—just hg amend when they are. And you can transfer those half-baked changesets between repositories to try things out on your test server before anything is carved in stone. +

    +

    + A less common scenario is multiple developers sharing mutable history, typically for code review. We’ll cover this scenario later. First, we will cover single-user sharing. +

    +
    +

    + Sharing with a single developer +

    +
    +

    + Publishing and non-publishing repositories +

    +

    + The key to shared mutable history is to keep your changesets in draft phase as you pass them around. Recall that by default, hg push promotes changesets from draft to public, and public changesets are immutable. You can change this behaviour by reconfiguring the remote repository so that it is non-publishing. (Short version: set phases.publish to false. Long version follows.) +

    +
    +
    +

    + Setting up +

    +

    + We’ll work through an example with three local repositories, although in the real world they’d most likely be on three different computers. First, the public repository is where tested, polished changesets live, and it is where you synchronize with the rest of your team. +

    + +

    + We’ll need two clones where work gets done, test-repo and dev-repo: +

    +
    +
    $ hg clone public test-repo
    +updating to branch default
    +0 files updated, 0 files merged, 0 files removed, 0 files unresolved
    +$ hg clone test-repo dev-repo
    +updating to branch default
    +0 files updated, 0 files merged, 0 files removed, 0 files unresolved
    +
    +
    +

    + dev-repo is your local machine, with GUI merge tools and IDEs and everything configured just the way you like it. test-repo is the test server in a rack somewhere behind SSH. So for the most part, we’ll develop in dev-repo, push to test-repo, test and polish there, and push to public. +

    +

    + The key to shared mutable history is to make the target repository, in this case test-repo, non-publishing. And, of course, we have to enable the evolve extension in both test-repo and dev-repo. +

    +

    + First, edit the configuration for test-repo: +

    +
    +
    $ hg -R test-repo config --edit --local
    +
    +
    +

    + and add +

    +
    +
    [phases]
    +publish = false
    +
    +[extensions]
    +evolve =
    +
    +
    +

    + Then edit the configuration for dev-repo: +

    +
    +
    $ hg -R dev-repo config --edit --local
    +
    +
    +

    + and add +

    + +

    + Keep in mind that in real life, these repositories would probably be on separate computers, so you’d have to log in to each one to configure each repository. +

    +

    + To start things off, let’s make one public, immutable changeset: +

    +
    +
    $ cd test-repo
    +$ echo 'my new project' > file1
    +$ hg add file1
    +$ hg commit -m 'create new project'
    +$ hg push
    +[...]
    +added 1 changesets with 1 changes to 1 files
    +
    +
    +

    + and pull that into the development repository: +

    +
    +
    $ cd ../dev-repo
    +$ hg pull -u
    +[...]
    +added 1 changesets with 1 changes to 1 files
    +1 files updated, 0 files merged, 0 files removed, 0 files unresolved
    +
    +
    +
    + +
    +

    + Example 2: Amend again, locally +

    +

    + This process can repeat. Perhaps you figure out a more elegant fix to the bug, and want to mutate history so nobody ever knows you had a less-than-perfect idea. We’ll implement it locally in dev-repo and push to test-repo: +

    +
    +
    $ echo 'Fix, fix, and fix.' > file1
    +$ hg amend
    +$ hg push
    +
    +
    +

    + This time around, the temporary amend commit is in dev-repo, and it is not transferred to test-repo—the same as before, just in the opposite direction. Figure 4 shows the two repositories after amending in dev-repo and pushing to test-repo. +

    +
    +

    + [figure SG04: each repo has one temporary amend commit, but they’re different in each one] +

    +
    +

    + Let’s hop over to test-repo to test the more elegant fix: +

    +
    +
    $ cd ../test-repo
    +$ hg update
    +1 files updated, 0 files merged, 0 files removed, 0 files unresolved
    +
    +
    +

    + This time, all the tests pass, so no further amending is required. This bug fix is finished, so we push it to the public repository: +

    +
    +
    $ hg push
    +[...]
    +added 1 changesets with 1 changes to 1 files
    +
    +
    +

    + Note that only one changeset—the final version, after two amendments—was actually pushed. Again, Mercurial doesn’t transfer hidden changesets on push and pull. +

    +

    + So the picture in public is much simpler than in either dev-repo or test-repo. Neither our missteps nor our amendments are publicly visible, just the final, beautifully polished changeset: +

    +
    +

    + [figure SG05: public repo with rev 0:0dc9, 1:de61, both public] +

    +
    +

    + There is one important step left to do. Because we pushed from test-repo to public, the pushed changeset is in public phase in those two repositories. But dev-repo has been out-of-the-loop; changeset de61 is still draft there. If we’re not careful, we might mutate history in dev-repo, obsoleting a changeset that is already public. Let’s avoid that situation for now by pushing up to dev-repo: +

    +
    +
    $ hg push ../dev-repo
    +pushing to ../dev-repo
    +searching for changes
    +no changes found
    +
    +
    +

    + Even though no changesets were pushed, Mercurial still pushed obsolescence markers and phase changes to dev-repo. +

    +

    + A final note: since this fix is now public, it is immutable. It’s no longer possible to amend it: +

    +
    +
    $ hg amend -m 'fix bug 37'
    +abort: cannot amend public changesets
    +
    +
    +

    + This is, after all, the whole point of Mercurial’s phases: to prevent rewriting history that has already been published. +

    +
    +
    +
    +

    + Sharing with multiple developers: code review +

    +

    + Now that you know how to share your own mutable history across multiple computers, you might be wondering if it makes sense to share mutable history with others. It does, but you have to be careful, stay alert, and communicate with your peers. +

    +

    + Code review is a good use case for sharing mutable history across multiple developers: Alice commits a draft changeset, submits it for review, and amends her changeset until her reviewer is satisfied. Meanwhile, Bob is also committing draft changesets for review, amending until his reviewer is satisfied. Once a particular changeset passes review, the respective author (Alice or Bob) pushes it to the public (publishing) repository. +

    +

    + Incidentally, the reviewers here can be anyone: maybe Bob and Alice review each other’s work; maybe the same third party reviews both; or maybe they pick different experts to review their work on different parts of a large codebase. Similarly, it doesn’t matter if reviews are conducted in person, by email, or by carrier pigeon. Code review is outside of the scope of Mercurial, so all we’re looking at here is the mechanics of committing, amending, pushing, and pulling. +

    +
    +

    + Setting up +

    +

    + To demonstrate, let’s start with the public repository as we left it in the last example, with two immutable changesets (figure 5 above). We’ll clone a review repository from it, and then Alice and Bob will both clone from review. +

    +
    +
    $ hg clone public review
    +updating to branch default
    +1 files updated, 0 files merged, 0 files removed, 0 files unresolved
    +$ hg clone review alice
    +updating to branch default
    +1 files updated, 0 files merged, 0 files removed, 0 files unresolved
    +$ hg clone review bob
    +updating to branch default
    +1 files updated, 0 files merged, 0 files removed, 0 files unresolved
    +
    +
    +

    + We need to configure Alice’s and Bob’s working repositories to enable evolve. First, edit Alice’s configuration with +

    +
    +
    $ hg -R alice config --edit --local
    +
    +
    +

    + and add +

    + +

    + Then edit Bob’s repository configuration: +

    +
    +
    $ hg -R bob config --edit --local
    +
    +
    +

    + and add the same text. +

    +
    +
    +

    + Example 3: Alice commits and amends a draft fix +

    +

    + We’ll follow Alice working on a bug fix. We’re going to use bookmarks to make it easier to understand multiple branch heads in the review repository, so Alice starts off by creating a bookmark and committing her first attempt at a fix: +

    +
    +
    $ hg bookmark bug15
    +$ echo 'fix' > file2
    +$ hg commit -A -u alice -m 'fix bug 15 (v1)'
    +adding file2
    +
    +
    +

    + Note the unorthodox “(v1)” in the commit message. We’re just using that to make this tutorial easier to follow; it’s not something we’d recommend in real life. +

    +

    + Of course Alice wouldn’t commit unless her fix worked to her satisfaction, so it must be time to solicit a code review. She does this by pushing to the review repository: +

    +
    +
    $ hg push -B bug15
    +[...]
    +added 1 changesets with 1 changes to 1 files
    +exporting bookmark bug15
    +
    +
    +

    + (The use of -B is important to ensure that we only push the bookmarked head, and that the bookmark itself is pushed. See this guide to bookmarks, especially the Sharing Bookmarks section, if you’re not familiar with bookmarks.) +

    +

    + Some time passes, and Alice receives her code review. As a result, Alice revises her fix and submits it for a second review: +

    +
    +
    $ echo 'Fix.' > file2
    +$ hg amend -m 'fix bug 15 (v2)'
    +$ hg push
    +[...]
    +added 1 changesets with 1 changes to 1 files (+1 heads)
    +updating bookmark bug15
    +
    +
    +

    + Figure 6 shows the state of the review repository at this point. +

    +
    +

    + [figure SG06: rev 2:fn1e is Alice’s obsolete v1, rev 3:cbdf is her v2; both children of rev 1:de61] +

    +
    +

    + After a busy morning of bug fixing, Alice stops for lunch. Let’s see what Bob has been up to. +

    +
    +
    +

    + Example 4: Bob implements and publishes a new feature +

    +

    + Meanwhile, Bob has been working on a new feature. Like Alice, he’ll use a bookmark to track his work, and he’ll push that bookmark to the review repository, so that reviewers know which changesets to review. +

    +
    +
    $ cd ../bob
    +$ echo 'stuff' > file1
    +$ hg bookmark featureX
    +$ hg commit -u bob -m 'implement feature X (v1)'          # rev 4:1636
    +$ hg push -B featureX
    +[...]
    +added 1 changesets with 1 changes to 1 files (+1 heads)
    +exporting bookmark featureX
    +
    +
    +

    + When Bob receives his code review, he improves his implementation a bit, amends, and submits the resulting changeset for review: +

    +
    +
    $ echo 'do stuff' > file1
    +$ hg amend -m 'implement feature X (v2)'                  # rev 5:0eb7
    +$ hg push
    +[...]
    +added 1 changesets with 1 changes to 1 files (+1 heads)
    +updating bookmark featureX
    +
    +
    +

    + Unfortunately, that still doesn’t pass muster. Bob’s reviewer insists on proper capitalization and punctuation. +

    +
    +
    $ echo 'Do stuff.' > file1
    +$ hg amend -m 'implement feature X (v3)'                  # rev 6:540b
    +
    +
    +

    + On the bright side, the second review said, “Go ahead and publish once you fix that.” So Bob immediately publishes his third attempt: +

    +
    +
    $ hg push ../public
    +[...]
    +added 1 changesets with 1 changes to 1 files
    +
    +
    +

    + It’s not enough just to update public, though! Other people also use the review repository, and right now it doesn’t have Bob’s latest amendment (“v3”, revision 6:540b), nor does it know that the precursor of that changeset (“v2”, revision 5:0eb7) is obsolete. Thus, Bob pushes to review as well: +

    +
    +
    $ hg push ../review
    +[...]
    +added 1 changesets with 1 changes to 1 files (+1 heads)
    +updating bookmark featureX
    +
    +
    +

    + Figure 7 shows the result of Bob’s work in both review and public. +

    +
    +

    + [figure SG07: review includes Alice’s draft work on bug 15, as well as Bob’s v1, v2, and v3 changes for feature X: v1 and v2 obsolete, v3 public. public contains only the final, public implementation of feature X] +

    +
    +

    + Incidentally, it’s important that Bob push to public before review. If he pushed to review first, then revision 6:540b would still be in draft phase in review, but it would be public in both Bob’s local repository and the public repository. That could lead to confusion at some point, which is easily avoided by pushing first to public. +

    +
    +
    +

    + Example 5: Alice integrates and publishes +

    +

    + Finally, Alice gets back from lunch and sees that the carrier pigeon with her second review has arrived (or maybe it’s in her email inbox). Alice’s reviewer approved her amended changeset, so she pushes it to public: +

    +
    +
    $ hg push ../public
    +[...]
    +remote has heads on branch 'default' that are not known locally: 540ba8f317e6
    +abort: push creates new remote head cbdfbd5a5db2!
    +(pull and merge or see "hg help push" for details about pushing new heads)
    +
    +
    +

    + Oops! Bob has won the race to push first to public. So Alice needs to integrate with Bob: let’s pull his changeset(s) and see what the branch heads are. +

    +
    +
    $ hg pull ../public
    +[...]
    +added 1 changesets with 1 changes to 1 files (+1 heads)
    +(run 'hg heads' to see heads, 'hg merge' to merge)
    +$ hg log -G -q -r 'head()' --template '{rev}:{node|short}  ({author})\n'
    +o  5:540ba8f317e6  (bob)
    +|
    +| @  4:cbdfbd5a5db2  (alice)
    +|/
    +
    +
    +

    + We’ll assume Alice and Bob are perfectly comfortable with rebasing changesets. (After all, they’re already using mutable history in the form of amend.) So Alice rebases her changeset on top of Bob’s and publishes the result: +

    +
    +
    $ hg rebase -d 5
    +$ hg push ../public
    +[...]
    +added 1 changesets with 1 changes to 1 files
    +$ hg push ../review
    +[...]
    +added 1 changesets with 0 changes to 0 files
    +updating bookmark bug15
    +
    +
    +

    + The result, in both review and public repositories, is shown in figure 8. +

    +
    +

    + [figure SG08: review shows v1 and v2 of Alice’s fix, then v1, v2, v3 of Bob’s feature, finally Alice’s fix rebased onto Bob’s. public just shows the final public version of each changeset] +

    +
    +
    +
    +
    +

    + Getting into trouble with shared mutable history +

    +

    + Mercurial with evolve is a powerful tool, and using powerful tools can have consequences. (You can cut yourself badly with a sharp knife, but every competent chef keeps several around. Ever try to chop onions with a spoon?) +

    +

    + In the user guide, we saw examples of unstable changesets, which are the most common type of troubled changeset. (Recall that a non-obsolete changeset with obsolete ancestors is an orphan.) +

    +

    + Two other types of troubles can happen: divergent and bumped changesets. Both are more likely with shared mutable history, especially mutable history shared by multiple developers. +

    +
    +

    + Setting up +

    +

    + For these examples, we’re going to use a slightly different workflow: as before, Alice and Bob share a public repository. But this time there is no review repository. Instead, Alice and Bob put on their cowboy hats, throw good practice to the wind, and pull directly from each other’s working repositories. +

    +

    + So we throw away everything except public and reclone: +

    +
    +
    $ rm -rf review alice bob
    +$ hg clone public alice
    +updating to branch default
    +2 files updated, 0 files merged, 0 files removed, 0 files unresolved
    +$ hg clone public bob
    +updating to branch default
    +2 files updated, 0 files merged, 0 files removed, 0 files unresolved
    +
    +
    +

    + Once again we have to configure their repositories: enable evolve and (since Alice and Bob will be pulling directly from each other) make their repositories non-publishing. Edit Alice’s configuration: +

    +
    +
    $ hg -R alice config --edit --local
    +
    +
    +

    + and add +

    +
    +
    [extensions]
    +rebase =
    +evolve =
    +
    +[phases]
    +publish = false
    +
    +
    +

    + Then edit Bob’s repository configuration: +

    +
    +
    $ hg -R bob config --edit --local
    +
    +
    +

    + and add the same text. +

    +
    +
    +

    + Example 6: Divergent changesets +

    +

    + When an obsolete changeset has two successors, those successors are divergent. One way to get into such a situation is by failing to communicate with your teammates. Let’s see how that might happen. +

    +

    + First, we’ll have Bob commit a bug fix that could still be improved: +

    +
    +
    $ cd bob
    +$ echo 'pretty good fix' >> file1
    +$ hg commit -u bob -m 'fix bug 24 (v1)'                   # rev 4:2fe6
    +
    +
    +

    + Since Alice and Bob are now in cowboy mode, Alice pulls Bob’s draft changeset and amends it herself. +

    +
    +
    $ cd ../alice
    +$ hg pull -u ../bob
    +[...]
    +added 1 changesets with 1 changes to 1 files
    +$ echo 'better fix (alice)' >> file1
    +$ hg amend -u alice -m 'fix bug 24 (v2 by alice)'
    +
    +
    +

    + But Bob has no idea that Alice just did this. (See how important good communication is?) So he implements a better fix of his own: +

    +
    +
    $ cd ../bob
    +$ echo 'better fix (bob)' >> file1
    +$ hg amend -u bob -m 'fix bug 24 (v2 by bob)'             # rev 6:a360
    +
    +
    +

    + At this point, the divergence exists, but only in theory: Bob’s original changeset, 4:2fe6, is obsolete and has two successors. But those successors are in different repositories, so the trouble is not visible to anyone yet. It will be as soon as Bob pulls from Alice’s repository (or vice-versa). +

    +
    +
    $ hg pull ../alice
    +[...]
    +added 1 changesets with 1 changes to 2 files (+1 heads)
    +(run 'hg heads' to see heads, 'hg merge' to merge)
    +2 new divergent changesets
    +
    +
    +

    + Figure 9 shows the situation in Bob’s repository. +

    +
    +

    + [figure SG09: Bob’s repo with 2 heads for the 2 divergent changesets, 6:a360 and 7:e3f9; wc is at 6:a360; both are successors of obsolete 4:2fe6, hence divergence] +

    +
    +

    + Now we need to get out of trouble. As usual, the answer is to evolve history. +

    +
    +
    $ HGMERGE=internal:other hg evolve
    +merge:[6] fix bug 24 (v2 by bob)
    +with: [7] fix bug 24 (v2 by alice)
    +base: [4] fix bug 24 (v1)
    +0 files updated, 1 files merged, 0 files removed, 0 files unresolved
    +
    +
    +

    + Figure 10 shows how Bob’s repository looks now. +

    +
    +

    + [figure SG10: only one visible head, 9:5ad6, successor to hidden 6:a360 and 7:e3f9] +

    +
    +

    + We carefully dodged a merge conflict by specifying a merge tool (internal:other) that will take Alice’s changes over Bob’s. (You might wonder why Bob wouldn’t prefer his own changes by using internal:local. He’s avoiding a bug in evolve that occurs when evolving divergent changesets using internal:local.) +

    +

    + # XXX this link does not work .. bug: https://bitbucket.org/marmoute/mutable-history/issue/48/ +

    +

    + ** STOP HERE: WORK IN PROGRESS ** +

    +
    +
    +

    + Phase-divergence: when a rewritten changeset is made public +

    +

    + If Alice and Bob are collaborating on some mutable changesets, it’s possible to get into a situation where an otherwise worthwhile changeset cannot be pushed to the public repository; it is phase-divergent with another changeset that was made public first. Let’s demonstrate one way this could happen. +

    +

    + It starts with Alice committing a bug fix. Right now, we don’t yet know if this bug fix is good enough to push to the public repository, but it’s good enough for Alice to commit. +

    +
    +
    $ cd alice
    +$ echo 'fix' > file2
    +$ hg commit -A -m 'fix bug 15'
    +adding file2
    +
    +
    +

    + Now Bob has a bad idea: he decides to pull whatever Alice is working on and tweak her bug fix to his taste: +

    +
    +
    $ cd ../bob
    +$ hg pull -u ../alice
    +[...]
    +added 1 changesets with 1 changes to 1 files
    +1 files updated, 0 files merged, 0 files removed, 0 files unresolved
    +$ echo 'Fix.' > file2
    +$ hg amend -A -m 'fix bug 15 (amended)'
    +
    +
    +

    + (Note the lack of communication between Alice and Bob. Failing to communicate with your colleagues is a good way to get into trouble. Nevertheless, evolve can usually sort things out, as we will see.) +

    +
    +

    + [figure SG06: Bob’s repo with one amendment] +

    +
    +

    + After some testing, Alice realizes her bug fix is just fine as it is: no need for further polishing and amending, this changeset is ready to publish. +

    +
    +
    $ cd ../alice
    +$ hg push
    +[...]
    +added 1 changesets with 1 changes to 1 files
    +
    +
    +

    + This introduces a contradiction: in Bob’s repository, changeset 2:e011 (his copy of Alice’s fix) is obsolete, since Bob amended it. But in Alice’s repository (and the public repository), that changeset is public: it is immutable, carved in stone for all eternity. No changeset can be both obsolete and public, so Bob is in for a surprise the next time he pulls from public: +

    +
    +
    $ cd ../bob
    +$ hg pull -q -u
    +1 new phase-divergent changesets
    +
    +
    +

    + Figure 7 shows what just happened to Bob’s repository: changeset 2:e011 is now public, so it can’t be obsolete. When that changeset was obsolete, it made perfect sense for it to have a successor, namely Bob’s amendment of Alice’s fix (changeset 4:fe88). But it’s illogical for a public changeset to have a successor, so 4:fe88 is troubled: it has become bumped. +

    +
    +

    + [figure SG07: 2:e011 now public not obsolete, 4:fe88 now bumped] +

    +
    +

    + As usual when there’s trouble in your repository, the solution is to evolve it: +

    + +

    + Figure 8 illustrates Bob’s repository after evolving away the bumped changeset. Ignoring the obsolete changesets, Bob now has a nice, clean, simple history. His amendment of Alice’s bug fix lives on, as changeset 5:227d—albeit with a software-generated commit message. (Bob should probably amend that changeset to improve the commit message.) But the important thing is that his repository no longer has any troubled changesets, thanks to evolve. +

    +
    +

    + [figure SG08: 5:227d is new, formerly bumped changeset 4:fe88 now hidden] +

    +
    +
    +
    +
    +

    + Conclusion +

    +

    + Mutable history is a powerful tool. Like a sharp knife, an experienced user can do wonderful things with it, much more wonderful than with a dull knife (never mind a rusty spoon). At the same time, an inattentive or careless user can do harm to himself or others. Mercurial with evolve goes to great lengths to limit the harm you can do by trying to handle all possible types of “troubled” changesets. Nevertheless, having a first-aid kit nearby does not mean you should stop being careful with sharp knives. +

    +

    + Mutable history shared across multiple repositories by a single developer is a natural extension of this model. Once you are used to using a single sharp knife on its own, it’s pretty straightforward to chop onions and mushrooms using the same knife, or to alternate between two chopping boards with different knives. +

    +

    + Mutable history shared by multiple developers is a scary place to go. Imagine a professional kitchen full of expert chefs tossing their favourite knives back and forth, with the occasional axe or chainsaw thrown in to spice things up. If you’re confident that you and your colleagues can do it without losing a limb, go for it. But be sure to practice a lot first before you rely on it! +

    +
    +
    \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/metadata-content-missing/expected.html b/article_scraper/resources/tests/readability/metadata-content-missing/expected.html index d2ece7e..e3aef88 100644 --- a/article_scraper/resources/tests/readability/metadata-content-missing/expected.html +++ b/article_scraper/resources/tests/readability/metadata-content-missing/expected.html @@ -1,19 +1,19 @@ -
    -

    Test document title

    -

    - Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod - tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, - quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo - consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse - cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non - proident, sunt in culpa qui officia deserunt mollit anim id est laborum. -

    -

    - Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod - tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, - quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo - consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse - cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non - proident, sunt in culpa qui officia deserunt mollit anim id est laborum. -

    -
    +
    +

    Test document title

    +

    + Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod + tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, + quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo + consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse + cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non + proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +

    +

    + Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod + tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, + quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo + consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse + cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non + proident, sunt in culpa qui officia deserunt mollit anim id est laborum. +

    +
    \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/missing-paragraphs/expected.html b/article_scraper/resources/tests/readability/missing-paragraphs/expected.html index 582d845..6584f9a 100644 --- a/article_scraper/resources/tests/readability/missing-paragraphs/expected.html +++ b/article_scraper/resources/tests/readability/missing-paragraphs/expected.html @@ -1,50 +1,50 @@ -
    - - - -

    Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy - eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam - voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet - clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit - amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam - nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, - sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. - Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor - sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed - diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, - sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. - Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor - sit amet.

    - -

    Secondary header

    - -

    Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy - eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam - voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet - clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit - amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam - nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, - sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. - Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor - sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed - diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, - sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. - Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor - sit amet.

    - -

    Secondary header

    - -

    Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy - eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam - voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet - clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit - amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam - nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, - sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. - Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor - sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed - diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, - sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. - Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor - sit amet.

    -
    +
    + + + +

    Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy + eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam + voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet + clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit + amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam + nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, + sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. + Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor + sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed + diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, + sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. + Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor + sit amet.

    + +

    Secondary header

    + +

    Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy + eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam + voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet + clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit + amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam + nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, + sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. + Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor + sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed + diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, + sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. + Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor + sit amet.

    + +

    Secondary header

    + +

    Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy + eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam + voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet + clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit + amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam + nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, + sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. + Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor + sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed + diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, + sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. + Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor + sit amet.

    +
    \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/mozilla-1/expected.html b/article_scraper/resources/tests/readability/mozilla-1/expected.html index 6c97465..2b07160 100644 --- a/article_scraper/resources/tests/readability/mozilla-1/expected.html +++ b/article_scraper/resources/tests/readability/mozilla-1/expected.html @@ -1,105 +1,96 @@ -
    -
    - - -

    It’s easier than ever to personalize Firefox and make it work the way - you do. -
    No other browser gives you so much choice and flexibility.

    -
    -

    -
    -
    -
    -
    -

    Designed to
    be redesigned

    - -

    Get fast and easy access to the features you use most in the new menu. - Open the “Customize” panel to add, move or remove any button you want. - Keep your favorite features — add-ons, private browsing, Sync and more - — one quick click away.

    -

    -

    -
    -
    -

    -
    -
    -
    -

    More ways to customize

    - - -
    -
    -
    -
    -

    Themes

    - -

    Make Firefox match your style. Choose from thousands of themes and dress - up your browser with a single click.

    -

    Try it now - -
    Learn more - -

    -

    Next

    -

    Preview of the currently selected theme -

    -
    -
    -
    -

    Add-ons

    -

    Next

    -

    Add-ons are like apps that you install to add features to Firefox. They - let you compare prices, check the weather, listen to music, send a tweet - and more.

    -
      -
    • Read the latest news & blogs
    • -
    • Manage your downloads
    • -
    • Watch videos & view photos
    • -

    Here are a few of our favorites - -
    Learn more - -

    -
    - -

    -

    -
    -
    -
    -

    Awesome Bar

    -

    Next

    -

    The Awesome Bar learns as you browse to make your version of Firefox unique. - Find and return to your favorite sites without having to remember a URL.

    -

    See what it can do for you -

    -
    -

    Firefox Awesome Bar -

    -
    -
    - -
    - -
    -
    +
    +
    + + +

    It’s easier than ever to personalize Firefox and make it work the way + you do. +
    No other browser gives you so much choice and flexibility.

    +
    +

    +
    +
    +
    +
    +

    Designed to
    be redesigned

    + +

    Get fast and easy access to the features you use most in the new menu. + Open the “Customize” panel to add, move or remove any button you want. + Keep your favorite features — add-ons, private browsing, Sync and more + — one quick click away.

    +

    +

    +
    +

    +
    +
    +
    +

    More ways to customize

    + + +
    +
    +
    +
    +

    Themes

    + +

    Make Firefox match your style. Choose from thousands of themes and dress + up your browser with a single click.

    +

    Try it now + +
    Learn more + +

    Next

    Preview of the currently selected theme +

    +
    +
    +
    +

    Add-ons

    +

    Next

    Add-ons are like apps that you install to add features to Firefox. They + let you compare prices, check the weather, listen to music, send a tweet + and more.

    +
      +
    • Read the latest news & blogs
    • +
    • Manage your downloads
    • +
    • Watch videos & view photos
    • +

    Here are a few of our favorites + +
    Learn more + +

    + +

    +

    +
    +
    +
    +

    Awesome Bar

    +

    Next

    The Awesome Bar learns as you browse to make your version of Firefox unique. + Find and return to your favorite sites without having to remember a URL.

    +

    See what it can do for you +

    +

    Firefox Awesome Bar +

    +
    +
    + + +
    \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/mozilla-2/expected.html b/article_scraper/resources/tests/readability/mozilla-2/expected.html index 47754d0..2aeebe4 100644 --- a/article_scraper/resources/tests/readability/mozilla-2/expected.html +++ b/article_scraper/resources/tests/readability/mozilla-2/expected.html @@ -7,7 +7,7 @@
    • - Screenshot + Screenshot

      WebIDE

      Develop, deploy and debug Firefox OS apps directly in your browser, or on a Firefox OS device, with this tool that replaces App Manager.

      @@ -16,7 +16,7 @@
    • - Screenshot + Screenshot

      Valence

      Develop and debug your apps across multiple browsers and devices with this powerful extension that comes pre-installed with Firefox Developer Edition.

      @@ -40,7 +40,7 @@
      • - Screenshot + Screenshot

        Page Inspector

        Examine the HTML and CSS of any Web page and easily modify the structure and layout of a page.

        @@ -49,7 +49,7 @@
      • - Screenshot + Screenshot

        Web Console

        See logged information associated with a Web page and use Web Console to interact with Web pages using JavaScript.

        @@ -58,7 +58,7 @@
      • - Screenshot + Screenshot

        JavaScript Debugger

        Step through JavaScript code and examine or modify its state to help track down bugs.

        @@ -67,7 +67,7 @@
      • - Screenshot + Screenshot

        Network Monitor

        See all the network requests your browser makes, how long each request takes and details of each request.

        @@ -76,7 +76,7 @@
      • - Screenshot + Screenshot

        Web Audio Editor

        Inspect and interact with Web Audio API in real time to ensure that all audio nodes are connected in the way you expect.

        @@ -85,7 +85,7 @@
      • - Screenshot + Screenshot

        Style Editor

        View and edit CSS styles associated with a Web page, create new ones and apply existing CSS stylesheets to any page.

        @@ -94,4 +94,4 @@
      - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/msn/expected.html b/article_scraper/resources/tests/readability/msn/expected.html index 55f6fd5..157f0bd 100644 --- a/article_scraper/resources/tests/readability/msn/expected.html +++ b/article_scraper/resources/tests/readability/msn/expected.html @@ -2,8 +2,8 @@

      - - <span style="font-size:13px;">Nintendo/Apple</span> + + <span style="font-size:13px;">Nintendo/Apple</span> © Provided by Business Insider Inc @@ -16,7 +16,7 @@

      The name and basic idea might sound like one of those endless score attack games like "Temple Run," but that's not the case. "Super Mario Run" is divided into hand-crafted levels with a clear end-point like any other Mario game, meaning you're essentially getting the Mario experience for $10 without needing to control his movement.

      $10 might seem like a bit much compared to the $0 people pay for most mobile games, but it's possible the game has $10 worth of levels to play in it. It's also not iPhone exclusive, but the Android version will launch at a later, currently unknown date.

      To see "Super Mario Run" in action, check out the footage below:

      -
      +
      - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/normalize-spaces/expected.html b/article_scraper/resources/tests/readability/normalize-spaces/expected.html index e7abead..859bbe8 100644 --- a/article_scraper/resources/tests/readability/normalize-spaces/expected.html +++ b/article_scraper/resources/tests/readability/normalize-spaces/expected.html @@ -23,4 +23,4 @@ cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

      - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/nytimes-1/expected.html b/article_scraper/resources/tests/readability/nytimes-1/expected.html index 3d978c8..9e42900 100644 --- a/article_scraper/resources/tests/readability/nytimes-1/expected.html +++ b/article_scraper/resources/tests/readability/nytimes-1/expected.html @@ -16,9 +16,9 @@
      Photo
      -

      - - +

      + +
      United Nations peacekeepers at a refugee camp in Sudan on Monday. In exchange for the lifting of United States trade sanctions, Sudan has said it will improve access for aid groups, stop supporting rebels in neighboring South Sudan and cooperate with American intelligence agents. @@ -50,8 +50,7 @@

      Mr. Reeves said he thought that the American government was being manipulated and that the Obama administration had made a “deal with the devil.”

      Continue reading the main story -

      - +

      @@ -60,30 +59,17 @@ - -
      - - - -
      + + -
      - - - - - -
      + - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/nytimes-2/expected.html b/article_scraper/resources/tests/readability/nytimes-2/expected.html index 3818957..877f805 100644 --- a/article_scraper/resources/tests/readability/nytimes-2/expected.html +++ b/article_scraper/resources/tests/readability/nytimes-2/expected.html @@ -18,9 +18,9 @@
      Photo
      -

      - - +

      + +
      @@ -31,8 +31,7 @@

      First, let’s say what the Yahoo sale is not. It is not a sale of the publicly traded company. Instead, it is a sale of the Yahoo subsidiary and some related assets to Verizon.

      The sale is being done in two steps. The first step will be the transfer of any assets related to Yahoo business to a singular subsidiary. This includes the stock in the business subsidiaries that make up Yahoo that are not already in the single subsidiary, as well as the odd assets like benefit plan rights. This is what is being sold to Verizon. A license of Yahoo’s oldest patents is being held back in the so-called Excalibur portfolio. This will stay with Yahoo, as will Yahoo’s stakes in Alibaba Group and Yahoo Japan.

      It is hard to overestimate how complex an asset sale like this is. Some of the assets are self-contained, but they must be gathered up and transferred. Employees need to be shuffled around and compensation arrangements redone. Many contracts, like the now-infamous one struck with the search engine Mozilla, which may result in a payment of up to a $1 billion, will contain change-of-control provisions that will be set off and have to be addressed. Tax issues always loom large.

      Continue reading the main story -

      - +

      @@ -57,8 +56,7 @@

      Whether this is the most tax-efficient way is unclear to me as a nontax lawyer (email me if you know). Yahoo is likely to have a tax bill on the sale, possibly a substantial one. And I presume there were legal reasons for not using a Morris Trust structure, in which Yahoo would have been spun off and immediately sold to Verizon so that only Yahoo’s shareholders paid tax on the deal. In truth, the Yahoo assets being sold are only about 10 percent of the value of the company, so the time and logistics for such a sale when Yahoo is a melting ice cube may not have been worth it.

      Finally, if another bidder still wants to acquire Yahoo, it has time. The agreement with Verizon allows Yahoo to terminate the deal and accept a superior offer by paying a $144 million breakup fee to Verizon. And if Yahoo shareholders change their minds and want to stick with Yahoo’s chief executive, Marissa Mayer, and vote down the deal, there is a so-called naked no-vote termination fee of $15 million payable to Verizon to reimburse expenses.

      All in all, this was as hairy a deal as they come. There was the procedural and logistical complications of selling a company when the chief executive wanted to stay. Then there was the fact that this was an asset sale, including all of the challenges that go with it. Throw in all of the tax issues and the fact that this is a public company, and it is likely that the lawyers involved will have nightmares for years to come.

      Continue reading the main story -

      -
      +

      @@ -67,30 +65,17 @@ - -
      - - - -
      + + -
      - - - - - -
      + - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/nytimes-3/expected.html b/article_scraper/resources/tests/readability/nytimes-3/expected.html index 918f9c8..9e9df5a 100644 --- a/article_scraper/resources/tests/readability/nytimes-3/expected.html +++ b/article_scraper/resources/tests/readability/nytimes-3/expected.html @@ -8,7 +8,7 @@

      -

      Image +

      Image

      A Con Edison worker repairing underground cables this month in Flushing, Queens. The likely source of the problem was water and rock salt that had seeped underground.CreditCreditChang W. Lee/The New York Times @@ -17,7 +17,7 @@
      -

      Corey Kilgannon +

      Corey Kilgannon

      @@ -25,9 +25,7 @@
    • -
    • - -
    • +
    @@ -59,7 +57,7 @@
    -

    Image +

    Image

    In the late 1800s, overhead utilities were buried to lessen the exposure to winter weather.CreditKirsten Luce for The New York Times @@ -99,7 +97,7 @@
    There are typically between 400 and 600 water main breaks each year in New York City, an official said.CreditMichael Appleton for The New York Times
    -
    +
    - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/nytimes-4/expected.html b/article_scraper/resources/tests/readability/nytimes-4/expected.html index e923b0a..778f648 100644 --- a/article_scraper/resources/tests/readability/nytimes-4/expected.html +++ b/article_scraper/resources/tests/readability/nytimes-4/expected.html @@ -8,7 +8,7 @@

    -

    Image +

    Image

    Interest payments on the federal debt could surpass the Defense Department budget in 2023.CreditCreditJeon Heon-Kyun/EPA, via Shutterstock @@ -21,9 +21,7 @@
  9. -
  10. - -
  11. +
    @@ -221,4 +219,4 @@ - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/nytimes-5/expected.html b/article_scraper/resources/tests/readability/nytimes-5/expected.html index e9b4cd9..4b9c82c 100644 --- a/article_scraper/resources/tests/readability/nytimes-5/expected.html +++ b/article_scraper/resources/tests/readability/nytimes-5/expected.html @@ -12,7 +12,7 @@
  12. - PhotoXi Jinping, el líder de China, arriesgó su prestigio personal cuando su país se postuló para organizar los Juegos de Invierno 2022; hasta ahora el país ha cumplido sus promesas. + PhotoXi Jinping, el líder de China, arriesgó su prestigio personal cuando su país se postuló para organizar los Juegos de Invierno 2022; hasta ahora el país ha cumplido sus promesas.
    CreditKevin Frayer/Getty Images
    @@ -23,7 +23,7 @@
  13. - Photo + Photo
    CreditEllen Surrey
    @@ -50,7 +50,7 @@
  14. - Photo + Photo
    CreditErik Carter
    @@ -75,7 +75,7 @@
  15. - Photo + Photo
    CreditTed + Chelsea Cavanaugh para The New York Times
    @@ -105,16 +105,14 @@

    - Opinión - -

    -Más en Opinión › + Opinión + Más en Opinión ›
    1. - Photo + Photo
      CreditDanielle Chenette
      @@ -125,7 +123,7 @@
    2. - Photo + Photo
      CreditCari Vander Yacht
      @@ -136,7 +134,7 @@
    3. - Photo   + Photo  
      CreditBianca Bagnarelli
      @@ -149,7 +147,7 @@
    4. - Photo + Photo
      CreditKim Raff for The New York Times
      @@ -164,16 +162,14 @@

      - Especial - -

      -Más en Especial › + Especial + Más en Especial ›
      1. - Photo  + Photo 
        CreditPhoto Illustration by Andrew B. Myers for The New York Times
        @@ -194,7 +190,7 @@
      2. - Photo + Photo
        CreditPhoto Illustration by Andrew B. Myers for The New York Times
        @@ -205,7 +201,7 @@
      3. - Photo + Photo
        CreditFotoilustraciones de Andrew B. Myers para The New York Times
        @@ -216,7 +212,7 @@
      4. - Photo + Photo
        CreditFotoilustración de Andrew B. Myers para The New York Times
        @@ -230,16 +226,14 @@

        - El brote de Coronavirus - -

        -Más en El brote de Coronavirus › + El brote de Coronavirus + Más en El brote de Coronavirus ›
        1. - Photo + Photo
          Credit
          @@ -250,7 +244,7 @@
        2. - PhotoLargas filas para hacerse pruebas de coronavirus en Jonesboro, Georgia, este mes. La variante ómicron se identificó a finales de noviembre, por lo que es demasiado pronto para decir cuánto tiempo pueden persistir los síntomas. + PhotoLargas filas para hacerse pruebas de coronavirus en Jonesboro, Georgia, este mes. La variante ómicron se identificó a finales de noviembre, por lo que es demasiado pronto para decir cuánto tiempo pueden persistir los síntomas.
          CreditDustin Chambers para The New York Times
          @@ -271,7 +265,7 @@
        3. - PhotoA 3-D plaster model of a coronavirus spike protein in the office of Dr. Barney Graham of the Vaccine Research Center of the National Institutes of Health. + PhotoA 3-D plaster model of a coronavirus spike protein in the office of Dr. Barney Graham of the Vaccine Research Center of the National Institutes of Health.
          CreditJohnathon Kelso for The New York Times
          @@ -282,7 +276,7 @@
        4. - PhotoUn centro de pruebas de COVID-19 realizadas con saliva en la Universidad de Minnesota, en Mineápolis + PhotoUn centro de pruebas de COVID-19 realizadas con saliva en la Universidad de Minnesota, en Mineápolis
          CreditJenn Ackerman para The New York Times
          @@ -293,7 +287,7 @@
        5. - Photo + Photo
          CreditCharlie Rubin para The New York Times
          @@ -306,16 +300,14 @@

          - Estados Unidos - -

          -Más en Estados Unidos › + Estados Unidos + Más en Estados Unidos ›
          1. - PhotoUna presentación sobre la Operación Estrella Solitaria, en Weslaco, Texas, el año pasado + PhotoUna presentación sobre la Operación Estrella Solitaria, en Weslaco, Texas, el año pasado
            CreditChristopher Lee para The New York Times
            @@ -326,7 +318,7 @@
          2. - PhotoUna multitud se reunió en el National Mall el 6 de enero de 2021, cuando el expresidente Donald Trump cuestionó los resultados de las elecciones de 2020. + PhotoUna multitud se reunió en el National Mall el 6 de enero de 2021, cuando el expresidente Donald Trump cuestionó los resultados de las elecciones de 2020.
            CreditPete Marovich para The New York Times
            @@ -337,7 +329,7 @@
          3. - PhotoNinguna de las más de 729 personas acusadas en relación con los disturbios del Capitolio tiene hasta ahora ninguna conexión con los antifa, según una base de datos de NPR sobre registros de detenciones. + PhotoNinguna de las más de 729 personas acusadas en relación con los disturbios del Capitolio tiene hasta ahora ninguna conexión con los antifa, según una base de datos de NPR sobre registros de detenciones.
            CreditJason Andrew para The New York Times
            @@ -348,7 +340,7 @@
          4. - PhotoEl expresidente Donald Trump el año pasado. Liz Cheney, representante republicana por Wyoming, ha calificado su lenta respuesta al atentado del 6 de enero como una negligencia en el cumplimiento del deber. + PhotoEl expresidente Donald Trump el año pasado. Liz Cheney, representante republicana por Wyoming, ha calificado su lenta respuesta al atentado del 6 de enero como una negligencia en el cumplimiento del deber.
            CreditCooper Neill para The New York Times
            @@ -359,7 +351,7 @@
          5. - - Buscar + Buscar
          6. -
            - - -
            +
          -
          +
        6. \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/pixnet/expected.html b/article_scraper/resources/tests/readability/pixnet/expected.html index ec634cb..290717e 100644 --- a/article_scraper/resources/tests/readability/pixnet/expected.html +++ b/article_scraper/resources/tests/readability/pixnet/expected.html @@ -1,6 +1,6 @@

          - 12-IMG_3886.jpg + 12-IMG_3886.jpg

          一波波接續性低溫寒流報到 已將新竹尖石鄉後山一帶層層山巒披上嫣紅的彩衣

          @@ -14,7 +14,7 @@

          營區內除了露營、民宿、餐飲賞楓項目多了許多原木飾品更有畫龍點睛加乘效果

          -

          30-IMG_4228.jpg

          +

          30-IMG_4228.jpg

          廣受歡迎的美樹營地有個很大特色就是楓紅時期楓香樹由綠轉黃、轉紅到楓紅層層

          一來到"美樹"馬上眼睛為之一亮 也會深深地為那多種顏色多層次渲染之下楓紅而迷惑 @@ -29,66 +29,66 @@

          -

          31-IMG_4231.jpg

          +

          31-IMG_4231.jpg

          每年12月向來是攝影班外拍的絕佳場所之一 楓紅期間入園費$50元

          園區給愛攝一族淨空場景而不是散搭帳蓬之下反而影響拍照畫面與構圖取景

          露營的話則須待中午過後再進場搭帳的彈性做法個人也相當支持這樣的權宜之計

          -

          - P1610088.jpg +

          + P1610088.jpg

          來到現場已是落葉飄飄堆疊滿地 不時隨著風吹雨襲而葉落垂地

          -

          - P1610069.jpg +

          + P1610069.jpg

          不忍踩過剛剛掉落的樹葉 沿著前人足跡踏痕輕踩而行

          雖然只是一廂情願的想法 終究還是不可避免地將會化為塵土

          -

          02-P1610080.jpg

          +

          02-P1610080.jpg

          葉落繽紛顯得幾分蕭瑟氣息 空氣中可以嗅得出來依然瀰漫著濕寒水氣

          偶而還會飄下來一些霧氣水滴 不時張望尋找最佳楓葉主題

          -

          04-P1610087.jpg

          +

          04-P1610087.jpg

          外拍的攝影班學員一堆早已不時穿梭其間

          各自努力地找尋自認為最好的拍攝角度

          -

          05-P1610099.jpg

          +

          05-P1610099.jpg

          -

          P1610095.jpg

          +

          P1610095.jpg

          -

          13-IMG_3891.jpg

          +

          13-IMG_3891.jpg

          -

          15-IMG_3906.jpg

          +

          15-IMG_3906.jpg

          "水槽"上面的這幾隻彩繪版貓頭鷹也太可愛了

          同樣的造型加上不同色彩宛如賦予不同的生命力一般 cool!

          -

          16-IMG_3916.jpg

          +

          16-IMG_3916.jpg

          雨水洗塵後的枝頭固然掉落些葉片是否也洗去塵勞憂傷

          -

          17-IMG_3919.jpg

          +

          17-IMG_3919.jpg

          -

          06-IMG_3853.jpg

          +

          06-IMG_3853.jpg

          喜歡拍照的不論是平面掃描、天空搜尋、地上地毯式搜索

          有如小說偵探一般 不放過蛛絲馬跡地用力尋尋覓覓找尋最美角度

          -

          07-P1610104.jpg

          +

          07-P1610104.jpg

          -

          08-IMG_3862.jpg

          +

          08-IMG_3862.jpg

          原本這周是由小朱團長早在一年前就跟"簍信"預定下來的場子

          早上從台北出門之際還是小雨不斷細雨紛飛來到此地雖雨已停

          但多日來的雨勢不斷已有部分區域水漬成攤並不適合落置帳篷

          @@ -98,32 +98,32 @@

          -

          18-P1610141.jpg

          +

          18-P1610141.jpg

          午後從"秀巒"回到美樹之際已經全數撤退只剩下我們三車留下來

          唯有"離開地球表面"睡車上的才可以不受到地上泥濘而影響

          -

          19-IMG_3933.jpg

          +

          19-IMG_3933.jpg

          -

          14-P1610134.jpg

          +

          14-P1610134.jpg

          午後山嵐興起雲氣遊蕩盤旋在對岸山頭 人潮來來去去似乎也沒有減少

          -

          44-P1610283.jpg

          -

          美樹民宿有開設餐廳 室內簡單佈置提供伙食餐飲

          +

          44-P1610283.jpg

          +

          美樹民宿有開設餐廳 室內簡單佈置提供伙食餐飲

          - P1610212.jpg + P1610212.jpg

          這兩間是民宿房間 跟著民宿主人"簍信"聊起來還提到日後將改變成兩層木屋

          一樓則是咖啡飲料/賣店提供訪客來賓有個落腳席座之地 二樓才會是民宿房間

          -

          心中有了計畫想法才會有日後的夢想藍圖 相信將會改變得更好的民宿露營環境

          +

          心中有了計畫想法才會有日後的夢想藍圖 相信將會改變得更好的民宿露營環境

          - P1610219.jpg + P1610219.jpg

          民宿前這一大區楓香林為土質營位 大致區分前、後兩個營區

          前面這一區約可搭上十二帳/車/廳 後面那區也大約4~5帳/車/廳

          @@ -131,26 +131,26 @@

          -

          10-P1610114.jpg

          +

          10-P1610114.jpg

          營區水電方便 水槽也很有特色

          -

          22-P1610245.jpg

          +

          22-P1610245.jpg

          這次選擇左側地勢高些以防午夜下雨泥濘

          -

          20-P1610238.jpg

          +

          20-P1610238.jpg

          "野馬"特地帶來了冬至應景食材ㄜ---湯圓

          這家還是最近被評比第一名氣的湯圓專賣店

          -

          21-P1610241.jpg

          -

          向來對於湯圓是敬謝不敏 沒想到是出乎意料之外的好吃 沒話說!
          +

          21-P1610241.jpg

          +

          向來對於湯圓是敬謝不敏 沒想到是出乎意料之外的好吃 沒話說!

          -

          24-IMG_4113.jpg

          +

          24-IMG_4113.jpg

          喜歡原住民朋友的坦率、真誠 要將民宿營地經營的有聲有色並非容易之事

          午茶時間與"簍信"閒聊分享著他的觀點理念之時很支持對於環境應有生態保護

          @@ -159,46 +159,46 @@

          -

          32-IMG_4248.jpg

          +

          32-IMG_4248.jpg

          -

          25-IMG_4152.jpg

          +

          25-IMG_4152.jpg

          入夜前雨絲終於漸漸緩和下來 雖然氣溫很低卻沒感受到寒冷的跡象

          是山谷中少了寒氣還是美樹營區裡的人熱情洋溢暖化了不少寒意

          -

          IMG_4158.jpg

          -

          聖誕前夕裝點些聖誕飾品 感受一下節慶的氛圍

          +

          IMG_4158.jpg

          +

          聖誕前夕裝點些聖誕飾品 感受一下節慶的氛圍

          -

          26-P1610261.jpg

          +

          26-P1610261.jpg

          晚餐準備了砂鍋魚頭

          -

          46-1021221美樹露營.jpg

          +

          46-1021221美樹露營.jpg

          "蒯嫂"還特地準備著羊肩排、鹹豬肉、柳葉魚...哇!這哩澎湃哩...

           "永老爺"早已備妥了好酒為遠自台南來的蒯兄嫂敬一杯囉

          感謝蒯嫂精心準備的好料理 食指大動好菜色感恩ㄟ!

          -

          27-IMG_4173.jpg

          -

          吃得快精光之際...才想到忘了拍合照...(哇哩咧 ^&*()

          +

          27-IMG_4173.jpg

          +

          吃得快精光之際...才想到忘了拍合照...(哇哩咧 ^&*()

          -

          28-IMG_4178.jpg

          +

          28-IMG_4178.jpg

          -

          29-IMG_4188.jpg

          +

          29-IMG_4188.jpg

          隔日睡到很晚才起床 不用拍日出晨光的營地對我來說都是個幸福的睡眠

          哪怕是葉落飄零落滿地還是睡夢周公召見而去 起床的事~差點都忘記了

          - IMG_4205.jpg + IMG_4205.jpg

          昨天細雨紛飛依然打落了不少落葉中間這株整個都快變成枯枝了

          昨天依稀凋零稀疏的楓葉殘留今兒個完全不復存在(上周是最美的代名詞)

          -

          33-IMG_4255.jpg

          +

          33-IMG_4255.jpg

          上回來得太早沒能見到楓葉泛紅 這次晚了一周已陸續落葉也無從比對楓葉差異性 

          另一種角度看不論青楓、金黃葉紅的楓香、葉落飄零秋滿霜、落葉枯枝的蕭瑟 @@ -207,44 +207,44 @@

          -

          34-P1610269.jpg

          -

          早起的"蒯嫂"已經備好熱騰騰中式稀飯、包子、蔬果 頓時~有幸福的感覺

          +

          34-P1610269.jpg

          +

          早起的"蒯嫂"已經備好熱騰騰中式稀飯、包子、蔬果 頓時~有幸福的感覺

          -

          35-IMG_4303.jpg

          +

          35-IMG_4303.jpg

          星期天早上趁著攝影團還沒入場先來人物場景特寫

          野馬家兩張新"座椅"就當作是試坐囉!拍謝哩

          -

          38-IMG_4330.jpg

          +

          38-IMG_4330.jpg

          - P1610279.jpg + P1610279.jpg

          難得有此無人美景在楓樹下的聖誕氛圍也一定要來一張才行

          -

          37-IMG_4323.jpg

          +

          37-IMG_4323.jpg

          三家合照(Hero也一定要入鏡的)

          -

          40-IMG_4342.jpg

          +

          40-IMG_4342.jpg

          接著攝影團入場帶隊老師請求借個時間也來讓學員練習楓樹下的聖誕飾品

          此時剛好也遇到早在FB社團相互回應卻頭一次謀面的Mr."大雄"真是幸會了

          -

          42-IMG_4382.jpg

          +

          42-IMG_4382.jpg

          接近中午時分陽光漸露 藍天帷幕再次嶄露頭角 ~ 久違了!

          期盼下的天空終於放晴 沒有缺席的藍天還是準時赴約如期出席

          -

          41-IMG_4366.jpg

          +

          41-IMG_4366.jpg

          這兩天肉肉(Hero)天雨濕滑無法自由奔跑都快悶壞了

          天晴後"蒯嫂"帶著散步遊園也好解解悶

          -

          43-IMG_4383.jpg

          +

          43-IMG_4383.jpg

          收拾好裝備準備離開營地 亮麗的天空鮮明對比下的楓樹林又讓人覺得有點捨不得離開

          道別了"美樹營地"準備前往而行"石磊國小"一個很生疏的小學座落在這深山部落裡

          @@ -255,7 +255,7 @@ 資訊

          -

          聯絡電話:03-584-7231  行動: 0937-141993
          林錦武 (泰雅族名: 摟信)
          營地地址:新竹縣尖石鄉玉峰村6鄰20號 +

          聯絡電話:03-584-7231行動: 0937-141993
          林錦武 (泰雅族名: 摟信)
          營地地址:新竹縣尖石鄉玉峰村6鄰20號
          @@ -263,7 +263,7 @@

          每帳$600 兩間衛浴使用燒材鍋爐/ 兩間全天瓦斯 廁所蹲式X 3

          楓紅期間須過中午才可搭帳 水電便利

          -

          GPS: N24 39 16.4 E121 18 19.5

          +

          GPS: N24 39 16.4 E121 18 19.5

          如果您喜歡"史蒂文的家"圖文分享 邀請您到 FB 粉絲團 @@ -277,4 +277,4 @@ -

          +
        7. \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/qq/expected.html b/article_scraper/resources/tests/readability/qq/expected.html index cf360cc..baad181 100644 --- a/article_scraper/resources/tests/readability/qq/expected.html +++ b/article_scraper/resources/tests/readability/qq/expected.html @@ -9,12 +9,10 @@
          -

          转播到腾讯微博

          -

          DeepMind新电脑已可利用记忆自学 人工智能迈上新台阶

          -
          +

          转播到腾讯微博

          DeepMind新电脑已可利用记忆自学 人工智能迈上新台阶

          TNW中文站 10月14日报道

          - 谷歌(微博) + 谷歌(微博) 在2014年收购的人工智能公司DeepMind开发出一款能够用自己的记忆学习新知识并利用这些知识来回答问题的计算机。

          这款产品具有极其重要的意义,因为这意味着未来的人工智能技术可能不需要人类来教它就能回答人类提出的问题。

          DeepMind表示,这款名为DNC(可微神经计算机)的AI模型可以接受家谱和伦敦地铁网络地图这样的信息,还可以回答与那些数据结构中的不同项目之间的关系有关的复杂问题。

          @@ -28,11 +26,9 @@

          -

          转播到腾讯微博

          -

          -
          +

          转播到腾讯微博

          + - @@ -59,4 +55,4 @@ -
        8. +
        9. \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/quanta-1/expected.html b/article_scraper/resources/tests/readability/quanta-1/expected.html index 513b4b4..8f7af0d 100644 --- a/article_scraper/resources/tests/readability/quanta-1/expected.html +++ b/article_scraper/resources/tests/readability/quanta-1/expected.html @@ -1,5 +1,4 @@ -
          -
          +

          A little over half a century ago, chaos started spilling out of a famous experiment. It came not from a petri dish, a beaker or an astronomical observatory, but from the vacuum tubes and diodes of a Royal McBee LGP-30. This “desk” computer — it was the size of a desk — weighed some 800 pounds and sounded like a passing propeller plane. It was so loud that it even got its own office on the fifth floor in Building 24, a drab structure near the center of the Massachusetts Institute of Technology. Instructions for the computer came from down the hall, from the office of a meteorologist named Edward Norton Lorenz.

          @@ -35,8 +34,7 @@

          For Hamilton, these were formative years. She recalls being out at a party at three or four a.m., realizing that the LGP-30 wasn’t set to produce results by the next morning, and rushing over with a few friends to start it up. Another time, frustrated by all the things that had to be done to make another run after fixing an error, she devised a way to bypass the computer’s clunky debugging process. To Lorenz’s delight, Hamilton would take the paper tape that fed the machine, roll it out the length of the hallway, and edit the binary code with a sharp pencil. “I’d poke holes for ones, and I’d cover up with Scotch tape the others,” she said. “He just got a kick out of it.”

          -
          -
          +

          There were desks in the computer room, but because of the noise, Lorenz, his secretary, his programmer and his graduate students all shared the other office. The plan was to use the desk computer, then a total novelty, to test competing strategies of weather prediction in a way you couldn’t do with pencil and paper.

          @@ -55,8 +53,7 @@

          This meant that in chaotic systems the smallest fluctuations get amplified. Weather predictions fail once they reach some point in the future because we can never measure the initial state of the atmosphere precisely enough. Or, as Lorenz would later present the idea, even a seagull flapping its wings might eventually make a big difference to the weather. (In 1972, the seagull was deposed when a conference organizer, unable to check back about what Lorenz wanted to call an upcoming talk, wrote his own title that switched the metaphor to a butterfly.)

          -
          -
          +

          Many accounts, including the one in Gleick’s book, date the discovery of this butterfly effect to 1961, with the paper following in 1963. But in November 1960, Lorenz described it during the Q&A session following a talk he gave at a conference on numerical weather prediction in Tokyo. After his talk, a question came from a member of the audience: “Did you change the initial condition just slightly and see how much different results were?”

          @@ -141,5 +138,4 @@

          This article was reprinted on Wired.com.

          -
          -
          +
          \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/remove-aria-hidden/expected.html b/article_scraper/resources/tests/readability/remove-aria-hidden/expected.html index 9888ab3..9441cd2 100644 --- a/article_scraper/resources/tests/readability/remove-aria-hidden/expected.html +++ b/article_scraper/resources/tests/readability/remove-aria-hidden/expected.html @@ -4,4 +4,4 @@

          Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

          -
        +
      5. \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/remove-extra-paragraphs/expected.html b/article_scraper/resources/tests/readability/remove-extra-paragraphs/expected.html index b67ce8e..cfd6436 100644 --- a/article_scraper/resources/tests/readability/remove-extra-paragraphs/expected.html +++ b/article_scraper/resources/tests/readability/remove-extra-paragraphs/expected.html @@ -1,5 +1,4 @@ -
        -
        +

        Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.

        @@ -12,8 +11,7 @@ cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

        -
        -
        +

        Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

        @@ -22,5 +20,4 @@ cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

        -
        -
        +
        \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/remove-script-tags/expected.html b/article_scraper/resources/tests/readability/remove-script-tags/expected.html index 7b94e17..2b4dced 100644 --- a/article_scraper/resources/tests/readability/remove-script-tags/expected.html +++ b/article_scraper/resources/tests/readability/remove-script-tags/expected.html @@ -1,5 +1,4 @@ -
        -
        +

        Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.

        Ut enim ad minim veniam, @@ -9,8 +8,7 @@

        Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

        -
        -
        +

        Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

        @@ -20,5 +18,4 @@ Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

        -
        -
        +
        \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/reordering-paragraphs/expected.html b/article_scraper/resources/tests/readability/reordering-paragraphs/expected.html index b09abcb..2801a90 100644 --- a/article_scraper/resources/tests/readability/reordering-paragraphs/expected.html +++ b/article_scraper/resources/tests/readability/reordering-paragraphs/expected.html @@ -1,5 +1,5 @@
        -
        +

        Regarding item# 11111, under sufficiently extreme conditions, quarks may become deconfined and exist as free particles. In the course of asymptotic freedom, the strong interaction becomes weaker at higher temperatures. @@ -24,5 +24,5 @@ of matter is called quark-gluon plasma.[81] The exact conditions needed to give rise to this state are unknown and have been the subject of a great deal of speculation and experimentation.

        -
        -
        +
        +
      6. \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/replace-font-tags/expected.html b/article_scraper/resources/tests/readability/replace-font-tags/expected.html index 6b2b9d4..21171f1 100644 --- a/article_scraper/resources/tests/readability/replace-font-tags/expected.html +++ b/article_scraper/resources/tests/readability/replace-font-tags/expected.html @@ -15,4 +15,4 @@ cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

        -
      +
    5. \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/salon-1/expected.html b/article_scraper/resources/tests/readability/salon-1/expected.html index bc73879..46b478b 100644 --- a/article_scraper/resources/tests/readability/salon-1/expected.html +++ b/article_scraper/resources/tests/readability/salon-1/expected.html @@ -1,69 +1,60 @@ -
      -

      Horror stories about the increasingly unpopular taxi service Uber have +

      Horror stories about the increasingly unpopular taxi service Uber have been commonplace in recent months, but there is still much to be learned from its handling of the recent hostage drama in downtown Sydney, Australia. We’re told that we reveal our true character in moments of crisis, and - apparently that’s as true for companies as it is for individuals.

      -

      A number of experts have challenged the idea that the horrific explosion + apparently that’s as true for companies as it is for individuals.

      A number of experts have challenged the idea that the horrific explosion of violence in a Sydney café was “terrorism,” since the attacker was mentally unbalanced and acted alone. But, terror or not, the ordeal was certainly terrifying. Amid the chaos and uncertainty, the city believed itself to - be under a coordinated and deadly attack.

      -

      Uber had an interesting, if predictable, response to the panic and mayhem: - It raised prices. A lot.

      -

      In case you missed the story, the facts are these: Someone named Man Haron + be under a coordinated and deadly attack.

      Uber had an interesting, if predictable, response to the panic and mayhem: + It raised prices. A lot.

      In case you missed the story, the facts are these: Someone named Man Haron Monis, who was considered mentally unstable and had been investigated for murdering his ex-wife, seized hostages in a café that was located in Sydney’s Central Business District or “CBD.” In the process he put up an Islamic flag – “igniting,” as Reuters reported, - “fears of a jihadist attack in the heart of the country’s biggest city.”

      -

      In the midst of the fear, Uber stepped in and tweeted this announcement:  + “fears of a jihadist attack in the heart of the country’s biggest city.”

      In the midst of the fear, Uber stepped in and tweeted this announcement:  “We are all concerned with events in CBD. Fares have increased to encourage more drivers to come online & pick up passengers in the area.” -

      -

      As Mashable reports, +

      As Mashable reports, the company announced that it would charge a minimum of $100 Australian to take passengers from the area immediately surrounding the ongoing crisis, and prices increased by as much as four times the standard amount. A firestorm of criticism quickly erupted – “@Uber_Sydney stop being assholes,” one Twitter response began – and Uber soon found itself - offering free rides out of the troubled area instead.

      -

      That opener suggests that Uber, as part of a community under siege, is - preparing to respond in a civic manner. -

      -

      “… Fares have increased to encourage more drivers to come online & pick up passengers in the area.” -

      -
      + offering free rides out of the troubled area instead.

      That opener suggests that Uber, as part of a community under siege, is + preparing to respond in a civic manner. +

      “… Fares have increased to encourage more drivers to come online & pick up passengers in the area.” +

      But, despite the expression of shared concern, there is no sense of civitas to be found in the statement that follows. There is only a transaction, executed at what the corporation believes to be market value. Lesson #1 about Uber is, therefore, that in its view there is no heroism, only self-interest. This is Ayn Rand’s brutal, irrational and primitive philosophy in its purest - form: altruism is evil, and self-interest is the only true heroism. + form: altruism is evil, and self-interest is the only true heroism.

      There was once a time when we might have read of “hero cabdrivers” or “hero bus drivers” placing themselves in harm’s way to rescue their fellow citizens. For its part, Uber might have suggested that it would use its network of drivers and its scheduling software to recruit volunteer drivers - for a rescue mission. + for a rescue mission.

      Instead, we are told that Uber’s pricing surge was its expression of concern. Uber’s way to address a human crisis is apparently by letting the market govern human behavior, as if there were (in libertarian economist Tyler Cowen’s phrase) “markets in everything” – including the lives of - a city’s beleaguered citizens (and its Uber drivers). + a city’s beleaguered citizens (and its Uber drivers).

      Where would this kind of market-driven practice leave poor or middle-income citizens in a time of crisis? If they can’t afford the “surged” price, apparently it would leave them squarely in the line of fire. And come to think of it, why would Uber drivers value their lives so cheaply, unless - they’re underpaid? + they’re underpaid?

      One of the lessons of Sydney is this: Uber’s philosophy, whether consciously expressed or not, is that life belongs to the highest bidder – and therefore, by implication, the highest bidder’s life has the greatest value. Society, on the other hand, may choose to believe that every life has equal value - – or that lifesaving services should be available at affordable prices. + – or that lifesaving services should be available at affordable prices.

      If nothing else, the Sydney experience should prove once and for all that there is no such thing as “the sharing economy.” Uber is a taxi company, @@ -71,7 +62,7 @@ sharing” service, where someone who happens to be going in the same direction is willing to take along an extra passenger and split gas costs. A ride-sharing service wouldn’t find itself “increasing fares to encourage more drivers” - to come into Sydney’s terrorized Central Business District. + to come into Sydney’s terrorized Central Business District.

      A “sharing economy,” by definition, is lateral in structure. It is a peer-to-peer economy. But Uber, as its name suggests, is hierarchical in structure. @@ -79,20 +70,20 @@ from it while guiding their movements and determining their level of earnings. And its pricing mechanisms impose unpredictable costs on its customers, extracting greater amounts whenever the data suggests customers can be - compelled to pay them. + compelled to pay them.

      -

      This is a top-down economy, not a “shared” one. +

      This is a top-down economy, not a “shared” one.

      A number of Uber’s fans and supporters defended the company on the grounds that its “surge prices,” including those seen during the Sydney crisis, are determined by an algorithm. But an algorithm can be an ideological statement, and is always a cultural artifact. As human creations, algorithms - reflect their creators. + reflect their creators.

      Uber’s tweet during the Sydney crisis made it sound as if human intervention, rather than algorithmic processes, caused prices to soar that day. But it doesn’t really matter if that surge was manually or algorithmically - driven. Either way the prices were Uber’s doing – and its moral choice. + driven. Either way the prices were Uber’s doing – and its moral choice.

      Uber has been strenuously defending its surge pricing in the wake of accusations (apparently justified) @@ -101,11 +92,11 @@ three times the highest rate on two non-emergency days). But if Uber has its way, it will soon enjoy a monopolistic stranglehold on car service rates in most major markets. And it has demonstrated its willingness to - ignore rules and regulations. That means predictable and affordable - taxi fares could become a thing of the past. + ignore rules and regulations. That meanspredictable and affordable + taxi fares could become a thing of the past.

      In practice, surge pricing could become a new, privatized form of taxation - on middle-class taxi customers. + on middle-class taxi customers.

      Even without surge pricing, Uber and its supporters are hiding its full costs. When middle-class workers are underpaid or deprived of benefits @@ -113,7 +104,7 @@ the entire middle-class economy suffers. Overall wages and benefits are suppressed for the majority, while the wealthy few are made even richer. The invisible costs of ventures like Uber are extracted over time, far - surpassing whatever short-term savings they may occasionally offer. + surpassing whatever short-term savings they may occasionally offer.

      Like Walmart, Uber underpays its employees – many of its drivers are employees, in everything but name – and then drains the social safety net to make @@ -124,13 +115,13 @@ Uber’s often woefully insufficient wages, mean that the rest of us are paying its tab instead. And the lack of income security among Uber’s drivers creates another social cost for Americans – in lost tax revenue, and possibly - in increased use of social services. + in increased use of social services.

      The company’s war on regulation will also carry a social price. Uber and - its supporters don’t seem to understand that regulations exist + its supporters don’t seem to understand thatregulations exist for a reason. It’s true that nobody likes excessive bureaucracy, but not all regulations are excessive or onerous. And when they are, it’s a flaw - in execution rather than principle. + in execution rather than principle.

      Regulations were created because they serve a social purpose, ensuring the free and fair exchange of services and resources among all segments @@ -138,19 +129,19 @@ that the public has a vested interest in ensuring they will be readily available at reasonably affordable prices. That’s not unreasonable for taxi services, especially given the fact that they profit from publicly - maintained roads and bridges. + maintained roads and bridges.

      Uber has presented itself as a modernized, efficient alternative to government oversight. But it’s an evasion of regulation, not its replacement. As Alexis Madrigalreports, Uber has deliberately ignored city regulators and used customer demand to force its model of inadequate self-governance - (my conclusion, not his) onto one city after another. + (my conclusion, not his) onto one city after another.

      Uber presented itself as a refreshing alternative to the over-bureaucratized world of urban transportation. But that’s a false choice. We can streamline sclerotic city regulators, upgrade taxi fleets and even provide users with fancy apps that make it easier to call a cab. The company’s binary presentation - – us, or City Hall – frames the debate in artificial terms. + – us, or City Hall – frames the debate in artificial terms.

      Uber claims that its driver rating system is a more efficient way to monitor drivers, but that’s an entirely unproven assumption. While taxi drivers @@ -158,31 +149,31 @@ Uber drivers – for everything from dirty cars and spider bites to assault with a hammer, fondling and rape– suggest that Uber’s system may not work as well as old-fashioned - regulation. It’s certainly not noticeably superior. + regulation. It’s certainly not noticeably superior.

      In fact, prosecutors in San Francisco and Los Angeles say Uber has been lying to its customers about the level and quality of its background checks. The company now promises it will do a better job at screening drivers. But it won’t tell us what measures its taking to improve its safety record, and it’s fighting the kind of driver scrutiny that - taxicab companies have been required to enforce for many decades. + taxicab companies have been required to enforce for many decades.

      Many reports suggest that beleaguered drivers don’t feel much better about the company than victimized passengers do. They tell horror stories about the company’s hiring and management practices. Uber unilaterally slashes drivers’ rates, - while claiming they don’t need to unionize. (The Teamsters disagree.) + while claiming they don’t need to unionize. (The Teamsters disagree.)

      The company also pushes sketchy, substandard loans onto - its drivers – but hey, what could go wrong? + its drivers – but hey, what could go wrong?

      Uber has many libertarian defenders. And yet, it deceives the press and threatens to spy on journalists, lies to its own employees, keeps its practices a secret and routinely invades the privacy of civilians – sometimes merely for entertainment. (It has a tool, with the Orwellian name the “God View,” - that it can use for monitoring customers’ personal movements.) + that it can use for monitoring customers’ personal movements.)

      -

      Aren’t those the kinds of things libertarians say they hate about government? +

      Aren’t those the kinds of things libertarians say they hate about government?

      This isn’t a “gotcha” exercise. It matters. Uber is the poster child for the pro-privatization, anti-regulatory ideology that ascribes magical powers @@ -191,26 +182,25 @@ Plouffe. Uber is built around a relatively simple app (which relies on government-created technology), but it’s not really a tech company. Above all else Uber is an ideological campaign, a neoliberal project whose real - products are deregulation and the dismantling of the social contract. + products are deregulation and the dismantling of the social contract.

      -

      Or maybe, as that tweeter in Sydney suggested, they’re just assholes. +

      Or maybe, as that tweeter in Sydney suggested, they’re just assholes.

      Either way, it’s important that Uber’s worldview and business practices not be allowed to “disrupt” our economy or our social fabric. People who work hard deserve to make a decent living. Society at large deserves access to safe and affordable transportation. And government, as the collective expression of a democratic society, has a role to play in protecting its - citizens. + citizens.

      And then there’s the matter of our collective psyche. In her book “A Paradise Built in Hell: The Extraordinary Communities that Arise in Disaster,” Rebecca Solnit wrote of the purpose, meaning and deep satisfaction people find when they pull together to help one another in the face of adversity.  But in the world Uber seeks to create, those surges of the spirit would - be replaced by surge pricing. + be replaced by surge pricing.

      You don’t need a “God view” to see what happens next. When heroism is - reduced to a transaction, the soul of a society is sold cheap. + reduced to a transaction, the soul of a society is sold cheap.

      -
      -
      +
      \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/seattletimes-1/expected.html b/article_scraper/resources/tests/readability/seattletimes-1/expected.html index a7c51b9..2a7530e 100644 --- a/article_scraper/resources/tests/readability/seattletimes-1/expected.html +++ b/article_scraper/resources/tests/readability/seattletimes-1/expected.html @@ -72,7 +72,7 @@

      - +
      Amazon-owned Whole Foods touted a price cut on halibut as part of an announcement recently about lower prices on hundreds of items. (Ellen M. Banner / The Seattle Times)
      @@ -176,4 +176,4 @@

      -
    6. +
    \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/simplyfound-1/expected.html b/article_scraper/resources/tests/readability/simplyfound-1/expected.html index f67c3a7..7eb94a7 100644 --- a/article_scraper/resources/tests/readability/simplyfound-1/expected.html +++ b/article_scraper/resources/tests/readability/simplyfound-1/expected.html @@ -9,10 +9,10 @@

    Raspberry Pi in educations - Image: Raspberry Pi Foundation

    -

    In celebration of their 4th year anniversary, the foundation has released Raspberry Pi 3 with the same price tag of $35 USD.  The 3rd revision features a 1.2GHz 64-bit quad-core ARM CPU with integrated Bluetooth 4.1 and 802.11n wireless LAN chipsets.  The ARM Cortex-A53 CPU along with other architectural enhancements making it the fastest Raspberry Pi to-date.  The 3rd revision is reportedly about 50-60% times faster than its predecessor Raspberry Pi 2 and about 10 times faster then the original Raspberry PI.

    +

    In celebration of their 4th year anniversary, the foundation has released Raspberry Pi 3 with the same price tag of$35 USD.  The 3rd revision features a 1.2GHz 64-bit quad-core ARM CPU with integrated Bluetooth 4.1 and 802.11n wireless LAN chipsets.  The ARM Cortex-A53 CPU along with other architectural enhancements making it the fastest Raspberry Pi to-date.  The 3rd revision is reportedly about 50-60% times faster than its predecessor Raspberry Pi 2 and about 10 times faster then the original Raspberry PI.

    Raspberry Pi - Various Usage

    Raspberry Pi 3 is now available via many online resellers.  At this time, you should use a recent 32-bit NOOBS or Raspbian image from their downloads page with a promise of a switch to a 64-bit version only if further investigation proves that there is indeed some value in moving to 64-bit mode.

    -
  16. +
  17. \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/social-buttons/expected.html b/article_scraper/resources/tests/readability/social-buttons/expected.html index 7c98510..16a1339 100644 --- a/article_scraper/resources/tests/readability/social-buttons/expected.html +++ b/article_scraper/resources/tests/readability/social-buttons/expected.html @@ -31,4 +31,4 @@ consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

    -
  18. + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/style-tags-removal/expected.html b/article_scraper/resources/tests/readability/style-tags-removal/expected.html index f587bdf..58e8da5 100644 --- a/article_scraper/resources/tests/readability/style-tags-removal/expected.html +++ b/article_scraper/resources/tests/readability/style-tags-removal/expected.html @@ -18,4 +18,4 @@ cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

    - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/svg-parsing/expected.html b/article_scraper/resources/tests/readability/svg-parsing/expected.html index f0c913c..3e64a73 100644 --- a/article_scraper/resources/tests/readability/svg-parsing/expected.html +++ b/article_scraper/resources/tests/readability/svg-parsing/expected.html @@ -11,12 +11,7 @@ quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

    - - - - - - +

    Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo @@ -35,4 +30,4 @@ quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

    - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/table-style-attributes/expected.html b/article_scraper/resources/tests/readability/table-style-attributes/expected.html index 4778a67..82663d1 100644 --- a/article_scraper/resources/tests/readability/table-style-attributes/expected.html +++ b/article_scraper/resources/tests/readability/table-style-attributes/expected.html @@ -1,7 +1,7 @@

    linux usability -
    ...or, why do I bother.

    © 2002, 2003 +
    ...or, why do I bother.

    © 2002, 2003 Jamie Zawinski

    @@ -33,7 +33,7 @@
  19. "While you have some valid complaints, I'm going to focus on this one inconsequential error you made in your characterization of one of the many roadblocks you encountered. You suck!"
  20. -
  21. "It's your fault for using Red Hat! You should be using Debian/Mandrake/Gentoo instead!" +
  22. "It's your fault for using Red Hat! You should be using Debian/Mandrake/Gentoo instead!"
  23. "Red Hat 7.2 is totally obsolete! It's almost 14 months old! What were you expecting!"
  24. @@ -78,7 +78,7 @@ RPMs, and it sucks about the same as mplayer, and in about the same ways, th

    Oh, and even though I have libdvdcss installed (as evidenced by the fact that Ogle actually works) Xine won't play the same disc that Ogle will play. It seems to be claiming that the CSS stuff isn't installed, which it clearly is.

    An idiocy that all of these programs have in common is that, in addition to opening a window for the movie, and a window for the control panel, they also spray a constant spatter of curses crud on the terminal they were started from. I imagine at some point, there was some user who said, ``this program is pretty nice, but you know what it's missing? It's missing a lot of pointless chatter about what plugins and fonts have been loaded!''

    -
    And here's the Random Commentary section: +
    And here's the Random Commentary section:
    Makali wrote:
      @@ -116,7 +116,7 @@ RPMs, and it sucks about the same as mplayer, and in about the same ways, th simple -- results in someone suggesting that you either A) patch your kernel or B) change distros. It's inevitable and inescapable, like Hitler.

    -
    +
    -

    [ up ]

    -
    +

    [ up ]

    + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/telegraph/expected.html b/article_scraper/resources/tests/readability/telegraph/expected.html index 7b6decb..34cd7cd 100644 --- a/article_scraper/resources/tests/readability/telegraph/expected.html +++ b/article_scraper/resources/tests/readability/telegraph/expected.html @@ -1,27 +1,20 @@ -
    -
    +

    Zimbabwe President Robert Mugabe, his wife Grace and two key figures from her G40 political faction are under house arrest at Mugabe's "Blue House" compound in Harare and are insisting the 93 year-old finishes his presidential term, a source said.

    The G40 figures are cabinet ministers Jonathan Moyo and Saviour Kasukuwere, who fled to the compound after their homes were attacked by troops in Tuesday night's coup, the source, who said he had spoken to people inside the compound, told Reuters.

    Mr Mugabe is resisting mediation by a Catholic priest to allow the former guerrilla a graceful exit after the military takeover.

    The priest, Fidelis Mukonori, is acting as a middle-man between Mr Mugabe and the generals, who seized power in a targeted operation against "criminals" in his entourage, a senior political source told Reuters.

    The source could not provide details of the talks, which appear to be aimed at a smooth and bloodless transition after the departure of Mr Mugabe, who has led Zimbabwe since independence in 1980.

    Mr Mugabe, still seen by many Africans as a liberation hero, is reviled in the West as a despot whose disastrous handling of the economy and willingness to resort to violence to maintain power destroyed one of Africa's most promising states.

    -
    -
    +

    Zimbabwean intelligence reports seen by Reuters suggest that former security chief Emmerson Mnangagwa, who was ousted as vice-president this month, has been mapping out a post-Mugabe vision with the military and opposition for more than a year.

    -
    -
    +

    Fuelling speculation that Mnangagwa's plan might be rolling into action, opposition leader Morgan Tsvangirai, who has been receiving cancer treatment in Britain and South Africa, returned to Harare late on Wednesday, his spokesman said.

    South Africa said Mr Mugabe had told President Jacob Zuma by telephone on Wednesday that he was confined to his home but was otherwise fine and the military said it was keeping him and his family, including wife Grace, safe.

    -
    -
    +

    Despite the lingering admiration for Mr Mugabe, there is little public affection for 52-year-old Grace, a former government typist who started having an affair with Mr Mugabe in the early 1990s as his first wife, Sally, was dying of kidney disease.

    Dubbed "DisGrace" or "Gucci Grace" on account of her reputed love of shopping, she enjoyed a meteoric rise through the ranks of Mugabe's ruling Zanu-PF in the last two years, culminating in Mnangagwa's removal a week ago - a move seen as clearing the way for her to succeed her husband.

    -
    -
    +

    In contrast to the high political drama unfolding behind closed doors, the streets of the capital remained calm, with people going about their daily business, albeit under the watch of soldiers on armoured vehicles at strategic locations.

    -
    -
    +

    Whatever the final outcome, the events could signal a once-in-a-generation change for the former British colony, a regional breadbasket reduced to destitution by economic policies Mr Mugabe's critics have long blamed on him.

    -
    -
    +
    \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/title-and-h1-discrepancy/expected.html b/article_scraper/resources/tests/readability/title-and-h1-discrepancy/expected.html index 0b4f1a8..d553a10 100644 --- a/article_scraper/resources/tests/readability/title-and-h1-discrepancy/expected.html +++ b/article_scraper/resources/tests/readability/title-and-h1-discrepancy/expected.html @@ -18,4 +18,4 @@ cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

    - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/tmz-1/expected.html b/article_scraper/resources/tests/readability/tmz-1/expected.html index ea3c2f4..f10c8c3 100644 --- a/article_scraper/resources/tests/readability/tmz-1/expected.html +++ b/article_scraper/resources/tests/readability/tmz-1/expected.html @@ -13,7 +13,7 @@

    EXCLUSIVE

    - 0225-lupita-nyongo-getty-01Lupita Nyong'o's now-famous Oscar dress + 0225-lupita-nyongo-getty-01Lupita Nyong'o's now-famous Oscar dress -- adorned in pearls -- was stolen right out of her hotel room ... TMZ has learned.

    Law enforcement sources tell TMZ ... the dress was taken out of Lupita's @@ -24,14 +24,14 @@

    We're told there is security footage that cops are looking at that could catch the culprit right in the act. 

    - update_graphic_red_bar12:00 PM PT -- Sheriff's deputies were at The London Thursday + update_graphic_red_bar12:00 PM PT -- Sheriff's deputies were at The London Thursday morning.  We know they were in the manager's office and we're told they have looked at security footage to determine if they can ID the culprit.

    - 0226-SUB-london-hotel-swipe-tmz-02 + 0226-SUB-london-hotel-swipe-tmz-02

    - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/toc-missing/expected.html b/article_scraper/resources/tests/readability/toc-missing/expected.html index bd72b3d..f8e7337 100644 --- a/article_scraper/resources/tests/readability/toc-missing/expected.html +++ b/article_scraper/resources/tests/readability/toc-missing/expected.html @@ -1,5 +1,5 @@
    -
    +

    Many developers think that having a critical bug in their code is the worst thing that can happen. Well, there is something much worse than that: Having a critical bug in your code and not knowing about it!

    @@ -10,9 +10,9 @@ I'm not a statistician and not a data scientist, I'm just a developer. Before I introduce dependencies into my system I make sure I really can't do without them. So, using some high school level statistics and a fair knowledge of SQL, I implemented a simple anomaly detection system that works.

    - Can you spot the anomaly?<br><small>Photo by <a href="https://unsplash.com/photos/KmKZV8pso-s">Ricardo Gomez Angel</a></small> + Can you spot the anomaly?<br><small>Photo by <a href="https://unsplash.com/photos/KmKZV8pso-s">Ricardo Gomez Angel</a></small>
    - Can you spot the anomaly?
    + Can you spot the anomaly?
    Photo by Ricardo Gomez Angel
    @@ -87,9 +87,9 @@
    -
    +
    -
    +

    Detecting Anomalies

    @@ -104,7 +104,7 @@ The number that stands out in this series is 12.

    - Scatter plot + Scatter plot
    Scatter plot
    @@ -337,7 +337,7 @@

    The quality of our results are directly related to the parameters we set for the query. Later we'll see how using backtesting can help us identify ideal values.

    -
    +

    Analyzing a Server Log

    @@ -464,7 +464,7 @@ To get a sense of the data, let's draw a stacked bar chart by status:

    - stacked bar chart by status, over time + stacked bar chart by status, over time
    stacked bar chart by status, over time
    @@ -593,7 +593,7 @@ It does look like in the last couple of minutes we are getting more errors than expected.

    - Status 400 in the past hour + Status 400 in the past hour
    Status 400 in the past hour
    @@ -601,7 +601,7 @@

    What our naked eye missed in the chart and in the raw data, was found by the query, and was classified as an anomaly. We are off to a great start!

    -
    +

    Backtesting

    @@ -984,7 +984,7 @@
  25. - Anomaly in status code 400 + Anomaly in status code 400
    Anomaly in status code 400
    @@ -994,7 +994,7 @@
    - Anomaly in status code 500 + Anomaly in status code 500
    Anomaly in status code 500
    @@ -1004,7 +1004,7 @@
    - A hidden anomaly in status code 404 + A hidden anomaly in status code 404
    A hidden anomaly in status code 404
    @@ -1033,7 +1033,7 @@ Now that we have a working query to backtest, we can experiment with different values.

    - Experimenting with parameter values + Experimenting with parameter values
    Experimenting with parameter values
    @@ -1042,7 +1042,7 @@ This is a chart showing the alerts our system identified in the past 12 hours:

    - Backtesting with default parameters. <a href="https://popsql.com/queries/-MECQV6GiKr04WdCWM0K/simple-anomaly-detection-with-sql?access_token=2d2c0729f9a1cfa7b6a2dbb5b0adb45c">View in editor</a> + Backtesting with default parameters. <a href="https://popsql.com/queries/-MECQV6GiKr04WdCWM0K/simple-anomaly-detection-with-sql?access_token=2d2c0729f9a1cfa7b6a2dbb5b0adb45c">View in editor</a>
    Backtesting with default parameters. View in editor
    @@ -1054,7 +1054,7 @@ If we decrease the value of the z-score threshold from 3 to 1, we should get more alerts. With a lower threshold, more values are likely to be considered an anomaly:

    - Backtesting with lower z-score threshold + Backtesting with lower z-score threshold
    Backtesting with lower z-score threshold
    @@ -1063,7 +1063,7 @@ If we increase the entries threshold from 10 to 30, we should get less alerts:

    - Backtesting with higher entries threshold + Backtesting with higher entries threshold
    Backtesting with higher entries threshold
    @@ -1072,7 +1072,7 @@ If we increase the backtest period from 60 minutes to 360 minutes, we get more alerts:

    - Backtesting with higher entries threshold + Backtesting with higher entries threshold
    Backtesting with higher entries threshold
    @@ -1080,7 +1080,7 @@

    A good alerting system is a system that produces true alerts, at a reasonable time. Using the backtesting query you can experiment with different values that produces quality alerts you can act on.

    -
    +

    Improving Accuracy

    @@ -1200,15 +1200,15 @@
  26. To reduce the amount of false positives, you can normalize the number of responses to the proportion of the total responses. This way, for example, if you're using a flaky remote service that fails once after every certain amount of requests, using the proportion may not trigger an alert when the increase in errors correlates with an increase in overall traffic.
  27. -
    +

    Conclusion

    The method presented above is a very simple method to detect anomalies and produce actionable alerts that can potentially save you a lot of grief. There are many tools out there that provide similar functionally, but they require either tight integration or $$$. The main appeal of this approach is that you can get started with tools you probably already have, some SQL and a scheduled task!

    -
    +

    UPDATE: many readers asked me how I created the charts in this article... well, I used PopSQL. It’s a new modern SQL editor focused on collaborative editing. If you're in the market for one, go check it out...

    - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/topicseed-1/expected.html b/article_scraper/resources/tests/readability/topicseed-1/expected.html index 471f301..d4a0122 100644 --- a/article_scraper/resources/tests/readability/topicseed-1/expected.html +++ b/article_scraper/resources/tests/readability/topicseed-1/expected.html @@ -23,7 +23,7 @@ Content depth should be the urgent priority for your content marketing strategy, and clearly defined in your content briefs. Start by dominating your own core topics, before venturing across the pond and write about linked subject matters. Otherwise, you are the opposite of an authority as the definition states that an authority is “a person with extensive or specialized knowledge about a subject; an expert”. Lastly, do not mistake article depth vs. article length: a blog post’s extreme wordcount has nothing to do with its content depth.

    - Assess How Deep Is Your Content + Assess How Deep Is Your Content

    The first task on your list, right now, is to shortlist your core topics. What are you trying to be an expert on? Then, go through each one of your pieces of content and understand how well each blog post is covering its focus topic(s). Not how many times specific keywords appear, or how well the article is outlined and structured. @@ -38,7 +38,7 @@ Remember that skyscraper content and 10x content are not necessarily the answer. These content writing strategies state that in order to beat another piece of content, you need to write 10x more. Either in quantity with a 10x word count or in quality by putting times more information within your own piece of content. Such articles often become unreadable and discourage visitors from absorbing all the knowledge. The best alternative is the create pillar pages centered around core topics, and several articles dealing with each specific section in depth. This is deep content powered by a smart internal linking strategy and search engines love that in this day and age where attention spans are short! With that being said, avoid writing 600-word articles!

    - Rewrite With Content Depth In Mind + Rewrite With Content Depth In Mind

    Once you know which articles are lacking depth of knowledge and information, it is time to rethink each one. For each article, make a list of what essential pieces of information or data are missing. Then decide where to fit them, and decide whether the article would benefit from a full rewrite or not. As a rule of thumb, if you need to change a third of your article, you may need to rewrite it entirely. Of course, this does not mean erasing all work done prior, but it means starting afresh! Trying to fit deep content into an existing blog post gives you constraints so doing it from scratch can actually be easier to fight thin content. @@ -54,7 +54,7 @@ With the massive rise of voice searches, users tend to use full questions for their search queries. What used to be top bottled water brands is now OK google, what is the best bottled-water brand in Texas? The point being, keywords are losing traction to leave space for a more natural language understanding of a blog post’s textual content, and meaning.

    - Yes, Content Depth and Breadth Overlap + Yes, Content Depth and Breadth Overlap

    “A topic can be defined as the company it keeps.” A very accurate saying loved by ontologists within the fields of computational linguistics, and information science. In simpler terms, a topic and all the terminology it is encompassing will inevitably overlap with related topics. Which, in turn, will form topic clusters. @@ -69,7 +69,7 @@ Therefore, content depth and content breadth are not to be opposed. Content marketers should use both strategies in order to reach ultimate topical authority over their choice of subject matters.

    - Depth of Content = Quality + Frequency + Depth of Content = Quality + Frequency

    Up until recently, long-form blog posts generally were evergreen articles that generated a constant stream of organic traffic for a website. This was a lead magnet generation strategy which worked well: hire a writer, include the right keywords, reach over a 5,000-word word count, and hit publish. Then, wait. @@ -90,4 +90,4 @@

    Tools and platforms such as topicseed are here to help you find new article ideas pertaining to your core topics within a few clicks and a few minutes. The number of web pages, Wikipedia articles, and pieces of content, our machine-learning algorithms can analyze in seconds would take you months to digest. Our topicgraph finds closely related concepts in order for your domain to reach topical authority through content depth and content breadth.

    - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/tumblr/expected.html b/article_scraper/resources/tests/readability/tumblr/expected.html index daefbf8..c3598cf 100644 --- a/article_scraper/resources/tests/readability/tumblr/expected.html +++ b/article_scraper/resources/tests/readability/tumblr/expected.html @@ -1,4 +1,4 @@

    Minecraft 1.8 - The Bountiful Update

    -

    + Added Granite, Andesite, and Diorite stone blocks, with smooth versions
    + Added Slime Block
    + Added Iron Trapdoor
    + Added Prismarine and Sea Lantern blocks
    + Added the Ocean Monument
    + Added Red Sandstone
    + Added Banners
    + Added Armor Stands
    + Added Coarse Dirt (dirt where grass won’t grow)
    + Added Guardian mobs, with item drops
    + Added Endermite mob
    + Added Rabbits, with item drops
    + Added Mutton and Cooked Mutton
    + Villagers will harvest crops and plant new ones
    + Mossy Cobblestone and Mossy Stone Bricks are now craftable
    + Chiseled Stone Bricks are now craftable
    + Doors and fences now come in all wood type variants
    + Sponge block has regained its water-absorbing ability and becomes wet
    + Added a spectator game mode (game mode 3)
    + Added one new achievement
    + Added “Customized” world type
    + Added hidden “Debug Mode” world type
    + Worlds can now have a world barrier
    + Added @e target selector for Command Blocks
    + Added /blockdata command
    + Added /clone command
    + Added /execute command
    + Added /fill command
    + Added /particle command
    + Added /testforblocks command
    + Added /title command
    + Added /trigger command
    + Added /worldborder command
    + Added /stats command
    + Containers can be locked in custom maps by using the “Lock” data tag
    + Added logAdminCommands, showDeathMessages, reducedDebugInfo, sendCommandFeedback, and randomTickSpeed game rules
    + Added three new statistics
    + Player skins can now have double layers across the whole model, and left/right arms/legs can be edited independently
    + Added a new player model with smaller arms, and a new player skin called Alex?
    + Added options for configuring what pieces of the skin that are visible
    + Blocks can now have custom visual variations in the resource packs
    + Minecraft Realms now has an activity chart, so you can see who has been online
    + Minecraft Realms now lets you upload your maps
    * Difficulty setting is saved per world, and can be locked if wanted
    * Enchanting has been redone, now costs lapis lazuli in addition to enchantment levels
    * Villager trading has been rebalanced
    * Anvil repairing has been rebalanced
    * Considerable faster client-side performance
    * Max render distance has been increased to 32 chunks (512 blocks)
    * Adventure mode now prevents you from destroying blocks, unless your items have the CanDestroy data tag
    * Resource packs can now also define the shape of blocks and items, and not just their textures
    * Scoreboards have been given a lot of new features
    * Tweaked the F3 debug screen
    * Block ID numbers (such as 1 for stone), are being replaced by ID names (such as minecraft:stone)
    * Server list has been improved
    * A few minor changes to village and temple generation
    * Mob heads for players now show both skin layers
    * Buttons can now be placed on the ceiling
    * Lots and lots of other changes
    * LOTS AND LOTS of other changes
    - Removed Herobrine

    -
    +

    + Added Granite, Andesite, and Diorite stone blocks, with smooth versions
    + Added Slime Block
    + Added Iron Trapdoor
    + Added Prismarine and Sea Lantern blocks
    + Added the Ocean Monument
    + Added Red Sandstone
    + Added Banners
    + Added Armor Stands
    + Added Coarse Dirt (dirt where grass won’t grow)
    + Added Guardian mobs, with item drops
    + Added Endermite mob
    + Added Rabbits, with item drops
    + Added Mutton and Cooked Mutton
    + Villagers will harvest crops and plant new ones
    + Mossy Cobblestone and Mossy Stone Bricks are now craftable
    + Chiseled Stone Bricks are now craftable
    + Doors and fences now come in all wood type variants
    + Sponge block has regained its water-absorbing ability and becomes wet
    + Added a spectator game mode (game mode 3)
    + Added one new achievement
    + Added “Customized” world type
    + Added hidden “Debug Mode” world type
    + Worlds can now have a world barrier
    + Added @e target selector for Command Blocks
    + Added /blockdata command
    + Added /clone command
    + Added /execute command
    + Added /fill command
    + Added /particle command
    + Added /testforblocks command
    + Added /title command
    + Added /trigger command
    + Added /worldborder command
    + Added /stats command
    + Containers can be locked in custom maps by using the “Lock” data tag
    + Added logAdminCommands, showDeathMessages, reducedDebugInfo, sendCommandFeedback, and randomTickSpeed game rules
    + Added three new statistics
    + Player skins can now have double layers across the whole model, and left/right arms/legs can be edited independently
    + Added a new player model with smaller arms, and a new player skin called Alex?
    + Added options for configuring what pieces of the skin that are visible
    + Blocks can now have custom visual variations in the resource packs
    + Minecraft Realms now has an activity chart, so you can see who has been online
    + Minecraft Realms now lets you upload your maps
    * Difficulty setting is saved per world, and can be locked if wanted
    * Enchanting has been redone, now costs lapis lazuli in addition to enchantment levels
    * Villager trading has been rebalanced
    * Anvil repairing has been rebalanced
    * Considerable faster client-side performance
    * Max render distance has been increased to 32 chunks (512 blocks)
    * Adventure mode now prevents you from destroying blocks, unless your items have the CanDestroy data tag
    * Resource packs can now also define the shape of blocks and items, and not just their textures
    * Scoreboards have been given a lot of new features
    * Tweaked the F3 debug screen
    * Block ID numbers (such as 1 for stone), are being replaced by ID names (such as minecraft:stone)
    * Server list has been improved
    * A few minor changes to village and temple generation
    * Mob heads for players now show both skin layers
    * Buttons can now be placed on the ceiling
    * Lots and lots of other changes
    * LOTS AND LOTS of other changes
    - Removed Herobrine

    + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/v8-blog/expected.html b/article_scraper/resources/tests/readability/v8-blog/expected.html index e5874fc..ecbb8c8 100644 --- a/article_scraper/resources/tests/readability/v8-blog/expected.html +++ b/article_scraper/resources/tests/readability/v8-blog/expected.html @@ -8,7 +8,7 @@

    First, let's see what you can do with this new feature! Similar to this post let's start with a "hello world" type program that exports a single function that adds two numbers:

    -
    // add.c
    #include <emscripten.h>

    EMSCRIPTEN_KEEPALIVE
    int add(int x, int y) {
    return x + y;
    }
    +
    // add.c
    #include <emscripten.h>

    EMSCRIPTEN_KEEPALIVE
    int add(int x, int y) {
    return x + y;
    }

    We'd normally build this with something like emcc -O3 add.c -o add.js which would emit add.js and add.wasm. Instead, let's ask emcc to only emit Wasm:

    @@ -20,11 +20,11 @@

    Disassembling it, it's very minimal - just 87 bytes! It contains the obvious add function

    -
    (func $add (param $0 i32) (param $1 i32) (result i32)
    (i32.add
    (local.get $0)
    (local.get $1)
    )
    )
    +
    (func $add (param $0 i32) (param $1 i32) (result i32)
    (i32.add
    (local.get $0)
    (local.get $1)
    )
    )

    and one more function, _start,

    -
    (func $_start
    (nop)
    )
    +
    (func $_start
    (nop)
    )

    _start is part of the WASI spec, and Emscripten's standalone mode emits it so that we can run in WASI runtimes. (Normally _start would do global initialization, but here we just don't need any so it's empty.)

    @@ -34,7 +34,7 @@

    One nice thing about a standalone Wasm file like this is that you can write custom JavaScript to load and run it, which can be very minimal depending on your use case. For example, we can do this in Node.js:

    -
    // load-add.js
    const binary = require('fs').readFileSync('add.wasm');

    WebAssembly.instantiate(binary).then(({ instance }) => {
    console.log(instance.exports.add(40, 2));
    });
    +
    // load-add.js
    const binary = require('fs').readFileSync('add.wasm');

    WebAssembly.instantiate(binary).then(({ instance }) => {
    console.log(instance.exports.add(40, 2));
    });

    Just 4 lines! Running that prints 42 as expected. Note that while this example is very simplistic, there are cases where you simply don't need much JavaScript, and may be able to do better than Emscripten's default JavaScript runtime (which supports a bunch of environments and options). A real-world example of that is in zeux's meshoptimizer - just 57 lines, including memory management, growth, etc.!

    @@ -44,11 +44,11 @@

    Another nice thing about standalone Wasm files is that you can run them in Wasm runtimes like wasmer, wasmtime, or WAVM. For example, consider this hello world:

    -
    // hello.cpp
    #include <stdio.h>

    int main() {
    printf("hello, world!\n");
    return 0;
    }
    +
    // hello.cpp
    #include <stdio.h>

    int main() {
    printf("hello, world!\n");
    return 0;
    }

    We can build and run that in any of those runtimes:

    -
    $ emcc hello.cpp -O3 -o hello.wasm
    $ wasmer run hello.wasm
    hello, world!
    $ wasmtime hello.wasm
    hello, world!
    $ wavm run hello.wasm
    hello, world!
    +
    $ emcc hello.cpp -O3 -o hello.wasm
    $ wasmer run hello.wasm
    hello, world!
    $ wasmtime hello.wasm
    hello, world!
    $ wavm run hello.wasm
    hello, world!

    Emscripten uses WASI APIs as much as possible, so programs like this end up using 100% WASI and can run in WASI-supporting runtimes (see notes later on what programs require more than WASI).

    @@ -175,4 +175,4 @@

    You may also find APIs that do have a non-JS replacement that we haven’t converted yet, as work is still ongoing. Please file bugs, and as always help is welcome!

    - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/videos-1/expected.html b/article_scraper/resources/tests/readability/videos-1/expected.html index 47fe812..e469e3d 100644 --- a/article_scraper/resources/tests/readability/videos-1/expected.html +++ b/article_scraper/resources/tests/readability/videos-1/expected.html @@ -11,9 +11,7 @@

    21) Star Wars: The Last Jedi

    -
    - -
    +

    I am as shocked as anyone that a Star Wars movie found its way onto my list — but I was bowled over by The Last Jedi, which may be one of the series’ best. In the hands of writer-director Rian Johnson (who will also oversee a new Star Wars trilogy), The Last Jedi is beautiful to look at and keeps its eye on the relationships between characters and how they communicate with one another, in addition to the bigger galactic story. The same characters are back, but they seem infused with new life, and the galaxy with a new kind of hope. The movie’s best details are in the strong bonds that develop between characters, and I left the film with the realization that for the first time in my life, I loved a Star Wars movie. Now I understand the magic.

    @@ -23,9 +21,7 @@

    20) Faces Places

    -
    - -
    +

    The unusual documentary Faces Places (in French, Visages Villages) turns on the friendship between the accomplished street artist JR and legendary film director Agnès Varda, whose work was central to the development of the French New Wave movement. The pair (whose difference in age is 55 years) met after years of admiring each other’s work and decided to create a documentary portrait of France — by making a number of actual portraits. The film chronicles a leg of the "Inside Outside Project," a roving art initiative in which JR makes enormous portraits of people he meets and pastes them onto buildings and walls. In the film, Varda joins him, and as they talk to people around the country, they grow in their understanding of themselves and of each other. The development of their friendship, which is both affectionate and mutually sharpening, forms Faces Places’ emotional center.

    @@ -36,9 +32,7 @@

    19) Ingrid Goes West

    -
    - -
    +

    Ingrid Goes West is a twisted and dark comedy — part addiction narrative, part stalker story — and yet it’s set in a world that’s almost pathologically cheery: the glossy, sunny, nourishing, superfood- and superlative-loving universe of Instagram celebrity. But despite Ingrid Goes West’s spot-on take on that world, the best thing about the film is that it refuses to traffic in lazy buzzwords and easy skewering, particularly at the expense of young women. Instead, the movie conveys that behind every Instagram image and meltdown is a real person, with real insecurities, real feelings, and real problems. And it recognizes that living a life performed in public can be its own kind of self-deluding prison.

    @@ -48,9 +42,7 @@

    18) Lady Macbeth

    -
    - -
    +

    Lady Macbeth is no placid costume drama. Adapted from an 1865 Russian novella by Nikolai Leskov, the movie follows Katherine (the astounding Florence Pugh), a woman in the Lady Macbeth line characterized by a potent cocktail of very few scruples and a lot of determination. She's a chilling avatar for the ways that class and privilege — both obvious and hidden — insulate some people from the consequences of their actions while damning others. Lady Macbeth is also a dazzling directorial debut from William Oldroyd, a thrilling combination of sex, murder, intrigue, and power plays. It’s visually stunning, each frame composed so carefully and deliberately that the wildness and danger roiling just below the surface feels even more frightening. Each scene ratchets up the tension to an explosive, chilling end.

    @@ -60,9 +52,7 @@

    17) BPM (Beats Per Minute)

    -
    - -
    +

    BPM (Beats Per Minute) is a remarkably tender and stirring story of the Paris chapter of ACT UP, an AIDS activism group, and the young people who found themselves caught in the crosshairs of the AIDS crisis in the early 1990s. The film follows both the group's actions and the individual members’ shifting relationships to one another — enemies becoming friends, friends becoming lovers, lovers becoming caretakers — as well as their struggles with the disease wracking their community. As an account of the period, it’s riveting; as an exploration of life and love set at the urgent intersection of the political and the personal, it’s devastating.

    @@ -72,9 +62,7 @@

    16) The Big Sick

    -
    - -
    +

    Few 2017 movies could top the charm and tenderness of The Big Sick, which hits all the right romantic comedy notes with one unusual distinction: It feels like real life. That’s probably because The Big Sick is written by real-life married couple Emily V. Gordon and Silicon Valley's Kumail Nanjiani, and based on their real-life romance. The Big Sick — which stars Nanjiani as a version of himself, alongside Zoe Kazan as Emily — is funny and sweet while not backing away from matters that romantic comedies don’t usually touch on, like serious illness, struggles in long-term marriages, and religion. As it tells the couple’s story, which takes a serious turn when Emily falls ill with a mysterious infection and her parents (played by Holly Hunter and Ray Romano) come to town, it becomes a funny and wise story about real love.

    @@ -84,9 +72,7 @@

    15) Mother!

    -
    - -
    +

    There’s so much pulsing beneath the surface of Mother! that it’s hard to grab on to just one theme as what it “means.” It’s full-on apocalyptic fiction, and like all stories of apocalypse, it’s intended to draw back the veil on reality and show us what’s really beneath. And this movie gets wild: If its gleeful cracking apart of traditional theologies doesn’t get you (there’s a lot of Catholic folk imagery here, complete with an Ash Wednesday-like mud smearing on the foreheads of the faithful), its bonkers scenes of chaos probably will. Mother! is a movie designed to provoke fury, ecstasy, madness, catharsis, and more than a little awe. Watching it, and then participating in the flurry of arguments and discussions unpacking it, was among my best moviegoing experiences of 2017.

    @@ -96,9 +82,7 @@

    14) A Ghost Story

    -
    - -
    +

    Director David Lowery filmed A Ghost Story in secret, then premiered it at the Sundance Film Festival to critical acclaim. The movie starts out being about a grieving widow (Rooney Mara) trying to live through the pain of losing her beloved husband, but it soon shifts focus to the ghost of her husband (Casey Affleck, covered in a sheet), evolving into a compelling rumination on the nature of time, memory, history, and the universe. Bathed in warm humor and wistful longing, it's a film that stays with you long after it’s over, a lingering reminder of the inextricable link between love and place.

    @@ -108,9 +92,7 @@

    13) The Square

    -
    - -
    + @@ -120,9 +102,7 @@

    12) Dunkirk

    -
    - -
    +

    Dunkirk, a true cinematic achievement from acclaimed director Christopher Nolan, backs off conventional notions of narrative and chronology as much as possible, while leaning headfirst into everything else that makes a movie a visceral work of art aimed at the senses: the images, the sounds, the scale, the swelling vibrations of it all. You can’t smell the sea spray, but your brain may trick you into thinking you can. Nolan’s camera pushes the edges of the screen as far as it can as Dunkirk engulfs the audience in something that feels like a lot more than a war movie. It’s a symphony for the brave and broken, and it resolves in a major key — but one with an undercurrent of sorrow, and of sober warning. Courage in the face of danger is not just for characters in movies.

    @@ -132,9 +112,7 @@

    11) Rat Film

    -
    - -
    +

    Rat Film is about rats, yes — and rat poison experts and rat hunters and people who keep rats as pets. But it’s also about the history of eugenics, dubious science, “redlining,” and segregated housing in Baltimore. All these pieces come together to form one big essay, where the meaning of each vignette only becomes clearer in light of the whole. It’s a fast-paced, no-holds-barred exploration of a damning history, and it accrues meaning as the images, sounds, and text pile up.

    @@ -144,9 +122,7 @@

    10) A Quiet Passion

    -
    - -
    +

    A Quiet Passion is technically a biographical film about Emily Dickinson, but it transcends its genre to become something more like poetry. It’s a perplexing and challenging film, crafted without the traditional guardrails that guide most biographical movies — dates, times, major accomplishments, and so on. Time slips away in the film almost imperceptibly, and the narrative arc doesn’t yield easily to the viewer. Cynthia Nixon plays Emily Dickinson, whose poetry and life is a perfect match for the signature style of director Terence Davies: rich in detail, deeply enigmatic, and weighed down with a kind of sparkling, joy-tinged sorrow. A Quiet Passion is a portrait, both visual and narrative, of the kind of saint most modern people can understand: one who is certain of her uncertainty, and yearning to walk the path on which her passion and longing meet.

    @@ -156,9 +132,7 @@

    9) Columbus

    -
    - -
    +

    Columbus is a stunner of a debut from video essayist turned director Kogonada. Haley Lu Richardson stars as Casey, a young woman living in Columbus, Indiana, who cares for her mother, works at a library, and harbors a passion for architecture. (Columbus is a mecca for modernist architecture scholars and enthusiasts.) When a visiting architecture scholar falls into a coma in Columbus, his estranged son Jin (John Cho) arrives to wait for him and strikes up a friendship with Casey, who starts to show him her favorite buildings. The two begin to unlock something in each other that’s hard to define but life-changing for both. Columbus is beautiful and subtle, letting us feel how the places we build and the people we let near us move and mold us.

    @@ -168,9 +142,7 @@

    8) The Florida Project

    -
    - -
    +

    Sean Baker’s The Florida Project unfolds at first like a series of sketches about the characters who live in a purple-painted, $35-a-night motel called the Magic Castle down the street from Disney World. The film is held together by the hysterical antics of a kid named Moonee and her pack of young friends, as well as long-suffering hotel manager Bobby (a splendid, warm Willem Dafoe), who tries to put up with it all while keeping some kind of order. But as The Florida Project goes on, a narrative starts to form, one that chronicles with heartbreaking attention the sort of dilemmas that face poor parents and their children in America, and the broken systems that try to cope with impossible situations.

    @@ -180,9 +152,7 @@

    7) Call Me by Your Name

    -
    - -
    +

    Luca Guadagnino’s gorgeous film Call Me by Your Name adapts André Aciman’s 2007 novel about a precocious 17-year-old named Elio (Timothée Chalamet), who falls in lust and love with his father’s 24-year-old graduate student Oliver (Armie Hammer). It’s remarkable for how it turns literature into pure cinema, all emotion and image and heady sensation. Set in 1983 in Northern Italy, Call Me by Your Name is less about coming out than coming of age, but it also captures a particular sort of love that’s equal parts passion and torment, a kind of irrational heart fire that opens a gate into something longer-lasting. The film is a lush, heady experience for the body, but it’s also an arousal for the soul.

    @@ -192,9 +162,7 @@

    6) Personal Shopper

    -
    - -
    +

    In her second collaboration with French director Olivier Assayas, Kristen Stewart plays a personal shopper to a wealthy socialite, with a sideline as an amateur ghost hunter who’s searching for her dead twin brother. Personal Shopper is deeper than it seems at first blush, a meditation on grief and an exploration of “between” places — on the fringes of wealth, and in the space between life and death. Some souls are linked in a way that can’t be shaken, and whether or not there’s an afterlife doesn’t change the fact that we see and sense them everywhere. (Personal Shopper also has one of the most tense extended scenes involving text messaging ever seen onscreen.)

    @@ -204,9 +172,7 @@

    5) Princess Cyd

    -
    - -
    +

    Stephen Cone is a master of small, carefully realized filmmaking; his earlier films such as The Wise Kids and Henry Gamble’s Birthday Party combine an unusual level of empathy for his characters with an unusual combination of interests: love, desire, sexual awakenings, and religion. Princess Cyd is his most accomplished film yet, about a young woman named Cyd (Jessie Pinnick) who finds herself attracted to Katie (Malic White), a barista, while visiting her Aunt Miranda (Rebecca Spence, playing a character modeled on the author Marilynne Robinson) in Chicago. As she works through her own sexual awakening with Katie, Cyd unwinds some of the ways Miranda’s life has gotten too safe. They provoke each other while forming a bond and being prodded toward a bigger understanding of the world. It is a graceful and honest film, and it feels like a modest miracle.

    @@ -216,9 +182,7 @@

    4) Get Out

    -
    - -
    +

    Racism is sinister, frightening, and deadly. But Get Out (a stunning directorial debut from Key & Peele's Jordan Peele) isn’t about the blatantly, obviously scary kind of racism — burning crosses and lynchings and snarling hate. Instead, it’s interested in showing how the parts of racism that try to be aggressively unscary are just as horrifying, and it’s interested in making us feel that horror in a visceral, bodily way. In the tradition of the best classic social thrillers, Get Out takes a topic that is often approached cerebrally — casual racism — and turns it into something you feel in your tummy. And it does it with a wicked sense of humor.

    @@ -228,9 +192,7 @@

    3) The Work

    -
    - -
    +

    The Work is an outstanding, astonishing accomplishment and a viewing experience that will leave you shaken (but in a good way). At Folsom Prison in California, incarcerated men regularly participate in group therapy, and each year other men from the “outside” apply to participate in an intense four-day period of group therapy alongside Folsom’s inmates. The Work spends almost all of its time inside the room where that therapy happens, observing the strong, visceral, and sometimes violent emotions the men feel as they expose the hurt and raw nerves that have shaped how they encounter the world. Watching is not always easy, but by letting us peek in, the film invites viewers to become part of the experience — as if we, too, are being asked to let go.

    @@ -240,9 +202,7 @@

    2) Ex Libris

    -
    - -
    +

    Frederick Wiseman is one of the towering giants of nonfiction film, a keen observer of American institutions — ranging from prisons to dance companies to welfare offices — for the past half-century. Ex Libris is his mesmerizing look at the New York Public Library and the many functions it fills, which go far beyond housing books. Wiseman works in the observational mode, which means his films contain no captions, dates, or talking-head interviews: We just see what his camera captured, which in this case includes community meetings, benefit dinners, after-school programs, readings with authors and scholars (including Richard Dawkins and Ta-Nehisi Coates), and NYPL patrons going about their business in the library’s branches all over the city. The result is almost hypnotic and, perhaps surprisingly, deeply moving. It makes a case for having faith in the public institutions where ordinary people work — away from the limelight, without trying to score political points — in order to make our communities truly better.

    @@ -252,9 +212,7 @@

    1) Lady Bird

    -
    - -
    +

    Lady Bird topped my list almost instantly, and only rose in my estimation on repeated viewings. For many who saw it (including me), it felt like a movie made not just for but about me. Lady Bird is a masterful, exquisite coming-of-age comedy starring the great Saoirse Ronan as Christine — or “Lady Bird,” as she’s re-christened herself — and it’s as funny, smart, and filled with yearning as its heroine. Writer-director Greta Gerwig made the film as an act of love, not just toward her hometown of Sacramento but also toward girlhood, and toward the feeling of always being on the outside of wherever real life is happening. Lady Bird is the rare movie that manages to be affectionate, entertaining, hilarious, witty, and confident. And one line from it struck me as the guiding principle of many of the year’s best films: “Don’t you think they are the same thing? Love, and attention?”

    @@ -264,4 +222,4 @@

    Honorable mentions: Marjorie Prime, Phantom Thread, Casting JonBenet, The Post, The Shape of Water, Logan Lucky, I, Tonya, The Lost City of Z, Graduation, Spettacolo, Loveless, Restless Creature: Wendy Whelan, In Transit, The Reagan Show

    - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/videos-2/expected.html b/article_scraper/resources/tests/readability/videos-2/expected.html index 55e1498..f2fe95f 100644 --- a/article_scraper/resources/tests/readability/videos-2/expected.html +++ b/article_scraper/resources/tests/readability/videos-2/expected.html @@ -8,9 +8,7 @@

    Vape Wave (documentaire, 1h28, Planète+)

    -

    - -

    +

    Pendant quelques jours, le doute a plané : l’Etat comptait-il vraiment légiférer contre la cigarette dans les films français, que ce soit via une interdiction pure et simple ou via un système de «punition» (coupe des aides CNC, par exemple) pour les longs-métrages qui sentent le mégot ? Si le rétropédalage de la ministre Buzyn n’en est pas vraiment un (elle n’avait jamais clairement menacé le septième art), la polémique a le mérite de pointer la (sur)représentation clopesque sur écran. Et si, comme c’est le cas dans la vie quotidienne, on voyait progressivement les cigarettes électroniques remplacer les tiges nicotinées authentiques ? Que ceux qui mettraient en doute le potentiel cinématographique des vapoteuses se ruent sur Vape Wave, documentaire militant signé Jan Kounen, ex-fumeur reconverti à la vape dont les images magnifient les volutes de vapeur recrachée.

    @@ -23,9 +21,7 @@

    Dans la tête d’Alan Moore (websérie documentaire, 8x5min, Arte Creative)

    -

    - -

    +

    Le week-end dernier, Libération publiait un portrait de der consacré à l’auteur britannique Alan Moore, connu pour ses BD cultes (V pour Vendetta, Watchmen, From Hell), à l’occasion de la sortie de son deuxième roman, le pavé Jérusalem. En attendant l’imminente sortie d’une version longue de son entretien avec Libé, on pourra se replonger dans les épisodes d’une websérie documentaire d’Arte Creative en 8 épisodes consacré au maître. Brexit, magie, Anonymous font partie des sujets discutés avec le maître au fil de ce programme sobrement intitulé Dans la tête d’Alan Moore. (A.H.)

    @@ -35,9 +31,7 @@

    The Death and Life of Marsha P. Johnson (docu, 1h45, Netflix)

    -

    - -

    +

    Marsha, la «Rosa Parks du mouvement LGBTQ». Marsha «la prostituée, l’actrice et la sainte, modèle d’Andy Warhol» ou encore Marsha l’élaborée, la radicale, «avec ses plumes et ce maquillage qu’elle ne mettait jamais bien». «Queen Marsha» a été retrouvée morte dans l’Hudson en juillet 1992, alors qu’on la voyait encore parader dans les rues de Greenwich Village quelques jours auparavant. Un choc glaçant. Là où son corps a été repêché puis ingratement déposé, les sans-abri ont constitué le lendemain un mémorial de bouteilles et de plantes qui délimitent les contours de l’absente.

    @@ -50,9 +44,7 @@

    Alphonse President (série, 10x26, OCS Max)

    -

    - -

    +

    Un temps baptisée French Touch, la série Alphonse Président est le dernier né des programmes originaux made in OCS. On savait les budgets de la chaîne bien moins généreux que ceux de Canal+ (voire que ceux de France 3 Limousin), et cette série le prouve à nouveau régulièrement, notamment lors d’une scène de conférence de presse alternant plans larges d’une authentique conf' à l’Elysée période François Hollande et plans serrés d’acteurs filmés dans un château des Pays de la Loire où a eu lieu le tournage. Le principal atout (et quel atout) de cette série écrite et réalisée par Nicolas Castro (Des lendemains qui chantent, 2014) réside dans son interprète principal, Michel Vuillermoz.

    @@ -65,9 +57,7 @@

    Jim & Andy (documentaire, 1h33, Netflix) 

    -

    - -

    +

    A la sortie de Man on the Moon (2000), le magnifique film de Milos Forman consacré à Andy Kaufman – comique et génie de la performance absurde mort en 1984 –, le cinéaste et les acteurs insistaient dans chaque interview sur l’in­croyable comportement de Jim Carrey pendant le tournage : il aurait été comme possédé par Kaufman, se prenant pour lui 24 heures sur 24. Certains affirmaient même ne jamais avoir eu l’impression que l’acteur était présent, tant son modèle avait littéralement pris sa place. Nous en avons aujourd’hui la preuve en images car tout cela avait été filmé par Bob Zmuda et Lynne Margulies, l’ancien complice et la veuve de Kaufman.

    @@ -80,14 +70,12 @@

    Braguino (documentaire, 50min, Arte)

    -

    - -

    +

    La querelle peut se trouver derrière toutes les portes, y compris celle de l’exil. On a beau croire avoir tourné le dos à tout, à cette inclination humaine à nourrir sa propre haine, l’allergie peut regermer fissa sur une peau qui frissonne à l’approche de ce voisin que l’on ne comprend pas. Issu d’une lignée de vieux-croyants orthodoxes russes, Sacha Braguine a pris sa famille sous le bras, loin de toute autre présence humaine en taïga sibérienne. Un autre groupe, les Kiline, a décidé d’en faire de même et de s’installer de l’autre côté de la rivière. Qui est arrivé en premier ? Qui menace l’autre ? L’histoire de l’impossible communauté peut commencer.

    - La lecture d’Ermites dans la taïga (1992) de Vassili Peskov, authentique récit sur la famille Lykov opérant une migration similaire en 1938, a poussé l’artiste Clément Cogitore à rencontrer les Braguine, puis à se faire témoin de la bisbille de voisinage en 2016. Il en est revenu avec un nouveau film d’une cinquantaine de minutes : Braguino, soutenu par le prix Le Bal de la jeune création avec l’ADAGP. Le documentaire y frôle son déguisement fictionnel, tant ce qui s’y déroule convoque une dramaturgie comme invoquée par on ne sait quel rituel vaudou […] Lire la suite de la critique de Jérémy Piette sur Liberation.fr, le film diffusé cette semaine sur Arte est visible en intégralité ci-dessus. + La lecture d’Ermites dans la taïga (1992) de Vassili Peskov, authentique récit sur la famille Lykov opérant une migration similaire en 1938, a poussé l’artiste Clément Cogitore à rencontrer les Braguine, puis à se faire témoin de la bisbille de voisinage en 2016. Il en est revenu avec un nouveau film d’une cinquantaine de minutes : Braguino, soutenu par le prix Le Bal de la jeune création avec l’ADAGP.Le documentaire y frôle son déguisement fictionnel, tant ce qui s’y déroule convoque une dramaturgie comme invoquée par on ne sait quel rituel vaudou […] Lire la suite de la critique de Jérémy Piette sur Liberation.fr, le film diffusé cette semaine sur Arte est visible en intégralité ci-dessus.

    Pour un thriller tiré de faits réels @@ -95,15 +83,11 @@

    6 Days (film, 1h34, Netflix)

    -

    - -

    +

    Fin avril 1980, l’ambassade d’Iran à Londres a été le théâtre d’une prise d’otages largement médiatisée : une trentaine de personnes ont ainsi été retenues pendant six jours par des soldats iraniens dissidents exigeant la libération de 91 prisonniers. Avec Margaret Thatcher au 10 Downing Street à l’époque, pas question pour l’Angleterre d’avoir l’air mou du genou sur la réponse à apporter à cette crise scrutée par les caméras du monde entier. Le SAS (Special Air Service) est sur le coup : l’opération Nimrod se met en place pour prendre d’assaut l’ambassade.

    Inspiré par cet épisode, 6 Days de Toa Fraser (The Dead Lands, 2014) est un thriller carré pouvant compter sur l'autorité naturelle de Mark Strong (Kingsman) ici recyclé en flic londonien et sur la néo-badass attitude de Jamie Bell, bien loin du freluquet danseur de Billy Elliot puisqu'on le retrouve ici en soldat chargé d’organiser l’opération de secours. Attention, la bande-annonce ci-dessus dévoile à peu près l’intégralité des scènes d’action du film. (A.H.) -

    -

    Alexandre Hervaud , Jérémy Piette -

    - +

    Alexandre Hervaud , Jérémy Piette +

    \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/wapo-1/expected.html b/article_scraper/resources/tests/readability/wapo-1/expected.html index 18b0d02..b790b1a 100644 --- a/article_scraper/resources/tests/readability/wapo-1/expected.html +++ b/article_scraper/resources/tests/readability/wapo-1/expected.html @@ -87,8 +87,7 @@ extremists.

    Map: Flow of foreign fighters to Syria -

    -
    +

    After the collapse of the authoritarian system in 2011, hard-line Muslims known as Salafists attacked bars and art galleries. Then, in 2012, hundreds of Islamists assaulted the U.S. Embassy in @@ -114,8 +113,8 @@

    In January, Libyan militants loyal to the Islamic State beheaded 21 Christians — 20 of them Egyptian Copts — along the country’s coast. They later seized the Libyan city of Sirte.

    -

    -
    +

    +

    Officials are worried about the number of Tunisian militants who may have joined the jihadists in Libya — with the goal of returning home to fight @@ -145,4 +144,4 @@

    Tunisia’s Bardo museum is home to amazing Roman treasures

    - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/wapo-2/expected.html b/article_scraper/resources/tests/readability/wapo-2/expected.html index 6afdc3e..13d3b53 100644 --- a/article_scraper/resources/tests/readability/wapo-2/expected.html +++ b/article_scraper/resources/tests/readability/wapo-2/expected.html @@ -1,8 +1,6 @@ -
    -

    -
    Israeli Prime Minister Benjamin Netanyahu reacts as he visits the Western Wall in Jerusalem on March 18 following his party's victory in Israel's general election. (Thomas Coex/AFP/Getty Images) -

    -
    +

    +
    Israeli Prime Minister Benjamin Netanyahu reacts as he visits the Western Wall in Jerusalem on March 18 following his party's victory in Israel's general election. (Thomas Coex/AFP/Getty Images) +

    President Obama told the U.N. General Assembly 18 months ago that he would seek “real breakthroughs on these two issues — Iran’s nuclear program and ­Israeli-Palestinian peace.”

    @@ -98,10 +96,7 @@

    “That could be an issue forced onto the agenda about the same time as a potential nuclear deal.”

    -
    -

    -

    Steven Mufson covers the White House. Since joining The Post, he has covered +

    Steven Mufson covers the White House. Since joining The Post, he has covered economics, China, foreign policy and energy.

    -
    -
    + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/webmd-1/expected.html b/article_scraper/resources/tests/readability/webmd-1/expected.html index 1fe1cae..8a2c0f6 100644 --- a/article_scraper/resources/tests/readability/webmd-1/expected.html +++ b/article_scraper/resources/tests/readability/webmd-1/expected.html @@ -69,4 +69,4 @@ to peanuts and other tree nuts can be especially severe. Nuts are the main reason people get a life-threatening problem called anaphylaxis.

    - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/webmd-2/expected.html b/article_scraper/resources/tests/readability/webmd-2/expected.html index 04c35bf..355b884 100644 --- a/article_scraper/resources/tests/readability/webmd-2/expected.html +++ b/article_scraper/resources/tests/readability/webmd-2/expected.html @@ -21,4 +21,4 @@

    "What the public should know is that the more antibiotics you’ve taken, the higher your superbug risk," says Eric Biondi, MD, who runs a program to decrease unnecessary antibiotic use. "The more encounters you have with the hospital setting, the higher your superbug risk."

    "Superbugs should be a concern to everyone," Coombes says. "Antibiotics are the foundation on which all modern medicine rests. Cancer chemotherapy, organ transplants, surgeries, and childbirth all rely on antibiotics to prevent infections. If you can't treat those, then we lose the medical advances we have made in the last 50 years."

    Here are some of the growing superbug threats identified in the 2015 White House report.

    - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/wikia/expected.html b/article_scraper/resources/tests/readability/wikia/expected.html index f045cda..3c011fd 100644 --- a/article_scraper/resources/tests/readability/wikia/expected.html +++ b/article_scraper/resources/tests/readability/wikia/expected.html @@ -2,26 +2,26 @@

    Although Lucasfilm is already planning a birthday bash for the Star Wars Saga at Celebration Orlando this April, fans might get another present for the saga’s 40th anniversary. According to fan site MakingStarWars.net, rumors abound that Lucasfilm might re-release the unaltered cuts of the saga’s original trilogy.

    If the rumors are true, this is big news for Star Wars fans. Aside from limited VHS releases, the unaltered cuts of the original trilogy films haven’t been available since they premiered in theaters in the 1970s and ’80s. If Lucasfilm indeed re-releases the films’ original cuts, then this will be the first time in decades that fans can see the films in their original forms. Here’s what makes the unaltered cuts of the original trilogy so special.

    The Star Wars Special Editions Caused Controversy - star wars han solo + star wars han solo

    Thanks to the commercial success of Star Wars, George Lucas has revisited and further edited his films for re-releases. The most notable — and controversial — release were the Special Editions of the original trilogy. In 1997, to celebrate the saga’s 20th anniversary, Lucasfilm spent a total of $15 million to remaster A New Hope, The Empire Strikes Back, and Return of the Jedi. The Special Editions had stints in theaters before moving to home media.

    Although most of the Special Editions’ changes were cosmetic, others significantly affected the plot of the films. The most notable example is the “Han shot first” scene in A New Hope. As a result, the Special Editions generated significant controversy among Star Wars fans. Many fans remain skeptical about George Lucas’s decision to finish each original trilogy film “the way it was meant to be.”

    - star wars + star wars

    While the Special Editions represent the most significant edits to the original trilogy, the saga has undergone other changes. Following up on the saga’s first Blu-ray release in 2011, Industrial Light & Magic (ILM) began remastering the entire saga in 3D, starting with the prequel trilogy. The Phantom Menace saw a theatrical 3D re-release in 2012, but Disney’s 2012 acquisition of Lucasfilm indefinitely postponed further 3D releases.

    In 2015, Attack of the Clones and Revenge of the Sith received limited 3D showings at Celebration Anaheim. Other than that, it seems as though Disney has decided to refocus Lucasfilm’s efforts to new films. Of course, that’s why the saga has produced new content beginning with The Force Awakens. However, it looks like Lucasfilm isn’t likely to generate 3D versions of the original trilogy anytime soon.

    Why the Original Film Cuts Matter

    - +

    Admittedly, the differences between the original trilogy’s unaltered cuts and the Special Editions appeal to more hardcore fans. Casual fans are less likely to care about whether Greedo or Han Solo shot first. Still, given Star Wars’ indelible impact on pop culture, there’s certainly a market for the original trilogy’s unaltered cuts. They might not be for every Star Wars fan, but many of us care about them.

    ILM supervisor John Knoll, who first pitched the story idea for Rogue One, said last year that ILM finished a brand new 4K restoration print of A New Hope. For that reason, it seems likely that Lucasfilm will finally give diehard fans the original film cuts that they’ve clamored for. There’s no word yet whether the unaltered cuts will be released in theaters or on home media. At the very least, however, fans will likely get them after all this time. After all, the Special Editions marked the saga’s 20th anniversary. Star Wars turns 40 years old this year, so there’s no telling what’s in store.

    -
    +

    Would you like to be part of the Fandom team? Join our Fan Contributor Program and share your voice on Fandom.com!

    - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/wikipedia-2/expected.html b/article_scraper/resources/tests/readability/wikipedia-2/expected.html index ac3a1fa..c6a4864 100644 --- a/article_scraper/resources/tests/readability/wikipedia-2/expected.html +++ b/article_scraper/resources/tests/readability/wikipedia-2/expected.html @@ -12,32 +12,28 @@

    New Zealand -

    -
    +


    -

    Aotearoa  (Māori) -

    -
    +

    Aotearoa  (Māori) +

    -

    Blue field with the Union Flag in the top right corner, and four red stars with white borders to the right. +

    Blue field with the Union Flag in the top right corner, and four red stars with white borders to the right.

    Flag -

    -
    +

    -

    A quartered shield, flanked by two figures, topped with a crown. +

    A quartered shield, flanked by two figures, topped with a crown.

    +

    @@ -45,10 +41,9 @@
    -

    Anthems:

    -
    +

    Anthems:

      -
    • "God Defend New Zealand"
      +
    • "God Defend New Zealand"
    • @@ -61,7 +56,7 @@ - A map of the hemisphere centred on New Zealand, using an orthographic projection. + A map of the hemisphere centred on New Zealand, using an orthographic projection.

      Location of New Zealand, including outlying islands, its territorial claim in the Antarctic, and Tokelau

      @@ -72,7 +67,7 @@ Capital - Wellington
      + Wellington
      41°17′S 174°27′E / 41.283°S 174.450°E @@ -95,7 +90,7 @@ English[n 2]
    • - Māori + Māori
    • NZ Sign Language @@ -116,7 +111,7 @@
      • 70.2% European
      • -
      • 16.5% Māori +
      • 16.5% Māori
      • 15.1% Asian
      • @@ -135,7 +130,7 @@ Demonym(s) - New Zealander
        + New Zealander
        Kiwi (informal) @@ -147,9 +142,7 @@ Unitary parliamentary constitutional monarchy - - - +

        @@ -186,7 +179,7 @@ Legislature - Parliament
        + Parliament
        (House of Representatives) @@ -198,9 +191,7 @@

        - - - +

        @@ -228,7 +219,7 @@

        -
        +
        25 November 1947 @@ -354,7 +345,7 @@ Gini (2014) - 33.0[7]
        + 33.0[7]
        medium · 22nd @@ -363,7 +354,7 @@ HDI (2017) - Increase 0.917[8]
        + Increase 0.917[8]
        very high · 16th @@ -398,7 +389,7 @@ Date format - dd/mm/yyyy
        + dd/mm/yyyy
        yyyy-mm-dd[10] @@ -437,10 +428,10 @@

        - New Zealand (Māori: Aotearoa [aɔˈtɛaɾɔa]) is a sovereign island country in the southwestern Pacific Ocean. The country geographically comprises two main landmasses—the North Island (Te Ika-a-Māui), and the South Island (Te Waipounamu)—and around 600 smaller islands. It has a total land area of 268,000 square kilometres (103,500 sq mi). New Zealand is situated some 2,000 kilometres (1,200 mi) east of Australia across the Tasman Sea and roughly 1,000 kilometres (600 mi) south of the Pacific island areas of New Caledonia, Fiji, and Tonga. Because of its remoteness, it was one of the last lands to be settled by humans. During its long period of isolation, New Zealand developed a distinct biodiversity of animal, fungal, and plant life. The country's varied topography and its sharp mountain peaks, such as the Southern Alps, owe much to the tectonic uplift of land and volcanic eruptions. New Zealand's capital city is Wellington, while its most populous city is Auckland. + New Zealand (Māori: Aotearoa [aɔˈtɛaɾɔa]) is a sovereign island country in the southwestern Pacific Ocean. The country geographically comprises two main landmasses—the North Island (Te Ika-a-Māui), and the South Island (Te Waipounamu)—and around 600 smaller islands. It has a total land area of 268,000 square kilometres (103,500 sq mi). New Zealand is situated some 2,000 kilometres (1,200 mi) east of Australia across the Tasman Sea and roughly 1,000 kilometres (600 mi) south of the Pacific island areas of New Caledonia, Fiji, and Tonga. Because of its remoteness, it was one of the last lands to be settled by humans. During its long period of isolation, New Zealand developed a distinct biodiversity of animal, fungal, and plant life. The country's varied topography and its sharp mountain peaks, such as the Southern Alps, owe much to the tectonic uplift of land and volcanic eruptions. New Zealand's capital city is Wellington, while its most populous city is Auckland.

        - Sometime between 1250 and 1300, Polynesians settled in the islands that later were named New Zealand and developed a distinctive Māori culture. In 1642, Dutch explorer Abel Tasman became the first European to sight New Zealand. In 1840, representatives of the United Kingdom and Māori chiefs signed the Treaty of Waitangi, which declared British sovereignty over the islands. In 1841, New Zealand became a colony within the British Empire and in 1907 it became a dominion; it gained full statutory independence in 1947 and the British monarch remained the head of state. Today, the majority of New Zealand's population of 4.9 million is of European descent; the indigenous Māori are the largest minority, followed by Asians and Pacific Islanders. Reflecting this, New Zealand's culture is mainly derived from Māori and early British settlers, with recent broadening arising from increased immigration. The official languages are English, Māori, and New Zealand Sign Language, with English being very dominant. + Sometime between 1250 and 1300, Polynesians settled in the islands that later were named New Zealand and developed a distinctive Māori culture. In 1642, Dutch explorer Abel Tasman became the first European to sight New Zealand. In 1840, representatives of the United Kingdom and Māori chiefs signed the Treaty of Waitangi, which declared British sovereignty over the islands. In 1841, New Zealand became a colony within the British Empire and in 1907 it became a dominion; it gained full statutory independence in 1947 and the British monarch remained the head of state. Today, the majority of New Zealand's population of 4.9 million is of European descent; the indigenous Māori are the largest minority, followed by Asians and Pacific Islanders. Reflecting this, New Zealand's culture is mainly derived from Māori and early British settlers, with recent broadening arising from increased immigration. 
The official languages are English, Māori, and New Zealand Sign Language, with English being very dominant.

        A developed country, New Zealand ranks highly in international comparisons of national performance, such as quality of life, health, education, protection of civil liberties, and economic freedom. New Zealand underwent major economic changes during the 1980s, which transformed it from a protectionist to a liberalised free-trade economy. The service sector dominates the national economy, followed by the industrial sector, and agriculture; international tourism is a significant source of revenue. Nationally, legislative authority is vested in an elected, unicameral Parliament, while executive political power is exercised by the Cabinet, led by the prime minister, currently Jacinda Ardern. Queen Elizabeth II is the country's monarch and is represented by a governor-general, currently Dame Patsy Reddy. In addition, New Zealand is organised into 11 regional councils and 67 territorial authorities for local government purposes. The Realm of New Zealand also includes Tokelau (a dependent territory); the Cook Islands and Niue (self-governing states in free association with New Zealand); and the Ross Dependency, which is New Zealand's territorial claim in Antarctica. New Zealand is a member of the United Nations, Commonwealth of Nations, ANZUS, Organisation for Economic Co-operation and Development, ASEAN Plus Six, Asia-Pacific Economic Cooperation, the Pacific Community and the Pacific Islands Forum. @@ -557,17 +548,15 @@

        -

        Brown square paper with Dutch writing and a thick red, curved line

        -
        +

        Brown square paper with Dutch writing and a thick red, curved line

        Detail from a 1657 map showing the western coastline of "Nova Zeelandia". (In this map, north is at the bottom.) -

        -
        +

        Dutch explorer Abel Tasman sighted New Zealand in 1642 and named it Staten Land "in honour of the States General" (Dutch parliament). He wrote, "it is possible that this land joins to the Staten Land but it is uncertain",[11] referring to a landmass of the same name at the southern tip of South America, discovered by Jacob Le Maire in 1616.[12][13] In 1645, Dutch cartographers renamed the land Nova Zeelandia after the Dutch province of Zeeland.[14][15] British explorer James Cook subsequently anglicised the name to New Zealand.[16]

        - Aotearoa (pronounced ; often translated as "land of the long white cloud")[17] is the current Māori name for New Zealand. It is unknown whether Māori had a name for the whole country before the arrival of Europeans, with Aotearoa originally referring to just the North Island.[18] Māori had several traditional names for the two main islands, including Te Ika-a-Māui (the fish of Māui) for the North Island and Te Waipounamu (the waters of greenstone) or Te Waka o Aoraki (the canoe of Aoraki) for the South Island.[19] Early European maps labelled the islands North (North Island), Middle (South Island) and South (Stewart Island / Rakiura).[20] In 1830, mapmakers began to use "North" and "South" on their maps to distinguish the two largest islands and by 1907 this was the accepted norm.[16] The New Zealand Geographic Board discovered in 2009 that the names of the North Island and South Island had never been formalised, and names and alternative names were formalised in 2013. This set the names as North Island or Te Ika-a-Māui, and South Island or Te Waipounamu.[21] For each island, either its English or Māori name can be used, or both can be used together.[21] + Aotearoa (pronounced ; often translated as "land of the long white cloud")[17] is the current Māori name for New Zealand. 
It is unknown whether Māori had a name for the whole country before the arrival of Europeans, with Aotearoa originally referring to just the North Island.[18] Māori had several traditional names for the two main islands, including Te Ika-a-Māui (the fish of Māui) for the North Island and Te Waipounamu (the waters of greenstone) or Te Waka o Aoraki (the canoe of Aoraki) for the South Island.[19] Early European maps labelled the islands North (North Island), Middle (South Island) and South (Stewart Island / Rakiura).[20] In 1830, mapmakers began to use "North" and "South" on their maps to distinguish the two largest islands and by 1907 this was the accepted norm.[16] The New Zealand Geographic Board discovered in 2009 that the names of the North Island and South Island had never been formalised, and names and alternative names were formalised in 2013. This set the names as North Island or Te Ika-a-Māui, and South Island or Te Waipounamu.[21] For each island, either its English or Māori name can be used, or both can be used together.[21]

        @@ -575,33 +564,29 @@

        -

        One set of arrows point from Taiwan to Melanesia to Fiji/Samoa and then to the Marquesas Islands. The population then spread, some going south to New Zealand and others going north to Hawai'i. A second set start in southern Asia and end in Melanesia.

        -
        -

        The Māori people are most likely descended from people who emigrated from Taiwan to Melanesia and then travelled east through to the Society Islands. After a pause of 70 to 265 years, a new wave of exploration led to the discovery and settlement of New Zealand.[22] -

        -
        +

        One set of arrows point from Taiwan to Melanesia to Fiji/Samoa and then to the Marquesas Islands. The population then spread, some going south to New Zealand and others going north to Hawai'i. A second set start in southern Asia and end in Melanesia.

        +

        The Māori people are most likely descended from people who emigrated from Taiwan to Melanesia and then travelled east through to the Society Islands. After a pause of 70 to 265 years, a new wave of exploration led to the discovery and settlement of New Zealand.[22] +

        - New Zealand was one of the last major landmasses settled by humans. Radiocarbon dating, evidence of deforestation[23] and mitochondrial DNA variability within Māori populations[24] suggest New Zealand was first settled by Eastern Polynesians between 1250 and 1300,[19][25] concluding a long series of voyages through the southern Pacific islands.[26] Over the centuries that followed, these settlers developed a distinct culture now known as Māori. The population was divided into iwi (tribes) and hapū (subtribes) who would sometimes cooperate, sometimes compete and sometimes fight against each other.[27] At some point a group of Māori migrated to Rēkohu, now known as the Chatham Islands, where they developed their distinct Moriori culture.[28][29] The Moriori population was all but wiped out between 1835 and 1862, largely because of Taranaki Māori invasion and enslavement in the 1830s, although European diseases also contributed. In 1862 only 101 survived, and the last known full-blooded Moriori died in 1933.[30] + New Zealand was one of the last major landmasses settled by humans. Radiocarbon dating, evidence of deforestation[23] and mitochondrial DNA variability within Māori populations[24] suggest New Zealand was first settled by Eastern Polynesians between 1250 and 1300,[19][25] concluding a long series of voyages through the southern Pacific islands.[26] Over the centuries that followed, these settlers developed a distinct culture now known as Māori. The population was divided into iwi (tribes) and hapū (subtribes) who would sometimes cooperate, sometimes compete and sometimes fight against each other.[27] At some point a group of Māori migrated to Rēkohu, now known as the Chatham Islands, where they developed their distinct Moriori culture.[28][29] The Moriori population was all but wiped out between 1835 and 1862, largely because of Taranaki Māori invasion and enslavement in the 1830s, although European diseases also contributed. 
In 1862 only 101 survived, and the last known full-blooded Moriori died in 1933.[30]

        -

        An engraving of a sketched coastline on white background

        -
        +

        An engraving of a sketched coastline on white background

        Map of the New Zealand coastline as Cook charted it on his first visit in 1769–70. The track of the Endeavour is also shown. -

        -
        +

        The first Europeans known to have reached New Zealand were Dutch explorer Abel Tasman and his crew in 1642.[31] In a hostile encounter, four crew members were killed and at least one Māori was hit by canister shot.[32] Europeans did not revisit New Zealand until 1769 when British explorer James Cook mapped almost the entire coastline.[31] Following Cook, New Zealand was visited by numerous European and North American whaling, sealing and trading ships. They traded European food, metal tools, weapons and other goods for timber, Māori food, artefacts and water.[33] The introduction of the potato and the musket transformed Māori agriculture and warfare. Potatoes provided a reliable food surplus, which enabled longer and more sustained military campaigns.[34] The resulting intertribal Musket Wars encompassed over 600 battles between 1801 and 1840, killing 30,000–40,000 Māori.[35] From the early 19th century, Christian missionaries began to settle New Zealand, eventually converting most of the Māori population.[36] The Māori population declined to around 40% of its pre-contact level during the 19th century; introduced diseases were the major factor.[37]

        -

        A torn sheet of paper

        +

        A torn sheet of paper

        In 1788 Captain Arthur Phillip assumed the position of Governor of the new British colony of New South Wales which according to his commission included New Zealand.[38] The British Government appointed James Busby as British Resident to New Zealand in 1832 following a petition from northern Māori.[39] In 1835, following an announcement of impending French settlement by Charles de Thierry, the nebulous United Tribes of New Zealand sent a Declaration of Independence to King William IV of the United Kingdom asking for protection.[39] Ongoing unrest, the proposed settlement of New Zealand by the New Zealand Company (which had already sent its first ship of surveyors to buy land from Māori) and the dubious legal standing of the Declaration of Independence prompted the Colonial Office to send Captain William Hobson to claim sovereignty for the United Kingdom and negotiate a treaty with the Māori.[40] The Treaty of Waitangi was first signed in the Bay of Islands on 6 February 1840.[41] In response to the New Zealand Company's attempts to establish an independent settlement in Wellington[42] and French settlers purchasing land in Akaroa,[43] Hobson declared British sovereignty over all of New Zealand on 21 May 1840, even though copies of the Treaty were still circulating throughout the country for Māori to sign.[44] With the signing of the Treaty and declaration of sovereignty the number of immigrants, particularly from the United Kingdom, began to increase.[45]

        -

        Black and white engraving depicting a crowd of people

        +

        Black and white engraving depicting a crowd of people

        New Zealand, still part of the colony of New South Wales, became a separate Colony of New Zealand on 1 July 1841.[46] Armed conflict began between the Colonial government and Māori in 1843 with the Wairau Affray over land and disagreements over sovereignty. These conflicts, mainly in the North Island, saw thousands of Imperial troops and the Royal Navy come to New Zealand and became known as the New Zealand Wars. Following these armed conflicts, large amounts of Māori land was confiscated by the government to meet settler demands.[47] @@ -616,7 +601,7 @@ In 1907, at the request of the New Zealand Parliament, King Edward VII proclaimed New Zealand a Dominion within the British Empire,[54] reflecting its self-governing status.[55] In 1947 the country adopted the Statute of Westminster, confirming that the British Parliament could no longer legislate for New Zealand without the consent of New Zealand.[48]

        - Early in the 20th century, New Zealand was involved in world affairs, fighting in the First and Second World Wars[56] and suffering through the Great Depression.[57] The depression led to the election of the First Labour Government and the establishment of a comprehensive welfare state and a protectionist economy.[58] New Zealand experienced increasing prosperity following the Second World War[59] and Māori began to leave their traditional rural life and move to the cities in search of work.[60] A Māori protest movement developed, which criticised Eurocentrism and worked for greater recognition of Māori culture and of the Treaty of Waitangi.[61] In 1975, a Waitangi Tribunal was set up to investigate alleged breaches of the Treaty, and it was enabled to investigate historic grievances in 1985.[41] The government has negotiated settlements of these grievances with many iwi,[62] although Māori claims to the foreshore and seabed have proved controversial in the 2000s.[63][64] + Early in the 20th century, New Zealand was involved in world affairs, fighting in the First and Second World Wars[56] and suffering through the Great Depression.[57] The depression led to the election of the First Labour Government and the establishment of a comprehensive welfare state and a protectionist economy.[58] New Zealand experienced increasing prosperity following the Second World War[59] and Māori began to leave their traditional rural life and move to the cities in search of work.[60] A Māori protest movement developed, which criticised Eurocentrism and worked for greater recognition of Māori culture and of the Treaty of Waitangi.[61] In 1975, a Waitangi Tribunal was set up to investigate alleged breaches of the Treaty, and it was enabled to investigate historic grievances in 1985.[41] The government has negotiated settlements of these grievances with many iwi,[62] although Māori claims to the foreshore and seabed have proved controversial in the 2000s.[63][64]

        Government and politics @@ -624,12 +609,12 @@
        -

        The Queen wearing her New Zealand insignia +

        The Queen wearing her New Zealand insignia

        -

        A smiling woman wearing a black dress +

        A smiling woman wearing a black dress

        @@ -641,10 +626,10 @@ The New Zealand Parliament holds legislative power and consists of the Queen and the House of Representatives.[72] It also included an upper house, the Legislative Council, until this was abolished in 1950.[72] The supremacy of parliament over the Crown and other government institutions was established in England by the Bill of Rights 1689 and has been ratified as law in New Zealand.[72] The House of Representatives is democratically elected and a government is formed from the party or coalition with the majority of seats. If no majority is formed, a minority government can be formed if support from other parties during confidence and supply votes is assured.[72] The governor-general appoints ministers under advice from the prime minister, who is by convention the parliamentary leader of the governing party or coalition.[73] Cabinet, formed by ministers and led by the prime minister, is the highest policy-making body in government and responsible for deciding significant government actions.[74] Members of Cabinet make major decisions collectively, and are therefore collectively responsible for the consequences of these decisions.[75]

        - A parliamentary general election must be called no later than three years after the previous election.[76] Almost all general elections between 1853 and 1993 were held under the first-past-the-post voting system.[77] Since the 1996 election, a form of proportional representation called mixed-member proportional (MMP) has been used.[66] Under the MMP system, each person has two votes; one is for a candidate standing in the voter's electorate and the other is for a party. Since the 2014 election, there have been 71 electorates (which include seven Māori electorates in which only Māori can optionally vote),[78] and the remaining 49 of the 120 seats are assigned so that representation in parliament reflects the party vote, with the threshold that a party must win at least one electorate or 5% of the total party vote before it is eligible for a seat.[79] + A parliamentary general election must be called no later than three years after the previous election.[76] Almost all general elections between 1853 and 1993 were held under the first-past-the-post voting system.[77] Since the 1996 election, a form of proportional representation called mixed-member proportional (MMP) has been used.[66] Under the MMP system, each person has two votes; one is for a candidate standing in the voter's electorate and the other is for a party. Since the 2014 election, there have been 71 electorates (which include seven Māori electorates in which only Māori can optionally vote),[78] and the remaining 49 of the 120 seats are assigned so that representation in parliament reflects the party vote, with the threshold that a party must win at least one electorate or 5% of the total party vote before it is eligible for a seat.[79]

        -

        A block of buildings fronted by a large statue.

        +

        A block of buildings fronted by a large statue.

        Elections since the 1930s have been dominated by two political parties, National and Labour.[77] Between March 2005 and August 2006, New Zealand became the first country in the world in which all the highest offices in the land—head of state, governor-general, prime minister, speaker and chief justice—were occupied simultaneously by women.[80] The current prime minister is Jacinda Ardern, who has been in office since 26 October 2017.[81] She is the country's third female prime minister.[82] @@ -661,20 +646,18 @@

        -

        A squad of men kneel in the desert sand while performing a war dance

        +

        A squad of men kneel in the desert sand while performing a war dance

        Early colonial New Zealand allowed the British Government to determine external trade and be responsible for foreign policy.[91] The 1923 and 1926 Imperial Conferences decided that New Zealand should be allowed to negotiate its own political treaties and the first commercial treaty was ratified in 1928 with Japan. On 3 September 1939 New Zealand allied itself with Britain and declared war on Germany with Prime Minister Michael Joseph Savage proclaiming, "Where she goes, we go; where she stands, we stand."[92]

        - In 1951 the United Kingdom became increasingly focused on its European interests,[93] while New Zealand joined Australia and the United States in the ANZUS security treaty.[94] The influence of the United States on New Zealand weakened following protests over the Vietnam War,[95] the refusal of the United States to admonish France after the sinking of the Rainbow Warrior,[96] disagreements over environmental and agricultural trade issues and New Zealand's nuclear-free policy.[97][98] Despite the United States' suspension of ANZUS obligations the treaty remained in effect between New Zealand and Australia, whose foreign policy has followed a similar historical trend.[99] Close political contact is maintained between the two countries, with free trade agreements and travel arrangements that allow citizens to visit, live and work in both countries without restrictions.[100] In 2013 there were about 650,000 New Zealand citizens living in Australia, which is equivalent to 15% of the resident population of New Zealand.[101] + In 1951 the United Kingdom became increasingly focused on its European interests,[93] while New Zealand joined Australia and the United States in the ANZUS security treaty.[94] The influence of the United States on New Zealand weakened following protests over the Vietnam War,[95] the refusal of the United States to admonish France after the sinking of the Rainbow Warrior,[96] disagreements over environmental and agricultural trade issues and New Zealand's nuclear-free policy.[97][98] Despite the United States' suspension of ANZUS obligations the treaty remained in effect between New Zealand and Australia, whose foreign policy has followed a similar historical trend.[99] Close political contact is maintained between the two countries, with free trade agreements and travel arrangements that allow citizens to visit, live and work in both countries without restrictions.[100] In 2013 there were about 650,000 New Zealand citizens living in 
Australia, which is equivalent to 15% of the resident population of New Zealand.[101]

        -

        A soldier in a green army uniform faces forwards

        -
        +

        A soldier in a green army uniform faces forwards

        Anzac Day service at the National War Memorial -

        -
        +

        New Zealand has a strong presence among the Pacific Island countries. A large proportion of New Zealand's aid goes to these countries and many Pacific people migrate to New Zealand for employment.[102] Permanent migration is regulated under the 1970 Samoan Quota Scheme and the 2002 Pacific Access Category, which allow up to 1,100 Samoan nationals and up to 750 other Pacific Islanders respectively to become permanent New Zealand residents each year. A seasonal workers scheme for temporary migration was introduced in 2007 and in 2009 about 8,000 Pacific Islanders were employed under it.[103] A regional power,[104] New Zealand is involved in the Pacific Islands Forum, the Pacific Community, Asia-Pacific Economic Cooperation and the Association of Southeast Asian Nations Regional Forum (including the East Asia Summit).[100] New Zealand is a member of the United Nations,[105] the Commonwealth of Nations[106] and the Organisation for Economic Co-operation and Development (OECD),[107] and participates in the Five Power Defence Arrangements.[108] @@ -683,14 +666,14 @@ New Zealand's military services—the Defence Force—comprise the New Zealand Army, the Royal New Zealand Air Force and the Royal New Zealand Navy.[109] New Zealand's national defence needs are modest, since a direct attack is unlikely.[110] However, its military has had a global presence. The country fought in both world wars, with notable campaigns in Gallipoli, Crete,[111] El Alamein[112] and Cassino.[113] The Gallipoli campaign played an important part in fostering New Zealand's national identity[114][115] and strengthened the ANZAC tradition it shares with Australia.[116]

        - In addition to Vietnam and the two world wars, New Zealand fought in the Second Boer War,[117] the Korean War,[118] the Malayan Emergency,[119] the Gulf War and the Afghanistan War. It has contributed forces to several regional and global peacekeeping missions, such as those in Cyprus, Somalia, Bosnia and Herzegovina, the Sinai, Angola, Cambodia, the Iran–Iraq border, Bougainville, East Timor, and the Solomon Islands.[120] + In addition to Vietnam and the two world wars, New Zealand fought in the Second Boer War,[117] the Korean War,[118] the Malayan Emergency,[119] the Gulf War and the Afghanistan War. It has contributed forces to several regional and global peacekeeping missions, such as those in Cyprus, Somalia, Bosnia and Herzegovina, the Sinai, Angola, Cambodia, the Iran–Iraq border, Bougainville, East Timor, and the Solomon Islands.[120]

        Local government and external territories

        -

        Map with the North, South, Stewart/Rakiura, Tokelau, Cook, Niue, Kermadec, Chatham, Bounty, Antipodes, Snare, Auckland and Campbell Islands highlighted. New Zealand's segment of Antarctica (the Ross Dependency) is also highlighted.

        +

        Map with the North, South, Stewart/Rakiura, Tokelau, Cook, Niue, Kermadec, Chatham, Bounty, Antipodes, Snare, Auckland and Campbell Islands highlighted. New Zealand's segment of Antarctica (the Ross Dependency) is also highlighted.

        The early European settlers divided New Zealand into provinces, which had a degree of autonomy.[121] Because of financial pressures and the desire to consolidate railways, education, land sales and other policies, government was centralised and the provinces were abolished in 1876.[122] The provinces are remembered in regional public holidays[123] and sporting rivalries.[124] @@ -728,19 +711,15 @@ Countries -  New Zealand +  New Zealand - -   - - -   + + + +  Cook Islands -  Cook Islands - - -  Niue +  Niue @@ -756,18 +735,16 @@ Chatham Islands - -   - + - Outlying islands outside any regional authority
        + Outlying islands outside any regional authority
        (the Kermadec Islands, Three Kings Islands, and Subantarctic Islands) Ross Dependency -  Tokelau +  Tokelau 15 islands @@ -822,7 +799,7 @@

        -

        Islands of New Zealand as seen from satellite

        +

        Islands of New Zealand as seen from satellite

        New Zealand is located near the centre of the water hemisphere and is made up of two main islands and a number of smaller islands. The two main islands (the North Island, or Te Ika-a-Māui, and the South Island, or Te Waipounamu) are separated by Cook Strait, 22 kilometres (14 mi) wide at its narrowest point.[138] Besides the North and South Islands, the five largest inhabited islands are Stewart Island (across the Foveaux Strait), Chatham Island, Great Barrier Island (in the Hauraki Gulf),[139] D'Urville Island (in the Marlborough Sounds)[140] and Waiheke Island (about 22 km (14 mi) from central Auckland).[141] @@ -830,12 +807,12 @@

        -

        A large mountain with a lake in the foreground +

        A large mountain with a lake in the foreground

        -

        Snow-capped mountain range +

        Snow-capped mountain range

        The Southern Alps stretch for 500 kilometres down the South Island @@ -858,21 +835,17 @@

        • Landscapes of New Zealand
        • -
        • - -
        • -
        • - -
        • + +
        • -

          +

        • -

          +

        • @@ -882,7 +855,7 @@

          - New Zealand's climate is predominantly temperate maritime (Köppen: Cfb), with mean annual temperatures ranging from 10 °C (50 °F) in the south to 16 °C (61 °F) in the north.[158] Historical maxima and minima are 42.4 °C (108.32 °F) in Rangiora, Canterbury and −25.6 °C (−14.08 °F) in Ranfurly, Otago.[159] Conditions vary sharply across regions from extremely wet on the West Coast of the South Island to almost semi-arid in Central Otago and the Mackenzie Basin of inland Canterbury and subtropical in Northland.[160] Of the seven largest cities, Christchurch is the driest, receiving on average only 640 millimetres (25 in) of rain per year and Wellington the wettest, receiving almost twice that amount.[161] Auckland, Wellington and Christchurch all receive a yearly average of more than 2,000 hours of sunshine. The southern and southwestern parts of the South Island have a cooler and cloudier climate, with around 1,400–1,600 hours; the northern and northeastern parts of the South Island are the sunniest areas of the country and receive about 2,400–2,500 hours.[162] The general snow season is early June until early October, though cold snaps can occur outside this season.[163] Snowfall is common in the eastern and southern parts of the South Island and mountain areas across the country.[158] + New Zealand's climate is predominantly temperate maritime (Köppen: Cfb), with mean annual temperatures ranging from 10 °C (50 °F) in the south to 16 °C (61 °F) in the north.[158] Historical maxima and minima are 42.4 °C (108.32 °F) in Rangiora, Canterbury and −25.6 °C (−14.08 °F) in Ranfurly, Otago.[159] Conditions vary sharply across regions from extremely wet on the West Coast of the South Island to almost semi-arid in Central Otago and the Mackenzie Basin of inland Canterbury and subtropical in Northland.[160] Of the seven largest cities, Christchurch is the driest, receiving on average only 640 millimetres (25 in) of rain per year and Wellington the wettest, 
receiving almost twice that amount.[161] Auckland, Wellington and Christchurch all receive a yearly average of more than 2,000 hours of sunshine. The southern and southwestern parts of the South Island have a cooler and cloudier climate, with around 1,400–1,600 hours; the northern and northeastern parts of the South Island are the sunniest areas of the country and receive about 2,400–2,500 hours.[162] The general snow season is early June until early October, though cold snaps can occur outside this season.[163] Snowfall is common in the eastern and southern parts of the South Island and mountain areas across the country.[158]

          The table below lists climate normals for the warmest and coldest months in New Zealand's six largest cities. North Island cities are generally warmest in February. South Island cities are warmest in January. @@ -1018,11 +991,9 @@

          -

          Kiwi amongst sticks

          -
          +

          Kiwi amongst sticks

          The endemic flightless kiwi is a national icon. -

          -
          +

          New Zealand's geographic isolation for 80 million years[165] and island biogeography has influenced evolution of the country's species of animals, fungi and plants. Physical isolation has caused biological isolation, resulting in a dynamic evolutionary ecology with examples of very distinctive plants and animals as well as populations of widespread species.[166][167] About 82% of New Zealand's indigenous vascular plants are endemic, covering 1,944 species across 65 genera.[168][169] The number of fungi recorded from New Zealand, including lichen-forming species, is not known, nor is the proportion of those fungi which are endemic, but one estimate suggests there are about 2,300 species of lichen-forming fungi in New Zealand[168] and 40% of these are endemic.[170] The two main types of forest are those dominated by broadleaf trees with emergent podocarps, or by southern beech in cooler climates.[171] The remaining vegetation types consist of grasslands, the majority of which are tussock.[172] @@ -1031,14 +1002,12 @@ Before the arrival of humans, an estimated 80% of the land was covered in forest, with only high alpine, wet, infertile and volcanic areas without trees.[173] Massive deforestation occurred after humans arrived, with around half the forest cover lost to fire after Polynesian settlement.[174] Much of the remaining forest fell after European settlement, being logged or cleared to make room for pastoral farming, leaving forest occupying only 23% of the land.[175]

          -

          An artist's rendition of a Haast's eagle attacking two moa

          -
          +

          An artist's rendition of a Haast's eagle attacking two moa

          The giant Haast's eagle died out when humans hunted its main prey, the moa, to extinction. -

          -
          +

          - The forests were dominated by birds, and the lack of mammalian predators led to some like the kiwi, kakapo, weka and takahē evolving flightlessness.[176] The arrival of humans, associated changes to habitat, and the introduction of rats, ferrets and other mammals led to the extinction of many bird species, including large birds like the moa and Haast's eagle.[177][178] + The forests were dominated by birds, and the lack of mammalian predators led to some like the kiwi, kakapo, weka and takahē evolving flightlessness.[176] The arrival of humans, associated changes to habitat, and the introduction of rats, ferrets and other mammals led to the extinction of many bird species, including large birds like the moa and Haast's eagle.[177][178]

          Other indigenous animals are represented by reptiles (tuatara, skinks and geckos), frogs,[179] spiders,[180] insects (weta)[181] and snails.[182] Some, such as the tuatara, are so unique that they have been called living fossils.[183] Three species of bats (one since extinct) were the only sign of native land mammals in New Zealand until the 2006 discovery of bones from a unique, mouse-sized land mammal at least 16 million years old.[184][185] Marine mammals however are abundant, with almost half the world's cetaceans (whales, dolphins, and porpoises) and large numbers of fur seals reported in New Zealand waters.[186] Many seabirds breed in New Zealand, a third of them unique to the country.[187] More penguin species are found in New Zealand than in any other country.[188] @@ -1053,7 +1022,7 @@

          -

          Boats docked in blue-green water. Plate glass skyscrapers rising up in the background.

          +

          Boats docked in blue-green water. Plate glass skyscrapers rising up in the background.

          New Zealand has an advanced market economy,[193] ranked 16th in the 2018 Human Development Index[8] and third in the 2018 Index of Economic Freedom.[194] It is a high-income economy with a nominal gross domestic product (GDP) per capita of US$36,254.[6] The currency is the New Zealand dollar, informally known as the "Kiwi dollar"; it also circulates in the Cook Islands (see Cook Islands dollar), Niue, Tokelau, and the Pitcairn Islands.[195] @@ -1062,27 +1031,23 @@ Historically, extractive industries have contributed strongly to New Zealand's economy, focussing at different times on sealing, whaling, flax, gold, kauri gum, and native timber.[196] The first shipment of refrigerated meat on the Dunedin in 1882 led to the establishment of meat and dairy exports to Britain, a trade which provided the basis for strong economic growth in New Zealand.[197] High demand for agricultural products from the United Kingdom and the United States helped New Zealanders achieve higher living standards than both Australia and Western Europe in the 1950s and 1960s.[198] In 1973, New Zealand's export market was reduced when the United Kingdom joined the European Economic Community[199] and other compounding factors, such as the 1973 oil and 1979 energy crises, led to a severe economic depression.[200] Living standards in New Zealand fell behind those of Australia and Western Europe, and by 1982 New Zealand had the lowest per-capita income of all the developed nations surveyed by the World Bank.[201] In the mid-1980s New Zealand deregulated its agricultural sector by phasing out subsidies over a three-year period.[202][203] Since 1984, successive governments engaged in major macroeconomic restructuring (known first as Rogernomics and then Ruthanasia), rapidly transforming New Zealand from a protected and highly regulated economy to a liberalised free-trade economy.[204][205]

          -

          Blue water against a backdrop of snow-capped mountains

          -
          +

          Blue water against a backdrop of snow-capped mountains

          Milford Sound is one of New Zealand's most famous tourist destinations.[206] -

          -
          +

          - Unemployment peaked above 10% in 1991 and 1992,[207] following the 1987 share market crash, but eventually fell to a record low (since 1986) of 3.7% in 2007 (ranking third from twenty-seven comparable OECD nations).[207] However, the global financial crisis that followed had a major impact on New Zealand, with the GDP shrinking for five consecutive quarters, the longest recession in over thirty years,[208][209] and unemployment rising back to 7% in late 2009.[210] Unemployment rates for different age groups follow similar trends, but are consistently higher among youth. In the December 2014 quarter, the general unemployment rate was around 5.8%, while the unemployment rate for youth aged 15 to 21 was 15.6%.[207] New Zealand has experienced a series of "brain drains" since the 1970s[211] that still continue today.[212] Nearly one quarter of highly skilled workers live overseas, mostly in Australia and Britain, which is the largest proportion from any developed nation.[213] In recent decades, however, a "brain gain" has brought in educated professionals from Europe and less developed countries.[214][215] Today New Zealand's economy benefits from a high level of innovation.[216] + Unemployment peaked above 10% in 1991 and 1992,[207] following the 1987 share market crash, but eventually fell to a record low (since 1986) of 3.7% in 2007 (ranking third from twenty-seven comparable OECD nations).[207] However, the global financial crisis that followed had a major impact on New Zealand, with the GDP shrinking for five consecutive quarters, the longest recession in over thirty years,[208][209] and unemployment rising back to 7% in late 2009.[210] Unemployment rates for different age groups follow similar trends, but are consistently higher among youth. 
In the December 2014 quarter, the general unemployment rate was around 5.8%, while the unemployment rate for youth aged 15 to 21 was 15.6%.[207] New Zealand has experienced a series of "brain drains" since the 1970s[211] that still continue today.[212] Nearly one quarter of highly skilled workers live overseas, mostly in Australia and Britain, which is the largest proportion from any developed nation.[213] In recent decades, however, a "brain gain" has brought in educated professionals from Europe and less developed countries.[214][215] Today New Zealand's economy benefits from a high level of innovation.[216]

          Trade

          - New Zealand is heavily dependent on international trade,[217] particularly in agricultural products.[218] Exports account for 24% of its output,[143] making New Zealand vulnerable to international commodity prices and global economic slowdowns. Food products made up 55% of the value of all the country's exports in 2014; wood was the second largest earner (7%).[219] New Zealand's main trading partners, as at June 2018, are China (NZ$27.8b), Australia ($26.2b), the European Union ($22.9b), the United States ($17.6b), and Japan ($8.4b).[220] On 7 April 2008, New Zealand and China signed the New Zealand–China Free Trade Agreement, the first such agreement China has signed with a developed country.[221] The service sector is the largest sector in the economy, followed by manufacturing and construction and then farming and raw material extraction.[143] Tourism plays a significant role in the economy, contributing $12.9 billion (or 5.6%) to New Zealand's total GDP and supporting 7.5% of the total workforce in 2016.[222] International visitor arrivals are expected to increase at a rate of 5.4% annually up to 2022.[222] + New Zealand is heavily dependent on international trade,[217] particularly in agricultural products.[218] Exports account for 24% of its output,[143] making New Zealand vulnerable to international commodity prices and global economic slowdowns. 
Food products made up 55% of the value of all the country's exports in 2014; wood was the second largest earner (7%).[219] New Zealand's main trading partners, as at June 2018, are China (NZ$27.8b), Australia ($26.2b), the European Union ($22.9b), the United States ($17.6b), and Japan ($8.4b).[220] On 7 April 2008, New Zealand and China signed the New Zealand–China Free Trade Agreement, the first such agreement China has signed with a developed country.[221] The service sector is the largest sector in the economy, followed by manufacturing and construction and then farming and raw material extraction.[143] Tourism plays a significant role in the economy, contributing $12.9 billion (or 5.6%) to New Zealand's total GDP and supporting 7.5% of the total workforce in 2016.[222] International visitor arrivals are expected to increase at a rate of 5.4% annually up to 2022.[222]

          -

          A Romney ewe with her two lambs

          -
          +

          A Romney ewe with her two lambs

          Wool has historically been one of New Zealand's major exports. -

          -
          +

          Wool was New Zealand's major agricultural export during the late 19th century.[196] Even as late as the 1960s it made up over a third of all export revenues,[196] but since then its price has steadily dropped relative to other commodities[223] and wool is no longer profitable for many farmers.[224] In contrast dairy farming increased, with the number of dairy cows doubling between 1990 and 2007,[225] to become New Zealand's largest export earner.[226] In the year to June 2018, dairy products accounted for 17.7% ($14.1 billion) of total exports,[220] and the country's largest company, Fonterra, controls almost one-third of the international dairy trade.[227] Other exports in 2017-18 were meat (8.8%), wood and wood products (6.2%), fruit (3.6%), machinery (2.2%) and wine (2.1%).[220] New Zealand's wine industry has followed a similar trend to dairy, the number of vineyards doubling over the same period,[228] overtaking wool exports for the first time in 2007.[229][230] @@ -1091,7 +1056,7 @@ Infrastructure

          -

          A mid-size jet airliner in flight. The plane livery is all-black and features a New Zealand silver fern mark.

          +

          A mid-size jet airliner in flight. The plane livery is all-black and features a New Zealand silver fern mark.

          In 2015, renewable energy, primarily geothermal and hydroelectric power, generated 40.1% of New Zealand's gross energy supply.[231] Geothermal power alone accounted for 22% of New Zealand's energy in 2015.[231] @@ -1110,7 +1075,7 @@

          -

          Stationary population pyramid broken down into 21 age ranges.

          +

          Stationary population pyramid broken down into 21 age ranges.

          The 2013 New Zealand census enumerated a resident population of 4,242,048, an increase of 5.3% over the 2006 figure.[245][n 8] As of September 2019, the total population has risen to an estimated 4,933,210.[5] @@ -1119,7 +1084,7 @@ New Zealand is a predominantly urban country, with 73.0% of the population living in the seventeen main urban areas (i.e. population 30,000 or greater) and 55.1% living in the four largest cities of Auckland, Christchurch, Wellington, and Hamilton.[247] New Zealand cities generally rank highly on international livability measures. For instance, in 2016 Auckland was ranked the world's third most liveable city and Wellington the twelfth by the Mercer Quality of Living Survey.[248]

          - Life expectancy for New Zealanders in 2012 was 84 years for females, and 80.2 years for males.[249] Life expectancy at birth is forecast to increase from 80 years to 85 years in 2050 and infant mortality is expected to decline.[250] New Zealand's fertility rate of 2.1 is relatively high for a developed country, and natural births account for a significant proportion of population growth. Consequently, the country has a young population compared to most industrialised nations, with 20% of New Zealanders being 14 years old or younger.[143] In 2018 the median age of the New Zealand population was 38.1 years.[251] By 2050 the median age is projected to rise to 43 years and the percentage of people 60 years of age and older to rise from 18% to 29%.[250] In 2008 the leading cause of premature death was cancer, at 29.8%, followed by ischaemic heart disease, 19.7%, and then cerebrovascular disease, 9.2%.[252] As of 2016, total expenditure on health care (including private sector spending) is 9.2% of GDP.[253]
          + Life expectancy for New Zealanders in 2012 was 84 years for females, and 80.2 years for males.[249] Life expectancy at birth is forecast to increase from 80 years to 85 years in 2050 and infant mortality is expected to decline.[250] New Zealand's fertility rate of 2.1 is relatively high for a developed country, and natural births account for a significant proportion of population growth. Consequently, the country has a young population compared to most industrialised nations, with 20% of New Zealanders being 14 years old or younger.[143] In 2018 the median age of the New Zealand population was 38.1 years.[251] By 2050 the median age is projected to rise to 43 years and the percentage of people 60 years of age and older to rise from 18% to 29%.[250] In 2008 the leading cause of premature death was cancer, at 29.8%, followed by ischaemic heart disease, 19.7%, and then cerebrovascular disease, 9.2%.[252] As of 2016, total expenditure on health care (including private sector spending) is 9.2% of GDP.[253]

          @@ -1140,16 +1105,14 @@
          -

          Largest urban areas in New Zealand

          -
          +

          Largest urban areas in New Zealand

          Statistics New Zealand June 2018 estimate (NZSAC92 boundaries)[254] -

          -
          +

          - + @@ -1174,13 +1137,13 @@ - + @@ -1456,42 +1419,38 @@
          -

          Pedestrians crossing a wide street which is flanked by storefronts

          -
          +

          Pedestrians crossing a wide street which is flanked by storefronts

          Pedestrians on Queen Street in Auckland, an ethnically diverse city -

          -
          +

          In the 2013 census, 74.0% of New Zealand residents identified ethnically as European, and 14.9% as Māori. Other major ethnic groups include Asian (11.8%) and Pacific peoples (7.4%), two-thirds of whom live in the Auckland Region.[255][n 3] The population has become more diverse in recent decades: in 1961, the census reported that the population of New Zealand was 92% European and 7% Māori, with Asian and Pacific minorities sharing the remaining 1%.[256]

          - While the demonym for a New Zealand citizen is New Zealander, the informal "Kiwi" is commonly used both internationally[257] and by locals.[258] The Māori loanword Pākehā has been used to refer to New Zealanders of European descent, although others reject this appellation.[259][260] The word Pākehā today is increasingly used to refer to all non-Polynesian New Zealanders.[261] + While the demonym for a New Zealand citizen is New Zealander, the informal "Kiwi" is commonly used both internationally[257] and by locals.[258] The Māori loanword Pākehā has been used to refer to New Zealanders of European descent, although others reject this appellation.[259][260] The word Pākehā today is increasingly used to refer to all non-Polynesian New Zealanders.[261]

          - The Māori were the first people to reach New Zealand, followed by the early European settlers. Following colonisation, immigrants were predominantly from Britain, Ireland and Australia because of restrictive policies similar to the White Australia policy.[262] There was also significant Dutch, Dalmatian,[263] German, and Italian immigration, together with indirect European immigration through Australia, North America, South America and South Africa.[264][265] Net migration increased after the Second World War; in the 1970s and 1980s policies were relaxed and immigration from Asia was promoted.[265][266] In 2009–10, an annual target of 45,000–50,000 permanent residence approvals was set by the New Zealand Immigration Service—more than one new migrant for every 100 New Zealand residents.[267] Just over 25% of New Zealand's population was born overseas, with the majority (52%) living in the Auckland Region. The United Kingdom remains the largest source of New Zealand's overseas population, with a quarter of all overseas-born New Zealanders born there; other major sources of New Zealand's overseas-born population are China, India, Australia, South Africa, Fiji and Samoa.[268] The number of fee-paying international students increased sharply in the late 1990s, with more than 20,000 studying in public tertiary institutions in 2002.[269] + The Māori were the first people to reach New Zealand, followed by the early European settlers. 
Following colonisation, immigrants were predominantly from Britain, Ireland and Australia because of restrictive policies similar to the White Australia policy.[262] There was also significant Dutch, Dalmatian,[263] German, and Italian immigration, together with indirect European immigration through Australia, North America, South America and South Africa.[264][265] Net migration increased after the Second World War; in the 1970s and 1980s policies were relaxed and immigration from Asia was promoted.[265][266] In 2009–10, an annual target of 45,000–50,000 permanent residence approvals was set by the New Zealand Immigration Service—more than one new migrant for every 100 New Zealand residents.[267] Just over 25% of New Zealand's population was born overseas, with the majority (52%) living in the Auckland Region. The United Kingdom remains the largest source of New Zealand's overseas population, with a quarter of all overseas-born New Zealanders born there; other major sources of New Zealand's overseas-born population are China, India, Australia, South Africa, Fiji and Samoa.[268] The number of fee-paying international students increased sharply in the late 1990s, with more than 20,000 studying in public tertiary institutions in 2002.[269]

          Language

          -

          Map of New Zealand showing the percentage of people in each census area unit who speak Māori. Areas of the North Island exhibit the highest Māori proficiency.

          -
          -

          Speakers of Māori according to the 2013 census[270]

          -

            Less than 5% +

          Map of New Zealand showing the percentage of people in each census area unit who speak Māori. Areas of the North Island exhibit the highest Māori proficiency.

          +

          Speakers of Māori according to the 2013 census[270]

           Less than 5%

          -

            More than 5% +

           More than 5%

          -

            More than 10% +

           More than 10%

          -

            More than 20% +

           More than 20%

          -

            More than 30% +

           More than 30%

          -

            More than 40% +

           More than 40%

          -

            More than 50% +

           More than 50%

          @@ -1499,7 +1458,7 @@ English is the predominant language in New Zealand, spoken by 96.1% of the population.[271] New Zealand English is similar to Australian English and many speakers from the Northern Hemisphere are unable to tell the accents apart.[272] The most prominent differences between the New Zealand English dialect and other English dialects are the shifts in the short front vowels: the short-"i" sound (as in "kit") has centralised towards the schwa sound (the "a" in "comma" and "about"); the short-"e" sound (as in "dress") has moved towards the short-"i" sound; and the short-"a" sound (as in "trap") has moved to the short-"e" sound.[273]

          - After the Second World War, Māori were discouraged from speaking their own language (te reo Māori) in schools and workplaces and it existed as a community language only in a few remote areas.[274] It has recently undergone a process of revitalisation,[275] being declared one of New Zealand's official languages in 1987,[276] and is spoken by 3.7% of the population.[271][n 9] There are now Māori language immersion schools and two television channels that broadcast predominantly in Māori.[278] Many places have both their Māori and English names officially recognised.[279] + After the Second World War, Māori were discouraged from speaking their own language (te reo Māori) in schools and workplaces and it existed as a community language only in a few remote areas.[274] It has recently undergone a process of revitalisation,[275] being declared one of New Zealand's official languages in 1987,[276] and is spoken by 3.7% of the population.[271][n 9] There are now Māori language immersion schools and two television channels that broadcast predominantly in Māori.[278] Many places have both their Māori and English names officially recognised.[279]

          As recorded in the 2013 census,[271] Samoan is the most widely spoken non-official language (2.2%),[n 10] followed by Hindi (1.7%), "Northern Chinese" (including Mandarin, 1.3%) and French (1.2%). 20,235 people (0.5%) reported the ability to use New Zealand Sign Language. It was declared one of New Zealand's official languages in 2006.[280] @@ -1510,21 +1469,19 @@

          -

          Simple white building with two red domed towers

          -
          -

          A Rātana church on a hill near Raetihi. The two-tower construction is characteristic of Rātana buildings. -

          -
          +

          Simple white building with two red domed towers

          +

          A Rātana church on a hill near Raetihi. The two-tower construction is characteristic of Rātana buildings. +

          - Christianity is the predominant religion in New Zealand, although its society is among the most secular in the world.[281][282] In the 2018 census, 51.4% of the population identified with one or more religions, including 38.6% identifying as Christians. Another 48.6% indicated that they had no religion.[n 11] The main Christian denominations are, by number of adherents, Roman Catholicism (10.1%), Anglicanism (6.8%), Presbyterianism (5.5%) and "Christian not further defined" (i.e. people identifying as Christian but not stating the denomination, 6.6%). The Māori-based Ringatū and Rātana religions (1.3%) are also Christian in origin.[284][285] Immigration and demographic change in recent decades has contributed to the growth of minority religions,[286] such as Hinduism (2.6%), Buddhism (1.1%), Islam (1.3%) and Sikhism (0.5%).[284] The Auckland Region exhibited the greatest religious diversity.[284] + Christianity is the predominant religion in New Zealand, although its society is among the most secular in the world.[281][282] In the 2018 census, 51.4% of the population identified with one or more religions, including 38.6% identifying as Christians. Another 48.6% indicated that they had no religion.[n 11] The main Christian denominations are, by number of adherents, Roman Catholicism (10.1%), Anglicanism (6.8%), Presbyterianism (5.5%) and "Christian not further defined" (i.e. people identifying as Christian but not stating the denomination, 6.6%). The Māori-based Ringatū and Rātana religions (1.3%) are also Christian in origin.[284][285] Immigration and demographic change in recent decades has contributed to the growth of minority religions,[286] such as Hinduism (2.6%), Buddhism (1.1%), Islam (1.3%) and Sikhism (0.5%).[284] The Auckland Region exhibited the greatest religious diversity.[284]

          Education

          - Primary and secondary schooling is compulsory for children aged 6 to 16, with the majority attending from the age of 5.[287] There are 13 school years and attending state (public) schools is free to New Zealand citizens and permanent residents from a person's 5th birthday to the end of the calendar year following their 19th birthday.[288] New Zealand has an adult literacy rate of 99%,[143] and over half of the population aged 15 to 29 hold a tertiary qualification.[287] There are five types of government-owned tertiary institutions: universities, colleges of education, polytechnics, specialist colleges, and wānanga,[289] in addition to private training establishments.[290] In the adult population 14.2% have a bachelor's degree or higher, 30.4% have some form of secondary qualification as their highest qualification and 22.4% have no formal qualification.[291] The OECD's Programme for International Student Assessment ranks New Zealand's education system as the seventh best in the world, with students performing exceptionally well in reading, mathematics and science.[292] + Primary and secondary schooling is compulsory for children aged 6 to 16, with the majority attending from the age of 5.[287] There are 13 school years and attending state (public) schools is free to New Zealand citizens and permanent residents from a person's 5th birthday to the end of the calendar year following their 19th birthday.[288] New Zealand has an adult literacy rate of 99%,[143] and over half of the population aged 15 to 29 hold a tertiary qualification.[287] There are five types of government-owned tertiary institutions: universities, colleges of education, polytechnics, specialist colleges, and wānanga,[289] in addition to private training establishments.[290] In the adult population 14.2% have a bachelor's degree or higher, 30.4% have some form of secondary qualification as their highest qualification and 22.4% have no formal qualification.[291] The OECD's Programme for 
International Student Assessment ranks New Zealand's education system as the seventh best in the world, with students performing exceptionally well in reading, mathematics and science.[292]

          @@ -1532,15 +1489,14 @@

          -

          Tall wooden carving showing Kupe above two tentacled sea creatures +

          Tall wooden carving showing Kupe above two tentacled sea creatures

          Late 20th-century house-post depicting the navigator Kupe fighting two sea creatures -

          -
          +

          - Early Māori adapted the tropically based east Polynesian culture in line with the challenges associated with a larger and more diverse environment, eventually developing their own distinctive culture. Social organisation was largely communal with families (whānau), subtribes (hapū) and tribes (iwi) ruled by a chief (rangatira), whose position was subject to the community's approval.[293] The British and Irish immigrants brought aspects of their own culture to New Zealand and also influenced Māori culture,[294][295] particularly with the introduction of Christianity.[296] However, Māori still regard their allegiance to tribal groups as a vital part of their identity, and Māori kinship roles resemble those of other Polynesian peoples.[297] More recently American, Australian, Asian and other European cultures have exerted influence on New Zealand. Non-Māori Polynesian cultures are also apparent, with Pasifika, the world's largest Polynesian festival, now an annual event in Auckland.[298] + Early Māori adapted the tropically based east Polynesian culture in line with the challenges associated with a larger and more diverse environment, eventually developing their own distinctive culture. Social organisation was largely communal with families (whānau), subtribes (hapū) and tribes (iwi) ruled by a chief (rangatira), whose position was subject to the community's approval.[293] The British and Irish immigrants brought aspects of their own culture to New Zealand and also influenced Māori culture,[294][295] particularly with the introduction of Christianity.[296] However, Māori still regard their allegiance to tribal groups as a vital part of their identity, and Māori kinship roles resemble those of other Polynesian peoples.[297] More recently American, Australian, Asian and other European cultures have exerted influence on New Zealand. 
Non-Māori Polynesian cultures are also apparent, with Pasifika, the world's largest Polynesian festival, now an annual event in Auckland.[298]

          The largely rural life in early New Zealand led to the image of New Zealanders being rugged, industrious problem solvers.[299] Modesty was expected and enforced through the "tall poppy syndrome", where high achievers received harsh criticism.[300] At the time New Zealand was not known as an intellectual country.[301] From the early 20th century until the late 1960s, Māori culture was suppressed by the attempted assimilation of Māori into British New Zealanders.[274] In the 1960s, as tertiary education became more available and cities expanded[302] urban culture began to dominate.[303] However, rural imagery and themes are common in New Zealand's art, literature and media.[304] @@ -1556,10 +1512,10 @@ As part of the resurgence of Māori culture, the traditional crafts of carving and weaving are now more widely practised and Māori artists are increasing in number and influence.[306] Most Māori carvings feature human figures, generally with three fingers and either a natural-looking, detailed head or a grotesque head.[307] Surface patterns consisting of spirals, ridges, notches and fish scales decorate most carvings.[308] The pre-eminent Māori architecture consisted of carved meeting houses (wharenui) decorated with symbolic carvings and illustrations. These buildings were originally designed to be constantly rebuilt, changing and adapting to different whims or needs.[309]

          - Māori decorated the white wood of buildings, canoes and cenotaphs using red (a mixture of red ochre and shark fat) and black (made from soot) paint and painted pictures of birds, reptiles and other designs on cave walls.[310] Māori tattoos (moko) consisting of coloured soot mixed with gum were cut into the flesh with a bone chisel.[311] Since European arrival paintings and photographs have been dominated by landscapes, originally not as works of art but as factual portrayals of New Zealand.[312] Portraits of Māori were also common, with early painters often portraying them as "noble savages", exotic beauties or friendly natives.[312] The country's isolation delayed the influence of European artistic trends allowing local artists to develop their own distinctive style of regionalism.[313] During the 1960s and 1970s many artists combined traditional Māori and Western techniques, creating unique art forms.[314] New Zealand art and craft has gradually achieved an international audience, with exhibitions in the Venice Biennale in 2001 and the "Paradise Now" exhibition in New York in 2004.[306][315] + Māori decorated the white wood of buildings, canoes and cenotaphs using red (a mixture of red ochre and shark fat) and black (made from soot) paint and painted pictures of birds, reptiles and other designs on cave walls.[310] Māori tattoos (moko) consisting of coloured soot mixed with gum were cut into the flesh with a bone chisel.[311] Since European arrival paintings and photographs have been dominated by landscapes, originally not as works of art but as factual portrayals of New Zealand.[312] Portraits of Māori were also common, with early painters often portraying them as "noble savages", exotic beauties or friendly natives.[312] The country's isolation delayed the influence of European artistic trends allowing local artists to develop their own distinctive style of regionalism.[313] During the 1960s and 1970s many artists combined traditional Māori and 
Western techniques, creating unique art forms.[314] New Zealand art and craft has gradually achieved an international audience, with exhibitions in the Venice Biennale in 2001 and the "Paradise Now" exhibition in New York in 2004.[306][315]

          -

          Refer to caption

          +

          Refer to caption

          Māori cloaks are made of fine flax fibre and patterned with black, red and white triangles, diamonds and other geometric shapes.[316] Greenstone was fashioned into earrings and necklaces, with the most well-known design being the hei-tiki, a distorted human figure sitting cross-legged with its head tilted to the side.[317] Europeans brought English fashion etiquette to New Zealand, and until the 1950s most people dressed up for social occasions.[318] Standards have since relaxed and New Zealand fashion has received a reputation for being casual, practical and lacklustre.[319][320] However, the local fashion industry has grown significantly since 2000, doubling exports and increasing from a handful to about 50 established labels, with some labels gaining international recognition.[320] @@ -1579,7 +1535,7 @@ New Zealand music has been influenced by blues, jazz, country, rock and roll and hip hop, with many of these genres given a unique New Zealand interpretation.[326] Māori developed traditional chants and songs from their ancient Southeast Asian origins, and after centuries of isolation created a unique "monotonous" and "doleful" sound.[327] Flutes and trumpets were used as musical instruments[328] or as signalling devices during war or special occasions.[329] Early settlers brought over their ethnic music, with brass bands and choral music being popular, and musicians began touring New Zealand in the 1860s.[330][331] Pipe bands became widespread during the early 20th century.[332] The New Zealand recording industry began to develop from 1940 onwards and many New Zealand musicians have obtained success in Britain and the United States.[326] Some artists release Māori language songs and the Māori tradition-based art of kapa haka (song and dance) has made a resurgence.[333] The New Zealand Music Awards are held annually by Recorded Music NZ; the awards were first held in 1965 by Reckitt & Colman as the Loxene Golden Disc awards.[334] Recorded Music NZ also 
publishes the country's official weekly record charts.[335]

          -

          Hills with inset, round doors. Reflected in water.

          +

          Hills with inset, round doors. Reflected in water.

          Public radio was introduced in New Zealand in 1922.[337] A state-owned television service began in 1960.[338] Deregulation in the 1980s saw a sudden increase in the numbers of radio and television stations.[339] New Zealand television primarily broadcasts American and British programming, along with a large number of Australian and local shows.[340] The number of New Zealand films significantly increased during the 1970s. In 1978 the New Zealand Film Commission started assisting local film-makers and many films attained a world audience, some receiving international acknowledgement.[339] The highest-grossing New Zealand films are Hunt for the Wilderpeople, Boy, The World's Fastest Indian, Once Were Warriors and Whale Rider.[341] The country's diverse scenery and compact size, plus government incentives,[342] have encouraged some producers to shoot big-budget productions in New Zealand, including Avatar, The Lord of the Rings, The Hobbit, The Chronicles of Narnia, King Kong and The Last Samurai.[343] The New Zealand media industry is dominated by a small number of companies, most of which are foreign-owned, although the state retains ownership of some television and radio stations.[344] Since 1994, Freedom House has consistently ranked New Zealand's press freedom in the top twenty, with the 19th freest media in 2015.[345] @@ -1590,10 +1546,10 @@

          -

          Rugby team wearing all black, facing the camera, knees bent, and facing toward a team wearing white

          +

          Rugby team wearing all black, facing the camera, knees bent, and facing toward a team wearing white

          - Most of the major sporting codes played in New Zealand have British origins.[346] Rugby union is considered the national sport[347] and attracts the most spectators.[348] Golf, netball, tennis and cricket have the highest rates of adult participation, while netball, rugby union and football (soccer) are particularly popular among young people.[348][349] Around 54% of New Zealand adolescents participate in sports for their school.[349] Victorious rugby tours to Australia and the United Kingdom in the late 1880s and the early 1900s played an early role in instilling a national identity.[350] Horseracing was also a popular spectator sport and became part of the "Rugby, Racing and Beer" culture during the 1960s.[351] Māori participation in European sports was particularly evident in rugby and the country's team performs a haka, a traditional Māori challenge, before international matches.[352] New Zealand is known for its extreme sports, adventure tourism[353] and strong mountaineering tradition, as seen in the success of notable New Zealander Sir Edmund Hillary.[354][355] Other outdoor pursuits such as cycling, fishing, swimming, running, tramping, canoeing, hunting, snowsports, surfing and sailing are also popular.[356] The Polynesian sport of waka ama racing has experienced a resurgence of interest in New Zealand since the 1980s.[357] + Most of the major sporting codes played in New Zealand have British origins.[346] Rugby union is considered the national sport[347] and attracts the most spectators.[348] Golf, netball, tennis and cricket have the highest rates of adult participation, while netball, rugby union and football (soccer) are particularly popular among young people.[348][349] Around 54% of New Zealand adolescents participate in sports for their school.[349] Victorious rugby tours to Australia and the United Kingdom in the late 1880s and the early 1900s played an early role in instilling a national identity.[350] Horseracing was also a popular 
spectator sport and became part of the "Rugby, Racing and Beer" culture during the 1960s.[351] Māori participation in European sports was particularly evident in rugby and the country's team performs a haka, a traditional Māori challenge, before international matches.[352] New Zealand is known for its extreme sports, adventure tourism[353] and strong mountaineering tradition, as seen in the success of notable New Zealander Sir Edmund Hillary.[354][355] Other outdoor pursuits such as cycling, fishing, swimming, running, tramping, canoeing, hunting, snowsports, surfing and sailing are also popular.[356] The Polynesian sport of waka ama racing has experienced a resurgence of interest in New Zealand since the 1980s.[357]

          New Zealand has competitive international teams in rugby union, rugby league, netball, cricket, softball, and sailing. New Zealand participated at the Summer Olympics in 1908 and 1912 as a joint team with Australia, before first participating on its own in 1920.[358] The country has ranked highly on a medals-to-population ratio at recent Games.[359][360] The "All Blacks", the national rugby union team, are the most successful in the history of international rugby[361] and the reigning World Cup champions.[362] @@ -1603,14 +1559,12 @@

          -

          Raw meat and vegetables

          -
          -

          Ingredients to be prepared for a hāngi -

          -
          +

          Raw meat and vegetables

          +

          Ingredients to be prepared for a hāngi +

          - The national cuisine has been described as Pacific Rim, incorporating the native Māori cuisine and diverse culinary traditions introduced by settlers and immigrants from Europe, Polynesia and Asia.[363] New Zealand yields produce from land and sea—most crops and livestock, such as maize, potatoes and pigs, were gradually introduced by the early European settlers.[364] Distinctive ingredients or dishes include lamb, salmon, kōura (crayfish),[365] dredge oysters, whitebait, pāua (abalone), mussels, scallops, pipis and tuatua (both are types of New Zealand shellfish),[366] kūmara (sweet potato), kiwifruit, tamarillo and pavlova (considered a national dish).[367][363] A hāngi is a traditional Māori method of cooking food using heated rocks buried in a pit oven. After European colonisation, Māori began cooking with pots and ovens and the hāngi was used less frequently, although it is still used for formal occasions such as tangihanga.[368] + The national cuisine has been described as Pacific Rim, incorporating the native Māori cuisine and diverse culinary traditions introduced by settlers and immigrants from Europe, Polynesia and Asia.[363] New Zealand yields produce from land and sea—most crops and livestock, such as maize, potatoes and pigs, were gradually introduced by the early European settlers.[364] Distinctive ingredients or dishes include lamb, salmon, kōura (crayfish),[365] dredge oysters, whitebait, pāua (abalone), mussels, scallops, pipis and tuatua (both are types of New Zealand shellfish),[366] kūmara (sweet potato), kiwifruit, tamarillo and pavlova (considered a national dish).[367][363] A hāngi is a traditional Māori method of cooking food using heated rocks buried in a pit oven. After European colonisation, Māori began cooking with pots and ovens and the hāngi was used less frequently, although it is still used for formal occasions such as tangihanga.[368]

          @@ -3239,7 +3193,7 @@ New Zealand, directory from UCB Libraries GovPubs
        • - New Zealand at Encyclopædia Britannica + New Zealand at Encyclopædia Britannica
        • New Zealand weather @@ -3248,7 +3202,7 @@ Key Development Forecasts for New Zealand from International Futures
        • - Wikimedia Atlas of New Zealand + Wikimedia Atlas of New Zealand
        • @@ -3256,4 +3210,4 @@ - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/wikipedia-3/expected.html b/article_scraper/resources/tests/readability/wikipedia-3/expected.html index ea77f94..4e0644b 100644 --- a/article_scraper/resources/tests/readability/wikipedia-3/expected.html +++ b/article_scraper/resources/tests/readability/wikipedia-3/expected.html @@ -5,27 +5,27 @@ In mathematics, a Hermitian matrix (or self-adjoint matrix) is a complex square matrix that is equal to its own conjugate transpose—that is, the element in the i-th row and j-th column is equal to the complex conjugate of the element in the j-th row and i-th column, for all indices i and j:

          - +

          or in matrix form:

          - . + .

          Hermitian matrices can be understood as the complex extension of real symmetric matrices.

          - If the conjugate transpose of a matrix is denoted by , then the Hermitian property can be written concisely as + If the conjugate transpose of a matrix is denoted by , then the Hermitian property can be written concisely as

          - +

          - Hermitian matrices are named after Charles Hermite, who demonstrated in 1855 that matrices of this form share a property with real symmetric matrices of always having real eigenvalues. Other, equivalent notations in common use are , although note that in quantum mechanics, typically means the complex conjugate only, and not the conjugate transpose. + Hermitian matrices are named after Charles Hermite, who demonstrated in 1855 that matrices of this form share a property with real symmetric matrices of always having real eigenvalues. Other, equivalent notations in common use are , although note that in quantum mechanics, typically means the complex conjugate only, and not the conjugate transpose.

          @@ -38,28 +38,26 @@ Equality with the adjoint[edit]

          - A square matrix is Hermitian if and only if it is equal to its adjoint, that is, it satisfies + A square matrix is Hermitian if and only if it is equal to its adjoint, that is, it satisfies

          -

          -

          -

          for any pair of vectors , where denotes the inner product operation. -

          -

          +

          +

          for any pair of vectors , where denotes the inner product operation. +

          This is also the way that the more general concept of self-adjoint operator is defined.

          Reality of quadratic forms[edit]

          - A square matrix is Hermitian if and only if it is such that + A square matrix is Hermitian if and only if it is such that

          -

          +

          Spectral properties[edit]

          - A square matrix is Hermitian if and only if it is unitarily diagonalizable with real eigenvalues. + A square matrix is Hermitian if and only if it is unitarily diagonalizable with real eigenvalues.

          Applications[edit] @@ -71,14 +69,14 @@ Examples[edit]

          - In this section, the conjugate transpose of matrix is denoted as , the transpose of matrix is denoted as and conjugate of matrix is denoted as . + In this section, the conjugate transpose of matrix is denoted as , the transpose of matrix is denoted as and conjugate of matrix is denoted as .

          See the following example:

          - +

          @@ -88,7 +86,7 @@ Well-known families of Pauli matrices, Gell-Mann matrices and their generalizations are Hermitian. In theoretical physics such Hermitian matrices are often multiplied by imaginary coefficients,[1][2] which results in skew-Hermitian matrices (see below).

          - Here, we offer another useful Hermitian matrix using an abstract example. If a square matrix equals the multiplication of a matrix and its conjugate transpose, that is, , then is a Hermitian positive semi-definite matrix. Furthermore, if is row full-rank, then is positive definite. + Here, we offer another useful Hermitian matrix using an abstract example. If a square matrix equals the multiplication of a matrix and its conjugate transpose, that is, , then is a Hermitian positive semi-definite matrix. Furthermore, if is row full-rank, then is positive definite.

          Properties[edit] @@ -97,7 +95,7 @@

          @@ -37,7 +37,7 @@ - +
          Rank Pop.
          - Auckland
          - Auckland
          - Wellington
          + Auckland
          + Auckland
          + Wellington
          Wellington
          @@ -1208,9 +1171,9 @@ 58,800 - Christchurch
          - Christchurch
          - Hamilton
          + Christchurch
          + Christchurch
          + Hamilton
          Hamilton
          -

          [icon] +

          [icon]

          @@ -117,7 +115,7 @@ Proof: By definition of the Hermitian matrix
          - +
          @@ -134,7 +132,7 @@
          - Proof: by definition. Thus Hij = Hji (matrix symmetry) if and only if (Hij is real). + Proof: by definition. Thus Hij = Hji (matrix symmetry) if and only if (Hij is real).
            @@ -156,7 +154,7 @@
          - Proof: as claimed. + Proof: as claimed.
            @@ -165,7 +163,7 @@
          - Proof: If , then , so as claimed. + Proof: If , then , so as claimed.
            @@ -174,7 +172,7 @@
          - Proof: Note that Thus if and only if . + Proof: Note that Thus if and only if .
          Thus An is Hermitian if A is Hermitian and n is an integer. @@ -189,7 +187,7 @@
          - +
          @@ -203,7 +201,7 @@
          - +
          @@ -217,14 +215,14 @@
          - +
          - where denotes the complex number , called the imaginary unit. + where denotes the complex number , called the imaginary unit.
          @@ -232,12 +230,12 @@
          - +
          - where are the eigenvalues on the diagonal of the diagonal matrix . + where are the eigenvalues on the diagonal of the diagonal matrix .
            @@ -246,10 +244,10 @@
          - Proof: + Proof:
          - Therefore if . + Therefore if .
          (Alternatively, the determinant is the product of the matrix's eigenvalues, and as mentioned before, the eigenvalues of a Hermitian matrix are real.) @@ -259,14 +257,14 @@ Decomposition into Hermitian and skew-Hermitian[edit]

          - Additional facts related to Hermitian matrices include: + Additional facts related to Hermitian matrices include:

            -
          • The sum of a square matrix and its conjugate transpose is Hermitian. +
          • The sum of a square matrix and its conjugate transpose is Hermitian.
            -
          • The difference of a square matrix and its conjugate transpose is skew-Hermitian (also called antihermitian). This implies that the commutator of two Hermitian matrices is skew-Hermitian. +
          • The difference of a square matrix and its conjugate transpose is skew-Hermitian (also called antihermitian). This implies that the commutator of two Hermitian matrices is skew-Hermitian.
            @@ -277,7 +275,7 @@
            - +
            @@ -287,24 +285,24 @@

            - In mathematics, for a given complex Hermitian matrix M and nonzero vector x, the Rayleigh quotient[4] , is defined as:[3]:p. 234[5] + In mathematics, for a given complex Hermitian matrix M and nonzero vector x, the Rayleigh quotient[4] , is defined as:[3]:p. 234[5]

            - . + .

            - For real matrices and vectors, the condition of being Hermitian reduces to that of being symmetric, and the conjugate transpose to the usual transpose . Note that for any non-zero real scalar . Also, recall that a Hermitian (or real symmetric) matrix has real eigenvalues. + For real matrices and vectors, the condition of being Hermitian reduces to that of being symmetric, and the conjugate transpose to the usual transpose . Note that for any non-zero real scalar . Also, recall that a Hermitian (or real symmetric) matrix has real eigenvalues.

            - It can be shown[citation needed] that, for a given matrix, the Rayleigh quotient reaches its minimum value (the smallest eigenvalue of M) when is (the corresponding eigenvector). Similarly, and . + It can be shown[citation needed] that, for a given matrix, the Rayleigh quotient reaches its minimum value (the smallest eigenvalue of M) when is (the corresponding eigenvector). Similarly, and .

            The Rayleigh quotient is used in the min-max theorem to get exact values of all eigenvalues. It is also used in eigenvalue algorithms to obtain an eigenvalue approximation from an eigenvector approximation. Specifically, this is the basis for Rayleigh quotient iteration.

            - The range of the Rayleigh quotient (for matrix that is not necessarily Hermitian) is called a numerical range (or spectrum in functional analysis). When the matrix is Hermitian, the numerical range is equal to the spectral norm. Still in functional analysis, is known as the spectral radius. In the context of C*-algebras or algebraic quantum mechanics, the function that to M associates the Rayleigh quotient R(M, x) for a fixed x and M varying through the algebra would be referred to as "vector state" of the algebra. + The range of the Rayleigh quotient (for matrix that is not necessarily Hermitian) is called a numerical range (or spectrum in functional analysis). When the matrix is Hermitian, the numerical range is equal to the spectral norm. Still in functional analysis, is known as the spectral radius. In the context of C*-algebras or algebraic quantum mechanics, the function that to M associates the Rayleigh quotient R(M, x) for a fixed x and M varying through the algebra would be referred to as "vector state" of the algebra.

            See also[edit] @@ -370,4 +368,4 @@

          - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/wikipedia/expected.html b/article_scraper/resources/tests/readability/wikipedia/expected.html index d12bccd..bc38891 100644 --- a/article_scraper/resources/tests/readability/wikipedia/expected.html +++ b/article_scraper/resources/tests/readability/wikipedia/expected.html @@ -4,7 +4,7 @@
          Mozilla
          - Mozilla dinosaur head logo.png + Mozilla dinosaur head logo.png
          Websitemozilla.org/,%20https://www.mozilla.org/tr/mozilla.org/,%20https://www.mozilla.org/tr/

          Mozilla is a free-software community, created in 1998 by members of Netscape. The Mozilla community uses, develops, spreads and supports Mozilla products, thereby promoting exclusively free software and open standards, with only minor exceptions.[1] The community is supported institutionally by the Mozilla Foundation and its tax-paying subsidiary, the Mozilla Corporation.[2]

          @@ -48,20 +48,17 @@

          Contents

        -

        -History[edit] +

        History[edit]

        On January 23, 1998, Netscape made two announcements: first, that Netscape Communicator will be free; second, that the source code will also be free.[3] One day later, Jamie Zawinski from Netscape registered mozilla.org.[4] The project was named Mozilla after the original code name of the Netscape Navigator browser which is a blending of "Mosaic and Godzilla"[5] and used to co-ordinate the development of the Mozilla Application Suite, the open source version of Netscape's internet software, Netscape Communicator.[6][7] Jamie Zawinski says he came up with the name "Mozilla" at a Netscape staff meeting.[8][9] A small group of Netscape employees were tasked with coordination of the new community.

        Originally, Mozilla aimed to be a technology provider for companies, such as Netscape, who would commercialize their open source code.[10] When AOL (Netscape's parent company) greatly reduced its involvement with Mozilla in July 2003, the Mozilla Foundation was designated the legal steward of the project.[11] Soon after, Mozilla deprecated the Mozilla Suite in favor of creating independent applications for each function, primarily the Firefox web browser and the Thunderbird email client, and moved to supply them directly to the public.[12]

        @@ -124,20 +116,17 @@

        In a report released in November 2012, Mozilla reported that their total revenue for 2011 was $163 million, which was up 33% from $123 million in 2010. Mozilla noted that roughly 85% of their revenue comes from their contract with Google.[16]

        At the end of 2013, Mozilla announced a deal with Cisco Systems whereby Firefox would download and use a Cisco-provided binary build of an open source[17] codec to play the proprietary H.264 video format.[18][19] As part of the deal, Cisco would pay any patent licensing fees associated with the binaries that it distributes. Mozilla's CTO, Brendan Eich, acknowledged that this is "not a complete solution" and isn't "perfect".[20] An employee in Mozilla's video formats team, writing in an unofficial capacity, justified[21] it by the need to maintain their large user base, which would be necessary in future battles for truly free video formats.

        In December 2013, Mozilla announced funding for the development of non-free games[22] through its Game Creator Challenge. However, even those games that may be released under a non-free software or open source license must be made with open web technologies and Javascript as per the work criteria outlined in the announcement.

        -

        -Eich CEO promotion controversy[edit] +

        Eich CEO promotion controversy[edit]

        On March 24, 2014, Mozilla promoted Brendan Eich to the role of CEO. This led to boycotts and protests from the LGBT community and its supporters, as Eich previously donated US$1,000[23] in 2008 in support of California's Proposition 8, a California ballot proposition and state constitutional amendment in opposition to same-sex marriage.[24] Eich's donation first became public knowledge in 2012, while he was Mozilla’s chief technical officer, leading to angry responses on Twitter—including the use of the hashtag "#wontworkwithbigots".[25]

        Protests also emerged in 2014 following the announcement of Eich's appointment as CEO of Mozilla. U.S. companies OkCupid and CREDO Mobile received media coverage for their objections, with the former asking its users to boycott the browser,[26] while Credo amassed 50,000 signatures for a petition that called for Eich's resignation

        Due to the controversy, Eich voluntarily stepped down on April 3, 2014[27] and Mitchell Baker, executive chairwoman of Mozilla Corporation, posted a statement on the Mozilla blog: "We didn’t move fast enough to engage with people once the controversy started. Mozilla believes both in equality and freedom of speech. Equality is necessary for meaningful speech. And you need free speech to fight for equality."[28] Eich's resignation promoted a larger backlash from conservatives who felt he had been forced out of the company internally.[citation needed]

        OkCupid co-founder and CEO Sam Yagan had also donated $500[29] to Republican candidate Chris Cannon who proceeded to vote for multiple measures viewed as "anti-gay", including the banning of same-sex marriage.[30][31][32][33] Yagan claims he did not know about Cannon's stance on gay rights and that his contribution was due to the candidate being the ranking Republican participating in the House subcommittee that oversaw Internet and Intellectual Property matters.[34][35][36][37][38]

        Reader comments on articles that were published close to the events were divided between support for OkCupid's actions and opposition to them. Supporters claimed the boycott was justified and saw OkCupid's actions as a firm statement of opposition to intolerance towards the gay community. Opponents saw OkCupid's actions as hypocritical, since Eich is also the inventor of JavaScript, which is still required to browse OkCupid's website, and felt that users should not be punished for the actions of Mozilla and suspected that OkCupid's actions were a publicity stunt.[36][39]

        -

        -Values[edit] +

        Values[edit]

        According to Mozilla's manifesto,[40] which outlines goals, principles, and a pledge, "The Mozilla project uses a community-based approach to create world-class open source software and to develop new types of collaborative activities". Mozilla's manifesto mentions only its beliefs in regards to the Internet and Internet privacy, and has no mention of any political or social viewpoints.

        -

        -Pledge[edit] +

        Pledge[edit]

        According to the Mozilla Foundation:[41]

        @@ -150,145 +139,120 @@
      • Promote the Mozilla Manifesto principles in public discourse and within the Internet industry.
      -

      -Software[edit] +

      Software[edit]

      -

      +

      -

      -Firefox[edit] +

      Firefox[edit]

      Firefox is a web browser, and is Mozilla's flagship software product. It is available in both desktop and mobile versions. Firefox uses the Gecko layout engine to render web pages, which implements current and anticipated web standards.[42] As of late 2015, Firefox has approximately 10-11% of worldwide usage share of web browsers, making it the 4th most-used web browser.[43][44][45]

      Firefox began as an experimental branch of the Mozilla codebase by Dave Hyatt, Joe Hewitt and Blake Ross. They believed the commercial requirements of Netscape's sponsorship and developer-driven feature creep compromised the utility of the Mozilla browser.[46] To combat what they saw as the Mozilla Suite's software bloat, they created a stand-alone browser, with which they intended to replace the Mozilla Suite.

      Firefox was originally named Phoenix but the name was changed so as to avoid trademark conflicts with Phoenix Technologies. The initially-announced replacement, Firebird, provoked objections from the Firebird project community.[47][48] The current name, Firefox, was chosen on February 9, 2004.[49]

      -

      -Firefox Mobile[edit] +

      Firefox Mobile[edit]

      Firefox Mobile (codenamed Fennec) is the build of the Mozilla Firefox web browser for devices such as smartphones and tablet computers.

      Firefox Mobile uses the same Gecko layout engine as Mozilla Firefox. For example, version 1.0 used the same engine as Firefox 3.6, and the following release, 4.0, shared core code with Firefox 4.0. Its features include HTML5 support, Firefox Sync, add-ons support and tabbed browsing.[50]

      Firefox Mobile is currently available for Android 2.2 and above devices with an ARMv7 or ARMv6 CPU.[51] The x86 architecture is not officially supported.[52] Tristan Nitot, president of Mozilla Europe, has said that it's unlikely that an iPhone or a BlackBerry version will be released, citing Apple's iTunes Store application approval policies (which forbid applications competing with Apple's own, and forbid engines which run downloaded code) and BlackBerry's limited operating system as the reasons.[53]

      -

      -Firefox OS[edit] +

      Firefox OS[edit]

      Firefox OS (project name: Boot to Gecko also known as B2G) is an open source operating system in development by Mozilla that aims to support HTML5 apps written using "open Web" technologies rather than platform-specific native APIs. The concept behind Firefox OS is that all user-accessible software will be HTML5 applications, that use Open Web APIs to access the phone's hardware directly via JavaScript.[54]

      Some devices using this OS include[55] Alcatel One Touch Fire, ZTE Open, LG Fireweb.

      -

      -Thunderbird[edit] +

      Thunderbird[edit]

      Thunderbird is a free, open source, cross-platform email and news client developed by the volunteers of the Mozilla Community.

      On July 16, 2012, Mitchell Baker announced that Mozilla's leadership had come to the conclusion that on-going stability was the most important thing for Thunderbird and that innovation in Thunderbird was no longer a priority for Mozilla. In that update Baker also suggested that Mozilla had provided a pathway for community to innovate around Thunderbird if the community chooses.[56]

      -

      -SeaMonkey[edit] +

      SeaMonkey[edit]

      -

      +

      SeaMonkey (formerly the Mozilla Application Suite) is a free and open source cross platform suite of Internet software components including a web browser component, a client for sending and receiving email and USENET newsgroup messages, an HTML editor (Mozilla Composer) and the ChatZilla IRC client.

      On March 10, 2005, the Mozilla Foundation announced that it would not release any official versions of Mozilla Application Suite beyond 1.7.x, since it had now focused on the standalone applications Firefox and Thunderbird.[57] SeaMonkey is now maintained by the SeaMonkey Council, which has trademarked the SeaMonkey name with help from the Mozilla Foundation.[58] The Mozilla Foundation provides project hosting for the SeaMonkey developers.

      -

      -Bugzilla[edit] +

      Bugzilla[edit]

      -

      +

      Bugzilla is a web-based general-purpose bug tracking system, which was released as open source software by Netscape Communications in 1998 along with the rest of the Mozilla codebase, and is currently stewarded by Mozilla. It has been adopted by a variety of organizations for use as a bug tracking system for both free and open source software and proprietary projects and products, including the Mozilla Foundation, the Linux kernel, GNOME, KDE, Red Hat, Novell, Eclipse and LibreOffice.[59]

      -

      -Components[edit] +

      Components[edit]

      -

      -NSS[edit] +

      NSS[edit]

      Network Security Services (NSS) comprises a set of libraries designed to support cross-platform development of security-enabled client and server applications. NSS provides a complete open-source implementation of crypto libraries supporting SSL and S/MIME. NSS was previously tri-licensed under the Mozilla Public License 1.1, the GNU General Public License, and the GNU Lesser General Public License, but upgraded to GPL-compatible MPL 2.0.

      AOL, Red Hat, Sun Microsystems/Oracle Corporation, Google and other companies and individual contributors have co-developed NSS and it is used in a wide range of non-Mozilla products including Evolution, Pidgin, and Apache OpenOffice.

      -

      -SpiderMonkey[edit] +

      SpiderMonkey[edit]

      SpiderMonkey is the original JavaScript engine developed by Brendan Eich when he invented JavaScript in 1995 as a developer at Netscape. It became part of the Mozilla product family when Mozilla inherited Netscape's code-base in 1998. In 2011, Eich transferred the nominal ownership of the SpiderMonkey code and project to Dave Mandelin.[60]

      SpiderMonkey is a cross-platform engine written in C++ which implements ECMAScript, a standard developed from JavaScript.[60][61] It comprises an interpreter, several just-in-time compilers, a decompiler and a garbage collector. Products which embed SpiderMonkey include Firefox, Thunderbird, SeaMonkey, and many non-Mozilla applications.[62]

      -

      -Rhino[edit] +

      Rhino[edit]

      Rhino is an open source JavaScript engine managed by the Mozilla Foundation. It is developed entirely in Java. Rhino converts JavaScript scripts into Java classes. Rhino works in both compiled and interpreted mode.[63]

      -

      -Gecko[edit] +

      Gecko[edit]

      Gecko is a layout engine that supports web pages written using HTML, SVG, and MathML. Gecko is written in C++ and uses NSPR for platform independence. Its source code is licensed under the Mozilla Public License.

      Firefox uses Gecko both for rendering web pages and for rendering its user interface. Gecko is also used by Thunderbird, SeaMonkey, and many non-Mozilla applications.

      -

      -Rust[edit] +

      Rust[edit]

      Rust is a compiled programming language being developed by Mozilla Research. It is designed for safety, concurrency, and performance. Rust is intended for creating large and complex software which needs to be both safe against exploits and fast.

      Rust is being used in an experimental layout engine, Servo, which is developed by Mozilla and Samsung. Servo is not used in any consumer-oriented browsers yet. However, the Servo project developers plan for parts of the Servo source code to be merged into Gecko, and Firefox, incrementally.[64][65]

      -

      -XULRunner[edit] +

      XULRunner[edit]

      XULRunner is a software platform and technology experiment by Mozilla, that allows applications built with the same technologies used by Firefox extensions (XPCOM, Javascript, HTML, CSS, XUL) to be run natively as desktop applications, without requiring Firefox to be installed on the user's machine. XULRunner binaries are available for the Windows, GNU/Linux and OS X operating systems, allowing such applications to be effectively cross platform.

      -

      -pdf.js[edit] +

      pdf.js[edit]

      Pdf.js is a library developed by Mozilla that allows in-browser rendering of pdf documents using the HTML5 Canvas and Javascript. It is included by default in recent versions of Firefox, allowing the browser to render pdf documents without requiring an external plugin; and it is available separately as an extension named "PDF Viewer" for Firefox for Android, SeaMonkey, and the Firefox versions which don't include it built-in. It can also be included as part of a website's scripts, to allow pdf rendering for any browser that implements the required HTML5 features and can run Javascript.

      -

      -Shumway[edit] +

      Shumway[edit]

      Shumway is an open source replacement for the Adobe Flash Player, developed by Mozilla since 2012, using open web technologies as a replacement for Flash technologies. It uses Javascript and HTML5 Canvas elements to render Flash and execute Actionscript. It is included by default in Firefox Nightly and can be installed as an extension for any recent version of Firefox. The current implementation is limited in its capabilities to render Flash content outside simple projects.

      -

      -Other activities[edit] +

      Other activities[edit]

      -

      -Mozilla VR[edit] +

      Mozilla VR[edit]

      Mozilla VR is a team focused on bringing Virtual reality tools, specifications, and standards to the open Web.[66] Mozilla VR maintains A-Frame (VR), a web framework for building VR experiences, and works on advancing WebVR support within web browsers.

      -

      -Mozilla Persona[edit] +

      Mozilla Persona[edit]

      Mozilla Persona is a secure, cross-browser website authentication mechanism which allows a user to use a single username and password (or other authentication method) to log in to multiple sites.[67] Mozilla Persona will be shutting down on November 30, 2016.[68]

      -

      -Mozilla Location Service[edit] +

      Mozilla Location Service[edit]

      This open source crowdsourced geolocation service was started by Mozilla in 2013 and offers a free API.

      -

      -Webmaker[edit] +

      Webmaker[edit]

      Mozilla Webmaker is Mozilla's educational initiative, Webmaker's goal is to "help millions of people move from using the web to making the web." As part of Mozilla’s non-profit mission, Webmaker aims "to help the world increase their understanding of the web, take greater control of their online lives, and create a more web literate planet."[69][70][70]

      -

      -Mozilla Developer Network[edit] +

      Mozilla Developer Network[edit]

      Mozilla maintains a comprehensive developer documentation website called the Mozilla Developer Network which contains information about web technologies including HTML, CSS, SVG, JavaScript, as well Mozilla-specific information. In addition, Mozilla publishes a large number of videos about web technologies and the development of Mozilla projects on the Air Mozilla website.[71][72]

      -

      -[edit] +

      [edit]

      The Mozilla Community consists of over 40,000 active contributors from across the globe[citation needed]. It includes both paid employees and volunteers who work towards the goals set forth[40] in the Mozilla Manifesto. Many of the sub-communities in Mozilla have formed around localization efforts for Mozilla Firefox, and the Mozilla web properties.

      -

      -Local communities[edit] +

      Local communities[edit]

      -

      +

      There are a number of sub-communities that exist based on their geographical locations, where contributors near each other work together on particular activities, such as localization, marketing, PR and user support.

      -

      -Mozilla Reps[edit] +

      Mozilla Reps[edit]

      -

      +

      The Mozilla Reps program aims to empower and support volunteer Mozillians who want to become official representatives of Mozilla in their region/locale.

      The program provides a simple framework and a specific set of tools to help Mozillians to organize and/or attend events, recruit and mentor new contributors, document and share activities, and support their local communities better.

      @@ -301,31 +265,24 @@
    • Support and mentor future Mozilla Reps
    • Document clearly all their activities
    -

    -Conferences and events[edit] +

    Conferences and events[edit]

    -

    -Mozilla Festival[edit] +

    Mozilla Festival[edit]

    -

    -
    +

    - Speakers from the Knight Foundation discuss the future of news at the 2011 Mozilla Festival in London.

    -
    + Speakers from the Knight Foundation discuss the future of news at the 2011 Mozilla Festival in London.

    The Mozilla Festival is an annual event where hundreds of passionate people explore the Web, learn together and make things that can change the world. With the emphasis on making—the mantra of the Festival is "less yack, more hack." Journalists, coders, filmmakers, designers, educators, gamers, makers, youth and anyone else, from all over the world, are encouraged to attend, with attendees from more than 40 countries, working together at the intersection between freedom, the Web, and that years theme.

    The event revolves around design challenges which address key issues based on the chosen theme for that years festival. In previous years the Mozilla Festival has focused on Learning, and Media, with the 2012 festival being based around making. The titles of the festival revolve around the main theme, freedom (as in freedom of speech not free beer), and the Web.

    -

    -MozCamps[edit] +

    MozCamps[edit]

    MozCamps are the critical part of the Grow Mozilla initiative which aims to grow the Mozilla Community. These camps aim to bring core contributors from around the world together. They are intensive multi-day summits that include keynote speeches by Mozilla leadership, workshops and breakout sessions (led by paid and unpaid staff), and fun social outings. All of these activities combine to reward contributors for their hard work, engage them with new products and initiatives, and align all attendees on Mozilla's mission.

    -

    -Mozilla Summit[edit] +

    Mozilla Summit[edit]

    Mozilla Summit are the global event with active contributors and Mozilla employees to develop a shared understanding of Mozilla's mission together. Over 2,000 people representing 90 countries and 114 languages gathered in Santa Clara, Toronto and Brussels in 2013. Mozilla has since its last summit in 2013 replaced summits with all-hands where both employees and volunteers come together to collaborate the event is a scaled down version of Mozilla Summit.

    -

    -See also[edit] +

    See also[edit]

    -

    -References[edit] +

    References[edit]

      -
    1. -^ For exceptions, see "Values" section below -
    2. -
    3. -^ "About the Mozilla Corporation". Mozilla Foundation. +
    4. ^ For exceptions, see "Values" section below
    5. +
    6. ^ "About the Mozilla Corporation". Mozilla Foundation.
    7. -
    8. -^ "Freeing the Source: The Story of Mozilla". Open Sources: Voices from the Open Source Revolution. Retrieved 2016-05-01. +
    9. ^ "Freeing the Source: The Story of Mozilla". Open Sources: Voices from the Open Source Revolution. Retrieved 2016-05-01.
    10. -
    11. -^ "Mozilla.org WHOIS, DNS, & Domain Info". DomainTools. Retrieved 1 May 2016. +
    12. ^ "Mozilla.org WHOIS, DNS, & Domain Info". DomainTools. Retrieved 1 May 2016.
    13. -
    14. -^ Payment, S. (2007). Marc Andreessen and Jim Clark: The Founders of Netscape. Rosen Publishing Group. ISBN 9781404207196. +
    15. ^ Payment, S. (2007). Marc Andreessen and Jim Clark: The Founders of Netscape. Rosen Publishing Group. ISBN 9781404207196.
    16. -
    17. -^ "Netscape Announces mozilla.org, a Dedicated Team and Web Site Supporting Development of Free Client Source Code". Netscape. Archived from the original on October 4, 2002. Retrieved 2012-08-21. +
    18. ^ "Netscape Announces mozilla.org, a Dedicated Team and Web Site Supporting Development of Free Client Source Code". Netscape. Archived from the original on October 4, 2002. Retrieved 2012-08-21.
    19. -
    20. -^ "Mac vendors ponder Netscape gambit.". Macworld. 1 May 1998. Retrieved 2012-08-19. +
    21. ^ "Mac vendors ponder Netscape gambit.". Macworld. 1 May 1998. Retrieved 2012-08-19.
    22. -
    23. -^ Zawinski, Jamie (1996). "nscp dorm". Retrieved 2007-10-12. +
    24. ^ Zawinski, Jamie (1996). "nscp dorm". Retrieved 2007-10-12.
    25. -
    26. -^ Dave Titus with assistance from Andrew Wong. "How was Mozilla born". +
    27. ^ Dave Titus with assistance from Andrew Wong. "How was Mozilla born".
    28. -
    29. -^ "Introduction to Mozilla Source Code". Mozilla. Retrieved 2012-08-18. However, mozilla.org wants to emphasize that these milestones are being produced for testing purposes only. +
    30. ^ "Introduction to Mozilla Source Code". Mozilla. Retrieved 2012-08-18. However, mozilla.org wants to emphasize that these milestones are being produced for testing purposes only.
    31. -
    32. -^ "mozilla.org Announces Launch of the Mozilla Foundation to Lead Open-Source Browser Efforts". Retrieved 2012-08-18. +
    33. ^ "mozilla.org Announces Launch of the Mozilla Foundation to Lead Open-Source Browser Efforts". Retrieved 2012-08-18.
    34. -
    35. -^ Eich, Brendan; David Hyatt (April 2, 2003). "mozilla development roadmap". Mozilla. Retrieved 2009-08-02. +
    36. ^ Eich, Brendan; David Hyatt (April 2, 2003). "mozilla development roadmap". Mozilla. Retrieved 2009-08-02.
    37. -
    38. -^ "Better Browsing on Your Android Smartphone". AllThingsD. Retrieved 2012-08-18. +
    39. ^ "Better Browsing on Your Android Smartphone". AllThingsD. Retrieved 2012-08-18.
    40. -
    41. -^ "Mozilla Releases Test Version of Firefox OS". PC Magazine. Retrieved 2012-08-18. +
    42. ^ "Mozilla Releases Test Version of Firefox OS". PC Magazine. Retrieved 2012-08-18.
    43. -
    44. -^ "Mozilla Marketplace is live, lets you run web apps like desktop programs". Engadget. Retrieved 2012-08-18. +
    45. ^ "Mozilla Marketplace is live, lets you run web apps like desktop programs". Engadget. Retrieved 2012-08-18.
    46. -
    47. -^ Lardinois, Frederic (November 15, 2012). "Mozilla Releases Annual Report For 2011: Revenue Up 33% To $163M, Majority From Google". techcrunch.com. +
    48. ^ Lardinois, Frederic (November 15, 2012). "Mozilla Releases Annual Report For 2011: Revenue Up 33% To $163M, Majority From Google". techcrunch.com.
    49. -
    50. -^ "cisco/openh264 · GitHub". github.com. Retrieved 2014-04-05. +
    51. ^ "cisco/openh264 · GitHub". github.com. Retrieved 2014-04-05.
    52. -
    53. -^ "Mozilla will add H.264 to Firefox as Cisco makes eleventh-hour push for WebRTC's future — Tech News and Analysis". gigaom.com. Retrieved 2014-04-05. +
    54. ^ "Mozilla will add H.264 to Firefox as Cisco makes eleventh-hour push for WebRTC's future — Tech News and Analysis". gigaom.com. Retrieved 2014-04-05.
    55. -
    56. -^ "Cisco to release open-source H.264 codec, Mozilla makes tactical retreat - TechRepublic". techrepublic.com. Retrieved 2014-04-05. +
    57. ^ "Cisco to release open-source H.264 codec, Mozilla makes tactical retreat - TechRepublic". techrepublic.com. Retrieved 2014-04-05.
    58. -
    59. -^ "Video Interoperability on the Web Gets a Boost From Cisco's H.264 Codec". Of course, this is not a not a complete solution. In a perfect world, codecs, like other basic Internet technologies such as TCP/IP, HTTP, and HTML, would be fully open and free +
    60. ^ "Video Interoperability on the Web Gets a Boost From Cisco's H.264 Codec". Of course, this is not a not a complete solution. In a perfect world, codecs, like other basic Internet technologies such as TCP/IP, HTTP, and HTML, would be fully open and free
    61. -
    62. -^ "Comments on Cisco, Mozilla, and H.264". By endorsing Cisco's plan, there's no getting around the fact that we've caved on our principles. That said, principles can't replace being in a practical position to make a difference in the future. - Christopher Montgomery wrote in a personal capacity but works for Mozilla in their codecs team +
    63. ^ "Comments on Cisco, Mozilla, and H.264". By endorsing Cisco's plan, there's no getting around the fact that we've caved on our principles. That said, principles can't replace being in a practical position to make a difference in the future. - Christopher Montgomery wrote in a personal capacity but works for Mozilla in their codecs team
    64. -
    65. -^ "Game Creator Challenge -Contest Terms and Conditions". - submissions to the "amateur" category have to be released as free software, but not for the other two categories +
    66. ^ "Game Creator Challenge -Contest Terms and Conditions". - submissions to the "amateur" category have to be released as free software, but not for the other two categories
    67. -
    68. -^ "Los Angeles Times - Brendan Eich contribution to Proposition 8". latimes.com. Retrieved 2014-07-01. +
    69. ^ "Los Angeles Times - Brendan Eich contribution to Proposition 8". latimes.com. Retrieved 2014-07-01.
    70. -
    71. -^ "Gay Firefox developers boycott Mozilla to protest CEO hire [Updated] | Ars Technica". arstechnica.com. Retrieved 2014-04-05. +
    72. ^ "Gay Firefox developers boycott Mozilla to protest CEO hire [Updated] | Ars Technica". arstechnica.com. Retrieved 2014-04-05.
    73. -
    74. -^ Kelly Faircloth (9 April 2012). "Tech Celeb Makes Prop-8 Donation; Internet Goes Berserk". BetaBeat. BetaBeat. Retrieved 2014-04-28. +
    75. ^ Kelly Faircloth (9 April 2012). "Tech Celeb Makes Prop-8 Donation; Internet Goes Berserk". BetaBeat. BetaBeat. Retrieved 2014-04-28.
    76. -
    77. -^ "Screenshot of OkCupid's statement towards Firefox users". huffingtonpost.com. Retrieved 2014-07-01. +
    78. ^ "Screenshot of OkCupid's statement towards Firefox users". huffingtonpost.com. Retrieved 2014-07-01.
    79. -
    80. -^ "FAQ on CEO Resignation". The Mozilla Blog. Retrieved 2015-04-20. +
    81. ^ "FAQ on CEO Resignation". The Mozilla Blog. Retrieved 2015-04-20.
    82. -
    83. -^ Baker, Mitchell (3 April 2014). "Brendan Eich Steps Down as Mozilla CEO". mozilla blog. Mozilla. Retrieved 2014-04-04. +
    84. ^ Baker, Mitchell (3 April 2014). "Brendan Eich Steps Down as Mozilla CEO". mozilla blog. Mozilla. Retrieved 2014-04-04.
    85. -
    86. -^ "opensecrets.org listing of Sam Yagan's contributions to political candidates". opensecrets.org. Retrieved 2014-07-01. +
    87. ^ "opensecrets.org listing of Sam Yagan's contributions to political candidates". opensecrets.org. Retrieved 2014-07-01.
    88. -
    89. -^ "ontheissues.org listing of votes cast by Chris Cannon". ontheissues.org. Retrieved 2014-07-01. +
    90. ^ "ontheissues.org listing of votes cast by Chris Cannon". ontheissues.org. Retrieved 2014-07-01.
    91. -
    92. -^ "ontheissues.org listing of votes cast on the permanency of the Patriot Act". ontheissues.org. Retrieved 2014-07-01. +
    93. ^ "ontheissues.org listing of votes cast on the permanency of the Patriot Act". ontheissues.org. Retrieved 2014-07-01.
    94. -
    95. -^ "ontheissues.org: Chris Cannon on Homeland Security". ontheissues.org. Retrieved 2014-07-01. +
    96. ^ "ontheissues.org: Chris Cannon on Homeland Security". ontheissues.org. Retrieved 2014-07-01.
    97. -
    98. -^ "ontheissues.org: Chris Cannon on Abortion". ontheissues.org. Retrieved 2014-07-01. +
    99. ^ "ontheissues.org: Chris Cannon on Abortion". ontheissues.org. Retrieved 2014-07-01.
    100. -
    101. -^ Levintova, Hannah (7 April 2014). "OkCupid's CEO Donated to an Anti-Gay Campaign Once, Too". Hanna Levintova article on motherjones.com. motherjones.com. Retrieved 2014-07-01. +
    102. ^ Levintova, Hannah (7 April 2014). "OkCupid's CEO Donated to an Anti-Gay Campaign Once, Too". Hanna Levintova article on motherjones.com. motherjones.com. Retrieved 2014-07-01.
    103. -
    104. -^ Lee, Stephanie M. (8 April 2014). "OKCupid CEO once donated to anti-gay politician". Stephanie M. Lee's blog on sfgate.com. sfgate.com. Retrieved 2014-07-01. +
    105. ^ Lee, Stephanie M. (8 April 2014). "OKCupid CEO once donated to anti-gay politician". Stephanie M. Lee's blog on sfgate.com. sfgate.com. Retrieved 2014-07-01.
    106. -
    107. -^ a b "The Hypocrisy Of Sam Yagan & OkCupid". uncrunched.com blog. uncrunched.com. 6 April 2014. Retrieved 2014-07-01. +
    108. ^ a b "The Hypocrisy Of Sam Yagan & OkCupid". uncrunched.com blog. uncrunched.com. 6 April 2014. Retrieved 2014-07-01.
    109. -
    110. -^ Bellware, Kim (31 March 2014). "OKCupid Publicly Rips Mozilla: 'We Wish Them Nothing But Failure'". Kim Bellware article on huffingtonpost.com. huffingtonpost.com. Retrieved 2014-07-01. +
    111. ^ Bellware, Kim (31 March 2014). "OKCupid Publicly Rips Mozilla: 'We Wish Them Nothing But Failure'". Kim Bellware article on huffingtonpost.com. huffingtonpost.com. Retrieved 2014-07-01.
    112. -
    113. -^ "Mozilla's Appointment Of Brendan Eich As CEO Sparks Controversy After Prop 8 Donation News Re-Emerges". huffingtonpost.com article. huffingtonpost.com. 27 March 2014. Retrieved 2014-07-01. +
    114. ^ "Mozilla's Appointment Of Brendan Eich As CEO Sparks Controversy After Prop 8 Donation News Re-Emerges". huffingtonpost.com article. huffingtonpost.com. 27 March 2014. Retrieved 2014-07-01.
    115. -
    116. -^ Eidelson, Josh (4 April 2014). "OkCupid's gay rights stunt has its limits: Taking a deeper look at the savvy ploy". Josh Eidelson article on salon.com. salon.com. Retrieved 2014-07-01. +
    117. ^ Eidelson, Josh (4 April 2014). "OkCupid's gay rights stunt has its limits: Taking a deeper look at the savvy ploy". Josh Eidelson article on salon.com. salon.com. Retrieved 2014-07-01.
    118. -
    119. -^ a b "Mozilla Manifesto". Mozilla.org. Retrieved 2012-03-21. +
    120. ^ a b "Mozilla Manifesto". Mozilla.org. Retrieved 2012-03-21.
    121. -
    122. -^ "The Mozilla Manifesto". Retrieved 24 July 2015. +
    123. ^ "The Mozilla Manifesto". Retrieved 24 July 2015.
    124. -
    125. -^ "Gecko Layout Engine". download-firefox.org. July 17, 2008. Archived from the original on 2010-11-28. Retrieved 2012-05-10. +
    126. ^ "Gecko Layout Engine". download-firefox.org. July 17, 2008. Archived from the original on 2010-11-28. Retrieved 2012-05-10.
    127. -
    128. -^ "Web Browser Market Share Trends". W3Counter. Awio Web Services LLC. Retrieved 2012-05-10. +
    129. ^ "Web Browser Market Share Trends". W3Counter. Awio Web Services LLC. Retrieved 2012-05-10.
    130. -
    131. -^ "Top 5 Browsers". StatCounter Global Stats. StatCounter. Retrieved 2012-05-10. +
    132. ^ "Top 5 Browsers". StatCounter Global Stats. StatCounter. Retrieved 2012-05-10.
    133. -
    134. -^ "Web browsers (Global marketshare)". Clicky. Roxr Software Ltd. Retrieved 2012-05-10. +
    135. ^ "Web browsers (Global marketshare)". Clicky. Roxr Software Ltd. Retrieved 2012-05-10.
    136. -
    137. -^ Goodger, Ben (February 6, 2006). "Where Did Firefox Come From?". Inside Firefox. Archived from the original on 2011-06-23. Retrieved 2012-01-07. +
    138. ^ Goodger, Ben (February 6, 2006). "Where Did Firefox Come From?". Inside Firefox. Archived from the original on 2011-06-23. Retrieved 2012-01-07.
    139. -
    140. -^ "Mozilla browser becomes Firebird". IBPhoenix. Archived from the original on 2007-09-14. Retrieved 2013-06-10. We at IBPhoenix think that having a browser and a database with the same name in the same space will confuse the market, especially as browsers and databases are often used in the same applications +
    141. ^ "Mozilla browser becomes Firebird". IBPhoenix. Archived from the original on 2007-09-14. Retrieved 2013-06-10. We at IBPhoenix think that having a browser and a database with the same name in the same space will confuse the market, especially as browsers and databases are often used in the same applications
    142. -
    143. -^ Festa, Paul (May 6, 2003). "Mozilla's Firebird gets wings clipped". CNET. Retrieved 2007-01-30. +
    144. ^ Festa, Paul (May 6, 2003). "Mozilla's Firebird gets wings clipped". CNET. Retrieved 2007-01-30.
    145. -
    146. -^ Festa, Paul (February 9, 2004). "Mozilla holds 'fire' in naming fight". CNET News. Retrieved 2007-01-24. +
    147. ^ Festa, Paul (February 9, 2004). "Mozilla holds 'fire' in naming fight". CNET News. Retrieved 2007-01-24.
    148. -
    149. -^ "Mobile features". Mozilla. Retrieved 2012-06-26. +
    150. ^ "Mobile features". Mozilla. Retrieved 2012-06-26.
    151. -
    152. -^ "Mobile System Requirements". +
    153. ^ "Mobile System Requirements".
    154. -
    155. -^ "Firefox Mobile supported devices". +
    156. ^ "Firefox Mobile supported devices".
    157. -
    158. -^ "Mozilla rules out Firefox for iPhone and BlackBerry". +
    159. ^ "Mozilla rules out Firefox for iPhone and BlackBerry".
    160. -
    161. -^ "Boot to Gecko Project". Mozilla. March 2012. Retrieved 2012-03-30. +
    162. ^ "Boot to Gecko Project". Mozilla. March 2012. Retrieved 2012-03-30.
    163. -
    164. -^ "Firefox OS - Devices & Availability". Mozilla. Retrieved 2015-12-30. +
    165. ^ "Firefox OS - Devices & Availability". Mozilla. Retrieved 2015-12-30.
    166. -
    167. -^ "Thunderbird: Stability and Community Innovation | Mitchell's Blog". blog.lizardwrangler.com. Retrieved 2015-04-20. +
    168. ^ "Thunderbird: Stability and Community Innovation | Mitchell's Blog". blog.lizardwrangler.com. Retrieved 2015-04-20.
    169. -
    170. -^ "Two discontinued browsers". LWN.net. 21 December 2005. Retrieved 2012-08-19. +
    171. ^ "Two discontinued browsers". LWN.net. 21 December 2005. Retrieved 2012-08-19.
    172. -
    173. -^ "SeaMonkey trademarks registered!". kairo.at. 2007-05-22. Retrieved 2013-06-10. +
    174. ^ "SeaMonkey trademarks registered!". kairo.at. 2007-05-22. Retrieved 2013-06-10.
    175. -
    176. -^ "Bugzilla Installation List". Retrieved 2014-09-18. +
    177. ^ "Bugzilla Installation List". Retrieved 2014-09-18.
    178. -
    179. -^ a b Eich, Brendan (21 June 2011). "New JavaScript Engine Module Owner". BrendanEich.com. +
    180. ^ a b Eich, Brendan (21 June 2011). "New JavaScript Engine Module Owner". BrendanEich.com.
    181. -
    182. -^ "Bug 759422 - Remove use of e4x in account creation". Bugzilla@Mozilla. 2012-08-17. Retrieved 2012-08-18. +
    183. ^ "Bug 759422 - Remove use of e4x in account creation". Bugzilla@Mozilla. 2012-08-17. Retrieved 2012-08-18.
    184. -
    185. -^ "SpiderMonkey". Mozilla Developer Network. 2012-08-15. Retrieved 2012-08-18. +
    186. ^ "SpiderMonkey". Mozilla Developer Network. 2012-08-15. Retrieved 2012-08-18.
    187. -
    188. -^ "Rhino History". Mozilla Foundation. Retrieved 2008-03-20. +
    189. ^ "Rhino History". Mozilla Foundation. Retrieved 2008-03-20.
    190. -
    191. -^ "Roadmap". Retrieved 10 May 2016. +
    192. ^ "Roadmap". Retrieved 10 May 2016.
    193. -
    194. -^ Larabel, Michael. "Servo Continues Making Progress For Shipping Components In Gecko, Browser.html". Phoronix.com. Retrieved 10 May 2016. +
    195. ^ Larabel, Michael. "Servo Continues Making Progress For Shipping Components In Gecko, Browser.html". Phoronix.com. Retrieved 10 May 2016.
    196. -
    197. -^ "Mozilla VR". Mozilla VR. Retrieved 2016-10-27. +
    198. ^ "Mozilla VR". Mozilla VR. Retrieved 2016-10-27.
    199. -
    200. -^ Persona, Mozilla +
    201. ^ Persona, Mozilla
    202. -
    203. -^ "Persona". Mozilla Developer Network. Retrieved 2016-10-27. +
    204. ^ "Persona". Mozilla Developer Network. Retrieved 2016-10-27.
    205. -
    206. -^ About Mozilla Webmaker, Mozilla +
    207. ^ About Mozilla Webmaker, Mozilla
    208. -
    209. -^ a b Alan Henry. "Mozilla Webmaker Teaches You to Build Web Sites, Apps, and More". Lifehacker. Gawker Media. +
    210. ^ a b Alan Henry. "Mozilla Webmaker Teaches You to Build Web Sites, Apps, and More". Lifehacker. Gawker Media.
    211. -
    212. -^ "Air Mozilla". Mozilla Wiki. +
    213. ^ "Air Mozilla". Mozilla Wiki.
    214. -
    215. -^ "Air Mozilla Reboot, Phase I". +
    216. ^ "Air Mozilla Reboot, Phase I".

    Constant downloads failure in firefox

    -

    -External links[edit] +

    External links[edit]

    - + Wikimedia Commons has media related to Mozilla.
    @@ -650,4 +529,4 @@ -
    +
    \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/wordpress/expected.html b/article_scraper/resources/tests/readability/wordpress/expected.html index 0610440..2650500 100644 --- a/article_scraper/resources/tests/readability/wordpress/expected.html +++ b/article_scraper/resources/tests/readability/wordpress/expected.html @@ -17,15 +17,15 @@

    - +

    Stack Overflow published its analysis of 2017 hiring trends based on the targeting options employers selected when posting to Stack Overflow Jobs. The report, which compares data from 200 companies since 2015, ranks ReactJS, Docker, and Ansible at the top of the fastest growing skills in demand. When comparing the percentage change from 2015 to 2016, technologies like AJAX, Backbone.js, jQuery, and WordPress are less in demand.

    - +

    Stack Overflow also measured the demand relative to the available developers in different tech skills. The demand for backend, mobile, and database engineers is higher than the number of qualified candidates available. WordPress is last among the oversaturated fields with a surplus of developers relative to available positions.

    - +

    In looking at these results, it’s important to consider the inherent biases within the Stack Overflow ecosystem. In 2016, the site surveyed more than 56,000 developers but noted that the survey was “biased against devs who don’t speak English.” The average age of respondents was 29.6 years old and 92.8% of them were male.

    For two years running, Stack Overflow survey respondents have ranked WordPress among the most dreaded technologies that they would prefer not to use. This may be one reason why employers wouldn’t be looking to advertise positions on the site’s job board, which is the primary source of the data for this report.

    @@ -68,4 +68,4 @@ -
    +
    \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/yahoo-1/expected.html b/article_scraper/resources/tests/readability/yahoo-1/expected.html index ca528cb..9d547d4 100644 --- a/article_scraper/resources/tests/readability/yahoo-1/expected.html +++ b/article_scraper/resources/tests/readability/yahoo-1/expected.html @@ -1,39 +1,38 @@
    -

    The PlayStation VR

    +

    The PlayStation VR

    -
    +

    Sony’s PlayStation VR.

    -
    -
    +

    Virtual reality has officially reached the consoles. And it’s pretty good! Sony’s PlayStation VR is extremely comfortable and reasonably priced, and while it’s lacking killer apps, it’s loaded with lots of interesting ones.

    But which ones should you buy? I’ve played just about every launch game, and while some are worth your time, others you might want to skip. To help you decide what’s what, I’ve put together this list of the eight PSVR games worth considering.

    “Rez Infinite” ($30)

    -
    +

    Beloved cult hit “Rez” gets the VR treatment to help launch the PSVR, and the results are terrific. It includes a fully remastered take on the original “Rez” – you zoom through a Matrix-like computer system, shooting down enemies to the steady beat of thumping electronica – but the VR setting makes it incredibly immersive. It gets better the more you play it, too; unlock the amazing Area X mode and you’ll find yourself flying, shooting and bobbing your head to some of the trippiest visuals yet seen in VR.

    “Thumper” ($20)

    -
    +

    What would happen if Tron, the board game Simon, a Clown beetle, Cthulhu and a noise band met in VR? Chaos, for sure, and also “Thumper.” Called a “violent rhythm game” by its creators, “Thumper” is, well, a violent rhythm game that’s also a gorgeous, unsettling and totally captivating assault on the senses. With simple controls and a straightforward premise – click the X button and the analog stick in time with the music as you barrel down a neon highway — it’s one of the rare games that works equally well both in and out of VR. But since you have PSVR, play it there. It’s marvelous.

    “Until Dawn: Rush of Blood” ($20)

    -
    +

    Cheeky horror game “Until Dawn” was a breakout hit for the PS4 last year, channeling the classic “dumb teens in the woods” horror trope into an effective interactive drama. Well, forget all that if you fire up “Rush of Blood,” because this one sticks you front and center on a rollercoaster ride from Hell. Literally. You ride through a dimly-lit carnival of terror, dual-wielding pistols as you take down targets, hideous pig monsters and, naturally, maniac clowns. Be warned: If the bad guys don’t get you, the jump scares will.

    “Headmaster” ($20)

    -
    +

    Soccer meets “Portal” in the weird (and weirdly fun) “Headmaster,” a game about heading soccer balls into nets, targets and a variety of other things while stuck in some diabolical training facility. While at first it seems a little basic, increasingly challenging shots and a consistently entertaining narrative keep it from running off the pitch. Funny, ridiculous and as easy as literally moving your head back and forth, it’s a pleasant PSVR surprise.

    “RIGS: Mechanized Combat League” ($50)

    -
    +

    Giant mechs + sports? That’s the gist of this robotic blast-a-thon, which pits two teams of three against one another in gorgeous, explosive and downright fun VR combat. At its best, “RIGS” marries the thrill of fast-paced competitive shooters with the insanity of piloting a giant mech in VR. It can, however, be one of the barfier PSVR games. So pack your Dramamine, you’re going to have to ease yourself into this one.

    “Batman Arkham VR” ($20)

    -
    +

    “I’m Batman,” you will say. And you’ll actually be right this time, because you are Batman in this detective yarn, and you know this because you actually grab the famous cowl and mask, stick it on your head, and stare into the mirrored reflection of Rocksteady Games’ impressive Dark Knight character model. It lacks the action of its fellow “Arkham” games and runs disappointingly short, but it’s a high-quality experience that really shows off how powerfully immersive VR can be.

    “Job Simulator” ($30)

    -
    +

    There are a number of good VR ports in the PSVR launch lineup, but the HTC Vive launch game “Job Simulator” might be the best. Your task? Lots of tasks, actually, from cooking food to fixing cars to working in an office, all for robots, because did I mention you were in the future? Infinitely charming and surprisingly challenging, it’s a great showpiece for VR.

    “Eve Valkyrie” ($60)

    -
    +

    Already a hit on the Oculus Rift, this space dogfighting game was one of the first to really show off how VR can turn a traditional game experience into something special. It’s pricey and not quite as hi-res as the Rift version, but “Eve Valkyrie” does an admirable job filling the void left since “Battlestar Galactica” ended. Too bad there aren’t any Cylons in it (or are there?)

    More games news:

    - - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/yahoo-2/expected.html b/article_scraper/resources/tests/readability/yahoo-2/expected.html index 60e99e0..c6e9ebf 100644 --- a/article_scraper/resources/tests/readability/yahoo-2/expected.html +++ b/article_scraper/resources/tests/readability/yahoo-2/expected.html @@ -5,8 +5,7 @@

    In this photo dated Tuesday, Nov, 29, 2016 the Soyuz-FG rocket booster with the Progress MS-04 cargo ship is installed on a launch pad in Baikonur, Kazakhstan. The unmanned Russian cargo space ship Progress MS-04 broke up in the atmosphere over Siberia on Thursday Dec. 1, 2016, just minutes after the launch en route to the International Space Station due to an unspecified malfunction, the Russian space agency said.(Oleg Urusov/ Roscosmos Space Agency Press Service photo via AP)

    In this photo dated Tuesday, Nov, 29, 2016 the Soyuz-FG rocket booster with the Progress MS-04 cargo ship is installed on a launch pad in Baikonur, Kazakhstan. The unmanned Russian cargo space ship Progress MS-04 broke up in the atmosphere over Siberia on Thursday Dec. 1, 2016, just minutes after the launch en route to the International Space Station due to an unspecified malfunction, the Russian space agency said.(Oleg Urusov/ Roscosmos Space Agency Press Service photo via AP)

    -
    - +

    MOSCOW (AP) — An unmanned Russian cargo spaceship heading to the International Space Station broke up in the atmosphere over Siberia on Thursday due to an unspecified malfunction, the Russian space agency said.

    The Progress MS-04 cargo craft broke up at an altitude of 190 kilometers (118 miles) over the remote Russian Tuva region in Siberia that borders Mongolia, Roscosmos said in a statement. It said most of spaceship's debris burnt up as it entered the atmosphere but some fell to Earth over what it called an uninhabited area.

    @@ -23,5 +22,4 @@

    Aerospace Writer Marcia Dunn in Cape Canaveral, Florida, and Vladimir Isachenkov in Moscow contributed to this report.

    - - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/yahoo-3/expected.html b/article_scraper/resources/tests/readability/yahoo-3/expected.html index ae20402..128dfbf 100644 --- a/article_scraper/resources/tests/readability/yahoo-3/expected.html +++ b/article_scraper/resources/tests/readability/yahoo-3/expected.html @@ -6,11 +6,11 @@
    - - - - - + + + + +

    A photographer and Navy veteran is fighting back after a photo she posted to Facebook started an online backlash.

    Vanessa Hicks said she had no idea her photo would be considered controversial. The photo, from a military family’s newborn photo shoot, showed a newborn infant wrapped in an American flag held by his father, who was in his military uniform.

    Hicks, a Navy veteran herself and the wife of an active-duty Navy member, said her intention was to honor the flag as well as her clients, who wanted to incorporate their military service in the photo shoot.

    @@ -20,17 +20,11 @@

    “This is what he was fighting for, his son wrapped in an American flag,” Hicks told ABC News. However, when she posted the image on her page, she started to get comments accusing her of desecrating the flag.

    On one Facebook page an unidentified poster put up her picture writing and wrote they found it was “disrespectful, rude, tacky, disgusting, and against the U.S. Flag Code.”

    -

    -
    -

    View photo

    -

    .

    -
    -Vanessa Hicks
    +

    View photo

    .

    Vanessa Hicks

    Vanessa Hicks

    -

    -

    The Federal Flag Code has guidelines for the proper treatment of the U.S. Flag but there are no rules for punishment related to violations. In the past, the Supreme Court has found that people are protected from punishment under the First Amendment for manipulating or even burning the flag. +

    The Federal Flag Code has guidelines for the proper treatment of the U.S. Flag but there are no rules for punishment related to violations. In the past, the Supreme Court has found that people are protected from punishment under the First Amendment for manipulating or even burning the flag.

    Hicks said she was surprised when messages suddenly started to pop up on her Facebook page and even her own website criticizing her photos.

    She said she stayed up until 4 a.m. recently to take down comments from her business and company page, even on shoots that had nothing to do with the flag.

    @@ -48,4 +42,4 @@ -
    +
    \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/yahoo-4/expected.html b/article_scraper/resources/tests/readability/yahoo-4/expected.html index c16d819..7e3a3fc 100644 --- a/article_scraper/resources/tests/readability/yahoo-4/expected.html +++ b/article_scraper/resources/tests/readability/yahoo-4/expected.html @@ -4,20 +4,20 @@

    - トレンドマイクロは3月9日、Wi-Fi利用時の通信を暗号化し保護するスマホ・タブレット向けのセキュリティアプリ「フリーWi-Fiプロテクション」(iOS/Android)の発売を開始すると発表した。1年版ライセンスは2900円(税込)で、2年版ライセンスは5000円(税込)。
    -
     フリーWi-Fiプロテクションは、App Storeおよび、Google Playにて販売され、既に提供しているスマホ・タブレット向け総合セキュリティ対策アプリ「ウイルスバスター モバイル」と併用することで、不正アプリや危険なウェブサイトからの保護に加え、通信の盗み見を防ぐことができる。
    -
     2020年の東京オリンピック・パラリンピックの開催などを見据え、フリーWi-Fi(公衆無線LAN)の設置が促進され、フリーWi-Fiの利用者も増加している。 -
    -
     一方で、脆弱な設定のフリーWi-Fiや攻撃者が設置した偽のフリーWi-Fiへの接続などによる情報漏えい、通信の盗み見などのセキュリティリスクが危惧されているという。 -
    -
     正規事業者が提供する安全性の高いフリーWi-Fiのほかにも、通信を暗号化していない安全性の低いフリーWi-Fi、さらにはサイバー犯罪者が設置したフリーWi-Fiなどさまざまなものが混在している。また、利用者は、接続する前にひとつひとつ安全性を確認するのは難しい状況だとしている。 -
    -
     トレンドマイクロがスマートフォン保持者でフリーWi-Fiの利用経験がある人に実施した調査では、回答者の約85%が安全なフリーWi-Fiと危険なフリーWi-Fiは「見分けられない」と回答。さらに、約65%がフリーWi-Fiの利用に不安を感じていると回答している。 -
    -
     こうした環境の変化やユーザの状況を鑑み、フリーWi-Fiプロテクションの提供を開始する。同アプリをインストールすることで利用者は、万が一安全性の低いフリーWi-Fiのアクセスポイントに接続してしまった場合でも、その通信を暗号化でき、通信の盗み見やそれによる情報漏えいのリスクを低減できるようになる。 -
    -
     具体的には、フリーWi-Fi利用時に、スマートフォンがフリーWi-Fiプロテクションインフラに接続することにより、フリーWi-Fiのアクセスポイントを介した通信がVPN(Virtual Private Network)で暗号化される。これにより利用者は、第三者から通信を傍受されることやデータの情報漏えいを防ぐことが可能。さらに、かんたん自動接続の機能により、通信を暗号化していない安全性が低いフリーWi-Fi接続時や利用者が指定したWi-Fiへ接続する際に、自動的に通信を暗号化し、利用者の通信を保護する。
    -
     また、フリーWi-Fiプロテクションインフラと、莫大なセキュリティ情報のビッグデータを保有するクラウド型セキュリティ技術基盤「Trend Micro Smart Protection Network」(SPN)が連携することで、フリーWi-Fiプロテクションインフラを経由してインターネットを利用する際に、利用者がフィッシング詐欺サイトや偽サイトなどへの不正サイトへアクセスすることをブロックできるという。

    + トレンドマイクロは3月9日、Wi-Fi利用時の通信を暗号化し保護するスマホ・タブレット向けのセキュリティアプリ「フリーWi-Fiプロテクション」(iOS/Android)の発売を開始すると発表した。1年版ライセンスは2900円(税込)で、2年版ライセンスは5000円(税込)。
    +
     フリーWi-Fiプロテクションは、App Storeおよび、Google Playにて販売され、既に提供しているスマホ・タブレット向け総合セキュリティ対策アプリ「ウイルスバスター モバイル」と併用することで、不正アプリや危険なウェブサイトからの保護に加え、通信の盗み見を防ぐことができる。
    +
     2020年の東京オリンピック・パラリンピックの開催などを見据え、フリーWi-Fi(公衆無線LAN)の設置が促進され、フリーWi-Fiの利用者も増加している。 +
    +
     一方で、脆弱な設定のフリーWi-Fiや攻撃者が設置した偽のフリーWi-Fiへの接続などによる情報漏えい、通信の盗み見などのセキュリティリスクが危惧されているという。 +
    +
     正規事業者が提供する安全性の高いフリーWi-Fiのほかにも、通信を暗号化していない安全性の低いフリーWi-Fi、さらにはサイバー犯罪者が設置したフリーWi-Fiなどさまざまなものが混在している。また、利用者は、接続する前にひとつひとつ安全性を確認するのは難しい状況だとしている。 +
    +
     トレンドマイクロがスマートフォン保持者でフリーWi-Fiの利用経験がある人に実施した調査では、回答者の約85%が安全なフリーWi-Fiと危険なフリーWi-Fiは「見分けられない」と回答。さらに、約65%がフリーWi-Fiの利用に不安を感じていると回答している。 +
    +
     こうした環境の変化やユーザの状況を鑑み、フリーWi-Fiプロテクションの提供を開始する。同アプリをインストールすることで利用者は、万が一安全性の低いフリーWi-Fiのアクセスポイントに接続してしまった場合でも、その通信を暗号化でき、通信の盗み見やそれによる情報漏えいのリスクを低減できるようになる。 +
    +
     具体的には、フリーWi-Fi利用時に、スマートフォンがフリーWi-Fiプロテクションインフラに接続することにより、フリーWi-Fiのアクセスポイントを介した通信がVPN(Virtual Private Network)で暗号化される。これにより利用者は、第三者から通信を傍受されることやデータの情報漏えいを防ぐことが可能。さらに、かんたん自動接続の機能により、通信を暗号化していない安全性が低いフリーWi-Fi接続時や利用者が指定したWi-Fiへ接続する際に、自動的に通信を暗号化し、利用者の通信を保護する。
    +
     また、フリーWi-Fiプロテクションインフラと、莫大なセキュリティ情報のビッグデータを保有するクラウド型セキュリティ技術基盤「Trend Micro Smart Protection Network」(SPN)が連携することで、フリーWi-Fiプロテクションインフラを経由してインターネットを利用する際に、利用者がフィッシング詐欺サイトや偽サイトなどへの不正サイトへアクセスすることをブロックできるという。

    @@ -28,7 +28,7 @@

    最終更新:3/9(木) 18:45

    - CNET Japan + CNET Japan

    @@ -48,4 +48,4 @@
    - + \ No newline at end of file diff --git a/article_scraper/resources/tests/readability/youth/expected.html b/article_scraper/resources/tests/readability/youth/expected.html index b21872d..bb3cb7d 100644 --- a/article_scraper/resources/tests/readability/youth/expected.html +++ b/article_scraper/resources/tests/readability/youth/expected.html @@ -1,5 +1,5 @@
    -

    海外留学生看两会:出国前后关注点大不同

    +

    海外留学生看两会:出国前后关注点大不同

    图为马素湘在澳大利亚悉尼游玩时的近影。

      出国前后关注点大不同

      出国前:政治科目会出啥考题?

    @@ -9,7 +9,7 @@

       无独有偶,英国剑桥大学的李博灏也有着类似的经历。他表示,在国内读本科时,虽然关注过两会,但并不像现在这样,将关注点放在国家社会经济迫切需要解决的难题与问题上。“出国前更关心与我们学生的实际问题以及切身利益相关的议题,比如奖学金、助学金的发放与申请;相关工作行业就业前景等。”

       在英国求学6年后,李博灏希望能够学有所用,为国家发展过程中遇到的难题寻求解决办法。因此随着课题研究的深入,他更加关注国家和社会目前所面临的挑战,比如中等收入陷阱、供给侧改革、创意创新产业的发展等议题。

       还有一些学子表示,出国前对两会不太了解,出国后反而对两会热点多了些思考。在澳大利亚墨尔本留学的马素湘说:“想不关注都难啊!刷微博看新闻到处都是两会的消息。而且我现在学的是新闻专业,对世界发生的大小事都会留意。随着年龄、阅历增长,家国情怀也渐长,会关心国家发展的各方面问题。”

    -

    +

    图为李博灏在瑞士日内日瓦联合国欧洲总部的近影。

      关注点多与所学专业相关

      法学专业热议法定婚龄 很多人关心供给侧改革

    @@ -17,7 +17,7 @@

       对此提案,卢宇有着自己的看法,“百花齐放的局面固然可喜,但也不能一哄而上,国家应该提高行业准入门槛,完善新能源汽车准入管理规则,从源头上进行制度创新,将一些不具备新能源汽车生产资质的厂家淘汰出局,并高度关注电池系统安全问题,严格执行充电桩生产的国际标准。”

       马素湘表示,“出国读研之前,我在国内学习法学,因此对相关的问题比较感兴趣。今年两会上人大代表黄细花提出把法定婚龄降低到18岁的提案;而在微博的热搜榜上,一本儿童性教育读物引起了极大的争议。我认为降低婚龄并不适合我国国情。因为性教育的缺乏导致我国大部分人在18岁之前没有接受过完整的性教育,思想行动上也不够成熟,如何能够对自己的人生和自己的另一半负责?所以我希望能有人大代表提议在国民儿童阶段完善我国的性教育,而不是为了鼓励生育将法定婚龄提前。”

       李博灏是英国剑桥大学制造业研究所创新设计管理中心的一名博士。他格外关注的话题是供给侧结构性改革,知识产权保护,消费升级等议题。“我的博士研究课题是关于推动创新设计密集型产业的发展从而帮助中等收入国家克服中等收入陷阱的探索,因此一直十分关注国内关于供给侧改革的相关议题。通过本届两会对于该议题的进一步关注,我希望可以有效地帮助我了解供给侧改革与中等收入陷阱问题目前的发展状况以及解决情况;也希望可以与更多的机构取得联系,并帮助他们了解该议题最前沿的研究与解决方案。”

    -

    +

    图为卢宇与祖国五星红旗和联合国会旗的合影。

       两会成为了解国情的窗口

       盼准确把握国家发展需求 愿寻求机遇回国有所作为

    @@ -27,4 +27,4 @@

       在两会上,全国政协委员张近东提出“当前中国经济的发展正在从数量型向质量型转变,消费升级将成为企业新一轮创新发展的动力。”对此,李博灏认为这也是他关注的问题。他认为:“在消费市场持续扩大的大环境下,如何能够通过促进创新设计产业的发展以及消费品品质的提升,推动国内消费增长并促进其在可持续经济增长中的作用,是一个迫切需要解决的问题。在当前供给侧改革的大环境下,消费升级的重要性越发突显。”

       作为两会的资深粉,卢宇聊起两会话题充满了期待。“今年是国家‘十三五’规划的关键时期,‘一带一路’建设也在如火如荼地进行中。作为一名中国留学生,我一直都关注着能在哪些领域为国家、为中外合作共赢做出贡献。‘大众创业、万众创新’提出有几年了。全国各省市在吸引留学人才归国创业就业方面纷纷提出了各种优惠政策,但目前大都集中在沿海发达省份,而且主要惠及理工科博士,政策覆盖面还不够广。期待从国家层面设立工作组加强留学人才的统筹协调,完善顶层设计。人文社科类留学人才是未来国家智库的重要后备力量,也应该适当加强对他们的政策鼓励,更好地服务于‘一带一路’国家战略。”

       卢宇还认为两会应该增设学子代表,列席旁听两会,拓展留学生参政议政渠道。“我相信优秀留学生的国际化视野必将为家乡建设带来新的思路,增添新的活力。”卢宇恳切地说。

    -
    + \ No newline at end of file diff --git a/article_scraper/src/full_text_parser/readability/tests.rs b/article_scraper/src/full_text_parser/readability/tests.rs index 7d299c3..bcc95a6 100644 --- a/article_scraper/src/full_text_parser/readability/tests.rs +++ b/article_scraper/src/full_text_parser/readability/tests.rs @@ -45,7 +45,8 @@ async fn run_test(name: &str) { article.root_node = Some(root); let html = article.get_content().unwrap(); - //std::fs::write(format!("./resources/tests/readability/{name}/expected.html"), &html).unwrap(); + // abuse line below to update all test results after whitespace or similar change + // std::fs::write(format!("./resources/tests/readability/{name}/expected.html"), &html).unwrap(); let expected = std::fs::read_to_string(format!( "./resources/tests/readability/{name}/expected.html"