From b541cd73f8dc7344968e7d9371a307876c400370 Mon Sep 17 00:00:00 2001 From: Jan Lukas Gernert Date: Fri, 24 Mar 2023 08:02:08 +0100 Subject: [PATCH] whitespace fixes --- resources/tests/readability/001/expected.html | 64 +- resources/tests/readability/002/expected.html | 448 ++++++++----- resources/tests/readability/003/expected.html | 10 +- .../tests/readability/aclu/expected.html | 103 +-- .../tests/readability/aktualne/expected.html | 51 +- .../archive-of-our-own/expected.html | 220 ++++--- .../tests/readability/ars-1/expected.html | 84 ++- .../expected.html | 42 +- .../basic-tags-cleaning/expected.html | 17 +- .../tests/readability/bbc-1/expected.html | 18 +- .../tests/readability/blogger/expected.html | 149 +++-- .../tests/readability/breitbart/expected.html | 59 +- .../readability/bug-1255978/expected.html | 165 +++-- .../readability/buzzfeed-1/expected.html | 73 ++- .../tests/readability/citylab-1/expected.html | 127 +++- .../readability/clean-links/expected.html | 569 ++++++++-------- .../cnet-svg-classes/expected.html | 44 +- .../tests/readability/cnet/expected.html | 63 +- resources/tests/readability/cnn/expected.html | 66 +- .../expected.html | 14 +- .../daringfireball-1/expected.html | 54 +- .../readability/data-url-image/expected.html | 14 +- .../tests/readability/dev418/expected.html | 86 ++- .../readability/dropbox-blog/expected.html | 614 ++++++++++++------ .../tests/readability/ebb-org/expected.html | 46 +- .../tests/readability/ehow-1/expected.html | 184 +++--- .../tests/readability/ehow-2/expected.html | 169 +++-- .../readability/embedded-videos/expected.html | 27 +- .../tests/readability/engadget/expected.html | 196 ++++-- .../firefox-nightly-blog/expected.html | 379 ++++++----- .../tests/readability/folha/expected.html | 19 +- resources/tests/readability/gmw/expected.html | 98 +-- .../google-sre-book-1/expected.html | 512 ++++++++++----- .../readability/guardian-1/expected.html | 353 +++++++--- .../tests/readability/heise/expected.html | 36 +- 
.../readability/herald-sun-1/expected.html | 23 +- .../readability/hidden-nodes/expected.html | 11 +- .../readability/hukumusume/expected.html | 384 ++++++++--- .../tests/readability/iab-1/expected.html | 5 +- .../tests/readability/ietf-1/expected.html | 199 ++++-- .../tests/readability/webmd-1/expected.html | 34 +- src/constants.rs | 3 +- src/full_text_parser/mod.rs | 2 +- src/full_text_parser/readability/mod.rs | 53 +- src/full_text_parser/readability/tests.rs | 7 +- src/util.rs | 25 +- 46 files changed, 3808 insertions(+), 2111 deletions(-) diff --git a/resources/tests/readability/001/expected.html b/resources/tests/readability/001/expected.html index e246772..741c0aa 100644 --- a/resources/tests/readability/001/expected.html +++ b/resources/tests/readability/001/expected.html @@ -1,35 +1,40 @@ -

So finally you're testing your frontend JavaScript code? Great! The more you +

+

So finally you're testing your frontend JavaScript code? Great! The more you write tests, the more confident you are with your code… but how much precisely? That's where code coverage might -help.

-

The idea behind code coverage is to record which parts of your code (functions, +help. +

+

The idea behind code coverage is to record which parts of your code (functions, statements, conditionals and so on) have been executed by your test suite, to compute metrics out of these data and usually to provide tools for navigating and inspecting them.

-

Not a lot of frontend developers I know actually test their frontend code, +

Not a lot of frontend developers I know actually test their frontend code, and I can barely imagine how many of them have ever setup code coverage… Mostly because there are not many frontend-oriented tools in this area I guess.

-

Actually I've only found one which provides an adapter for Mocha and +

Actually I've only found one which provides an adapter for Mocha and actually works…

-
-

Drinking game for web devs: +

+

Drinking game for web devs:
(1) Think of a noun
(2) Google "<noun>.js"
(3) If a library with that name exists - drink

— Shay Friedman (@ironshay) August 22, 2013 -
-

Blanket.js is an easy to install, easy to configure, +

+

Blanket.js is an easy to install, easy to configure, and easy to use JavaScript code coverage library that works both in-browser and -with nodejs.

-

Its use is dead easy, adding Blanket support to your Mocha test suite +with nodejs. +

+

Its use is dead easy, adding Blanket support to your Mocha test suite is just matter of adding this simple line to your HTML test file:

<script src="vendor/blanket.js"
         data-cover-adapter="vendor/mocha-blanket.js"></script>
 
-

Source files: blanket.js, - mocha-blanket.js

-

As an example, let's reuse the silly Cow example we used + +

Source files: blanket.js, + mocha-blanket.js +

+

As an example, let's reuse the silly Cow example we used in a previous episode:

// cow.js
 (function(exports) {
@@ -49,7 +54,8 @@ with nodejs.

}; })(this);
-

And its test suite, powered by Mocha and Chai:

+ +

And its test suite, powered by Mocha and Chai:

var expect = chai.expect;
 
 describe("Cow", function() {
@@ -73,7 +79,8 @@ describe("Cow", function() {
   });
 });
 
-

Let's create the HTML test file for it, featuring Blanket and its adapter + +

Let's create the HTML test file for it, featuring Blanket and its adapter for Mocha:

<!DOCTYPE html>
 <html>
@@ -97,24 +104,29 @@ describe("Cow", function() {
 </body>
 </html>
 
-

Notes:

-
    -
  • Notice the data-cover attribute we added to the script tag + +

    Notes:

    +
      +
    • Notice the data-cover attribute we added to the script tag loading the source of our library;
    • -
    • The HTML test file must be served over HTTP for the adapter to +
    • The HTML test file must be served over HTTP for the adapter to be loaded.
    • -
    -

    Running the tests now gives us something like this:

    -

    screenshot

    -

    As you can see, the report at the bottom highlights that we haven't actually +

+

Running the tests now gives us something like this:

+

+ screenshot +

+

As you can see, the report at the bottom highlights that we haven't actually tested the case where an error is raised in case a target name is missing. We've been informed of that, nothing more, nothing less. We simply know we're missing a test here. Isn't this cool? I think so!

-

Just remember that code coverage will only bring you numbers and +

Just remember that code coverage will only bring you numbers and raw information, not actual proofs that the whole of your code logic has been actually covered. If you ask me, the best inputs you can get about your code logic and implementation ever are the ones issued out of pair programming sessions and code reviews — but that's another story.

-

So is code coverage silver bullet? No. Is it useful? Definitely. Happy testing!

+

So is code coverage silver bullet? No. Is it useful? Definitely. Happy testing! +

+
diff --git a/resources/tests/readability/002/expected.html b/resources/tests/readability/002/expected.html index 87552b7..1932918 100644 --- a/resources/tests/readability/002/expected.html +++ b/resources/tests/readability/002/expected.html @@ -1,144 +1,204 @@ -

For more than a decade the Web has used XMLHttpRequest (XHR) to achieve +

+
+

For more than a decade the Web has used XMLHttpRequest (XHR) to achieve asynchronous requests in JavaScript. While very useful, XHR is not a very nice API. It suffers from lack of separation of concerns. The input, output and state are all managed by interacting with one object, and state is tracked using events. Also, the event-based model doesn’t play well with JavaScript’s recent focus on Promise- and generator-based asynchronous programming.

-

The Fetch API intends +

The Fetch API intends to fix most of these problems. It does this by introducing the same primitives to JS that are used in the HTTP protocol. In addition, it introduces a utility function fetch() that succinctly captures the intention of retrieving a resource from the network.

-

The Fetch specification, which +

The Fetch specification, which defines the API, nails down the semantics of a user agent fetching a resource. This, combined with ServiceWorkers, is an attempt to:

-
    -
  1. Improve the offline experience.
  2. -
  3. Expose the building blocks of the Web to the platform as part of the +
      +
    1. Improve the offline experience.
    2. +
    3. Expose the building blocks of the Web to the platform as part of the extensible web movement.
    4. -
    -

    As of this writing, the Fetch API is available in Firefox 39 (currently +

+

As of this writing, the Fetch API is available in Firefox 39 (currently Nightly) and Chrome 42 (currently dev). Github has a Fetch polyfill.

+

Feature detection

-

Fetch API support can be detected by checking for Headers,Request, Response or fetch on + +

Fetch API support can be detected by checking for Headers,Request, Response or fetch on the window or worker scope.

+

Simple fetching

-

The most useful, high-level part of the Fetch API is the fetch() function. + +

The most useful, high-level part of the Fetch API is the fetch() function. In its simplest form it takes a URL and returns a promise that resolves to the response. The response is captured as a Response object.

-
fetch("/data.json").then(function(res){// res instanceof Response == true.if(res.ok){
-    res.json().then(function(data){
-      console.log(data.entries);});}else{
-    console.log("Looks like the response wasn't perfect, got status", res.status);}},function(e){
-  console.log("Fetch failed!", e);});
-

Submitting some parameters, it would look like this:

-
fetch("http://www.example.org/submit.php",{
-  method:"POST",
-  headers:{"Content-Type":"application/x-www-form-urlencoded"},
-  body:"firstName=Nikhil&favColor=blue&password=easytoguess"}).then(function(res){if(res.ok){
-    alert("Perfect! Your settings are saved.");}elseif(res.status==401){
-    alert("Oops! You are not authorized.");}},function(e){
-  alert("Error submitting form!");});
-

The fetch() function’s arguments are the same as those passed +

+
fetch("/data.json").then(function(res) {
+  // res instanceof Response == true.
+  if (res.ok) {
+    res.json().then(function(data) {
+      console.log(data.entries);
+    });
+  } else {
+    console.log("Looks like the response wasn't perfect, got status", res.status);
+  }
+}, function(e) {
+  console.log("Fetch failed!", e);
+});
+
+

Submitting some parameters, it would look like this:

+
+
fetch("http://www.example.org/submit.php", {
+  method: "POST",
+  headers: {
+    "Content-Type": "application/x-www-form-urlencoded"
+  },
+  body: "firstName=Nikhil&favColor=blue&password=easytoguess"
+}).then(function(res) {
+  if (res.ok) {
+    alert("Perfect! Your settings are saved.");
+  } else if (res.status == 401) {
+    alert("Oops! You are not authorized.");
+  }
+}, function(e) {
+  alert("Error submitting form!");
+});
+
+

The fetch() function’s arguments are the same as those passed to the -
Request() constructor, so you may directly pass arbitrarily +
+Request() constructor, so you may directly pass arbitrarily complex requests to fetch() as discussed below.

+

Headers

-

Fetch introduces 3 interfaces. These are Headers, Request and -
Response. They map directly to the underlying HTTP concepts, + +

Fetch introduces 3 interfaces. These are Headers, Request and +
+Response. They map directly to the underlying HTTP concepts, but have
certain visibility filters in place for privacy and security reasons, such as
supporting CORS rules and ensuring cookies aren’t readable by third parties.

-

The Headers interface is +

The Headers interface is a simple multi-map of names to values:

-
var content ="Hello World";var reqHeaders =new Headers();
-reqHeaders.append("Content-Type","text/plain"
+            
+
var content = "Hello World";
+var reqHeaders = new Headers();
+reqHeaders.append("Content-Type", "text/plain"
 reqHeaders.append("Content-Length", content.length.toString());
-reqHeaders.append("X-Custom-Header","ProcessThisImmediately");
-

The same can be achieved by passing an array of arrays or a JS object +reqHeaders.append("X-Custom-Header", "ProcessThisImmediately");

+
+

The same can be achieved by passing an array of arrays or a JS object literal
to the constructor:

-
reqHeaders =new Headers({"Content-Type":"text/plain","Content-Length": content.length.toString(),"X-Custom-Header":"ProcessThisImmediately",});
-

The contents can be queried and retrieved:

-
console.log(reqHeaders.has("Content-Type"));// true
-console.log(reqHeaders.has("Set-Cookie"));// false
-reqHeaders.set("Content-Type","text/html");
-reqHeaders.append("X-Custom-Header","AnotherValue");
+            
+
reqHeaders = new Headers({
+  "Content-Type": "text/plain",
+  "Content-Length": content.length.toString(),
+  "X-Custom-Header": "ProcessThisImmediately",
+});
+
+

The contents can be queried and retrieved:

+
+
console.log(reqHeaders.has("Content-Type")); // true
+console.log(reqHeaders.has("Set-Cookie")); // false
+reqHeaders.set("Content-Type", "text/html");
+reqHeaders.append("X-Custom-Header", "AnotherValue");
  
-console.log(reqHeaders.get("Content-Length"));// 11
-console.log(reqHeaders.getAll("X-Custom-Header"));// ["ProcessThisImmediately", "AnotherValue"]
+console.log(reqHeaders.get("Content-Length")); // 11
+console.log(reqHeaders.getAll("X-Custom-Header")); // ["ProcessThisImmediately", "AnotherValue"]
  
 reqHeaders.delete("X-Custom-Header");
-console.log(reqHeaders.getAll("X-Custom-Header"));// []
-

Some of these operations are only useful in ServiceWorkers, but they provide +console.log(reqHeaders.getAll("X-Custom-Header")); // []

+
+

Some of these operations are only useful in ServiceWorkers, but they provide
a much nicer API to Headers.

-

Since Headers can be sent in requests, or received in responses, and have +

Since Headers can be sent in requests, or received in responses, and have various limitations about what information can and should be mutable, Headers objects have a guard property. This is not exposed to the Web, but it affects which mutation operations are allowed on the Headers object.
Possible values are:

-
    -
  • “none”: default.
  • -
  • “request”: guard for a Headers object obtained from a Request (Request.headers).
  • -
  • “request-no-cors”: guard for a Headers object obtained from a Request +
      +
    • “none”: default.
    • +
    • “request”: guard for a Headers object obtained from a Request (Request.headers).
    • +
    • “request-no-cors”: guard for a Headers object obtained from a Request created
      with mode “no-cors”.
    • -
    • “response”: naturally, for Headers obtained from Response (Response.headers).
    • -
    • “immutable”: Mostly used for ServiceWorkers, renders a Headers object +
    • “response”: naturally, for Headers obtained from Response (Response.headers).
    • +
    • “immutable”: Mostly used for ServiceWorkers, renders a Headers object
      read-only.
    • -
    -

    The details of how each guard affects the behaviors of the Headers object +

+

The details of how each guard affects the behaviors of the Headers object are
in the specification. For example, you may not append or set a “request” guarded Headers’ “Content-Length” header. Similarly, inserting “Set-Cookie” into a Response header is not allowed so that ServiceWorkers may not set cookies via synthesized Responses.

-

All of the Headers methods throw TypeError if name is not a +

All of the Headers methods throw TypeError if name is not a valid HTTP Header name. The mutation operations will throw TypeError if there is an immutable guard. Otherwise they fail silently. For example:

-
var res = Response.error();try{
-  res.headers.set("Origin","http://mybank.com");}catch(e){
-  console.log("Cannot pretend to be a bank!");}
+
+
var res = Response.error();
+try {
+  res.headers.set("Origin", "http://mybank.com");
+} catch(e) {
+  console.log("Cannot pretend to be a bank!");
+}
+
+

Request

-

The Request interface defines a request to fetch a resource over HTTP. + +

The Request interface defines a request to fetch a resource over HTTP. URL, method and headers are expected, but the Request also allows specifying a body, a request mode, credentials and cache hints.

-

The simplest Request is of course, just a URL, as you may do to GET a +

The simplest Request is of course, just a URL, as you may do to GET a resource.

-
var req =new Request("/index.html");
-console.log(req.method);// "GET"
-console.log(req.url);// "http://example.com/index.html"
-

You may also pass a Request to the Request() constructor to +

+
var req = new Request("/index.html");
+console.log(req.method); // "GET"
+console.log(req.url); // "http://example.com/index.html"
+
+

You may also pass a Request to the Request() constructor to create a copy.
(This is not the same as calling the clone() method, which is covered in
the “Reading bodies” section.).

-
var copy =new Request(req);
-console.log(copy.method);// "GET"
-console.log(copy.url);// "http://example.com/index.html"
-

Again, this form is probably only useful in ServiceWorkers.

-

The non-URL attributes of the Request can only be set by passing +

+
var copy = new Request(req);
+console.log(copy.method); // "GET"
+console.log(copy.url); // "http://example.com/index.html"
+
+

Again, this form is probably only useful in ServiceWorkers.

+

The non-URL attributes of the Request can only be set by passing initial
values as a second argument to the constructor. This argument is a dictionary.

-
var uploadReq =new Request("/uploadImage",{
-  method:"POST",
-  headers:{"Content-Type":"image/png",},
-  body:"image data"});
-

The Request’s mode is used to determine if cross-origin requests lead +

+
var uploadReq = new Request("/uploadImage", {
+  method: "POST",
+  headers: {
+    "Content-Type": "image/png",
+  },
+  body: "image data"
+});
+
+

The Request’s mode is used to determine if cross-origin requests lead to valid responses, and which properties on the response are readable. Legal mode values are "same-origin", "no-cors" (default) and "cors".

-

The "same-origin" mode is simple, if a request is made to another +

The "same-origin" mode is simple, if a request is made to another origin with this mode set, the result is simply an error. You could use this to ensure that
a request is always being made to your origin.

-
var arbitraryUrl = document.getElementById("url-input").value;
-fetch(arbitraryUrl,{ mode:"same-origin"}).then(function(res){
-  console.log("Response succeeded?", res.ok);},function(e){
-  console.log("Please enter a same-origin URL!");});
-

The "no-cors" mode captures what the web platform does by default +

+
var arbitraryUrl = document.getElementById("url-input").value;
+fetch(arbitraryUrl, { mode: "same-origin" }).then(function(res) {
+  console.log("Response succeeded?", res.ok);
+}, function(e) {
+  console.log("Please enter a same-origin URL!");
+});
+
+

The "no-cors" mode captures what the web platform does by default for scripts you import from CDNs, images hosted on other domains, and so on. First, it prevents the method from being anything other than “HEAD”, “GET” or “POST”. Second, if any ServiceWorkers intercept these requests, @@ -147,7 +207,7 @@ fetch(arbitraryUrl,{ mode: -

"cors" mode is what you’ll usually use to make known cross-origin +

"cors" mode is what you’ll usually use to make known cross-origin requests to access various APIs offered by other vendors. These are expected to adhere to
the CORS protocol. @@ -155,171 +215,235 @@ fetch(arbitraryUrl,{ mode:most interesting photos today like this:

-
var u =new URLSearchParams();
-u.append('method','flickr.interestingness.getList');
-u.append('api_key','<insert api key here>');
-u.append('format','json');
-u.append('nojsoncallback','1');var apiCall = fetch('https://api.flickr.com/services/rest?'+ u);
+            
+
var u = new URLSearchParams();
+u.append('method', 'flickr.interestingness.getList');
+u.append('api_key', '<insert api key here>');
+u.append('format', 'json');
+u.append('nojsoncallback', '1');
  
-apiCall.then(function(response){return response.json().then(function(json){// photo is a list of photos.return json.photos.photo;});}).then(function(photos){
-  photos.forEach(function(photo){
-    console.log(photo.title);});});
-

You may not read out the “Date” header since Flickr does not allow it +var apiCall = fetch('https://api.flickr.com/services/rest?' + u); +  +apiCall.then(function(response) { + return response.json().then(function(json) { + // photo is a list of photos. + return json.photos.photo; + }); +}).then(function(photos) { + photos.forEach(function(photo) { + console.log(photo.title); + }); +});

+
+

You may not read out the “Date” header since Flickr does not allow it via -
Access-Control-Expose-Headers.

-
response.headers.get("Date");// null
-

The credentials enumeration determines if cookies for the other +
+Access-Control-Expose-Headers.

+
+
response.headers.get("Date"); // null
+
+

The credentials enumeration determines if cookies for the other domain are -
sent to cross-origin requests. This is similar to XHR’s withCredentials
flag, but tri-valued as "omit" (default), "same-origin" and "include".

-

The Request object will also give the ability to offer caching hints to +
sent to cross-origin requests. This is similar to XHR’s withCredentials +
flag, but tri-valued as "omit" (default), "same-origin" and "include".

+

The Request object will also give the ability to offer caching hints to the user-agent. This is currently undergoing some security review. Firefox exposes the attribute, but it has no effect.

-

Requests have two read-only attributes that are relevant to ServiceWorkers +

Requests have two read-only attributes that are relevant to ServiceWorkers
intercepting them. There is the string referrer, which is set by the UA to be
the referrer of the Request. This may be an empty string. The other is -
context which is a rather large enumeration defining +
+context which is a rather large enumeration defining what sort of resource is being fetched. This could be “image” if the request is from an <img>tag in the controlled document, “worker” if it is an attempt to load a worker script, and so on. When used with the fetch() function, it is “fetch”.

+

Response

-

Response instances are returned by calls to fetch(). + +

Response instances are returned by calls to fetch(). They can also be created by JS, but this is only useful in ServiceWorkers.

-

We have already seen some attributes of Response when we looked at fetch(). +

We have already seen some attributes of Response when we looked at fetch(). The most obvious candidates are status, an integer (default value 200) and statusText (default value “OK”), which correspond to the HTTP status code and reason. The ok attribute is just a shorthand for checking that status is in the range 200-299 inclusive.

-

headers is the Response’s Headers object, with guard “response”. +

headers is the Response’s Headers object, with guard “response”. The url attribute reflects the URL of the corresponding request.

-

Response also has a type, which is “basic”, “cors”, “default”, +

Response also has a type, which is “basic”, “cors”, “default”, “error” or
“opaque”.

-
    -
  • +
      +
    • "basic": normal, same origin response, with all headers exposed except
      “Set-Cookie” and “Set-Cookie2″.
    • -
    • +
    • "cors": response was received from a valid cross-origin request. Certain headers and the bodymay be accessed.
    • -
    • +
    • "error": network error. No useful information describing the error is available. The Response’s status is 0, headers are empty and immutable. This is the type for a Response obtained from Response.error().
    • -
    • +
    • "opaque": response for “no-cors” request to cross-origin resource. Severely
      restricted
      -
    • -
    -

    The “error” type results in the fetch() Promise rejecting with +

  • +
+

The “error” type results in the fetch() Promise rejecting with TypeError.

-

There are certain attributes that are useful only in a ServiceWorker scope. +

There are certain attributes that are useful only in a ServiceWorker scope. The
idiomatic way to return a Response to an intercepted request in ServiceWorkers is:

-
addEventListener('fetch',function(event){
-  event.respondWith(new Response("Response body",{
-    headers:{"Content-Type":"text/plain"}});});
-

As you can see, Response has a two argument constructor, where both arguments +

+
addEventListener('fetch', function(event) {
+  event.respondWith(new Response("Response body", {
+    headers: { "Content-Type" : "text/plain" }
+  });
+});
+
+

As you can see, Response has a two argument constructor, where both arguments are optional. The first argument is a body initializer, and the second is a dictionary to set the status, statusText and headers.

-

The static method Response.error() simply returns an error +

The static method Response.error() simply returns an error response. Similarly, Response.redirect(url, status) returns a Response resulting in
a redirect to url.

+

Dealing with bodies

-

Both Requests and Responses may contain body data. We’ve been glossing + +

Both Requests and Responses may contain body data. We’ve been glossing over it because of the various data types body may contain, but we will cover it in detail now.

-

A body is an instance of any of the following types.

-
    -
  • ArrayBuffer
  • -
  • +

    A body is an instance of any of the following types.

    + -

    In addition, Request and Response both offer the following methods to +

+

In addition, Request and Response both offer the following methods to extract their body. These all return a Promise that is eventually resolved with the actual content.

-
    -
  • arrayBuffer()
  • -
  • blob()
  • -
  • json()
  • -
  • text()
  • -
  • formData()
  • -
-

This is a significant improvement over XHR in terms of ease of use of +

    +
  • +arrayBuffer() +
  • +
  • +blob() +
  • +
  • +json() +
  • +
  • +text() +
  • +
  • +formData() +
  • +
+

This is a significant improvement over XHR in terms of ease of use of non-text data!

-

Request bodies can be set by passing body parameters:

-
var form =new FormData(document.getElementById('login-form'));
-fetch("/login",{
-  method:"POST",
+            

Request bodies can be set by passing body parameters:

+
+
var form = new FormData(document.getElementById('login-form'));
+fetch("/login", {
+  method: "POST",
   body: form
-})
-

Responses take the first argument as the body.

-
var res =new Response(new File(["chunk","chunk"],"archive.zip",{ type:"application/zip"}));
-

Both Request and Response (and by extension the fetch() function), +})

+
+

Responses take the first argument as the body.

+
+
var res = new Response(new File(["chunk", "chunk"], "archive.zip",
+                       { type: "application/zip" }));
+
+

Both Request and Response (and by extension the fetch() function), will try to intelligently determine the content type. Request will also automatically set a “Content-Type” header if none is set in the dictionary.

+

Streams and cloning

-

It is important to realise that Request and Response bodies can only be + +

It is important to realise that Request and Response bodies can only be read once! Both interfaces have a boolean attribute bodyUsed to determine if it is safe to read or not.

-
var res =new Response("one time use");
-console.log(res.bodyUsed);// false
-res.text().then(function(v){
-  console.log(res.bodyUsed);// true});
-console.log(res.bodyUsed);// true
+                
+
var res = new Response("one time use");
+console.log(res.bodyUsed); // false
+res.text().then(function(v) {
+  console.log(res.bodyUsed); // true
+});
+console.log(res.bodyUsed); // true
  
-res.text().catch(function(e){
-  console.log("Tried to read already consumed Response");});
-

This decision allows easing the transition to an eventual stream-based Fetch +res.text().catch(function(e) { + console.log("Tried to read already consumed Response"); +});

+
+

This decision allows easing the transition to an eventual stream-based Fetch API. The intention is to let applications consume data as it arrives, allowing for JavaScript to deal with larger files like videos, and perform things like compression and editing on the fly.

-

Often, you’ll want access to the body multiple times. For example, you +

Often, you’ll want access to the body multiple times. For example, you can use the upcoming Cache API to store Requests and Responses for offline use, and Cache requires bodies to be available for reading.

-

So how do you read out the body multiple times within such constraints? +

So how do you read out the body multiple times within such constraints? The API provides a clone() method on the two interfaces. This will return a clone of the object, with a ‘new’ body. clone() MUST be called before the body of the corresponding object has been used. That is, clone() first, read later.

-
addEventListener('fetch',function(evt){var sheep =new Response("Dolly");
-  console.log(sheep.bodyUsed);// falsevar clone = sheep.clone();
-  console.log(clone.bodyUsed);// false
+                
+
addEventListener('fetch', function(evt) {
+  var sheep = new Response("Dolly");
+  console.log(sheep.bodyUsed); // false
+  var clone = sheep.clone();
+  console.log(clone.bodyUsed); // false
  
   clone.text();
-  console.log(sheep.bodyUsed);// false
-  console.log(clone.bodyUsed);// true
+  console.log(sheep.bodyUsed); // false
+  console.log(clone.bodyUsed); // true
  
-  evt.respondWith(cache.add(sheep.clone()).then(function(e){return sheep;});});
+ evt.respondWith(cache.add(sheep.clone()).then(function(e) { + return sheep; + }); +});
+
+

Future improvements

-

Along with the transition to streams, Fetch will eventually have the ability + +

Along with the transition to streams, Fetch will eventually have the ability to abort running fetch()es and some way to report the progress of a fetch. These are provided by XHR, but are a little tricky to fit in the Promise-based nature of the Fetch API.

-

You can contribute to the evolution of this API by participating in discussions +

You can contribute to the evolution of this API by participating in discussions on the WHATWG mailing list and in the issues in the Fetch and ServiceWorker specifications.

-

For a better web!

-

The author would like to thank Andrea Marchesini, Anne van Kesteren and Ben
-Kelly for helping with the specification and implementation.

+

For a better web!

+

The author would like to thank Andrea Marchesini, Anne van Kesteren and Ben
+Kelly for helping with the specification and implementation.
+

+ +
+ + +
diff --git a/resources/tests/readability/003/expected.html b/resources/tests/readability/003/expected.html index cfa065f..2013a9d 100644 --- a/resources/tests/readability/003/expected.html +++ b/resources/tests/readability/003/expected.html @@ -1,5 +1,6 @@ -

Test document title

-

+

+

Test document title

+

Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo @@ -7,11 +8,12 @@ cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

-

+

Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. -

+

+
diff --git a/resources/tests/readability/aclu/expected.html b/resources/tests/readability/aclu/expected.html index 70cf7f2..a0b80a9 100644 --- a/resources/tests/readability/aclu/expected.html +++ b/resources/tests/readability/aclu/expected.html @@ -1,107 +1,124 @@
-

+

I don't use Facebook. I'm not technophobic — I'm a geek. I've been using email since the early 1990s, I have accounts on hundreds of services around the net, and I do software development and internet protocol design both for work and for fun. I believe that a globe-spanning communications network like the internet can be a positive social force, and I publish much of my own work on the open web.

-

+

But Facebook and other massive web companies represent a strong push toward unaccountable centralized social control, which I think makes our society more unequal and more unjust. The Cambridge Analytica scandal is one instance of this long-running problem with what I call the "surveillance economy." I don't want to submit to these power structures, and I don’t want my presence on such platforms to serve as bait that lures other people into the digital panopticon.

-

+

But while I've never "opted in" to Facebook or any of the other big social networks, Facebook still has a detailed profile that can be used to target me. I've never consented to having Facebook collect my data, which can be used to draw very detailed inferences about my life, my habits, and my relationships. As we aim to take Facebook to task for its breach of user trust, we need to think about what its capabilities imply for society overall. After all, if you do #deleteFacebook, you'll find yourself in my shoes: non-consenting, but still subject to Facebook’s globe-spanning surveillance and targeting network.

-

+

There are at least two major categories of information available to Facebook about non-participants like me: information from other Facebook users, and information from sites on the open web.

-

Information from other Facebook users

-

+

+ Information from other Facebook users +

+

When you sign up for Facebook, it encourages you to upload your list of contacts so that the site can "find your friends." Facebook uses this contact information to learn about people, even if those people don't agree to participate. It also links people together based on who they know, even if the shared contact hasn't agreed to this use.

-

+

For example, I received an email from Facebook that lists the people who have all invited me to join Facebook: my aunt, an old co-worker, a friend from elementary school, etc. This email includes names and email addresses — including my own name — and at least one web bug designed to identify me to Facebook’s web servers when I open the email. Facebook records this group of people as my contacts, even though I've never agreed to this kind of data collection.

-

+

Similarly, I'm sure that I'm in some photographs that someone has uploaded to Facebook — and I'm probably tagged in some of them. I've never agreed to this, but Facebook could still be keeping track.

-

+

So even if you decide you need to join Facebook, remember that you might be giving the company information about someone else who didn't agree to be part of its surveillance platform.

-

Information from sites on the open Web

-

+

+ Information from sites on the open Web +

+

Nearly every website that you visit that has a "Like" button is actually encouraging your browser to tell Facebook about your browsing habits. Even if you don't click on the "Like" button, displaying it requires your browser to send a request to Facebook's servers for the "Like" button itself. That request includes information mentioning the name of the page you are visiting and any Facebook-specific cookies your browser might have collected. (See Facebook's own description of this process.) This is called a "third-party request."

-

+

This makes it possible for Facebook to create a detailed picture of your browsing history — even if you've never even visited Facebook directly, let alone signed up for a Facebook account.

-

+

Think about most of the web pages you've visited — how many of them don't have a "Like" button? If you administer a website and you include a "Like" button on every page, you're helping Facebook to build profiles of your visitors, even those who have opted out of the social network. Facebook’s “Share” buttons on other sites — along with other tools — work a bit differently from the “Like” button, but do effectively the same thing.

-

+

The profiles that Facebook builds on non-users don't necessarily include so-called "personally identifiable information" (PII) like names or email addresses. But they do include fairly unique patterns. Using Chromium's NetLog dumping, I performed a simple five-minute browsing test last week that included visits to various sites — but not Facebook. In that test, the PII-free data that was sent to Facebook included information about which news articles I was reading, my dietary preferences, and my hobbies.

-

+

Given the precision of this kind of mapping and targeting, "PII" isn’t necessary to reveal my identity. How many vegans examine specifications for computer hardware from the ACLU's offices while reading about Cambridge Analytica? Anyway, if Facebook combined that information with the "web bug" from the email mentioned above — which is clearly linked to my name and e-mail address — no guesswork would be required.

-

+

I'd be shocked if Facebook were not connecting those dots given the goals they claim for data collection:

-

+

+

We use the information we have to improve our advertising and measurement systems so we can show you relevant ads on and off our Services and measure the effectiveness and reach of ads and services. -

-

+

+
+

This is, in essence, exactly what Cambridge Analytica did.

-

Consent

-

+

+ Consent +

+

Facebook and other tech companies often deflect accusations against excessive data collection by arguing "consent" — that they harvest and use data with the consent of the users involved.

-

+

But even if we accept that clicking through a "Terms of Service" that no one reads can actually constitute true consent, even if we ignore the fact that these terms are overwhelmingly one-sided and non-negotiable, and even if we accept that it's meaningful for people to give consent when sharing data about other people who may have also opted in — what is the recourse for someone who has not opted into these systems at all?

-

+

Are those of us who have explicitly avoided agreeing to the Facebook terms of service simply fair game for an industry-wide surveillance and targeting network?

-

Privilege

-

+

+ Privilege +

+

I don’t mean to critique people who have created a Facebook profile or suggest they deserve whatever they get.

-

+

My ability to avoid Facebook comes from privilege — I have existing social contacts with whom I know how to stay in touch without using Facebook's network. My job does not require that I use Facebook. I can afford the time and expense to communicate with my electoral representatives and political allies via other channels.

-

+

Many people do not have these privileges and are compelled to "opt in" on Facebook's non-negotiable terms.

-

+

Many journalists, organizers, schools, politicians, and others who have good reasons to oppose Facebook's centralized social control feel compelled by Facebook's reach and scale to participate in their practices, even those we know to be harmful. That includes the ACLU.

-

+

Privacy should not be a luxury good, and while I'm happy to encourage people to opt out of these subtle and socially fraught arrangements, I do not argue that anyone who has signed up has somehow relinquished concerns about their privacy. We need to evaluate privacy concerns in their full social contexts. These are not problems that can be resolved on an individual level, because of the interpersonal nature of much of this data and the complexities of the tradeoffs involved.

-

Technical countermeasures

-

+

+ Technical countermeasures +

+

While they may not solve the problem, there are some technical steps people can take to limit the scope of these surveillance practices. For example, some web browsers do not send "third-party cookies" by default, or they scope cookies so that centralized surveillance doesn't get a single view of one user. The most privacy-preserving modern browser is the Tor Browser, which everyone should have installed and available, even if it's not the browser they choose to use every day. It limits the surveillance ability of systems that you have not signed up for to track you as you move around the web.

-

- You can also modify some browsers — for example, with plug-ins for Firefox and Chrome — so that they do not send third-partyrequests at all. Firefox is also exploring even more privacy-preserving techniques.

-

+

+ You can also modify some browsers — for example, with plug-ins for Firefox and Chrome — so that they do not send third-party requests at all. Firefox is also exploring even more privacy-preserving techniques. +

+

It can’t be denied, though, that these tools are harder to use than the web browsers most people are accustomed to, and they create barriers to some online activities. (For example, logging in to some sites and accessing some web applications is impossible without third-party cookies.)

-

+

Some website operators take their visitors' privacy more seriously than others, by reducing the amount of third-party requests. For example, it's possible to display "share on Facebook" or "Like" buttons without sending user requests to Facebook in the first place. The ACLU's own website does this because we believe that the right to read with privacy is a fundamental protection for civic discourse.

-

+

If you are responsible for running a website, try browsing it with a third-party-blocking extension turned on. Think about how much information you're requiring your users to send to third parties as a condition for using your site. If you care about being a good steward of your visitors' data, you can re-design your website to reduce this kind of leakage.

-

Opting out?

-

+

+ Opting out? +

+

Some advertisers claim that you can "opt out" of their targeted advertising, and even offer a centralized place meant to help you do so. However, my experience with these tools isn't a positive one. They don't appear to work all of the time. (In a recent experiment I conducted, two advertisers’ opt-out mechanisms failed to take effect.) And while advertisers claim to allow the user to opt out of "interest-based ads," it's not clear that the opt-outs govern data collection itself, rather than just the use of the collected data for displaying ads. Moreover, opting out on their terms requires the use of third-party cookies, thereby enabling another mechanism that other advertisers can then exploit.

-

+

It's also not clear how they function over time: How frequently do I need to take these steps? Do they expire? How often should I check back to make sure I’m still opted out? I'd much prefer an approach requiring me to opt in to surveillance and targeting.

-

Fix the surveillance economy, not just Facebook

-

+

+ Fix the surveillance economy, not just Facebook +

+

These are just a few of the mechanisms that enable online tracking. Facebook is just one culprit in this online "surveillance economy," albeit a massive one — the company owns Instagram, Atlas, WhatsApp, and dozens of other internet and technology companies and services. But it’s not the only player in this space. Google’s business model also relies on this kind of surveillance, and there are dozens of smaller players as well.

-

+

As we work to address the fallout from the current storm around Facebook and Cambridge Analytica, we can't afford to lose sight of these larger mechanisms at play. Cambridge Analytica's failures and mistakes are inherent to Facebook's business model. We need to seriously challenge the social structures that encourage people to opt in to this kind of surveillance. At the same time, we also need to protect those of us who manage to opt out.

-
+ diff --git a/resources/tests/readability/aktualne/expected.html b/resources/tests/readability/aktualne/expected.html index 8668585..3b5f940 100644 --- a/resources/tests/readability/aktualne/expected.html +++ b/resources/tests/readability/aktualne/expected.html @@ -3,68 +3,75 @@ Zázrak jedné sezony? West Ham dává pochybovačům stále pádnější odpovědi a fotbalový svět si začíná uvědomovat, že se absolutní anglická fotbalová elita rozrůstá o nového člena. Tým manažera Davida Moyese prohání giganty i v aktuálním ročníku Premier League.

-

+

Pět vítězných soutěžních duelů v řadě, během nich jediný inkasovaný gól. Čtvrté místo v lize, stejný bodový zisk jako loňský šampion Manchester City a nadšené ohlasy z tábora těch nejrenomovanějších komentátorů ostrovního fotbalu.

-

+ +

West Ham je opět v kurzu, nadšené ohlasy po nedělní jasné výhře 4:1 na hřišti Aston Villy zaplnily anglický mediální prostor.

-

+

"Stali se excelentním týmem. Jsou skvělí ve všech částech hřiště a David Moyes si zaslouží obrovský kredit za to, do jaké pozice je dostal," píše na Twitter Gary Lineker.

-

+

"Nenapadá mě jediný důvod, proč by letos nemohli skončit v elitní čtyřce," přidává se Emile Heskey, někdejší útočník Liverpoolu. "Je fér říct, že vypadají fantasticky. Moyes je neskutečně oživil."

-

+

I Heskey si všiml, že se Kladiváři skvěle vyrovnávají s náročným programem a pro ně novou rolí: účastí ve více soutěžích najednou. Moyes zůstává konzervativní v určování základní sestavy, chytře ale rozšířil kádr a v Evropské lize či ligovém poháru nechává některé opory odpočívat. Výjimkou potvrzující pravidlo je přitom Tomáš Souček, o jehož nezbytnosti bude řeč níže.

-

+

"Klíčová věc je ta, že když udělá změny, pořád jim zůstává stejná struktura. To je něco, co pravidelně říkáme třeba o Manchesteru City. Ve hře neustále zůstává nějaká fundamentální filosofie. West Ham to má podobně a už kvůli tomu je třeba před Moyesem smeknout," přirovnává Heskey.

-

Podívejte se na důležité momenty zápasu Aston Villa - West Ham:

-

+

+ Podívejte se na důležité momenty zápasu Aston Villa - West Ham: +

+ + +

V Evropské lize má West Ham po třech zápasech plný bodový zisk. V anglickém ligovém poháru dobyl čtvrtfinále, když vyřadil oba bohaté velkokluby z Manchesteru.

-

+

Čeští fotbalisté nicméně momentálně nejsou ve světlech těch nejjasnějších reflektorů.

-

+

Vladimír Coufal už sice uzdravil poraněné tříslo, v sestavě ale před ním dostal přednost rozjetý Ben Johnson. Anglický mladík další působivé představení okořenil parádním gólem a potvrdil, že se stává tvrdou konkurencí pro českého reprezentačního beka.

-

+

Tomáš Souček zůstává nepostradatelným členem základní sestavy, navzdory tomu, že jeho poslední výkony působí nenápadně.

-

+

"Pořád toho odvádí strašnou spoustu mimo hlavní pozornost. Jsou to důležité věci, které je snadné přehlédnout," píše ve svém hodnocení server Claret and Hugh. 

-

+ +

"S Declanem Ricem vytvořil silné partnerství a udělal spoustu těžké práce. Má dobrou rozehrávku. Jediné, na co si lze stěžovat, jsou jeho občasná špatná rozhodnutí ve finální třetině hřiště," hodnotí českého středopolaře londýnský večerník Evening Standard.

-

+

Web Football.London to vidí podobně. "Opět byl silný ve vzduchu, na obou koncích hřiště. Ve finální fázi se ale nerozhodoval dobře, příliš často volil špatnou variantu."

-

+

Moyes nicméně nenechává Součka oddechnout. V pěti posledních utkáních, které West Ham odehrál během pouhých čtrnácti dnů, chyběl Čech jen pár minut v závěru na Evertonu, když utrpěl zranění v obličeji.

-

+

Fanoušci pravidelně spekulují o únavě, skotský manažer ale - jak se zdá - bude mít v sestavě raději unaveného Součka než kohokoli jiného. Zvlášť, když Alex Král, plánovaný back-up do středu zálohy, stále není k dispozici.

-

+

Zatímco v minulé sezoně Souček častokrát zastínil svého kolegu Rice, letos je to právě anglický reprezentant, kdo si užívá zasloužené ódy na svou adresu.

-

+

"Hraje prostě velkolepě a připomínám, že je mu stále jen dvaadvacet let," kroutí hlavou Lineker. Není sám. Ještě před pár měsíci se většina odborníků pozastavovala nad údajnou cenovkou kolem 100 milionů liber. Nyní už zaznívají hlasy o tom, jak může být i tato hranice při případném přestupu Declana Rice výrazně překročena.

-

+ +

S blížícím se zimním přestupním termínem budou spekulace nabývat na síle, fanoušci Hammers ale věří, že Rice zůstane nejméně do léta. Jeho spokojenost je do očí bijící, stejně jako ochota nechat na hřišti všechno ve prospěch Clarets and Blues.

-

+

"Náš kolektiv je teď opravdu speciální. Působíme ve výjimečném prostředí. Každé ráno se probouzíme s obrovskou touhou po dalším tréninku. Jsme nadšení," tvrdí mladá anglická superstar.

-

+

"Jsme na děleném třetím místě. Lidé se před sezonou hodně ptali, zda to můžeme dokázat znovu. Ukázali jsme, že ano. Ale musíme pokračovat. Tohle musí být náš standard. Nesmíme polevit, pokud chceme být velkým týmem," zdůrazňuje Rice.

-
+

Pokud jste v článku zaznamenali chybu nebo překlep, dejte nám, prosím, vědět prostřednictvím kontaktního formuláře. Děkujeme!

diff --git a/resources/tests/readability/archive-of-our-own/expected.html b/resources/tests/readability/archive-of-our-own/expected.html index 65e4ad1..422e8af 100644 --- a/resources/tests/readability/archive-of-our-own/expected.html +++ b/resources/tests/readability/archive-of-our-own/expected.html @@ -1,311 +1,317 @@
-

+

Chapter Text

-

+

Izuku was struggling to understand how he had even managed to get here, seated before the archvillain of Japan with only a sense of dread to keep him company. All Might sat concealed in an observation room, of the firm opinion that he could only aggravate the prisoner and he sent Izuku off with a strained smile. A vague haze hovered over Izuku’s memory. It started with a simple conversation gone astray on a long drive home.

-

+ +

“So, who is All For One? Do we know anything about him beyond what you told me before? He’s been imprisoned for months now.” Izuku remembered asking All Might from the backseat of the car as Detective Tsukauchi leisurely drove along a sprawling highway.

-

+

Playing on the car radio was an aftermath report of a villain attack in downtown Tokyo. Izuku caught the phrase “liquid body” from the female reporter before Detective Tsukauchi changed the channel.

-

+

“Nope. Still nothing. No one really wants to speak to him,” All Might had replied brightly. “He gives off polite airs, but he’s a piece of work.” All Might’s mostly obstructed shoulders in the front seat shrugged. “Not much you can do with someone like him. Everything that comes out is a threat or taunt.” All Might carefully waved his hand in a circular motion towards the side of his head.

-

+

“No one’s even made it through a full interview with him, from what I’ve heard,” Detective Tsukauchi added from behind the wheel. “He plays mind games with them. The prison also has a “no recent events” policy on any discussions with him as well. Just in case he ends up with ideas or has some means of communicating. Given that people only want to ask him about current events, it doesn’t leave much to talk about.”

-

+

“Wait, they still don’t know what Quirks he has?” Izuku asked exasperatedly. “They can’t if there’s still an information block on visits.”

-

+

“Nope. We have no idea what he can do. They can run DNA tests, but it’s not like anyone apart from him even knows how his Quirk works. They could get matches with any number of people, but if they’re not in a database then we can’t cross-reference them anyway. Even if they run an analysis, the data doesn’t mean anything without the ability to interpret it,” All Might gestured with a skeletal finger. “It’s a waste of time after the initial tests were conducted. They weren’t game to MRI him either, given he’s definitely got a Quirk that creates metal components.”

-

+

“No one’s bothered to ask him anything about… anything?” Izuku asked, dumbfounded. “He must be around two-hundred years old and people can’t think of a single non-current affairs thing to ask him?”

-

+

In some ways it was unfathomable that they’d let a potential resource go to waste. On the other hand, said potential resource had blown up a city, murdered numerous people and terrorised Japan for over a century. At the very least.

-

+

“Well, I tried to ask him about Shigaraki, but he didn’t say much of anything really. Some garbage about you being too dependent on me and him letting Shigaraki run wild and how he just wanted to be the ultimate evil,” All Might shrugged again. “He spends too much time talking about nothing.”

-

+

Izuku shifted his head onto his arm. “But, that’s not really nothing, is it?”

-

+

“What do you mean?” Izuku had the feeling that All Might would have been looking at him with the you’re about to do something stupid aren’t you expression that was thankfully becoming less common.

-

+

“Well, he clearly doesn’t know anything about us, All Might, if he thinks that you’re just going to let go of me after not even two years of being taught. Maybe Shigaraki was dependent on adult figures, but I don’t even remember my dad and mum’s been busy working and keeping the house together. I’ve never had a lot of adult supervision before,” Izuku laughed nervously. “I had to find ways to keep myself entertained. If anything, I’m on the disobedient side of the scale.” All Might outright giggled.

-

+

“I’ll say, especially after what happened with Overhaul. I’m surprised your mother let you leave the dorms again after that.”

-

+

“I’m surprised she didn’t withdraw and ground me until I was thirty.”

-

+

“Oh? That strict?” Tsukauchi asked.

-

+

“She has her moments,” Izuku smiled fondly. “Do you think she’d agree to me asking the archvillain of Japan about his Quirk?” Izuku asked, only partially joking. There was an itch at the back of his head, a feeling of something missing that poked and prodded at his senses.

-

+

All Might coughed and sprayed the dash with a fine red mist. “Absolutely not! I forbid it!”

-

+

“That’s exactly why I’m asking her and not you,” Izuku grinned from the backseat.

-

+

“He’s evil!”

-

+

“He’s ancient. You honestly don’t wonder about the sort of things someone with that life experience and Quirk would have run across to end up the way he did?”

-

+

“Nope, he made it perfectly clear that he always wanted to be the supreme evil,” All Might snipped through folded arms.

-

+

“Yeah, and I’ll just take his word for that, won’t I?” Izuku grinned. “If he does nothing but lie, then that’s probably one too, but there’s a grain of truth in there somewhere.”

-

+

“What would you even do? Harass him into telling you his life story?” All Might sighed.

-

+

“Not when I can kill him with kindness. Who knows, it might even be poisonous for him.”

-

+

“You’re explaining this to your mother. Teacher or not, I’m not being on the receiving end of this one.”

-

+

Izuku blinked for a moment. “You’ll let me?”

-

+

“I’m not entirely for it, but any prospective information on what influenced Shigaraki can only be a good thing. If anything goes south we can pull you out pretty easily. Just be aware of who and what you’re dealing with.” Struggling, All Might turned a serious look to Izuku around the side of the seat. “Only if your mother gives the okay.”

-

+

The conversation turned to school for the rest of the way.

-

+ +

It might have been curiosity or it might have been the nagging sensation that chewed at his brain for the three weeks that he researched the subject of the conversation. All For One was a cryptid. Mystical in more ways than one, he was only a rumour on a network that was two-hundred years old. There were whispers of a shadowy figure who once ruled Japan, intermingled with a string of conspiracies and fragmented events.

-

- Izuku had even braved the dark web, poking and prodding at some of the seedier elements of the world wide web. The internet had rumours, but the dark web had stories.

-

+

+ Izuku had even braved the dark web, poking and prodding at some of the seedier elements of the world wide web. The internet had rumours, but the dark web had stories.
+

+

An implied yakuza wrote about his grandfather who lost a fire manipulation Quirk and his sanity without any reason. His grandfather had been institutionalised, crying and repeating “he took it, he took it” until his dying days. No one could console him.

-

+

Another user spoke of a nursing home where a room full of dementia residents inexplicably became docile and no longer used their Quirks on the increasingly disturbed staff. The nursing home erupted into flames just before a court case against them commenced.

-

+

A user with neon pink text spoke of how their great-great-great-great grandmother with a longevity Quirk had simply aged rapidly one day and passed away in her sleep, her face a mask of terror. No cause had ever been found.

-

+

A hacker provided a grainy CCTV recording of a heist and a scanned collection of documents from over a century ago, where there was a flash of light and an entire bank vault had been emptied. What separated it from the usual robbery was that it contained a list containing confidential information on the Quirks of the First Generation. Izuku had greedily snavelled up and saved the video and documents to an external hard drive.

-

+

Paging through, Izuku saw someone recount how their Quirkless uncle had developed a warp Quirk and gone from rags to riches under a mysterious benefactor. A decade ago, the uncle had simply disappeared.

-

+

Numerous and terrifying, the stories were scattered nuggets of gold hidden across the web. They’d never last long, vanishing within hours of posting. Izuku bounced from proxy to proxy, fleeing from a series of deletions that seemed to follow Izuku’s aliased postings across snitch.ru, rabbit.az, aconspiracy.xfiles and their compatriots.

-

+

After thirty-two identity changes (all carefully logged in a separate notebook), a large amount of feigning communal interest in a lucky tabloid article on All For One which had been released at the start of the first of the three weeks, Izuku hung up his tinfoil hat and called it a month. He haphazardly tossed a bulging notebook into his bookshelf and lodged his hard drive in a gap containing seven others and went to dinner.

-

+

It took another week to present his research to All Might and Tsukauchi, whose jaws reached the proverbial floor.

-

+

“We never found any of this,” Detective Tsukauchi exclaimed. “How did you find all of it?”

-

+

“I asked the right people. Turns out criminals have very long and very unforgiving memories,” Izuku explained through sunken eyes. “There’s more than this that could be linked to him, but these ones seem to be the most obvious.”

-

+

“They would do, you can’t be head of the underworld without making an army of enemies,” All Might agreed. “You know, if you can get any more information about these events, I think you’ll give people a lot of peace of mind.”

-

+

“Provided mum agrees to it.”

-

+

“Only if she agrees to it.”

-

+

It took another month to convince his mother, who eventually gave in once All Might provided an extremely comprehensive schedule of how the visitations and any resulting research would be carefully balanced against Izuku’s schoolwork and internship.

-

+ +

The day of the visit finally arrived, four months after the initial conversation, much to Izuku’s dismay.

-

+

Izuku remembered how he had arrived, with the Detective and All Might escorting him through its sterile, white innards. A list of rules rattled off at the gate, “no current affairs” was chief among them and an assertion that he’d be dragged from the room if need be if Izuku was to breach any of them. No smuggling of communication devices, no weapons, no Quirks, nothing that could compromise the prisoner’s secure status.

-

+

Heavily armoured and drilled guards leading him underground into the deepest bowels of the Tartarus complex.

-

+

Izuku understood the rules, dressed casually in a cotton t-shirt with “Shirt” printed across it in haphazard English and clutching at a carefully screened and utterly blank notebook.

-

+

Across from him, behind reinforced glass, the archvillain of Japan was bound and unmoving.

-

+

“Hello,” Izuku initiated uncertainly. His skin had been crawling the moment he crossed the threshold, a memory of the encounter and escape at the Kamino Ward months ago.

-

+

“Ah, All Might’s disciple,” drawled All For One, “is he too cowardly to come himself? Yet I don’t hear the garments of a hero.” With hardly a word out, All For One had already lunged for the figurative jugular.

-

+

A stray thought of how does he know who I am if he’s blind and isn’t familiar with me? whispered its way through Izuku’s head.

-

+

“Oh, no,” Izuku corrected hastily, almost relieved at the lack of any pretence, “I asked if I could talk to you. This isn’t exactly hero related.”

-

+

“I’m surprised he said yes.” While there was little by way of expression, Izuku could just about sense the contempt dripping from the prisoner’s tone. It wasn’t anything he wasn’t expecting. Kacchan had already said worse to him in earlier years. Water off a duck’s back.

-

+

“Well, he’s not my legal guardian, so I think you should be more surprised that mum said yes. She’s stricter with these things than All Might,” Izuku corrected again. “Mum gave the okay, but that was a stressful discussion.” And there it was, a miniscule twitch from the man opposite. A spasm more than anything else. Interesting. Pinned down as he was, the prisoner oozed irritation.

-

+

“At least your mother is a wise person. I wonder why the student doesn’t heed all of the advice of the teacher.” All For One’s tone didn’t indicate a question, so much as an implicit statement that All Might wasn’t worth listening to in any capacity. Kacchan would have hated the comparison, but the hostility had an almost comfortable familiarity. “He no doubt warned you off speaking to me, overprotective as he is, but here you are.”

-

+

Izuku found himself smiling at the thought of Kacchan’s outrage if he ever found out about the mental comparison as he replied. “I don’t think it’s normal for anyone my age to listen completely to their teachers. We pick and choose and run with what works best for us. He warned me, but I’m still here. Mum warned me as well, but I think she cared more about the time management aspect of it."

-

+

“Is that a recent development?” All For One probed.

-

+

“Not really. My old homeroom teacher told me not to bother applying to U.A.” His mother’s beaming face had carried Izuku through the cheerful and resolute signing of that application form.

-

+

“I see you followed their advice to the letter,” came the snide, dismissive reply.

-

+

Izuku hoisted up his legs and sat cross-legged in his seat. Leaning slightly forward as he did so as to better prop up his notebook.

-

+

“You’re a walking contrarian, aren’t you? All Might told me about his run ins with you. What someone does or doesn’t do really doesn’t matter to you, you’ll just find a way to rationalise it as a negative and go on the attack anyway. What you’re currently doing is drawing attention away from yourself and focusing it on me so you can withhold information.” Izuku flipped open his notebook and put pen to paper. “You’ve got something fairly big to hide and you diverting attention exposes that motivation as existing anyway. The only real questions here are what and why?” Izuku paused in mortification as the man opposite’s lips parted. “I just said that aloud, didn’t I?”

-

+

Of the responses Izuku had expected, it wasn’t laughter. Unrestrained, Izuku would have expected a violent outburst. In this situation, he would have expected another scathing comment. Instead, All For One laughed breathily, leaning into his bonds. Wheezingly he spoke, “I’ll have to change tactics, if that one’s too transparent for you. How refreshing.”

-

+

Doing his best not to glow a blinding red and simultaneously pale at the interest, Izuku carried on. “I add it to the list when you do. I’m not emotionally involved enough to really be impacted by what you’re saying. I know about you in theory, but that’s it. Maybe All Might has a history with you, but I don’t really know enough about you personally to…”

-

+

“Care,” All For One supplied, somewhat subdued as he struggled to breathe. “You’re only here to satisfy your curiosity as to whether or not the stories were true.”

-

- Izuku nodded, scratching at his notebook with his left hand. “Yes and no, I’m actually here to ask you about how your Quirk works.” For now.

-

+

+ Izuku nodded, scratching at his notebook with his left hand. “Yes and no, I’m actually here to ask you about how your Quirk works.” For now. +

+

Another chortle, more restrained than the last.

-

+

“What makes you think others haven’t already asked?” Had All For One been unrestrained, Izuku could imagine the stereotypical scene of the villain confidently leaning back in some overblown chair in a secret lair, drink of choice in hand, if the tone of voice was any indication. Deflections aside, the man easily rose to each comment.

-

+

“Whether or not they asked it’s irrelevant if they can’t read the answers.” Answers didn’t matter if the people involved were too attached to read into the answers. If none of the interviewers had managed a full interview, then it seemed unlikely that any sort of effort was put into understanding the villain.

-

+

“And you think you can? What expertise do you hold above theirs?” Doubt and reprimand weighted the words. Oddly enough, had Izuku been any younger he could have mistaken the man for a disapproving parent rebuking an overly ambitious child. Albeit an extremely evil one.

-

+

Izuku inhaled shortly and went for it. “If there’s something I know, it’s Quirks and how they work. Maybe I don’t know you, but I don’t really need to. Quirks fall under broad categories of function. You can take and give, consent doesn’t seem to be a factor. You either can’t “see” certain types of Quirks or you need to have prior knowledge of it before you take it with what I know about your brother. Despite your nom de guerre, because we both know it’s not your real name, you have a history of giving multiple Quirks and causing brain damage to the receiver. You clearly aren’t impacted by those same restrictions, so it must either alter your brain mapping or adjust functions to allow for simultaneous use and storage. It also must isolate or categorise the Quirks you stock, because from the few people who do remember you, you creating certain Quirks is always in the context of giving them to someone else meaning there’s probably an inherent immunity to stop it from tainting your own Quirk with a mutation,” Izuku mumbled, almost to himself. “The only thing really in question about your Quirk is the finer details and whether or not you need to maintain those features or if they’re inherent and your hard limit for holding Quirks.”

-

+

There was silence, for only a moment. “If only my hands were free, I would clap for such a thoughtful assessment. Clearly you’re not all brawn,” All For One positively purred. “Speculate away.” A wide and slightly unhinged smile was directed at Izuku.

-

+

It was all Izuku could do not to wince at the eagerness. An image of a nervous All Might, hidden in the observation room above with the grim-faced prison staff, came to mind.

-

+

“I note that you said thoughtful and not correct,” and Izuku breathed and unsteadily jotted it down in his notebook. “You don’t seem bothered by the guess.”

-

+

“Few people live long enough to question my Quirk, let alone have the talent to guess so thoughtfully at its functions. It seems we share a hobby.” There was something terribly keen in that voice that hadn’t been there before, twisting itself through the compliment.

-

+

“I suppose it helps that you’re playing along out of boredom,” Izuku verbally dodged, unease uncoiling itself from the back of his mind.

-

+

“I was playing along out of boredom,” All For One corrected smoothly. “Now, I’m curious. Admittedly, my prior assumptions of you weren’t generous, but I’ve been too hasty in my assessments before.”

-

+

“I’ll pack up and leave now if that’s the case,” Izuku replied with only half an ear on the conversation as the words on his page began to drastically expand to distract himself from the building anxiety.

-

+

“Sarcasm, so you do have characteristics of a normal teenager. Your willingness to maim yourself has often left me wondering…”

-

+

“You’re deflecting again,” Izuku observed. “I’m not sure if that’s a nervous habit for you or if you’re doing it because I’m close to being right about your Quirk. That being said, I don’t think you know what a normal teenager is if Shigaraki is any indication. He’s about seven years too late for his rebellious phase.”

-

+

“I’m hurt and offended,” came the amused reply.

-

+

“By how Shigaraki ended up or your parenting? You only have yourself to blame for both of them.”

-

+

“How harsh. Shigaraki is a product of society that birthed him. I can’t take credit for all of the hard work,” All For One laid out invitingly. Perhaps someone else would have risen to the bait, but Izuku was already packing his mental bags and heading for the door.

-

+

Clearly the prisoner’s anticipation had registered poorly with someone in the observation room, because a voice rang through the air. “Time’s up Midoriya-kun.”

-

+

“Okay!” Izuku called back and etched out his last thoughtful of words, untangled his legs and rose to his feet.

-

+

“What a shame, my visitations are always so short,” All For One spoke mournfully.

-

+

“Well, you did blow up half a city. They could have just let you suffocate instead. Same time next week, then?” Izuku offered brightly, notebook stuffed into a pocket and was followed out the door by wheezing laughter.

-

+

It was only after he had made it safely back to the communal room where All Might waited did he allow the spring to fade from his step and discard his nervous smile. Shuddering, he turned to All Might whose face was set in a grimace.

-

+

“I won’t say I told you so,” All Might offered, perched on the edge of his couch like a misshapen vulture.

-

+

“He’s… not really what I was expecting. I was expecting someone, more openly evil.” Izuku allowed himself to collapse into the leather of the seat. He shakily reached for the warm tea that had clearly been prepared the moment Izuku left the cell. “I suppose he does it to lull people into a false sense of security. I didn’t understand how someone with only half a set of expressions could have “villain” written all over them until I met him.”

-

+

“He’s always been like that. He feigns concern and sympathy to lure in society’s outcasts. They’re easy targets,” All Might said through a mouthful of biscuit.

-

+

“Has he ever tried it on any of the One For All successors?”

-

+

“Not really, but you might have accidentally given him the incentive for it. He never had access to any of the One For All wielders while they were young.” All Might snorted, “not that it’ll make a difference with you”.

-

+

“I think he was trying to gauge me for a world view before the wardens ended it. I need more time to work out his response to the stuff on his Quirk.”

-

+

“He’s conversation starved since it’s solitary confinement. If what the people monitoring his brain activity said was true, you’re the most exciting thing to have happened to him in months. He replied after you left, said he was looking forward to it.”

-

+

“That’s pretty sad.”

-

+

“It’s even sadder that we’re the only two members of the public who have had anything to do with him. Stain gets a pile of mail from his “fans”, but All For One has nothing,” All Might waved a tea spoon. “That’s what he gets.”

-

+

“Let’s get out of here and tell Detective Tsukauchi how it went.” Izuku gulped down his tea and headed for the exit, with him and All Might reaching it at roughly the same time.

-

+

“At least your mum’s making katsudon for us tonight,” was All Might’s only optimistic comment.

-

+

Anxiety was still ebbing over Izuku after Tsukauchi had been debriefed in the car.

-

“It seems we share a hobby.” Haunted Izuku on the drive home. As if ripping someone’s Quirk from them and leaving them lying traumatised on the ground was just a fun pastime and not an act of grievous bodily harm. +

+ “It seems we share a hobby.” Haunted Izuku on the drive home. As if ripping someone’s Quirk from them and leaving them lying traumatised on the ground was just a fun pastime and not an act of grievous bodily harm.

-

+

And he’d be dealing with him again in another week.

-
+ diff --git a/resources/tests/readability/ars-1/expected.html b/resources/tests/readability/ars-1/expected.html index 402adc5..22847c1 100644 --- a/resources/tests/readability/ars-1/expected.html +++ b/resources/tests/readability/ars-1/expected.html @@ -1,51 +1,93 @@
-

+
+

Biz & IT —

-

+ +

Two-year-old bug exposes thousands of servers to crippling attack. -

-
Just-released Minecraft exploit makes it easy to crash game servers

+

+
+ +
+
+
+
+ Just-released Minecraft exploit makes it easy to crash game servers +
+ +
+
+ +

A flaw in the wildly popular online game Minecraft makes it easy for just about anyone to crash the server hosting the game, according to a computer programmer who has released proof-of-concept code that exploits the vulnerability.

-

+

"I thought a lot before writing this post," Pakistan-based developer Ammar Askar wrote in a blog post published Thursday, 21 months, he said, after privately reporting the bug to Minecraft developer Mojang. "On the one hand I don't want to expose thousands of servers to a major vulnerability, yet on the other hand Mojang has failed to act on it."

-

+

The bug resides in the networking internals of the Minecraft protocol. It allows the contents of inventory slots to be exchanged, so that, among other things, items in players' hotbars are displayed automatically after logging in. Minecraft items can also store arbitrary metadata in a file format known as Named Binary Tag (NBT), which allows complex data structures to be kept in hierarchical nests. Askar has released proof-of-concept attack code he said exploits the vulnerability to crash any server hosting the game. Here's how it works.

-
-

+

+

The vulnerability stems from the fact that the client is allowed to send the server information about certain slots. This, coupled with the NBT format’s nesting allows us to craft a packet that is incredibly complex for the server to deserialize but trivial for us to generate.

-

+

In my case, I chose to create lists within lists, down to five levels. This is a json representation of what it looks like.

-
rekt:{list:[list:[list:[list:[list:[list:[]list:[]list:[]list:[]...]...]...]...]...]...}
-

+

+
rekt: {
+    list: [
+        list: [
+            list: [
+                list: [
+                    list: [
+                        list: [
+                        ]
+                        list: [
+                        ]
+                        list: [
+                        ]
+                        list: [
+                        ]
+                        ...
+                    ]
+                    ...
+                ]
+                ...
+            ]
+            ...
+        ]
+        ...
+    ]
+    ...
+}
+
+

The root of the object, rekt, contains 300 lists. Each list has a list with 10 sublists, and each of those sublists has 10 of their own, up until 5 levels of recursion. That’s a total of 10^5 * 300 = 30,000,000 lists.

-

+

And this isn’t even the theoretical maximum for this attack. Just the nbt data for this payload is 26.6 megabytes. But luckily Minecraft implements a way to compress large packets, lucky us! zlib shrinks down our evil data to a mere 39 kilobytes.

-

+

Note: in previous versions of Minecraft, there was no protocol wide compression for big packets. Previously, NBT was sent compressed with gzip and prefixed with a signed short of its length, which reduced our maximum payload size to 2^15 - 1. Now that the length is a varint capable of storing integers up to 2^28, our potential for attack has increased significantly.

-

+

When the server will decompress our data, it’ll have 27 megs in a buffer somewhere in memory, but that isn’t the bit that’ll kill it. When it attempts to parse it into NBT, it’ll create java representations of the objects meaning suddenly, the server is having to create several million java objects including ArrayLists. This runs the server out of memory and causes tremendous CPU load.

-

+

This vulnerability exists on almost all previous and current Minecraft versions as of 1.8.3, the packets used as attack vectors are the 0x08: Block Placement Packet and 0x10: Creative Inventory Action.

-

+

The fix for this vulnerability isn’t exactly that hard, the client should never really send a data structure as complex as NBT of arbitrary size and if it must, some form of recursion and size limits should be implemented.

-

+

These were the fixes that I recommended to Mojang 2 years ago.

-
-

+

+

Ars is asking Mojang for comment and will update this post if company officials respond.

-
-
+ + + diff --git a/resources/tests/readability/base-url-base-element-relative/expected.html b/resources/tests/readability/base-url-base-element-relative/expected.html index 21a19cc..de59b81 100644 --- a/resources/tests/readability/base-url-base-element-relative/expected.html +++ b/resources/tests/readability/base-url-base-element-relative/expected.html @@ -1,5 +1,6 @@ -

Lorem

-

+

+

Lorem

+

Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo @@ -7,26 +8,27 @@ cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

-

Links

-

link

-

link

-

link

-

link

-

link

-

link

-

link

-

link

-

Images

-

-

-

-

-

-

Foo

-

+

Links

+

link

+

link

+

link

+

link

+

link

+

link

+

link

+

link

+

Images

+

+

+

+

+

+

Foo

+

Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. -

+

+
diff --git a/resources/tests/readability/basic-tags-cleaning/expected.html b/resources/tests/readability/basic-tags-cleaning/expected.html index c5d7e2f..6bf8114 100644 --- a/resources/tests/readability/basic-tags-cleaning/expected.html +++ b/resources/tests/readability/basic-tags-cleaning/expected.html @@ -1,20 +1,23 @@
-

Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod +

Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.

-

Ut enim ad minim veniam, +

Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

-

Duis aute irure dolor in reprehenderit in voluptate velit esse + +

Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

-
+
-

Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, +

Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

-

Duis aute irure dolor in reprehenderit in voluptate velit esse + + +

Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

-
+
diff --git a/resources/tests/readability/bbc-1/expected.html b/resources/tests/readability/bbc-1/expected.html index bb15336..2c031c8 100644 --- a/resources/tests/readability/bbc-1/expected.html +++ b/resources/tests/readability/bbc-1/expected.html @@ -1,5 +1,5 @@
-

President Barack Obama has admitted that his failure to pass "common sense gun safety laws" in the US is the greatest frustration of his presidency.

+

President Barack Obama has admitted that his failure to pass "common sense gun safety laws" in the US is the greatest frustration of his presidency.

In an interview with the BBC, Mr Obama said it was "distressing" not to have made progress on the issue "even in the face of repeated mass killings".

He vowed to keep trying, but the BBC's North America editor Jon Sopel said the president did not sound very confident.

However, Mr Obama said race relations had improved during his presidency.

@@ -19,9 +19,12 @@

Mr Obama lands in Kenya later on Friday for his first visit since becoming president.

But with just 18 months left in power, he said gun control was the area where he has been "most frustrated and most stymied" since coming to power in 2009.

"If you look at the number of Americans killed since 9/11 by terrorism, it's less than 100. If you look at the number that have been killed by gun violence, it's in the tens of thousands," Mr Obama said.

-
Gun control campaigners protest in McPhearson Square in Washington DC - 25 April 2013
+
Gun control campaigners protest in McPhearson Square in Washington DC - 25 April 2013 +
+ The president said he would continue fighting for greater gun control laws -

"For us not to be able to resolve that issue has been something that is distressing," he added.

+
+

"For us not to be able to resolve that issue has been something that is distressing," he added.

Mr Obama has pushed for stricter gun control throughout his presidency but has been unable to secure any significant changes to the laws.

After nine African-American churchgoers were killed in South Carolina in June, he admitted "politics in this town" meant there were few options available.

line

Analysis: Jon Sopel, BBC News, Washington

@@ -37,9 +40,12 @@

"There are going to be tensions that arise. But if you look at my daughters' generation, they have an attitude about race that's entirely different than even my generation."

Talking about how he was feeling after his recent successes, he said "every president, every leader has strengths and weaknesses".

"One of my strengths is I have a pretty even temperament. I don't get too high when it's high and I don't get too low when it's low," he said.

-
Customer looks at Obama shirts at a stall in Nairobi's Kibera slums, 23 July 2015
+
Customer looks at Obama shirts at a stall in Nairobi's Kibera slums, 23 July 2015 +
+ Kenya is getting ready to welcome the US president -

Kenya trip

+
+

Kenya trip

Mr Obama was speaking to the BBC at the White House before departing for Kenya.

His father was Kenyan and the president is expected to meet relatives in Nairobi.

Mr Obama has faced criticism in the country after the US legalised gay marriage. However, in his interview, the president said he would not fall silent on the issue.

@@ -47,4 +53,4 @@

The president also admitted that some African governments, including Kenya's, needed to improve their records on human rights and democracy. However, he defended his decision to engage with and visit those governments.

"Well, they're not ideal institutions. But what we found is, is that when we combined blunt talk with engagement, that gives us the best opportunity to influence and open up space for civil society."

Mr Obama will become the first US president to address the African Union when he travels on to Ethiopia on Sunday.

-
+ diff --git a/resources/tests/readability/blogger/expected.html b/resources/tests/readability/blogger/expected.html index bdfb3b2..182a508 100644 --- a/resources/tests/readability/blogger/expected.html +++ b/resources/tests/readability/blogger/expected.html @@ -3,72 +3,115 @@ I've written a couple of posts in the past few months but they were all for the blog at work so I figured I'm long overdue for one on Silicon Exposed.

So what's a GreenPak?

-

Silego Technology is a fabless semiconductor company located in the SF Bay area, which makes (among other things) a line of programmable logic devices known as GreenPak. Their 5th generation parts were just announced, but I started this project before that happened so I'm still targeting the 4th generation.

GreenPak devices are kind of like itty bitty PSoCs - they have a mixed signal fabric with an ADC, DACs, comparators, voltage references, plus a digital LUT/FF fabric and some typical digital MCU peripherals like counters and oscillators (but no CPU).

It's actually an interesting architecture - FPGAs (including some devices marketed as CPLDs) are a 2D array of LUTs connected via wires to adjacent cells, and true (product term) CPLDs are a star topology of AND-OR arrays connected by a crossbar. GreenPak, on the other hand, is a star topology of LUTs, flipflops, and analog/digital hard IP connected to a crossbar.

Without further ado, here's a block diagram showing all the cool stuff you get in the SLG46620V:

- - - -
SLG46620V block diagram (from device datasheet)
+

Silego Technology is a fabless semiconductor company located in the SF Bay area, which makes (among other things) a line of programmable logic devices known as GreenPak. Their 5th generation parts were just announced, but I started this project before that happened so I'm still targeting the 4th generation.
+
GreenPak devices are kind of like itty bitty PSoCs - they have a mixed signal fabric with an ADC, DACs, comparators, voltage references, plus a digital LUT/FF fabric and some typical digital MCU peripherals like counters and oscillators (but no CPU).
+
It's actually an interesting architecture - FPGAs (including some devices marketed as CPLDs) are a 2D array of LUTs connected via wires to adjacent cells, and true (product term) CPLDs are a star topology of AND-OR arrays connected by a crossbar. GreenPak, on the other hand, is a star topology of LUTs, flipflops, and analog/digital hard IP connected to a crossbar.
+
Without further ado, here's a block diagram showing all the cool stuff you get in the SLG46620V:

+ + + + + + + + + +
+ +
SLG46620V block diagram (from device datasheet)

- They're also tiny (the SLG46620V is a 20-pin 0.4mm pitch STQFN measuring 2x3 mm, and the lower gate count SLG46140V is a mere 1.6x2 mm) and probably the cheapest programmable logic device on the market - $0.50 in low volume and less than $0.40 in larger quantities.

The Vdd range of GreenPak4 is huge, more like what you'd expect from an MCU than an FPGA! It can run on anything from 1.8 to 5V, although performance is only specified at 1.8, 3.3, and 5V nominal voltages. There's also a dual-rail version that trades one of the GPIO pins for a second power supply pin, allowing you to interface to logic at two different voltage levels.

To support low-cost/space-constrained applications, they even have the configuration memory on die. It's one-time programmable and needs external Vpp to program (presumably Silego didn't want to waste die area on charge pumps that would only be used once) but has a SRAM programming mode for prototyping.

The best part is that the development software (GreenPak Designer) is free of charge and provided for all major operating systems including Linux! Unfortunately, the only supported design entry method is schematic entry and there's no way to write your design in a HDL.

While schematics may be fine for quick tinkering on really simple designs, they quickly get unwieldy. The nightmare of a circuit shown below is just a bunch of counters hooked up to LEDs that blink at various rates.

- - - -
Schematic from hell!
+ They're also tiny (the SLG46620V is a 20-pin 0.4mm pitch STQFN measuring 2x3 mm, and the lower gate count SLG46140V is a mere 1.6x2 mm) and probably the cheapest programmable logic device on the market - $0.50 in low volume and less than $0.40 in larger quantities.
+
The Vdd range of GreenPak4 is huge, more like what you'd expect from an MCU than an FPGA! It can run on anything from 1.8 to 5V, although performance is only specified at 1.8, 3.3, and 5V nominal voltages. There's also a dual-rail version that trades one of the GPIO pins for a second power supply pin, allowing you to interface to logic at two different voltage levels.
+
To support low-cost/space-constrained applications, they even have the configuration memory on die. It's one-time programmable and needs external Vpp to program (presumably Silego didn't want to waste die area on charge pumps that would only be used once) but has a SRAM programming mode for prototyping.
+
The best part is that the development software (GreenPak Designer) is free of charge and provided for all major operating systems including Linux! Unfortunately, the only supported design entry method is schematic entry and there's no way to write your design in a HDL.
+
While schematics may be fine for quick tinkering on really simple designs, they quickly get unwieldy. The nightmare of a circuit shown below is just a bunch of counters hooked up to LEDs that blink at various rates.

+ + + + + + + + + +
+ +
Schematic from hell!

- As if this wasn't enough of a problem, the largest GreenPak4 device (the SLG46620V) is split into two halves with limited routing between them, and the GUI doesn't help the user manage this complexity at all - you have to draw your schematic in two halves and add "cross connections" between them.

The icing on the cake is that schematics are a pain to diff and collaborate on. Although GreenPak schematics are XML based, which is a touch better than binary, who wants to read a giant XML diff and try to figure out what's going on in the circuit?

This isn't going to be a post on the quirks of Silego's software, though - that would be boring. As it turns out, there's one more exciting feature of these chips that I didn't mention earlier: the configuration bitstream is 100% documented in the device datasheet! This is unheard of in the programmable logic world. As Nick of Arachnid Labs says, the chip is "just dying for someone to write a VHDL or Verilog compiler for it". As you can probably guess by from the title of this post, I've been busy doing exactly that.

+ As if this wasn't enough of a problem, the largest GreenPak4 device (the SLG46620V) is split into two halves with limited routing between them, and the GUI doesn't help the user manage this complexity at all - you have to draw your schematic in two halves and add "cross connections" between them.
+
The icing on the cake is that schematics are a pain to diff and collaborate on. Although GreenPak schematics are XML based, which is a touch better than binary, who wants to read a giant XML diff and try to figure out what's going on in the circuit?
+
This isn't going to be a post on the quirks of Silego's software, though - that would be boring. As it turns out, there's one more exciting feature of these chips that I didn't mention earlier: the configuration bitstream is 100% documented in the device datasheet! This is unheard of in the programmable logic world. As Nick of Arachnid Labs says, the chip is "just dying for someone to write a VHDL or Verilog compiler for it". As you can probably guess by from the title of this post, I've been busy doing exactly that.

Great! How does it work?

-

Rather than wasting time writing a synthesizer, I decided to write a GreenPak technology library for Clifford Wolf's excellent open source synthesis tool, Yosys, and then make a place-and-route tool to turn that into a final netlist. The post-PAR netlist can then be loaded into GreenPak Designer in order to program the device.

The first step of the process is to run the "synth_greenpak4" Yosys flow on the Verilog source. This runs a generic RTL synthesis pass, then some coarse-grained extraction passes to infer shift register and counter cells from behavioral logic, and finally maps the remaining logic to LUT/FF cells and outputs a JSON-formatted netlist.

Once the design has been synthesized, my tool (named, surprisingly, gp4par) is then launched on the netlist. It begins by parsing the JSON and constructing a directed graph of cell objects in memory. A second graph, containing all of the primitives in the device and the legal connections between them, is then created based on the device specified on the command line. (As of now only the SLG46620V is supported; the SLG46621V can be added fairly easily but the SLG46140V has a slightly different microarchitecture which will require a bit more work to support.)

After the graphs are generated, each node in the netlist graph is assigned a numeric label identifying the type of cell and each node in the device graph is assigned a list of legal labels: for example, an I/O buffer site is legal for an input buffer, output buffer, or bidirectional buffer.

- - - -
Example labeling for a subset of the netlist and device graphs
+

Rather than wasting time writing a synthesizer, I decided to write a GreenPak technology library for Clifford Wolf's excellent open source synthesis tool, Yosys, and then make a place-and-route tool to turn that into a final netlist. The post-PAR netlist can then be loaded into GreenPak Designer in order to program the device.
+
The first step of the process is to run the "synth_greenpak4" Yosys flow on the Verilog source. This runs a generic RTL synthesis pass, then some coarse-grained extraction passes to infer shift register and counter cells from behavioral logic, and finally maps the remaining logic to LUT/FF cells and outputs a JSON-formatted netlist.
+
Once the design has been synthesized, my tool (named, surprisingly, gp4par) is then launched on the netlist. It begins by parsing the JSON and constructing a directed graph of cell objects in memory. A second graph, containing all of the primitives in the device and the legal connections between them, is then created based on the device specified on the command line. (As of now only the SLG46620V is supported; the SLG46621V can be added fairly easily but the SLG46140V has a slightly different microarchitecture which will require a bit more work to support.)
+
After the graphs are generated, each node in the netlist graph is assigned a numeric label identifying the type of cell and each node in the device graph is assigned a list of legal labels: for example, an I/O buffer site is legal for an input buffer, output buffer, or bidirectional buffer.

+ + + + + + + + + +
+ +
Example labeling for a subset of the netlist and device graphs

The labeled nodes now need to be placed. The initial placement uses a simple greedy algorithm to create a valid (although not necessarily optimal or even routable) placement:

    -
  1. Loop over the cells in the netlist. If any cell has a LOC constraint, which locks the cell to a specific physical site, attempt to assign the node to the specified site. If the specified node is the wrong type, doesn't exist, or is already used by another constrained node, the constraint is invalid so fail with an error.
  2. -
  3. Loop over all of the unconstrained cells in the netlist and assign them to the first unused site with the right label. If none are available, the design is too big for the device so fail with an error.
  4. -
+
  • Loop over the cells in the netlist. If any cell has a LOC constraint, which locks the cell to a specific physical site, attempt to assign the node to the specified site. If the specified node is the wrong type, doesn't exist, or is already used by another constrained node, the constraint is invalid so fail with an error.
  • +
  • Loop over all of the unconstrained cells in the netlist and assign them to the first unused site with the right label. If none are available, the design is too big for the device so fail with an error.
  • +

    Once the design is placed, the placement optimizer then loops over the design and attempts to improve it. A simulated annealing algorithm is used, where changes to the design are accepted unconditionally if they make the placement better, and with a random, gradually decreasing probability if they make it worse. The optimizer terminates when the design receives a perfect score (indicating an optimal placement) or if it stops making progress for several iterations. Each iteration does the following:

      -
    1. Compute a score for the current design based on the number of unroutable nets, the amount of routing congestion (number of nets crossing between halves of the device), and static timing analysis (not yet implemented, always zero).
    2. -
    3. Make a list of nodes that contributed to this score in some way (having some attached nets unroutable, crossing to the other half of the device, or failing timing).
    4. -
    5. Remove nodes from the list that are LOC'd to a specific location since we're not allowed to move them.
    6. -
    7. Remove nodes from the list that have only one legal placement in the device (for example, oscillator hard IP) since there's nowhere else for them to go.
    8. -
    9. Pick a node from the remainder of the list at random. Call this our pivot.
    10. -
    11. Find a list of candidate placements for the pivot:
    12. -
        -
      1. Consider all routable placements in the other half of the device.
      2. -
      3. If none were found, consider all routable placements anywhere in the device.
      4. -
      5. If none were found, consider all placements anywhere in the device even if they're not routable.
      6. -
      -
    13. Pick one of the candidates at random and move the pivot to that location. If another cell in the netlist is already there, put it in the vacant site left by the pivot.
    14. -
    15. Re-compute the score for the design. If it's better, accept this change and start the next iteration.
    16. -
    17. If the score is worse, accept it with a random probability which decreases as the iteration number goes up. If the change is not accepted, restore the previous placement.
    18. -
    +
  • Compute a score for the current design based on the number of unroutable nets, the amount of routing congestion (number of nets crossing between halves of the device), and static timing analysis (not yet implemented, always zero).
  • +
  • Make a list of nodes that contributed to this score in some way (having some attached nets unroutable, crossing to the other half of the device, or failing timing).
  • +
  • Remove nodes from the list that are LOC'd to a specific location since we're not allowed to move them.
  • +
  • Remove nodes from the list that have only one legal placement in the device (for example, oscillator hard IP) since there's nowhere else for them to go.
  • +
  • Pick a node from the remainder of the list at random. Call this our pivot.
  • +
  • Find a list of candidate placements for the pivot:
  • +
      +
    1. Consider all routable placements in the other half of the device.
    2. +
    3. If none were found, consider all routable placements anywhere in the device.
    4. +
    5. If none were found, consider all placements anywhere in the device even if they're not routable.
    6. +
    +
  • Pick one of the candidates at random and move the pivot to that location. If another cell in the netlist is already there, put it in the vacant site left by the pivot.
  • +
  • Re-compute the score for the design. If it's better, accept this change and start the next iteration.
  • +
  • If the score is worse, accept it with a random probability which decreases as the iteration number goes up. If the change is not accepted, restore the previous placement.
  • +

    - After optimization, the design is checked for routability. If any edges in the netlist graph don't correspond to edges in the device graph, the user probably asked for something impossible (for example, trying to hook a flipflop's output to a comparator's reference voltage input) so fail with an error.

    The design is then routed. This is quite simple due to the crossbar structure of the device. For each edge in the netlist:

    + After optimization, the design is checked for routability. If any edges in the netlist graph don't correspond to edges in the device graph, the user probably asked for something impossible (for example, trying to hook a flipflop's output to a comparator's reference voltage input) so fail with an error.
    +
    The design is then routed. This is quite simple due to the crossbar structure of the device. For each edge in the netlist:

      -
    1. If dedicated (non-fabric) routing is used for this path, configure the destination's input mux appropriately and stop.
    2. -
    3. If the source and destination are in the same half of the device, configure the destination's input mux appropriately and stop.
    4. -
    5. A cross-connection must be used. Check if we already used one to bring the source signal to the other half of the device. If found, configure the destination to route from that cross-connection and stop.
    6. -
    7. Check if we have any cross-connections left going in this direction. If they're all used, the design is unroutable due to congestion so fail with an error.
    8. -
    9. Pick the next unused cross-connection and configure it to route from the source. Configure the destination to route from the cross-connection and stop.
    10. -
    +
  • If dedicated (non-fabric) routing is used for this path, configure the destination's input mux appropriately and stop.
  • +
  • If the source and destination are in the same half of the device, configure the destination's input mux appropriately and stop.
  • +
  • A cross-connection must be used. Check if we already used one to bring the source signal to the other half of the device. If found, configure the destination to route from that cross-connection and stop.
  • +
  • Check if we have any cross-connections left going in this direction. If they're all used, the design is unroutable due to congestion so fail with an error.
  • +
  • Pick the next unused cross-connection and configure it to route from the source. Configure the destination to route from the cross-connection and stop.
  • +

    Once routing is finished, run a series of post-PAR design rule checks. These currently include the following:

      -
    • If any node has no loads, generate a warning
    • -
    • If an I/O buffer is connected to analog hard IP, fail with an error if it's not configured in analog mode.
    • -
    • Some signals (such as comparator inputs and oscillator power-down controls) are generated by a shared mux and fed to many loads. If different loads require conflicting settings for the shared mux, fail with an error.
    • -
    +
  • If any node has no loads, generate a warning
  • +
  • If an I/O buffer is connected to analog hard IP, fail with an error if it's not configured in analog mode.
  • +
  • Some signals (such as comparator inputs and oscillator power-down controls) are generated by a shared mux and fed to many loads. If different loads require conflicting settings for the shared mux, fail with an error.
  • +

    - If DRC passes with no errors, configure all of the individual cells in the netlist based on the HDL parameters. Fail with an error if an invalid configuration was requested.

    Finally, generate the bitstream from all of the per-cell configuration and write it to a file.

    + If DRC passes with no errors, configure all of the individual cells in the netlist based on the HDL parameters. Fail with an error if an invalid configuration was requested.
    +
    Finally, generate the bitstream from all of the per-cell configuration and write it to a file.

    Great, let's get started!

    - If you don't already have one, you'll need to buy a GreenPak4 development kit. The kit includes samples of the SLG46620V (among other devices) and a programmer/emulation board. While you're waiting for it to arrive, install GreenPak Designer.

    Download and install Yosys. Although Clifford is pretty good at merging my pull requests, only my fork on Github is guaranteed to have the most up-to-date support for GreenPak devices so don't be surprised if you can't use a bleeding-edge feature with mainline Yosys.

    Download and install gp4par. You can get it from the Github repository.

    Write your HDL, compile with Yosys, P&R with gp4par, and import the bitstream into GreenPak Designer to program the target device. The most current gp4par manual is included in LaTeX source form in the source tree and is automatically built as part of the compile process. If you're just browsing, there's a relatively recent PDF version on my web server.

    If you'd like to see the Verilog that produced the nightmare of a schematic I showed above, here it is.

    Be advised that this project is still very much a work in progress and there are still a number of SLG46620V features I don't support (see the manual for exact details).

    + If you don't already have one, you'll need to buy a GreenPak4 development kit. The kit includes samples of the SLG46620V (among other devices) and a programmer/emulation board. While you're waiting for it to arrive, install GreenPak Designer.
    +
    Download and install Yosys. Although Clifford is pretty good at merging my pull requests, only my fork on Github is guaranteed to have the most up-to-date support for GreenPak devices so don't be surprised if you can't use a bleeding-edge feature with mainline Yosys.
    +
    Download and install gp4par. You can get it from the Github repository.
    +
    Write your HDL, compile with Yosys, P&R with gp4par, and import the bitstream into GreenPak Designer to program the target device. The most current gp4par manual is included in LaTeX source form in the source tree and is automatically built as part of the compile process. If you're just browsing, there's a relatively recent PDF version on my web server.
    +
    If you'd like to see the Verilog that produced the nightmare of a schematic I showed above, here it is.
    +
    Be advised that this project is still very much a work in progress and there are still a number of SLG46620V features I don't support (see the manual for exact details).

    I love it / it segfaulted / there's a problem in the manual!

    @@ -76,9 +119,15 @@

    You're competing with Silego's IDE. Have they found out and sued you yet?

    - Nope. They're fully aware of what I'm doing and are rolling out the red carpet for me. They love the idea of a HDL flow as an alternative to schematic entry and are pretty amazed at how fast it's coming together.

    After I reported a few bugs in their datasheets they decided to skip the middleman and give me direct access to the engineer who writes their documentation so that I can get faster responses. The last time I found a problem (two different parts of the datasheet contradicted each other) an updated datasheet was in my inbox and on their website by the next day. I only wish Xilinx gave me that kind of treatment!

    They've even offered me free hardware to help me add support for their latest product family, although I plan to get GreenPak4 support to a more stable state before taking them up on the offer.

    + Nope. They're fully aware of what I'm doing and are rolling out the red carpet for me. They love the idea of a HDL flow as an alternative to schematic entry and are pretty amazed at how fast it's coming together.
    +
    After I reported a few bugs in their datasheets they decided to skip the middleman and give me direct access to the engineer who writes their documentation so that I can get faster responses. The last time I found a problem (two different parts of the datasheet contradicted each other) an updated datasheet was in my inbox and on their website by the next day. I only wish Xilinx gave me that kind of treatment!
    +
    They've even offered me free hardware to help me add support for their latest product family, although I plan to get GreenPak4 support to a more stable state before taking them up on the offer.

    So what's next?

    -

    Better testing, for starters. I have to verify functionality by hand with a DMM and oscilloscope, which is time consuming.

    My contact at Silego says they're going to be giving me documentation on the SRAM emulation interface soon, so I'm going to make a hardware-in-loop test platform that connects to my desktop and the Silego ZIF socket, and lets me load new bitstreams via a scriptable interface. It'll have FPGA-based digital I/O as well as an ADC and DAC on every device pin, plus an adjustable voltage regulator for power, so I can feed in arbitrary mixed-signal test waveforms and write PC-based unit tests to verify correct behavior.

    Other than that, I want to finish support for the SLG46620V in the next month or two. The SLG46621V will be an easy addition since only one pin and the relevant configuration bits have changed from the 46620 (I suspect they're the same die, just bonded out differently).

    Once that's done I'll have to do some more extensive work to add the SLG46140V since the architecture is a bit different (a lot of the combinatorial logic is merged into multi-function blocks). Luckily, the 46140 has a lot in common architecturally with the GreenPak5 family, so once that's done GreenPak5 will probably be a lot easier to add support for.

    My thanks go out to Clifford Wolf, whitequark, the IRC users in ##openfpga, and everyone at Silego I've worked with to help make this possible. I hope that one day this project will become mature enough that Silego will ship it as an officially supported extension to GreenPak Designer, making history by becoming the first modern programmable logic vendor to ship a fully open source synthesis and P&R suite. +

    Better testing, for starters. I have to verify functionality by hand with a DMM and oscilloscope, which is time consuming.
    +
    My contact at Silego says they're going to be giving me documentation on the SRAM emulation interface soon, so I'm going to make a hardware-in-loop test platform that connects to my desktop and the Silego ZIF socket, and lets me load new bitstreams via a scriptable interface. It'll have FPGA-based digital I/O as well as an ADC and DAC on every device pin, plus an adjustable voltage regulator for power, so I can feed in arbitrary mixed-signal test waveforms and write PC-based unit tests to verify correct behavior.
    +
    Other than that, I want to finish support for the SLG46620V in the next month or two. The SLG46621V will be an easy addition since only one pin and the relevant configuration bits have changed from the 46620 (I suspect they're the same die, just bonded out differently).
    +
    Once that's done I'll have to do some more extensive work to add the SLG46140V since the architecture is a bit different (a lot of the combinatorial logic is merged into multi-function blocks). Luckily, the 46140 has a lot in common architecturally with the GreenPak5 family, so once that's done GreenPak5 will probably be a lot easier to add support for.
    +
    My thanks go out to Clifford Wolf, whitequark, the IRC users in ##openfpga, and everyone at Silego I've worked with to help make this possible. I hope that one day this project will become mature enough that Silego will ship it as an officially supported extension to GreenPak Designer, making history by becoming the first modern programmable logic vendor to ship a fully open source synthesis and P&R suite.

    - + diff --git a/resources/tests/readability/breitbart/expected.html b/resources/tests/readability/breitbart/expected.html index d62beb4..f787dab 100644 --- a/resources/tests/readability/breitbart/expected.html +++ b/resources/tests/readability/breitbart/expected.html @@ -1,19 +1,52 @@
    -
    + + + +
    +

    Supporters of Republican presidential nominee Donald Trump cheer during election night at the New York Hilton Midtown in New York on November 9, 2016.  / AFP / JIM WATSON        (Photo credit should read JIM WATSON/AFP/Getty Images)

    JIM WATSON/AFP/Getty Images

    -
    -
    +
    + + + +
    -

    SIGN UP FOR OUR NEWSLETTER

    -

    Snopes fact checker and staff writer David Emery posted to Twitter asking if there were “any un-angry Trump supporters?”

    -

    Emery, a writer for partisan “fact-checking” website Snopes.com which soon will be in charge of labelling “fake news” alongside ABC News and Politifact, retweeted an article by Vulture magazine relating to the protests of the Hamilton musical following the decision by the cast of the show to make a public announcement to Vice-president elect Mike Pence while he watched the performance with his family.

    -

    SIGN UP FOR OUR NEWSLETTER

    -

    The tweet from Vulture magazine reads, “#Hamilton Chicago show interrupted by angry Trump supporter.” Emery retweeted the story, saying, “Are there un-angry Trump supporters?”

    -

    This isn’t the first time the Snopes.com writer has expressed anti-Trump sentiment on his Twitter page. In another tweet in which Emery links to an article that falsely attributes a quote to President-elect Trump, Emery states, “Incredibly, some people actually think they have to put words in Trump’s mouth to make him look bad.”

    -

    Emery also retweeted an article by New York magazine that claimed President-elect Trump relied on lies to win during his campaign and that we now lived in a “post-truth” society. “Before long we’ll all have forgotten what it was like to live in the same universe; or maybe we already have,” Emery tweeted.

    -

    Facebook believe that Emery, along with other Snopes writers, ABC News, and Politifact are impartial enough to label and silence what they believe to be “fake news” on social media.

    -

    Lucas Nolan is a reporter for Breitbart Tech covering issues of free speech and online censorship. Follow him on Twitter @LucasNolan_ or email him at lnolan@breitbart.com

    -
    + + +
    +

    SIGN UP FOR OUR NEWSLETTER

    + +
    +

    Snopes fact checker and staff writer David Emery posted to Twitter asking if there were “any un-angry Trump supporters?”

    +

    Emery, a writer for partisan “fact-checking” website Snopes.com which soon will be in charge of labelling “fake news” alongside ABC News and Politifact, retweeted an article by Vulture magazine relating to the protests of the Hamilton musical following the decision by the cast of the show to make a public announcement to Vice-president elect Mike Pence while he watched the performance with his family.

    +
    +

    SIGN UP FOR OUR NEWSLETTER

    + +
    +

    The tweet from Vulture magazine reads, “#Hamilton Chicago show interrupted by angry Trump supporter.” Emery retweeted the story, saying, “Are there un-angry Trump supporters?”

    + + + + + + +

    This isn’t the first time the Snopes.com writer has expressed anti-Trump sentiment on his Twitter page. In another tweet in which Emery links to an article that falsely attributes a quote to President-elect Trump, Emery states, “Incredibly, some people actually think they have to put words in Trump’s mouth to make him look bad.”

    + + +

    Emery also retweeted an article by New York magazine that claimed President-elect Trump relied on lies to win during his campaign and that we now lived in a “post-truth” society. “Before long we’ll all have forgotten what it was like to live in the same universe; or maybe we already have,” Emery tweeted.

    + + + + + + +

    Facebook believe that Emery, along with other Snopes writers, ABC News, and Politifact are impartial enough to label and silence what they believe to be “fake news” on social media.

    +

    Lucas Nolan is a reporter for Breitbart Tech covering issues of free speech and online censorship. Follow him on Twitter @LucasNolan_ or email him at lnolan@breitbart.com

    + + + + +
    diff --git a/resources/tests/readability/bug-1255978/expected.html b/resources/tests/readability/bug-1255978/expected.html index b6e9dec..715f747 100644 --- a/resources/tests/readability/bug-1255978/expected.html +++ b/resources/tests/readability/bug-1255978/expected.html @@ -1,67 +1,126 @@
    -

    Most people go to hotels for the pleasure of sleeping in a giant bed with clean white sheets and waking up to fresh towels in the morning.

    -

    But those towels and sheets might not be as clean as they look, according to the hotel bosses that responded to an online thread about the things hotel owners don’t want you to know.

    -

    Zeev Sharon and Michael Forrest Jones both run hotel start-ups in the US. Forrest Jones runs the start-up Beechmont Hotels Corporation, a hotel operating company that consults with hotel owners on how they can improve their business. Sharon is the CEO of Hotelied, a start-up that allows people to sign up for discounts at luxury hotels.

    -

    But even luxury hotels aren’t always cleaned as often as they should be.

    -

    Here are some of the secrets that the receptionist will never tell you when you check in, according to answers posted on Quora.

    -

    -
    -

    bandb2.jpg

    -

    Even posh hotels might not wash a blanket in between stays +

    Most people go to hotels for the pleasure of sleeping in a giant bed with clean white sheets and waking up to fresh towels in the morning.

    + +

    But those towels and sheets might not be as clean as they look, according to the hotel bosses that responded to an online thread about the things hotel owners don’t want you to know.

    + +

    Zeev Sharon and Michael Forrest Jones both run hotel start-ups in the US. Forrest Jones runs the start-up Beechmont Hotels Corporation, a hotel operating company that consults with hotel owners on how they can improve their business. Sharon is the CEO of Hotelied, a start-up that allows people to sign up for discounts at luxury hotels.

    + +

    But even luxury hotels aren’t always cleaned as often as they should be.

    + +

    Here are some of the secrets that the receptionist will never tell you when you check in, according to answers posted on Quora.

    + +

    + +
    +
    + +

    bandb2.jpg

    + +
    +

    Even posh hotels might not wash a blanket in between stays

    -
    -

    1. Take any blankets or duvets off the bed

    -

    Forrest Jones said that anything that comes into contact with any of the previous guest’s skin should be taken out and washed every time the room is made, but that even the fanciest hotels don’t always do so. "Hotels are getting away from comforters. Blankets are here to stay, however. But some hotels are still hesitant about washing them every day if they think they can get out of it," he said.

    -

    Video shows bed bug infestation at New York hotel

    -

    -
    -

    hotel-door-getty.jpg

    -

    Forrest Jones advised stuffing the peep hole with a strip of rolled up notepaper when not in use. +

    + +

    1. Take any blankets or duvets off the bed

    + +

    Forrest Jones said that anything that comes into contact with any of the previous guest’s skin should be taken out and washed every time the room is made, but that even the fanciest hotels don’t always do so. "Hotels are getting away from comforters. Blankets are here to stay, however. But some hotels are still hesitant about washing them every day if they think they can get out of it," he said.

    + +
    + +

    Video shows bed bug infestation at New York hotel

    +
    + +

    + +
    +
    + +

    hotel-door-getty.jpg

    + +
    +

    Forrest Jones advised stuffing the peep hole with a strip of rolled up notepaper when not in use.

    -
    -

    2. Check the peep hole has not been tampered with

    -

    This is not common, but can happen, Forrest Jones said. He advised stuffing the peep hole with a strip of rolled up notepaper when not in use. When someone knocks on the door, the paper can be removed to check who is there. If no one is visible, he recommends calling the front desk immediately. “I look forward to the day when I can tell you to choose only hotels where every employee who has access to guestroom keys is subjected to a complete public records background check, prior to hire, and every year or two thereafter. But for now, I can't,” he said.

    -

    -
    -

    luggage-3.jpg

    -

    Put luggage on the floor +

    + +

    2. Check the peep hole has not been tampered with

    + +

    This is not common, but can happen, Forrest Jones said. He advised stuffing the peep hole with a strip of rolled up notepaper when not in use. When someone knocks on the door, the paper can be removed to check who is there. If no one is visible, he recommends calling the front desk immediately. “I look forward to the day when I can tell you to choose only hotels where every employee who has access to guestroom keys is subjected to a complete public records background check, prior to hire, and every year or two thereafter. But for now, I can't,” he said.

    + +

    + +
    +
    + +

    luggage-3.jpg

    + +
    +

    Put luggage on the floor

    -
    -

    3. Don’t use a wooden luggage rack

    -

    Bedbugs love wood. Even though a wooden luggage rack might look nicer and more expensive than a metal one, it’s a breeding ground for bugs. Forrest Jones says guests should put the items they plan to take from bags on other pieces of furniture and leave the bag on the floor.

    -

    -
    -

    Lifestyle-hotels.jpg

    -

    The old rule of thumb is that for every 00 invested in a room, the hotel should charge in average daily rate +

    + +

    3. Don’t use a wooden luggage rack

    + +

    Bedbugs love wood. Even though a wooden luggage rack might look nicer and more expensive than a metal one, it’s a breeding ground for bugs. Forrest Jones says guests should put the items they plan to take from bags on other pieces of furniture and leave the bag on the floor.

    + +

    + +
    +
    + +

    Lifestyle-hotels.jpg

    + +
    +

    The old rule of thumb is that for every 00 invested in a room, the hotel should charge in average daily rate

    -
    -

    4. Hotel rooms are priced according to how expensive they were to build

    -

    Zeev Sharon said that the old rule of thumb is that for every $1000 invested in a room, the hotel should charge $1 in average daily rate. So a room that cost $300,000 to build, should sell on average for $300/night.

    -

    5. Beware the wall-mounted hairdryer

    -

    It contains the most germs of anything in the room. Other studies have said the TV remote and bedside lamp switches are the most unhygienic. “Perhaps because it's something that's easy for the housekeepers to forget to check or to squirt down with disinfectant,” Forrest Jones said.

    -

    -

    6. Mini bars almost always lose money

    -

    Despite the snacks in the minibar seeming like the most overpriced food you have ever seen, hotel owners are still struggling to make a profit from those snacks. "Minibars almost always lose money, even when they charge $10 for a Diet Coke,” Sharon said.

    -

    -
    -

    agenda7.jpg

    -

    Towels should always be cleaned between stays +

    + +

    4. Hotel rooms are priced according to how expensive they were to build

    + +

    Zeev Sharon said that the old rule of thumb is that for every $1000 invested in a room, the hotel should charge $1 in average daily rate. So a room that cost $300,000 to build, should sell on average for $300/night.

    + + + +

    5. Beware the wall-mounted hairdryer

    + +

    It contains the most germs of anything in the room. Other studies have said the TV remote and bedside lamp switches are the most unhygienic. “Perhaps because it's something that's easy for the housekeepers to forget to check or to squirt down with disinfectant,” Forrest Jones said.

    + +

    + + + +

    6. Mini bars almost always lose money

    + +

    Despite the snacks in the minibar seeming like the most overpriced food you have ever seen, hotel owners are still struggling to make a profit from those snacks. "Minibars almost always lose money, even when they charge $10 for a Diet Coke,” Sharon said.

    + +

    + +
    +
    + +

    agenda7.jpg

    + +
    +

    Towels should always be cleaned between stays

    -
    -

    7. Always made sure the hand towels are clean when you arrive

    -

    Forrest Jones made a discovery when he was helping out with the housekeepers. “You know where you almost always find a hand towel in any recently-vacated hotel room that was occupied by a guy? On the floor, next to the bed, about halfway down, maybe a little toward the foot of the bed. Same spot in the floor, next to almost every bed occupied by a man, in every room. I'll leave the rest to your imagination,” he said.

    - -
      -
    • +
    + +

    7. Always made sure the hand towels are clean when you arrive

    + +

    Forrest Jones made a discovery when he was helping out with the housekeepers. “You know where you almost always find a hand towel in any recently-vacated hotel room that was occupied by a guy? On the floor, next to the bed, about halfway down, maybe a little toward the foot of the bed. Same spot in the floor, next to almost every bed occupied by a man, in every room. I'll leave the rest to your imagination,” he said.

    + + -

    Reuse content

    +
  • Hotels
  • +
  • Hygiene
  • + + +

    Reuse content +

    diff --git a/resources/tests/readability/buzzfeed-1/expected.html b/resources/tests/readability/buzzfeed-1/expected.html index 561d2e3..5ca2fd1 100644 --- a/resources/tests/readability/buzzfeed-1/expected.html +++ b/resources/tests/readability/buzzfeed-1/expected.html @@ -1,34 +1,41 @@
    -
    -

    The mother of a woman who took suspected diet pills bought online has described how her daughter was “literally burning up from within” moments before her death.

    -

    West Merica Police

    -
    -
    -

    Eloise Parry, 21, was taken to Royal Shrewsbury hospital on 12 April after taking a lethal dose of highly toxic “slimming tablets”.

    -

    “The drug was in her system, there was no anti-dote, two tablets was a lethal dose – and she had taken eight,” her mother, Fiona, said in a statement yesterday.

    -

    “As Eloise deteriorated, the staff in A&E did all they could to stabilise her. As the drug kicked in and started to make her metabolism soar, they attempted to cool her down, but they were fighting an uphill battle.

    -

    “She was literally burning up from within.”

    -

    She added: “They never stood a chance of saving her. She burned and crashed.”

    -
    -
    -
    -

    -

    Facebook

    -
    -
    -

    -

    Facebook

    -
    -
    -
    -

    West Mercia police said the tablets were believed to contain dinitrophenol, known as DNP, which is a highly toxic industrial chemical.

    -

    “We are undoubtedly concerned over the origin and sale of these pills and are working with partner agencies to establish where they were bought from and how they were advertised,” said chief inspector Jennifer Mattinson from the West Mercia police.

    -

    The Food Standards Agency warned people to stay away from slimming products that contained DNP.

    -

    “We advise the public not to take any tablets or powders containing DNP, as it is an industrial chemical and not fit for human consumption,” it said in a statement.

    -
    -
    -

    Fiona Parry issued a plea for people to stay away from pills containing the chemical.

    -

    “[Eloise] just never really understood how dangerous the tablets that she took were,” she said. “Most of us don’t believe that a slimming tablet could possibly kill us.

    -

    “DNP is not a miracle slimming pill. It is a deadly toxin.”

    -
    -
    +
    +

    The mother of a woman who took suspected diet pills bought online has described how her daughter was “literally burning up from within” moments before her death.

    + +

    West Merica Police

    +
    +
    +

    Eloise Parry, 21, was taken to Royal Shrewsbury hospital on 12 April after taking a lethal dose of highly toxic “slimming tablets”.

    +

    “The drug was in her system, there was no anti-dote, two tablets was a lethal dose – and she had taken eight,” her mother, Fiona, said in a statement yesterday.

    +

    “As Eloise deteriorated, the staff in A&E did all they could to stabilise her. As the drug kicked in and started to make her metabolism soar, they attempted to cool her down, but they were fighting an uphill battle.

    +

    “She was literally burning up from within.”

    +

    She added: “They never stood a chance of saving her. She burned and crashed.”

    +
    +
    +
    +
    +

    +
    +

    Facebook

    +
    +
    +
    +

    +
    +

    Facebook

    +
    + +
    +
    +

    West Mercia police said the tablets were believed to contain dinitrophenol, known as DNP, which is a highly toxic industrial chemical.

    +

    “We are undoubtedly concerned over the origin and sale of these pills and are working with partner agencies to establish where they were bought from and how they were advertised,” said chief inspector Jennifer Mattinson from the West Mercia police.

    +

    The Food Standards Agency warned people to stay away from slimming products that contained DNP.

    +

    “We advise the public not to take any tablets or powders containing DNP, as it is an industrial chemical and not fit for human consumption,” it said in a statement.

    +
    +
    +

    Fiona Parry issued a plea for people to stay away from pills containing the chemical.

    +
    +

    “[Eloise] just never really understood how dangerous the tablets that she took were,” she said. “Most of us don’t believe that a slimming tablet could possibly kill us.

    +

    “DNP is not a miracle slimming pill. It is a deadly toxin.”

    +
    + diff --git a/resources/tests/readability/citylab-1/expected.html b/resources/tests/readability/citylab-1/expected.html index 3ccd58e..59f44e4 100644 --- a/resources/tests/readability/citylab-1/expected.html +++ b/resources/tests/readability/citylab-1/expected.html @@ -1,23 +1,40 @@ -
    - - -
    - - -
    The Moulin Rouge cabaret in - ParisBenoit - Tessier/Reuters
    -

    +
    + + + + +
    + + + + + + + + + + + + +
    + The Moulin Rouge cabaret in + Paris Benoit + Tessier/Reuters +
    +
    +
    +

    Why Neon Is the Ultimate Symbol of the 20th Century

    -

    -
    -

    + +

    +

    The once-ubiquitous form of lighting was novel when it first emerged in the early 1900s, though it has since come to represent decline.

    -

    + +

    +

    In the summer of 1898, the Scottish chemist Sir William Ramsay made a discovery that would eventually give the Moulin Rouge in Paris, the Las Vegas Strip, and New York’s Times Square their perpetual nighttime glow. Using the boiling point of argon as a @@ -29,7 +46,7 @@ designed glass tubes in which neon gas could be trapped, then electrified, to create a light that glowed reliably for more than 1,000 hours.

    -

    +

    In the 2012 book L’être et le Néon, which has been newly translated into English by Michael Wells, the philosopher Luis de Miranda weaves a history of neon lighting as both artifact and metaphor. Being and @@ -41,7 +58,8 @@ lack of glossy historical images. Nonetheless, de Miranda makes a convincing case for neon as a symbol of the grand modern ambitions of the 20th century.

    -

    + +

    De Miranda beautifully evokes the notion of neon lighting as an icon of the 1900s in his introduction: “When we hear the word neon, an image pops into our heads: a combination of light, colors, symbols, and glass. This image is itself a mood. It @@ -53,7 +71,7 @@ remain charmingly analog: Signs must be made by hand because there’s no cost-effective way to mass-produce them.

    -

    +

    In the 1910s, neon started being used for cosmopolitan flash in Paris at precisely the time and place where the first great modernist works were being created. De Miranda’s recounting of the ingenuity emerging from the French capital a century ago is thrilling @@ -61,7 +79,11 @@ Coco Chanel, the stream-of-consciousness poetry of Gertrude Stein, and the genre-defying music of Claude Debussy—all of which heralded a new age of culture for Europe and for the world. -

    +

    +
    + +
    +

    Amid this artistic groundswell, Georges Claude premiered his neon lights at the Paris Motor Show in December 1910, captivating visitors with 40-foot-tall tubes affixed to the building’s exterior. The lights shone orange-red because neon, by itself, produces that color. @@ -71,14 +93,18 @@ time that neon didn’t produce the ideal color for a standard light bulb and insisted that it posed no commercial threat to incandescent bulbs.

    -

    +

    Of course, the very quality that made neon fixtures a poor choice for interior lighting made them perfect for signs, de Miranda notes. The first of the neon signs was switched on in 1912, advertising a barbershop on Paris’s Boulevard Montmartre, and eventually they were adopted by cinemas and nightclubs. While Claude had a monopoly on neon lighting throughout the 1920s, the leaking of trade secrets and the expiration of a series of patents broke his hold on the rapidly expanding technology. -

    +

    +
    + +
    +

    In the following decades, neon’s nonstop glow and vibrant colors turned ordinary buildings and surfaces into 24/7 billboards for businesses, large and small, that wanted to convey a sense of always being open. The first examples of neon in the United States @@ -89,11 +115,17 @@ At the latter event, a massive neon sign reading Futurama lit the way to a General Motors exhibition that heralded “The World of Tomorrow.”

    -
    +
    + + + +
    Workers remove a hammer and sickle from a neon sign that reads “Glory to Communism,” visible on the roof of the Communist-run electricity-board headquarters in Czechoslovakia in 1989. (AP) -

    +

    +
    +

    De Miranda points out that businesses weren’t alone in embracing neon’s ability to spread messages effectively. By the middle of the century, the lighting was being adopted for more political purposes. “In the 1960s, the Soviets deployed a vast @@ -104,18 +136,26 @@ the mass introduction of neon lighting was a way of getting citizens of Communist cities to see their surroundings with the pizzazz and nighttime glamour of major Western capitals. -

    +

    +
    + +
    +

    Neon, around this time, began to be phased out, thanks to cheaper and less labor-intensive alternatives. In addition, the global economic downturn of the 1970s yielded a landscape in which older, flickering neon signs, which perhaps their owners couldn’t afford to fix or replace, came to look like symbols of decline. Where such signs were once sophisticated and novel, they now seemed dated and even seedy.

    -

    +
    +

    Cities are changing fast. Keep up with the CityLab Daily newsletter.

    + care about. + +

    +

    De Miranda understands this evolution by zooming out and looking at the 1900s as the “neon century.” The author draws a parallel between the physical form of neon lights, which again are essentially containers for electrified gases, and that of a glass @@ -125,11 +165,17 @@ maintain a hybrid status, somewhere between junkyards and museums, not unlike European capitals themselves.”

    -
    +
    + + + +
    Martin Wartman, a student at Northern Kentucky University, works on a neon sign at the Neonworks of Cincinnati workshop connected to the American Sign Museum, in 2016. (John Minchillo / AP) -

    +

    +
    +

    Another mark of neon’s hybridity: Its obsolescence started just as some contemporary artists began using the lights in their sculptures. Bruce Nauman’s 1968 work My Name as Though It Were Written on the Surface of the Moon poked fun at @@ -143,7 +189,7 @@ old-fashioned movie dialogue, with titles such as “You Loved Me Like a Distant Star” and “The Kiss Was Beautiful.”

    -

    +

    Seeing neon lighting tamed in the context of a gallery display fits comfortably with de Miranda’s notion that neon technology is like a time capsule from another age. In museums, works of neon art and design coexist with objects that were ahead of their own @@ -158,13 +204,24 @@ “neonness” need not only be trapped in the past. It might also capture the mysterious glow of the near future—just as it did a century ago.

    -

    This article originally appeared on The - Atlantic.

    +

    + This article originally appeared on The + Atlantic. +

    +

    +
    +

    About the Author

    -

    +
    + +
    diff --git a/resources/tests/readability/clean-links/expected.html b/resources/tests/readability/clean-links/expected.html index d042ec5..a474021 100644 --- a/resources/tests/readability/clean-links/expected.html +++ b/resources/tests/readability/clean-links/expected.html @@ -1,15 +1,17 @@ -
    -

    Study Webtext

    -

    +
    +

    Study Webtext

    +

    "Bartleby the Scrivener: A Story of Wall-Street " (1853) 
    Herman Melville

    -

    To the story text without notes
-

    -

    Prepared by Ann +

    +To the story text without notes
+        +

    +

    Prepared by Ann Woodlief, Virginia Commonwealth University

    -
    Click on text in red for hypertext notes and questions
    -I +
    Click on text in red for hypertext notes and questions
    + I am a rather elderly man. The nature of my avocations for the last thirty years has brought me into more than ordinary contact with what would seem an interesting and somewhat singular set of men of whom as yet @@ -32,7 +34,7 @@ and general surroundings; because some such description is indispensable to an adequate understanding of the chief character about to be presented.

    -

    Imprimis: I am a man who, from his youth upwards, has been +

    Imprimis: I am a man who, from his youth upwards, has been filled with a profound conviction that the easiest way of life is the best.. Hence, though I belong to a profession proverbially energetic and nervous, even to turbulence, at times, yet nothing of that sort have I ever suffered to invade @@ -47,7 +49,7 @@ it, and rings like unto bullion. I will freely add, that I was not insensible to the late John Jacob Astor's good opinion.

    -

    Some time prior to the period at which this little history begins, my +

    Some time prior to the period at which this little history begins, my avocations had been largely increased. The good old office, now extinct in the State of New York, of a Master in Chancery, had been conferred upon me. It was not a very arduous office, but very @@ -59,7 +61,7 @@ act; inasmuch as I had counted upon a life-lease of the profits, whereas I only received those of a few short years. But this is by the way.

    -

    My chambers were up stairs at No.--Wall-street. At one end they looked +

    My chambers were up stairs at No.--Wall-street. At one end they looked upon the white wall of the interior of a spacious sky-light shaft, penetrating the building from top to bottom. This view might have been considered rather tame than otherwise, deficient @@ -72,7 +74,7 @@ panes. Owing to the great height of the surrounding buildings, and my chambers being on the second floor, the interval between this wall and mine not a little resembled a huge square cistern.

    -

    At the period just preceding the advent of Bartleby, I had two persons +

    At the period just preceding the advent of Bartleby, I had two persons as copyists in my employment, and a promising lad as an office-boy. First, Turkey; second, Nippers; third, Ginger Nut.These may seem names, the like of which are not usually found in the Directory. In truth they were nicknames, mutually conferred upon @@ -124,23 +126,23 @@ intolerably fervid, as he oratorically assured me--gesticulating with a long ruler at the other end of the room--that if his services in the morning were useful, how indispensible, then, in the afternoon?

    -

    "With +

    "With submission, sir," said Turkey on this occasion, "I consider myself your right-hand man. In the morning I but marshal and deploy my columns; but in the afternoon I put myself at their head, and gallantly charge the foe, thus!"--and he made a violent thrust with the ruler.

    -

    "But the blots, Turkey," intimated I.

    -

    "True,--but, with submission, sir, behold these hairs! I am getting old. +

    "But the blots, Turkey," intimated I.

    +

    "True,--but, with submission, sir, behold these hairs! I am getting old. Surely, sir, a blot or two of a warm afternoon is not the page--is honorable. With submission, sir, we both are getting old."

    -

    This +

    This appeal to my fellow-feeling was hardly to be resisted. At all events, I saw that go he would not. So I made up my mind to let him stay, resolving, nevertheless, to see to it, that during the afternoon he had to do with my less important papers.

    -

    Nippers, the second on my list, was a whiskered, sallow, and, upon the +

    Nippers, the second on my list, was a whiskered, sallow, and, upon the whole, rather piratical-looking young man of about five and twenty. I always deemed him the victim of two evil powers-- ambition and indigestion. The ambition was evinced by a certain impatience of the @@ -199,7 +201,7 @@ much oats are bad for horses. In fact, precisely as a rash, restive horse is said to feel his oats, so Turkey felt his coat. It made him insolent. He was a man whom prosperity harmed.

    -

    Though concerning the self-indulgent habits of Turkey I had my own private +

    Though concerning the self-indulgent habits of Turkey I had my own private surmises, yet touching Nippers I was well persuaded that whatever might be his faults in other respects, he was, at least, a temperate young man. But indeed, nature herself seemed to have been his vintner, and at his birth charged @@ -211,14 +213,14 @@ as if the table were a perverse voluntary agent, intent on thwarting and vexing him; I plainly perceive that for Nippers, brandy and water were altogether superfluous.

    -

    It was fortunate for me that, owing to its course--indigestion--the irritability +

    It was fortunate for me that, owing to its course--indigestion--the irritability and consequent nervousness of Nippers, were mainly observable in the morning, while in the afternoon he was comparatively mild. So that Turkey's paroxysms only coming on about twelve o'clock, I never had to do with their eccentricities at one time. Their fits relieved each other like guards. When Nippers' was on, Turkey's was off, and vice versa. This was a good natural arrangement under the circumstances.

    -

    Ginger Nut, the third on my list, was a lad some twelve years old. His +

    Ginger Nut, the third on my list, was a lad some twelve years old. His father was a carman, ambitious of seeing his son on the bench instead of a cart, before he died. So he sent him to my office as a student at law, errand boy, and cleaner and sweeper, at the rate of one dollar a week. @@ -244,7 +246,7 @@ me by making an oriental bow, and saying--"With submission, sir, it was generous of me to find you in stationery on my own account."

    -

    Now my original business--that of a conveyancer +

    Now my original business--that of a conveyancer and title hunter, and drawer-up of recondite documents of all sorts--was considerably increased by receiving the master's office. There was now great work for scriveners. Not only must I push the clerks @@ -252,12 +254,12 @@ a motionless young man one morning, stood upon my office threshold, the door being open, for it was summer. I can see that figure now--pallidly neat, pitiably respectable, incurably forlorn! It was Bartleby.

    -

    After a few words touching his qualifications, I engaged him, glad to +

    After a few words touching his qualifications, I engaged him, glad to have among my corps of copyists a man of so singularly sedate an aspect, which I thought might operate beneficially upon the flighty temper of Turkey, and the fiery one of Nippers.

    -

    I should have stated before that ground glass folding-doors divided my +

    I should have stated before that ground glass folding-doors divided my premises into two parts, one of which was occupied by my scriveners, the other by myself. According to my humor I threw open these doors, or closed them. I resolved @@ -274,14 +276,16 @@ a high green folding screen, which might entirely isolate Bartleby from my sight, though not remove him from my voice. And thus, in a manner, privacy and society were - conjoined.

    -

    At first Bartleby did an extraordinary quantity of writing. As if long + conjoined. +

    +

    At first Bartleby did an extraordinary quantity of writing. As if long famishingfor something to copy, he seemed to gorge himself on my documents. There was no pause for digestion. He ran a day and night line, copying by sun-light and by candle-light. I should have been quite delighted with his application, had be been cheerfully - industrious. But he wrote on silently, palely, mechanically.

    -

    It is, of course, an indispensable part of a scrivener's business to verify + industrious. But he wrote on silently, palely, mechanically. +

    +

    It is, of course, an indispensable part of a scrivener's business to verify the accuracy of his copy, word by word. Where there are two or more scriveners in an office, they assist each other in this examination, one reading from the copy, the other holding the original. It is a very @@ -291,7 +295,7 @@ poet Byron would have contentedly sat down with Bartleby to examine a law document of, say five hundred pages, closely written in a crimpy hand.

    -

    Now and then, in the haste of business, it had been my habit to assist +

    Now and then, in the haste of business, it had been my habit to assist in comparing some brief document myself, calling Turkey or Nippers for this purpose. One object I had in placing Bartleby so handy to me behind the screen, was to avail myself of his services on such trivial @@ -304,22 +308,23 @@ nervously extended with the copy, so that immediately upon emerging from his retreat, Bartleby might snatch it and proceed to business without the least delay.

    -

    In this very attitude did I sit when I called to him, rapidly stating +

    In this very attitude did I sit when I called to him, rapidly stating what it was I wanted him to do--namely, to examine a small paper with me. Imagine my surprise, nay, my consternation, when without moving from his privacy, Bartleby in a singularly mild, firm voice, replied,"I - would prefer not to."

    -

    I sat awhile in perfect silence, rallying my stunned faculties. Immediately + would prefer not to." +

    +

    I sat awhile in perfect silence, rallying my stunned faculties. Immediately it occurred to me that my ears had deceived me, or Bartleby had entirely misunderstood my meaning. I repeated my request in the clearest tone I could assume. But in quite as clear a one came the previous reply, "I would prefer not to."

    -

    "Prefer not to," echoed I, rising in high excitement, and crossing the +

    "Prefer not to," echoed I, rising in high excitement, and crossing the room with a stride, "What do you mean? Are you moon-struck? I want you to help me compare this sheet here--take it," and I thrust it towards him.

    -

    "I would prefer not to," said he.

    -

    I looked at him steadfastly. His face was leanly composed; his gray eye +

    "I would prefer not to," said he.

    +

    I looked at him steadfastly. His face was leanly composed; his gray eye dimly calm. Not a wrinkle of agitation rippled him. Had there been the least uneasiness, anger, impatience or impertinence in his manner; in other words, had there been any thing ordinarily @@ -332,7 +337,7 @@ me. I concluded to forget the matter for the present, reserving it for my future leisure. So calling Nippers from the other room, the paper was speedily examined.

    -

    A few days after this, Bartleby concluded four lengthy documents, being +

    A few days after this, Bartleby concluded four lengthy documents, being quadruplicates of a week's testimony taken before me in my High Court of Chancery. It became necessary to examine them. It was an important suit, and great accuracy was imperative. Having all things arranged I called @@ -342,61 +347,63 @@ in a row, each with his document in hand, when I called to Bartleby to join this interesting group.

    -

    "Bartleby! quick, I am waiting."

    -

    I heard a low scrape of his chair legs on the unscraped floor, and soon - he appeared standing at the entrance of his hermitage.

    -

    "What is wanted?" said he mildly.

    -

    "The copies, the copies," said I hurriedly. "We are going to examine them. +

    "Bartleby! quick, I am waiting."

    +

    I heard a low scrape of his chair legs on the unscraped floor, and soon + he appeared standing at the entrance of his hermitage. +

    +

    "What is wanted?" said he mildly.

    +

    "The copies, the copies," said I hurriedly. "We are going to examine them. There"--and I held towards him the fourth quadruplicate.

    -

    "I would prefer not to," he said, and gently disappeared behind the screen.

    -

    For a few moments I was turned into a +

    "I would prefer not to," he said, and gently disappeared behind the screen.

    +

    For a few moments I was turned into a pillar of salt, standing at the head of my seated column of clerks. Recovering myself, I advanced towards the screen, and demanded the reason for such extraordinary conduct.

    -

    "Why do you refuse?"

    -

    "I would prefer not to."

    -

    With any other man I should have flown +

    "Why do you refuse?"

    +

    "I would prefer not to."

    +

    With any other man I should have flown outright into a dreadful passion, scorned all further words, and thrust him ignominiously from my presence. But there was something about Bartleby that not only strangely disarmed me, but in a wonderful manner touched and disconcerted me. I began to reason with him.

    -

    "These are your own copies we are about to examine. It is labor saving +

    "These are your own copies we are about to examine. It is labor saving to you, because one examination will answer for your four papers. It is common usage. Every copyist is bound to help examine his copy. Is it not so? Will you not speak? Answer!"

    -

    "I prefer not to," he replied in a flute-like tone. It seemed to me that +

    "I prefer not to," he replied in a flute-like tone. It seemed to me that while I had been addressing him, he carefully revolved every statement that I made; fully comprehended the meaning; could not gainsay the irresistible conclusion; but, at the same time, some paramount consideration prevailed with him to reply as he did.

    -

    "You are decided, then, not to comply with my request--a request made +

    "You are decided, then, not to comply with my request--a request made according to common usage and common sense?"

    -

    He briefly gave me to understand that on that point my +

    He briefly gave me to understand that on that point my judgment was sound. Yes: his decision was irreversible.

    -

    It is not seldom the case that when a man is browbeaten in some unprecedented and +

    It is not seldom the case that when a man is browbeaten in some unprecedented and violently unreasonable way, he begins to stagger in his own plainest faith. He begins, as it were, vaguely to surmise that, wonderful as it may be, all the justice and all the reason is on the other side. Accordingly, if any disinterested persons are present, he turns to them for some reinforcement for his own - faltering mind.

    -

    "Turkey," said I, "what do you think of this? Am I not right?"

    -

    "With submission, sir," said Turkey, with his blandest tone, "I think + faltering mind. +

    +

    "Turkey," said I, "what do you think of this? Am I not right?"

    +

    "With submission, sir," said Turkey, with his blandest tone, "I think that you are."

    -

    "Nippers," said I, "what do you think of it?"

    -

    "I think I should kick him out of the office."

    -

    (The reader of nice perceptions will here perceive that, it being morning, +

    "Nippers," said I, "what do you think of it?"

    +

    "I think I should kick him out of the office."

    +

    (The reader of nice perceptions will here perceive that, it being morning, Turkey's answer is couched in polite and tranquil terms, but Nippers replies in ill-tempered ones. Or, to repeat a previous sentence, Nipper's ugly mood was on duty, and Turkey's off.)

    -

    "Ginger Nut," said I, willing to enlist the smallest suffrage in my behalf, +

    "Ginger Nut," said I, willing to enlist the smallest suffrage in my behalf, "what do you think of it?"

    -

    "I think, sir, he's a little luny," replied Ginger Nut, with a +

    "I think, sir, he's a little luny," replied Ginger Nut, with a grin.

    -

    "You hear what they say," said I, turning towards the screen, "come forth +

    "You hear what they say," said I, turning towards the screen, "come forth and do your duty."

    -

    But he vouchsafed no reply. I pondered a moment in sore perplexity. But +

    But he vouchsafed no reply. I pondered a moment in sore perplexity. But once more business hurried me. I determined again to postpone the consideration of this dilemma to my future leisure. With a little trouble we made out to examine the papers without Bartleby, though at every page or two, Turkey @@ -406,9 +413,9 @@ the stubborn oaf behind the screen. And for his (Nipper's) part, this was the first and the last time he would do another man's business without pay.

    -

    Meanwhile Bartleby sat in his hermitage, oblivious to every thing but +

    Meanwhile Bartleby sat in his hermitage, oblivious to every thing but his own peculiar business there.

    -

    Some days passed, the scrivener being employed upon another lengthy work. +

    Some days passed, the scrivener being employed upon another lengthy work. His late remarkable conduct led me to regard his way narrowly. I observed that he never went to dinner; indeed that he never went any where. As yet I had never of my personal knowledge known him to be outside of my office. @@ -419,7 +426,7 @@ to me where I sat. That boy would then leave the office jingling a few pence, and reappear with a handful of ginger-nuts which he delivered in the hermitage, receiving two of the cakes for his trouble.

    -

    He lives, then, on ginger-nuts, thought I; never eats a dinner, properly +

    He lives, then, on ginger-nuts, thought I; never eats a dinner, properly speaking; he must be a vegetarian then, but no; he never eats even vegetables, he eats nothing but ginger-nuts. My mind then ran on in reveries @@ -428,8 +435,9 @@ one of their peculiar constituents, and the final flavoring one. Now what was ginger? A hot, spicy thing. Was Bartleby hot and spicy? Not at all. Ginger, then, had no effect upon Bartleby. Probably - he preferred it should have none.

    -

    Nothing so aggravates an earnest person as a passive + he preferred it should have none. +

    +

    Nothing so aggravates an earnest person as a passive resistance. If the individual so resisted be of a not inhumane temper, and the resisting one perfectly harmless in his passivity; then, in the better moods of the former, he will endeavor charitably to construe @@ -452,72 +460,72 @@ to strike fire with my knuckles against a bit of Windsor soap. But one afternoon the evil impulse in me mastered me, and the following little scene ensued:

    -

    "Bartleby," said I, "when those papers are all copied, I will compare +

    "Bartleby," said I, "when those papers are all copied, I will compare them with you."

    -

    "I would prefer not to."

    -

    "How? Surely you do not mean to persist in that mulish +

    "I would prefer not to."

    +

    "How? Surely you do not mean to persist in that mulish vagary?"

    -

    No answer.

    -

    I threw open the folding-doors near by, and turning upon Turkey and Nippers, +

    No answer.

    +

    I threw open the folding-doors near by, and turning upon Turkey and Nippers, exclaimed in an excited manner--

    -

    "He says, a second time, he won't examine his papers. What do you think +

    "He says, a second time, he won't examine his papers. What do you think of it, Turkey?"

    -

    It was afternoon, be it remembered. Turkey sat glowing like a brass boiler, +

    It was afternoon, be it remembered. Turkey sat glowing like a brass boiler, his bald head steaming, his hands reeling among his blotted papers.

    -

    "Think of it?" roared Turkey; "I think I'll just step behind his screen, +

    "Think of it?" roared Turkey; "I think I'll just step behind his screen, and black his eyes for him!"

    -

    So saying, Turkey rose to his feet and threw his arms into a pugilistic +

    So saying, Turkey rose to his feet and threw his arms into a pugilistic position. He was hurrying away to make good his promise, when I detained him, alarmed at the effect of incautiously rousing Turkey's combativeness after dinner.

    -

    "Sit down, Turkey," said I, "and hear what Nippers has to say. What do +

    "Sit down, Turkey," said I, "and hear what Nippers has to say. What do you think of it, Nippers? Would I not be justified in immediately dismissing Bartleby?"

    -

    "Excuse me, that is for you to decide, sir. I think his conduct quite +

    "Excuse me, that is for you to decide, sir. I think his conduct quite unusual, and indeed unjust, as regards Turkey and myself. But it may only be a passing whim."

    -

    "Ah," exclaimed I, "you have strangely changed your mind then--you speak +

    "Ah," exclaimed I, "you have strangely changed your mind then--you speak very gently of him now."

    -

    "All beer," cried Turkey; "gentleness is effects of beer--Nippers and +

    "All beer," cried Turkey; "gentleness is effects of beer--Nippers and I dined together to-day. You see how gentle I am, sir. Shall I go and black his eyes?"

    -

    "You refer to Bartleby, I suppose. No, not to-day, Turkey," I replied; +

    "You refer to Bartleby, I suppose. No, not to-day, Turkey," I replied; "pray, put up your fists."

    -

    I closed the doors, and again advanced towards Bartleby. I felt additional +

    I closed the doors, and again advanced towards Bartleby. I felt additional incentives tempting me to my fate. I burned to be rebelled against again. I remembered that Bartleby never left the office.

    -

    "Bartleby," said I, "Ginger Nut is away; just step round to the Post +

    "Bartleby," said I, "Ginger Nut is away; just step round to the Post Office, won't you? (it was but a three minutes walk,) and see if there is any thing for me."

    -

    "I would prefer not to."

    -

    "You will not?"

    -

    "I prefer not."

    -

    I staggered to my desk, and sat there +

    "I would prefer not to."

    +

    "You will not?"

    +

    "I prefer not."

    +

    I staggered to my desk, and sat there in a deep study. My blind inveteracy returned. Was there any other thing in which I could procure myself to be ignominiously repulsed by this lean, penniless with?--my hired clerk? What added thing is there, perfectly reasonable, that he will be sure to refuse to do?

    -

    "Bartleby!"

    -

    No answer.

    -

    "Bartleby," in a louder tone.

    -

    No answer.

    -

    "Bartleby," I roared.

    -

    Like a +

    "Bartleby!"

    +

    No answer.

    +

    "Bartleby," in a louder tone.

    +

    No answer.

    +

    "Bartleby," I roared.

    +

    Like a very ghost, agreeably to the laws of magical invocation, at the third summons, he appeared at the entrance of his hermitage.

    -

    "Go to the next room, and tell Nippers to come to me."

    -

    "I prefer not to," he respectfully +

    "Go to the next room, and tell Nippers to come to me."

    +

    "I prefer not to," he respectfully and slowly said, and mildly disappeared.

    -

    "Very good, Bartleby," said I, in a quiet sort of serenely severe self-possessed +

    "Very good, Bartleby," said I, in a quiet sort of serenely severe self-possessed tone, intimating the unalterable purpose of some terrible retribution very close at hand. At the moment I half intended something of the kind. But upon the whole, as it was drawing towards my dinner-hour, I thought it best to put on my hat and walk home for the day, suffering much from perplexity and distress of mind.

    -

    Shall +

    Shall I acknowledge it? The conclusion of this whole business was that it soon became a fixed fact of my chambers, that a pale young scrivener, by the name of Bartleby, had a desk there; that he copied for me at the @@ -528,8 +536,9 @@ moreover, said Bartleby was never on any account to be dispatched on the most trivial errand of any sort; and that even if entreated to take upon him such a matter, it was generally understood that he would prefer not - to--in other words, that he would refuse point-blank.

    -

    32 As days passed on, I became considerably reconciled to Bartleby. His + to--in other words, that he would refuse point-blank. +

    +

    32 As days passed on, I became considerably reconciled to Bartleby. His steadiness, his freedom from all dissipation, his incessant industry (except when he chose to throw himself into a standing revery behind his screen), his great stillness, his unalterableness of demeanor under all circumstances, @@ -552,13 +561,13 @@ nature, refrain from bitterly exclaiming upon such perverseness--such unreasonableness. However, every added repulse of this sort which I received only tended to lessen the probability of my repeating the inadvertence.

    -

    Here is must be said, that according to the custom of most legal gentlemen +

    Here is must be said, that according to the custom of most legal gentlemen occupying chambers in densely-populated law buildings, there were several keys to my door. One was kept by a woman residing in the attic, which person weekly scrubbed and daily swept and dusted my apartments. Another was kept by Turkey for convenience sake. The third I sometimes carried in my own pocket. The fourth I knew not who had.

    -

    Now, one Sunday morning I happened to go to Trinity Church, to +

    Now, one Sunday morning I happened to go to Trinity Church, to hear a celebrated preacher, and finding myself rather early on the ground, I thought I would walk round to my chambers for a while. Luckily I had my key with me; but upon applying it to the lock, I found @@ -591,7 +600,7 @@ Besides, it was Sunday; and there was something about Bartleby that forbade the supposition that we would by any secular occupation violate the proprieties of the day.

    -

    Nevertheless, my mind was not pacified; and full of a restless curiosity, +

    Nevertheless, my mind was not pacified; and full of a restless curiosity, at last I returned to the door. Without hindrance I inserted my key, opened it, and entered. Bartleby was not to be seen. I looked round anxiously, peeped behind his screen; but it was very plain that he was gone. Upon @@ -612,8 +621,9 @@ and life, at nightfall echoes with sheer vacancy, and all through Sunday is forlorn. And here Bartleby makes his home; sole spectator of a solitude which he has seen all populous--a sort of innocent and transformed Marius - brooding among the ruins of Carthage!

    -

    For the first + brooding among the ruins of Carthage! +

    +

    For the first time in my life a feeling of overpowering stinging melancholy seized me. Before, I had never experienced aught but a not-unpleasing sadness. The bond of a common humanity now drew me irresistibly to gloom. A fraternal @@ -629,9 +639,9 @@ round me. The scrivener's pale form appeared to me laid out, among uncaring strangers, in its shivering winding sheet.

    -

    Suddenly I was attracted by Bartleby's closed desk, the key in open sight +

    Suddenly I was attracted by Bartleby's closed desk, the key in open sight left in the lock.

    -

    I +

    I mean no mischief, seek the gratification of no heartless curiosity, thought I; besides, the desk is mine, and its contents too, so I will make bold to look within. Every thing was methodically arranged, @@ -639,7 +649,7 @@ files of documents, I groped into their recesses. Presently I felt something there, and dragged it out. It was an old bandanna handkerchief, heavy and knotted. I opened it, and saw it was a savings' bank.

    -

    I now recalled all the quiet mysteries which I had noted in the man. I +

    I now recalled all the quiet mysteries which I had noted in the man. I remembered that he never spoke but to answer; that though at intervals he had considerable time to himself, yet I had never seen him reading--no, not even a newspaper; that for long periods he would stand looking out, @@ -658,7 +668,7 @@ even though I might know, from his long-continued motionlessness, that behind his screen he must be standing in one of those dead-wall reveries of his.

    -

    Revolving all these things, and coupling them with the recently discovered +

    Revolving all these things, and coupling them with the recently discovered fact that he made my office his constant abiding place and home, and not forgetful of his morbid moodiness; revolving all these things, a prudential feeling began to steal over me. My first emotions had been @@ -676,8 +686,9 @@ scrivener was the victim of innate and incurable disorder. I might give alms to his body; but his body did not pain him; it was his soul that suffered, and his - soul I could not reach.

    -

    I did not accomplish the purpose of going to Trinity Church that morning. + soul I could not reach. +

    +

    I did not accomplish the purpose of going to Trinity Church that morning. Somehow, the things I had seen disqualified me for the time from church-going. I walked homeward, thinking what I would do with Bartleby. Finally, I @@ -691,34 +702,35 @@ help to defray the expenses. Moreover, if after reaching home, he found himself at any time in want of aid, a letter from him would be sure of a reply.

    -

    The next morning came.

    -

    "Bartleby," said I, gently calling to him behind the screen.

    -

    No reply.

    -

    "Bartleby," said I, in a still gentler tone, "come here; I am not going +

    The next morning came.

    +

    "Bartleby," said I, gently calling to him behind the screen.

    +

    No reply.

    +

    "Bartleby," said I, in a still gentler tone, "come here; I am not going to ask you to do any thing you would prefer not to do--I simply wish to speak to you."

    -

    Upon this he noiselessly slid into view.

    -

    "Will you tell me, Bartleby, where - you were born?"

    -

    "I would prefer not to."

    -

    "Will you tell me anything about yourself?"

    -

    "I would prefer not to."

    -

    "But what reasonable +

    Upon this he noiselessly slid into view.

    +

    "Will you tell me, Bartleby, where + you were born?" +

    +

    "I would prefer not to."

    +

    "Will you tell me anything about yourself?"

    +

    "I would prefer not to."

    +

    "But what reasonable objection can you have to speak to me? I feel friendly towards you."

    -

    He did not look at me while I spoke, but kept his glance fixed upon my +

    He did not look at me while I spoke, but kept his glance fixed upon my bust of Cicero, which as I then sat, was directly behind me, some six inches above my head. "What is your answer, Bartleby?" said I, after waiting a considerable time for a reply, during which his countenance remained immovable, only there was the faintest conceivable tremor of the white attenuated mouth.

    -

    "At present I prefer to give no answer," he said, and retired into his +

    "At present I prefer to give no answer," he said, and retired into his hermitage.

    -

    It was rather weak in me I confess, but his manner on this occasion nettled +

    It was rather weak in me I confess, but his manner on this occasion nettled me. Not only did there seem to lurk in it a certain disdain, but his perverseness seemed ungrateful, considering the undeniable good usage and indulgence he had received from me.

    -

    Again I sat ruminating what I should do.Mortified as I was at his behavior, +

    Again I sat ruminating what I should do.Mortified as I was at his behavior, and resolved as I had been to dismiss him when I entered my office, nevertheless I strangely felt something superstitious knocking at my heart, and forbidding me to carry out my purpose, and denouncing me for a villain if I dared @@ -729,45 +741,47 @@ Say now you will help to examine papers tomorrow or next day: in short, say now that in a day or two you will begin to be a little reasonable:--say so, Bartleby."

    -

    "At present I would prefer not to be a little reasonable +

    "At present I would prefer not to be a little reasonable was his idly cadaverous reply.,"

    -

    Just then the folding-doors opened, and Nippers approached. He seemed +

    Just then the folding-doors opened, and Nippers approached. He seemed suffering from an unusually bad night's rest, induced by severer indigestion than common. He overheard those final words of Bartleby.

    -

    "Prefer not, +

    "Prefer not, eh?" gritted Nippers--"I'd prefer him, if I were you, sir," addressing me--"I'd prefer him; I'd give him preferences, the stubborn mule! What is it, sir, pray, that he prefers not to do now?"

    -

    Bartleby moved not a limb.

    -

    "Mr. - Nippers," said I, "I'd prefer that you would withdraw for the present."

    -

    Somehow, of late I had got into the way of involuntary using this word +

    Bartleby moved not a limb.

    +

    "Mr. + Nippers," said I, "I'd prefer that you would withdraw for the present." +

    +

    Somehow, of late I had got into the way of involuntary using this word "prefer" upon all sorts of not exactly suitable occasions. And I trembled to think that my contact with the scrivener had already and seriously affected me in a mental way. And what further and deeper aberration might it not yet produce? This apprehension had not been without efficacy in determining me to summary means.

    -

    As Nippers, looking very sour and sulky, was departing, Turkey blandly +

    As Nippers, looking very sour and sulky, was departing, Turkey blandly and deferentially approached.

    -

    "With submission, sir," said he, "yesterday I was thinking about Bartleby +

    "With submission, sir," said he, "yesterday I was thinking about Bartleby here, and I think that if he would but prefer to take a quart of good ale every day, it would do much towards mending him, and enabling him to assist in examining his papers."

    -

    "So you have got the word too," said I, slightly excited.

    -

    "With submission, what word, sir," asked Turkey, respectfully crowding +

    "So you have got the word too," said I, slightly excited.

    +

    "With submission, what word, sir," asked Turkey, respectfully crowding himself into the contracted space behind the screen, and by so doing, making me jostle the scrivener. "What word, sir?"

    -

    "I would prefer to be left alone here," said Bartleby, as if offended +

    "I would prefer to be left alone here," said Bartleby, as if offended at being mobbed - in his privacy.

    -

    "That's the word, Turkey," said I--"that's it."

    -

    "Oh, prefer oh yes--queer word. I never use it myself. But, sir + in his privacy. +

    +

    "That's the word, Turkey," said I--"that's it."

    +

    "Oh, prefer oh yes--queer word. I never use it myself. But, sir as I was saying, if he would but prefer--"

    -

    "Turkey," interrupted I, "you will please withdraw."

    -

    "Oh, certainly, sir, if +

    "Turkey," interrupted I, "you will please withdraw."

    +

    "Oh, certainly, sir, if you prefer that I should."

    -

    As he opened the folding-door to retire, Nippers at his desk caught a +

    As he opened the folding-door to retire, Nippers at his desk caught a glimpse of me, and asked whether I would prefer to have a certain paper copied on blue paper or white. He did not in the least roguishly accent the word prefer. It was plain that it involuntarily rolled from his tongue. @@ -775,21 +789,21 @@ degree turned the tongues, if not the heads of myself and clerks. But I thought it prudent not to break the dismission at once.

    -

    The next day I noticed that Bartleby +

    The next day I noticed that Bartleby did nothing but stand at his window in his dead-wall revery. Upon asking him why he did not write, he said that he had decided upon doing no more writing.

    -

    "Why, how now? what next?" exclaimed I, "do no more writing?"

    -

    "No more."

    -

    "And what is the reason?"

    -

    "Do +

    "Why, how now? what next?" exclaimed I, "do no more writing?"

    +

    "No more."

    +

    "And what is the reason?"

    +

    "Do you not see the reason for yourself," he indifferently replied.

    -

    I looked steadfastly at him, and perceived that his eyes looked dull and +

    I looked steadfastly at him, and perceived that his eyes looked dull and glazed. Instantly it occurred to me, that his unexampled diligence in copying by his dim window for the first few weeks of his stay with me might have temporarily impaired his vision.

    -

    I was touched. I said something in condolence with him. I hinted that +

    I was touched. I said something in condolence with him. I hinted that of course he did wisely in abstaining from writing for a while; and urged him to embrace that opportunity of taking wholesome exercise in the open air. This, however, he @@ -799,17 +813,18 @@ be less inflexible than usual, and carry these letters to the post-office. But he blankly declined. So, much to my inconvenience, I went myself.

    -

    Still added +

    Still added days went by. Whether Bartleby's eyes improved or not, I could not say. To all appearance, I thought they did. But when I asked him if they did, he vouchsafed no answer. At all events, he would do no copying. At last, in reply to my urgings, he informed me that he had permanently given up copying.

    -

    "What!" exclaimed I; "suppose your eyes should get entirely well- better +

    "What!" exclaimed I; "suppose your eyes should get entirely well- better than ever before--would you not copy then?"

    -

    "I have given up copying," he answered, and slid - aside.

    -

    He remained as ever, a +

    "I have given up copying," he answered, and slid + aside. +

    +

    He remained as ever, a fixture in my chamber. Nay--if that were possible--he became still more of a fixture than before. What was to be done? He would do nothing in the office: why should he stay there? In plain fact, he had now become @@ -827,16 +842,17 @@ would but take the first step towards a removal. "And when you finally quit me, Bartleby," added I, "I shall see that you go not away entirely unprovided. Six days from this hour, remember."

    -

    At the expiration of that period, I peeped behind the screen, and lo! - Bartleby was there.

    -

    I buttoned +

    At the expiration of that period, I peeped behind the screen, and lo! + Bartleby was there. +

    +

    I buttoned up my coat, balanced myself; advanced slowly towards him, touched his shoulder, and said, "The time has come; you must quit this place; I am sorry for you; here is money; but you must go."

    -

    "I would prefer not," he replied, with his back still towards me.

    -

    "You must."

    -

    He remained silent.

    -

    Now I had an unbounded confidence in this man's common honesty. He had +

    "I would prefer not," he replied, with his back still towards me.

    +

    "You must."

    +

    He remained silent.

    +

    Now I had an unbounded confidence in this man's common honesty. He had frequently restored to me six pences and shillings carelessly dropped upon the floor, for I am apt to be very reckless in such shirt-button affairs. The proceeding then which followed will not be @@ -844,8 +860,8 @@ said I, "I owe you twelve dollars on account; here are thirty-two; the odd twenty are yours.--Will you take it? and I handed the bills towards him.

    -

    But he made no motion.

    -

    "I will leave them here then," putting them under a weight on the table. +

    But he made no motion.

    +

    "I will leave them here then," putting them under a weight on the table. Then taking my hat and cane and going to the door I tranquilly turned and added--"After you have removed your things from these offices, Bartleby, you will of course lock the door--since every one is now gone for the day @@ -853,10 +869,10 @@ may have it in the morning. I shall not see you again; so good-bye to you. If hereafter in your new place of abode I can be of any service to you, do not fail to advise me by letter. Good-bye, Bartleby, and fare you well."

    -

    But he answered not a word; like the +

    But he answered not a word; like the last column of some ruined temple, he remained standing mute and solitary in the middle of the otherwise deserted room.

    -

    As I walked home in a pensive mood, my vanity +

    As I walked home in a pensive mood, my vanity got the better of my pity. I could not but highly plume myself on my masterly management in getting rid of Bartleby. Masterly I call it, and such it must appear to any dispassionate thinker. The beauty @@ -877,22 +893,22 @@ The great point was, not whether I had assumed that he would quit me, but whether he would prefer so to do. He was more a man of preferences than assumptions.

    -

    After breakfast, I walked down town, arguing the probabilities pro and +

    After breakfast, I walked down town, arguing the probabilities pro and con. One moment I thought it would prove a miserable failure, and Bartleby would be found all alive at my office as usual; the next moment it seemed certain that I should see his chair empty. And so I kept veering about. At the corner of Broadway and Canal- street, I saw quite an excited group of people standing in earnest conversation.

    -

    "I'll take odds he doesn't," said a voice as I passed.

    -

    "Doesn't go?--done!" said I, "put up your money."

    -

    I was instinctively putting my hand in my pocket to produce my own, when +

    "I'll take odds he doesn't," said a voice as I passed.

    +

    "Doesn't go?--done!" said I, "put up your money."

    +

    I was instinctively putting my hand in my pocket to produce my own, when I remembered that this was an election day. The words I had overheard bore no reference to Bartleby, but to the success or non-success of some candidate for the mayoralty. In my intent frame of mind, I had, as it were, imagined that all Broadway shared in my excitement, and were debating the same question with me. I passed on, very thankful that the uproar of the street screened my momentary absent-mindedness.

    -

    As I had intended, I was earlier than usual at my office door. I stood +

    As I had intended, I was earlier than usual at my office door. I stood listening for a moment. All was still. He must be gone. I tried the knob. The door was locked. Yes, my procedure had worked to a charm; he indeed must be vanished. Yet a certain melancholy mixed with this: I was almost @@ -900,8 +916,8 @@ door mat for the key, which Bartleby was to have left there for me, when accidentally my knee knocked against a panel, producing a summoning sound, and in response a voice came to me from within--"Not yet; I am occupied."

    -

    It was Bartleby.

    -

    I was thunderstruck. For an instant I stood like +

    It was Bartleby.

    +

    I was thunderstruck. For an instant I stood like the man who, pipe in mouth, was killed one cloudless afternoon long ago in Virginia, by summer lightning; at his own warm open window he was killed, and remained leaning out there upon the dreamy afternoon, @@ -926,7 +942,7 @@ such an application of the doctrine of assumptions. But upon second thoughts the success of the plan seemed rather dubious. I resolved to argue the matter over with him again.

    -

    Bartleby," said I, entering the office, with a quietly severe expression. +

    Bartleby," said I, entering the office, with a quietly severe expression. "I am seriously displeased. I am pained, Bartleby. I had thought better of you. I had imagined you of such a gentlemanly organization, that in any delicate dilemma a slight hint @@ -934,22 +950,22 @@ I added, unaffectedly starting, "you have not even touched the money yet," pointing to it, just where I had left it the evening previous.

    -

    He answered nothing.

    -

    "Will you, or will you not, quit me?" I now demanded in a sudden +

    He answered nothing.

    +

    "Will you, or will you not, quit me?" I now demanded in a sudden passion, advancing close to him.

    -

    "I would prefer not to quit you," he replied, gently +

    "I would prefer not to quit you," he replied, gently emphasizing the not.

    -

    "What earthly +

    "What earthly right have you to stay here? do you pay any rent? Do you pay my taxes? Or is this property yours?"

    -

    He answered nothing.

    -

    "Are you ready to go on and write now? Are your eyes recovered? Could +

    He answered nothing.

    +

    "Are you ready to go on and write now? Are your eyes recovered? Could you copy a small paper for me this morning? or help examine a few lines? or step round to the post-office? In a word, will you do any thing at all, to give a coloring to your refusal to depart the premises?"

    -

    He silently +

    He silently retired into his hermitage.

    -

    I was now in such a state of nervous resentment that I thought it but +

    I was now in such a state of nervous resentment that I thought it but prudentto check myself at present from further demonstrations. Bartleby and I were alone. I remembered the tragedy of the unfortunate Adams and the still @@ -966,7 +982,7 @@ office, doubtless of a dusty, haggard sort of appearance;--this it must have been, which greatly helped to enhance the irritable desperation of the hapless Colt.

    -

    But when this old +

    But when this old Adam of resentment rose in me and tempted me concerning Bartleby, I grappled him and threw him. How? Why, simply by recalling the divine injunction: "A new commandment give I unto you, that ye @@ -984,7 +1000,7 @@ my exasperated feelings towards the scrivener by benevolently construing his conduct. Poor fellow, poor fellow! thought I, he don't mean any thing; and besides, he has seen hard times, and ought to be indulged.

    -

    I endeavored also immediately to occupy myself, and at the same time +

    I endeavored also immediately to occupy myself, and at the same time to comfort my despondency.I tried to fancy that in the course of the morning, at such time as might prove agreeable to him, Bartleby, of his own free accord, would emerge from his hermitage, and take up some decided @@ -995,7 +1011,7 @@ window in one of his profoundest deadwall reveries. Will it be credited? Ought I to acknowledge it? That afternoon I left the office without saying one further word to him.

    -

    Some days now passed, during which, at leisure intervals I looked a little +

    Some days now passed, during which, at leisure intervals I looked a little into Edwards on the Will," and "Priestly on Necessity." Under the circumstances, those books induced a salutary feeling. Gradually I slid @@ -1011,7 +1027,7 @@ but my mission in this world, Bartleby, is to furnish you with office-room for such period as you may see fit to remain.

    -

    I believe that this wise and blessed frame of mind would have continued +

    I believe that this wise and blessed frame of mind would have continued with me, had it not been for the unsolicited and uncharitable remarks obtruded upon me by my professional friends who visited the rooms. But thus it often @@ -1027,7 +1043,7 @@ talk, Bartleby would remain standing immovable in the middle of the room. So after contemplating him in that position for a time, the attorney would depart, no wiser than he came.

    -

    Also, when a Reference was going on, and the room full of lawyers and +

    Also, when a Reference was going on, and the room full of lawyers and witnesses and business was driving fast; some deeply occupied legal gentleman present, seeing Bartleby wholly unemployed, would request him to run round to his (the legal gentleman's) office and fetch some papers for him. Thereupon, @@ -1048,14 +1064,14 @@ in my room; a great change was wrought in me. I resolved to gather all my faculties together, and for ever rid me of this intolerable incubus.

    -

    Ere revolving any complicated project, however, adapted to this end, I +

    Ere revolving any complicated project, however, adapted to this end, I first simply suggested to Bartleby the propriety of his permanent departure. In a calm and serious tone, I commended the idea to his careful and mature consideration. But having taken three days to meditate upon it, he apprised me that his original determination remained the same; in short, that he still preferred to abide with me.

    -

    What shall I do? I now said to myself, buttoning +

    What shall I do? I now said to myself, buttoning up my coat to the last button. What shall I do? what ought I to do? what does conscience say I should do with this man, or rather ghost. Rid myself of him, I must; go, he shall. But how? You will not thrust @@ -1067,7 +1083,7 @@ your coaxing, he will not budge. Bribes he leaves under your own paperweight on your table; in short, it is quite plain that he prefers to cling to you.

    -

    Then something severe, something unusual must be done. What! surely you +

    Then something severe, something unusual must be done. What! surely you will not have him collared by a constable, and commit his innocent pallor to the common jail? And upon what ground could you procure such a thing to be done?--a vagrant, is he? What! he a vagrant, a wanderer, who @@ -1079,58 +1095,59 @@ me, I must quit him. I will change my offices; I will move elsewhere; and give him fair notice, that if I find him on my new premises I will then proceed against him as a common trespasser.

    -

    Acting accordingly, next day I thus addressed him: "I find these chambers +

    Acting accordingly, next day I thus addressed him: "I find these chambers too far from the City Hall; the air is unwholesome. In a word, I propose to remove my offices next week, and shall no longer require your services. I tell you this now, in order that you may seek another place."

    -

    He made no reply, and nothing more was said.

    -

    On the appointed day I engaged carts and men, proceeded to my chambers, +

    He made no reply, and nothing more was said.

    +

    On the appointed day I engaged carts and men, proceeded to my chambers, and having but little furniture, every thing was removed in a few hours. Throughout, the scrivener remained standing behind the screen, which I directed to be removed the last thing. It was withdrawn; and being folded up like a huge folio, left him the motionless occupant of a naked room. I stood in the entry watching him a moment, while something from within me upbraided me.

    -

    I re-entered, with my hand - in my pocket--and--and my heart in my mouth.

    -

    "Good-bye, Bartleby; I am going--good-bye, and God some way bless you; +

    I re-entered, with my hand + in my pocket--and--and my heart in my mouth. +

    +

    "Good-bye, Bartleby; I am going--good-bye, and God some way bless you; and take that," slipping something in his hand. But it dropped to the floor, and then,--strange to say--I tore myself from him whom I had so longed to be rid of.

    -

    Established in my new quarters, for a day or two I kept the door locked, +

    Established in my new quarters, for a day or two I kept the door locked, and started at every footfall in the passages. When I returned to my rooms after any little absence, I would pause at the threshold for an instant, and attentively listen, ere applying my key. But these fears were needless. Bartleby never came nigh me.

    -

    I thought all was going well, when a perturbed looking stranger visited +

    I thought all was going well, when a perturbed looking stranger visited me, inquiring whether I was the person who had recently occupied rooms at No.--Wall-street.

    -

    Full of forebodings, I replied that I was.

    -

    "Then, sir," said the stranger, +

    Full of forebodings, I replied that I was.

    +

    "Then, sir," said the stranger, who proved a lawyer, "you are responsible for the man you left there. He refuses to do any copying; he refuses to do any thing; he says he prefers not to; and he refuses to quit the premises."

    -

    "I am very sorry, sir," said I, with assumed tranquillity, but an inward +

    "I am very sorry, sir," said I, with assumed tranquillity, but an inward tremor, "but, really, the man you allude to is nothing to me --he is no relation or apprentice of mine, that you should hold me responsible for him."

    -

    "In mercy's name, who is he?"

    -

    "I certainly cannot inform you. I know nothing about him. Formerly I employed +

    "In mercy's name, who is he?"

    +

    "I certainly cannot inform you. I know nothing about him. Formerly I employed him as a copyist; but he has done nothing for me now for some time past."

    -

    "I +

    "I shall settle him then,--good morning, sir."

    -

    Several days passed, and I heard nothing more; and though I often felt +

    Several days passed, and I heard nothing more; and though I often felt a charitable prompting to call at the place and see poor Bartleby, yet a certain squeamishness of I know not what withheld me.

    -

    All is over with him, by this time, thought I at last, when through another +

    All is over with him, by this time, thought I at last, when through another week no further intelligence reached me. But coming to my room the day after, I found several persons waiting at my door in a high state of nervous excitement.

    -

    "That's the man--here he comes," cried the foremost one, whom recognized +

    "That's the man--here he comes," cried the foremost one, whom recognized as the lawyer who had previously called upon me alone.

    -

    "You must take him away, sir, at once," cried a portly person among them, +

    "You must take him away, sir, at once," cried a portly person among them, advancing upon me, and whom I knew to be the landlord of No.--Wall-street. "These gentlemen, my tenants, cannot stand it any longer; Mr. B--" pointing to the lawyer, "has turned him out of his room, and he now persists in @@ -1139,7 +1156,7 @@ clients are leaving the offices; some fears are entertained of a mob; something you must do, and that without delay."

    -

    Aghast +

    Aghast at this torment, I fell back before it, and would fain have locked myselfin my new quarters. In vain I persisted that Bartleby was nothing to me--no more than to any one else. In vain:--I was the last @@ -1150,39 +1167,39 @@ if the lawyer would give me a confidential interview with the scrivener, in his (the lawyer's) own room, I would that afternoon strive my best to rid them of the nuisance they complained of.

    -

    Going up stairs to my old haunt, there was Bartleby silently sitting upon +

    Going up stairs to my old haunt, there was Bartleby silently sitting upon the banister at the landing.

    -

    "What are you doing here, Bartleby?" said I.

    -

    "Sitting upon the banister," he mildly replied.

    -

    I motioned him into the lawyer's room, who then left us.

    -

    "Bartleby," +

    "What are you doing here, Bartleby?" said I.

    +

    "Sitting upon the banister," he mildly replied.

    +

    I motioned him into the lawyer's room, who then left us.

    +

    "Bartleby," said I, "are you aware that you are the cause of great tribulation to me, by persisting in occupying the entry after being dismissed from the office?"

    -

    No answer.

    -

    "Now one of two things must take place. Either you must do something or +

    No answer.

    +

    "Now one of two things must take place. Either you must do something or something must be done to you. Now what sort of business would you like to engage in? Would you like to re-engage in copying for some one?"

    -

    "No; I would prefer not to make any change."

    -

    "Would you like a clerkship in a dry-goods store?"

    -

    "There is too much confinement about that. No, I would not like a clerkship; +

    "No; I would prefer not to make any change."

    +

    "Would you like a clerkship in a dry-goods store?"

    +

    "There is too much confinement about that. No, I would not like a clerkship; but I am not particular."

    -

    "Too much confinement," I cried, "why you keep yourself confined all the +

    "Too much confinement," I cried, "why you keep yourself confined all the time!"

    -

    "I would prefer not to take a clerkship," he rejoined, as if to settle +

    "I would prefer not to take a clerkship," he rejoined, as if to settle that little item at once.

    -

    "How would a bar-tender's business suit you? There is no trying of the +

    "How would a bar-tender's business suit you? There is no trying of the eyesight in that."

    -

    "I would not like it at all; though, as I said before, I am not particular."

    -

    His unwonted wordiness inspirited me. I returned to the charge.

    -

    "Well then, would you like to travel through the country collecting bills +

    "I would not like it at all; though, as I said before, I am not particular."

    +

    His unwonted wordiness inspirited me. I returned to the charge.

    +

    "Well then, would you like to travel through the country collecting bills for the merchants? That would improve your health."

    -

    "No, I would prefer to be doing something else."

    -

    "How then would going as a companion to Europe, to entertain some young +

    "No, I would prefer to be doing something else."

    +

    "How then would going as a companion to Europe, to entertain some young gentleman with your conversation,--how would that suit you?"

    -

    "Not at all. It does not strike me that there is any thing definite about +

    "Not at all. It does not strike me that there is any thing definite about that. I like to be stationary. But I am not particular.

    -

    "Stationary you shall be then," I cried, now losing all patience, and +

    "Stationary you shall be then," I cried, now losing all patience, and for the first time in all my exasperating connection with him fairly flying into a passion. "If you do not go away from these premises before night, I shall feel bound--indeed I am bound--to-- to--to quit the premises @@ -1190,14 +1207,15 @@ threat to try to frighten his immobility into compliance. Despairing of all further efforts, I was precipitately leaving him, when a final thought occurred to me--one - which had not been wholly unindulged before.

    -

    "Bartleby," said I, in the kindest tone + which had not been wholly unindulged before. +

    +

    "Bartleby," said I, in the kindest tone I could assume under such exciting circumstances, "will you go home with me now--not to my office, but my dwelling--and remain there till we can conclude upon some convenient arrangement for you at our leisure? Come, let us start now, right away."

    -

    "No: at present I would prefer not to make any change at all."

    -

    I answered nothing; but effectualy dodging every one by the suddenness +

    "No: at present I would prefer not to make any change at all."

    +

    I answered nothing; but effectualy dodging every one by the suddenness and rapidity of my flight, rushed from the building, ran up Wall-street towards Broadway, and jumping into the first omnibus was soon removed from pursuit. As soon as tranquility returned I distinctly @@ -1212,7 +1230,7 @@ through the suburbs, in my rockaway; crossed over to Jersey City and Hoboken, and paid fugitive visits to Manhattanville and Astoria. In fact I almost lived in my rockaway for the time.

    -

    When again I entered my office, lo, a note from the landlord lay upon +

    When again I entered my office, lo, a note from the landlord lay upon desk. opened it with trembling hands. informed me that writer had sent to police, and Bartleby removed the Tombs as a vagrant. Moreover, since I knew more @@ -1222,15 +1240,16 @@ The landlord's energetic, summary disposition, had led him to adopt a procedure which I do not think I would have decided upon myself; and yet as a last resort, under such peculiar circumstances, it seemed the only plan.

    -

    As I afterwards learned, the poor scrivener, when told that he must be +

    As I afterwards learned, the poor scrivener, when told that he must be conducted to the Tombs, offered not the slightest obstacle, but in his pale unmoving way, silently - acquiesced.

    -

    Some of the compassionate and curious bystanders joined the party; and + acquiesced. +

    +

    Some of the compassionate and curious bystanders joined the party; and headed by one of the constables arm in arm with Bartleby, the silent procession filed its way through all the noise, and heat, and joy of the roaring thoroughfares at noon.

    -

    The same day I received the note I went to the Tombs, or to speak more +

    The same day I received the note I went to the Tombs, or to speak more properly, the Halls of Justice. Seeking the right officer, I stated the purpose of my call, and was informed that the individual I described was indeed within. I then assured the functionary that Bartleby was a perfectly @@ -1240,81 +1259,82 @@ harsh might be done--though indeed I hardly knew what. At all events, if nothing else could be decided upon, the alms-house must receive him. I then begged to have an interview.

    -

    Being under no disgraceful charge, and quite serene and harmless in all +

    Being under no disgraceful charge, and quite serene and harmless in all his ways, they had permitted him freely to wander about the prison, and especially in the inclosed grass-platted yards thereof. And so I found him there, standing all alone in the quietest of the yards, his face towards a high wall, while all around, from the narrow slits of the jail windows, I thought I - saw peering out upon him the eyes of murderers and thieves.

    -

    "Bartleby!"

    -

    "I + saw peering out upon him the eyes of murderers and thieves. +

    +

    "Bartleby!"

    +

    "I know you," he said, without looking round,--"and I want nothing to say to you."

    -

    "It was not I that brought you here, Bartleby," said I, keenly +

    "It was not I that brought you here, Bartleby," said I, keenly pained at his implied suspicion. "And to you, this should not be so vile a place. Nothing reproachful attaches to you by being here. And see, it is not so sad a place as one might think. Look, there is the sky, and here is the grass."

    -

    "I know where I am," he replied, but would say nothing more, and so I +

    "I know where I am," he replied, but would say nothing more, and so I left him.

    -

    As I entered the corridor again, a broad meat-like +

    As I entered the corridor again, a broad meat-like man in an apron, accosted me, and jerking his thumb over his shoulder said--"Is that your friend?"

    -

    "Yes."

    -

    "Does he want to starve? If he does, let him live on the prison fare, +

    "Yes."

    +

    "Does he want to starve? If he does, let him live on the prison fare, that's all.

    -

    "Who are you?" asked I, not knowing what to make of such an unofficially +

    "Who are you?" asked I, not knowing what to make of such an unofficially speaking person in such a place.

    -

    "I am the grub-man. Such gentlemen as have friends here, hire me to provide +

    "I am the grub-man. Such gentlemen as have friends here, hire me to provide them with something good to eat."

    -

    "Is this so?" said I, turning to the turnkey.

    -

    He said it was.

    -

    "Well then," said I, slipping some silver into the grub-man's hands (for +

    "Is this so?" said I, turning to the turnkey.

    +

    He said it was.

    +

    "Well then," said I, slipping some silver into the grub-man's hands (for so they called him). "I want you to give particular attention to my friend there; let him have the best dinner you can get. And you must be as polite to him as possible."

    -

    "Introduce me, will you?" said the grub-man, looking at me with an expression +

    "Introduce me, will you?" said the grub-man, looking at me with an expression which seemed to say he was all impatience for an opportunity to give a specimen of his breeding.

    -

    Thinking it would prove of benefit to the scrivener, I acquiesced; and +

    Thinking it would prove of benefit to the scrivener, I acquiesced; and asking the grub-man his name, went up with him to Bartleby.

    -

    "Bartleby, this is a +

    "Bartleby, this is a friend; you will find him very useful to you."

    -

    "Your +

    "Your sarvant, sir, your sarvant," said the grub-man, making a low salutation behind his apron. "Hope you find it pleasant here, sir;--spacious grounds--cool apartments, sir--hope you'll stay with us some time--try to make it agreeable. What will you have for dinner today?"

    -

    "I prefer not to dine to-day," said Bartleby, turning away. "It would +

    "I prefer not to dine to-day," said Bartleby, turning away. "It would disagree with me; I am unused to dinners." So saying he slowly moved to the other side of the inclosure, and took up a position fronting the dead-wall.

    -

    "How's this?" said the grub-man, addressing me with a stare of astonishment. +

    "How's this?" said the grub-man, addressing me with a stare of astonishment. "He's odd, aint he?"

    -

    "I think he is a little deranged," said I, sadly.

    -

    "Deranged? deranged is it? Well now, upon my word, I thought that friend +

    "I think he is a little deranged," said I, sadly.

    +

    "Deranged? deranged is it? Well now, upon my word, I thought that friend of yourn was a gentleman forger; they are always pale and genteel-like, them forgers. I can't help pity 'em--can't help it, sir. Did you know Monroe Edwards?" he added touchingly, and paused. Then, laying his hand pityingly on my shoulder, sighed, "he died of consumption at Sing-Sing. so you weren't acquainted with Monroe?"

    -

    "No, I was never socially acquainted with any forgers. But I cannot stop +

    "No, I was never socially acquainted with any forgers. But I cannot stop longer. Look to my friend yonder. You will not lose by it. I will see you again."

    -

    Some few days after this, I again obtained admission to the Tombs, and +

    Some few days after this, I again obtained admission to the Tombs, and went through the corridors in quest of Bartleby; but without finding him.

    -

    "I saw him coming from his cell not long ago," said a turnkey, "may be +

    "I saw him coming from his cell not long ago," said a turnkey, "may be he's gone to loiter in the yards."

    -

    So I went in that direction.

    -

    "Are you looking for the silent man?" said another turnkey passing me. +

    So I went in that direction.

    +

    "Are you looking for the silent man?" said another turnkey passing me. "Yonder he lies--sleeping in the yard there. 'Tis not twenty minutes since I saw him lie down."

    -

    The yard was entirely quiet. It was not accessible to the common prisoners. +

    The yard was entirely quiet. It was not accessible to the common prisoners. The surrounding walls, of amazing thickness, kept off all sound behind them. The Egyptian character of the masonry weighed upon me with its gloom. @@ -1322,21 +1342,21 @@ turf grew under foot. The heart of the eternal pyramids, it seemed, wherein, by some strange magic, through the clefts, grass-seed, dropped by birds, had sprung.

    -

    Strangely huddled at the base of the wall, his +

    Strangely huddled at the base of the wall, his knees drawn up, and lying on his side, his head touching the cold stones, I saw the wasted Bartleby. But nothing stirred. I paused; then went close up to him; stooped over, and saw that his dim eyes were open; otherwise he seemed profoundly sleeping. Something prompted me to touch him. I felt his hand, when a tingling shiver ran up my arm and down my spine to my feet.

    -

    The round face of the grub-man peered upon me now. "His dinner is ready. +

    The round face of the grub-man peered upon me now. "His dinner is ready. Won't he dine to-day, either? Or does he live without dining?"

    -

    "Lives without dining," said I, and closed the eyes.

    -

    "Eh!--He's asleep, aint he?"

    -

    "With +

    "Lives without dining," said I, and closed the eyes.

    +

    "Eh!--He's asleep, aint he?"

    +

    "With kings and counsellors," murmured I.

    -

    * * * * * * * *

    -

    There would seem little need for proceeding further in this history. Imagination +

    * * * * * * * *

    +

    There would seem little need for proceeding further in this history. Imagination will readily supply the meagre recital of poor Bartleby's interment. But ere parting with the reader, let me say, that if this little narrative has sufficiently interested him, to awaken curiosity as to who Bartleby @@ -1363,7 +1383,8 @@ whom it would relieve, nor eats nor hungers any more; pardon for those who died despairing; hope for those who died unhoping; good tidings for those who died stifled by unrelieved calamities. On - errands of life, these letters speed to death.

    -

    Ah + errands of life, these letters speed to death. +

    +

    Ah Bartleby! Ah humanity!

    -
    +

    diff --git a/resources/tests/readability/cnet-svg-classes/expected.html b/resources/tests/readability/cnet-svg-classes/expected.html index c4ff6e6..bb85b1f 100644 --- a/resources/tests/readability/cnet-svg-classes/expected.html +++ b/resources/tests/readability/cnet-svg-classes/expected.html @@ -1,20 +1,30 @@
    -
    twitter-lite +
    twitter-lite -

    Twitter Lite estará disponible en Google Play Store en 11 países de América Latina.

    + +

    Twitter Lite estará disponible en Google Play Store en 11 países de América Latina.

    Twitter -

    Twitter ha dado a conocer que Twitter Lite llegará a un total de 24 nuevos países a partir de hoy, 11 de ellos de América Latina.

    -

    Según explicó en un comunicadoTwitter Lite ahora estará disponible en Bolivia, Brasil, Chile, Colombia, Costa Rica, Ecuador, México, Panamá, Perú, El Salvador y Venezuela.

    -

    Twitter Lite es la versión ligera de la aplicación de la red social para Android, disponible en la Google Play Store. Con este app los usuarios que experimentan fallos de red o que viven en países con redes con poca velocidad de conexión como Venezuela podrán descargar los tuits de forma más rápida.

    -

    Entre sus novedades, Twitter Lite permite la carga rápida de tuits en redes 2G y 3G, y ofrece ayuda offline en caso de que pierdas tu conexión; a eso debemos sumar que minimiza el uso de datos y ofrece un modo de ahorro, en el que únicamente se descargan las fotos o videos de los tuits que quieres ver.

    -

    Además, el app ocupa menos espacio en tu teléfono móvil, al reducir a 3MB su peso.

    -

    Twitter dio a conocer Twitter Lite en abril en India, y desde entonces ha estado trabajando para llevarlo a más países. La empresa en los últimos meses también se ha involucrado de forma definitiva en la eliminación de los abusos en la red social, tomando medidas incluso en la verificación de cuentas.

    -
    -
    -
    -
    -

    Reproduciendo:Mira esto: Google Assistant mejora, hay más cambios en Twitter y...

    -

    - 8:09

    -
    -
    + + +

    Twitter ha dado a conocer que Twitter Lite llegará a un total de 24 nuevos países a partir de hoy, 11 de ellos de América Latina.

    +

    Según explicó en un comunicado Twitter Lite ahora estará disponible en Bolivia, Brasil, Chile, Colombia, Costa Rica, Ecuador, México, Panamá, Perú, El Salvador y Venezuela.

    +

    Twitter Lite es la versión ligera de la aplicación de la red social para Android, disponible en la Google Play Store. Con este app los usuarios que experimentan fallos de red o que viven en países con redes con poca velocidad de conexión como Venezuela podrán descargar los tuits de forma más rápida.

    + +

    Entre sus novedades, Twitter Lite permite la carga rápida de tuits en redes 2G y 3G, y ofrece ayuda offline en caso de que pierdas tu conexión; a eso debemos sumar que minimiza el uso de datos y ofrece un modo de ahorro, en el que únicamente se descargan las fotos o videos de los tuits que quieres ver.

    + + +

    Además, el app ocupa menos espacio en tu teléfono móvil, al reducir a 3MB su peso.

    +

    Twitter dio a conocer Twitter Lite en abril en India, y desde entonces ha estado trabajando para llevarlo a más países. La empresa en los últimos meses también se ha involucrado de forma definitiva en la eliminación de los abusos en la red social, tomando medidas incluso en la verificación de cuentas.

    + +
    +
    +
    +
    +

    + Reproduciendo: Mira esto: Google Assistant mejora, hay más cambios en Twitter y...

    +

    + 8:09

    +
    + + + diff --git a/resources/tests/readability/cnet/expected.html b/resources/tests/readability/cnet/expected.html index 44c90c8..4c34c23 100644 --- a/resources/tests/readability/cnet/expected.html +++ b/resources/tests/readability/cnet/expected.html @@ -1,19 +1,48 @@
    -
    + + + + +
    -

    Facebook CEO Mark Zuckerberg, the man with the acquisition plan.

    Photo by James Martin/CNET -

    Anyone who has ever been involved in closing a billion-dollar acquisition deal will tell you that you don't go in without a clear, well thought out plan.

    -

    Facebook CEO Mark Zuckerberg knows a thing or two about how to seal the deal on blockbuster buys. After all, he's the man behind his company's $19 billion acquisition of WhatsApp, he personally brokered its $1 billion buyout of Instagram and closed the $3 billion deal to buy Oculus VR.

    -

    Zuckerberg offered a primer on the strategies he and his company employ when they see an attractive target during testimony Tuesday in a lawsuit with ZeniMax Media, which accuses Oculus and Facebook of "misappropriating" trade secrets and copyright infringement. At the heart of the lawsuit is technology that helped create liftoff for virtual reality, one of the hottest gadget trends today.

    -

    A key Facebook approach is building a long-term relationship with your target, Zuckerberg said at the trial. These deals don't just pop up over night, he said according to a transcript reviewed by Business Insider. They take time to cultivate.

    -

    I've been building relationships, at least in Instagram and the WhatsApp cases, for years with the founders and the people that are involved in these companies, which made [it] so that when it became time or when we thought it was the right time to move, we felt like we had a good amount of context and had good relationships so that we could move quickly, which was competitively important and why a lot of these acquisitions, I think, came to us instead of our competitors and ended up being very good acquisitions over time that a lot of competitors wished they had gotten instead.

    -

    He also stressed the need assure your target that you have a shared vision about how you will collaborate after the deal is put to bed. Zuckerberg said this was reason Facebook was able to acquire Oculus for less than its original $4 billion asking price.

    -
    If this [deal] is going to happen, it's not going to be because we offer a lot of money, although we're going to have to offer a fair price for the company that is more than what they felt like they could do on their own. But they also need to feel like this was actually going to help their mission.
    -

    When that doesn't work, Zuckerberg said scare tactics is an effective, if undesirable, way of persuading small startups that they face a better chance of survival if they have Facebook to guide their way rather than going it alone.

    -
    That's less my thing, but I think if you are trying to help convince people that they want to join you, helping them understand all the pain that they would have to go through to build it out independently is a valuable tactic.
    -

    It also pays to be weary of competing suitors for your startup, Zuckerberg said, and be willing to move fast to stave off rivals and get the deal done.

    -
    Often, if a company knows we're offering something, they will offer more. So being able to move quickly not only increases our chance of being able to get a deal done if we want to, but it makes it so we don't have end up having to pay a lot more because the process drags out.
    -

    It wasn't clear why these strategies didn't work on Snapchat CEO Evan Spiegel, who famously rebuffed a $3 billion takeover offer from Facebook in 2013.

    -

    Tech Enabled: CNET chronicles tech's role in providing new kinds of accessibility. Check it out here.

    -

    Technically Literate: Original works of short fiction with unique perspectives on tech, exclusively on CNET. Here.

    -
    + +

    Facebook CEO Mark Zuckerberg, the man with the acquisition plan.

    Photo by James Martin/CNET +
    + +

    Anyone who has ever been involved in closing a billion-dollar acquisition deal will tell you that you don't go in without a clear, well thought out plan.

    + +

    Facebook CEO Mark Zuckerberg knows a thing or two about how to seal the deal on blockbuster buys. After all, he's the man behind his company's $19 billion acquisition of WhatsApp, he personally brokered its $1 billion buyout of Instagram and closed the $3 billion deal to buy Oculus VR.

    +

    Zuckerberg offered a primer on the strategies he and his company employ when they see an attractive target during testimony Tuesday in a lawsuit with ZeniMax Media, which accuses Oculus and Facebook of "misappropriating" trade secrets and copyright infringement. At the heart of the lawsuit is technology that helped create liftoff for virtual reality, one of the hottest gadget trends today.

    +

    A key Facebook approach is building a long-term relationship with your target, Zuckerberg said at the trial. These deals don't just pop up over night, he said according to a transcript reviewed by Business Insider. They take time to cultivate.

    +
    +

    I've been building relationships, at least in Instagram and the WhatsApp cases, for years with the founders and the people that are involved in these companies, which made [it] so that when it became time or when we thought it was the right time to move, we felt like we had a good amount of context and had good relationships so that we could move quickly, which was competitively important and why a lot of these acquisitions, I think, came to us instead of our competitors and ended up being very good acquisitions over time that a lot of competitors wished they had gotten instead.

    +
    +

    He also stressed the need assure your target that you have a shared vision about how you will collaborate after the deal is put to bed. Zuckerberg said this was reason Facebook was able to acquire Oculus for less than its original $4 billion asking price.

    +
    If this [deal] is going to happen, it's not going to be because we offer a lot of money, although we're going to have to offer a fair price for the company that is more than what they felt like they could do on their own. But they also need to feel like this was actually going to help their mission.
    + + + + + + +

    When that doesn't work, Zuckerberg said scare tactics is an effective, if undesirable, way of persuading small startups that they face a better chance of survival if they have Facebook to guide their way rather than going it alone.

    +
    That's less my thing, but I think if you are trying to help convince people that they want to join you, helping them understand all the pain that they would have to go through to build it out independently is a valuable tactic.
    +

    It also pays to be weary of competing suitors for your startup, Zuckerberg said, and be willing to move fast to stave off rivals and get the deal done.

    +
    Often, if a company knows we're offering something, they will offer more. So being able to move quickly not only increases our chance of being able to get a deal done if we want to, but it makes it so we don't have end up having to pay a lot more because the process drags out.
    +

    It wasn't clear why these strategies didn't work on Snapchat CEO Evan Spiegel, who famously rebuffed a $3 billion takeover offer from Facebook in 2013.

    + + + + + + +

    Tech Enabled: CNET chronicles tech's role in providing new kinds of accessibility. Check it out here.

    +

    Technically Literate: Original works of short fiction with unique perspectives on tech, exclusively on CNET. Here.

    + + + + + + + + diff --git a/resources/tests/readability/cnn/expected.html b/resources/tests/readability/cnn/expected.html index 29de03a..ac1ad81 100644 --- a/resources/tests/readability/cnn/expected.html +++ b/resources/tests/readability/cnn/expected.html @@ -1,19 +1,51 @@
    -

    The U.S. has long been heralded as a land of opportunity -- a place where anyone can succeed regardless of the economic class they were born into.

    -

    But a new report released on Monday by Stanford University's Center on Poverty and Inequality calls that into question.

    -

    The report assessed poverty levels, income and wealth inequality, economic mobility and unemployment levels among 10 wealthy countries with social welfare programs.

    -

    + + +

    The U.S. has long been heralded as a land of opportunity -- a place where anyone can succeed regardless of the economic class they were born into.

    +

    But a new report released on Monday by Stanford University's Center on Poverty and Inequality calls that into question.

    + +

    The report assessed poverty levels, income and wealth inequality, economic mobility and unemployment levels among 10 wealthy countries with social welfare programs.

    +
    + + + + + +

    Powered by SmartAsset.com -

    -

    Among its key findings: the class you're born into matters much more in the U.S. than many of the other countries.

    -

    As the report states: "[T]he birth lottery matters more in the U.S. than in most well-off countries."

    -

    But this wasn't the only finding that suggests the U.S. isn't quite living up to its reputation as a country where everyone has an equal chance to get ahead through sheer will and hard work.

    -

    Related: Rich are paying more in taxes but not as much as they used to

    -

    The report also suggested the U.S. might not be the "jobs machine" it thinks it is, when compared to other countries.

    -

    It ranked near the bottom of the pack based on the levels of unemployment among men and women of prime working age. The study determined this by taking the ratio of employed men and women between the ages of 25 and 54 compared to the total population of each country.

    -

    The overall rankings of the countries were as follows:
    1. Finland
    2. Norway
    3. Australia
    4. Canada
    5. Germany
    6. France
    7. United Kingdom
    8. Italy
    9. Spain
    10. United States

    -

    The low ranking the U.S. received was due to its extreme levels of wealth and income inequality and the ineffectiveness of its "safety net" -- social programs aimed at reducing poverty.

    -

    Related: Chicago is America's most segregated city

    -

    The report concluded that the American safety net was ineffective because it provides only half the financial help people need. Additionally, the levels of assistance in the U.S. are generally lower than in other countries.

    -

    CNNMoney (New York) First published February 1, 2016: 1:28 AM ET

    -
    +

    + + + + + + + + + + +

    Among its key findings: the class you're born into matters much more in the U.S. than many of the other countries.

    +

    As the report states: "[T]he birth lottery matters more in the U.S. than in most well-off countries."

    + +

    But this wasn't the only finding that suggests the U.S. isn't quite living up to its reputation as a country where everyone has an equal chance to get ahead through sheer will and hard work.

    +

    Related: Rich are paying more in taxes but not as much as they used to

    + +

    The report also suggested the U.S. might not be the "jobs machine" it thinks it is, when compared to other countries.

    +

    It ranked near the bottom of the pack based on the levels of unemployment among men and women of prime working age. The study determined this by taking the ratio of employed men and women between the ages of 25 and 54 compared to the total population of each country.

    +

    The overall rankings of the countries were as follows:
    1. Finland
    2. Norway
    3. Australia
    4. Canada
    5. Germany
    6. France
    7. United Kingdom
    8. Italy
    9. Spain
    10. United States
    +
    +
    +
    +
    +
    +
    +
    +
    +

    +

    The low ranking the U.S. received was due to its extreme levels of wealth and income inequality and the ineffectiveness of its "safety net" -- social programs aimed at reducing poverty.

    +

    Related: Chicago is America's most segregated city

    +

    The report concluded that the American safety net was ineffective because it provides only half the financial help people need. Additionally, the levels of assistance in the U.S. are generally lower than in other countries.

    + + +

    CNNMoney (New York) First published February 1, 2016: 1:28 AM ET

    + diff --git a/resources/tests/readability/comment-inside-script-parsing/expected.html b/resources/tests/readability/comment-inside-script-parsing/expected.html index ac877ac..adcc161 100644 --- a/resources/tests/readability/comment-inside-script-parsing/expected.html +++ b/resources/tests/readability/comment-inside-script-parsing/expected.html @@ -1,21 +1,21 @@
    -

    Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod +

    Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.

    -

    Ut enim ad minim veniam, +

    Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

    -

    Duis aute irure dolor in reprehenderit in voluptate velit esse +

    Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

    -
    +
    -

    Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, +

    Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

    -

    Duis aute irure dolor in reprehenderit in voluptate velit esse +

    Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

    -
    +
    diff --git a/resources/tests/readability/daringfireball-1/expected.html b/resources/tests/readability/daringfireball-1/expected.html index 23eeebf..3e30a20 100644 --- a/resources/tests/readability/daringfireball-1/expected.html +++ b/resources/tests/readability/daringfireball-1/expected.html @@ -1,26 +1,30 @@
    -

    About This Site

    -

    Daring Fireball is written and produced by John Gruber.

    -

    Photograph of the author.
    Portrait by George Del Barrio

    -

    Mac Apps

    - -

    iPhone Apps

    - -

    Server Software

    -

    The Daring Fireball website is hosted by Joyent.

    -

    Articles and links are published through Movable Type. In addition to my own SmartyPants and Markdown plug-ins, Daring Fireball uses several excellent Movable Type plug-ins, including Brad Choate’s MT-Regex and MT-IfEmpty, and Nat Irons’s Amputator.

    -

    Stats are tracked using Mint. Additional web nerdery, including the membership system, is fueled by Perl, PHP, and MySQL.

    -

    Web Standards

    -

    Web standards are important, and Daring Fireball adheres to them. Specifically, Daring Fireball’s HTML markup should validate as either HTML 5 or XHTML 4.01 Transitional, its layout is constructed using valid CSS, and its syndicated feed is valid Atom.

    -

    If Daring Fireball looks goofy in your browser, you’re likely using a shitty browser that doesn’t support web standards. Internet Explorer, I’m looking in your direction. If you complain about this, I will laugh at you, because I do not care. If, however, you are using a modern, standards-compliant browser and have trouble viewing or reading Daring Fireball, please do let me know.

    -
    +

    About This Site

    +

    Daring Fireball is written and produced by John Gruber.

    +

    + Photograph of the author. +
    Portrait by George Del Barrio

    +

    Mac Apps

    + +

    iPhone Apps

    + +

    Server Software

    +

    The Daring Fireball website is hosted by Joyent.

    +

    Articles and links are published through Movable Type. In addition to my own SmartyPants and Markdown plug-ins, Daring Fireball uses several excellent Movable Type plug-ins, including Brad Choate’s MT-Regex and MT-IfEmpty, and Nat Irons’s Amputator.

    +

    Stats are tracked using Mint. Additional web nerdery, including the membership system, is fueled by Perl, PHP, and MySQL.

    +

    Web Standards

    +

    Web standards are important, and Daring Fireball adheres to them. Specifically, Daring Fireball’s HTML markup should validate as either HTML 5 or XHTML 4.01 Transitional, its layout is constructed using valid CSS, and its syndicated feed is valid Atom.

    +

    If Daring Fireball looks goofy in your browser, you’re likely using a shitty browser that doesn’t support web standards. Internet Explorer, I’m looking in your direction. If you complain about this, I will laugh at you, because I do not care. If, however, you are using a modern, standards-compliant browser and have trouble viewing or reading Daring Fireball, please do let me know.

    + diff --git a/resources/tests/readability/data-url-image/expected.html b/resources/tests/readability/data-url-image/expected.html index 01b3ee9..4dfe601 100644 --- a/resources/tests/readability/data-url-image/expected.html +++ b/resources/tests/readability/data-url-image/expected.html @@ -1,7 +1,11 @@
    -Illustration for article titled The Spectacular Story Of Metroid

    Lorem ipsum dolor sit amet consectetur adipisicing elit. Natus eaque totam provident obcaecati nisi praesentium iusto velit fuga debitis quidem ut repellat corrupti, eligendi inventore quibusdam perspiciatis delectus omnis pariatur excepturi quasi fugit? A adipisci natus nostrum, qui aperiam, at culpa corrupti autem enim earum vitae. Nostrum et officiis facere ex recusandae tenetur, delectus odit provident soluta id perferendis ducimus quibusdam corporis rerum voluptatem architecto sequi beatae quod mollitia voluptatibus earum tempora inventore ut. Deserunt reprehenderit recusandae nostrum, eaque fuga cum, repellat, perspiciatis ducimus in non consequatur ratione. Sint rerum necessitatibus deleniti odio earum voluptatum eos modi ab dolor minus.

    -Illustration for article titled The Spectacular Story Of Metroid

    Quae veniam recusandae vel cupiditate doloribus pariatur, dolorum saepe hic quos mollitia harum nihil molestias magni modi maiores? Ea accusamus velit vel doloremque pariatur voluptate? Consequuntur dolorum fugit facere architecto eveniet beatae eligendi rerum nisi maxime, voluptatum a repellendus perferendis dolores eius repudiandae accusamus animi eum esse adipisci voluptatibus incidunt necessitatibus! Adipisci id, quos libero dolor odit dignissimos et, cum explicabo est facere aliquid mollitia! Iure nihil nobis beatae quibusdam, dolor saepe reiciendis, exercitationem hic accusamus quasi commodi modi molestiae ipsum, maxime non nesciunt deserunt itaque quo cupiditate rerum ipsa ad minus vel quod. Earum dolorum velit aut minus hic?

    -

    Quidem nam tempore ex harum repellat modi expedita quia rerum est ut ullam, quisquam repudiandae at dolorum. Sint atque fugit facere id blanditiis dolor omnis rem repudiandae, laboriosam unde delectus illo voluptate dolorem consectetur rerum expedita quas debitis deleniti iure amet. At sequi distinctio facere eaque, esse molestiae tenetur, unde sapiente fugit minus eos ex aperiam atque? Animi soluta, dolorem sequi doloremque pariatur in. Impedit, voluptates dolorem natus perspiciatis pariatur eligendi labore rerum facilis est error. Illum eveniet nostrum atque nobis molestiae expedita perferendis, esse ullam corrupti sit explicabo beatae. Sapiente aliquid nesciunt reiciendis incidunt cumque et officiis minus aliquam?

    -

    Veritatis sit nostrum officiis alias ea quam nulla veniam eos, quidem nisi, magni molestias ut odio. Ea, mollitia nesciunt. Maiores iste quos quas sapiente sit at esse quidem? Laboriosam fugiat nobis voluptatum reiciendis aliquam excepturi ipsum? Perspiciatis expedita sit quod, optio assumenda veritatis culpa, neque explicabo distinctio facere incidunt magnam accusamus, quidem animi delectus doloremque vel molestiae a quae dolorem. Sit, at? Numquam possimus animi esse incidunt quos quibusdam. Ab sit eligendi laborum beatae maxime suscipit, obcaecati enim cum nam autem, dicta nobis corrupti ut aut nostrum ex excepturi perferendis, soluta veniam. Excepturi beatae suscipit enim dignissimos! Quam, esse praesentium!

    - + Illustration for article titled The Spectacular Story Of Metroid +

    Lorem ipsum dolor sit amet consectetur adipisicing elit. Natus eaque totam provident obcaecati nisi praesentium iusto velit fuga debitis quidem ut repellat corrupti, eligendi inventore quibusdam perspiciatis delectus omnis pariatur excepturi quasi fugit? A adipisci natus nostrum, qui aperiam, at culpa corrupti autem enim earum vitae. Nostrum et officiis facere ex recusandae tenetur, delectus odit provident soluta id perferendis ducimus quibusdam corporis rerum voluptatem architecto sequi beatae quod mollitia voluptatibus earum tempora inventore ut. Deserunt reprehenderit recusandae nostrum, eaque fuga cum, repellat, perspiciatis ducimus in non consequatur ratione. Sint rerum necessitatibus deleniti odio earum voluptatum eos modi ab dolor minus.

    + Illustration for article titled The Spectacular Story Of Metroid +

    Quae veniam recusandae vel cupiditate doloribus pariatur, dolorum saepe hic quos mollitia harum nihil molestias magni modi maiores? Ea accusamus velit vel doloremque pariatur voluptate? Consequuntur dolorum fugit facere architecto eveniet beatae eligendi rerum nisi maxime, voluptatum a repellendus perferendis dolores eius repudiandae accusamus animi eum esse adipisci voluptatibus incidunt necessitatibus! Adipisci id, quos libero dolor odit dignissimos et, cum explicabo est facere aliquid mollitia! Iure nihil nobis beatae quibusdam, dolor saepe reiciendis, exercitationem hic accusamus quasi commodi modi molestiae ipsum, maxime non nesciunt deserunt itaque quo cupiditate rerum ipsa ad minus vel quod. Earum dolorum velit aut minus hic?

    + +

    Quidem nam tempore ex harum repellat modi expedita quia rerum est ut ullam, quisquam repudiandae at dolorum. Sint atque fugit facere id blanditiis dolor omnis rem repudiandae, laboriosam unde delectus illo voluptate dolorem consectetur rerum expedita quas debitis deleniti iure amet. At sequi distinctio facere eaque, esse molestiae tenetur, unde sapiente fugit minus eos ex aperiam atque? Animi soluta, dolorem sequi doloremque pariatur in. Impedit, voluptates dolorem natus perspiciatis pariatur eligendi labore rerum facilis est error. Illum eveniet nostrum atque nobis molestiae expedita perferendis, esse ullam corrupti sit explicabo beatae. Sapiente aliquid nesciunt reiciendis incidunt cumque et officiis minus aliquam?

    + +

    Veritatis sit nostrum officiis alias ea quam nulla veniam eos, quidem nisi, magni molestias ut odio. Ea, mollitia nesciunt. Maiores iste quos quas sapiente sit at esse quidem? Laboriosam fugiat nobis voluptatum reiciendis aliquam excepturi ipsum? Perspiciatis expedita sit quod, optio assumenda veritatis culpa, neque explicabo distinctio facere incidunt magnam accusamus, quidem animi delectus doloremque vel molestiae a quae dolorem. Sit, at? Numquam possimus animi esse incidunt quos quibusdam. Ab sit eligendi laborum beatae maxime suscipit, obcaecati enim cum nam autem, dicta nobis corrupti ut aut nostrum ex excepturi perferendis, soluta veniam. Excepturi beatae suscipit enim dignissimos! Quam, esse praesentium!

    +
    diff --git a/resources/tests/readability/dev418/expected.html b/resources/tests/readability/dev418/expected.html index 98e12f8..d9b9bfd 100644 --- a/resources/tests/readability/dev418/expected.html +++ b/resources/tests/readability/dev418/expected.html @@ -1,52 +1,80 @@
    -

    + +

    Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

    -
    -

    +
    +

    Single <img>

    -

    An image

    -

    +

    + An image +

    +

    Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

    -
    -

    +
    +

    Single <figure>

    -
    An image
    +
    + An image +
    Caption of the figure -

    +

    +
    +

    Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

    -
    -

    +
    +

    <ul> List of <img>

    -
      -
    • An image
    • -
    • An image
    • -
    • An image
    • -
    -

    +

      +
    • + An image +
    • +
    • + An image +
    • +
    • + An image +
    • +
    +

    Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

    -
    -

    +
    +

    <ul> List of <figure>

    -
      -
    • An image
      +
        +
      • +
        + An image +
        Caption of the figure -
      • -
      • An image
        +
        +
        +
      • +
      • +
        + An image +
        Caption of the figure -
      • -
      • An image
        +
        +
        +
      • +
      • +
        + An image +
        Caption of the figure -
      • -
      -

      +

      +
      +
    • +
    +

    Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

    -
    + diff --git a/resources/tests/readability/dropbox-blog/expected.html b/resources/tests/readability/dropbox-blog/expected.html index 4885383..1fd339c 100644 --- a/resources/tests/readability/dropbox-blog/expected.html +++ b/resources/tests/readability/dropbox-blog/expected.html @@ -1,319 +1,529 @@
    -
    -

    +

    +

    I joined Dropbox not long after graduating with a Master’s degree in computer science. Aside from an internship, this was my first big-league engineering job. My team had already begun designing a critical internal service that most of our software would use: It would handle asynchronous computing requests behind the scenes, powering everything from dragging a file into a Dropbox folder to scheduling a marketing campaign.

    -

    +

    This Asynchronous Task Framework (ATF) would replace multiple bespoke async systems used by different engineering teams. It would reduce redundant development, incompatibilities, and reliance on legacy software. There were no open-source projects or buy-not-build solutions that worked well for our use case and scale, so we had to create our own. ATF is both an important and interesting challenge, though, so we were happy to design, build and deploy our own in-house service.

    -

    +

    ATF not only had to work well, it had to work well at scale: It would be a foundational building block of Dropbox infrastructure. It would need to handle 10,000 async tasks per second from the start, and be architected for future growth. It would need to support nearly 100 unique async task types from the start, again with room to grow. There were at least two dozen engineering teams that would want to use it for entirely different parts of our codebase, for many products and services. 

    -

    +

    As any engineer would, we Googled to see what other companies with mega-scale services had done to handle async tasks. We were disappointed to find little material published by engineers who built supersized async services.

    -

    +

    Now that ATF is deployed and currently serving 9,000 async tasks scheduled per second and in use by 28 engineering teams internally, we’re glad to fill that information gap. We’ve documented Dropbox ATF thoroughly, as a reference and guide for the engineering community seeking their own async solutions.

    -
    -

    +

    +
    +

    +

    Introduction -

    -
    -

    + +

    +
    +
    +

    Scheduling asynchronous tasks on-demand is a critical capability that powers many features and internal platforms at Dropbox. Async Task Framework (ATF) is the infrastructural system that supports this capability at Dropbox through a callback-based architecture. ATF enables developers to define callbacks, and schedule tasks that execute against these pre-defined callbacks.

    -

    +

    Since its introduction over a year ago, ATF has gone on to become an important building block in the Dropbox infrastructure, used by nearly 30 internal teams across our codebase. It currently supports 100+ use cases which require either immediate or delayed task scheduling. 

    -
    -

    +

    +
    +

    +

    Glossary -

    -
    -

    + +

    +
    +
    +

    Some basic terms repeatedly used in this post, defined as used in the context of this discussion.

    -

    Lambda: A callback implementing business logic. +

    + Lambda: A callback implementing business logic.

    -

    Task: Unit of execution of a lambda. Each asynchronous job scheduled with ATF is a task.

    -

    Collection: A labeled subset of tasks belonging to a lambda. If send email is implemented as a lambda, then password reset email and marketing email would be collections.

    -

     Priority: Labels defining priority of execution of tasks within a lambda. 

    -
    -

    +

    + Task: Unit of execution of a lambda. Each asynchronous job scheduled with ATF is a task. +

    +

    + Collection: A labeled subset of tasks belonging to a lambda. If send email is implemented as a lambda, then password reset email and marketing email would be collections. +

    +

    +  Priority: Labels defining priority of execution of tasks within a lambda.  +

    +

    +
    +

    +

    Features -

    -
    -

    Task scheduling
    + +

    +
    +
    +

    + Task scheduling
    Clients can schedule tasks to execute at a specified time. Tasks can be scheduled for immediate execution, or delayed to fit the use case.

    -

    Priority based execution
    +

    + Priority based execution
    Tasks should be associated with a priority. Tasks with higher priority should get executed before tasks with a lower priority once they are ready for execution.

    -

    Task gating
    +

    + Task gating
    ATF enables the the gating of tasks based on lambda, or a subset of tasks on a lambda based on collection. Tasks can be gated to be completely dropped or paused until a suitable time for execution.

    -

    Track task status
    +

    + Track task status
    Clients can query the status of a scheduled task.

    -
    -

    +

    +
    +

    +

    System guarantees -

    -
    -

    At-least once task execution
    The ATF system guarantees that a task is executed at least once after being scheduled. Execution is said to be complete once the user-defined callback signals task completion to the ATF system. + +

    +
    +
    +

    + At-least once task execution
    The ATF system guarantees that a task is executed at least once after being scheduled. Execution is said to be complete once the user-defined callback signals task completion to the ATF system.

    -

    No concurrent task execution
    The ATF system guarantees that at most one instance of a task will be actively executing at any given in point. This helps users write their callbacks without designing for concurrent execution of the same task from different locations. +

    + No concurrent task execution
    The ATF system guarantees that at most one instance of a task will be actively executing at any given in point. This helps users write their callbacks without designing for concurrent execution of the same task from different locations.

    -

    Isolation
    Tasks in a given lambda are isolated from the tasks in other lambdas. This isolation spans across several dimensions, including worker capacity for task execution and resource use for task scheduling. Tasks on the same lambda but different priority levels are also isolated in their resource use for task scheduling. +

    + Isolation
    Tasks in a given lambda are isolated from the tasks in other lambdas. This isolation spans across several dimensions, including worker capacity for task execution and resource use for task scheduling. Tasks on the same lambda but different priority levels are also isolated in their resource use for task scheduling.

    -

    Delivery latency
    95% of tasks begin execution within five seconds from their scheduled execution time. +

    + Delivery latency
    95% of tasks begin execution within five seconds from their scheduled execution time.

    -

    High availability for task scheduling
    The ATF service is 99.9% available to accept task scheduling requests from any client. +

    + High availability for task scheduling
    The ATF service is 99.9% available to accept task scheduling requests from any client.

    -
    -

    +

    +
    +

    +

    Lambda requirements -

    -
    -

    + +

    +
    +
    +

    Following are some restrictions we place on the callback logic (lambda):

    -

    Idempotence
    +

    + Idempotence
    A single task on a lambda can be executed multiple times within the ATF system. Developers should ensure that their lambda logic and correctness of task execution in clients are not affected by this.

    -

    Resiliency
    +

    + Resiliency
    Worker processes which execute tasks might die at any point during task execution. ATF retries abruptly interrupted tasks, which could also be retried on different hosts. Lambda owners must design their lambdas such that retries on different hosts do not affect lambda correctness.

    -

    Terminal state handling
    ATF retries tasks until they are signaled to be complete from the lambda logic. Client code can mark a task as successfully completed, fatally terminated, or retriable. It is critical that lambda owners design clients to signal task completion appropriately to avoid misbehavior such as infinite retries.  +

    + Terminal state handling
    ATF retries tasks until they are signaled to be complete from the lambda logic. Client code can mark a task as successfully completed, fatally terminated, or retriable. It is critical that lambda owners design clients to signal task completion appropriately to avoid misbehavior such as infinite retries. 

    -
    -

    +

    +
    +

    +

    Architecture -

    -
    Async Task Framework (ATF) [Fig 1]
    + +

    +
    +
    +
    + Async Task Framework (ATF) [Fig 1] +
    Async Task Framework (ATF) [Fig 1] -
    -
    -

    + + +

    +
    +

    In this section, we describe the high-level architecture of ATF and give a brief description of its different components. (See Fig. 1 above.) Dropbox uses gRPC for remote calls and our in-house Edgestore to store tasks.

    -

    +

    ATF consists of the following components: 

    -
      -
    • Frontend +
        +
      • Frontend
      • -
      • Task Store +
      • Task Store
      • -
      • Store Consumer +
      • Store Consumer
      • -
      • Queue +
      • Queue
      • -
      • Controller +
      • Controller
      • -
      • Executor +
      • Executor
      • -
      • Heartbeat and Status Controller (HSC)
        -
      • -
      -

      Frontend
      - This is the service that schedules requests via an RPC interface. The frontend accepts RPC requests from clients and schedules tasks by interacting with ATF’s task store described below.

      -

      Task Store
      ATF tasks are stored in and triggered from the task store. The task store could be any generic data store with indexed querying capability. In ATF’s case, We use our in-house metadata store Edgestore to power the task store. More details can be found in the DataModel section below. +

    • Heartbeat and Status Controller (HSC)
      +
    • +
    +

    + Frontend
    + This is the service that schedules requests via an RPC interface. The frontend accepts RPC requests from clients and schedules tasks by interacting with ATF’s task store described below.

    -

    Store Consumer
    The Store Consumer is a service that periodically polls the task store to find tasks that are ready for execution and pushes them onto the right queues, as described in the queue section below. These could be tasks that are newly ready for execution, or older tasks that are ready for execution again because they either failed in a retriable way on execution, or were dropped elsewhere within the ATF system.  +

    + Task Store
    ATF tasks are stored in and triggered from the task store. The task store could be any generic data store with indexed querying capability. In ATF’s case, we use our in-house metadata store Edgestore to power the task store. More details can be found in the Data Model section below.

    -

    +

    + Store Consumer
    The Store Consumer is a service that periodically polls the task store to find tasks that are ready for execution and pushes them onto the right queues, as described in the queue section below. These could be tasks that are newly ready for execution, or older tasks that are ready for execution again because they either failed in a retriable way on execution, or were dropped elsewhere within the ATF system.  +

    +

    Below is a simple walkthrough of the Store Consumer’s function: 

    -
    -
    repeat every second:
    +                            
    +
    + +
    repeat every second:
       1. poll tasks ready for execution from task store
       2. push tasks onto the right queues
    -  3. update task statuses
    -
    -

    + 3. update task statuses +

    +
    +

    The Store Consumer polls tasks that failed in earlier execution attempts. This helps with the at-least-once guarantee that the ATF system provides. More details on how the Store Consumer polls new and previously failed tasks are presented in the Lifecycle of a task section below.

    -

    Queue
    ATF uses AWS Simple Queue Service (SQS) to queue tasks internally. These queues act as a buffer between the Store Consumer and Controllers (described below). Each <lambda, priority>  pair gets a dedicated SQS queue. The total number of SQS queues used by ATF is #lambdas x #priorities. +

    + Queue
    ATF uses AWS Simple Queue Service (SQS) to queue tasks internally. These queues act as a buffer between the Store Consumer and Controllers (described below). Each <lambda, priority>  pair gets a dedicated SQS queue. The total number of SQS queues used by ATF is #lambdas x #priorities.

    -

    Controller
    Worker hosts are physical hosts dedicated for task execution. Each worker host has one controller process responsible for polling tasks from SQS queues in a background thread, and then pushing them onto process local buffered queues. The Controller is only aware of the lambdas it is serving and thus polls only the limited set of necessary queues.  +

    + Controller
    Worker hosts are physical hosts dedicated for task execution. Each worker host has one controller process responsible for polling tasks from SQS queues in a background thread, and then pushing them onto process local buffered queues. The Controller is only aware of the lambdas it is serving and thus polls only the limited set of necessary queues. 

    -

    +

    The Controller serves tasks from its process local queue as a response to NextWork RPCs. This is the layer where execution level task prioritization occurs. The Controller has different process level queues for tasks of different priorities and can thus prioritize tasks in response to NextWork RPCs.

    -

    Executor
    The Executor is a process with multiple threads, responsible for the actual task execution. Each thread within an Executor process follows this simple loop: +

    + Executor
    The Executor is a process with multiple threads, responsible for the actual task execution. Each thread within an Executor process follows this simple loop:

    -
    -
    while True:
    +                            
    +
    + +
    while True:
       w = get_next_work()
    -  do_work(w)
    -
    -

    + do_work(w) +

    +
    +

    Each worker host has a single Controller process and multiple executor processes. Both the Controller and Executors work in a “pull” model, in which active loops continuously long-poll for new work to be done.

    -

    Heartbeat and Status Controller (HSC)
    +

    + Heartbeat and Status Controller (HSC)
    The HSC serves RPCs for claiming a task for execution (ClaimTask), setting task status after execution (SetResults) and heartbeats during task execution (Heartbeat). ClaimTask requests originate from the Controllers in response to NextWork requests. Heartbeat and SetResults requests originate from executor processes during and after task execution. The HSC interacts with the task store to update the task status on the kind of request it receives.

    -
    -

    +

    +
    +

    +

    Data model -

    -
    -

    + +

    +
    +
    +

    ATF uses our in-house metadata store, Edgestore, as a task store. Edgestore objects can be Entities or Associations (assoc), each of which can have user-defined attributes. Associations are used to represent relationships between entities. Edgestore supports indexing only on attributes of associations.

    -

    +

    Based on this design, we have two kinds of ATF-related objects in Edgestore. The ATF association stores scheduling information, such as the next scheduled timestamp at which the Store Consumer should poll a given task (either for the first time or for a retry). The ATF entity stores all task related information that is used to track the task state and payload for task execution. We query on associations from the Store Consumer in a pull model to pick up tasks ready for execution.

    -
    -

    +

    +
    +

    +

    Lifecycle of a task -

    -
    -
      -
    1. Client performs a Schedule RPC call to Frontend with task information, including execution time.  + +

      +
    +
    +
      +
    1. Client performs a Schedule RPC call to Frontend with task information, including execution time. 
    2. -
    3. Frontend creates Edgestore entity and assoc for the task.  +
    4. Frontend creates Edgestore entity and assoc for the task. 
    5. -
    6. When it is time to process the task, Store Consumer pulls the task from Edgestore and pushes it to a related SQS queue.  +
    7. When it is time to process the task, Store Consumer pulls the task from Edgestore and pushes it to a related SQS queue. 
    8. -
    9. -Executor makes NextWork RPC call to Controller, which pulls tasks from the SQS queue, makes a ClaimTask RPC to the HSC and then returns the task to the Executor.  +
    10. + Executor makes NextWork RPC call to Controller, which pulls tasks from the SQS queue, makes a ClaimTask RPC to the HSC and then returns the task to the Executor
    11. -
    12. -Executor invokes the callback for the task. While processing, Executor performs Heartbeat RPC calls to Heartbeat and Status Controller (HSC). Once processing is done, Executor performs TaskStatus RPC call to HSC.  +
    13. + Executor invokes the callback for the task. While processing, Executor performs Heartbeat RPC calls to Heartbeat and Status Controller (HSC). Once processing is done, Executor performs TaskStatus RPC call to HSC
    14. -
    15. Upon getting Heartbeat and TaskStatus RPC calls, HSC updates the Edgestore entity and assoc. +
    16. Upon getting Heartbeat and TaskStatus RPC calls, HSC updates the Edgestore entity and assoc.
    17. -
    -

    + +

    Every state update in the lifecycle of a task is accompanied by an update to the next trigger timestamp in the assoc. This ensures that the Store Consumer pulls the task again if there is no change in state of the task within the next trigger timestamp. This helps ATF achieve its at-least-once delivery guarantee by ensuring that no task is dropped.

    -

    +

    Following are the task entity and association states in ATF and their corresponding timestamp updates:

    - - - - - - - - - - - - + + + +

    Entity status

    Assoc status

    next trigger timestamp in Assoc

    Comment

    new

    new

    scheduled_timestamp of the task -

    + + + + + + + + + + + + + - - - - - - + + + + + + - - - - - - + + + + + + - - - - - - + + + + + + - - - - + + + + - + - + - - - - - + + + + + - - - - - - + + + + + + - - -
    +

    + Entity status +

    +
    +

    + Assoc status +

    +
    +

    + next trigger timestamp in Assoc +

    +
    +

    + Comment +

    +
    +

    + new +

    +
    +

    + new +

    +
    +

    + scheduled_timestamp of the task +

    +
    +

    Pick up new tasks that are ready.  -

    enqueued

    started

    enqueued_timestamp + enqueue_timeout

    +

    +
    +

    + enqueued +

    +
    +

    + started +

    +
    +

    + enqueued_timestamp + enqueue_timeout +

    +
    +

    Re-enqueue task if it has been in enqueued state for too long. This can happen if the queue loses data or the controller goes down after polling the queue and before the task is claimed. -

    claimed

    started

    claimed_timestamp + claim_timeout

    +

    +
    +

    + claimed +

    +
    +

    + started +

    +
    +

    + claimed_timestamp + claim_timeout +

    +
    +

    Re-enqueue if task is claimed but never transferred to processing. This can happen if Controller is down after claiming a task. Task status is changed to enqueued after re-enqueue. -

    processing

    started

    heartbeat_timestamp + heartbeat_timeout -

    +

    +
    +

    + processing +

    +
    +

    + started +

    +
    +

    + heartbeat_timestamp + heartbeat_timeout +

    +
    +

    Re-enqueue if task hasn’t sent heartbeat for too long. This can happen if Executor is down. Task status is changed to enqueued after re-enqueue.  -

    retriable failure

    +

    +
    +

    + retriable failure +

    +
    +

    started -

    +

    +
    +

    compute next_timestamp according to backoff logic -

    +

    +
    +

    Exponential backoff for tasks with retriable failure.  -

    success

    completed

    +

    +
    +

    + success +

    +
    +

    + completed +

    +
    +

    N/A -

    fatal_failure

    completed

    +

    +
    + +
    +

    + fatal_failure +

    +
    +

    + completed +

    +
    +

    N/A -

    -

    +

    +
    + +
    +

    The store consumer polls for tasks based on the following query:

    -

    assoc_status= && next_timestamp<=time.now()

    -

    - Below is the state machine that defines task state transitions: 

    -
    -
    Task State Transitions [Fig 2]
    -

    +

    + assoc_status= && next_timestamp<=time.now()
    +

    +

    + Below is the state machine that defines task state transitions: 
    +

    +

    +
    +
    + Task State Transitions [Fig 2] +
    +
    +
    +

    +

    Achieving guarantees -

    -
    -

    At-least-once task execution
    At-least-once execution is guaranteed in ATF by retrying a task until it completes execution (which is signaled by a Success or a FatalFailure state). All ATF system errors are implicitly considered retriable failures, and lambda owners have an option of marking tasks with a RetriableFailure state. Tasks might be dropped from the ATF execution pipeline in different parts of the system through transient RPC failures and failures on dependencies like Edgestore or SQS. These transient failures at different parts of the system do not affect the at-least-once guarantee, though, because of the system of timeouts and re-polling from Store Consumer. + +

    +
    +
    +

    + At-least-once task execution
    At-least-once execution is guaranteed in ATF by retrying a task until it completes execution (which is signaled by a Success or a FatalFailure state). All ATF system errors are implicitly considered retriable failures, and lambda owners have an option of marking tasks with a RetriableFailure state. Tasks might be dropped from the ATF execution pipeline in different parts of the system through transient RPC failures and failures on dependencies like Edgestore or SQS. These transient failures at different parts of the system do not affect the at-least-once guarantee, though, because of the system of timeouts and re-polling from Store Consumer.

    -

    No concurrent task execution
    Concurrent task execution is avoided through a combination of two methods in ATF. First, tasks are explicitly claimed through an exclusive task state (Claimed) before starting execution. Once the task execution is complete, the task status is updated to one of Success, FatalFailure or RetriableFailure. A task can be claimed only if its existing task state is Enqueued (retried tasks go to the Enqueued state as well once they are re-pushed onto SQS). +

    + No concurrent task execution
    Concurrent task execution is avoided through a combination of two methods in ATF. First, tasks are explicitly claimed through an exclusive task state (Claimed) before starting execution. Once the task execution is complete, the task status is updated to one of Success, FatalFailure or RetriableFailure. A task can be claimed only if its existing task state is Enqueued (retried tasks go to the Enqueued state as well once they are re-pushed onto SQS).

    -

    - However, there might be situations where once a long running task starts execution, its heartbeats might fail repeatedly yet the task execution continues. ATF would retry this task by polling it from the store consumer because the heartbeat timeouts would’ve expired. This task can then be claimed by another worker and lead to concurrent execution. 

    -

    +

    + However, there might be situations where once a long running task starts execution, its heartbeats might fail repeatedly yet the task execution continues. ATF would retry this task by polling it from the store consumer because the heartbeat timeouts would’ve expired. This task can then be claimed by another worker and lead to concurrent execution. 
    +

    +

    To avoid this situation, there is a termination logic in the Executor processes whereby an Executor process terminates itself as soon as three consecutive heartbeat calls fail. Each heartbeat timeout is large enough to eclipse three consecutive heartbeat failures. This ensures that the Store Consumer cannot pull such tasks before the termination logic ends them—the second method that helps achieve this guarantee.

    -

    Isolation
    Isolation of lambdas is achieved through dedicated worker clusters, dedicated queues, and dedicated per-lambda scheduling quotas. In addition, isolation across different priorities within the same lambda is likewise achieved through dedicated queues and scheduling bandwidth. +

    + Isolation
    Isolation of lambdas is achieved through dedicated worker clusters, dedicated queues, and dedicated per-lambda scheduling quotas. In addition, isolation across different priorities within the same lambda is likewise achieved through dedicated queues and scheduling bandwidth.

    -

    Delivery latency
    ATF use cases do not require ultra-low task delivery latencies. Task delivery latencies on the order of a couple of seconds are acceptable. Tasks ready for execution are periodically polled by the Store Consumer and this period of polling largely controls the task delivery latency. Using this as a tuning lever, ATF can achieve different delivery latencies as required. Increasing poll frequency reduces task delivery latency and vice versa. Currently, we have calibrated ATF to poll for ready tasks once every two seconds. +

    + Delivery latency
    ATF use cases do not require ultra-low task delivery latencies. Task delivery latencies on the order of a couple of seconds are acceptable. Tasks ready for execution are periodically polled by the Store Consumer and this period of polling largely controls the task delivery latency. Using this as a tuning lever, ATF can achieve different delivery latencies as required. Increasing poll frequency reduces task delivery latency and vice versa. Currently, we have calibrated ATF to poll for ready tasks once every two seconds.

    -
    -

    +

    +
    +

    +

    Ownership model -

    -

    + +

    +
    +

    ATF is designed to be a self-serve framework for developers at Dropbox. The design is very intentional in driving an ownership model where lambda owners own all aspects of their lambdas’ operations. To promote this, all lambda worker clusters are owned by the lambda owners. They have full control over operations on these clusters, including code deployments and capacity management. Each executor process is bound to one lambda. Owners have the option of deploying multiple lambdas on their worker clusters simply by spawning new executor processes on their hosts.

    -

    +
    +

    +

    Extending ATF -

    -
    -

    +

    +

    +
    +
    +

    As described above, ATF provides an infrastructural building block for scheduling asynchronous tasks. With this foundation established, ATF can be extended to support more generic use cases and provide more features as a framework. Following are some examples of what could be built as an extension to ATF. 

    -

    Periodic task execution
    Currently, ATF is a system for one-time task scheduling. Building support for periodic task execution as an extension to this framework would be useful in unlocking new capabilities for our clients. +

    + Periodic task execution
    Currently, ATF is a system for one-time task scheduling. Building support for periodic task execution as an extension to this framework would be useful in unlocking new capabilities for our clients.

    -

    Better support for task chaining
    Currently, it is possible to chain tasks on ATF by scheduling a task onto ATF that then schedules other tasks onto ATF during its execution. Although it is possible to do this in the current ATF setup, visibility and control on this chaining is absent at the framework level. Another natural extension here would be to better support task chaining through framework-level visibility and control, to make this use case a first class concept in the ATF model. +

    + Better support for task chaining
    Currently, it is possible to chain tasks on ATF by scheduling a task onto ATF that then schedules other tasks onto ATF during its execution. Although it is possible to do this in the current ATF setup, visibility and control on this chaining is absent at the framework level. Another natural extension here would be to better support task chaining through framework-level visibility and control, to make this use case a first class concept in the ATF model.

    -

    Dead letter queues for misbehaving tasks
    One common source of maintenance overhead we observe on ATF is that some tasks get stuck in infinite retry loops due to occasional bugs in lambda logic. This requires manual intervention from the ATF framework owners in some cases where there are a large number of tasks stuck in such loops, occupying a lot of the scheduling bandwidth in the system. Typical manual actions in response to such a situation include pausing execution of the lambdas with misbehaving tasks, or dropping them outright. +

    + Dead letter queues for misbehaving tasks
    One common source of maintenance overhead we observe on ATF is that some tasks get stuck in infinite retry loops due to occasional bugs in lambda logic. This requires manual intervention from the ATF framework owners in some cases where there are a large number of tasks stuck in such loops, occupying a lot of the scheduling bandwidth in the system. Typical manual actions in response to such a situation include pausing execution of the lambdas with misbehaving tasks, or dropping them outright.

    -

    - One way to reduce this operational overhead and provide an easy interface for lambda owners to recover from such incidents would be to create dead letter queues filled with such misbehaving tasks. The ATF framework could impose a maximum number of retries before tasks are pushed onto the dead letter queue. We could create and expose tools that make it easy to reschedule tasks from the dead letter queue back into the ATF system, once the associated lambda bugs are fixed.

    -
    -

    +

    + One way to reduce this operational overhead and provide an easy interface for lambda owners to recover from such incidents would be to create dead letter queues filled with such misbehaving tasks. The ATF framework could impose a maximum number of retries before tasks are pushed onto the dead letter queue. We could create and expose tools that make it easy to reschedule tasks from the dead letter queue back into the ATF system, once the associated lambda bugs are fixed.
    +

    +

    +
    +

    +

    Conclusion -

    -

    - We hope this post helps engineers elsewhere to develop better async task frameworks of their own. Many thanks to everyone who worked on this project: Anirudh Jayakumar, Deepak Gupta, Dmitry Kopytkov, Koundinya Muppalla, Peng Kang, Rajiv Desai, Ryan Armstrong, Steve Rodrigues, Thomissa Comellas, Xiaonan Zhang and Yuhuan Du.

    -
    + +

    + +

    + We hope this post helps engineers elsewhere to develop better async task frameworks of their own. Many thanks to everyone who worked on this project: Anirudh Jayakumar, Deepak Gupta, Dmitry Kopytkov, Koundinya Muppalla, Peng Kang, Rajiv Desai, Ryan Armstrong, Steve Rodrigues, Thomissa Comellas, Xiaonan Zhang and Yuhuan Du.
    +   +

    + diff --git a/resources/tests/readability/ebb-org/expected.html b/resources/tests/readability/ebb-org/expected.html index d6554fa..02e230e 100644 --- a/resources/tests/readability/ebb-org/expected.html +++ b/resources/tests/readability/ebb-org/expected.html @@ -1,49 +1,57 @@
    -

    + +

    Tuesday 15 October 2019 by Bradley M. Kuhn

    -

    +

    The last 33 days have been unprecedentedly difficult for the software freedom community and for me personally. Folks have been emailing, phoning, texting, tagging me on social media (— the last of which has been funny, because all my social media accounts are placeholder accounts). But, just about everyone has urged me to comment on the serious issues that the software freedom community now faces. Until now, I have stayed silent regarding all these current topics: from Richard M. Stallman (RMS)'s public statements, to his resignation from the Free Software Foundation (FSF), to the Epstein scandal and its connection to MIT. I've also avoided generally commenting on software freedom organizational governance during this period. I did this for good reason, which is explained below. However, in this blog post, I now share my primary comments on the matters that seem to currently be of the utmost attention of the Open Source and Free Software communities.

    -

    - I have been silent the last month because, until two days ago, I was an at-large member of FSF's Board of Directors, and a Voting Member of the FSF. As a member of FSF's two leadership bodies, I was abiding by a reasonable request from the FSF management and my duty to the organization. Specifically, the FSF asked that all communication during the crisis comedirectly from FSF officers and not from at-large directors and/or Voting Members. Furthermore, the FSF management asked all Directors and Voting Members to remain silent on this entire matter — even on issues only tangentially related to the current situation, and even when speaking in our own capacity (e.g., on our own blogs like this one). The FSF is an important organization, and I take any request from the FSF seriously — so I abided fully with their request. +

    + I have been silent the last month because, until two days ago, I was an at-large member of FSF's Board of Directors, and a Voting Member of the FSF. As a member of FSF's two leadership bodies, I was abiding by a reasonable request from the FSF management and my duty to the organization. Specifically, the FSF asked that all communication during the crisis come directly from FSF officers and not from at-large directors and/or Voting Members. Furthermore, the FSF management asked all Directors and Voting Members to remain silent on this entire matter — even on issues only tangentially related to the current situation, and even when speaking in our own capacity (e.g., on our own blogs like this one). The FSF is an important organization, and I take any request from the FSF seriously — so I abided fully with their request.

    -

    +

    The situation was further complicated because folks at my employer, Software Freedom Conservancy (where I also serve on the Board of Directors) had strong opinions about this matter as well. Fortunately, the FSF and Conservancy both had already created clear protocols for what I should do if ever there was a disagreement or divergence of views between Conservancy and FSF. I therefore was recused fully from the planning, drafting, and timing of Conservancy's statement on this matter. I thank my colleagues at the Conservancy for working so carefully to keep me entirely outside the loop on their statement and to diligently assure that it was straight-forward for me to manage any potential organizational disagreements. I also thank those at the FSF who outlined clear protocols (ahead of time, back in March 2019) in case a situation like this ever came up. I also know my colleagues at Conservancy care deeply, as I do, about the health and welfare of the FSF and its mission of fighting for universal software freedom for all. None of us want, nor have, any substantive disagreement over software freedom issues.

    -

    +

    I take very seriously my duty to the various organizations where I have (or have had) affiliations. More generally, I champion non-profit organizational transparency. Unfortunately, the current crisis left me in a quandary between the overarching goal of community transparency and abiding by FSF management's directives. Now that I've left the FSF Board of Directors, FSF's Voting Membership, and all my FSF volunteer roles (which ends my 22-year uninterrupted affiliation with the FSF), I can now comment on the substantive issues that face not just the FSF, but the Free Software community as a whole, while continuing to adhere to my past duty of acting in FSF's best interest. In other words, my affiliation with the FSF has come to an end for many good and useful reasons. The end to this affiliation allows me to speak directly about the core issues at the heart of the community's current crisis.

    -

    +

    Firstly, all these events — from RMS' public comments on the MIT mailing list, to RMS' resignation from the FSF to RMS' discussions about the next steps for the GNU project — seem to many to have happened ridiculously quickly. But it wasn't actually fast at all. In fact, these events were the culmination of issues that were slowly growing in concern to many people, including me.

    -

    - For the last two years, I had been a loud internal voice in the FSF leadership regarding RMS' Free-Software-unrelated public statements; I felt strongly that it was in the best interest of the FSF to actively seek to limit such statements, and that it was my duty to FSF to speak out about this within the organization. Those who only learned of this story in the last month (understandably) believed Selam G.'s Medium post raised an entirely new issue. Infact, RMS'viewsandstatementspostedonstallman.orgaboutsexualmoralityescalatedfortheworseoverthelastfewyears. When the escalation started, I still considered RMS both a friend and colleague, and I attempted to argue with him at length to convince him that some of his positions were harmful to sexual assault survivors and those who are sex trafficked, and to the people who devote their lives in service to such individuals. More importantly to the FSF, I attempted to persuade RMS that launching a controversial campaign on sexual behavior and morality was counter to his and FSF's mission to advance software freedom, and told RMS that my duty as an FSF Director was to assure the best outcome for the FSF, which IMO didn't include having a leader who made such statements. Not only is human sexual behavior not a topic on which RMS has adequate academic expertise, but also his positions appear to ignore significant research and widely available information on the subject. Many of his comments, while occasionally politically intriguing, lack empathy for people who experienced trauma. +

    + For the last two years, I had been a loud internal voice in the FSF leadership regarding RMS' Free-Software-unrelated public statements; I felt strongly that it was in the best interest of the FSF to actively seek to limit such statements, and that it was my duty to FSF to speak out about this within the organization. Those who only learned of this story in the last month (understandably) believed Selam G.'s Medium post raised an entirely new issue. In fact, RMS' views and statements posted on stallman.org about sexual morality escalated for the worse over the last few years. When the escalation started, I still considered RMS both a friend and colleague, and I attempted to argue with him at length to convince him that some of his positions were harmful to sexual assault survivors and those who are sex trafficked, and to the people who devote their lives in service to such individuals. More importantly to the FSF, I attempted to persuade RMS that launching a controversial campaign on sexual behavior and morality was counter to his and FSF's mission to advance software freedom, and told RMS that my duty as an FSF Director was to assure the best outcome for the FSF, which IMO didn't include having a leader who made such statements. Not only is human sexual behavior not a topic on which RMS has adequate academic expertise, but also his positions appear to ignore significant research and widely available information on the subject. Many of his comments, while occasionally politically intriguing, lack empathy for people who experienced trauma.

    -

    +

    IMO, this is not and has never been a Free Speech issue. I do believe freedom of speech links directly to software freedom: indeed, I see the freedom to publish software under Free licenses as almost a corollary to the freedom of speech. However, we do not need to follow leadership from those whose views we fundamentally disagree. Moreover, organizations need not and should not elevate spokespeople and leaders who speak regularly on unrelated issues that organizations find do not advance their mission, and/or that alienate important constituents. I, like many other software freedom leaders, curtail my public comments on issues not related to FOSS. (Indeed, I would not even be commenting on this issue if it had not become a central issue of concern to the software freedom community.) Leaders have power, and they must exercise the power of their words with restraint, not with impunity.

    -

    +

    RMS has consistently argued that there was a campaign of “prudish intimidation” — seeking to keep him quiet about his views on sexuality. After years of conversing with RMS about how his non-software-freedom views were a distraction, an indulgence, and downright problematic, his general response was to make even more public comments of this nature. The issue is not about RMS' right to say what he believes, nor is it even about whether or not you agree or disagree with RMS' statements. The question is whether an organization should have a designated leader who is on a sustained, public campaign advocating about an unrelated issue that many consider controversial. It really doesn't matter what your view about the controversial issue is; a leader who refuses to stop talking loudly about unrelated issues eventually creates an untenable distraction from the radical activism you're actively trying to advance. The message of universal software freedom is a radical cause; it's basically impossible for one individual to effectively push forward two unrelated controversial agendas at once. In short, the radical message of software freedom became overshadowed by RMS' radical views about sexual morality.

    -

    +

    And here is where I say the thing that may infuriate many but it's what I believe: I think RMS took a useful step by resigning some of his leadership roles at the FSF. I thank RMS for taking that step, and I wish the FSF Directors well in their efforts to assure that the FSF becomes a welcoming organization to all who care about universal software freedom. The FSF's mission is essential to our technological future, and we should all support that mission. I care deeply about that mission myself and have worked and will continue to work in our community in the best interest of the mission.

    -

    - I'm admittedly struggling to find a way to work again with RMS, given his views on sexual morality and his behaviors stemming from those views. I explicitly do not agree with this “(re-)definition” of sexual assault. Furthermore, I believe uninformed statements about sexual assault are irresponsible and cause harm to victims. #MeToo is not a “frenzy”; it is a global movement by individuals who have been harmed seeking to hold both bad actors and society-at-large accountable for ignoring systemic wrongs. Nevertheless, I still am proud of the essay that I co-wrote with RMS and still find manyofRMS'otheressayscompelling, important, andrelevant. +

    + I'm admittedly struggling to find a way to work again with RMS, given his views on sexual morality and his behaviors stemming from those views. I explicitly do not agree with this “(re-)definition” of sexual assault. Furthermore, I believe uninformed statements about sexual assault are irresponsible and cause harm to victims. #MeToo is not a “frenzy”; it is a global movement by individuals who have been harmed seeking to hold both bad actors and society-at-large accountable for ignoring systemic wrongs. Nevertheless, I still am proud of the essay that I co-wrote with RMS and still find many of RMS' other essays compelling, important, and relevant.

    -

    +

    I want the FSF to succeed in its mission and enter a new era of accomplishments. I've spent the last 22 years, without a break, dedicating substantial time, effort, care and loyalty to the various FSF roles that I've had: including employee, volunteer, at-large Director, and Voting Member. Even though my duties to the FSF are done, and my relationship with the FSF is no longer formal, I still think the FSF is a valuable institution worth helping and saving, specifically because the FSF was founded for a mission that I deeply support. And we should also realize that RMS — a human being (who is flawed like the rest of us) — invented that mission.

    -

    +

    As culture change becomes more rapid, I hope we can find reasonable nuance and moderation on our complex analysis about people and their disparate views, while we also hold individuals fully accountable for their actions. That's the difficulty we face in the post-post-modern culture of the early twenty-first century. Most importantly, I believe we must find a way to stand firm for software freedom while also making a safe environment for victims of sexual assault, sexual abuse, gaslighting, and other deplorable actions.

    -

    +

    Posted on Tuesday 15 October 2019 at 09:11 by Bradley M. Kuhn.

    -
    -

    #include <std/disclaimer.h>
    use Standard::Disclaimer;
    from standard import disclaimer
    SELECT full_text FROM standard WHERE type = 'disclaimer';

    + + +
    +

    + #include <std/disclaimer.h>
    + use Standard::Disclaimer;
    + from standard import disclaimer
    + SELECT full_text FROM standard WHERE type = 'disclaimer'; +

    Both previously and presently, I have been employed by and/or done work for various organizations that also have views on Free, Libre, and Open Source Software. As should be blatantly obvious, this is my website, not theirs, so please do not assume views and opinions here belong to any such organization. Since I do co-own ebb.org with my wife, it may not be so obvious that these aren't her views and opinions, either.

    diff --git a/resources/tests/readability/ehow-1/expected.html b/resources/tests/readability/ehow-1/expected.html index 8d80a2d..3bea2b2 100644 --- a/resources/tests/readability/ehow-1/expected.html +++ b/resources/tests/readability/ehow-1/expected.html @@ -1,94 +1,114 @@
    -

    -How to Build a Terrarium

    -

    Glass cloche terrariums are not only appealing to the eye, but they also preserve a bit of nature in your home and serve as a simple, yet beautiful, piece of art. Closed terrariums are easy to care for, as they retain much of their own moisture and provide a warm environment with a consistent level of humidity. You won’t have to water the terrariums unless you see that the walls are not misting up. Small growing plants that don’t require a lot of light work best such as succulents, ferns, moss, even orchids.

    -
    Glass cloche terrariums
    Glass cloche terrariums (Lucy Akins)
    -
    -

    Other People Are Reading

    -
    -

    What You'll Need:

    +
    +

    +How to Build a Terrarium

    + + +
    +
    +

    Glass cloche terrariums are not only appealing to the eye, but they also preserve a bit of nature in your home and serve as a simple, yet beautiful, piece of art. Closed terrariums are easy to care for, as they retain much of their own moisture and provide a warm environment with a consistent level of humidity. You won’t have to water the terrariums unless you see that the walls are not misting up. Small growing plants that don’t require a lot of light work best such as succulents, ferns, moss, even orchids.

    +
    Glass cloche terrariums
    +
    Glass cloche terrariums (Lucy Akins)
    +
    +
    +
    +

    Other People Are Reading

    + +
    +

    What You'll Need:

      -
    • Cloche
    • -
    • Planter saucer, small shallow dish or desired platform
    • -
    • Floral foam oasis
    • -
    • Ruler
    • -
    • Spoon
    • -
    • Floral wire pins or paper clips
    • -
    • Small plants (from a florist or nursery)
    • -
    • Moss
    • -
    • Tweezers
    • -
    • Other small decorative items (optional)
    • -
    -
    -
    -
    -

    Step 1

    +
  • Cloche
  • +
  • Planter saucer, small shallow dish or desired platform
  • +
  • Floral foam oasis
  • +
  • Ruler
  • +
  • Spoon
  • +
  • Floral wire pins or paper clips
  • +
  • Small plants (from a florist or nursery)
  • +
  • Moss
  • +
  • Tweezers
  • +
  • Other small decorative items (optional)
  • + +
    +
    +

    Step 1

    Measure the circumference of your cloche and cut the foam oasis about 3/4 inch (2 cm) smaller. Place the foam oasis into a container full of water and allow to soak until it sinks to the bottom. Dig out a hole on the oasis large enough to fit your plant, being careful not to pierce all the way through to the bottom.

    -
    -
    Dig a hole in the oasis.
    Dig a hole in the oasis. (Lucy Akins)
    -
    -
    -
    -

    Step 2

    +
    +
    Dig a hole in the oasis.
    +
    Dig a hole in the oasis. (Lucy Akins)
    +
    + + +
    +

    Step 2

    Insert your plant into the hole.

    -
    -
    Orchid in foam oasis
    Orchid in foam oasis (Lucy Akins)
    -
    -
    -
    -

    Step 3

    +
    +
    Orchid in foam oasis
    +
    Orchid in foam oasis (Lucy Akins)
    +
    +
    +

    Step 3

    You can add various plants if you wish.

    -
    -
    Various foliage
    Various foliage (Lucy Akins)
    -
    -
    -
    -

    Step 4

    +
    +
    Various foliage
    +
    Various foliage (Lucy Akins)
    +
    +
    +

    Step 4

    Using floral pins, attach enough moss around the oasis to cover it.

    -
    -
    Attach moss.
    Attach moss. (Lucy Akins)
    -
    -
    -
    -

    Step 5

    +
    +
    Attach moss.
    +
    Attach moss. (Lucy Akins)
    +
    +
    +

    Step 5

    Gently place the cloche over the oasis. The glass may push some of the moss upward, exposing some of the foam.

    -
    -
    Place cloche over oasis.
    Place cloche over oasis. (Lucy Akins)
    -
    -
    -
    -

    Step 6

    +
    +
    Place cloche over oasis.
    +
    Place cloche over oasis. (Lucy Akins)
    +
    +
    +

    Step 6

    Simply pull down the moss with tweezers or insert more moss to fill in the empty spaces.

    -
    -
    Rearrange moss.
    Rearrange moss. (Lucy Akins)
    -
    -
    -
    -

    Step 7

    +
    +
    Rearrange moss.
    +
    Rearrange moss. (Lucy Akins)
    +
    +
    +

    Step 7

    You can use any platform you wish. In this case, a small saucer was used.

    -
    -
    Place cloche on a platform to sit on.
    Place cloche on a platform to sit on. (Lucy Akins)
    -
    -
    -
    -

    Step 8

    +
    +
    Place cloche on a platform to sit on.
    +
    Place cloche on a platform to sit on. (Lucy Akins)
    +
    +
    +

    Step 8

    This particular terrarium rests on a planter saucer and features a small white pumpkin.

    -
    -
    Cloche placed on a terracotta saucer
    Cloche placed on a terracotta saucer (Lucy Akins)
    -
    -
    -
    -

    Step 9

    +
    +
    Cloche placed on a terracotta saucer
    +
    Cloche placed on a terracotta saucer (Lucy Akins)
    +
    +
    +

    Step 9

    This particular terrarium was placed on a wood slice and a little toy squirrel was placed inside to add a little whimsy.

    -
    -
    Placed on a wooden slice
    Placed on a wooden slice (Lucy Akins)
    -
    -
    -
    -

    Finished Terrarium

    +
    +
    Placed on a wooden slice
    +
    Placed on a wooden slice (Lucy Akins)
    +
    +
    +

    Finished Terrarium

    Displayed alone or in a group, these pretty arrangements allow you to add a little nature to your decor or tablescape.

    -
    -
    Cloche terrarium
    Cloche terrarium (Lucy Akins)
    -
    -

    Featured

    -
    + +
    Cloche terrarium
    +
    Cloche terrarium (Lucy Akins)
    + + + + + + + +
    +

    Featured

    + +
    +
    diff --git a/resources/tests/readability/ehow-2/expected.html b/resources/tests/readability/ehow-2/expected.html index 200fcf7..d17cda0 100644 --- a/resources/tests/readability/ehow-2/expected.html +++ b/resources/tests/readability/ehow-2/expected.html @@ -1,58 +1,151 @@
    - -

    -
    -
    + }">

    +
    + +

    +

    + +
    -

    Graduation parties are a great way to commemorate the years of hard work teens and college co-eds devote to education. They’re also costly for mom and dad.

    -

    The average cost of a graduation party in 2013 was a whopping $1,200, according to Graduationparty.com; $700 of that was allocated for food. However that budget was based on Midwestern statistics, and parties in urban areas like New York City are thought to have a much higher price tag.

    -

    Thankfully, there are plenty of creative ways to trim a little grad party fat without sacrificing any of the fun or celebratory spirit.

    -
    -
    Graduation
    +
    +
    +
    +

    Graduation parties are a great way to commemorate the years of hard work teens and college co-eds devote to education. They’re also costly for mom and dad.

    +

    The average cost of a graduation party in 2013 was a whopping $1,200, according to Graduationparty.com; $700 of that was allocated for food. However that budget was based on Midwestern statistics, and parties in urban areas like New York City are thought to have a much higher price tag.

    +

    Thankfully, there are plenty of creative ways to trim a little grad party fat without sacrificing any of the fun or celebratory spirit.

    +
    +
    + Graduation +
    +
    (Mike Watson Images/Moodboard/Getty)
    -
    -
    -

    Parties hosted at restaurants, clubhouses and country clubs eliminate the need to spend hours cleaning up once party guests have gone home. But that convenience comes with a price tag. A country club may charge as much as $2,000 for room rental and restaurant food and beverage will almost always cost more than food prepped and served at home.

    -
    Save money hosting the party at home.
    +
    + + + + +
    +

    Parties hosted at restaurants, clubhouses and country clubs eliminate the need to spend hours cleaning up once party guests have gone home. But that convenience comes with a price tag. A country club may charge as much as $2,000 for room rental and restaurant food and beverage will almost always cost more than food prepped and served at home.

    +
    + Save money hosting the party at home. +
    +
    Thomas Jackson/Digital Vision/Getty Images
    -
    +
    +
    +
    + + +

    Instead of hiring a DJ, use your iPod or Smartphone to spin the tunes. Both easily hook up to most speakers or mp3 compatible docks to play music from your music library. Or download Pandora, the free online radio app, and play hours of music for free.

    -

    Personalize the music with a playlist of the grad’s favorite songs or songs that were big hits during his or her years in school.

    -
    Online radio can take the place of a hired DJ.
    +

    Personalize the music with a playlist of the grad’s favorite songs or songs that were big hits during his or her years in school.

    +
    + Online radio can take the place of a hired DJ. +
    +
    Spencer Platt/Getty Images News/Getty Images
    -
    -

    Avoid canned drinks, which guests often open, but don't finish. Serve pitchers of tap water with lemon and cucumber slices or sliced strawberries for an interesting and refreshing flavor. Opt for punches and non-alcoholic drinks for high school graduates that allow guests to dole out the exact amount they want to drink.

    -
    Serve drinks in pitchers, not in cans.
    +
    +
    +
    + + +
    +

    Avoid canned drinks, which guests often open, but don't finish. Serve pitchers of tap water with lemon and cucumber slices or sliced strawberries for an interesting and refreshing flavor. Opt for punches and non-alcoholic drinks for high school graduates that allow guests to dole out the exact amount they want to drink.

    +
    + Serve drinks in pitchers, not in cans. +
    +
    evgenyb/iStock/Getty Images
    -
    -

    Instead of inviting everyone you – and the graduate – know or ever knew, scale back the guest list. Forgo inviting guests that you or your grad haven't seen for eons. There is no reason to provide provisions for people who are essentially out of your lives. Sticking to a small, but personal, guest list allows more time to mingle with loved ones during the party, too.

    -
    Limit guests to those close to the graduate.
    +
    + +
    +
    + + +
    +

    Instead of inviting everyone you – and the graduate – know or ever knew, scale back the guest list. Forgo inviting guests that you or your grad haven't seen for eons. There is no reason to provide provisions for people who are essentially out of your lives. Sticking to a small, but personal, guest list allows more time to mingle with loved ones during the party, too.

    +
    + Limit guests to those close to the graduate. +
    +
    Kane Skennar/Photodisc/Getty Images
    -
    -

    See if your grad and his best friend, girlfriend or close family member would consider hosting a joint party. You can split some of the expenses, especially when the two graduates share mutual friends. You'll also have another parent to bounce ideas off of and to help you stick to your budget when you're tempted to splurge.

    -
    Throw a joint bash for big savings.
    +
    +
    +
    + + +
    +

    See if your grad and his best friend, girlfriend or close family member would consider hosting a joint party. You can split some of the expenses, especially when the two graduates share mutual friends. You'll also have another parent to bounce ideas off of and to help you stick to your budget when you're tempted to splurge.

    +
    + Throw a joint bash for big savings. +
    +
    Mike Watson Images/Moodboard/Getty
    -
    +
    +
    +
    + + +

    Skip carving stations of prime rib and jumbo shrimp as appetizers, especially for high school graduation parties. Instead, serve some of the graduate's favorite side dishes that are cost effective, like a big pot of spaghetti with breadsticks. Opt for easy and simple food such as pizza, finger food and mini appetizers.

    -

    Avoid pre-packaged foods and pre-made deli platters. These can be quite costly. Instead, make your own cheese and deli platters for less than half the cost of pre-made.

    -
    Cost effective appetizers are just as satisfying as pre-made deli platters.
    +

    Avoid pre-packaged foods and pre-made deli platters. These can be quite costly. Instead, make your own cheese and deli platters for less than half the cost of pre-made.

    +
    + Cost effective appetizers are just as satisfying as pre-made deli platters. +
    +
    Mark Stout/iStock/Getty Images
    -
    -

    Instead of an evening dinner party, host a grad lunch or all appetizers party. Brunch and lunch fare or finger food costs less than dinner. Guests also tend to consume less alcohol in the middle of the day, which keeps cost down.

    -
    A brunch gathering will cost less than a dinner party.
    +
    +
    +
    + + +
    +

    Instead of an evening dinner party, host a grad lunch or all appetizers party. Brunch and lunch fare or finger food costs less than dinner. Guests also tend to consume less alcohol in the middle of the day, which keeps cost down.

    +
    + A brunch gathering will cost less than a dinner party. +
    +
    Mark Stout/iStock/Getty Images
    -
    -

    Other People Are Reading

    -

    Decorate your party in the graduate's current school colors or the colors of the school he or she will be headed to next. Décor that is not specifically graduation-themed may cost a bit less, and any leftovers can be re-used for future parties, picnics and events.

    -
    Theme the party by color without graduation-specific decor.
    +
    +
    +
    +

    Other People Are Reading

    +
    + +
    + +
    +
    + + +
    +

    Decorate your party in the graduate's current school colors or the colors of the school he or she will be headed to next. Décor that is not specifically graduation-themed may cost a bit less, and any leftovers can be re-used for future parties, picnics and events.

    +
    + Theme the party by color without graduation-specific decor. +
    +
    jethuynh/iStock/Getty Images
    -

    Related Searches

    -

    Promoted By Zergnet

    + + + + + +

    + Related Searches +

    + + +

    Promoted By Zergnet

    + +
    + diff --git a/resources/tests/readability/embedded-videos/expected.html b/resources/tests/readability/embedded-videos/expected.html index e3f2fd2..89513a2 100644 --- a/resources/tests/readability/embedded-videos/expected.html +++ b/resources/tests/readability/embedded-videos/expected.html @@ -1,21 +1,26 @@ -

    Lorem

    -

    Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod +

    +

    Lorem

    +

    Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.

    -

    Videos

    -

    At root

    -

    In a paragraph

    -

    -

    In a div

    -
    -

    Foo

    -

    +

    Videos

    +

    At root

    + + + +

    In a paragraph

    +

    +

    In a div

    +
    +

    Foo

    +

    Tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. -

    +

    +
    diff --git a/resources/tests/readability/engadget/expected.html b/resources/tests/readability/engadget/expected.html index 793ebb9..b38701e 100644 --- a/resources/tests/readability/engadget/expected.html +++ b/resources/tests/readability/engadget/expected.html @@ -1,5 +1,6 @@
    -

    The Xbox +

    +

    The Xbox One X is the ultimate video game system. It sports more horsepower than any system ever. And it plays more titles in native 4K than Sony's @@ -12,44 +13,64 @@ more to play the console's exclusive titles in 4K. Everyone else might be better off waiting, or opting for the $279 Xbox - One S.

    -

    Gallery: Xbox One + One S.

    +

    +
    +

    Gallery: Xbox One X | 14 Photos

    -

    -
    -
      -
    • Most +
      +

      + +

      + +
      +
      + + +
      +
      + +
        +
      • Most powerful hardware ever in a home console
      • -
      • Solid +
      • Solid selection of enhanced titles
      • -
      • 4K Blu-ray +
      • 4K Blu-ray drive is great for movie fans
      • -
      -
        -
      • Expensive +
      +
      +
      + +
        +
      • Expensive
      • -
      • Not worth +
      • Not worth it if you don’t have a 4K TV
      • -
      • Still no VR +
      • Still no VR support
      • -
      -
      -

      As promised, the Xbox One X is the +

    +
    +
    +
    + +

    As promised, the Xbox One X is the most powerful game console ever. In practice, though, it really just puts Microsoft on equal footing with Sony’s PlayStation 4 Pro. 4K/HDR enhanced games look great, but - it’s lack of VR is disappointing in 2017.

    -
    + it’s lack of VR is disappointing in 2017.

    +
    +
    -
    -

    Hardware

    -

    -

    Despite all the power inside, the One X is + +

    +

    Hardware

    +

    +

    Despite all the power inside, the One X is Microsoft's smallest console to date. It looks similar to the Xbox One S, except it has an entirely matte black case and is slightly slimmer. It's also @@ -57,7 +78,7 @@ but it feels far heavier than you'd expect for its size, thanks to all of its new hardware. The One S, in comparison, weighs two pounds less.

    -

    The Xbox One X's real upgrades are under the hood. It +

    The Xbox One X's real upgrades are under the hood. It features an 8-core CPU running at 2.3Ghz, 12GB of GDDR5 RAM, a 1 terabyte hard drive and an upgraded AMD Polaris GPU with 6 teraflops of computing power. @@ -66,8 +87,8 @@ That additional horsepower means the Xbox One X can run more games in full native 4K than the Sony's console.

    -

    -

    Along the front, there's the slot-loading 4K Blu-ray +

    +

    Along the front, there's the slot-loading 4K Blu-ray drive, a physical power button, a single USB port and a controller pairing button. And around back, there are HDMI out and in ports, the latter of which @@ -76,10 +97,15 @@ out, and gigabit Ethernet. If you've still got a Kinect around, you'll need to use a USB adapter to plug it in.

    -
    -
    Devindra Hardawar/AOL
    -
    -

    The console's controller hasn't changed since its +

    +
    +
    +
    Devindra Hardawar/AOL +
    +
    +
    +
    +

    The console's controller hasn't changed since its last mini-upgrade with the Xbox One S. That revision rounded out its seams, improved bumper performance and added a 3.5mm headphone jack. It's still a great @@ -90,11 +116,16 @@ extra expense. And manually swapping batteries feels like a bad user experience when every other console has rechargeable controllers.

    -

    In use

    -
    -
    Devindra Hardawar/AOL
    -
    -

    You won't find any major differences between the One +

    In use

    +
    +
    +
    +
    Devindra Hardawar/AOL +
    +
    +
    +
    +

    You won't find any major differences between the One X and the last Xbox at first — aside from a more dramatic startup sequence. Navigating the Xbox interface is fast and zippy, but mostly that's due @@ -105,14 +136,14 @@ setup and it'll make it feel just like your old machine. It's also a lot faster than waiting for everything to download from Xbox Live.

    -

    You'll still have to set aside some time if you want +

    You'll still have to set aside some time if you want to play an Xbox One X-enhanced title, though. Those 4K textures will make games significantly larger, but Microsoft says it's come up with a few ways to help developers make downloading them more efficient. For example, language packs and other optional content won't get installed by default.

    -

    We only had a few enhanced titles to test out during +

    We only had a few enhanced titles to test out during our review: Gears of War 4, Killer Instinct and Super Lucky's Tale. They each took advantage of the console in different @@ -124,10 +155,15 @@ dip occasionally. I was also surprised that load times were on-par with what I've seen with the game on the Xbox One S.

    -
    -
    -
    -

    You can also play in Performance mode, which bumps +

    +
    + +
    + +
    +
    +
    +

    You can also play in Performance mode, which bumps the frame rate up to 60FPS and uses higher quality graphical effects, while rendering it lower in 1080p. Personally, I preferred this, since it makes @@ -136,7 +172,7 @@ PlayStation 4 Pro games also let you choose how you wanted to distribute its power, so in some ways Microsoft is just following in its footsteps.

    -

    I've been playing Gears of War 4 on my +

    I've been playing Gears of War 4 on my gaming PC (which is connected to my home theater) over the past year, and I was impressed that the Xbox One X is able to deliver a similar experience. @@ -150,11 +186,21 @@ but there were clearly plenty of graphics settings it couldn't take advantage of, in particular higher levels of bloom lighting and shadow detail.

    -
    -

    Gallery: Xbox + +

    + +
    +

    Gallery: Xbox One X screenshots | 9 Photos

    -

    -

    Killer Instinct and Super Lucky's +

    +

    + +

    + +
    + +
    +

    Killer Instinct and Super Lucky's Tale run in 4K at a smooth 60FPS. They both looked and played better than their standard versions, though I was surprised they didn't take @@ -163,14 +209,14 @@ resolution. Unless you're sitting very close to a TV above 50-inches, you'd likely have a hard time telling between 4K and 1080p.

    -

    That poses a problem for Microsoft: It's betting that +

    That poses a problem for Microsoft: It's betting that gamers will actually want true 4K rendering. In practice, though, PlayStation 4 Pro titles running in HDR and resolutions between 1080p and 4K often look just as good to the naked eye. The Xbox One X's big advantage is that its hardware could let more games reach 60FPS compared to Sony's console.

    -

    Microsoft says over 130 Xbox One X-enhanced titles +

    Microsoft says over 130 Xbox One X-enhanced titles are in the works. That includes already-released games like Forza Motorsport 7 and Assassin's Creed Origins, as well as upcoming titles @@ -185,16 +231,21 @@ who were prescient about how they built their games. Basically, don't expect your entire 360 library to get enhanced.

    -
    -
    -
    -

    Even if a game isn't specifically tuned for the new +

    +
    + +
    + +
    +
    +
    +

    Even if a game isn't specifically tuned for the new console, Microsoft says you might still see some performance improvements. The PlayStation 4 Pro, meanwhile, has over one hundred games built for its hardware, and its boost mode can speed up some older games.

    -

    Microsoft is still pushing the Xbox as more than just +

    Microsoft is still pushing the Xbox as more than just a game console, though. 4K Blu-rays loaded up quickly, and I didn't notice many delays as I skipped around films. Planet Earth II, in @@ -204,8 +255,10 @@ capable HDR 10 standard. That makes sense since it's more widely supported, but it would have been nice to see Dolby's, too.

    -

    -

    And speaking of Dolby technology, Microsoft is also +

    + +

    +

    And speaking of Dolby technology, Microsoft is also highlighting Atmos support on the One X, just like it did with the One S. The company's app lets you configure the console to pass audio Atmos signals to @@ -218,7 +271,7 @@ supports Atmos for a handful of films (something that the Xbox One S and PlayStation 4 offer, as well).

    -

    One thing you won't find in the new Xbox is VR +

    One thing you won't find in the new Xbox is VR support. Microsoft has mentioned that the console will offer some sort of mixed reality, but it hasn't offered up any details yet. It's technically @@ -227,11 +280,16 @@ shame that Microsoft is being so wishy-washy because Sony has had a very successful head start with the PlayStation VR.

    -

    Pricing and the competition

    -
    -
    Devindra Hardawar/AOL
    -
    -

    The biggest knock against the Xbox One X is its $500 +

    Pricing and the competition

    +
    +
    +
    +
    Devindra Hardawar/AOL +
    +
    +
    +
    +

    The biggest knock against the Xbox One X is its $500 price. The PS4 Pro launched at $400 last year, and there's a good chance we'll see plenty of deals around the holidays. If your friends are on Xbox @@ -240,7 +298,7 @@ want to play third-party titles that come to both platforms, though, the PS4 Pro is clearly the better deal.

    -

    If you're looking to upgrade from an original Xbox +

    If you're looking to upgrade from an original Xbox One, and you have a new TV, the One X might be more compelling. It's faster and offers more features than the One S, and more importantly, it'll last you @@ -252,7 +310,7 @@ high-res textures and have more graphical effects, but it's simply not worth the upgrade since those TVs don't support HDR.

    -

    If price isn't a huge concern for you, it's worth +

    If price isn't a huge concern for you, it's worth considering investing in a gaming PC. A decent one costs between $600 and $800, plus the price of a monitor, but it'll easily be more powerful than the @@ -261,9 +319,9 @@ game publishers are offering most major titles on PC, you won't be missing out on much by ditching consoles.

    -

    Wrap-up

    -

    -

    Ultimately, the Xbox One X offers some major +

    Wrap-up

    +

    +

    Ultimately, the Xbox One X offers some major performance upgrades that gamers will notice -- especially if you're coming from an original Xbox One. But it's also a bit disappointing since it's @@ -271,6 +329,8 @@ offer VR yet. For Microsoft fans, though, none of that will matter. It's exactly what the company promised: the fastest game console ever made.

    -
    -
    +
    + + +
    diff --git a/resources/tests/readability/firefox-nightly-blog/expected.html b/resources/tests/readability/firefox-nightly-blog/expected.html index 143d227..ac1c190 100644 --- a/resources/tests/readability/firefox-nightly-blog/expected.html +++ b/resources/tests/readability/firefox-nightly-blog/expected.html @@ -1,240 +1,295 @@ -
    -

    +
    +
    + +
    +

    Highlights

    -
      -
    • Here’s our Firefox Year in Review!
    • -
    • Here’s our Performance Year in Review!
    • -
    • We’ve just landed Bug 1553982, which aims to prevent starting an update while another Firefox instance is running (the cause of that about:restartrequired error page you may have seen). -
      • -

        The about:restartrequired error page, saying "Sorry. We just need to do one small thing to keep going. Nightly has just been updated in the background. Click Restart Nightly to complete the update. We will restore all your pages, windows and tabs afterwards, so you can be on your way quickly.", followed by a button to restart Nightly.

        +
          +
        • + Here’s our Firefox Year in Review! +
        • +
        • + Here’s our Performance Year in Review! +
        • +
        • We’ve just landed Bug 1553982, which aims to prevent starting an update while another Firefox instance is running (the cause of that about:restartrequired error page you may have seen). +
            +
          • +
            +

            The about:restartrequired error page, saying "Sorry. We just need to do one small thing to keep going. Nightly has just been updated in the background. Click Restart Nightly to complete the update. We will restore all your pages, windows and tabs afterwards, so you can be on your way quickly.", followed by a button to restart Nightly.

            Users who run multiple user profiles concurrently will probably see this less!

            -
          -
        • -
        • Also just about to land is Bug 353804, which provides some support for downloading new updates when we already have an update downloaded but haven’t installed it yet. That should prevent many cases of restarting to finish an update and then immediately being notified about another one. +
        +
      • +
    • -
    • Thanks to evilpie, users can now import logins from Keepass(XC) into Firefox -
    • -
    • From Firefox 85 it’s now possible to disable tab-to-search on a per-engine basis, by unchecking a search engine in Search Preferences. That will both hide the shortcut button and disable tab-to-search for the engine. (Bug 1681512) +
    • Also just about to land is Bug 353804, which provides some support for downloading new updates when we already have an update downloaded but haven’t installed it yet. That should prevent many cases of restarting to finish an update and then immediately being notified about another one.
    • -
    • From Firefox 85 it’s also possible to disable tab-to-search globally by unchecking the Search Engines checkbox in the Address Bar Preferences, under Privacy & Security. +
    • Thanks to evilpie, users can now import logins from Keepass(XC) into Firefox
    • -
    • Firefox now supports printing non-contiguous page ranges (e.g. 1-3, 6, 7) – Bug 499640 -
    • -
    • DevTools and Marionette are now fully Fission compatible! Congratulations to those teams! -
      • Reminder: Nightly users can help us test Fission by enabling it in about:preferences#experimental, and filing bugs here -
      -
    • -
    -

    +
  • From Firefox 85 it’s now possible to disable tab-to-search on a per-engine basis, by unchecking a search engine in Search Preferences. That will both hide the shortcut button and disable tab-to-search for the engine. (Bug 1681512) +
  • +
  • From Firefox 85 it’s also possible to disable tab-to-search globally by unchecking the Search Engines checkbox in the Address Bar Preferences, under Privacy & Security. +
  • +
  • Firefox now supports printing non-contiguous page ranges (e.g. 1-3, 6, 7) – Bug 499640 +
  • +
  • DevTools and Marionette are now fully Fission compatible! Congratulations to those teams! +
      +
    • Reminder: Nightly users can help us test Fission by enabling it in about:preferences#experimental, and filing bugs here +
    • +
    +
  • + +

    Friends of the Firefox team

    -

    +

    Introductions/Shout-Outs

    -
    • [harry] Amy Churchwell joins the Search & Navigation team today. She transferred internally from Marketing Engineering. Welcome Amy! -
    -

    Resolved bugs (excluding employees)

    -

    +
      +
    • [harry] Amy Churchwell joins the Search & Navigation team today. She transferred internally from Marketing Engineering. Welcome Amy! +
    • +
    +

    + Resolved bugs (excluding employees) +

    +

    Fixed more than one bug

    -
      -
    • Masatoshi Kimura [:emk] +
        +
      • Masatoshi Kimura [:emk]
      • -
      • Michelle Goossens [:masterwayz] +
      • Michelle Goossens [:masterwayz]
      • -
      • Sonia +
      • Sonia
      • -
      • Tim Nguyen :ntim +
      • Tim Nguyen :ntim
      • -
      -

      +

    +

    New contributors (🌟 = first patch)

    - +

    Project Updates

    -

    +

    Add-ons / Web Extensions

    -
    +
    Addon Manager & about:addons
    -
    • Starting from Firefox 85, Mozilla-signed privileged addons can be installed from a third party website without triggering the “third party addon install doorhanger” (and without having to add new “install” site permission for those hosts, e.g. as we had to do for fpn.firefox.com) – Bug 1681331 -
    -
      -
    • Fixed addon startup issue when an extension sideloaded in the profile is updated on disk (Bug 1664144) + +
        +
      • Fixed addon startup issue when an extension sideloaded in the profile is updated on disk (Bug 1664144)
      • -
      -
      +
    • Some more small about:addons cleanup from ntim (Bug 1678173, Bug 1678865, Bug 1678866). Thanks a lot, ntim! +
    • +
    + +
    WebExtensions Framework
    -
      -
    • -Ankush Duacontributed a fix for the devtools optional_permission (the devtools optional_permission can be used by extension, like ABP, that provides a devtools panel as a secondary feature of the addon) – Bug 1671579 -
    • -
    • Fixed content scripts applied to webpages loaded as subframes of an extension browserAction/pageAction popup when Fission is enabled – Bug 1680877 -
    • -
    • Fixed addon startup issue when webRequest is moved from permissions to optional_permissions in an addon update (regression from Bug 1624235) – Bug 1637059 -
    • -
    -

    +
      +
    • + Ankush Dua contributed a fix for the devtools optional_permission (the devtools optional_permission can be used by extension, like ABP, that provides a devtools panel as a secondary feature of the addon) – Bug 1671579 +
    • +
    • Fixed content scripts applied to webpages loaded as subframes of an extension browserAction/pageAction popup when Fission is enabled – Bug 1680877 +
    • +
    • Fixed addon startup issue when webRequest is moved from permissions to optional_permissions in an addon update (regression from Bug 1624235) – Bug 1637059 +
    • +
    +

    Developer Tools

    -
      -
    • -DevTools FissionM2 – Making DevTools Fission compatible DONE. -
      • -

        A table showing the total number of remaining bugs for the MVP to make the DevTools Fission-compatible.

        +
          +
        • + DevTools Fission M2 – Making DevTools Fission compatible DONE. +
            +
          • +
            +

            A table showing the total number of remaining bugs for the MVP to make the DevTools Fission-compatible.

            Our DevTools are ready for Fission (out-of-process iframes)!

            -
          -
        • -
        • -Marionette Fission – Making Marionette Fission compatible DONE -
          • -

            A table showing the total number of remaining bugs for the MVP to make Marionette Fission-compatible.

            +
            +
          • +
          +
        • +
        • + Marionette Fission – Making Marionette Fission compatible DONE +
            +
          • +
            +

            A table showing the total number of remaining bugs for the MVP to make Marionette Fission-compatible.

            Marionette, the framework that allows Firefox to be tested with automation, is now Fission compatible too!

            -
          -
        • -
        -

        +

        +
      • +
      +
    • +
    +

    Fission

    - -

    + +

    Installer & Updater

    -
    • Background updater work is also proceeding, with Bug 1676296 landing last week to support managing scheduled tasks in Gecko, and more development still also happening on the background task framework. -
    -

    +
      +
    • Background updater work is also proceeding, with Bug 1676296 landing last week to support managing scheduled tasks in Gecko, and more development still also happening on the background task framework. +
    • +
    +

    New Tab Page and Pocket

    -
    • We’re running three experiments: +
        +
      • We’re running three experiments:
          -
        • Newtab Pocket stories in AU and NZ +
        • Newtab Pocket stories in AU and NZ
        • -
        • New signup/login call-to-action in the Pocket doorhanger +
        • New signup/login call-to-action in the Pocket doorhanger
        • -
        • We’re testing some changes to newtab story personalization +
        • We’re testing some changes to newtab story personalization
        • -
        -
      -

      Password Manager

      -
        -
      • Dimi fixed Bug 1677710 The password manager code triggers main thread sqlite disk I/O off of the gather-telemetry notification +
    • -
    • And Bug 1678200 Remove or update probes expiring in Firefox 86: pwmgr.doorhanger_submitted#doorhanger_submitted +
    +

    + Password Manager +

    +
      +
    • Dimi fixed Bug 1677710 The password manager code triggers main thread sqlite disk I/O off of the gather-telemetry notification
    • -
    • Thanks for Kenrick95 for fixing Bug 1678616 about:logins menu problem +
    • And Bug 1678200 Remove or update probes expiring in Firefox 86: pwmgr.doorhanger_submitted#doorhanger_submitted
    • -
    • 2021 Planning underway +
    • Thanks for Kenrick95 for fixing Bug 1678616 about:logins menu problem
    • -
    -

    +
  • 2021 Planning underway +
  • + +

    PDFs & Printing

    -
      -
    • mstriemer put a Printing… message in the dialog and hid the popup dialog which showed progress, the cancel button on that dialog caused problems and it looked dated Bug 1679133 -
    • -
    • mstriemer hid the print setting that don’t relate to PDFs when a PDF is being printed Bug 1669725 -
    • -
    • mstriemer updated the form to be disabled when loading a printer’s settings. Sometimes loading a physical printer’s settings can take a few settings and changes could be lost in this time Bug 1676388 -
    • -
    • emalysz made a change to avoid updating the preview for some settings that can’t change the preview output Bug 1676199 -
    • -
    • sfoster added a paginator to the preview when it’s hovered to show current page, next/prev/first/last buttons Bug 1654684 -
    • -
    • emalysz added support for non-contiguous page ranges (ex: 1-3, 6, 7) Bug 499640 -
    • -
    • emalysz fixed an issue where the form could get disabled with custom margins interactions Bug 1674106 -
    • -
    -

    +
      +
    • mstriemer put a Printing… message in the dialog and hid the popup dialog which showed progress, the cancel button on that dialog caused problems and it looked dated Bug 1679133 +
    • +
    • mstriemer hid the print setting that don’t relate to PDFs when a PDF is being printed Bug 1669725 +
    • +
    • mstriemer updated the form to be disabled when loading a printer’s settings. Sometimes loading a physical printer’s settings can take a few settings and changes could be lost in this time Bug 1676388 +
    • +
    • emalysz made a change to avoid updating the preview for some settings that can’t change the preview output Bug 1676199 +
    • +
    • sfoster added a paginator to the preview when it’s hovered to show current page, next/prev/first/last buttons Bug 1654684 +
    • +
    • emalysz added support for non-contiguous page ranges (ex: 1-3, 6, 7) Bug 499640 +
    • +
    • emalysz fixed an issue where the form could get disabled with custom margins interactions Bug 1674106 +
    • +
    +

    Performance

    - -
  • mconley fixed an AsyncShutdown hang caused by the about:home startup cache -
  • -
  • mconley re-enabled TART -
  • -
  • dthayer has some fixes and polish for the pre-XUL skeleton UI -
  • - -

    +
  • Gijs made the Bookmarks Toolbar initialization occur later in the startup window +
  • +
  • Gijs fixed some flicker that occurred when launching the browser with the Bookmarks Toolbar enabled +
  • +
  • mconley fixed an AsyncShutdown hang caused by the about:home startup cache +
  • +
  • mconley re-enabled TART +
  • +
  • dthayer has some fixes and polish for the pre-XUL skeleton UI +
  • + +

    Picture-in-Picture

    - + + +

    Search and Navigation

    -
      -
    • Fixed regressions related to Input Method Editor, in particular loss of the last token (Bug 1673669) and race conditions causing the wrong search engine to be used or Search Mode to be lost (Bug 1679697, Bug 1678647) +
        +
      • Fixed regressions related to Input Method Editor, in particular loss of the last token (Bug 1673669) and race conditions causing the wrong search engine to be used or Search Mode to be lost (Bug 1679697, Bug 1678647)
      • -
      • Introduced a new advanced preference to keep the Address Bar results panel open during IME composition. This provides a better experience for keyboard layouts that don’t open a picker panel. In the future we hope to be able to auto-detect that situation, but in the meanwhile, you can flip browser.urlbar.imeCompositionClosesPanel to false and test the alternative behavior (Bug 1673971) +
      • Introduced a new advanced preference to keep the Address Bar results panel open during IME composition. This provides a better experience for keyboard layouts that don’t open a picker panel. In the future we hope to be able to auto-detect that situation, but in the meanwhile, you can flip browser.urlbar.imeCompositionClosesPanel to false and test the alternative behavior (Bug 1673971)
      • -
      • URL canonization (www.*.com) now uses https by default, the protocol can be customized through the browser.fixup.alternate.protocol advanced pref (Bug 1638215) +
      • URL canonization (www.*.com) now uses https by default, the protocol can be customized through the browser.fixup.alternate.protocol advanced pref (Bug 1638215)
      • -
      • Work continued on the weather QuickSuggest experiment, but its release has been moved to January. +
      • Work continued on the weather QuickSuggest experiment, but its release has been moved to January.
      • -
      • Region.jsm now can use a Geolocation monitor to update without hitting the network (Bug 1663501) +
      • Region.jsm now can use a Geolocation monitor to update without hitting the network (Bug 1663501)
      • -
      • Fixed a bug where search engines were being re-added on startup after their removal, when using a language pack (Bug 1675624) +
      • Fixed a bug where search engines were being re-added on startup after their removal, when using a language pack (Bug 1675624)
      • -
      -
    + +

    + +
    +
    diff --git a/resources/tests/readability/folha/expected.html b/resources/tests/readability/folha/expected.html index d67fefe..65947c1 100644 --- a/resources/tests/readability/folha/expected.html +++ b/resources/tests/readability/folha/expected.html @@ -1,23 +1,24 @@
    -

    - Após rechaçar um encontro da seleção brasileira com o presidente eleito JairBolsonaro, o técnico Tite declarou que errou ao levar a taça da Copa Libertadores de 2012, conquistada pelo Corinthians, ao ex-presidente Luiz Inácio Lula da Silva. +

    + Após rechaçar um encontro da seleção brasileira com o presidente eleito Jair Bolsonaro, o técnico Tite declarou que errou ao levar a taça da Copa Libertadores de 2012, conquistada pelo Corinthians, ao ex-presidente Luiz Inácio Lula da Silva.

    -

    +

    Ao lado de representantes do clube paulista, o atual comandante do Brasil ainda entregou uma réplica do troféu a Lula.

    -

    +

    "Em 2012 eu errei. Ele não era presidente, mas fui ao Instituto e mandei felicitações por um aniversário. Não me posicionei politicamente. Não tenho partido político, tenho sim a torcida para que o Brasil seja melhor em igualdade social. E que nossas prioridades sejam educação e punição. Que seja dada a possibilidade de estudo ao garoto de São Braz, que não tem chão batido para ir à escola, ou da periferia de Caixas ou do morro do Rio de Janeiro. Seja dada a ele a prioridade de estudo e não a outras situações", falou Tite ao programa "Grande Círculo", que ainda irá ao ar no SporTV.

    -

    +

    Na ocasião, Tite e outros representantes do Corinthians foram ao Instituto Lula para mostrar a taça original da Libertadores ao ex-presidente.

    -

    + +

    O assunto foi levantado porque recentemente Tite foi questionado se aceitaria um encontro da seleção brasileira com Bolsonaro em uma conquista de título ou antes da Copa América de 2019, por exemplo. O treinador deixou claro que preferiria evitar esse tipo de formalidade.

    -

    +

    Apesar disso, Tite não questionou a ação de Palmeiras e CBF, que convidaram Bolsonaro para a festa do título do Campeonato Brasileiro. O presidente eleito até levantou a taça conquistada pelo clube alviverde.

    -

    +

    "Em 2012 eu fiz e errei. O protocolo e a situação gerada no jogo do Palmeiras são fatos de opinião pessoal. CBF e Palmeiras, enquanto instituições têm a opinião. Errei lá atrás, não faria com o presidente antes da Copa e nem agora porque entendo que misturar esporte e política não é legal. Fiz errado lá atrás? Sim. Faria de novo? Não", acrescentou o comandante.

    -
    + diff --git a/resources/tests/readability/gmw/expected.html b/resources/tests/readability/gmw/expected.html index e06e352..6fe4c9c 100644 --- a/resources/tests/readability/gmw/expected.html +++ b/resources/tests/readability/gmw/expected.html @@ -1,43 +1,57 @@
    -

      翱翔于距地球数千公里的太空中,进入广袤漆黑的未知领域,是一项艰苦卓绝的工作。这让人感到巨大压力和极度恐慌。那么,为什么不能让宇航员来一杯“地球末日”鸡尾酒来放松一下?

    -

      不幸的是,对于希望能喝上一杯的太空探险者,那些将他们送上太空的政府机构普遍禁止他们染指包括酒在内的含酒精饮料。

    -

      但是,很快普通人都会有机会向人类“最终的边疆”出发——以平民化旅行的形式,去探索和殖民火星。确实,火星之旅将是一次令人感到痛苦的旅行,可能一去不复返并要几年时间才能完成,但是否应该允许参与者在旅程中痛饮一番?或至少携带能在火星上发酵自制酒精饮料的设备?

    -

    (Credit: Nasa)

    -

      图注:巴兹?奥尔德林(Buzz Aldrin)可能是第二个在月球上行走的人,但他是第一个在月球上喝酒的人

    -

      事实是,历史上酒与太空探险有一种复杂的关系。让我们来看看喝了酒的航天员究竟会发生什么—— 如果我们开始给予进入太空的人类更大的自由度,又可能会发生什么。

    -

      人们普遍认为,当一个人所处的海拔越高,喝醉后会越容易感到头昏。因此,人们自然地想到,当人身处地球轨道上时,饮酒会对人体有更强烈的致眩作用。但这种说法可能不是正确的。

    -

      事实上,有证据表明,早在上世纪八十年代就澄清了这一传言。1985年,美国联邦航空管理局(UFAA)开展了一项研究,以验证人在不同的海拔高度饮酒,是否会影响执行复杂任务时的表现和酒精测定仪的读数。

    -

      在这项研究中,17名男子被要求在地面和一间模拟海拔3.7公里的房间内喝下一些伏特加。然后,他们被要求完成各种任务,包括心算口算问题、用操纵杆在示波器上跟踪灯光以及各种其它测试。研究人员发现,“酒精和海拔高度对酒精测定仪读数或完成任务的表现情况没有交互作用”。

    -

      所以,人乘坐飞机时醉得更快是个传说?纽约州立大学(State University of New York,SUNY)社会学荣誉教授戴夫·汉森(Dave Hanson)研究酒精和饮酒超过40年,他认为确实如此。他说:“我不认为它(在太空中饮酒)会有任何不同。”

    -

      他认为高原反应可能类似于宿醉,但它也可能类似于中毒。他说:“如果人们没有感受到充分的大气压力,他们也会觉得喝醉了一样。”

    -

      相反,那些声称在飞机上比在地面上醉得更快的人,可能只是经历了“自认喝醉(think-drink)”效应,这种效应多年来已被广泛研究。它表明,如果人们认为自己喝醉了,那他们的一举一动会真的像喝醉了一样—— 而不是实际上他们真的醉了。

    -

      汉森指出:“如果人们脑子里一直认为在飞机上酒精会对他们产生与平常不同的作用,那么他们乘坐飞机时真的会觉得酒精对他们产生了不同的作用。”

    -

      所以,如果酒精对人体的物理效应与海拔高度无关,那么在国际空间站上睡前小饮一杯不应该是一个大问题,对吧?错了。

    -

      美国宇航局约翰逊航天中心发言人丹尼尔·霍特(Daniel Huot)表示:“国际空间站上的宇航员不允许喝酒。在国际空间站上,酒精和其它挥发性化合物的使用受到控制,因为它们的挥发物可能对该站的水回收系统产生影响。”

    -

      为此,国际空间站上的宇航员甚至没有被提供含有酒精的产品,例如漱口水、香水或须后水。如果在国际空间站上饮酒狂欢,溢出的啤酒也可能存在损坏设备的风险。

    -

    (Credit: iStock)

    -

      图注:测试表明,有关人在高空中喝酒更容易醉的传言是不正确的

    -

      然后是责任的问题。我们不允许汽车司机或飞机飞行员喝醉后驾驶,所以并不奇怪同样的规则适用于国际空间站上的宇航员。毕竟国际空间站的造价高达1500亿美元,而且在接近真空的太空中其运行速度达到了每小时27680公里。

    -

      然而,2007年,美国宇航局(NASA)成立了一个负责调查宇航员健康状况的独立小组,称历史上该机构至少有两名宇航员在即将飞行前喝了大量的酒,但仍然被允许飞行。Nasa安全负责人随后的审查发现并没有证据支持这一指控。宇航员在飞行前12小时是严禁饮酒的,因为他们需要充分的思维能力和清醒的意识。

    -

      出台这一规则的原因很清楚。在1985年UFAA开展的关于酒精在不同海拔高度影响的研究中,研究人员得出结论,酒精的影响与海拔高度无关。无论参与测试的人员在什么海拔高度喝酒,其酒精测量仪的读数都是一样的。他们的行为表现受到的影响也相同,但如果提供给测试人员的是安慰剂,则身处高空比身处海平面的行为表现要更差一些。这表明,无论是否摄入酒精,海拔高度可能对心理表现有轻微的影响。

    -

      国际空间站禁止享用啤酒等有大量泡沫的饮料,可能有另一个原因:没有重力的帮助,液体和气体会在宇航员的胃里不停地翻滚,导致他们不断地打嗝。

    -

      然而,尽管有严格的规则,这并不意味着太空中的人类不会接触发酵液体。在国际空间站上进行了大量有关酒精的实验—— 但没有发生让众人去饮酒的情况,所以没有人真正了解太空中人体对酒精具体有怎样的反应。

    -

      NASA发言人斯蒂芬妮?席尔霍尔茨(Stephanie Schierhol)表示:“我们研究了太空中宇航员身体的各种变化,包括微生物层面的。我们有一个营养计划,以确保他们的身体获得保持健康所需要的营养。显然,在实施‘天空实验室(skylab)’项目时,他们曾将雪利酒与宇航员一起送到太空中,但宇航员在零重力飞行时使用雪利酒的测试结果不太好。”天空实验室是美国第一座空间站。

    -

      席尔霍尔茨补充说,在测试中使用雪利酒“引发呕吐反射,公众也反对”。

    -

      也许最令人惊讶的是,人类在月球表面上喝的第一种液体是葡萄酒。前NASA宇航员巴兹·奥尔德林(Buzz Aldrin)在采访和他撰写的书中表示,1969年,在和尼尔·阿姆斯特朗(Neil Armstrong)走出登月舱之前的圣餐仪式上,他喝了少量葡萄酒。举行这一仪式时与地面的通信出现了暂停,因此这一过程从来没有播出。

    -

      虽然Nasa对太空中酒精的使用有严格的规定,但在这方面俄罗斯过去似乎更为宽松。在其“和平号”空间站上,宇航员允许喝点干邑和伏特加。当他们发现国际空间站将严格禁止饮酒时,显然有不少怨言。

    -

      然而,奇怪的是,酒仍然能通过各种方式出现在国际空间站上。2015年,日本酿酒商三得利(Suntory)的全球创新中心将该公司一些获奖的威士忌运送到国际空间站,参与一项旨在验证“能否通过利用微重力环境增强酒精饮料醇厚性”的实验。换句话说,在微重力下酒的陈酿过程可能不同,导致它的陈酿进程更快、味道更好。对此,地球上的每家酿酒商都想进一步地了解。

    -

      几年前,即2011年9月至2014年9月,Nasa赞助了一个试验,研究微重力环境对威士忌中未发酵麦芽与烧焦橡木颗粒的影响,这两种物质能对威士忌的陈酿起帮助作用。在太空中逗留将近1000天后,用于测试的威士忌的单宁成分保持不变——但是太空中橡木颗粒产生了更高浓度的木质素分解产物,这种物质能赋予威士忌特别的风味。

    -

      Nasa表示:“这种试验不仅对麦芽威士忌行业有影响,而且对整个食品和饮料行业也有影响。送上太空的威士忌与对照样品之间的风味差异是如此显著,需要进一步分析以破解不同口味产生的原因。”

    -

      因此,即使宇航员自己被禁止在地球轨道上饮酒,但他们正在做的工作可以提高在地上消费的酒的质量。

    -

      相比之下,执行登陆火星任务的人将远离家乡几年,而不是几个月,因此可能会有人提出有关禁止饮酒的规定可以放松一些。

    -

      然而,像戴夫?汉森这样的专家认为,继续禁止饮酒并没有什么害处。除了实际的安全问题,饮酒还可能有其它挑战。汉森认为,地球人存在许多社会文化方面的差异,而且人连续几年时间呆在一个狭小的空间里,很容易突然发怒,这些因素都使饮酒问题变得很棘手。

    -

    (Credit: David Frohman/Peachstate Historical Consulting Inc)

    -

      图注:奥尔德林的圣餐杯回到了地球上

    -

      他说:“这是一个政治问题,也是一个文化方面的问题,但不是一个科学上的问题。这将是未来一个可能产生冲突领域,因为人们具有不同的文化背景,他们对饮酒的态度不同。”他进一步指出,如果你与穆斯林、摩门教徒或禁酒主义者分配在同一间宿舍怎么办?面对未来人们可能在一个没有期限的时间内呆在一个有限的空间里,需要“尽早解决”如何协调不同文化观点的问题。

    -

      所以,当宇航员在地球轨道上时,将还不得不满足于通过欣赏外面的景色来振作精神,而不要指望沉溺于烈酒中。我们留在地球上的人,则可以准备好适量的香槟酒,以迎接他们的归来。

    -

      原标题:他晚于阿姆斯特朗登月 却是首个敢在月球喝酒的人

    -

      出品︱网易科学人栏目组 胖胖

    -

      作者︱春春

    -

    [责任编辑:肖春芳]

    -
    + + + +

      翱翔于距地球数千公里的太空中,进入广袤漆黑的未知领域,是一项艰苦卓绝的工作。这让人感到巨大压力和极度恐慌。那么,为什么不能让宇航员来一杯“地球末日”鸡尾酒来放松一下?

    +

      不幸的是,对于希望能喝上一杯的太空探险者,那些将他们送上太空的政府机构普遍禁止他们染指包括酒在内的含酒精饮料。

    +

      但是,很快普通人都会有机会向人类“最终的边疆”出发——以平民化旅行的形式,去探索和殖民火星。确实,火星之旅将是一次令人感到痛苦的旅行,可能一去不复返并要几年时间才能完成,但是否应该允许参与者在旅程中痛饮一番?或至少携带能在火星上发酵自制酒精饮料的设备?

    +

    (Credit: Nasa)

    +

    +   图注:巴兹?奥尔德林(Buzz Aldrin)可能是第二个在月球上行走的人,但他是第一个在月球上喝酒的人 +

    +

      事实是,历史上酒与太空探险有一种复杂的关系。让我们来看看喝了酒的航天员究竟会发生什么—— 如果我们开始给予进入太空的人类更大的自由度,又可能会发生什么。

    +

      人们普遍认为,当一个人所处的海拔越高,喝醉后会越容易感到头昏。因此,人们自然地想到,当人身处地球轨道上时,饮酒会对人体有更强烈的致眩作用。但这种说法可能不是正确的。

    +

      事实上,有证据表明,早在上世纪八十年代就澄清了这一传言。1985年,美国联邦航空管理局(UFAA)开展了一项研究,以验证人在不同的海拔高度饮酒,是否会影响执行复杂任务时的表现和酒精测定仪的读数。

    +

      在这项研究中,17名男子被要求在地面和一间模拟海拔3.7公里的房间内喝下一些伏特加。然后,他们被要求完成各种任务,包括心算口算问题、用操纵杆在示波器上跟踪灯光以及各种其它测试。研究人员发现,“酒精和海拔高度对酒精测定仪读数或完成任务的表现情况没有交互作用”。

    +

      所以,人乘坐飞机时醉得更快是个传说?纽约州立大学(State University of New York,SUNY)社会学荣誉教授戴夫·汉森(Dave Hanson)研究酒精和饮酒超过40年,他认为确实如此。他说:“我不认为它(在太空中饮酒)会有任何不同。”

    +

      他认为高原反应可能类似于宿醉,但它也可能类似于中毒。他说:“如果人们没有感受到充分的大气压力,他们也会觉得喝醉了一样。”

    +

      相反,那些声称在飞机上比在地面上醉得更快的人,可能只是经历了“自认喝醉(think-drink)”效应,这种效应多年来已被广泛研究。它表明,如果人们认为自己喝醉了,那他们的一举一动会真的像喝醉了一样—— 而不是实际上他们真的醉了。

    +

      汉森指出:“如果人们脑子里一直认为在飞机上酒精会对他们产生与平常不同的作用,那么他们乘坐飞机时真的会觉得酒精对他们产生了不同的作用。”

    +

      所以,如果酒精对人体的物理效应与海拔高度无关,那么在国际空间站上睡前小饮一杯不应该是一个大问题,对吧?错了。

    +

      美国宇航局约翰逊航天中心发言人丹尼尔·霍特(Daniel Huot)表示:“国际空间站上的宇航员不允许喝酒。在国际空间站上,酒精和其它挥发性化合物的使用受到控制,因为它们的挥发物可能对该站的水回收系统产生影响。”

    +

      为此,国际空间站上的宇航员甚至没有被提供含有酒精的产品,例如漱口水、香水或须后水。如果在国际空间站上饮酒狂欢,溢出的啤酒也可能存在损坏设备的风险。

    +

    (Credit: iStock)

    +

    +   图注:测试表明,有关人在高空中喝酒更容易醉的传言是不正确的 +

    +

      然后是责任的问题。我们不允许汽车司机或飞机飞行员喝醉后驾驶,所以并不奇怪同样的规则适用于国际空间站上的宇航员。毕竟国际空间站的造价高达1500亿美元,而且在接近真空的太空中其运行速度达到了每小时27680公里。

    +

      然而,2007年,美国宇航局(NASA)成立了一个负责调查宇航员健康状况的独立小组,称历史上该机构至少有两名宇航员在即将飞行前喝了大量的酒,但仍然被允许飞行。Nasa安全负责人随后的审查发现并没有证据支持这一指控。宇航员在飞行前12小时是严禁饮酒的,因为他们需要充分的思维能力和清醒的意识。

    +

      出台这一规则的原因很清楚。在1985年UFAA开展的关于酒精在不同海拔高度影响的研究中,研究人员得出结论,酒精的影响与海拔高度无关。无论参与测试的人员在什么海拔高度喝酒,其酒精测量仪的读数都是一样的。他们的行为表现受到的影响也相同,但如果提供给测试人员的是安慰剂,则身处高空比身处海平面的行为表现要更差一些。这表明,无论是否摄入酒精,海拔高度可能对心理表现有轻微的影响。

    +

      国际空间站禁止享用啤酒等有大量泡沫的饮料,可能有另一个原因:没有重力的帮助,液体和气体会在宇航员的胃里不停地翻滚,导致他们不断地打嗝。

    +

      然而,尽管有严格的规则,这并不意味着太空中的人类不会接触发酵液体。在国际空间站上进行了大量有关酒精的实验—— 但没有发生让众人去饮酒的情况,所以没有人真正了解太空中人体对酒精具体有怎样的反应。

    +

      NASA发言人斯蒂芬妮?席尔霍尔茨(Stephanie Schierhol)表示:“我们研究了太空中宇航员身体的各种变化,包括微生物层面的。我们有一个营养计划,以确保他们的身体获得保持健康所需要的营养。显然,在实施‘天空实验室(skylab)’项目时,他们曾将雪利酒与宇航员一起送到太空中,但宇航员在零重力飞行时使用雪利酒的测试结果不太好。”天空实验室是美国第一座空间站。

    +

      席尔霍尔茨补充说,在测试中使用雪利酒“引发呕吐反射,公众也反对”。

    +

      也许最令人惊讶的是,人类在月球表面上喝的第一种液体是葡萄酒。前NASA宇航员巴兹·奥尔德林(Buzz Aldrin)在采访和他撰写的书中表示,1969年,在和尼尔·阿姆斯特朗(Neil Armstrong)走出登月舱之前的圣餐仪式上,他喝了少量葡萄酒。举行这一仪式时与地面的通信出现了暂停,因此这一过程从来没有播出。

    +

      虽然Nasa对太空中酒精的使用有严格的规定,但在这方面俄罗斯过去似乎更为宽松。在其“和平号”空间站上,宇航员允许喝点干邑和伏特加。当他们发现国际空间站将严格禁止饮酒时,显然有不少怨言。

    +

      然而,奇怪的是,酒仍然能通过各种方式出现在国际空间站上。2015年,日本酿酒商三得利(Suntory)的全球创新中心将该公司一些获奖的威士忌运送到国际空间站,参与一项旨在验证“能否通过利用微重力环境增强酒精饮料醇厚性”的实验。换句话说,在微重力下酒的陈酿过程可能不同,导致它的陈酿进程更快、味道更好。对此,地球上的每家酿酒商都想进一步地了解。

    +

      几年前,即2011年9月至2014年9月,Nasa赞助了一个试验,研究微重力环境对威士忌中未发酵麦芽与烧焦橡木颗粒的影响,这两种物质能对威士忌的陈酿起帮助作用。在太空中逗留将近1000天后,用于测试的威士忌的单宁成分保持不变——但是太空中橡木颗粒产生了更高浓度的木质素分解产物,这种物质能赋予威士忌特别的风味。

    +

      Nasa表示:“这种试验不仅对麦芽威士忌行业有影响,而且对整个食品和饮料行业也有影响。送上太空的威士忌与对照样品之间的风味差异是如此显著,需要进一步分析以破解不同口味产生的原因。”

    +

      因此,即使宇航员自己被禁止在地球轨道上饮酒,但他们正在做的工作可以提高在地上消费的酒的质量。

    +

      相比之下,执行登陆火星任务的人将远离家乡几年,而不是几个月,因此可能会有人提出有关禁止饮酒的规定可以放松一些。

    +

      然而,像戴夫?汉森这样的专家认为,继续禁止饮酒并没有什么害处。除了实际的安全问题,饮酒还可能有其它挑战。汉森认为,地球人存在许多社会文化方面的差异,而且人连续几年时间呆在一个狭小的空间里,很容易突然发怒,这些因素都使饮酒问题变得很棘手。

    +

    (Credit: David Frohman/Peachstate Historical Consulting Inc)

    +

    +   图注:奥尔德林的圣餐杯回到了地球上 +

    +

      他说:“这是一个政治问题,也是一个文化方面的问题,但不是一个科学上的问题。这将是未来一个可能产生冲突领域,因为人们具有不同的文化背景,他们对饮酒的态度不同。”他进一步指出,如果你与穆斯林、摩门教徒或禁酒主义者分配在同一间宿舍怎么办?面对未来人们可能在一个没有期限的时间内呆在一个有限的空间里,需要“尽早解决”如何协调不同文化观点的问题。

    +

      所以,当宇航员在地球轨道上时,将还不得不满足于通过欣赏外面的景色来振作精神,而不要指望沉溺于烈酒中。我们留在地球上的人,则可以准备好适量的香槟酒,以迎接他们的归来。

    +

      原标题:他晚于阿姆斯特朗登月 却是首个敢在月球喝酒的人

    +

      出品︱网易科学人栏目组 胖胖

    +

      作者︱春春 + +

    + +

    [责任编辑:肖春芳]

    + + + diff --git a/resources/tests/readability/google-sre-book-1/expected.html b/resources/tests/readability/google-sre-book-1/expected.html index 8ec1d3e..46d0359 100644 --- a/resources/tests/readability/google-sre-book-1/expected.html +++ b/resources/tests/readability/google-sre-book-1/expected.html @@ -1,302 +1,458 @@ -

    +
    +

    Monitoring Distributed Systems

    -

    + +

    Google’s SRE teams have some basic principles and best practices for building successful monitoring and alerting systems. This chapter offers guidelines for what issues should interrupt a human via a page, and how to deal with issues that aren’t serious enough to trigger a page.

    -

    +
    +

    Definitions

    -

    There’s no uniformly shared vocabulary for discussing all topics related to monitoring. Even within Google, usage of the following terms varies, but the most common interpretations are listed here. +

    + There’s no uniformly shared vocabulary for discussing all topics related to monitoring. Even within Google, usage of the following terms varies, but the most common interpretations are listed here.

    -
    -

    +

    + +
    +

    Collecting, processing, aggregating, and displaying real-time quantitative data about a system, such as query counts and types, error counts and types, processing times, and server lifetimes. -

    -

    Monitoring based on metrics exposed by the internals of the system, including logs, interfaces like the Java Virtual Machine Profiling Interface, or an HTTP handler that emits internal statistics. -

    -

    Testing externally visible behavior as a user would see it. -

    -

    An application (usually web-based) that provides a summary view of a service’s core metrics. A dashboard may have filters, selectors, and so on, but is prebuilt to expose the metrics most important to its users. The dashboard might also display team information such as ticket queue length, a list of high-priority bugs, the current on-call engineer for a given area of responsibility, or recent pushes. -

    -

    A notification intended to be read by a human and that is pushed to a system such as a bug or ticket queue, an email alias, or a pager. Respectively, these alerts are classified as tickets, email alerts,22 and pages. -

    -

    A defect in a software or human system that, if repaired, instills confidence that this event won’t happen again in the same way. A given incident might have multiple root causes: for example, perhaps it was caused by a combination of insufficient process automation, software that crashed on bogus input, and insufficient testing of the script used to generate the configuration. Each of these factors might stand alone as a root cause, and each should be repaired. -

    -
    +

    +
    + +
    +

    + Monitoring based on metrics exposed by the internals of the system, including logs, interfaces like the Java Virtual Machine Profiling Interface, or an HTTP handler that emits internal statistics. +

    +
    + +
    +

    + Testing externally visible behavior as a user would see it. +

    +
    + +
    +

    + An application (usually web-based) that provides a summary view of a service’s core metrics. A dashboard may have filters, selectors, and so on, but is prebuilt to expose the metrics most important to its users. The dashboard might also display team information such as ticket queue length, a list of high-priority bugs, the current on-call engineer for a given area of responsibility, or recent pushes. +

    +
    + +
    +

    + A notification intended to be read by a human and that is pushed to a system such as a bug or ticket queue, an email alias, or a pager. Respectively, these alerts are classified as tickets, email alerts,22 and pages. +

    +
    + +
    +

    + A defect in a software or human system that, if repaired, instills confidence that this event won’t happen again in the same way. A given incident might have multiple root causes: for example, perhaps it was caused by a combination of insufficient process automation, software that crashed on bogus input, and insufficient testing of the script used to generate the configuration. Each of these factors might stand alone as a root cause, and each should be repaired. +

    +
    +
    Node and machine
    -
    -

    Used interchangeably to indicate a single instance of a running kernel in either a physical server, virtual machine, or container. There might be multiple services worth monitoring on a single machine. The services may either be: +

    +

    + Used interchangeably to indicate a single instance of a running kernel in either a physical server, virtual machine, or container. There might be multiple services worth monitoring on a single machine. The services may either be:

    -
      -
    • Related to each other: for example, a caching server and a web server +
        +
      • Related to each other: for example, a caching server and a web server
      • -
      • Unrelated services sharing hardware: for example, a code repository and a master for a configuration system like Puppet or Chef -
      • -
      -
    -

    +

  • Unrelated services sharing hardware: for example, a code repository and a master for a configuration system like Puppet or Chef +
  • + +
    + +
    +

    Any change to a service’s running software or its configuration. -

    -

    +

    + + +

    +
    +

    Why Monitor?

    -

    There are many reasons to monitor a system, including: +

    + There are many reasons to monitor a system, including:

    -
    -

    +

    + +
    +

    How big is my database and how fast is it growing? How quickly is my daily-active user count growing? -

    -

    +

    +
    + +
    +

    Are queries faster with Acme Bucket of Bytes 2.72 versus Ajax DB 3.14? How much better is my memcache hit rate with an extra node? Is my site slower than it was last week? -

    -

    +

    +
    + +
    +

    Something is broken, and somebody needs to fix it right now! Or, something might break soon, so somebody should look soon. -

    -

    Dashboards should answer basic questions about your service, and normally include some form of the four golden signals (discussed in The Four Golden Signals). -

    -

    +

    +
    + +
    +

    + Dashboards should answer basic questions about your service, and normally include some form of the four golden signals (discussed in The Four Golden Signals). +

    +
    + +
    +

    Our latency just shot up; what else happened around the same time? -

    -
    -

    +

    +
    +
    +

    System monitoring is also helpful in supplying raw input into business analytics and in facilitating analysis of security breaches. Because this book focuses on the engineering domains in which SRE has particular expertise, we won’t discuss these applications of monitoring here.

    -

    +

    Monitoring and alerting enables a system to tell us when it’s broken, or perhaps to tell us what’s about to break. When the system isn’t able to automatically fix itself, we want a human to investigate the alert, determine if there’s a real problem at hand, mitigate the problem, and determine the root cause of the problem. Unless you’re performing security auditing on very narrowly scoped components of a system, you should never trigger an alert simply because "something seems a bit weird."

    -

    +

    Paging a human is a quite expensive use of an employee’s time. If an employee is at work, a page interrupts their workflow. If the employee is at home, a page interrupts their personal time, and perhaps even their sleep. When pages occur too frequently, employees second-guess, skim, or even ignore incoming alerts, sometimes even ignoring a "real" page that’s masked by the noise. Outages can be prolonged because other noise interferes with a rapid diagnosis and fix. Effective alerting systems have good signal and very low noise. -

    +

    +

    +
    +

    Setting Reasonable Expectations for Monitoring

    -

    Monitoring a complex application is a significant engineering endeavor in and of itself. Even with substantial existing infrastructure for instrumentation, collection, display, and alerting in place, a Google SRE team with 10–12 members typically has one or sometimes two members whose primary assignment is to build and maintain monitoring systems for their service. This number has decreased over time as we generalize and centralize common monitoring infrastructure, but every SRE team typically has at least one “monitoring person.” (That being said, while it can be fun to have access to traffic graph dashboards and the like, SRE teams carefully avoid any situation that requires someone to “stare at a screen to watch for problems.”) +

    + Monitoring a complex application is a significant engineering endeavor in and of itself. Even with substantial existing infrastructure for instrumentation, collection, display, and alerting in place, a Google SRE team with 10–12 members typically has one or sometimes two members whose primary assignment is to build and maintain monitoring systems for their service. This number has decreased over time as we generalize and centralize common monitoring infrastructure, but every SRE team typically has at least one “monitoring person.” (That being said, while it can be fun to have access to traffic graph dashboards and the like, SRE teams carefully avoid any situation that requires someone to “stare at a screen to watch for problems.”)

    -

    In general, Google has trended toward simpler and faster monitoring systems, with better tools for post hoc analysis. We avoid "magic" systems that try to learn thresholds or automatically detect causality. Rules that detect unexpected changes in end-user request rates are one counterexample; while these rules are still kept as simple as possible, they give a very quick detection of a very simple, specific, severe anomaly. Other uses of monitoring data such as capacity planning and traffic prediction can tolerate more fragility, and thus, more complexity. Observational experiments conducted over a very long time horizon (months or years) with a low sampling rate (hours or days) can also often tolerate more fragility because occasional missed samples won’t hide a long-running trend. +

    + In general, Google has trended toward simpler and faster monitoring systems, with better tools for post hoc analysis. We avoid "magic" systems that try to learn thresholds or automatically detect causality. Rules that detect unexpected changes in end-user request rates are one counterexample; while these rules are still kept as simple as possible, they give a very quick detection of a very simple, specific, severe anomaly. Other uses of monitoring data such as capacity planning and traffic prediction can tolerate more fragility, and thus, more complexity. Observational experiments conducted over a very long time horizon (months or years) with a low sampling rate (hours or days) can also often tolerate more fragility because occasional missed samples won’t hide a long-running trend.

    -

    Google SRE has experienced only limited success with complex dependency hierarchies. We seldom use rules such as, "If I know the database is slow, alert for a slow database; otherwise, alert for the website being generally slow." Dependency-reliant rules usually pertain to very stable parts of our system, such as our system for draining user traffic away from a datacenter. For example, "If a datacenter is drained, then don’t alert me on its latency" is one common datacenter alerting rule. Few teams at Google maintain complex dependency hierarchies because our infrastructure has a steady rate of continuous refactoring. +

    + Google SRE has experienced only limited success with complex dependency hierarchies. We seldom use rules such as, "If I know the database is slow, alert for a slow database; otherwise, alert for the website being generally slow." Dependency-reliant rules usually pertain to very stable parts of our system, such as our system for draining user traffic away from a datacenter. For example, "If a datacenter is drained, then don’t alert me on its latency" is one common datacenter alerting rule. Few teams at Google maintain complex dependency hierarchies because our infrastructure has a steady rate of continuous refactoring.

    -

    +

    Some of the ideas described in this chapter are still aspirational: there is always room to move more rapidly from symptom to root cause(s), especially in ever-changing systems. So while this chapter sets out some goals for monitoring systems, and some ways to achieve these goals, it’s important that monitoring systems—especially the critical path from the onset of a production problem, through a page to a human, through basic triage and deep debugging—be kept simple and comprehensible by everyone on the team.

    -

    +

    Similarly, to keep noise low and signal high, the elements of your monitoring system that direct to a pager need to be very simple and robust. Rules that generate alerts for humans should be simple to understand and represent a clear failure. -

    +

    +

    +
    +

    Symptoms Versus Causes

    -

    Your monitoring system should address two questions: what’s broken, and why? +

    + Your monitoring system should address two questions: what’s broken, and why?

    -

    +

    The "what’s broken" indicates the symptom; the "why" indicates a (possibly intermediate) cause. Table 6-1 lists some hypothetical symptoms and corresponding causes.

    - -
    -Table 6-1. Example symptoms and causes + + - - - - - - - - + + + + + + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - -
    + Table 6-1. Example symptoms and causes
    SymptomCause

    I’m serving HTTP 500s or 404s

    +

    + Symptom + + Cause +
    +

    + I’m serving HTTP 500s or 404s +

    +
    +

    Database servers are refusing connections -

    My responses are slow

    +

    +
    +

    + My responses are slow +

    +
    +

    CPUs are overloaded by a bogosort, or an Ethernet cable is crimped under a rack, visible as partial packet loss -

    Users in Antarctica aren’t receiving animated cat GIFs

    +

    +
    +

    + Users in Antarctica aren’t receiving animated cat GIFs +

    +
    +

    Your Content Distribution Network hates scientists and felines, and thus blacklisted some client IPs -

    Private content is world-readable

    +

    +
    +

    + Private content is world-readable +

    +
    +

    A new software push caused ACLs to be forgotten and allowed all requests -

    -

    +

    + + + +
    +

    "What" versus "why" is one of the most important distinctions in writing good monitoring with maximum signal and minimum noise. -

    +

    +

    +
    +

    Black-Box Versus White-Box

    -

    We combine heavy use of white-box monitoring with modest but critical uses of black-box monitoring. The simplest way to think about black-box monitoring versus white-box monitoring is that black-box monitoring is symptom-oriented and represents active—not predicted—problems: "The system isn’t working correctly, right now." White-box monitoring depends on the ability to inspect the innards of the system, such as logs or HTTP endpoints, with instrumentation. White-box monitoring therefore allows detection of imminent problems, failures masked by retries, and so forth. +

    + We combine heavy use of white-box monitoring with modest but critical uses of black-box monitoring. The simplest way to think about black-box monitoring versus white-box monitoring is that black-box monitoring is symptom-oriented and represents active—not predicted—problems: "The system isn’t working correctly, right now." White-box monitoring depends on the ability to inspect the innards of the system, such as logs or HTTP endpoints, with instrumentation. White-box monitoring therefore allows detection of imminent problems, failures masked by retries, and so forth.

    -

    +

    Note that in a multilayered system, one person’s symptom is another person’s cause. For example, suppose that a database’s performance is slow. Slow database reads are a symptom for the database SRE who detects them. However, for the frontend SRE observing a slow website, the same slow database reads are a cause. Therefore, white-box monitoring is sometimes symptom-oriented, and sometimes cause-oriented, depending on just how informative your white-box is.

    -

    +

    When collecting telemetry for debugging, white-box monitoring is essential. If web servers seem slow on database-heavy requests, you need to know both how fast the web server perceives the database to be, and how fast the database believes itself to be. Otherwise, you can’t distinguish an actually slow database server from a network problem between your web server and your database.

    -

    +

    For paging, black-box monitoring has the key benefit of forcing discipline to only nag a human when a problem is both already ongoing and contributing to real symptoms. On the other hand, for not-yet-occurring but imminent problems, black-box monitoring is fairly useless. -

    +

    +

    +
    +

    The Four Golden Signals

    -

    The four golden signals of monitoring are latency, traffic, errors, and saturation. If you can only measure four metrics of your user-facing system, focus on these four. +

    + The four golden signals of monitoring are latency, traffic, errors, and saturation. If you can only measure four metrics of your user-facing system, focus on these four.

    -
    -

    The time it takes to service a request. It’s important to distinguish between the latency of successful requests and the latency of failed requests. For example, an HTTP 500 error triggered due to loss of connection to a database or other critical backend might be served very quickly; however, as an HTTP 500 error indicates a failed request, factoring 500s into your overall latency might result in misleading calculations. On the other hand, a slow error is even worse than a fast error! Therefore, it’s important to track error latency, as opposed to just filtering out errors. -

    -

    A measure of how much demand is being placed on your system, measured in a high-level system-specific metric. For a web service, this measurement is usually HTTP requests per second, perhaps broken out by the nature of the requests (e.g., static versus dynamic content). For an audio streaming system, this measurement might focus on network I/O rate or concurrent sessions. For a key-value storage system, this measurement might be transactions and retrievals per second. -

    -

    The rate of requests that fail, either explicitly (e.g., HTTP 500s), implicitly (for example, an HTTP 200 success response, but coupled with the wrong content), or by policy (for example, "If you committed to one-second response times, any request over one second is an error"). Where protocol response codes are insufficient to express all failure conditions, secondary (internal) protocols may be necessary to track partial failure modes. Monitoring these cases can be drastically different: catching HTTP 500s at your load balancer can do a decent job of catching all completely failed requests, while only end-to-end system tests can detect that you’re serving the wrong content. -

    -
    -

    How "full" your service is. A measure of your system fraction, emphasizing the resources that are most constrained (e.g., in a memory-constrained system, show memory; in an I/O-constrained system, show I/O). Note that many systems degrade in performance before they achieve 100% utilization, so having a utilization target is essential. +

    + +
    +

    + The time it takes to service a request. It’s important to distinguish between the latency of successful requests and the latency of failed requests. For example, an HTTP 500 error triggered due to loss of connection to a database or other critical backend might be served very quickly; however, as an HTTP 500 error indicates a failed request, factoring 500s into your overall latency might result in misleading calculations. On the other hand, a slow error is even worse than a fast error! Therefore, it’s important to track error latency, as opposed to just filtering out errors.

    -

    +

    + +
    +

    + A measure of how much demand is being placed on your system, measured in a high-level system-specific metric. For a web service, this measurement is usually HTTP requests per second, perhaps broken out by the nature of the requests (e.g., static versus dynamic content). For an audio streaming system, this measurement might focus on network I/O rate or concurrent sessions. For a key-value storage system, this measurement might be transactions and retrievals per second. +

    +
    + +
    +

    + The rate of requests that fail, either explicitly (e.g., HTTP 500s), implicitly (for example, an HTTP 200 success response, but coupled with the wrong content), or by policy (for example, "If you committed to one-second response times, any request over one second is an error"). Where protocol response codes are insufficient to express all failure conditions, secondary (internal) protocols may be necessary to track partial failure modes. Monitoring these cases can be drastically different: catching HTTP 500s at your load balancer can do a decent job of catching all completely failed requests, while only end-to-end system tests can detect that you’re serving the wrong content. +

    +
    + +
    +

    + How "full" your service is. A measure of your system fraction, emphasizing the resources that are most constrained (e.g., in a memory-constrained system, show memory; in an I/O-constrained system, show I/O). Note that many systems degrade in performance before they achieve 100% utilization, so having a utilization target is essential. +

    +

    In complex systems, saturation can be supplemented with higher-level load measurement: can your service properly handle double the traffic, handle only 10% more traffic, or handle even less traffic than it currently receives? For very simple services that have no parameters that alter the complexity of the request (e.g., "Give me a nonce" or "I need a globally unique monotonic integer") that rarely change configuration, a static value from a load test might be adequate. As discussed in the previous paragraph, however, most services need to use indirect signals like CPU utilization or network bandwidth that have a known upper bound. Latency increases are often a leading indicator of saturation. Measuring your 99th percentile response time over some small window (e.g., one minute) can give a very early signal of saturation.

    -

    +

    Finally, saturation is also concerned with predictions of impending saturation, such as "It looks like your database will fill its hard drive in 4 hours."

    -
    -
    -

    +

    +
    +

    If you measure all four golden signals and page a human when one signal is problematic (or, in the case of saturation, nearly problematic), your service will be at least decently covered by monitoring. -

    +

    +

    +
    +

    Worrying About Your Tail (or, Instrumentation and Performance)

    -

    When building a monitoring system from scratch, it’s tempting to design a system based upon the mean of some quantity: the mean latency, the mean CPU usage of your nodes, or the mean fullness of your databases. The danger presented by the latter two cases is obvious: CPUs and databases can easily be utilized in a very imbalanced way. The same holds for latency. If you run a web service with an average latency of 100 ms at 1,000 requests per second, 1% of requests might easily take 5 seconds.23 If your users depend on several such web services to render their page, the 99th percentile of one backend can easily become the median response of your frontend. +

    + When building a monitoring system from scratch, it’s tempting to design a system based upon the mean of some quantity: the mean latency, the mean CPU usage of your nodes, or the mean fullness of your databases. The danger presented by the latter two cases is obvious: CPUs and databases can easily be utilized in a very imbalanced way. The same holds for latency. If you run a web service with an average latency of 100 ms at 1,000 requests per second, 1% of requests might easily take 5 seconds.23 If your users depend on several such web services to render their page, the 99th percentile of one backend can easily become the median response of your frontend.

    -

    +

    The simplest way to differentiate between a slow average and a very slow "tail" of requests is to collect request counts bucketed by latencies (suitable for rendering a histogram), rather than actual latencies: how many requests did I serve that took between 0 ms and 10 ms, between 10 ms and 30 ms, between 30 ms and 100 ms, between 100 ms and 300 ms, and so on? Distributing the histogram boundaries approximately exponentially (in this case by factors of roughly 3) is often an easy way to visualize the distribution of your requests. -

    +

    +

    +
    +

    Choosing an Appropriate Resolution for Measurements

    -

    Different aspects of a system should be measured with different levels of granularity. For example: +

    + Different aspects of a system should be measured with different levels of granularity. For example:

    -
      -
    • Observing CPU load over the time span of a minute won’t reveal even quite long-lived spikes that drive high tail latencies. +
        +
      • Observing CPU load over the time span of a minute won’t reveal even quite long-lived spikes that drive high tail latencies.
      • -
      • On the other hand, for a web service targeting no more than 9 hours aggregate downtime per year (99.9% annual uptime), probing for a 200 (success) status more than once or twice a minute is probably unnecessarily frequent. +
      • On the other hand, for a web service targeting no more than 9 hours aggregate downtime per year (99.9% annual uptime), probing for a 200 (success) status more than once or twice a minute is probably unnecessarily frequent.
      • -
      • Similarly, checking hard drive fullness for a service targeting 99.9% availability more than once every 1–2 minutes is probably unnecessary. +
      • Similarly, checking hard drive fullness for a service targeting 99.9% availability more than once every 1–2 minutes is probably unnecessary.
      • -
      -

      +

    +

    Take care in how you structure the granularity of your measurements. Collecting per-second measurements of CPU load might yield interesting data, but such frequent measurements may be very expensive to collect, store, and analyze. If your monitoring goal calls for high resolution but doesn’t require extremely low latency, you can reduce these costs by performing internal sampling on the server, then configuring an external system to collect and aggregate that distribution over time or across servers. You might:

    -
      -
    1. Record the current CPU utilization each second. +
        +
      1. Record the current CPU utilization each second.
      2. -
      3. Using buckets of 5% granularity, increment the appropriate CPU utilization bucket each second. +
      4. Using buckets of 5% granularity, increment the appropriate CPU utilization bucket each second.
      5. -
      6. Aggregate those values every minute. +
      7. Aggregate those values every minute.
      8. -
      -

      +

    +

    This strategy allows you to observe brief CPU hotspots without incurring very high cost due to collection and retention. -

    +

    +

    +
    +

    As Simple as Possible, No Simpler

    -

    Piling all these requirements on top of each other can add up to a very complex monitoring system—your system might end up with the following levels of complexity: +

    + Piling all these requirements on top of each other can add up to a very complex monitoring system—your system might end up with the following levels of complexity:

    -
      -
    • Alerts on different latency thresholds, at different percentiles, on all kinds of different metrics +
        +
      • Alerts on different latency thresholds, at different percentiles, on all kinds of different metrics
      • -
      • Extra code to detect and expose possible causes +
      • Extra code to detect and expose possible causes
      • -
      • Associated dashboards for each of these possible causes +
      • Associated dashboards for each of these possible causes
      • -
      -

      +

    +

    The sources of potential complexity are never-ending. Like all software systems, monitoring can become so complex that it’s fragile, complicated to change, and a maintenance burden.

    -

    +

    Therefore, design your monitoring system with an eye toward simplicity. In choosing what to monitor, keep the following guidelines in mind:

    -
      -
    • The rules that catch real incidents most often should be as simple, predictable, and reliable as possible. +
        +
      • The rules that catch real incidents most often should be as simple, predictable, and reliable as possible.
      • -
      • Data collection, aggregation, and alerting configuration that is rarely exercised (e.g., less than once a quarter for some SRE teams) should be up for removal. +
      • Data collection, aggregation, and alerting configuration that is rarely exercised (e.g., less than once a quarter for some SRE teams) should be up for removal.
      • -
      • Signals that are collected, but not exposed in any prebaked dashboard nor used by any alert, are candidates for removal. +
      • Signals that are collected, but not exposed in any prebaked dashboard nor used by any alert, are candidates for removal.
      • -
      -

      +

    +

    In Google’s experience, basic collection and aggregation of metrics, paired with alerting and dashboards, has worked well as a relatively standalone system. (In fact Google’s monitoring system is broken up into several binaries, but typically people learn about all aspects of these binaries.) It can be tempting to combine monitoring with other aspects of inspecting complex systems, such as detailed system profiling, single-process debugging, tracking details about exceptions or crashes, load testing, log collection and analysis, or traffic inspection. While most of these subjects share commonalities with basic monitoring, blending together too many results in overly complex and fragile systems. As in many other aspects of software engineering, maintaining distinct systems with clear, simple, loosely coupled points of integration is a better strategy (for example, using web APIs for pulling summary data in a format that can remain constant over an extended period of time). -

    +

    +

    +
    +

    Tying These Principles Together

    -

    +

    The principles discussed in this chapter can be tied together into a philosophy on monitoring and alerting that’s widely endorsed and followed within Google SRE teams. While this monitoring philosophy is a bit aspirational, it’s a good starting point for writing or reviewing a new alert, and it can help your organization ask the right questions, regardless of the size of your organization or the complexity of your service or system.

    -

    When creating rules for monitoring and alerting, asking the following questions can help you avoid false positives and pager burnout:24

    -
      -
    • Does this rule detect an otherwise undetected condition that is urgent, actionable, and actively or imminently user-visible?25 -
    • -
    • Will I ever be able to ignore this alert, knowing it’s benign? When and why will I be able to ignore this alert, and how can I avoid this scenario? -
    • -
    • Does this alert definitely indicate that users are being negatively affected? Are there detectable cases in which users aren’t being negatively impacted, such as drained traffic or test deployments, that should be filtered out? -
    • -
    • Can I take action in response to this alert? Is that action urgent, or could it wait until morning? Could the action be safely automated? Will that action be a long-term fix, or just a short-term workaround? -
    • -
    • Are other people getting paged for this issue, therefore rendering at least one of the pages unnecessary? -
    • -
    -

    These questions reflect a fundamental philosophy on pages and pagers: +

    + When creating rules for monitoring and alerting, asking the following questions can help you avoid false positives and pager burnout:24

    -
      -
    • Every time the pager goes off, I should be able to react with a sense of urgency. I can only react with a sense of urgency a few times a day before I become fatigued. +
        +
      • Does this rule detect an otherwise undetected condition that is urgent, actionable, and actively or imminently user-visible?25
      • -
      • Every page should be actionable. +
      • Will I ever be able to ignore this alert, knowing it’s benign? When and why will I be able to ignore this alert, and how can I avoid this scenario?
      • -
      • Every page response should require intelligence. If a page merely merits a robotic response, it shouldn’t be a page. +
      • Does this alert definitely indicate that users are being negatively affected? Are there detectable cases in which users aren’t being negatively impacted, such as drained traffic or test deployments, that should be filtered out?
      • -
      • Pages should be about a novel problem or an event that hasn’t been seen before. +
      • Can I take action in response to this alert? Is that action urgent, or could it wait until morning? Could the action be safely automated? Will that action be a long-term fix, or just a short-term workaround?
      • -
      -

      +

    • Are other people getting paged for this issue, therefore rendering at least one of the pages unnecessary? +
    • +
    +

    + These questions reflect a fundamental philosophy on pages and pagers: +

    +
      +
    • Every time the pager goes off, I should be able to react with a sense of urgency. I can only react with a sense of urgency a few times a day before I become fatigued. +
    • +
    • Every page should be actionable. +
    • +
    • Every page response should require intelligence. If a page merely merits a robotic response, it shouldn’t be a page. +
    • +
    • Pages should be about a novel problem or an event that hasn’t been seen before. +
    • +
    +

    Such a perspective dissipates certain distinctions: if a page satisfies the preceding four bullets, it’s irrelevant whether the page is triggered by white-box or black-box monitoring. This perspective also amplifies certain distinctions: it’s better to spend much more effort on catching symptoms than causes; when it comes to causes, only worry about very definite, very imminent causes. -

    +

    +

    +
    +

    Monitoring for the Long Term

    -

    In modern production systems, monitoring systems track an ever-evolving system with changing software architecture, load characteristics, and performance targets. An alert that’s currently exceptionally rare and hard to automate might become frequent, perhaps even meriting a hacked-together script to resolve it. At this point, someone should find and eliminate the root causes of the problem; if such resolution isn’t possible, the alert response deserves to be fully automated. +

    + In modern production systems, monitoring systems track an ever-evolving system with changing software architecture, load characteristics, and performance targets. An alert that’s currently exceptionally rare and hard to automate might become frequent, perhaps even meriting a hacked-together script to resolve it. At this point, someone should find and eliminate the root causes of the problem; if such resolution isn’t possible, the alert response deserves to be fully automated.

    -

    +

    It’s important that decisions about monitoring be made with long-term goals in mind. Every page that happens today distracts a human from improving the system for tomorrow, so there is often a case for taking a short-term hit to availability or performance in order to improve the long-term outlook for the system. Let’s take a look at two case studies that illustrate this trade-off.

    -

    Google’s internal infrastructure is typically offered and measured against a service level objective (SLO; see Service Level Objectives). Many years ago, the Bigtable service’s SLO was based on a synthetic well-behaved client’s mean performance. Because of problems in Bigtable and lower layers of the storage stack, the mean performance was driven by a "large" tail: the worst 5% of requests were often significantly slower than the rest. +

    + +

    + Google’s internal infrastructure is typically offered and measured against a service level objective (SLO; see Service Level Objectives). Many years ago, the Bigtable service’s SLO was based on a synthetic well-behaved client’s mean performance. Because of problems in Bigtable and lower layers of the storage stack, the mean performance was driven by a "large" tail: the worst 5% of requests were often significantly slower than the rest.

    -

    +

    Email alerts were triggered as the SLO approached, and paging alerts were triggered when the SLO was exceeded. Both types of alerts were firing voluminously, consuming unacceptable amounts of engineering time: the team spent significant amounts of time triaging the alerts to find the few that were really actionable, and we often missed the problems that actually affected users, because so few of them did. Many of the pages were non-urgent, due to well-understood problems in the infrastructure, and had either rote responses or received no response.

    -

    +

    To remedy the situation, the team used a three-pronged approach: while making great efforts to improve the performance of Bigtable, we also temporarily dialed back our SLO target, using the 75th percentile request latency. We also disabled email alerts, as there were so many that spending time diagnosing them was infeasible.

    -

    +

    This strategy gave us enough breathing room to actually fix the longer-term problems in Bigtable and the lower layers of the storage stack, rather than constantly fixing tactical problems. On-call engineers could actually accomplish work when they weren’t being kept up by pages at all hours. Ultimately, temporarily backing off on our alerts allowed us to make faster progress toward a better service. -

    In the very early days of Gmail, the service was built on a retrofitted distributed process management system called Workqueue, which was originally created for batch processing of pieces of the search index. Workqueue was "adapted" to long-lived processes and subsequently applied to Gmail, but certain bugs in the relatively opaque codebase in the scheduler proved hard to beat.

    -

    +

    +
    + +

    + In the very early days of Gmail, the service was built on a retrofitted distributed process management system called Workqueue, which was originally created for batch processing of pieces of the search index. Workqueue was "adapted" to long-lived processes and subsequently applied to Gmail, but certain bugs in the relatively opaque codebase in the scheduler proved hard to beat. +

    +

    At that time, the Gmail monitoring was structured such that alerts fired when individual tasks were “de-scheduled” by Workqueue. This setup was less than ideal because even at that time, Gmail had many, many thousands of tasks, each task representing a fraction of a percent of our users. We cared deeply about providing a good user experience for Gmail users, but such an alerting setup was unmaintainable.

    -

    +

    To address this problem, Gmail SRE built a tool that helped “poke” the scheduler in just the right way to minimize impact to users. The team had several discussions about whether or not we should simply automate the entire loop from detecting the problem to nudging the rescheduler, until a better long-term solution was achieved, but some worried this kind of workaround would delay a real fix.

    -

    +

    This kind of tension is common within a team, and often reflects an underlying mistrust of the team’s self-discipline: while some team members want to implement a “hack” to allow time for a proper fix, others worry that a hack will be forgotten or that the proper fix will be deprioritized indefinitely. This concern is credible, as it’s easy to build layers of unmaintainable technical debt by patching over problems instead of making real fixes. Managers and technical leaders play a key role in implementing true, long-term fixes by supporting and prioritizing potentially time-consuming long-term fixes even when the initial “pain” of paging subsides.

    -

    - Pages with rote, algorithmic responses should be a red flag. Unwillingness on the part of your team to automate such pages implies that the team lacks confidence that they can clean up their technical debt. This is a major problem worth escalating.

    A common theme connects the previous examples of Bigtable and Gmail: a tension between short-term and long-term availability. Often, sheer force of effort can help a rickety system achieve high availability, but this path is usually short-lived and fraught with burnout and dependence on a small number of heroic team members. Taking a controlled, short-term decrease in availability is often a painful, but strategic trade for the long-run stability of the system. It’s important not to think of every page as an event in isolation, but to consider whether the overall level of paging leads toward a healthy, appropriately available system with a healthy, viable team and long-term outlook. We review statistics about page frequency (usually expressed as incidents per shift, where an incident might be composed of a few related pages) in quarterly reports with management, ensuring that decision makers are kept up to date on the pager load and overall health of their teams. -

    +

    + Pages with rote, algorithmic responses should be a red flag. Unwillingness on the part of your team to automate such pages implies that the team lacks confidence that they can clean up their technical debt. This is a major problem worth escalating. +

    +

    +
    + +

    + A common theme connects the previous examples of Bigtable and Gmail: a tension between short-term and long-term availability. Often, sheer force of effort can help a rickety system achieve high availability, but this path is usually short-lived and fraught with burnout and dependence on a small number of heroic team members. Taking a controlled, short-term decrease in availability is often a painful, but strategic trade for the long-run stability of the system. It’s important not to think of every page as an event in isolation, but to consider whether the overall level of paging leads toward a healthy, appropriately available system with a healthy, viable team and long-term outlook. We review statistics about page frequency (usually expressed as incidents per shift, where an incident might be composed of a few related pages) in quarterly reports with management, ensuring that decision makers are kept up to date on the pager load and overall health of their teams. +

    +
    +
    +
    +

    Conclusion

    -

    +

    A healthy monitoring and alerting pipeline is simple and easy to reason about. It focuses primarily on symptoms for paging, reserving cause-oriented heuristics to serve as aids to debugging problems. Monitoring symptoms is easier the further "up" your stack you monitor, though monitoring saturation and performance of subsystems such as databases often must be performed directly on the subsystem itself. Email alerts are of very limited value and tend to easily become overrun with noise; instead, you should favor a dashboard that monitors all ongoing subcritical problems for the sort of information that typically ends up in email alerts. A dashboard might also be paired with a log, in order to analyze historical correlations.

    -

    +

    Over the long haul, achieving a successful on-call rotation and product includes choosing to alert on symptoms or imminent real problems, adapting your targets to goals that are actually achievable, and making sure that your monitoring supports rapid diagnosis. -

    +

    +

    + +
    diff --git a/resources/tests/readability/guardian-1/expected.html b/resources/tests/readability/guardian-1/expected.html index bd364bb..21c388e 100644 --- a/resources/tests/readability/guardian-1/expected.html +++ b/resources/tests/readability/guardian-1/expected.html @@ -1,170 +1,323 @@
    -

    Whale whisperer Hori Parata was just seven years old when he attended his first mass stranding, a beaching of porpoises in New Zealand’s Northland, their cries screeching through the air on the deserted stretch of sand. +

    + Whale whisperer Hori Parata was just seven years old when he attended his first mass stranding, a beaching of porpoises in New Zealand’s Northland, their cries screeching through the air on the deserted stretch of sand.

    -

    +

    Seven decades later, Parata, 75, has now overseen more than 500 strandings and is renowned in New Zealand as the leading Māori whale expert, called on by tribes around the country for cultural guidance as marine strandings become increasingly complex and fatal.

    -

    +

    “Man’s greed in the ocean is hurting the whales,” says Parata, a fierce and uncompromising elder of the Ngātiwai tribe of eastern Northland.

    -
    - - -
    Hori Parata at his Pātaua farm, the place where he was born and grew up.
    -
    • +

      + + + + +
      + + + Hori Parata at his Pātaua farm, the place where he was born and grew up. +
      + +
      +
      +
        +
      • +

        Hori Parata at his Pātaua farm, the place where he was born and grew up -

      -

      +

      +
    • +
    +

    “We’re having to put up with a lot of stuff today. The public want to hug the whales, they want to touch them, they want to feel good – that’s not the thing. We feel that is ridiculous.”

    -

    +

    Whale experts regard New Zealand – or Aotearoa as it is called by Māori – as the whale stranding capital of the world, with more than 5,000 incidents recorded since 1840, and an average of 300 individual animals beaching themselves each year.

    -
    - - -
    Kauri (Tekaurinui Robert) Parata, watched by his father Hori Parata, carves a traditional Maōri design at their home in Whangārei. Kauri is a member of the Manu Taupunga group that is the organising arm of the whale-body recovery operation started by his father, Hori Parata
    -
    • +

      + + + + +
      + + + Kauri (Tekaurinui Robert) Parata, watched by his father Hori Parata, carves a traditional Maōri design at their home in Whangārei. Kauri is a member of the Manu Taupunga group that is the organising arm of the whale-body recovery operation started by his father, Hori Parata +
      + +
      +
      +
        +
      • +

        Kauri (Te Kaurinui Robert) Parata, watched by his father, Hori Parata, carves a traditional Māori design at their home in Whangārei. Kauri is a member of the Manu Taupunga group that is the organising arm of the whale-body recovery operation started by his father -

      -

      +

      +
    • +
    +

    Concrete information on why whales strand remains elusive, but “sickness, navigational error, geographical features, a rapidly falling tide, being chased by a predator, or extreme weather” are all thought to contribute, according to the New Zealand Department of Conservation.

    -

    +

    Climate change is to blame too, scientists think, with warming ocean temperatures moving whales’ prey closer to the shore and forcing them to pursue their food into shallow waters.

    -
    - - -
    A bin of small whale bones.
    -
    - - -
    The baleen recovered from a stranded Pygmy Right Whale.
    -
    - - -
    Squid beaks, from the stomach of a Sperm Whale.
    -
    • +

      + + + + +
      + + + A bin of small whale bones. +
      + +
      +
      +
      + + + + +
      + + + The baleen recovered from a stranded Pygmy Right Whale. +
      + +
      +
      +
      + + + + +
      + + + Squid beaks, from the stomach of a Sperm Whale. +
      + +
      +
      +
        +
      • +

        Clockwise from top: small whale bones; squid beaks, from the stomach of a sperm whale; the baleen filter-feeder system recovered from a stranded pygmy right whale. -

      -

      +

      +

    • +
    +

    ‘Unprecedented’ strandings

    -

    +

    November marked the beginning of whale stranding season, and it started with a surge in incidents, according to whale rescue group Project Jonah, with 140 pilot whales beaching and dying on Stewart Island, 10 rare pygmy whales on Ninety Mile beach, 51 stranded and dead on the Chatham Islands and a spate of individual cases around the country.

    -

    +

    And as more whales beach and die – from exhaustion, heat stroke or seagulls feasting on their flesh – an acute sense of grief is growing among New Zealand’s indigenous people, who regard whales as their ancestors and taonga (treasures).

    -

    +

    “These days it is like a zoo. People just want to come and gawk at us, without even trying to understand what is happening with the animals and the environment,” says Parata, bristling with anger.

    -
    whale strandings

    +

    + whale strandings +
    +

    “When will we talk about what is hurting these animals out on the sea? They are drowning out there, they can’t breathe, they beach themselves to be with the Aunties.”

    -

    +

    Ngātiwai believe the whales beach when they are ready to die and want to return to their families, the Māori people. Then, their human families use the whales’ gift of their bodies for sacred carvings, for traditional medicines, and even for compost.

    -

    +

    There are marked tribal differences across New Zealand and while some tribes work to refloat stranded whales, others like Parata’s Ngātiwai stand back and allow the Department of Conservation and volunteer groups to take the lead in rescue efforts.

    -

    +

    Then the tribe moves in en masse and holds a karakia (prayer), names each animal and sets to work removing their bones, blubber, eyes and teeth for cultural purposes.

    -
    - - -
    Buck Cullen with his daughter Kaiarahi (10 months) in his back yard where he is storing a pair of massive Sperm Whale jawbones. Buck is a integral member of the whale recovery team, alongside Hori Parata.
    -
    • +

      + + + + +
      + + + Buck Cullen with his daughter Kaiarahi (10 months) in his back yard where he is storing a pair of massive Sperm Whale jawbones. Buck is a integral member of the whale recovery team, alongside Hori Parata. +
      + +
      +
      +
        +
      • +

        Buck Cullen with his daughter Kaiarahi (10 months) in his backyard, where he is storing a pair of massive sperm whale jawbones. Cullen is an integral member of Hori Parata’s whale recovery team -

      -

      +

      +
    • +
    +

    But indigenous elders say they aren’t being listened to when they tell the government their whale kin are sick, and trying to escape an increasingly polluted and unpredictable ocean.

    -

    +

    Earlier this year in South Taranaki, a mass stranding that was described as “unprecedented” left the local Māori tribe scrambling. Security was brought in when thieves attacked a sperm whale with an axe, trying to remove valuable teeth from its jaw.

    -
    - - -
    12 Parāoa Whales (Sperm Whales) recently stranded on the South Taranaki coast of Kaupokonui, on a scale not seen on their coast in recent memory.
    -
    • +

      + + + + +
      + + + 12 Parāoa Whales (Sperm Whales) recently stranded on the South Taranaki coast of Kaupokonui, on a scale not seen on their coast in recent memory. +
      + +
      +
      +
        +
      • +

        12 parāoa whales (sperm whales) recently stranded on the South Taranaki coast of Kaupokonui, on a scale not seen near this location in recent memory -

      -

      +

      +
    • +
    +

    Parata and his 22-year-old son, Te Kaurinui Robert Parata, were called in to assist. Te Kaurinui was called after the first whale his father ever named, and left university this year to return to Whangārei and study whale tikanga (protocol) and carving.

    -

    +

    He says mass strandings are getting more local and international attention and money from donations, but traditional knowledge is being dismissed as overly spiritual.

    -
    - - -
    Kauri (Tekaurinui Robert) Parata, of the New Zealand Māori tribe Ngāti Wai, in front of the carving shed at Hihiaua Cultural Centre in Whangarei
    -
    • +

      + + + + +
      + + + Kauri (Tekaurinui Robert) Parata, of the New Zealand Māori tribe Ngāti Wai, in front of the carving shed at Hihiaua Cultural Centre in Whangarei +
      + +
      +
      +
        +
      • +

        Clockwise from top: Te Kaurinui Parata, in front of the carving shed at Hihiaua Cultural Centre in Whangārei; Parata holds three whale teeth recovered from a beached whale – the middle one shows marks where a poacher had attempted to hack it out with an axe before the recovery group arrived; the Pou, a tribal identifier, in front of the carving shed. -

      -

      +

      +

    • +
    +

    ‘We need to listen’

    -

    +

    Māori harvest rights over dead whales have only been officially recognised since 1998, and the practice still elicits horror from some New Zealanders and visitors.

    -

    +

    “Our own ancestors wouldn’t say to go down there and hug the whales. That’s a modern thing,” says Te Kaurinui.

    -
    - - -
    The Pou in front of the carving shed at Hihiaua Cultural centre
    -
    - - -
    Kauri (Tekaurinui Robert) Parata, holds three whale teeth recovered from a beached whale. The middle tooth shows the marks where a poacher had attempted to hack it out with an axe before the recovery group arrived. Kauri is a member of the Manu Taupunga group that is the organising arm of the whale-body recovery operation started by his father, Hori Parata.
    -

    +

    + + + + +
    + + + The Pou in front of the carving shed at Hihiaua Cultural centre +
    + +
    +
    +
    + + + + +
    + + + Kauri (Tekaurinui Robert) Parata, holds three whale teeth recovered from a beached whale. The middle tooth shows the marks where a poacher had attempted to hack it out with an axe before the recovery group arrived. Kauri is a member of the Manu Taupunga group that is the organising arm of the whale-body recovery operation started by his father, Hori Parata. +
    + +
    +
    +

    The Ngātiwai are investigating a possible link between the crisis of the dieback disease killing New Zealand’s native kauri trees – and threatening the giant Tāne Mahuta, which may be 2,000 years old – and the increase in whale strandings.

    -

    +

    Parata and his family believe whale oil and byproducts could be used to try to cure Kauri dieback, and want more government money and attention directed towards indigenous knowledge of the interconnectedness of the New Zealand environment, and possible indigenous solutions.

    -

    +

    “People dismiss us when we tell them our spiritual understanding of whales – why they are beaching, why they are hurting,” says Te Kaurinui.

    -
    - - -
    Whangārei Harbour from Tamaterau, looking south through Mangrove sprouts coming up through the harbourside silt.
    -
    • +

      + + + + +
      + + + Whangārei Harbour from Tamaterau, looking south through Mangrove sprouts coming up through the harbourside silt. +
      + +
      +
      +
        +
      • +

        Whangārei Harbour seen from Tamaterau, with mangrove sprouts coming up through the harbourside silt -

      -

      +

      +
    • +
    +

    “We are not foreigners in this land. We did not take this land off anyone else. We were not lost waiting for some bullheads to tell us what was going on.”

    -

    +

    Kaitaia conservation department ranger Jamie Werner of Ngātiwai recently attended his first mass beaching on Ninety Mile Beach. It was the first recorded time pygmy whales had stranded on New Zealand shores.

    -

    +

    “I arrived at the beach and we leapfrogged between the animals. They were calling out to each other and reassuring each other,” says Werner. “It was a shock. We’re working to adapt but the ocean is changing so fast.”

    -
    - - -
    The skull of a Brydes whale, in the storage container at Hihiaua Cultural Centre, Whangārei.
    -
    • +

      + + + + +
      + + + The skull of a Brydes whale, in the storage container at Hihiaua Cultural Centre, Whangārei. +
      + +
      +
      +
        +
      • +

        Above, the skull of a bryde’s whale; right, a large-calibre bullet of the type that the New Zealand Department of Conservation uses for euthanasing stranded whales that are beyond rescue -

      -
      - - -
      A large calibre bullet of the type that the New Zealand Department of Conservation (DOC) uses for euthanasing stranded whales that are beyond rescue.
      -

      +

      +
    • +
    +
    + + + + +
    + + + A large calibre bullet of the type that the New Zealand Department of Conservation (DOC) uses for euthanasing stranded whales that are beyond rescue. +
    + +
    +
    +

    The recent spate of mass strandings has been described as “heartbreaking” by the conservation department.

    -

    +

    But for Parata and his family the slow, painful deaths of their ancestors are personal – and ultimately devastating – for the health of the tribe and the sea.

    -

    +

    “It’s very emotional. Our ancestors tell us the strandings are a sign from the sea. So what is the sea telling us? We need to listen.”

    -
    + diff --git a/resources/tests/readability/heise/expected.html b/resources/tests/readability/heise/expected.html index b6c1743..66eba45 100644 --- a/resources/tests/readability/heise/expected.html +++ b/resources/tests/readability/heise/expected.html @@ -1,8 +1,34 @@
    -

    1Password scannt auch QR-Codes.

    -

    (Bild: Hersteller)

    Das in der iOS-Version bereits enthaltene TOTP-Feature ist nun auch für OS X 10.10 verfügbar. Zudem gibt es neue Zusatzfelder in der Datenbank und weitere Verbesserungen.

    -

    AgileBits hat Version 5.3 seines bekannten Passwortmanagers 1Password für OS X freigegeben. Mit dem Update wird eine praktische Funktion nachgereicht, die die iOS-Version der Anwendung bereits seit längerem beherrscht: Das direkte Erstellen von Einmal-Passwörtern. Unterstützt wird dabei der TOTP-Standard (Time-Based One-Time Passwords), den unter anderem Firmen wie Evernote, Dropbox oder Google einsetzen, um ihre Zugänge besser abzusichern. Neben Account und regulärem Passwort wird dabei dann ein Zusatzcode verlangt, der nur kurze Zeit gilt.

    + + +
    + + +
    + +

    1Password scannt auch QR-Codes.

    + + +

    (Bild: Hersteller)

    + +
    + +
    + + + +

    Das in der iOS-Version bereits enthaltene TOTP-Feature ist nun auch für OS X 10.10 verfügbar. Zudem gibt es neue Zusatzfelder in der Datenbank und weitere Verbesserungen.

    +

    AgileBits hat Version 5.3 seines bekannten Passwortmanagers 1Password für OS X freigegeben. Mit dem Update wird eine praktische Funktion nachgereicht, die die iOS-Version der Anwendung bereits seit längerem beherrscht: Das direkte Erstellen von Einmal-Passwörtern. Unterstützt wird dabei der TOTP-Standard (Time-Based One-Time Passwords), den unter anderem Firmen wie Evernote, Dropbox oder Google einsetzen, um ihre Zugänge besser abzusichern. Neben Account und regulärem Passwort wird dabei dann ein Zusatzcode verlangt, der nur kurze Zeit gilt.

    Zur TOTP-Nutzung muss zunächst ein Startwert an 1Password übergeben werden. Das geht unter anderem per QR-Code, den die App über ein neues Scanfenster selbst einlesen kann – etwa aus dem Webbrowser. Eine Einführung in die Technik gibt ein kurzes Video. Die TOTP-Unterstützung in 1Password erlaubt es, auf ein zusätzliches Gerät (z.B. ein iPhone) neben dem Mac zu verzichten, das den Code liefert – was allerdings auch die Sicherheit verringert, weil es keinen "echten" zweiten Faktor mehr gibt.

    Update 5.3 des Passwortmanagers liefert auch noch weitere Verbesserungen. So gibt es die Möglichkeit, FaceTime-Audio- oder Skype-Anrufe aus 1Password zu starten, die Zahl der Zusatzfelder in der Datenbank wurde erweitert und der Umgang mit unterschiedlichen Zeitzonen klappt besser. Die Engine zur Passworteingabe im Browser soll beschleunigt worden sein.

    -

    1Password kostet aktuell knapp 50 Euro im Mac App Store und setzt in seiner aktuellen Version mindestens OS X 10.10 voraus.(bsc)

    -
    +

    1Password kostet aktuell knapp 50 Euro im Mac App Store und setzt in seiner aktuellen Version mindestens OS X 10.10 voraus. + + + +(bsc) +
    + + +

    + + diff --git a/resources/tests/readability/herald-sun-1/expected.html b/resources/tests/readability/herald-sun-1/expected.html index 618800d..789cef4 100644 --- a/resources/tests/readability/herald-sun-1/expected.html +++ b/resources/tests/readability/herald-sun-1/expected.html @@ -1,17 +1,24 @@
    -
    -

    A new Bill would require telecommunications service providers to store so-called ‘metadat

    -

    A new Bill would require telecommunications service providers to store so-called ‘metadata’ for two years.Source: - Supplied

    -
    -

    +

    +

    A new Bill would require telecommunications service providers to store so-called ‘metadat +

    +

    + A new Bill would require telecommunications service providers to store so-called ‘metadata’ for two years. + Source: + Supplied +

    +
    +

    A HIGH-powered federal government team has been doing the rounds of media organisations in the past few days in an attempt to allay concerns about the impact of new surveillance legislation on press freedom. It failed.

    -

    The roadshow featured the Prime Minister’s national security adviser, Andrew Shearer, Justin Bassi, who advises Attorney-General George Brandis on crime and security matters, and Australian Federal Police Commissioner Andrew Colvin. Staffers from the office of Communications Minister Malcolm Turnbull also took part.

    +

    The roadshow featured the Prime Minister’s national security adviser, Andrew Shearer, Justin Bassi, who advises Attorney-General George Brandis on crime and security matters, and Australian Federal Police Commissioner Andrew Colvin. Staffers from the office of Communications Minister Malcolm Turnbull also took part.

    They held meetings with executives from News Corporation and Fairfax, representatives of the TV networks, the ABC top brass and a group from the media union and the Walkley journalism foundation. I was involved as a member of the Walkley board.

    The initiative, from Tony Abbott’s office, is evidence that the Government has been alarmed by the strength of criticism from media of the Data Retention Bill it wants passed before Parliament rises in a fortnight. Bosses, journalists, even the Press Council, are up in arms, not only over this measure, but also over aspects of two earlier pieces of national security legislation that interfere with the ability of the media to hold government to account.

    + + +
    -

    The Bill would require telecommunications service providers to store so-called “metadata” — the who, where, when and how of a communication, but not its content — for two years so security and law enforcement agencies can access it without warrant. Few would argue against the use of such material to catch criminals or terrorists. But, as Parliament’s Joint Committee on Intelligence and Security has pointed out, it would also be used “for the purpose of determining the identity of a journalist’s sources”.

    +

    The Bill would require telecommunications service providers to store so-called “metadata” — the who, where, when and how of a communication, but not its content — for two years so security and law enforcement agencies can access it without warrant. Few would argue against the use of such material to catch criminals or terrorists. But, as Parliament’s Joint Committee on Intelligence and Security has pointed out, it would also be used “for the purpose of determining the identity of a journalist’s sources”.

    And that should ring warning bells for anyone genuinely concerned with the health of our democracy. Without the ability to protect the identity of sources, journalists would be greatly handicapped in exposing corruption, dishonesty, waste, incompetence and misbehaviour by public officials.

    The Press Council is concerned the laws would crush investigative journalism.

    “These legitimate concerns cannot be addressed effectively short of exempting journalists and media organisations,” says president David Weisbrot.

    diff --git a/resources/tests/readability/hidden-nodes/expected.html b/resources/tests/readability/hidden-nodes/expected.html index b8695a0..88da507 100644 --- a/resources/tests/readability/hidden-nodes/expected.html +++ b/resources/tests/readability/hidden-nodes/expected.html @@ -1,11 +1,14 @@
    -

    + +

    Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.

    -

    +

    Secondary header

    -

    + +

    Third header

    -
    + +
    diff --git a/resources/tests/readability/hukumusume/expected.html b/resources/tests/readability/hukumusume/expected.html index 6ef604b..58c76ad 100644 --- a/resources/tests/readability/hukumusume/expected.html +++ b/resources/tests/readability/hukumusume/expected.html @@ -1,107 +1,293 @@
    - - - - - -
    - - - -
    - - -

    福娘童話集 > きょうのイソップ童話 > 1月のイソップ童話 > 欲張りなイヌ + + + + + + + + + +
    + + + +
    + + + + + + + + + +
    + + +
    +   +
    + + +

    + 福娘童話集 > きょうのイソップ童話 > 1月のイソップ童話 > 欲張りなイヌ

    -

    元旦のイソップ童話



    よくばりなイヌ



    - 欲張りなイヌ



    ひらがな ←→ 日本語・英語 ←→ English

    -
    - - - - - - - -
    おりがみをつくろう( おりがみくらぶ より)
    -犬の顔の折り紙いぬのかお犬の顔の紙いぬ -
    - - + + + + + + + + +
    +

    + 元旦のイソップ童話
    +
    +
    +
    + よくばりなイヌ
    +
    +
    +
    + 欲張りなイヌ
    +
    +
    +
    + ひらがな ←→ 日本語・英語 ←→ English +

    +
    + + + + + + + + + + + + +
    + + + おりがみをつくろう + + ( おりがみくらぶ より) + + +
    + + + + + + +
    + 犬の顔の折り紙いぬのかお   犬の顔の紙いぬ +
    +
    +
    + + + + - - -
    ♪音声配信(html5) -
    亜姫の朗読☆ イソップ童話より
    -

    +

    + +
    + 亜姫の朗読☆ イソップ童話より +
    +

     肉をくわえたイヌが、橋を渡っていました。  ふと下を見ると、川の中にも肉をくわえたイヌがいます。 イヌはそれを見て、思いました。(あいつの肉の方が、大きそうだ)  イヌは、くやしくてたまりません。 (そうだ、あいつをおどかして、あの肉を取ってやろう)  そこでイヌは、川の中のイヌに向かって思いっきり吠えました。 「ウゥー、ワン!!」  そのとたん、くわえていた肉はポチャンと川の中に落ちてしまいました。 「ああー、ぁぁー」  川の中には、がっかりしたイヌの顔がうつっています。  さっきの川の中のイヌは、水にうつった自分の顔だったのです。  同じ物を持っていても、人が持っている物の方が良く見え、また、欲張るとけっきょく損をするというお話しです。

    -

    +

    おしまい

    -

    前のページへ戻る



    - - - - - - - -
    - - + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    -1月 1日の豆知識



    +

    + 前のページへ戻る
    +
    +
    +
    + + +

    +
    + + + + + + + + + + +
    + + + +
    + + + + - - - - - - - - - - - - - - -
    +      1月 1日の豆知識
    +
    +
    +
    366日への旅
    -
    -きょうの記念日

    元旦 -
    -きょうの誕生花

    松(まつ) -
    -きょうの誕生日・出来事

    1949年 Mr.マリック(マジシャン) -
    -恋の誕生日占い

    自分の考えをしっかりと持った女の子。 -
    -なぞなぞ小学校

    ○(丸)を取ったらお母さんになってしまう男の人は? -
    -あこがれの職業紹介

    歌手 -
    -恋の魔法とおまじない 001

    両思いになれる おまじない -
    1月 1日の童話・昔話



    - 福娘童話集
    -きょうの日本昔話

    ネコがネズミを追いかける訳 -
    -きょうの世界昔話

    モンゴルの十二支話 -
    -きょうの日本民話

    仕事の取替えっこ -
    -きょうのイソップ童話

    欲張りなイヌ -
    -きょうの江戸小話

    ぞうきんとお年玉 -
    -きょうの百物語

    百物語の幽霊 -
    - - - - - - - -
    福娘のサイト
    366日への旅

    毎日の記念日・誕生花 ・有名人の誕生日と性格判断
    福娘童話集

    世界と日本の童話と昔話
    女の子応援サイト -さくら-

    誕生日占い、お仕事紹介、おまじない、など
    子どもの病気相談所

    病気検索と対応方法、症状から検索するWEB問診
    世界60秒巡り

    国旗国歌や世界遺産など、世界の国々の豆知識
    -
    + きょうの記念日
    +
    + 元旦 +
    + きょうの誕生花
    +
    + 松(まつ) +
    + きょうの誕生日・出来事
    +
    + 1949年 Mr.マリック(マジシャン) +
    + 恋の誕生日占い
    +
    + 自分の考えをしっかりと持った女の子。 +
    + なぞなぞ小学校
    +
    + ○(丸)を取ったらお母さんになってしまう男の人は? +
    + あこがれの職業紹介
    +
    + 歌手 +
    + 恋の魔法とおまじない 001
    +
    + 両思いになれる おまじない +
    +   1月 1日の童話・昔話
    +
    +
    +
    + 福娘童話集
    +
    + きょうの日本昔話
    +
    + ネコがネズミを追いかける訳 +
    + きょうの世界昔話
    +
    + モンゴルの十二支話 +
    + きょうの日本民話
    +
    + 仕事の取替えっこ +
    + きょうのイソップ童話
    +
    + 欲張りなイヌ +
    + きょうの江戸小話
    +
    + ぞうきんとお年玉 +
    + きょうの百物語
    +
    + 百物語の幽霊 +
    + + + + + + + + + + + + + + + + + + + + + +
    + 福娘のサイト +
    + 366日への旅
    +
    + 毎日の記念日・誕生花 ・有名人の誕生日と性格判断
    +
    + 福娘童話集
    +
    + 世界と日本の童話と昔話
    +
    + 女の子応援サイト -さくら-
    +
    + 誕生日占い、お仕事紹介、おまじない、など
    +
    + 子どもの病気相談所
    +
    + 病気検索と対応方法、症状から検索するWEB問診
    +
    + 世界60秒巡り
    +
    + 国旗国歌や世界遺産など、世界の国々の豆知識
    +
    + +
    diff --git a/resources/tests/readability/iab-1/expected.html b/resources/tests/readability/iab-1/expected.html index da3011a..4b043d5 100644 --- a/resources/tests/readability/iab-1/expected.html +++ b/resources/tests/readability/iab-1/expected.html @@ -1,5 +1,5 @@
    -

    We messed up. As technologists, tasked with delivering content and services to users, we lost track of the user experience.

    +

    We messed up. As technologists, tasked with delivering content and services to users, we lost track of the user experience.

    Twenty years ago we saw an explosion of websites, built by developers around the world, providing all forms of content. This was the beginning of an age of enlightenment, the intersection of content and technology. Many of us in the technical field felt compelled, and even empowered, to produce information as the distribution means for mass communication were no longer restricted by a high barrier to entry.

    In 2000, the dark ages came when the dot-com bubble burst. We were told that our startups were gone or that our divisions sustained by corporate parent companies needed to be in the black. It was a wakeup call that led to a renaissance age. Digital advertising became the foundation of an economic engine that, still now, sustains the free and democratic World Wide Web. In digital publishing, we strived to balance content, commerce, and technology. The content management systems and communication gateways we built to inform and entertain populations around the world disrupted markets and in some cases governments, informed communities of imminent danger, and liberated new forms of art and entertainment—all while creating a digital middle class of small businesses.

    We engineered not just the technical, but also the social and economic foundation that users around the world came to lean on for access to real time information. And users came to expect this information whenever and wherever they needed it. And more often than not, for anybody with a connected device, it was free.

    @@ -16,6 +16,7 @@

    The IAB Tech Lab will continue to provide the tools for publishers in the digital supply chain to have a dialogue with users about their choices so that content providers can generate revenue while creating value. Publishers should have the opportunity to provide rich advertising experiences, L.E.A.N. advertising experiences, and subscription services. Or publishers can simply deny their service to users who choose to keep on blocking ads. That is all part of elasticity of consumer tolerance and choice.

    Finally, we must do this in an increasingly fragmented market, across screens. We must do this in environments where entire sites are blocked, purposefully or not. Yes, it is disappointing that our development efforts will have to manage with multiple frameworks while we work to supply the economic engine to sustain an open internet. However, our goal is still to provide diverse content and voices to as many connected users as possible around the world.

    That is user experience.

    +

    IAB Tech Lab Members can join the IAB Tech Lab Ad Blocking Working Group, please email adblocking@iab.com for more information.

    Read more about ad blocking here.

    -
    + diff --git a/resources/tests/readability/ietf-1/expected.html b/resources/tests/readability/ietf-1/expected.html index 61f56e1..6059c46 100644 --- a/resources/tests/readability/ietf-1/expected.html +++ b/resources/tests/readability/ietf-1/expected.html @@ -1,5 +1,10 @@
    -[Docs] [txt|pdf] [Tracker] [Email] [Diff1] [Diff2] [Nits]

    Versions: 0001020304

    INTERNET DRAFT                                      Michiel B. de Jong
    +   
    +[Docs] [txt|pdf] [Tracker] [Email] [Diff1] [Diff2] [Nits]               
    +
    +Versions: 00 01 02 03 04
    +
    +
    INTERNET DRAFT                                      Michiel B. de Jong
     Document: draft-dejong-remotestorage-04                   IndieHosters
                                                                  F. Kooman
     Intended Status: Proposed Standard                       (independent)
    @@ -51,13 +56,45 @@ Copyright Notice
        described in the Simplified BSD License.
     
     
    -de Jong                                                         [Page 1]
    -
    Internet-Draft              remoteStorage                  December 2014
    +de Jong                                                         [Page 1]
    +
    +
     
    +Internet-Draft              remoteStorage                  December 2014
     
     
     Table of Contents
     
    -   1. Introduction...................................................22. Terminology....................................................33. Storage model..................................................34. Requests.......................................................45. Response codes.................................................76. Versioning.....................................................77. CORS headers...................................................88. Session description............................................89. Bearer tokens and access control...............................910. Application-first bearer token issuance.......................1011. Storage-first bearer token issuance...........................1112. Example wire transcripts......................................1212.1. WebFinger................................................1212.2. OAuth dialog form........................................1312.3. OAuth dialog form submission.............................1412.4. OPTIONS preflight........................................1512.5. Initial PUT..............................................1512.6. Subsequent PUT...........................................1612.7. GET......................................................1612.8. DELETE...................................................1713. Distributed versioning........................................1714. Security Considerations.......................................1915. IANA Considerations...........................................2016. Acknowledgments...............................................2017. References....................................................2117.1. Normative References.....................................2117.2. Informative References...................................2118. Authors' addresses............................................221.  Introduction
    +   1. Introduction...................................................2
    +   2. Terminology....................................................3
    +   3. Storage model..................................................3
    +   4. Requests.......................................................4
    +   5. Response codes.................................................7
    +   6. Versioning.....................................................7
    +   7. CORS headers...................................................8
    +   8. Session description............................................8
    +   9. Bearer tokens and access control...............................9
    +  10. Application-first bearer token issuance.......................10
    +  11. Storage-first bearer token issuance...........................11
    +  12. Example wire transcripts......................................12
    +     12.1. WebFinger................................................12
    +     12.2. OAuth dialog form........................................13
    +     12.3. OAuth dialog form submission.............................14
    +     12.4. OPTIONS preflight........................................15
    +     12.5. Initial PUT..............................................15
    +     12.6. Subsequent PUT...........................................16
    +     12.7. GET......................................................16
    +     12.8. DELETE...................................................17
    +  13. Distributed versioning........................................17
    +  14. Security Considerations.......................................19
    +  15. IANA Considerations...........................................20
    +  16. Acknowledgments...............................................20
    +  17. References....................................................21
    +     17.1. Normative References.....................................21
    +     17.2. Informative References...................................21
    +  18. Authors' addresses............................................22
    +
    +
    +1.  Introduction
     
         Many services for data storage are available over the internet. This
         specification describes a vendor-independent interface for such
    @@ -70,8 +107,10 @@ Table of Contents
               documents and subfolders currently contained by the folder
     
     
    -de Jong                                                         [Page 2]
    -
    Internet-Draft              remoteStorage                  December 2014
    +de Jong                                                         [Page 2]
    +
    +
     
    +Internet-Draft              remoteStorage                  December 2014
     
     
            *  GET a document: retrieve its content type, current version,
    @@ -119,8 +158,10 @@ Table of Contents
         For a document, the server stores, and should be able to produce:
     
     
    -de Jong                                                         [Page 3]
    -
    Internet-Draft              remoteStorage                  December 2014
    +de Jong                                                         [Page 3]
    +
    +
     
    +Internet-Draft              remoteStorage                  December 2014
     
     
     
    @@ -168,8 +209,10 @@ Table of Contents
         A folder description is a map containing a string-valued 'ETag'
     
     
    -de Jong                                                         [Page 4]
    -
    Internet-Draft              remoteStorage                  December 2014
    +de Jong                                                         [Page 4]
    +
    +
     
    +Internet-Draft              remoteStorage                  December 2014
     
     
         field, representing the folder's current version.
    @@ -217,8 +260,10 @@ Table of Contents
         version as a strong ETag in an 'ETag' header.
     
     
    -de Jong                                                         [Page 5]
    -
    Internet-Draft              remoteStorage                  December 2014
    +de Jong                                                         [Page 5]
    +
    +
     
    +Internet-Draft              remoteStorage                  December 2014
     
     
     
    @@ -266,8 +311,13 @@ Table of Contents
     
     
     
    -de Jong                                                         [Page 6]
    -
    Internet-Draft              remoteStorage                  December 20145. Response codes
    +de Jong                                                         [Page 6]
    +
    +
     
    +Internet-Draft              remoteStorage                  December 2014
    +
    +
    +5. Response codes
     
         Response codes SHOULD be given as defined by [HTTP, section 6] and
         [BEARER, section 3.1]. The following is a non-normative checklist
    @@ -312,8 +362,10 @@ Table of Contents
     
     
     
    -de Jong                                                         [Page 7]
    -
    Internet-Draft              remoteStorage                  December 2014
    +de Jong                                                         [Page 7]
    +
    +
     
    +Internet-Draft              remoteStorage                  December 2014
     
     
         GET requests MAY have a comma-separated list of revisions in an
    @@ -361,8 +413,10 @@ Table of Contents
         tokens, to the URL that is the concatenation of <storage_root> with
     
     
    -de Jong                                                         [Page 8]
    -
    Internet-Draft              remoteStorage                  December 2014
    +de Jong                                                         [Page 8]
    +
    +
     
    +Internet-Draft              remoteStorage                  December 2014
     
     
         '/' plus one or more <folder> '/' strings indicating a path in the
    @@ -410,8 +464,14 @@ Table of Contents
         section 2.3].
     
     
    -de Jong                                                         [Page 9]
    -
    Internet-Draft              remoteStorage                  December 201410. Application-first bearer token issuance
    +de Jong                                                         [Page 9]
    +
    +
     
    +Internet-Draft              remoteStorage                  December 2014
    +
    +
    +
    +10. Application-first bearer token issuance
     
         To make a remoteStorage server available as 'the remoteStorage of
         <account> at <host>', exactly one link of the following format
    @@ -455,8 +515,10 @@ Table of Contents
         instead of in the request header.
     
     
    -de Jong                                                        [Page 10]
    -
    Internet-Draft              remoteStorage                  December 2014
    +de Jong                                                        [Page 10]
    +
    +
     
    +Internet-Draft              remoteStorage                  December 2014
     
     
     
    @@ -504,8 +566,10 @@ Table of Contents
         to the application or open it in a new window. To mimic coming back
     
     
    -de Jong                                                        [Page 11]
    -
    Internet-Draft              remoteStorage                  December 2014
    +de Jong                                                        [Page 11]
    +
    +
     
    +Internet-Draft              remoteStorage                  December 2014
     
     
         from the OAuth dialog, it MAY add 'access_token' and 'scope'
    @@ -553,8 +617,10 @@ Table of Contents
         following request, using XMLHttpRequest and CORS:
     
     
    -de Jong                                                        [Page 12]
    -
    Internet-Draft              remoteStorage                  December 2014
    +de Jong                                                        [Page 12]
    +
    +
     
    +Internet-Draft              remoteStorage                  December 2014
     
     
     
    @@ -602,8 +668,10 @@ motestorage-04",
         the account's "myfavoritedrinks" scope:
     
     
    -de Jong                                                        [Page 13]
    -
    Internet-Draft              remoteStorage                  December 2014
    +de Jong                                                        [Page 13]
    +
    +
     
    +Internet-Draft              remoteStorage                  December 2014
     
     
     
    @@ -647,8 +715,14 @@ low
             Location:https://drinks-unhosted.5apps.com/#access_token=j2YnGt\
     XjzzzHNjkd1CJxoQubA1o%3D&token_type=bearer&state=
     
    -12.4. OPTIONS preflightde Jong                                                        [Page 14]
    -
    Internet-Draft              remoteStorage                  December 2014
    +12.4. OPTIONS preflight
    +
    +
    +
    +de Jong                                                        [Page 14]
    +
    +
     
    +Internet-Draft              remoteStorage                  December 2014
     
     
         When an in-browser application makes a cross-origin request which
    @@ -693,8 +767,13 @@ ntent-Type, Origin, X-Requested-With, If-Match, If-None-Match
             Access-Control-Allow-Origin: https://drinks-unhosted.5apps.com
             ETag: "1382694045000"
     
    -12.6. Subsequent PUTde Jong                                                        [Page 15]
    -
    Internet-Draft              remoteStorage                  December 2014
    +12.6. Subsequent PUT
    +
    +
    +de Jong                                                        [Page 15]
    +
    +
     
    +Internet-Draft              remoteStorage                  December 2014
     
     
     
    @@ -742,8 +821,10 @@ e.io/spec/modules/myfavoritedrinks/drink"}
             HTTP/1.1 200 OK
     
     
    -de Jong                                                        [Page 16]
    -
    Internet-Draft              remoteStorage                  December 2014
    +de Jong                                                        [Page 16]
    +
    +
     
    +Internet-Draft              remoteStorage                  December 2014
     
     
             Access-Control-Allow-Origin: https://drinks-unhosted.5apps.com
    @@ -775,7 +856,9 @@ charset=UTF-8","Content-Length":106}}}
         Not Found status, and no ETag header:
     
             HTTP/1.1 404 Not Found
    -        Access-Control-Allow-Origin: https://drinks-unhosted.5apps.com12.8. DELETE
    +        Access-Control-Allow-Origin: https://drinks-unhosted.5apps.com
    +
    +12.8. DELETE
     
         A DELETE request may look like this:
     
    @@ -789,8 +872,10 @@ charset=UTF-8","Content-Length":106}}}
     
     
     
    -de Jong                                                        [Page 17]
    -
    Internet-Draft              remoteStorage                  December 2014
    +de Jong                                                        [Page 17]
    +
    +
     
    +Internet-Draft              remoteStorage                  December 2014
     
     
         And the server may respond with a 412 Conflict or a 200 OK status:
    @@ -838,8 +923,10 @@ charset=UTF-8","Content-Length":106}}}
     
     
     
    -de Jong                                                        [Page 18]
    -
    Internet-Draft              remoteStorage                  December 2014
    +de Jong                                                        [Page 18]
    +
    +
     
    +Internet-Draft              remoteStorage                  December 2014
     
     
         As an example, the root folder may contain 10 directories,
    @@ -887,8 +974,10 @@ charset=UTF-8","Content-Length":106}}}
         OAuth dialog and launch dashboard or token revokation interface
     
     
    -de Jong                                                        [Page 19]
    -
    Internet-Draft              remoteStorage                  December 2014
    +de Jong                                                        [Page 19]
    +
    +
     
    +Internet-Draft              remoteStorage                  December 2014
     
     
         SHOULD be on a different origin than the remoteStorage interface.
    @@ -927,15 +1016,19 @@ charset=UTF-8","Content-Length":106}}}
         Rick van Rein, Mark Nottingham, Julian Reschke, and Markus
         Lanthaler, among many others.
     
    -17. References17.1. Normative References
    +17. References
    +
    +17.1. Normative References
     
         [WORDS]
             Bradner, S., "Key words for use in RFCs to Indicate Requirement
             Levels", BCP 14, RFC 2119, March 1997.
     
     
    -de Jong                                                        [Page 20]
    -
    Internet-Draft              remoteStorage                  December 2014
    +de Jong                                                        [Page 20]
    +
    +
     
    +Internet-Draft              remoteStorage                  December 2014
     
     
     
    @@ -969,7 +1062,8 @@ charset=UTF-8","Content-Length":106}}}
             Conditional Requests", RFC7233, June 2014.
     
         [SPDY]
    -        Mark Belshe, Roberto Peon, "SPDY Protocol - Draft 3.1", http://www.chromium.org/spdy/spdy-protocol/spdy-protocol-draft3-1,
    +        Mark Belshe, Roberto Peon, "SPDY Protocol - Draft 3.1", http://
    +        www.chromium.org/spdy/spdy-protocol/spdy-protocol-draft3-1,
             September 2013.
     
         [JSON-LD]
    @@ -982,8 +1076,13 @@ charset=UTF-8","Content-Length":106}}}
             W3C Candidate Recommendation 29 January 2013",
     
     
    -de Jong                                                        [Page 21]
    -
    Internet-Draft              remoteStorage                  December 2014http://www.w3.org/TR/cors/, January 2013.
    +de Jong                                                        [Page 21]
    +
    +
     
    +Internet-Draft              remoteStorage                  December 2014
    +
    +
    +        http://www.w3.org/TR/cors/, January 2013.
     
         [MANIFEST]
             Mozilla Developer Network (ed), "App manifest -- Revision
    @@ -1031,6 +1130,8 @@ charset=UTF-8","Content-Length":106}}}
     de Jong                                                        [Page 22]
     
     
    -
    Html markup produced by rfcmarkup 1.111, available from -https://tools.ietf.org/tools/rfcmarkup/ +
    +Html markup produced by rfcmarkup 1.111, available from +https://tools.ietf.org/tools/rfcmarkup/ +
    diff --git a/resources/tests/readability/webmd-1/expected.html b/resources/tests/readability/webmd-1/expected.html index 8969848..1fe1cae 100644 --- a/resources/tests/readability/webmd-1/expected.html +++ b/resources/tests/readability/webmd-1/expected.html @@ -1,18 +1,27 @@
    -

    Feb. 23, 2015 -- Life-threatening peanut allergies have mysteriously + + + + + + + + + +

    Feb. 23, 2015 -- Life-threatening peanut allergies have mysteriously been on the rise in the past decade, with little hope for a cure.

    -

    But a groundbreaking new +

    But a groundbreaking new study may offer a way to stem that rise, while another may offer some hope for those who are already allergic.

    -

    Parents have been told for years to avoid giving foods containing +

    Parents have been told for years to avoid giving foods containing peanuts to babies for fear of triggering an allergy. Now research shows the opposite is true: Feeding babies snacks made with peanuts before their first birthday appears to prevent that from happening.

    -

    The study is published in the New England Journal of Medicine, +

    The study is published in the New England Journal of Medicine, and it was presented at the annual meeting of the American Academy of Allergy, @@ -28,35 +37,36 @@ egg, they had the skin condition eczema, or both.

    -

    Overall, about 3% of kids who ate peanut butter or peanut snacks +

    Overall, about 3% of kids who ate peanut butter or peanut snacks before their first birthday got an allergy, compared to about 17% of kids who didn’t eat them.

    -

    “I think this study is an astounding and groundbreaking study, +

    “I think this study is an astounding and groundbreaking study, really,” says Katie Allen, MD, PhD. She's the director of the Center for Food and Allergy Research at the Murdoch Children’s Research Institute in Melbourne, Australia. Allen was not involved in the research.

    -

    Experts say the research should shift thinking about how kids develop +

    Experts say the research should shift thinking about how kids develop food allergies, and it should change the guidance doctors give to parents.

    -

    Meanwhile, for children and adults who are already allergic to peanuts, +

    Meanwhile, for children and adults who are already allergic to peanuts, another study presented at the same meeting held out hope of a treatment.

    -

    A new skin patch called Viaskin allowed people with peanut allergies +

    A new skin patch called Viaskin allowed people with peanut allergies to eat tiny amounts of peanuts after they wore it for a year.

    -

    A Change in Guidelines?

    -

    Allergies to peanuts and other foods are on the rise. In the U.S., +

    A Change in Guidelines?

    + +

    Allergies to peanuts and other foods are on the rise. In the U.S., more than 2% of people react to peanuts, a 400% increase since 1997. And reactions to peanuts and other tree nuts can be especially severe. Nuts are the main reason people get a life-threatening problem called anaphylaxis.

    -
    + diff --git a/src/constants.rs b/src/constants.rs index 18c3069..b9abebf 100644 --- a/src/constants.rs +++ b/src/constants.rs @@ -18,8 +18,7 @@ pub static BYLINE: Lazy = Lazy::new(|| { .build() .expect("BYLINE regex") }); -pub static NORMALIZE: Lazy = - Lazy::new(|| Regex::new(r#"/\s{2,}/g"#).expect("NORMALIZE regex")); +pub static NORMALIZE: Lazy = Lazy::new(|| Regex::new(r#"\s{2,}"#).expect("NORMALIZE regex")); pub static TOKENIZE: Lazy = Lazy::new(|| Regex::new(r#"\W+"#).expect("TOKENIZE regex")); pub static UNLIELY_CANDIDATES: Lazy = Lazy::new(|| { RegexBuilder::new(r#"-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote"#).case_insensitive(true).build().expect("UNLIELY_CANDIDATES regex") diff --git a/src/full_text_parser/mod.rs b/src/full_text_parser/mod.rs index c8412ca..d284fb8 100644 --- a/src/full_text_parser/mod.rs +++ b/src/full_text_parser/mod.rs @@ -1001,7 +1001,7 @@ impl FullTextParser { || Util::has_single_tag_inside_element(&node, "SECTION") { if let Some(mut parent) = node.get_parent() { - if let Some(mut child) = node.get_child_nodes().into_iter().next() { + if let Some(mut child) = node.get_child_elements().into_iter().next() { for (k, v) in node.get_attributes().into_iter() { child.set_attribute(&k, &v).map_err(|e| { log::error!("{e}"); diff --git a/src/full_text_parser/readability/mod.rs b/src/full_text_parser/readability/mod.rs index 2f1c625..d0eff58 100644 --- a/src/full_text_parser/readability/mod.rs +++ b/src/full_text_parser/readability/mod.rs @@ -34,7 +34,7 @@ impl Readability { let tag_name = node_ref.get_name().to_uppercase(); if tag_name == "TEXT" && node_ref.get_content().trim().is_empty() { - node = Util::remove_and_next(node_ref); + node = Util::next_node(node_ref, true); continue; } @@ -188,18 +188,6 @@ impl Readability { node = 
Util::next_node(node_ref, false); } - // let html = document.to_string_with_options(libxml::tree::SaveOptions { - // format: true, - // no_declaration: false, - // no_empty_tags: true, - // no_xhtml: false, - // xhtml: false, - // as_xml: false, - // as_html: true, - // non_significant_whitespace: false, - // }); - // std::fs::write("debug.html", &html).unwrap(); - let mut candidates = Vec::new(); // Loop through all paragraphs, and assign a score to them based on how content-y they look. // Then add their score to their parent node. @@ -262,6 +250,10 @@ impl Readability { if let Some(score) = Self::get_content_score(&ancestor) { let add_score = content_score / score_divider; let new_score = score + add_score; + log::debug!( + "{}: {score} + {add_score} = {new_score}", + ancestor.get_name() + ); Self::set_content_score(&mut ancestor, new_score)?; } } @@ -289,9 +281,14 @@ impl Readability { }); let top_candidates = candidates.into_iter().take(5).collect::>(); - // for candidate in top_candidates.iter() { - // println!("candidate: {} {:?}", candidate.get_name(), candidate.get_attributes()); - // } + + for candidate in top_candidates.iter() { + log::debug!( + "candidate: {} {:?}", + candidate.get_name(), + candidate.get_attributes() + ); + } let mut needed_to_create_top_candidate = false; let mut top_candidate = top_candidates.first().cloned().unwrap_or_else(|| { // If we still have no top candidate, just use the body as a last resort. @@ -302,6 +299,8 @@ impl Readability { rt }); + //Util::serialize_node(&top_candidate, "top_candidate.html"); + let mut alternative_candidate_ancestors = Vec::new(); // Find a better top candidate node if it contains (at least three) nodes which belong to `topCandidates` array // and whose scores are quite closed with current `topCandidate` node. 
@@ -346,6 +345,8 @@ impl Readability { Self::initialize_node(&mut top_candidate, &state)?; } + //Util::serialize_node(&top_candidate, "new_top_candidate.html"); + // Because of our bonus system, parents of candidates might have scores // themselves. They get half of the node. There won't be nodes with higher // scores than our topCandidate, but if we see the score going *up* in the first @@ -433,7 +434,11 @@ impl Readability { let mut append = false; let score = Self::get_content_score(&sibling).unwrap_or(0.0); - log::debug!("Looking at sibling node: {sibling:?} with score {score}"); + log::debug!( + "Looking at sibling node: {} ({:?}) with score {score}", + sibling.get_name(), + sibling.get_attribute("class") + ); if top_candidate == sibling { append = true; @@ -473,14 +478,22 @@ impl Readability { } if append { - log::debug!("Appending node: {sibling:?}"); + log::debug!( + "Appending node: {} ({:?})", + sibling.get_name(), + sibling.get_attribute("class") + ); if !constants::ALTER_TO_DIV_EXCEPTIONS .contains(sibling.get_name().to_uppercase().as_str()) { // We have a node that isn't a common block level element, like a form or td tag. // Turn it into a div so it doesn't get filtered out later by accident. 
- log::debug!("Altering sibling: {sibling:?} to div."); + log::debug!( + "Altering sibling: {} ({:?})", + sibling.get_name(), + sibling.get_attribute("class") + ); sibling.set_name("DIV").map_err(|error| { log::error!("{error}"); @@ -544,6 +557,8 @@ impl Readability { let text = Util::get_inner_text(&article_content, true); let text_length = text.len(); + //Util::serialize_node(&article_content, "debug.html"); + if text_length < constants::DEFAULT_CHAR_THRESHOLD { parse_successful = false; diff --git a/src/full_text_parser/readability/tests.rs b/src/full_text_parser/readability/tests.rs index 32fe94a..d3fc1a3 100644 --- a/src/full_text_parser/readability/tests.rs +++ b/src/full_text_parser/readability/tests.rs @@ -8,7 +8,10 @@ use crate::{ async fn run_test(name: &str) { libxml::tree::node::set_node_rc_guard(10); - let _ = env_logger::builder().is_test(true).try_init(); + let _ = env_logger::builder() + .filter_level(log::LevelFilter::Debug) + .is_test(true) + .try_init(); let empty_config = ConfigEntry::default(); @@ -170,7 +173,7 @@ async fn dropbox_blog() { } #[tokio::test] -async fn ebbb_org() { +async fn ebb_org() { run_test("ebb-org").await } diff --git a/src/util.rs b/src/util.rs index 1a7b173..8ab7483 100644 --- a/src/util.rs +++ b/src/util.rs @@ -308,7 +308,7 @@ impl Util { pub fn get_inner_text(node: &Node, normalize_spaces: bool) -> String { let content = node.get_content().trim().to_owned(); if normalize_spaces { - constants::NORMALIZE.replace(&content, " ").into() + constants::NORMALIZE.replace_all(&content, " ").into() } else { content } @@ -427,7 +427,7 @@ impl Util { } pub fn get_link_density(node: &Node) -> f64 { - let text_length = Util::get_inner_text(node, false).len(); + let text_length = Util::get_inner_text(node, true).len(); if text_length == 0 { return 0.0; } @@ -443,7 +443,7 @@ impl Util { } else { 1.0 }; - link_length += Util::get_inner_text(&link_node, false).len() as f64 * coefficient; + link_length += Util::get_inner_text(&link_node, 
true).len() as f64 * coefficient; } } @@ -580,7 +580,7 @@ impl Util { } let link_density = Self::get_link_density(node); - let content = Self::get_inner_text(node, false); + let content = Self::get_inner_text(node, true); let content_length = content.len(); let have_to_remove = (img > 1 @@ -780,4 +780,21 @@ impl Util { || ((tag_name == "A" || tag_name == "DEL" || tag_name == "INS") && node.get_child_nodes().iter().all(Self::is_phrasing_content)) } + + #[allow(dead_code)] + pub fn serialize_node(node: &Node, filename: &str) { + let mut doc = libxml::tree::Document::new().unwrap(); + doc.set_root_element(node); + let html = doc.to_string_with_options(libxml::tree::SaveOptions { + format: true, + no_declaration: false, + no_empty_tags: true, + no_xhtml: false, + xhtml: false, + as_xml: false, + as_html: true, + non_significant_whitespace: false, + }); + std::fs::write(filename, &html).unwrap(); + } }