refactor & more testing

2025-07-07 16:15:32 +02:00 · 2023-02-25 00:42:26 +01:00 · 2023-02-25 00:42:26 +01:00 · e3246af28b
commit e3246af28b
parent 7ae98904d4
14 changed files with 1969 additions and 101 deletions
--- a/resources/tests/readability/001/expected.html
+++ b/resources/tests/readability/001/expected.html
@@ -0,0 +1,132 @@
+<article><DIV id="readability-page-1" class="page"><section>
+                        <p><strong>So finally you're <a href="/code/2013/testing-frontend-javascript-code-using-mocha-chai-and-sinon/" target="_blank">testing your frontend JavaScript code</a>? Great! The more you
+write tests, the more confident you are with your code… but how much precisely?
+That's where <a href="http://en.wikipedia.org/wiki/Code_coverage" target="_blank">code coverage</a> might
+help.</strong>
+                        </p>
+                        <p>The idea behind code coverage is to record which parts of your code (functions,
+                            statements, conditionals and so on) have been executed by your test suite,
+                            to compute metrics out of these data and usually to provide tools for navigating
+                            and inspecting them.</p>
+                        <p>Not a lot of frontend developers I know actually test their frontend code,
+                            and I can barely imagine how many of them have ever set up code coverage…
+                            Mostly because there are not many frontend-oriented tools in this area
+                            I guess.</p>
+                        <p>Actually I've only found one which provides an adapter for <a href="http://visionmedia.github.io/mocha/" target="_blank">Mocha</a> and
+                            actually works…</p>
+                        <blockquote>
+                            <p>Drinking game for web devs:
+                                <br>(1) Think of a noun
+                                <br>(2) Google "&lt;noun&gt;.js"
+                                <br>(3) If a library with that name exists - drink</p>— Shay Friedman (@ironshay)
+                            <a href="https://twitter.com/ironshay/statuses/370525864523743232" target="_blank">August 22, 2013</a>
+                        </blockquote>
+                        <p><strong><a href="http://blanketjs.org/" target="_blank">Blanket.js</a></strong> is an <em>easy to install, easy to configure,
+and easy to use JavaScript code coverage library that works both in-browser and
+with nodejs.</em>
+                        </p>
+                        <p>Its use is dead easy, adding Blanket support to your Mocha test suite
+                            is just a matter of adding this simple line to your HTML test file:</p>
+<pre><code>&lt;script src="vendor/blanket.js"
+        data-cover-adapter="vendor/mocha-blanket.js"&gt;&lt;/script&gt;
+</code></pre>
+
+                        <p>Source files: <a href="https://raw.github.com/alex-seville/blanket/master/dist/qunit/blanket.min.js" target="_blank">blanket.js</a>,
+                            <a href="https://raw.github.com/alex-seville/blanket/master/src/adapters/mocha-blanket.js" target="_blank">mocha-blanket.js</a>
+                        </p>
+                        <p>As an example, let's reuse the silly <code>Cow</code> example we used
+                            <a href="/code/2013/testing-frontend-javascript-code-using-mocha-chai-and-sinon/" target="_blank">in a previous episode</a>:</p>
+<pre><code>// cow.js
+(function(exports) {
+  "use strict";
+
+  function Cow(name) {
+    this.name = name || "Anon cow";
+  }
+  exports.Cow = Cow;
+
+  Cow.prototype = {
+    greets: function(target) {
+      if (!target)
+        throw new Error("missing target");
+      return this.name + " greets " + target;
+    }
+  };
+})(this);
+</code></pre>
+
+                        <p>And its test suite, powered by Mocha and <a href="http://chaijs.com/" target="_blank">Chai</a>:</p>
+<pre><code>var expect = chai.expect;
+
+describe("Cow", function() {
+  describe("constructor", function() {
+    it("should have a default name", function() {
+      var cow = new Cow();
+      expect(cow.name).to.equal("Anon cow");
+    });
+
+    it("should set cow's name if provided", function() {
+      var cow = new Cow("Kate");
+      expect(cow.name).to.equal("Kate");
+    });
+  });
+
+  describe("#greets", function() {
+    it("should greet passed target", function() {
+      var greetings = (new Cow("Kate")).greets("Baby");
+      expect(greetings).to.equal("Kate greets Baby");
+    });
+  });
+});
+</code></pre>
+
+                        <p>Let's create the HTML test file for it, featuring Blanket and its adapter
+                            for Mocha:</p>
+<pre><code>&lt;!DOCTYPE html&gt;
+&lt;html&gt;
+&lt;head&gt;
+  &lt;meta charset="utf-8"&gt;
+  &lt;title&gt;Test&lt;/title&gt;
+  &lt;link rel="stylesheet" media="all" href="vendor/mocha.css"&gt;
+&lt;/head&gt;
+&lt;body&gt;
+  &lt;div id="mocha"&gt;&lt;/div&gt;
+  &lt;div id="messages"&gt;&lt;/div&gt;
+  &lt;div id="fixtures"&gt;&lt;/div&gt;
+  &lt;script src="vendor/mocha.js"&gt;&lt;/script&gt;
+  &lt;script src="vendor/chai.js"&gt;&lt;/script&gt;
+  &lt;script src="vendor/blanket.js"
+          data-cover-adapter="vendor/mocha-blanket.js"&gt;&lt;/script&gt;
+  &lt;script&gt;mocha.setup('bdd');&lt;/script&gt;
+  &lt;script src="cow.js" data-cover&gt;&lt;/script&gt;
+  &lt;script src="cow_test.js"&gt;&lt;/script&gt;
+  &lt;script&gt;mocha.run();&lt;/script&gt;
+&lt;/body&gt;
+&lt;/html&gt;
+</code></pre>
+
+                        <p><strong>Notes</strong>:</p>
+                        <ul>
+                            <li>Notice the <code>data-cover</code> attribute we added to the script tag
+                                loading the source of our library;</li>
+                            <li>The HTML test file <em>must</em> be served over HTTP for the adapter to
+                                be loaded.</li>
+                        </ul>
+                        <p>Running the tests now gives us something like this:</p>
+                        <p>
+                            <img alt="screenshot" src="/static/code/2013/blanket-coverage.png">
+                        </p>
+                        <p>As you can see, the report at the bottom highlights that we haven't actually
+                            tested the case where an error is raised in case a target name is missing.
+                            We've been informed of that, nothing more, nothing less. We simply know
+                            we're missing a test here. Isn't this cool? I think so!</p>
+                        <p>Just remember that code coverage will only <a href="http://codebetter.com/karlseguin/2008/12/09/code-coverage-use-it-wisely/" target="_blank">bring you numbers</a> and
+                            raw information, not actual proofs that the whole of your <em>code logic</em> has
+                            been actually covered. If you ask me, the best inputs you can get about
+                            your code logic and implementation ever are the ones issued out of <a href="http://www.extremeprogramming.org/rules/pair.html" target="_blank">pair programming</a>
+sessions
+                            and <a href="http://alexgaynor.net/2013/sep/26/effective-code-review/" target="_blank">code reviews</a> —
+                            but that's another story.</p>
+                        <p><strong>So is code coverage a silver bullet? No. Is it useful? Definitely. Happy testing!</strong>
+                        </p>
+                    </section></DIV></article>
--- a/resources/tests/readability/001/source.html
+++ b/resources/tests/readability/001/source.html
--- a/resources/tests/readability/002/expected.html
+++ b/resources/tests/readability/002/expected.html
@@ -0,0 +1,594 @@
+<article><DIV id="readability-page-1" class="page">
+                    <article role="article">
+                        <p>For more than a decade the Web has used XMLHttpRequest (XHR) to achieve
+                            asynchronous requests in JavaScript. While very useful, XHR is not a very
+                            nice API. It suffers from lack of separation of concerns. The input, output
+                            and state are all managed by interacting with one object, and state is
+                            tracked using events. Also, the event-based model doesn’t play well with
+                            JavaScript’s recent focus on Promise- and generator-based asynchronous
+                            programming.</p>
+                        <p>The <a href="https://developer.mozilla.org/en-US/docs/Web/API/Fetch_API" target="_blank">Fetch API</a> intends
+                            to fix most of these problems. It does this by introducing the same primitives
+                            to JS that are used in the HTTP protocol. In addition, it introduces a
+                            utility function <code>fetch()</code> that succinctly captures the intention
+                            of retrieving a resource from the network.</p>
+                        <p>The <a href="https://fetch.spec.whatwg.org" target="_blank">Fetch specification</a>, which
+                            defines the API, nails down the semantics of a user agent fetching a resource.
+                            This, combined with ServiceWorkers, is an attempt to:</p>
+                        <ol>
+                            <li>Improve the offline experience.</li>
+                            <li>Expose the building blocks of the Web to the platform as part of the
+                                <a href="https://extensiblewebmanifesto.org/" target="_blank">extensible web movement</a>.</li>
+                        </ol>
+                        <p>As of this writing, the Fetch API is available in Firefox 39 (currently
+                            Nightly) and Chrome 42 (currently dev). Github has a <a href="https://github.com/github/fetch" target="_blank">Fetch polyfill</a>.</p>
+                        
+<h2>Feature detection</h2>
+
+                        <p>Fetch API support can be detected by checking for <code>Headers</code>, <code>Request</code>, <code>Response</code> or <code>fetch</code> on
+                            the <code>window</code> or <code>worker</code> scope.</p>
+                        
+<h2>Simple fetching</h2>
+
+                        <p>The most useful, high-level part of the Fetch API is the <code>fetch()</code> function.
+                            In its simplest form it takes a URL and returns a promise that resolves
+                            to the response. The response is captured as a <code>Response</code> object.</p>
+                        <P>
+                            <table>
+                                <tbody>
+                                    <tr>
+                                        <td>
+<pre>fetch<span>(</span><span>"/data.json"</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>res<span>)</span> <span>{</span>
+  <span>// res instanceof Response == true.</span>
+  <span>if</span> <span>(</span>res.<span>ok</span><span>)</span> <span>{</span>
+    res.<span>json</span><span>(</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>data<span>)</span> <span>{</span>
+      console.<span>log</span><span>(</span>data.<span>entries</span><span>)</span><span>;</span>
+    <span>}</span><span>)</span><span>;</span>
+  <span>}</span> <span>else</span> <span>{</span>
+    console.<span>log</span><span>(</span><span>"Looks like the response wasn't perfect, got status"</span><span>,</span> res.<span>status</span><span>)</span><span>;</span>
+  <span>}</span>
+<span>}</span><span>,</span> <span>function</span><span>(</span>e<span>)</span> <span>{</span>
+  console.<span>log</span><span>(</span><span>"Fetch failed!"</span><span>,</span> e<span>)</span><span>;</span>
+<span>}</span><span>)</span><span>;</span></pre>
+                                        </td>
+                                    </tr>
+                                </tbody>
+                            </table>
+            </P>
+            <p>Submitting some parameters, it would look like this:</p>
+            <P>
+                <table>
+                    <tbody>
+                        <tr>
+                            <td>
+<pre>fetch<span>(</span><span>"http://www.example.org/submit.php"</span><span>,</span> <span>{</span>
+  method<span>:</span> <span>"POST"</span><span>,</span>
+  headers<span>:</span> <span>{</span>
+    <span>"Content-Type"</span><span>:</span> <span>"application/x-www-form-urlencoded"</span>
+  <span>}</span><span>,</span>
+  body<span>:</span> <span>"firstName=Nikhil&amp;favColor=blue&amp;password=easytoguess"</span>
+<span>}</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>res<span>)</span> <span>{</span>
+  <span>if</span> <span>(</span>res.<span>ok</span><span>)</span> <span>{</span>
+    alert<span>(</span><span>"Perfect! Your settings are saved."</span><span>)</span><span>;</span>
+  <span>}</span> <span>else</span> <span>if</span> <span>(</span>res.<span>status</span> <span>==</span> <span>401</span><span>)</span> <span>{</span>
+    alert<span>(</span><span>"Oops! You are not authorized."</span><span>)</span><span>;</span>
+  <span>}</span>
+<span>}</span><span>,</span> <span>function</span><span>(</span>e<span>)</span> <span>{</span>
+  alert<span>(</span><span>"Error submitting form!"</span><span>)</span><span>;</span>
+<span>}</span><span>)</span><span>;</span></pre>
+                            </td>
+                        </tr>
+                    </tbody>
+                </table>
+            </P>
+            <p>The <code>fetch()</code> function’s arguments are the same as those passed
+                to the
+                <br>
+<code>Request()</code> constructor, so you may directly pass arbitrarily
+                complex requests to <code>fetch()</code> as discussed below.</p>
+            
+<h2>Headers</h2>
+
+            <p>Fetch introduces 3 interfaces. These are <code>Headers</code>, <code>Request</code> and
+            <br>
+<code>Response</code>. They map directly to the underlying HTTP concepts,
+                but have
+                <br>certain visibility filters in place for privacy and security reasons,
+                such as
+                <br>supporting CORS rules and ensuring cookies aren’t readable by third parties.</p>
+            <p>The <a href="https://fetch.spec.whatwg.org/#headers-class" target="_blank">Headers interface</a> is
+                a simple multi-map of names to values:</p>
+            <P>
+                <table>
+                    <tbody>
+                        <tr>
+                            <td>
+<pre><span>var</span> content <span>=</span> <span>"Hello World"</span><span>;</span>
+<span>var</span> reqHeaders <span>=</span> <span>new</span> Headers<span>(</span><span>)</span><span>;</span>
+reqHeaders.<span>append</span><span>(</span><span>"Content-Type"</span><span>,</span> <span>"text/plain"</span><span>)</span><span>;</span>
+reqHeaders.<span>append</span><span>(</span><span>"Content-Length"</span><span>,</span> content.<span>length</span>.<span>toString</span><span>(</span><span>)</span><span>)</span><span>;</span>
+reqHeaders.<span>append</span><span>(</span><span>"X-Custom-Header"</span><span>,</span> <span>"ProcessThisImmediately"</span><span>)</span><span>;</span></pre>
+                            </td>
+                        </tr>
+                    </tbody>
+                </table>
+            </P>
+            <p>The same can be achieved by passing an array of arrays or a JS object
+                literal
+                <br>to the constructor:</p>
+            <P>
+                <table>
+                    <tbody>
+                        <tr>
+                            <td>
+<pre>reqHeaders <span>=</span> <span>new</span> Headers<span>(</span><span>{</span>
+  <span>"Content-Type"</span><span>:</span> <span>"text/plain"</span><span>,</span>
+  <span>"Content-Length"</span><span>:</span> content.<span>length</span>.<span>toString</span><span>(</span><span>)</span><span>,</span>
+  <span>"X-Custom-Header"</span><span>:</span> <span>"ProcessThisImmediately"</span><span>,</span>
+<span>}</span><span>)</span><span>;</span></pre>
+                            </td>
+                        </tr>
+                    </tbody>
+                </table>
+            </P>
+            <p>The contents can be queried and retrieved:</p>
+            <P>
+                <table>
+                    <tbody>
+                        <tr>
+                            <td>
+<pre>console.<span>log</span><span>(</span>reqHeaders.<span>has</span><span>(</span><span>"Content-Type"</span><span>)</span><span>)</span><span>;</span> <span>// true</span>
+console.<span>log</span><span>(</span>reqHeaders.<span>has</span><span>(</span><span>"Set-Cookie"</span><span>)</span><span>)</span><span>;</span> <span>// false</span>
+reqHeaders.<span>set</span><span>(</span><span>"Content-Type"</span><span>,</span> <span>"text/html"</span><span>)</span><span>;</span>
+reqHeaders.<span>append</span><span>(</span><span>"X-Custom-Header"</span><span>,</span> <span>"AnotherValue"</span><span>)</span><span>;</span>
+ 
+console.<span>log</span><span>(</span>reqHeaders.<span>get</span><span>(</span><span>"Content-Length"</span><span>)</span><span>)</span><span>;</span> <span>// 11</span>
+console.<span>log</span><span>(</span>reqHeaders.<span>getAll</span><span>(</span><span>"X-Custom-Header"</span><span>)</span><span>)</span><span>;</span> <span>// ["ProcessThisImmediately", "AnotherValue"]</span>
+ 
+reqHeaders.<span>delete</span><span>(</span><span>"X-Custom-Header"</span><span>)</span><span>;</span>
+console.<span>log</span><span>(</span>reqHeaders.<span>getAll</span><span>(</span><span>"X-Custom-Header"</span><span>)</span><span>)</span><span>;</span> <span>// []</span></pre>
+                            </td>
+                        </tr>
+                    </tbody>
+                </table>
+            </P>
+            <p>Some of these operations are only useful in ServiceWorkers, but they provide
+            <br>a much nicer API to Headers.</p>
+            <p>Since Headers can be sent in requests, or received in responses, and have
+                various limitations about what information can and should be mutable, <code>Headers</code> objects
+                have a <strong>guard</strong> property. This is not exposed to the Web, but
+                it affects which mutation operations are allowed on the Headers object.
+                <br>Possible values are:</p>
+            <ul>
+                <li>“none”: default.</li>
+                <li>“request”: guard for a Headers object obtained from a Request (<code>Request.headers</code>).</li>
+                <li>“request-no-cors”: guard for a Headers object obtained from a Request
+                    created
+                    <br>with mode “no-cors”.</li>
+                <li>“response”: naturally, for Headers obtained from Response (<code>Response.headers</code>).</li>
+                <li>“immutable”: Mostly used for ServiceWorkers, renders a Headers object
+                <br>read-only.</li>
+            </ul>
+            <p>The details of how each guard affects the behaviors of the Headers object
+                are
+                <br>in the <a href="https://fetch.spec.whatwg.org" target="_blank">specification</a>. For example,
+                you may not append or set a “request” guarded Headers’ “Content-Length”
+                header. Similarly, inserting “Set-Cookie” into a Response header is not
+                allowed so that ServiceWorkers may not set cookies via synthesized Responses.</p>
+            <p>All of the Headers methods throw TypeError if <code>name</code> is not a
+                <a href="https://fetch.spec.whatwg.org/#concept-header-name" target="_blank">valid HTTP Header name</a>. The mutation operations will throw TypeError
+                    if there is an immutable guard. Otherwise they fail silently. For example:</p>
+            <P>
+                <table>
+                    <tbody>
+                        <tr>
+                            <td>
+<pre><span>var</span> res <span>=</span> Response.<span>error</span><span>(</span><span>)</span><span>;</span>
+<span>try</span> <span>{</span>
+  res.<span>headers</span>.<span>set</span><span>(</span><span>"Origin"</span><span>,</span> <span>"http://mybank.com"</span><span>)</span><span>;</span>
+<span>}</span> <span>catch</span><span>(</span>e<span>)</span> <span>{</span>
+  console.<span>log</span><span>(</span><span>"Cannot pretend to be a bank!"</span><span>)</span><span>;</span>
+<span>}</span></pre>
+                            </td>
+                        </tr>
+                    </tbody>
+                </table>
+        </P>
+        
+<h2>Request</h2>
+
+        <p>The Request interface defines a request to fetch a resource over HTTP.
+            URL, method and headers are expected, but the Request also allows specifying
+            a body, a request mode, credentials and cache hints.</p>
+        <p>The simplest Request is, of course, just a URL, as you may do to GET a
+            resource.</p>
+        <P>
+            <table>
+                <tbody>
+                    <tr>
+                        <td>
+<pre><span>var</span> req <span>=</span> <span>new</span> Request<span>(</span><span>"/index.html"</span><span>)</span><span>;</span>
+console.<span>log</span><span>(</span>req.<span>method</span><span>)</span><span>;</span> <span>// "GET"</span>
+console.<span>log</span><span>(</span>req.<span>url</span><span>)</span><span>;</span> <span>// "http://example.com/index.html"</span></pre>
+                        </td>
+                    </tr>
+                </tbody>
+            </table>
+        </P>
+        <p>You may also pass a Request to the <code>Request()</code> constructor to
+            create a copy.
+            <br>(This is not the same as calling the <code>clone()</code> method, which
+            is covered in
+            <br>the “Reading bodies” section.)</p>
+        <P>
+            <table>
+                <tbody>
+                    <tr>
+                        <td>
+<pre><span>var</span> copy <span>=</span> <span>new</span> Request<span>(</span>req<span>)</span><span>;</span>
+console.<span>log</span><span>(</span>copy.<span>method</span><span>)</span><span>;</span> <span>// "GET"</span>
+console.<span>log</span><span>(</span>copy.<span>url</span><span>)</span><span>;</span> <span>// "http://example.com/index.html"</span></pre>
+                        </td>
+                    </tr>
+                </tbody>
+            </table>
+        </P>
+        <p>Again, this form is probably only useful in ServiceWorkers.</p>
+        <p>The non-URL attributes of the <code>Request</code> can only be set by passing
+            initial
+            <br>values as a second argument to the constructor. This argument is a dictionary.</p>
+        <P>
+            <table>
+                <tbody>
+                    <tr>
+                        <td>
+<pre><span>var</span> uploadReq <span>=</span> <span>new</span> Request<span>(</span><span>"/uploadImage"</span><span>,</span> <span>{</span>
+  method<span>:</span> <span>"POST"</span><span>,</span>
+  headers<span>:</span> <span>{</span>
+    <span>"Content-Type"</span><span>:</span> <span>"image/png"</span><span>,</span>
+  <span>}</span><span>,</span>
+  body<span>:</span> <span>"image data"</span>
+<span>}</span><span>)</span><span>;</span></pre>
+                        </td>
+                    </tr>
+                </tbody>
+            </table>
+            </P>
+            <p>The Request’s mode is used to determine if cross-origin requests lead
+                to valid responses, and which properties on the response are readable.
+                Legal mode values are <code>"same-origin"</code>, <code>"no-cors"</code> (default)
+                and <code>"cors"</code>.</p>
+            <p>The <code>"same-origin"</code> mode is simple: if a request is made to another
+                origin with this mode set, the result is simply an error. You could use
+                this to ensure that
+                <br>a request is always being made to your origin.</p>
+            <P>
+                <table>
+                    <tbody>
+                        <tr>
+                            <td>
+<pre><span>var</span> arbitraryUrl <span>=</span> document.<span>getElementById</span><span>(</span><span>"url-input"</span><span>)</span>.<span>value</span><span>;</span>
+fetch<span>(</span>arbitraryUrl<span>,</span> <span>{</span> mode<span>:</span> <span>"same-origin"</span> <span>}</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>res<span>)</span> <span>{</span>
+  console.<span>log</span><span>(</span><span>"Response succeeded?"</span><span>,</span> res.<span>ok</span><span>)</span><span>;</span>
+<span>}</span><span>,</span> <span>function</span><span>(</span>e<span>)</span> <span>{</span>
+  console.<span>log</span><span>(</span><span>"Please enter a same-origin URL!"</span><span>)</span><span>;</span>
+<span>}</span><span>)</span><span>;</span></pre>
+                            </td>
+                        </tr>
+                    </tbody>
+                </table>
+            </P>
+            <p>The <code>"no-cors"</code> mode captures what the web platform does by default
+                for scripts you import from CDNs, images hosted on other domains, and so
+                on. First, it prevents the method from being anything other than “HEAD”,
+                “GET” or “POST”. Second, if any ServiceWorkers intercept these requests,
+                they may not add or override any headers except for <a href="https://fetch.spec.whatwg.org/#simple-header" target="_blank">these</a>.
+                Third, JavaScript may not access any properties of the resulting Response.
+                This ensures that ServiceWorkers do not affect the semantics of the Web
+                and prevents security and privacy issues that could arise from leaking
+                data across domains.</p>
+            <p><code>"cors"</code> mode is what you’ll usually use to make known cross-origin
+                requests to access various APIs offered by other vendors. These are expected
+                to adhere to
+                <br>the <a href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Access_control_CORS" target="_blank">CORS protocol</a>.
+                Only a <a href="https://fetch.spec.whatwg.org/#concept-filtered-response-cors" target="_blank">limited set</a> of
+                headers is exposed in the Response, but the body is readable. For example,
+                you could get a list of Flickr’s <a href="https://www.flickr.com/services/api/flickr.interestingness.getList.html" target="_blank">most interesting</a> photos
+                today like this:</p>
+            <P>
+                <table>
+                    <tbody>
+                        <tr>
+                            <td>
+<pre><span>var</span> u <span>=</span> <span>new</span> URLSearchParams<span>(</span><span>)</span><span>;</span>
+u.<span>append</span><span>(</span><span>'method'</span><span>,</span> <span>'flickr.interestingness.getList'</span><span>)</span><span>;</span>
+u.<span>append</span><span>(</span><span>'api_key'</span><span>,</span> <span>'&lt;insert api key here&gt;'</span><span>)</span><span>;</span>
+u.<span>append</span><span>(</span><span>'format'</span><span>,</span> <span>'json'</span><span>)</span><span>;</span>
+u.<span>append</span><span>(</span><span>'nojsoncallback'</span><span>,</span> <span>'1'</span><span>)</span><span>;</span>
+ 
+<span>var</span> apiCall <span>=</span> fetch<span>(</span><span>'https://api.flickr.com/services/rest?'</span> <span>+</span> u<span>)</span><span>;</span>
+ 
+apiCall.<span>then</span><span>(</span><span>function</span><span>(</span>response<span>)</span> <span>{</span>
+  <span>return</span> response.<span>json</span><span>(</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>json<span>)</span> <span>{</span>
+    <span>// photo is a list of photos.</span>
+    <span>return</span> json.<span>photos</span>.<span>photo</span><span>;</span>
+  <span>}</span><span>)</span><span>;</span>
+<span>}</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>photos<span>)</span> <span>{</span>
+  photos.<span>forEach</span><span>(</span><span>function</span><span>(</span>photo<span>)</span> <span>{</span>
+    console.<span>log</span><span>(</span>photo.<span>title</span><span>)</span><span>;</span>
+  <span>}</span><span>)</span><span>;</span>
+<span>}</span><span>)</span><span>;</span></pre>
+                            </td>
+                        </tr>
+                    </tbody>
+                </table>
+            </P>
+            <p>You may not read out the “Date” header since Flickr does not allow it
+                via
+                <br>
+<code>Access-Control-Expose-Headers</code>.</p>
+            <P>
+                <table>
+                    <tbody>
+                        <tr>
+                            <td>
+<pre>response.<span>headers</span>.<span>get</span><span>(</span><span>"Date"</span><span>)</span><span>;</span> <span>// null</span></pre>
+                            </td>
+                        </tr>
+                    </tbody>
+                </table>
+            </P>
+            <p>The <code>credentials</code> enumeration determines if cookies for the other
+                domain are
+                <br>sent to cross-origin requests. This is similar to XHR’s <code>withCredentials</code>
+                <br>flag, but tri-valued as <code>"omit"</code> (default), <code>"same-origin"</code> and <code>"include"</code>.</p>
+            <p>The Request object will also give the ability to offer caching hints to
+                the user-agent. This is currently undergoing some <a href="https://github.com/slightlyoff/ServiceWorker/issues/585" target="_blank">security review</a>.
+                Firefox exposes the attribute, but it has no effect.</p>
+            <p>Requests have two read-only attributes that are relevant to ServiceWorkers
+            <br>intercepting them. There is the string <code>referrer</code>, which is
+                set by the UA to be
+                <br>the referrer of the Request. This may be an empty string. The other is
+                <br>
+<code>context</code> which is a rather <a href="https://fetch.spec.whatwg.org/#requestcredentials" target="_blank">large enumeration</a> defining
+                what sort of resource is being fetched. This could be “image” if the request
+                is from an
+                &lt;img&gt; tag in the controlled document, “worker” if it is an attempt to load a
+                worker script, and so on. When used with the <code>fetch()</code> function,
+                it is “fetch”.</p>
+            
+<h2>Response</h2>
+
+            <p><code>Response</code> instances are returned by calls to <code>fetch()</code>.
+                They can also be created by JS, but this is only useful in ServiceWorkers.</p>
+            <p>We have already seen some attributes of Response when we looked at <code>fetch()</code>.
+                The most obvious candidates are <code>status</code>, an integer (default
+                value 200) and <code>statusText</code> (default value “OK”), which correspond
+                to the HTTP status code and reason. The <code>ok</code> attribute is just
+                a shorthand for checking that <code>status</code> is in the range 200-299
+                inclusive.</p>
+            <p><code>headers</code> is the Response’s Headers object, with guard “response”.
+                The <code>url</code> attribute reflects the URL of the corresponding request.</p>
+            <p>Response also has a <code>type</code>, which is “basic”, “cors”, “default”,
+                “error” or
+                <br>“opaque”.</p>
+            <ul>
+                <li>
+<code>"basic"</code>: normal, same origin response, with all headers exposed
+                    except
+                    <br>“Set-Cookie” and “Set-Cookie2″.</li>
+                <li>
+<code>"cors"</code>: response was received from a valid cross-origin request.
+                    <a href="https://fetch.spec.whatwg.org/#concept-filtered-response-cors" target="_blank">Certain headers and the body</a>may be accessed.</li>
+                <li>
+<code>"error"</code>: network error. No useful information describing
+                    the error is available. The Response’s status is 0, headers are empty and
+                    immutable. This is the type for a Response obtained from <code>Response.error()</code>.</li>
+                <li>
+<code>"opaque"</code>: response for “no-cors” request to cross-origin
+                resource. <a href="https://fetch.spec.whatwg.org/#concept-filtered-response-opaque" target="_blank">Severely<br>
+  restricted</a>
+                </li>
+            </ul>
+            <p>The “error” type results in the <code>fetch()</code> Promise rejecting with
+                TypeError.</p>
+            <p>There are certain attributes that are useful only in a ServiceWorker scope.
+                The
+                <br>idiomatic way to return a Response to an intercepted request in ServiceWorkers
+                is:</p>
+            <P>
+                <table>
+                    <tbody>
+                        <tr>
+                            <td>
+<pre>addEventListener<span>(</span><span>'fetch'</span><span>,</span> <span>function</span><span>(</span>event<span>)</span> <span>{</span>
+  event.<span>respondWith</span><span>(</span><span>new</span> Response<span>(</span><span>"Response body"</span><span>,</span> <span>{</span>
+    headers<span>:</span> <span>{</span> <span>"Content-Type"</span> <span>:</span> <span>"text/plain"</span> <span>}</span>
+  <span>}</span><span>)</span><span>;</span>
+<span>}</span><span>)</span><span>;</span></pre>
+                            </td>
+                        </tr>
+                    </tbody>
+                </table>
+            </P>
+            <p>As you can see, Response has a two argument constructor, where both arguments
+                are optional. The first argument is a body initializer, and the second
+                is a dictionary to set the <code>status</code>, <code>statusText</code> and <code>headers</code>.</p>
+            <p>The static method <code>Response.error()</code> simply returns an error
+                response. Similarly, <code>Response.redirect(url, status)</code> returns
+                a Response resulting in
+                <br>a redirect to <code>url</code>.</p>
+            
+<h2>Dealing with bodies</h2>
+
+            <p>Both Requests and Responses may contain body data. We’ve been glossing
+                over it because of the various data types body may contain, but we will
+                cover it in detail now.</p>
+            <p>A body is an instance of any of the following types.</p>
+            <ul>
+                <li>
+<a href="https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/ArrayBuffer" target="_blank">ArrayBuffer</a>
+                </li>
+                <li>
+<a href="https://developer.mozilla.org/en-US/docs/Web/API/ArrayBufferView" target="_blank">ArrayBufferView</a> (Uint8Array
+                    and friends)</li>
+                <li>
+<a href="https://developer.mozilla.org/en-US/docs/Web/API/Blob" target="_blank">Blob</a>/
+                    <a href="https://developer.mozilla.org/en-US/docs/Web/API/File" target="_blank">File</a>
+                </li>
+                <li>string</li>
+                <li>
+<a href="https://url.spec.whatwg.org/#interface-urlsearchparams" target="_blank">URLSearchParams</a>
+                </li>
+                <li>
+<a href="https://developer.mozilla.org/en-US/docs/Web/API/FormData" target="_blank">FormData</a> –
+                    currently not supported by either Gecko or Blink. Firefox expects to ship
+                    this in version 39 along with the rest of Fetch.</li>
+            </ul>
+            <p>In addition, Request and Response both offer the following methods to
+                extract their body. These all return a Promise that is eventually resolved
+                with the actual content.</p>
+            <ul>
+                <li>
+<code>arrayBuffer()</code>
+                </li>
+                <li>
+<code>blob()</code>
+                </li>
+                <li>
+<code>json()</code>
+                </li>
+                <li>
+<code>text()</code>
+                </li>
+                <li>
+<code>formData()</code>
+                </li>
+            </ul>
+            <p>This is a significant improvement over XHR in terms of ease of use of
+                non-text data!</p>
+            <p>Request bodies can be set by passing <code>body</code> parameters:</p>
+            <P>
+                <table>
+                    <tbody>
+                        <tr>
+                            <td>
+<pre><span>var</span> form <span>=</span> <span>new</span> FormData<span>(</span>document.<span>getElementById</span><span>(</span><span>'login-form'</span><span>)</span><span>)</span><span>;</span>
+fetch<span>(</span><span>"/login"</span><span>,</span> <span>{</span>
+  method<span>:</span> <span>"POST"</span><span>,</span>
+  body<span>:</span> form
+<span>}</span><span>)</span></pre>
+                            </td>
+                        </tr>
+                    </tbody>
+                </table>
+                </P>
+                <p>Responses take the first argument as the body.</p>
+                <P>
+                    <table>
+                        <tbody>
+                            <tr>
+                                <td>
+<pre><span>var</span> res <span>=</span> <span>new</span> Response<span>(</span><span>new</span> File<span>(</span><span>[</span><span>"chunk"</span><span>,</span> <span>"chunk"</span><span>]</span><span>,</span> <span>"archive.zip"</span><span>,</span>
+                       <span>{</span> type<span>:</span> <span>"application/zip"</span> <span>}</span><span>)</span><span>)</span><span>;</span></pre>
+                                </td>
+                            </tr>
+                        </tbody>
+                    </table>
+                </P>
+                <p>Both Request and Response (and by extension the <code>fetch()</code> function),
+                    will try to intelligently <a href="https://fetch.spec.whatwg.org/#concept-bodyinit-extract" target="_blank">determine the content type</a>.
+                    Request will also automatically set a “Content-Type” header if none is
+                    set in the dictionary.</p>
+                
+<h3>Streams and cloning</h3>
+
+                <p>It is important to realise that Request and Response bodies can only be
+                    read once! Both interfaces have a boolean attribute <code>bodyUsed</code> to
+                    determine if it is safe to read or not.</p>
+                <P>
+                    <table>
+                        <tbody>
+                            <tr>
+                                <td>
+<pre><span>var</span> res <span>=</span> <span>new</span> Response<span>(</span><span>"one time use"</span><span>)</span><span>;</span>
+console.<span>log</span><span>(</span>res.<span>bodyUsed</span><span>)</span><span>;</span> <span>// false</span>
+res.<span>text</span><span>(</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>v<span>)</span> <span>{</span>
+  console.<span>log</span><span>(</span>res.<span>bodyUsed</span><span>)</span><span>;</span> <span>// true</span>
+<span>}</span><span>)</span><span>;</span>
+console.<span>log</span><span>(</span>res.<span>bodyUsed</span><span>)</span><span>;</span> <span>// true</span>
+ 
+res.<span>text</span><span>(</span><span>)</span>.<span>catch</span><span>(</span><span>function</span><span>(</span>e<span>)</span> <span>{</span>
+  console.<span>log</span><span>(</span><span>"Tried to read already consumed Response"</span><span>)</span><span>;</span>
+<span>}</span><span>)</span><span>;</span></pre>
+                                </td>
+                            </tr>
+                        </tbody>
+                    </table>
+                </P>
+                <p>This decision allows easing the transition to an eventual <a href="https://streams.spec.whatwg.org/" target="_blank">stream-based</a> Fetch
+                    API. The intention is to let applications consume data as it arrives, allowing
+                    for JavaScript to deal with larger files like videos, and perform things
+                    like compression and editing on the fly.</p>
+                <p>Often, you’ll want access to the body multiple times. For example, you
+                    can use the upcoming <a href="http://slightlyoff.github.io/ServiceWorker/spec/service_worker/index.html#cache-objects" target="_blank">Cache API</a> to
+                    store Requests and Responses for offline use, and Cache requires bodies
+                    to be available for reading.</p>
+                <p>So how do you read out the body multiple times within such constraints?
+                    The API provides a <code>clone()</code> method on the two interfaces. This
+                    will return a clone of the object, with a ‘new’ body. <code>clone()</code> MUST
+                    be called before the body of the corresponding object has been used. That
+                    is, <code>clone()</code> first, read later.</p>
+                <P>
+                    <table>
+                        <tbody>
+                            <tr>
+                                <td>
+<pre>addEventListener<span>(</span><span>'fetch'</span><span>,</span> <span>function</span><span>(</span>evt<span>)</span> <span>{</span>
+  <span>var</span> sheep <span>=</span> <span>new</span> Response<span>(</span><span>"Dolly"</span><span>)</span><span>;</span>
+  console.<span>log</span><span>(</span>sheep.<span>bodyUsed</span><span>)</span><span>;</span> <span>// false</span>
+  <span>var</span> clone <span>=</span> sheep.<span>clone</span><span>(</span><span>)</span><span>;</span>
+  console.<span>log</span><span>(</span>clone.<span>bodyUsed</span><span>)</span><span>;</span> <span>// false</span>
+ 
+  clone.<span>text</span><span>(</span><span>)</span><span>;</span>
+  console.<span>log</span><span>(</span>sheep.<span>bodyUsed</span><span>)</span><span>;</span> <span>// false</span>
+  console.<span>log</span><span>(</span>clone.<span>bodyUsed</span><span>)</span><span>;</span> <span>// true</span>
+ 
+  evt.<span>respondWith</span><span>(</span>cache.<span>add</span><span>(</span>sheep.<span>clone</span><span>(</span><span>)</span><span>)</span>.<span>then</span><span>(</span><span>function</span><span>(</span>e<span>)</span> <span>{</span>
+    <span>return</span> sheep<span>;</span>
+  <span>}</span><span>)</span><span>;</span>
+<span>}</span><span>)</span><span>;</span></pre>
+                                </td>
+                            </tr>
+                        </tbody>
+                    </table>
+                </P>
+                
+<h2>Future improvements</h2>
+
+                <p>Along with the transition to streams, Fetch will eventually have the ability
+                    to abort running <code>fetch()</code>es and some way to report the progress
+                    of a fetch. These are provided by XHR, but are a little tricky to fit in
+                    the Promise-based nature of the Fetch API.</p>
+                <p>You can contribute to the evolution of this API by participating in discussions
+                    on the <a href="https://whatwg.org/mailing-list" target="_blank">WHATWG mailing list</a> and
+                    in the issues in the <a href="https://www.w3.org/Bugs/Public/buglist.cgi?product=WHATWG&amp;component=Fetch&amp;resolution=---" target="_blank">Fetch</a> and
+                    <a href="https://github.com/slightlyoff/ServiceWorker/issues" target="_blank">ServiceWorker</a>specifications.</p>
+                <p>For a better web!</p>
+                <p><em>The author would like to thank Andrea Marchesini, Anne van Kesteren and Ben<br>
+Kelly for helping with the specification and implementation.</em>
+                </p>
+                <footer>
+                    <p>Posted by <a href="https://hacks.mozilla.org/author/nmarathemozilla-com/" title="Posts by Nikhil Marathe" rel="author" target="_blank">Nikhil Marathe</a>
+on
+                        <time datetime="2015-03-10T08:05:41-07:00">March 10, 2015</time>at
+                            <time datetime="PDT08:05:41-07:00">08:05</time>
+                    </p>
+                    <P>
+                        
+                    </P>
+                </footer>
+                </article>
+                
+                
+                </DIV></article>
--- a/resources/tests/readability/002/source.html
+++ b/resources/tests/readability/002/source.html
--- a/src/article.rs
+++ b/src/article.rs
@ -32,8 +32,7 @@ impl Article {
            .map(|doc| doc.to_string_with_options(options))
    }

-    #[allow(dead_code)]
-    pub(crate) fn save_html(&self, path: &PathBuf) -> Result<(), Error> {
+    pub fn save_html(&self, path: &PathBuf) -> Result<(), Error> {
        if let Some(ref html) = self.get_content() {
            if let Ok(()) = std::fs::create_dir_all(path) {
                let mut file_name = match self.title.clone() {
--- a/src/constants.rs
+++ b/src/constants.rs
@ -4,6 +4,8 @@ use once_cell::sync::Lazy;
 use regex::Regex;

 pub const DEFAULT_CHAR_THRESHOLD: usize = 500;
+/// Matches text that contains an image file extension (`.jpg`, `.jpeg`,
+/// `.png` or `.webp`), case-insensitively.
+///
+/// The `regex` crate has no `/pattern/flags` literal syntax: surrounding
+/// slashes and a trailing `i` are treated as literal characters to match.
+/// Case-insensitivity is therefore requested with the inline `(?i)` flag and
+/// the pattern is written without delimiters.
+pub static IS_IMAGE: Lazy<Regex> =
+    Lazy::new(|| Regex::new(r#"(?i)\.(jpg|jpeg|png|webp)"#).expect("IS_IMAGE regex"));
 pub static SIBLING_CONTENT: Lazy<Regex> =
    Lazy::new(|| Regex::new(r#"/\.( |$)/"#).expect("SIBLING_CONTENT regex"));
 pub static BYLINE: Lazy<Regex> = Lazy::new(|| {
--- a/src/full_text_parser/config/config_collection.rs
+++ b/src/full_text_parser/config/config_collection.rs
@ -21,7 +21,13 @@ impl ConfigCollection {
        for (file_name, entry) in EmbededConfigFiles::iter()
            .filter_map(|file_name| EmbededConfigFiles::get(&file_name).map(|e| (file_name, e)))
        {
-            let entry = ConfigEntry::parse_data(entry.data).await.unwrap();
+            let entry = match ConfigEntry::parse_data(entry.data).await {
+                Ok(entry) => entry,
+                Err(error) => {
+                    log::error!("{error}");
+                    continue;
+                }
+            };
            let file_name: &str = file_name.borrow();
            embedded_entries.insert(file_name.to_owned(), entry);
        }
--- a/src/full_text_parser/config/config_entry.rs
+++ b/src/full_text_parser/config/config_entry.rs
@ -19,7 +19,7 @@ pub struct Header {
    pub value: String,
 }

-#[derive(Clone)]
+#[derive(Clone, Default)]
 pub struct ConfigEntry {
    pub xpath_title: Vec<String>,
    pub xpath_author: Vec<String>,
@ -34,24 +34,6 @@ pub struct ConfigEntry {
    pub next_page_link: Option<String>,
 }

-impl Default for ConfigEntry {
-    fn default() -> Self {
-        Self {
-            xpath_title: Vec::new(),
-            xpath_author: Vec::new(),
-            xpath_date: Vec::new(),
-            xpath_body: Vec::new(),
-            xpath_strip: Vec::new(),
-            strip_id_or_class: Vec::new(),
-            strip_image_src: Vec::new(),
-            replace: Vec::new(),
-            header: Vec::new(),
-            single_page_link: None,
-            next_page_link: None,
-        }
-    }
-}
-
 impl ConfigEntry {
    pub async fn parse_path(config_path: &Path) -> Result<ConfigEntry, ConfigError> {
        let mut file = fs::File::open(&config_path).await?;
--- a/src/full_text_parser/metadata.rs
+++ b/src/full_text_parser/metadata.rs
@ -23,9 +23,9 @@ pub fn extract(
                    let new_title = constants::TITLE_CUT_END.replace(&title, "$1");
                    let word_count = constants::WORD_COUNT.split(&title).count();
                    if word_count < 3 {
-                        constants::TITLE_CUT_FRONT.replace(&title, "$1").to_string()
+                        constants::TITLE_CUT_FRONT.replace(&title, "$1").trim().to_string()
                    } else {
-                        new_title.to_string()
+                        new_title.trim().to_string()
                    }
                } else {
                    title
--- a/src/full_text_parser/mod.rs
+++ b/src/full_text_parser/mod.rs
@ -11,6 +11,7 @@ use self::config::{ConfigCollection, ConfigEntry};
 use self::error::FullTextParserError;
 use self::readability::Readability;
 use crate::article::Article;
+use crate::constants;
 use crate::util::Util;

 use encoding_rs::Encoding;
@ -19,9 +20,8 @@ use libxml::parser::Parser;
 use libxml::tree::{Document, Node};
 use libxml::xpath::Context;
 use log::{debug, error, info, warn};
-use regex::Regex;
 use reqwest::header::HeaderMap;
-use reqwest::Client;
+use reqwest::{Client, Url};
 use std::path::Path;
 use std::str::from_utf8;

@ -40,6 +40,8 @@ impl FullTextParser {
        url: &url::Url,
        client: &Client,
    ) -> Result<Article, FullTextParserError> {
+        libxml::tree::node::set_node_rc_guard(3);
+
        info!("Scraping article: '{}'", url.as_str());

        // check if we have a config for the url
@ -106,7 +108,6 @@ impl FullTextParser {

        self.parse_pages(
            &mut article,
-            &url,
            &html,
            &mut root,
            config,
@ -137,7 +138,6 @@ impl FullTextParser {
    async fn parse_pages(
        &self,
        article: &mut Article,
-        url: &url::Url,
        html: &str,
        root: &mut Node,
        config: Option<&ConfigEntry>,
@ -183,7 +183,8 @@ impl FullTextParser {
        if article.thumbnail_url.is_none() {
            Self::check_for_thumbnail(&xpath_ctx, article);
        }
-        Self::strip_junk(&xpath_ctx, config, global_config, url);
+        Self::strip_junk(&xpath_ctx, config, global_config);
+        Self::fix_urls(&xpath_ctx, &article.url);
        Self::unwrap_noscript_images(&xpath_ctx)?;
        let found_body = Self::extract_body(&xpath_ctx, root, config, global_config)?;

@ -200,7 +201,8 @@ impl FullTextParser {
            let html = Self::download(&url, client, headers).await?;
            document = Self::parse_html(&html, config, global_config)?;
            xpath_ctx = Self::get_xpath_ctx(&document)?;
-            Self::strip_junk(&xpath_ctx, config, global_config, &url);
+            Self::strip_junk(&xpath_ctx, config, global_config);
+            Self::fix_urls(&xpath_ctx, &url);
            Self::unwrap_noscript_images(&xpath_ctx)?;
            Self::extract_body(&xpath_ctx, root, config, global_config)?;
        }
@ -256,7 +258,8 @@ impl FullTextParser {
        let xpath_ctx = Self::get_xpath_ctx(&document)?;
        metadata::extract(&xpath_ctx, config, Some(global_config), article);
        Self::check_for_thumbnail(&xpath_ctx, article);
-        Self::strip_junk(&xpath_ctx, config, global_config, url);
+        Self::strip_junk(&xpath_ctx, config, global_config);
+        Self::fix_urls(&xpath_ctx, url);
        Self::extract_body(&xpath_ctx, root, config, global_config)?;

        Ok(())
@ -543,12 +546,15 @@ impl FullTextParser {
        Ok(url)
    }

-    fn strip_junk(
-        context: &Context,
-        config: Option<&ConfigEntry>,
-        global_config: &ConfigEntry,
-        url: &url::Url,
-    ) {
+    /// Calls `Self::repair_urls` with `url` for every element/attribute pair
+    /// listed in `TARGETS`, in order.
+    ///
+    /// Each result is discarded (`let _`), so a failure on one pair does not
+    /// prevent the remaining pairs from being processed.
+    fn fix_urls(context: &Context, url: &Url) {
+        // (xpath selecting the elements, attribute holding the URL)
+        const TARGETS: [(&str, &str); 5] = [
+            ("//img", "src"),
+            ("//a", "src"),
+            ("//a", "href"),
+            ("//object", "data"),
+            ("//iframe", "src"),
+        ];
+
+        for (xpath, attribute) in TARGETS {
+            let _ = Self::repair_urls(context, xpath, attribute, url);
+        }
+    }
+
+    fn strip_junk(context: &Context, config: Option<&ConfigEntry>, global_config: &ConfigEntry) {
        // strip specified xpath
        if let Some(config) = config {
            for xpath_strip in &config.xpath_strip {
@ -596,12 +602,6 @@ impl FullTextParser {
        let _ = Self::remove_attribute(context, Some("img"), "sizes");
        let _ = Self::add_attribute(context, Some("a"), "target", "_blank");

-        let _ = Self::repair_urls(context, "//img", "src", url);
-        let _ = Self::repair_urls(context, "//a", "src", url);
-        let _ = Self::repair_urls(context, "//a", "href", url);
-        let _ = Self::repair_urls(context, "//object", "data", url);
-        let _ = Self::repair_urls(context, "//iframe", "src", url);
-
        // strip elements using Readability.com and Instapaper.com ignore class names
        // .entry-unrelated and .instapaper_ignore
        // See http://blog.instapaper.com/post/730281947
@ -638,7 +638,6 @@ impl FullTextParser {
    fn unwrap_noscript_images(ctx: &Context) -> Result<(), FullTextParserError> {
        // Find img without source or attributes that might contains image, and remove it.
        // This is done to prevent a placeholder img is replaced by img from noscript in next step.
-        let img_regex = Regex::new(r#"/\.(jpg|jpeg|png|webp)/i"#).unwrap();
        let img_nodes = Util::evaluate_xpath(ctx, "//img", false)?;
        for mut img_node in img_nodes {
            let attrs = img_node.get_attributes();
@ -648,7 +647,7 @@ impl FullTextParser {
                    || name == "srcset"
                    || name == "data-src"
                    || name == "data-srcset"
-                    || img_regex.is_match(&value)
+                    || constants::IS_IMAGE.is_match(value)
            });
            if !keep {
                img_node.unlink();
@ -668,24 +667,31 @@ impl FullTextParser {
            // attributes that might contains image.
            if let Some(prev) = noscript_node.get_prev_element_sibling() {
                if Util::is_single_image(&prev) {
-
                    {
                        let mut prev_img = prev.clone();

                        if prev_img.get_name().to_uppercase() != "IMG" {
-                            if let Some(img_node) = Util::get_elements_by_tag_name(&prev_img, "img").into_iter().next() {
+                            if let Some(img_node) = Util::get_elements_by_tag_name(&prev_img, "img")
+                                .into_iter()
+                                .next()
+                            {
                                prev_img = img_node;
                            }
                        }

-                        let new_img = Util::get_elements_by_tag_name(&noscript_node, "img").into_iter().next();
+                        let new_img = Util::get_elements_by_tag_name(&noscript_node, "img")
+                            .into_iter()
+                            .next();
                        if let Some(mut new_img) = new_img {
                            for (key, value) in prev_img.get_attributes() {
                                if value.is_empty() {
                                    continue;
                                }

-                                if key == "src" || key == "srcset" || img_regex.is_match(&value) {
+                                if key == "src"
+                                    || key == "srcset"
+                                    || constants::IS_IMAGE.is_match(&value)
+                                {
                                    if new_img.get_attribute(&key).as_deref() == Some(&value) {
                                        continue;
                                    }
@ -695,7 +701,10 @@ impl FullTextParser {
                                        attr_name = format!("data-old-{attr_name}");
                                    }

-                                    new_img.set_attribute(&attr_name, &value).unwrap();
+                                    new_img.set_attribute(&attr_name, &value).map_err(|e| {
+                                        log::error!("{e}");
+                                        FullTextParserError::Xml
+                                    })?;
                                }
                            }
                        }
@ -703,7 +712,10 @@ impl FullTextParser {

                    if let Some(mut parent) = noscript_node.get_parent() {
                        if let Some(first_child) = noscript_node.get_first_child() {
-                            parent.replace_child_node(first_child, prev).unwrap();
+                            parent.replace_child_node(first_child, prev).map_err(|e| {
+                                log::error!("{e}");
+                                FullTextParserError::Xml
+                            })?;
                            noscript_node.unlink();
                        }
                    }
@ -825,7 +837,9 @@ impl FullTextParser {
        Ok(())
    }

-    pub(crate) fn post_process_content(root: &mut Node) -> Result<(), FullTextParserError> {
+    pub(crate) fn post_process_content(
+        root: &mut Node
+    ) -> Result<(), FullTextParserError> {
        Self::clean_classes(root)?;
        Self::simplify_nested_elements(root)?;
        Ok(())
--- a/src/full_text_parser/readability/mod.rs
+++ b/src/full_text_parser/readability/mod.rs
@ -5,7 +5,7 @@ mod tests;

 use std::cmp::Ordering;

-use libxml::tree::{node, Document, Node, NodeType};
+use libxml::tree::{Document, Node, NodeType};

 use self::state::State;
 use super::error::FullTextParserError;
@ -19,8 +19,6 @@ impl Readability {
        root: &mut Node,
        title: Option<&str>,
    ) -> Result<bool, FullTextParserError> {
-        node::set_node_rc_guard(6);
-
        let mut state = State::default();
        let mut document = document;
        let mut attempts: Vec<(Node, usize, Document)> = Vec::new();
@ -253,12 +251,11 @@ impl Readability {
            let mut top_candidate = top_candidates.first().cloned().unwrap_or_else(|| {
                // If we still have no top candidate, just use the body as a last resort.
                // We also have to copy the body node so it is something we can modify.
-                let mut rt = document.get_root_element().unwrap();
-                Self::initialize_node(&mut rt, &state).unwrap();
+                let mut rt = document.get_root_element().expect("doc should have root");
+                Self::initialize_node(&mut rt, &state).expect("init should not fail");
                needed_to_create_top_candidate = true;
                rt
            });
-            let mut parent_of_top_candidate = None;

            let mut alternative_candidate_ancestors = Vec::new();
            // Find a better top candidate node if it contains (at least three) nodes which belong to `topCandidates` array
@ -274,25 +271,21 @@ impl Readability {
            }

            if alternative_candidate_ancestors.len() >= constants::MINIMUM_TOPCANDIDATES {
-                parent_of_top_candidate = top_candidate.get_parent();
+                let mut parent_of_top_candidate = top_candidate.get_parent();

-                loop {
-                    if let Some(parent) = &parent_of_top_candidate {
-                        let mut lists_containing_this_ancestor = 0;
-                        let tmp = usize::min(
-                            alternative_candidate_ancestors.len(),
-                            constants::MINIMUM_TOPCANDIDATES,
-                        );
-                        for ancestor in alternative_candidate_ancestors.iter().take(tmp) {
-                            lists_containing_this_ancestor +=
-                                if ancestor == parent { 1 } else { 0 };
-                        }
+                while let Some(parent) = &parent_of_top_candidate {
+                    let mut lists_containing_this_ancestor = 0;
+                    let tmp = usize::min(
+                        alternative_candidate_ancestors.len(),
+                        constants::MINIMUM_TOPCANDIDATES,
+                    );
+                    for ancestor in alternative_candidate_ancestors.iter().take(tmp) {
+                        lists_containing_this_ancestor +=
+                            if ancestor == parent { 1 } else { 0 };
+                    }

-                        if lists_containing_this_ancestor >= constants::MINIMUM_TOPCANDIDATES {
-                            top_candidate = parent.clone();
-                            break;
-                        }
-                    } else {
+                    if lists_containing_this_ancestor >= constants::MINIMUM_TOPCANDIDATES {
+                        top_candidate = parent.clone();
                        break;
                    }

@ -311,7 +304,7 @@ impl Readability {
            // lurking in other places that we want to unify in. The sibling stuff
            // below does some of that - but only if we've looked high enough up the DOM
            // tree.
-            parent_of_top_candidate = top_candidate.get_parent();
+            let mut parent_of_top_candidate = top_candidate.get_parent();
            let mut last_score = Self::get_content_score(&top_candidate).unwrap_or(0.0);

            // The scores shouldn't get too low.
--- a/src/full_text_parser/readability/tests.rs
+++ b/src/full_text_parser/readability/tests.rs
@ -1,7 +1,4 @@
-use libxml::{
-    tree::{Document, Node},
-    xpath::Context,
-};
+use libxml::tree::{Document, Node};
 use reqwest::Url;

 use crate::{
@ -9,13 +6,21 @@ use crate::{
    full_text_parser::{config::ConfigEntry, metadata},
 };

-async fn prepare(html: &str, url: &Url) -> (Document, Context, Article) {
+async fn run_test(name: &str) {
+    libxml::tree::node::set_node_rc_guard(3);
+    let _ = env_logger::builder().is_test(true).try_init();
+
    let empty_config = ConfigEntry::default();
-    let document = crate::FullTextParser::parse_html(html, None, &empty_config).unwrap();
+
+    let url = Url::parse("http://google.com").unwrap();
+    let html = std::fs::read_to_string(format!("./resources/tests/readability/{name}/source.html"))
+        .expect("Failed to read source HTML");
+    let document = crate::FullTextParser::parse_html(&html, None, &empty_config).unwrap();
    let xpath_ctx = crate::FullTextParser::get_xpath_ctx(&document).unwrap();
-    crate::FullTextParser::strip_junk(&xpath_ctx, None, &empty_config, url);
+
+    crate::FullTextParser::strip_junk(&xpath_ctx, None, &empty_config);
    crate::FullTextParser::unwrap_noscript_images(&xpath_ctx).unwrap();
-    let article = Article {
+    let mut article = Article {
        title: None,
        author: None,
        url: url.clone(),
@ -23,17 +28,6 @@ async fn prepare(html: &str, url: &Url) -> (Document, Context, Article) {
        thumbnail_url: None,
        document: None,
    };
-    (document, xpath_ctx, article)
-}
-
-#[tokio::test]
-async fn test_1() {
-    let _ = env_logger::builder().is_test(true).try_init();
-
-    let html = std::fs::read_to_string(r"./resources/tests/readability-test-1.html")
-        .expect("Failed to read HTML");
-    let url = Url::parse("http://google.com").unwrap();
-    let (document, xpath_ctx, mut article) = prepare(&html, &url).await;

    let mut article_document = Document::new().unwrap();
    let mut root = Node::new("article", None, &document).unwrap();
@ -48,5 +42,21 @@ async fn test_1() {

    article.document = Some(article_document);
    let html = article.get_content().unwrap();
-    std::fs::write("test.html", html).unwrap();
+
+    let expected = std::fs::read_to_string(format!("./resources/tests/readability/{name}/expected.html"))
+        .expect("Failed to read expected HTML");
+
+    //std::fs::write("expected.html", &html).unwrap();
+    
+    assert_eq!(expected, html);
+}
+
+#[tokio::test(flavor = "current_thread")]
+async fn test_001() {
+    run_test("001").await
+}
+
+#[tokio::test(flavor = "current_thread")]
+async fn test_002() {
+    run_test("002").await
 }
--- a/src/full_text_parser/tests.rs
+++ b/src/full_text_parser/tests.rs
@ -1,10 +1,11 @@
-use super::{FullTextParser, config::ConfigEntry};
+use super::{config::ConfigEntry, FullTextParser};
 use libxml::tree::SaveOptions;
 use reqwest::Client;
 use std::path::PathBuf;

 #[tokio::test]
 async fn golem() {
+    let _ = env_logger::builder().is_test(true).try_init();
    let out_path = PathBuf::from(r"./test_output");
    let url = url::Url::parse("https://www.golem.de/news/http-error-418-fehlercode-ich-bin-eine-teekanne-darf-bleiben-1708-129460.html").unwrap();

@ -29,6 +30,7 @@ async fn golem() {

 #[tokio::test]
 async fn phoronix() {
+    let _ = env_logger::builder().is_test(true).try_init();
    let out_path = PathBuf::from(r"./test_output");
    let url =
        url::Url::parse("http://www.phoronix.com/scan.php?page=article&item=amazon_ec2_bare&num=1")
@ -48,6 +50,7 @@ async fn phoronix() {

 #[tokio::test]
 async fn youtube() {
+    let _ = env_logger::builder().is_test(true).try_init();
    let out_path = PathBuf::from(r"./test_output");
    let url = url::Url::parse("https://www.youtube.com/watch?v=8KjaIumu-jI").unwrap();

@ -57,7 +60,7 @@ async fn youtube() {

    assert_eq!(
        article.title.as_deref(),
-        Some("RIGGED! Arena Shuffler is BROKEN | 13 Land Mono Red Burn")
+        Some("RIGGED! Arena Shuffler is BROKEN")
    );
    assert!(article
        .get_content()
@ -67,6 +70,7 @@ async fn youtube() {

 #[tokio::test]
 async fn encoding_windows_1252() {
+    let _ = env_logger::builder().is_test(true).try_init();
    let url = url::Url::parse("https://www.aerzteblatt.de/nachrichten/139511/Scholz-zuversichtlich-mit-Blick-auf-Coronasituation-im-Winter").unwrap();
    let html = FullTextParser::download(&url, &Client::new(), reqwest::header::HeaderMap::new())
        .await
--- a/src/util.rs
+++ b/src/util.rs
@ -86,7 +86,8 @@ impl Util {
        for node in res {
            let content = node.get_content();
            let url_str = if content.trim().is_empty() && node.has_attribute("href") {
-                node.get_attribute("href").unwrap()
+                node.get_attribute("href")
+                    .expect("already checked for href")
            } else {
                content
            };