I have a document fetched by a $.get call, it's a big bloated HTML document. I need to use jQuery to grab an element from it.

I'm trying this (in coffeescript):

$.get url, (data) ->
  title = $(data).find('title').text()

This doesn't work. In browser console I've whittled this down to $(document.documentElement.outerHTML).find('title') where document.documentElement.outerHTML gives a string of the document.

I've tried jQuery.parseHTML, with the same result.

TL;DR ... use the DOMParser API

<!-- language: lang-js --> <pre><code>var <i>htmlString</i> = "&lt;html&gt;&lt;head&gt;&lt;title&gt;Name&lt;/title&gt;&lt;/head&gt;&lt;body&gt;&lt;div class='content'&gt;Hello&lt;/div&gt;&lt;/body&gt;&lt;/html&gt;"; <b>var htmlDoc = (new DOMParser()).parseFromString(</b><i>htmlString</i><b>, "text/html");</b></code></pre>

Unfortunately, the current answers don't handle a lot of edge cases.

You should not use $.parseHTML(htmlString), as it's immediately lossy. If we check the source code of $.parseHTML, it calls buildFragment, which creates a temporary DOM element and sets its innerHTML property.

innerHTML Parsing

Element.innerHTML provides an API for:

And here's the spec for the HTML fragment parsing algorithm.

Taking a sample string, here's the result of trying various HTML Parsing approaches:

<!-- begin snippet: js hide: false console: false babel: false --> <!-- language: lang-js -->
// Sample input shared by every test below: a complete document string with a
// <title> inside <head> and a single div.content inside <body>.
var htmlString = "<html><head><title>Name</title></head><body><div class='content'>Hello</div></body></html>";

/**
 * Parses the shared `htmlString` with five different techniques and reports,
 * for each one, the resulting DOM object, its serialized HTML, the extracted
 * <title> text and the extracted `.content` text via `console.LogOutput`.
 *
 * Relies on names defined outside this function: `htmlString`, jQuery (`$`),
 * `console.LogOutput`, and the browser globals `document` and `DOMParser`.
 *
 * Approaches 4 and 5 index `[0]` on the lookup results, so they throw if the
 * parsed tree has no <title> or no `.content` element.
 */
function ParseHtmlTests() {

  /*** $.parseHTML ***/
  // $.parseHTML returns a plain array of nodes, hence Array.prototype.map below.
  var $parseHtml = $.parseHTML(htmlString);

  console.LogOutput(
    '1. $.parseHTML',
    $parseHtml,
    $parseHtml.map(function(el, i) { return el.outerHTML; }),
    $($parseHtml).find("title").text(),
    $($parseHtml).find(".content").text()
  );


  /*** tempDiv.innerHTML ***/
  // Parse the string in the context of a detached <div>.
  var tempDiv = document.createElement("div");
  tempDiv.innerHTML = htmlString;

  console.LogOutput(
    '2. tempDiv.innerHTML',
    tempDiv,
    tempDiv.outerHTML,
    $(tempDiv).find("title").text(),
    $(tempDiv).find(".content").text()
  );


  /*** divAppendContents ***/
  // Same idea as (2), but letting jQuery build the wrapper and insert the markup.
  var $divAppendContents = $('<div></div>').append(htmlString);

  console.LogOutput(
    '3. divAppendContents',
    $divAppendContents,
    $divAppendContents.html(),
    $divAppendContents.find("title").text(),
    $divAppendContents.find(".content").text()
  );


  /*** tempHtml.innerHTML ***/
  // Parse the string in the context of a detached <html> root element.
  var tmpHtml = document.createElement('html');
  tmpHtml.innerHTML = htmlString;

  console.LogOutput(
    '4. tempHtml.innerHTML',
    tmpHtml,
    tmpHtml.outerHTML,
    tmpHtml.getElementsByTagName('title')[0].innerText,
    tmpHtml.getElementsByClassName('content')[0].innerText
  );


  /*** DOMParser.parseFromString ***/
  // The MIME type must be "text/html" so the HTML parser is used. "text/xml"
  // selects the XML parser, which does not build an HTML document and reports
  // a parser error for markup that is not well-formed XML.
  var htmlDoc = (new DOMParser()).parseFromString(htmlString, "text/html");

  console.LogOutput(
    '5. DOMParser.parseFromString',
    htmlDoc,
    htmlDoc.documentElement.outerHTML,
    htmlDoc.documentElement.getElementsByTagName('title')[0].innerHTML,
    htmlDoc.documentElement.getElementsByClassName('content')[0].innerHTML
  );
}

/*** Create Console Log Methods ***/
// Install plain-log fallbacks only when the host console has no grouping support.
if (!console.group) {
  console.group = function(label) {
    console.log(label);
  };
}
if (!console.groupEnd) {
  console.groupEnd = function(msg) {
    console.log("----------------------------");
  };
}

/**
 * Prints one parsing result as a console group.
 *
 * @param {string} method  Label used as the group heading.
 * @param {*} dom          The parsed DOM object (or node list) to display.
 * @param {*} html         Serialized markup of the parsed result.
 * @param {*} title        Text extracted for the <title> lookup.
 * @param {*} content      Text extracted for the `.content` lookup.
 */
console.LogOutput = function(method, dom, html, title, content) {
  var rows = [
    ["DOM:", dom],
    ["HTML:", html],
    ["Title:", title],
    ["Content:", content]
  ];

  console.group(method);
  rows.forEach(function(row) {
    console.log(row[0], row[1]);
  });
  console.groupEnd();
};

/*** Execute Script ***/
// Run the parsing comparisons once; each result is printed through console.LogOutput.
ParseHtmlTests()
<!-- language: lang-html -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.js"></script>
<!-- end snippet -->

And here's the output from the above script in Chrome:

Output

The best approach seems to be creating an HTML root object, either by setting the innerHTML of a temporary html element or by using the DOMParser API.

Further Reading: