// Download the paper's HTML from arXiv, timing the request and the body read.
console.time("Paper text loaded");
const paperUrl = "https://arxiv.org/html/1706.03762";
const response = await fetch(paperUrl);
// fetch() only rejects on network failure; surface HTTP error statuses
// explicitly so an error page is never chunked as if it were the paper.
if (!response.ok) {
  throw new Error(
    `Failed to fetch ${paperUrl}: ${response.status} ${response.statusText}`,
  );
}
const paperText = await response.text();
console.timeEnd("Paper text loaded");

// Build a document from the fetched HTML and chunk it using the "html"
// strategy, passing the h1 and p header mappings below.
// NOTE(review): the "Chunks created" timer is started here but not stopped
// within this section — confirm a matching console.timeEnd() follows.
console.time("Chunks created");
const doc = MDocument.fromHTML(paperText);
const chunks = await doc.chunk({
  strategy: "html",
  headers: [
    ["h1", "Header 1"],
    ["p", "Paragraph"],
  ],
});