Implementing a Recursive Scraper Without Recursion in TypeScript
Any idea how I should create a recursive scraper without actually using recursion? I'm afraid TypeScript will break if I do.
/**
 * Crawls pages starting at `url` using a work queue instead of recursive
 * calls, and collects the HTML of the pages it scrapes.
 *
 * Only links whose `href` starts with `baseUrl` are followed, and each URL is
 * enqueued at most once, so pages that link to each other cannot make the
 * crawl loop forever. The crawl ends when the work queue is empty or when the
 * result queue is full and rejects a page.
 *
 * @param url - Page to start crawling from.
 * @param baseUrl - Prefix a link must have to be followed; defaults to `url`.
 * @returns The `{ url, html }` entries taken from the result queue.
 */
const scrapeUrlRecursive = Effect.fn(function* (url: string, baseUrl: string = url) {
  // Upper bound on the number of pages collected before the crawl stops.
  const maxResults = 2;

  // A dropping queue makes `offer` resolve to `false` once it is full.
  // A bounded queue would instead suspend this fiber on the extra `offer`
  // (back-pressure), and since nothing takes from `results` while the loop
  // runs, the crawl would hang and the `break` below would never trigger.
  const results = yield* Queue.dropping<{ url: string; html: string }>(maxResults);
  const pending = yield* Queue.unbounded<string>();

  // The service does not change between iterations, so resolve it once.
  const dom = yield* DOMService;

  // URLs that have already been enqueued; prevents re-scraping the same page.
  const seen = new Set<string>([url]);
  yield* pending.offer(url);

  while (!(yield* pending.isEmpty)) {
    const currentUrl = yield* pending.take;
    const html = yield* scrapeUrl(currentUrl);

    // Load a document so we can query the links
    const document = yield* dom.load(html);
    const links = Array.fromIterable(document.querySelectorAll("a"))
      .map((anchor) => anchor.href)
      .filter((href) => href.startsWith(baseUrl));

    // Keep only links that have not been enqueued before (this also removes
    // duplicates within the same page).
    const unseen: string[] = [];
    for (const link of links) {
      if (!seen.has(link)) {
        seen.add(link);
        unseen.push(link);
      }
    }
    yield* pending.offerAll(unseen);

    const placed = yield* results.offer({ url: currentUrl, html });
    yield* Effect.logInfo(`Placed ${currentUrl} in results: ${placed}`);
    if (!placed) {
      // The result queue is full: stop crawling.
      break;
    }
  }

  yield* Effect.logInfo("Taking all results");
  return yield* results.takeAll;
}, Effect.scoped);