Request queue with id: [id] does not exist

I created an API with Express that runs a Crawlee crawler when an endpoint is called. Weirdly, it works completely fine on the first request I make to the API, but fails on the next ones with the error: Request queue with id: [id] does not exist. I think I'm making some JavaScript mistake, to be honest; I don't have much experience with it. Here is how I'm setting up the API:
import { crawler } from './main.js'; // Import the exported crawler from the main file
import express from "express";

const app = express();
app.use(express.json());

const BASE_URL = "https.....";

app.post("/scrape", async (req, res) => {
  if (!req.body || !req.body.usernames) {
    return res.status(400).json({ error: "Invalid input" });
  }

  const { usernames } = req.body;
  const urls = usernames.map(username => `${BASE_URL}${username}`);

  try {
    await crawler.run(urls);
    const dataset = await crawler.getData();

    return res.status(200).json({ data: dataset });
  } catch (error) {
    console.error("Scraping error:", error);
    return res.status(500).json({ error: "Scraping failed" });
  }
});

const PORT = parseInt(process.env.PORT) || 3000;
app.listen(PORT, () => console.log(`Server running on port ${PORT}`));
Here is how my crawler looks:
import { CheerioCrawler, ProxyConfiguration, Configuration, Dataset, log } from 'crawlee';

const proxies = [...]; // my proxy list

const proxyConfiguration = new ProxyConfiguration({
  proxyUrls: proxies,
});

export const crawler = new CheerioCrawler({
  proxyConfiguration,

  requestHandler: async ({ request, json, proxyInfo }) => {
    log.info(JSON.stringify(proxyInfo, null, 2));

    // Scraping logic

    await Dataset.pushData({
      // pushing data
    });
  },
}, new Configuration({
  persistStorage: false,
}));
wise-white · 2mo ago
Maybe the error happens because Crawlee's components (like request queues) are designed for single use within one crawl. When you try to reuse the same crawler instance for a second request, it is trying to access resources that were already cleaned up. So you probably need to create a new crawler instance for each request, e.g.:
import { CheerioCrawler, ProxyConfiguration, Configuration, Dataset } from 'crawlee';

// Define a function that creates a new crawler instance for each request
const createCrawler = () => {
  const proxies = [...]; // your proxy list

  const proxyConfiguration = new ProxyConfiguration({
    proxyUrls: proxies,
  });

  return new CheerioCrawler({
    proxyConfiguration,
    // Disable the session pool so no session state is shared between runs
    useSessionPool: false,

    requestHandler: async ({ request, json, proxyInfo }) => {
      console.log(JSON.stringify(proxyInfo, null, 2));

      // Scraping logic
      await Dataset.pushData({
        // pushing data
      });
    },
  }, new Configuration({
    persistStorage: false,
  }));
};

// Create a new crawler instance for each request
const crawler = createCrawler();

// Run the crawler with the provided URLs
await crawler.run(urls);
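For completeness, here is a minimal sketch of how your /scrape handler could use that factory instead of the shared crawler instance. It assumes the same Express setup, BASE_URL, and input validation from your question, plus the createCrawler helper above:

// Sketch: build a fresh crawler (and thus a fresh request queue) per incoming request
app.post("/scrape", async (req, res) => {
  if (!req.body || !req.body.usernames) {
    return res.status(400).json({ error: "Invalid input" });
  }

  const { usernames } = req.body;
  const urls = usernames.map(username => `${BASE_URL}${username}`);

  try {
    const crawler = createCrawler(); // new instance for this request only
    await crawler.run(urls);
    const dataset = await crawler.getData();

    return res.status(200).json({ data: dataset });
  } catch (error) {
    console.error("Scraping error:", error);
    return res.status(500).json({ error: "Scraping failed" });
  }
});

Since nothing is persisted and each request gets its own crawler, the second call no longer tries to reuse a request queue that was already dropped after the first run.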
