Custom configuration is not working

I am trying to use a custom configuration, but I have had no luck so far.
No description
8 Replies
absent-sapphire
absent-sapphireOP•3y ago
require('dotenv').config({ path: '..//.env' })
process.env.CRAWLEE_STORAGE_DIR = "../storage"

const { CheerioCrawler, Configuration } = require('crawlee');
const { MongoClient } = require("mongodb");

const UserClient = new MongoClient(process.env.DATABASE_URL);
const AmazonProduct = UserClient.db("group_monitors").collection("amazon_products");

const configuration = new Configuration({
defaultKeyValueStoreId: "amazon"
});

(async () => {
const crawler = new CheerioCrawler({}, configuration)
const products = await AmazonProduct.find().toArray()

crawler.router.addDefaultHandler(async ({ request, $ }) => {
const title = $('title').text();
console.log(title)
})

await crawler.run([
'https://www.amazon.com/kindle-the-lightest-and-most-compact-kindle/dp/B09SWTG9GF',
]);

})()
require('dotenv').config({ path: '..//.env' })
process.env.CRAWLEE_STORAGE_DIR = "../storage"

const { CheerioCrawler, Configuration } = require('crawlee');
const { MongoClient } = require("mongodb");

const UserClient = new MongoClient(process.env.DATABASE_URL);
const AmazonProduct = UserClient.db("group_monitors").collection("amazon_products");

const configuration = new Configuration({
defaultKeyValueStoreId: "amazon"
});

(async () => {
const crawler = new CheerioCrawler({}, configuration)
const products = await AmazonProduct.find().toArray()

crawler.router.addDefaultHandler(async ({ request, $ }) => {
const title = $('title').text();
console.log(title)
})

await crawler.run([
'https://www.amazon.com/kindle-the-lightest-and-most-compact-kindle/dp/B09SWTG9GF',
]);

})()
genetic-orange
genetic-orange•3y ago
You don't store any data with Dataset.pushData(), so it doesn't really have anything to store.
absent-sapphire
absent-sapphireOP•3y ago
It also does not change the default request queue ID.
genetic-orange
genetic-orange•3y ago
But you did not specify that; you specified defaultKeyValueStoreId: "amazon".
absent-sapphire
absent-sapphireOP•3y ago
Basically, any configuration I try to set with this method just does not work.
harsh-harlequin
harsh-harlequin•3y ago
@CTK WARRIOR, just to give you an idea of what I'm doing, here is an example:
const config = new Configuration({ defaultDatasetId: jobId, defaultKeyValueStoreId: jobId, defaultRequestQueueId: jobId});
const rQueue = await crawlee.RequestQueue.open(jobId, config.options);
const config = new Configuration({ defaultDatasetId: jobId, defaultKeyValueStoreId: jobId, defaultRequestQueueId: jobId});
const rQueue = await crawlee.RequestQueue.open(jobId, config.options);
then in your call to CheerioCrawler:
const crawler = new CheerioCrawler({requestQueue: rQueue }, configuration)
const crawler = new CheerioCrawler({requestQueue: rQueue }, configuration)
wise-white
wise-white•3y ago
Any updates here? @CTK WARRIOR @cryptorex @Lukas Krivka, was this resolved in any way? I'm having the same issue, where custom configuration just doesn't work.
Pepa J
Pepa J•3y ago

Did you find this page helpful?