Crawlee not working with Cloudflare

It keeps returning 403, even with a rotating proxy pool. Source code:
import { PlaywrightCrawler, ProxyConfiguration } from 'crawlee';
import proxy from './proxy_config.js';

// Entry URL for the crawl and the path the collected dataset is exported to.
const START_URLS = ['https://nopecha.com/demo/cloudflare'];
const CSV_EXPORT_PATH = './result.csv';

// Send browser traffic through the proxy described by `./proxy_config.js`.
const proxyConfiguration = new ProxyConfiguration({
    proxyUrls: [`http://${proxy.username}:${proxy.password}@${proxy.host}:${proxy.port}`],
});

/**
 * Processes one crawled page: logs its title, stores the title and URL,
 * and queues the links found on the page.
 *
 * @param {object} context - Crawling context supplied by the crawler.
 */
async function handlePage({ request, page, enqueueLinks, pushData, log }) {
    const title = await page.title();
    log.info(`Title of ${request.loadedUrl} is '${title}'`);

    // Results are saved as JSON to the `./storage/datasets/default` directory.
    await pushData({ title, url: request.loadedUrl });

    // Add the links found on the current page to the crawling queue.
    await enqueueLinks();
}

// PlaywrightCrawler drives a headless browser through the Playwright library.
const crawler = new PlaywrightCrawler({
    proxyConfiguration,
    requestHandler: handlePage,

    // Uncomment this option to see the browser window.
    // headless: false,

    // Comment this option to scrape the full website.
    maxRequestsPerCrawl: 20,
});

// Seed the queue with the first URL and run the crawl to completion.
await crawler.run(START_URLS);

// Write the whole dataset to a single CSV file.
await crawler.exportData(CSV_EXPORT_PATH);

// Or work with the data directly.
const { items } = await crawler.getData();
console.table(items);
import { PlaywrightCrawler, ProxyConfiguration } from 'crawlee';
import proxy from './proxy_config.js';

// Entry URL for the crawl and the path the collected dataset is exported to.
const START_URLS = ['https://nopecha.com/demo/cloudflare'];
const CSV_EXPORT_PATH = './result.csv';

// Send browser traffic through the proxy described by `./proxy_config.js`.
const proxyConfiguration = new ProxyConfiguration({
    proxyUrls: [`http://${proxy.username}:${proxy.password}@${proxy.host}:${proxy.port}`],
});

/**
 * Processes one crawled page: logs its title, stores the title and URL,
 * and queues the links found on the page.
 *
 * @param {object} context - Crawling context supplied by the crawler.
 */
async function handlePage({ request, page, enqueueLinks, pushData, log }) {
    const title = await page.title();
    log.info(`Title of ${request.loadedUrl} is '${title}'`);

    // Results are saved as JSON to the `./storage/datasets/default` directory.
    await pushData({ title, url: request.loadedUrl });

    // Add the links found on the current page to the crawling queue.
    await enqueueLinks();
}

// PlaywrightCrawler drives a headless browser through the Playwright library.
const crawler = new PlaywrightCrawler({
    proxyConfiguration,
    requestHandler: handlePage,

    // Uncomment this option to see the browser window.
    // headless: false,

    // Comment this option to scrape the full website.
    maxRequestsPerCrawl: 20,
});

// Seed the queue with the first URL and run the crawl to completion.
await crawler.run(START_URLS);

// Write the whole dataset to a single CSV file.
await crawler.exportData(CSV_EXPORT_PATH);

// Or work with the data directly.
const { items } = await crawler.getData();
console.table(items);
No description
4 Replies
Hall
Hall · 8mo ago
View post on community site
This post has been pushed to the community knowledgebase. Any replies in this thread will be synced to the community site.
Apify Community
ambitious-aqua
ambitious-aqua (OP) · 8mo ago
@Helper @Apify Developer Community Manager
deep-jade
deep-jade · 8mo ago
Captchas | Academy | Apify Documentation
Learn about the reasons a bot might be presented a captcha, the best ways to avoid captchas in the first place, and how to programmatically solve them.
ambitious-aqua
ambitious-aqua (OP) · 8mo ago
@Hamza Not working; it requires solving the challenge.

Did you find this page helpful?