Playwright response is missing the status code.
This is the code, but the status is always empty:
// Router middleware: logs that it fired and attaches a listener that inspects
// every network response emitted by the current page.
// `puzzleSolved` / `puzzleDetected` are variables defined outside this snippet.
crawler.router.use(async ({ request, response, page, enqueueLinks, log, proxyInfo, session, parseWithCheerio }) => {
    log.info("middleware fired");

    page.on('response', async (pageResponse) => {
        const status = pageResponse.status();
        // This is the call the question is about: the status is passed as a
        // bare second argument to log.info.
        log.info('<<', status);

        // Only responses coming from the captcha endpoint touch the puzzle flags.
        const isCaptchaResponse = pageResponse.url().includes('https://CAPTCHA-URL');
        if (!isCaptchaResponse) {
            return;
        }

        if (status === 403) {
            // Captcha endpoint rejected the attempt.
            log.error('Puzzle Failed');
            puzzleSolved = false;
            puzzleDetected = true;
        } else if (status === 200) {
            // Captcha endpoint accepted the attempt.
            log.info('Puzzle Solved Successful');
            puzzleSolved = true;
            puzzleDetected = false;
        }
    });
});
I tried putting this code in the preNavigation and postNavigation hooks as well, but the status is still missing.
7 Replies
extended-salmon•3y ago
Instead of
log.info('<<', status)
try to use
log.info(`<< ${status}`);
or
log.info('<<', { status });
correct-apricotOP•3y ago
yes! thank you. it worked now!
also, is there any way to retire the session on a bad status code and retry the same request with a new session?
extended-salmon•3y ago
You can use
session.retire();
This article should help:
https://crawlee.dev/docs/next/guides/session-management
Session Management | Crawlee
How to manage your cookies, proxy IP rotations and more
correct-apricotOP•3y ago
session.retire() is retiring the session, but it's not retrying the request with a new session.
extended-salmon•3y ago
Yeah, so you need to throw an error.
// Blocked page detected: drop the current session, then fail the request by throwing.
if (title === 'Blocked') {
session.retire(); // first retire the session
throw new Error('Request was blocked. Will retry...') // on the retry, this request will use a new session.
}
correct-apricotOP•3y ago
I already tried that... it's still not retrying the request :/
here's my requestHandler code:
// Route handler registered under 'products'.
// Retires the session and throws when a captcha was flagged or the response is
// a 403; otherwise collects the product tiles, prints them, and then pauses and
// scrolls the page with randomized delays.
// `puzzleDetected` is a variable defined outside this snippet.
crawler.router.addHandler('products', async ({ request, response, page, enqueueLinks, log, proxyInfo, session, parseWithCheerio }) => {
    // Returns `base` plus a random whole number in [0, spread).
    const withJitter = (base, spread) => base + Math.floor(Math.random() * spread);

    const isBlocked = puzzleDetected || response.status() === 403;
    if (isBlocked) {
        session.retire();
        log.error("Captcha, retiring session");
        throw new Error('Request was blocked. Will retry...');
    }

    console.log(`pre cookies: ${session.getCookieString(page.url())}`);
    log.info("Product page handler");

    const $ = await parseWithCheerio();
    const results = [];
    $('.product-item').each((position, element) => {
        const $product = $(element);
        const url = $product.find('a').attr('href');
        const title = $product.find('.product-item-name').text();
        const price = $product.find('.product-item-price .price').text();
        results.push({ url, title, price });
    });
    console.log(results);

    console.log(`cookies: ${session.getCookieString(page.url())}`);

    // Wait roughly 5s, scroll down by a random amount, then wait roughly 10s.
    await page.waitForTimeout(withJitter(5 * 1000, 2000));
    await page.mouse.wheel(0, withJitter(500, 10000));
    await page.waitForTimeout(withJitter(10 * 1000, 2000));
});
Sorry, my bad. I was using it in the wrong handler! It's working for now. Thank you so much! Really appreciate the help.
@Bilal Ghouri just advanced to level 2! Thanks for your contributions! 🎉