Error handling for elements that are not found with Puppeteer
In my Puppeteer crawler, I'm searching for some elements that sometimes might not be on the page. Because the crawler awaits each element, a missing element causes an error that can often crash the crawler.
I've tried wrapping the `await` element statements in try/catch blocks to handle the errors and return, but I still see errors, because when the crawler awaits an element it needs to find that element before it can move on.
I want it to be able to skip over elements that are not found, scrape the OTHER elements on the page, and move on.
a small snippet of the code:
3 Replies
@harish just advanced to level 4! Thanks for your contributions! 🎉
compatible-crimsonOP•2y ago
pactRouter.addHandler('PACT_PRODUCT', async ({ page, log, request }) => {
console.log('Scraping products');
try{
const site = 'WearPACT';
const title = await page.$eval('div.product-title', (element) => element.textContent?.trim());
const descriptions: string[] = [];
const featureTexts = await page.$$eval('div.col-12.features.px-0 li', (lis) =>
lis.map((li) => li.textContent?.trim())
);
featureTexts
.filter((text): text is string => text !== undefined)
.forEach((text) => descriptions.push(text));
const originalPrice = await page.$eval('div.dollar-reg', (element) => element.textContent?.trim());
const salePrice = await page.$eval('div.dollar.red', (element) => element.textContent?.trim());
/**
 * Handles a WearPACT product page: scrapes the product fields, appends the
 * record to the in-memory `productList`, and persists it under `productList`
 * in `rawData.json`.
 *
 * Every field is optional on the page. A selector that matches nothing yields
 * `undefined` (or `''` for the review fields) for that field only; the other
 * fields are still scraped and the product is still saved.
 */
pactRouter.addHandler('PACT_PRODUCT', async ({ page, request }) => {
  console.log('Scraping products');

  /**
   * Returns the trimmed text of the first element matching `selector`, or
   * `undefined` when nothing matches.
   *
   * `page.$eval` throws when the selector matches nothing, which would abort
   * the whole product. `page.$$eval` instead passes an empty array to the
   * callback, so a missing element is just "no value" — in one round trip and
   * without leaving an element handle behind.
   */
  const readText = async (selector: string): Promise<string | undefined> =>
    page.$$eval(selector, (elements) => elements[0]?.textContent?.trim());

  try {
    const site = 'WearPACT';
    const title = await readText('div.product-title');

    const featureTexts: (string | undefined)[] = await page.$$eval(
      'div.col-12.features.px-0 li',
      (lis) => lis.map((li) => li.textContent?.trim())
    );
    const descriptions: string[] = featureTexts.filter(
      (text): text is string => text !== undefined
    );

    const originalPrice = await readText('div.dollar-reg');
    const salePrice = await readText('div.dollar.red');

    // The page carries one of two shipping banners; prefer the first, fall
    // back to the second, and leave the field undefined when neither exists.
    const shippingInfo =
      (await readText('div.col-12.free-returns')) ??
      (await readText('div.col-12.free-ship-returns'));

    const reviewScore = (await readText('span.avg-score.font-color-gray-darker')) ?? '';
    const reviewNumber = (await readText('span.reviews-qa-label.font-color-gray')) ?? '';

    const productData = {
      url: request.loadedUrl,
      site,
      title,
      descriptions,
      originalPrice,
      salePrice,
      shippingInfo,
      reviewScore,
      reviewNumber,
    };
    productList.push(productData); // Append productData to productList
    console.log('Scraped', productList.length, 'products');

    // Read the existing data from the rawData.json file. A missing or
    // unparsable file is logged and treated as "no data yet".
    let rawData: { productList?: unknown[] } = {};
    try {
      const rawDataStr = fs.readFileSync('rawData.json', 'utf8');
      rawData = JSON.parse(rawDataStr);
    } catch (error) {
      console.log('Error reading rawData.json:', error);
    }

    // Append the new data to the existing data
    if (rawData.productList) {
      rawData.productList.push(productData);
    } else {
      rawData.productList = [productData];
    }

    // Write the updated data back to the rawData.json file
    fs.writeFileSync('rawData.json', JSON.stringify(rawData, null, 2));
    console.log('rawData.json updated for WearPACT');
  } catch (error) {
    // Anything that still fails here is unexpected (e.g. the page closed or
    // the file could not be written); log the cause instead of hiding it.
    console.log('error scraping product', error);
  }
});
let shippingInfo;
if (await page.$('div.col-12.free-returns')) {
shippingInfo = await page.$eval('div.col-12.free-returns', (element) => element.textContent?.trim());
} else if (await page.$('div.col-12.free-ship-returns')) {
shippingInfo = await page.$eval('div.col-12.free-ship-returns', (element) =>
element.textContent?.trim()
);
}
const reviewScore = (await page.$eval(
'span.avg-score.font-color-gray-darker',
(element) => element.textContent?.trim()
)) || '';
const reviewNumber = (await page.$eval(
'span.reviews-qa-label.font-color-gray',
(element) => element.textContent?.trim()
)) || '';
const productData = {
url: request.loadedUrl,
site,
title,
descriptions,
originalPrice,
salePrice,
shippingInfo,
reviewScore,
reviewNumber,
};
productList.push(productData); // Append productData to productList
console.log('Scraped', productList.length, 'products');
// Read the existing data from the rawData.json file
let rawData: any = {};
try {
const rawDataStr = fs.readFileSync('rawData.json', 'utf8');
rawData = JSON.parse(rawDataStr);
} catch (error) {
console.log('Error reading rawData.json:', error);
}
// Append the new data to the existing data
if (rawData.productList) {
rawData.productList.push(productData);
} else {
rawData.productList = [productData];
}
// Write the updated data back to the rawData.json file
fs.writeFileSync('rawData.json', JSON.stringify(rawData, null, 2));
console.log('rawData.json updated for WearPACT');
}catch(error){
console.log('error scraping product')
return
}
});
try {
await page.waitForSelector('h1.product-single__title');
await page.waitForSelector('div.product-single__description.rte p');
await page.waitForSelector('span.money');
await page.waitForSelector('div.bv_avgRating_component_container');
await page.waitForSelector('div.bv_numReviews_text');
} catch (error) {
console.log('Error waiting for selectors:', error);
return; // Skip this product if selectors are not found
}
try {
const title = await page.$eval('h1.product-single__title', (element) => element.textContent?.trim() ?? '');
const descriptions = await page.$$eval('div.product-single__description.rte p', (elements) =>
elements.map((el) => el.textContent?.trim() ?? '')
);
const originalPrice = await page.$eval('span.money', (element) => element.textContent?.trim() ?? '');
const salePrice = await page.$eval('span.money', (element) => element.textContent?.trim() ?? '');
try {
await page.waitForSelector('h1.product-single__title');
await page.waitForSelector('div.product-single__description.rte p');
await page.waitForSelector('span.money');
await page.waitForSelector('div.bv_avgRating_component_container');
await page.waitForSelector('div.bv_numReviews_text');
} catch (error) {
console.log('Error waiting for selectors:', error);
return; // Skip this product if selectors are not found
}
try {
const title = await page.$eval('h1.product-single__title', (element) => element.textContent?.trim() ?? '');
const descriptions = await page.$$eval('div.product-single__description.rte p', (elements) =>
elements.map((el) => el.textContent?.trim() ?? '')
);
const originalPrice = await page.$eval('span.money', (element) => element.textContent?.trim() ?? '');
const salePrice = await page.$eval('span.money', (element) => element.textContent?.trim() ?? '');
extended-salmon•2y ago
Hi! Are you sure you're not getting the error from somewhere else? Wrapping `waitForSelector()` in a try-catch block should generally work:
// Await the selector inside try/catch so that a rejection from
// waitForSelector is handled here instead of propagating to the caller.
try {
await page.waitForSelector(selector)
// ...
} catch (error) {
// Reached when waitForSelector rejects — presumably because the selector did
// not appear before its timeout. NOTE(review): any other rejection from the
// try body lands here too and is reported with the same message.
console.log("The element didn't appear.")
}
// Await the selector inside try/catch so that a rejection from
// waitForSelector is handled here instead of propagating to the caller.
try {
await page.waitForSelector(selector)
// ...
} catch (error) {
// Reached when waitForSelector rejects — presumably because the selector did
// not appear before its timeout. NOTE(review): any other rejection from the
// try body lands here too and is reported with the same message.
console.log("The element didn't appear.")
}