Error handling for unfound elements with Puppeteer

In my Puppeteer crawler, I'm searching for some elements that sometimes might not be there. When they're not there, since the crawler is awaiting that element, it causes an error that can often crash the crawler. I've tried wrapping the await element statements in try/catch statements to handle the errors and return, but I've seen that it still returns errors, because when it awaits the element, it needs to see that element to move on. I want it to be able to skip over unfound elements, scrape the OTHER elements on the page, and move on. A small snippet of the code:
3 Replies
MEE6
MEE6•2y ago
@harish just advanced to level 4! Thanks for your contributions! 🎉
compatible-crimson
compatible-crimsonOP•2y ago
pactRouter.addHandler('PACT_PRODUCT', async ({ page, log, request }) => {
console.log('Scraping products');

try{
const site = 'WearPACT';

const title = await page.$eval('div.product-title', (element) => element.textContent?.trim());

const descriptions: string[] = [];

const featureTexts = await page.$$eval('div.col-12.features.px-0 li', (lis) =>
lis.map((li) => li.textContent?.trim())
);

featureTexts
.filter((text): text is string => text !== undefined)
.forEach((text) => descriptions.push(text));

const originalPrice = await page.$eval('div.dollar-reg', (element) => element.textContent?.trim());
const salePrice = await page.$eval('div.dollar.red', (element) => element.textContent?.trim());
// Handler registered on pactRouter under 'PACT_PRODUCT'. It reads product
// fields off the page, appends them to productList, and appends them to the
// productList array stored in rawData.json.
// `log` is destructured from the context but not used in this handler.
pactRouter.addHandler('PACT_PRODUCT', async ({ page, log, request }) => {
console.log('Scraping products');

// A single try block wraps every lookup below, so the first lookup that throws
// jumps straight to the catch at the bottom: no later field is read and
// nothing is saved for this product.
try{
const site = 'WearPACT';

// NOTE(review): per the Puppeteer docs, page.$eval throws when no element
// matches the selector — confirm for the Puppeteer version in use.
const title = await page.$eval('div.product-title', (element) => element.textContent?.trim());

const descriptions: string[] = [];

// Collect the trimmed text of every feature list item.
const featureTexts = await page.$$eval('div.col-12.features.px-0 li', (lis) =>
lis.map((li) => li.textContent?.trim())
);

// Keep only the entries whose text is defined.
featureTexts
.filter((text): text is string => text !== undefined)
.forEach((text) => descriptions.push(text));

// These two lookups are not guarded by a page.$ check (compare shippingInfo below).
const originalPrice = await page.$eval('div.dollar-reg', (element) => element.textContent?.trim());
const salePrice = await page.$eval('div.dollar.red', (element) => element.textContent?.trim());
// Each selector is probed with page.$ before $eval is called on it.
// shippingInfo stays undefined when neither selector is present.
let shippingInfo;
if (await page.$('div.col-12.free-returns')) {
shippingInfo = await page.$eval('div.col-12.free-returns', (element) => element.textContent?.trim());
} else if (await page.$('div.col-12.free-ship-returns')) {
shippingInfo = await page.$eval('div.col-12.free-ship-returns', (element) =>
element.textContent?.trim()
);
}

// The `|| ''` fallback applies only to the value $eval resolves with
// (undefined or empty text); it does not take effect if the awaited call throws.
const reviewScore = (await page.$eval(
'span.avg-score.font-color-gray-darker',
(element) => element.textContent?.trim()
)) || '';
const reviewNumber = (await page.$eval(
'span.reviews-qa-label.font-color-gray',
(element) => element.textContent?.trim()
)) || '';

const productData = {
url: request.loadedUrl,
site,
title,
descriptions,
originalPrice,
salePrice,
shippingInfo,
reviewScore,
reviewNumber,
};
productList.push(productData); // Append productData to productList

console.log('Scraped', productList.length, 'products');
// Read the existing data from the rawData.json file.
// If reading or parsing fails, the error is logged and rawData stays {}, so
// the write below replaces the file with a productList holding only this product.
let rawData: any = {};
try {
const rawDataStr = fs.readFileSync('rawData.json', 'utf8');
rawData = JSON.parse(rawDataStr);
} catch (error) {
console.log('Error reading rawData.json:', error);
}

// Append the new data to the existing data
if (rawData.productList) {
rawData.productList.push(productData);
} else {
rawData.productList = [productData];
}

// Write the updated data back to the rawData.json file
fs.writeFileSync('rawData.json', JSON.stringify(rawData, null, 2));
console.log('rawData.json updated for WearPACT');
}catch(error){
// The caught error is not logged, so the output does not show which lookup
// or file operation failed.
console.log('error scraping product')
return
}
});
let shippingInfo;
if (await page.$('div.col-12.free-returns')) {
shippingInfo = await page.$eval('div.col-12.free-returns', (element) => element.textContent?.trim());
} else if (await page.$('div.col-12.free-ship-returns')) {
shippingInfo = await page.$eval('div.col-12.free-ship-returns', (element) =>
element.textContent?.trim()
);
}

const reviewScore = (await page.$eval(
'span.avg-score.font-color-gray-darker',
(element) => element.textContent?.trim()
)) || '';
const reviewNumber = (await page.$eval(
'span.reviews-qa-label.font-color-gray',
(element) => element.textContent?.trim()
)) || '';

const productData = {
url: request.loadedUrl,
site,
title,
descriptions,
originalPrice,
salePrice,
shippingInfo,
reviewScore,
reviewNumber,
};
productList.push(productData); // Append productData to productList

console.log('Scraped', productList.length, 'products');
// Read the existing data from the rawData.json file
let rawData: any = {};
try {
const rawDataStr = fs.readFileSync('rawData.json', 'utf8');
rawData = JSON.parse(rawDataStr);
} catch (error) {
console.log('Error reading rawData.json:', error);
}

// Append the new data to the existing data
if (rawData.productList) {
rawData.productList.push(productData);
} else {
rawData.productList = [productData];
}

// Write the updated data back to the rawData.json file
fs.writeFileSync('rawData.json', JSON.stringify(rawData, null, 2));
console.log('rawData.json updated for WearPACT');
}catch(error){
console.log('error scraping product')
return
}
});
I have also tried wrapping the individual sites in their own statements on different sites:
// Wait for every expected selector before scraping. All five awaits share one
// try block, so if any single waitForSelector rejects, the catch below returns
// and the whole product is skipped, even if the other elements are present.
try {
await page.waitForSelector('h1.product-single__title');
await page.waitForSelector('div.product-single__description.rte p');
await page.waitForSelector('span.money');
await page.waitForSelector('div.bv_avgRating_component_container');
await page.waitForSelector('div.bv_numReviews_text');
} catch (error) {
console.log('Error waiting for selectors:', error);
return; // Skip this product if selectors are not found
}

try {
const title = await page.$eval('h1.product-single__title', (element) => element.textContent?.trim() ?? '');

const descriptions = await page.$$eval('div.product-single__description.rte p', (elements) =>
elements.map((el) => el.textContent?.trim() ?? '')
);

const originalPrice = await page.$eval('span.money', (element) => element.textContent?.trim() ?? '');
const salePrice = await page.$eval('span.money', (element) => element.textContent?.trim() ?? '');

try {
await page.waitForSelector('h1.product-single__title');
await page.waitForSelector('div.product-single__description.rte p');
await page.waitForSelector('span.money');
await page.waitForSelector('div.bv_avgRating_component_container');
await page.waitForSelector('div.bv_numReviews_text');
} catch (error) {
console.log('Error waiting for selectors:', error);
return; // Skip this product if selectors are not found
}

try {
const title = await page.$eval('h1.product-single__title', (element) => element.textContent?.trim() ?? '');

const descriptions = await page.$$eval('div.product-single__description.rte p', (elements) =>
elements.map((el) => el.textContent?.trim() ?? '')
);

const originalPrice = await page.$eval('span.money', (element) => element.textContent?.trim() ?? '');
const salePrice = await page.$eval('span.money', (element) => element.textContent?.trim() ?? '');

...
extended-salmon
extended-salmon•2y ago
Hi! Wrapping waitForSelector() in a try-catch block should generally work:
try {
await page.waitForSelector(selector)
// ...
} catch (error) {
console.log("The element didn't appear.")
}
try {
await page.waitForSelector(selector)
// ...
} catch (error) {
console.log("The element didn't appear.")
}
Are you sure you're not getting the error from somewhere else?

Did you find this page helpful?