How can I wait with processing further logic until all requests from the batch are processed?

Hi, I have this code:
async processBatch(batch) {
  // requests: {
  //   url: string;
  //   userData: CrawlerUserData;
  // }[]
  const requests = this.generateRequests(batch)
  await this.crawler.addRequests(requests)

  return this.processResults(requests)
}

...

async processResults(requests) {
  ...
  for (const request of requests) {
    const userData = request.userData as CrawlerUserData
    if (userData.error) {
      this.statistics.incrementErrors()
      continue
    }

    if (userData.results) {
      ...
      await this.saveResults(userData)
    }
  }

  return batchResults
}
and this is my route handler:
import { createPlaywrightRouter } from 'crawlee'

export const router = createPlaywrightRouter()

router.addDefaultHandler(async ({ page, request, log }) => {
  const userData = request.userData as CrawlerUserData
  try {
    await page.waitForLoadState('networkidle', { timeout: 5000 })

    const analyzer = new AlertsProximityAnalyzer(userData, callbackCheckingIfDataExist)

    await analyzer.analyze(page) // executing callback

    userData.results = analyzer.results
    // Do I need to save the results here?
  } catch (error) {
    ...
  } finally {
    // Instead of closing the page, reset it for the next use
    await page.evaluate(() => window.stop())
    await page.setContent('<html></html>')
  }
})
The problem is that the actual crawling only starts after the whole processBatch method has finished: the requests are added to the request queue, and processResults runs immediately afterwards, before any route handler has executed, so userData.results does not exist yet. What I want to know is whether I need to move my save-to-DB logic into the route handler, or whether there is some way to pause this function, let the route handler run for all requests, and then resume with processResults. I'll paste a pseudo algorithm of what I expect in a reply below.
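For the first option, moving the saving logic into the route handler would look roughly like this. This is only a sketch reusing the names from the snippets above (CrawlerUserData, AlertsProximityAnalyzer, callbackCheckingIfDataExist) and using Crawlee's Dataset.pushData as a stand-in for my DB write, since saveResults lives on the class and isn't reachable from inside the handler:

import { createPlaywrightRouter, Dataset } from 'crawlee'

export const router = createPlaywrightRouter()

router.addDefaultHandler(async ({ page, request, log }) => {
  const userData = request.userData as CrawlerUserData
  await page.waitForLoadState('networkidle', { timeout: 5000 })

  const analyzer = new AlertsProximityAnalyzer(userData, callbackCheckingIfDataExist)
  await analyzer.analyze(page)

  // Persist right here, while the handler still holds the data.
  // Dataset.pushData writes to Crawlee's default dataset; a direct DB
  // write would work the same way if the DB client is importable here.
  await Dataset.pushData({ url: request.url, results: analyzer.results })
})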
modern-teal (OP) · 8mo ago
async processBatch () {
  1. generateRequests
  2. crawler.addRequests()
  3. await logic from route default handler executions
  4. processResults
}
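In code, and assuming the crawler instance is not already running when processBatch is called, steps 2 and 3 could be covered by awaiting crawler.run(requests), which enqueues the requests and only resolves once the request queue has been fully processed. A sketch of what I mean:

async processBatch(batch) {
  // 1. generateRequests
  const requests = this.generateRequests(batch)

  // 2. + 3. start the crawl and wait until every request has been handled;
  // crawler.run() resolves only after the request queue is drained
  await this.crawler.run(requests)

  // 4. processResults runs only after all default-handler executions
  return this.processResults(requests)
}

One caveat I'm not sure about: the handler may receive a request object reconstructed from the queue rather than my original instance, so mutations to userData there might not be visible on the requests array I pass to processResults — in which case persisting from inside the handler (as sketched above) would be the safer route.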
