How can I change/save the logger that the context provides?

The handler's context provides a context.log, but I want to change/save the logger that is used, because I want to persist the logs. I am using Crawlee without the Apify CLI.
No description
6 Replies
Hall
Hall•3mo ago
Someone will reply to you shortly. In the meantime, this might help. This post was marked as solved by Jaogmar. View answer.
conscious-sapphire
conscious-sapphire•3mo ago
Hey @Jaogmar Example with Loguru, But you can also use the standard logger in the same way
import asyncio
from datetime import timedelta

from loguru import logger

from crawlee.crawlers import (
    BeautifulSoupCrawler,
    BeautifulSoupCrawlingContext,
)

# Also persist every log record to a file, with a custom timestamp format.
logger.add('app.log', format='{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}')


async def main() -> None:
    """Run a BeautifulSoupCrawler that logs through a custom Loguru logger.

    `configure_logging=False` stops Crawlee from installing its own logging
    setup, and `_logger=logger` injects the Loguru logger so that both
    `crawler.log` and the handler's `context.log` write through it (and
    therefore into ``app.log``).
    """
    crawler = BeautifulSoupCrawler(
        max_request_retries=1,
        request_handler_timeout=timedelta(seconds=30),
        max_requests_per_crawl=10,
        configure_logging=False,
        _logger=logger,
    )

    @crawler.router.default_handler
    async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
        # context.log routes through the injected Loguru logger.
        context.log.info(f'Processing {context.request.url} ...')

        # Extract the page title and all h1/h2/h3 heading texts.
        data = {
            'url': context.request.url,
            'title': context.soup.title.string if context.soup.title else None,
            'h1s': [h1.text for h1 in context.soup.find_all('h1')],
            'h2s': [h2.text for h2 in context.soup.find_all('h2')],
            'h3s': [h3.text for h3 in context.soup.find_all('h3')],
        }

        await context.push_data(data)

    crawler.log.info('Test')
    await crawler.run(['https://crawlee.dev'])


if __name__ == '__main__':
    asyncio.run(main())
import asyncio
from datetime import timedelta

from loguru import logger

from crawlee.crawlers import (
    BeautifulSoupCrawler,
    BeautifulSoupCrawlingContext,
)

# Mirror all log records into app.log using a custom timestamp format.
logger.add('app.log', format='{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}')


async def main() -> None:
    """Crawl crawlee.dev with a BeautifulSoupCrawler wired to a Loguru logger.

    Passing `configure_logging=False` together with `_logger=logger` makes
    Crawlee skip its default logging configuration and use the supplied
    Loguru logger instead, so `crawler.log` and the handler's `context.log`
    both end up in ``app.log``.
    """
    crawler = BeautifulSoupCrawler(
        max_request_retries=1,
        request_handler_timeout=timedelta(seconds=30),
        max_requests_per_crawl=10,
        configure_logging=False,
        _logger=logger,
    )

    @crawler.router.default_handler
    async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
        # Logged via the injected Loguru logger.
        context.log.info(f'Processing {context.request.url} ...')

        # Gather the page title plus every h1/h2/h3 heading text.
        data = {
            'url': context.request.url,
            'title': context.soup.title.string if context.soup.title else None,
            'h1s': [h1.text for h1 in context.soup.find_all('h1')],
            'h2s': [h2.text for h2 in context.soup.find_all('h2')],
            'h3s': [h3.text for h3 in context.soup.find_all('h3')],
        }

        await context.push_data(data)

    crawler.log.info('Test')
    await crawler.run(['https://crawlee.dev'])


if __name__ == '__main__':
    asyncio.run(main())
flat-fuchsia
flat-fuchsiaOP•3mo ago
Thank you, bro — I forgot to close this thread.
MEE6
MEE6•3mo ago
@Jaogmar just advanced to level 1! Thanks for your contributions! 🎉
flat-fuchsia
flat-fuchsiaOP•3mo ago
I initialized the crawler class with the logger the same way you did. The problem was that I was being very dumb: I forgot to add the correct handlers to the logger, so when I used context.log.info, nothing happened.
conscious-sapphire
conscious-sapphire•3mo ago
Glad you've solved it! 🙂

Did you find this page helpful?