self-hosted /scrape doesn't populate the JSON schema
Running
Whereas with the cloud service I'm also getting:
My scrape parameters:
batch/scrape
on a local setup using Docker, I'm getting all the metadata, but no actual content.
See an example of my results:
[{
"success": true,
"status": "completed",
"completed": 1205,
"total": 1205,
"creditsUsed": 6025,
"expiresAt": "2025-06-12T13:42:40.000Z",
"data": [
{
"metadata": {
"generator": "mkdocs-1.6.1, mkdocs-material-9.6.2+insiders-4.53.15",
"ahrefs-site-verification": "b13556...60a",
"favicon": "https://example.com/_images/favicon.ico",
"viewport": "width=device-width,initial-scale=1",
"title": "Explore example.com Docs: Your Resource for... ",
"language": "en",
"description": "Access example.com Docs for ...",
"scrapeId": "a01....646",
"sourceURL": "https://example.com",
"url": "https://example.com/",
"statusCode": 200,
"contentType": "text/html; charset=UTF-8",
"proxyUsed": "basic"
}
}
},...
[{
"success": true,
"status": "completed",
"completed": 1205,
"total": 1205,
"creditsUsed": 6025,
"expiresAt": "2025-06-12T13:42:40.000Z",
"data": [
{
"metadata": {
"generator": "mkdocs-1.6.1, mkdocs-material-9.6.2+insiders-4.53.15",
"ahrefs-site-verification": "b13556...60a",
"favicon": "https://example.com/_images/favicon.ico",
"viewport": "width=device-width,initial-scale=1",
"title": "Explore example.com Docs: Your Resource for... ",
"language": "en",
"description": "Access example.com Docs for ...",
"scrapeId": "a01....646",
"sourceURL": "https://example.com",
"url": "https://example.com/",
"statusCode": 200,
"contentType": "text/html; charset=UTF-8",
"proxyUsed": "basic"
}
}
},...
{
"main_content": "## FULL MARKDOWN CONTENT",
"shouldUseSmartscrape": false,
"smartscrape_reasoning": null,
"smartscrape_prompt": null
}
{
"main_content": "## FULL MARKDOWN CONTENT",
"shouldUseSmartscrape": false,
"smartscrape_reasoning": null,
"smartscrape_prompt": null
}
{
"urls": $My_Links_Array,
"ignoreInvalidURLs": true,
"formats": [
"json"
],
"onlyMainContent": true,
"maxAge": 86400000,
"waitFor": 1000,
"jsonOptions": {
"prompt": "Get all the main content, leaving out any headers, footers, images and menus",
"schema": {
"type": "object",
"properties": {
"main_content": {
"type": "string"
}
}
}
}
}
{
"urls": $My_Links_Array,
"ignoreInvalidURLs": true,
"formats": [
"json"
],
"onlyMainContent": true,
"maxAge": 86400000,
"waitFor": 1000,
"jsonOptions": {
"prompt": "Get all the main content, leaving out any headers, footers, images and menus",
"schema": {
"type": "object",
"properties": {
"main_content": {
"type": "string"
}
}
}
}
}
1 Reply
@Zohar were you able to fix this issue?