Puppeteer download CSV file using javascript

Hello, I am trying to use Puppeteer with JavaScript to scrape a website and download a CSV file using `await page.click(CSS selector)`. Unfortunately, no file is ever downloaded. On my local machine it works. Screenshots are saved to the Apify storage, but the CSV file is not. Does anybody know how to do that? Thank you.
1 Reply
stormy-gold
stormy-gold3y ago
You can try something like this (if you can figure out which URL to use to download the file):
const { gotScraping } = require('got-scraping');

exports.downloadFile = async (key, url, keyValueStore) => {
const response = await gotScraping({
url,
responseType: 'buffer',
});

const { body, headers } = response;
const contentType = headers['content-type'];

await keyValueStore.setValue(normalizeForKey(key), body, { contentType });

};
const { gotScraping } = require('got-scraping');

exports.downloadFile = async (key, url, keyValueStore) => {
const response = await gotScraping({
url,
responseType: 'buffer',
});

const { body, headers } = response;
const contentType = headers['content-type'];

await keyValueStore.setValue(normalizeForKey(key), body, { contentType });

};
Or you can intercept the request after the click event. Something like this (an old example, but the logic is OK):
const requestPromise = require("request-promise");


// Capture the request that the submit button triggers, cancel it inside the
// browser, replay it from Node with the page's cookies, and save the response
// body to the key-value store.
// `page`, `log`, `store` and `requestPromise` are expected to be in scope.

// Route every page request through the "request" listener registered below.
await page.setRequestInterception(true);

log(`Waiting for navigation and clicking button...`);

const xRequest = await new Promise((resolve, reject) => {
    // The listener is attached before the click is issued so the triggering
    // request cannot slip past it.
    page.on("request", (request) => {
        const requestUrl = request.url();
        if (requestUrl.includes("/doc1") || requestUrl.includes("file=")) {
            request
                .abort()
                .catch((err) => log(`Failed to abort request: ${err.message}`));
            resolve(request); // getting this request
        } else {
            request
                .continue()
                .catch((err) => log(`Failed to continue request: ${err.message}`));
        }
    });

    // The navigation wait is deliberately not awaited: the request of interest
    // is aborted above, so the navigation may never finish. Its rejection is
    // handled here so it cannot surface as an unhandled promise rejection.
    page
        .waitForNavigation({
            waitUntil: "networkidle2",
            timeout: 60000,
        })
        .catch((err) => log(`Navigation wait did not complete: ${err.message}`));

    // A failed click (e.g. missing selector) rejects the outer promise instead
    // of leaving the script waiting forever for a request that never comes.
    page.click("input[type=submit]").then(() => {
        log(`Button "View Document" was clicked`);
        log("Making interception...");
    }, reject);
});

const cookies = await page.cookies();

// Rebuild the intercepted request through Puppeteer's public accessors rather
// than its private underscore fields, and copy the headers so the intercepted
// request object itself is left untouched.
const options = {
    encoding: null, // ask request-promise for the raw body instead of a decoded string
    method: xRequest.method(),
    uri: xRequest.url(),
    body: xRequest.postData(),
    headers: {
        ...xRequest.headers(),
        // Cookie pairs are separated by "; " as in a standard Cookie header.
        Cookie: cookies.map((ck) => `${ck.name}=${ck.value}`).join("; "),
    },
};

const resp = await requestPromise(options);
log("Intercepted request was made");
log("Saving PDF-file..");
await store.setValue("PDF-file", resp, {
    contentType: "application/pdf",
});
log("PDF-file was saved!");
const requestPromise = require("request-promise");


// Capture the request that the submit button triggers, cancel it inside the
// browser, replay it from Node with the page's cookies, and save the response
// body to the key-value store.
// `page`, `log`, `store` and `requestPromise` are expected to be in scope.

// Route every page request through the "request" listener registered below.
await page.setRequestInterception(true);

log(`Waiting for navigation and clicking button...`);

const xRequest = await new Promise((resolve, reject) => {
    // The listener is attached before the click is issued so the triggering
    // request cannot slip past it.
    page.on("request", (request) => {
        const requestUrl = request.url();
        if (requestUrl.includes("/doc1") || requestUrl.includes("file=")) {
            request
                .abort()
                .catch((err) => log(`Failed to abort request: ${err.message}`));
            resolve(request); // getting this request
        } else {
            request
                .continue()
                .catch((err) => log(`Failed to continue request: ${err.message}`));
        }
    });

    // The navigation wait is deliberately not awaited: the request of interest
    // is aborted above, so the navigation may never finish. Its rejection is
    // handled here so it cannot surface as an unhandled promise rejection.
    page
        .waitForNavigation({
            waitUntil: "networkidle2",
            timeout: 60000,
        })
        .catch((err) => log(`Navigation wait did not complete: ${err.message}`));

    // A failed click (e.g. missing selector) rejects the outer promise instead
    // of leaving the script waiting forever for a request that never comes.
    page.click("input[type=submit]").then(() => {
        log(`Button "View Document" was clicked`);
        log("Making interception...");
    }, reject);
});

const cookies = await page.cookies();

// Rebuild the intercepted request through Puppeteer's public accessors rather
// than its private underscore fields, and copy the headers so the intercepted
// request object itself is left untouched.
const options = {
    encoding: null, // ask request-promise for the raw body instead of a decoded string
    method: xRequest.method(),
    uri: xRequest.url(),
    body: xRequest.postData(),
    headers: {
        ...xRequest.headers(),
        // Cookie pairs are separated by "; " as in a standard Cookie header.
        Cookie: cookies.map((ck) => `${ck.name}=${ck.value}`).join("; "),
    },
};

const resp = await requestPromise(options);
log("Intercepted request was made");
log("Saving PDF-file..");
await store.setValue("PDF-file", resp, {
    contentType: "application/pdf",
});
log("PDF-file was saved!");

Did you find this page helpful?