Reaching CPU time limit when streaming AI responses

export const runtime = "edge";

import { NextRequest, NextResponse } from 'next/server';

const OPENAI_API_URL = "https://api.openai.com/v1/chat/completions";

export async function POST(req: NextRequest) {
  const { messages } = await req.json();

  // Forward the chat request to OpenAI with streaming enabled.
  const response = await fetch(OPENAI_API_URL, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${process.env.OPENAI_API_KEY}`,
    },
    body: JSON.stringify({
      model: "gpt-4.1",
      messages,
      temperature: 0,
      stream: true,
    }),
  });

  if (!response.ok || !response.body) {
    const error = await response.text();
    return new NextResponse(error, { status: response.status });
  }

  // Pipe the upstream SSE body straight through to the client.
  return new NextResponse(response.body, {
    headers: {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
      "Connection": "keep-alive",
    },
  });
}
So this very basic API route, which simply opens a response stream from OpenAI and returns it, uses a lot of CPU time for long responses. When you prompt it with "hi" the reply is short and it takes around 10ms of CPU time, but if you ask it to write a long story it uses 170ms of CPU time, which is more than Cloudflare allows. I have no idea how to optimize this code further to reduce CPU usage, other than disabling streaming, but that's no fun.
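For reference, a minimal sketch of the most pared-down version of this proxy: it drops NextResponse for the plain web-standard Response and hands the upstream ReadableStream back without ever reading a chunk in JavaScript. The assumption (not verified on Pages Functions) is that an untouched body gives the Cloudflare runtime its best chance to pump bytes natively rather than in metered JS; if CPU time still scales with response length after this, the cost is coming from the platform's stream plumbing, not from this handler.

// Pass-through sketch. Assumptions: edge runtime, OPENAI_API_KEY is set,
// and the runtime can proxy an unread fetch() body natively.
// No per-chunk JavaScript runs in this handler at all.
export const runtime = "edge";

const OPENAI_API_URL = "https://api.openai.com/v1/chat/completions";

export async function POST(req: Request) {
  const { messages } = await req.json();

  const upstream = await fetch(OPENAI_API_URL, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${process.env.OPENAI_API_KEY}`,
    },
    body: JSON.stringify({ model: "gpt-4.1", messages, temperature: 0, stream: true }),
  });

  if (!upstream.ok || !upstream.body) {
    return new Response(await upstream.text(), { status: upstream.status });
  }

  // Return the upstream SSE stream as-is; only the headers are replaced.
  return new Response(upstream.body, {
    headers: {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
    },
  });
}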
GamerPath (OP) • 7mo ago
Also, I'm a bit concerned that there is a wall-time limit on Pages Functions, because with big models like o3 it can take minutes before anything comes back on the stream. But I can't find anything about a wall-time limit for Pages Functions; I only find figures for Workers.

Okay, so I found a solution: I deleted my Cloudflare Pages app and used Railway instead 🙃 I think there's a 5-minute limit on Railway, but I haven't hit it.
