Reaching CPU time limit when streaming AI responses

export const runtime = "edge";

import { NextRequest, NextResponse } from 'next/server';

const OPENAI_API_URL = "https://api.openai.com/v1/chat/completions";

export async function POST(req: NextRequest) {
  const { messages } = await req.json();

  // Forward the chat request to OpenAI with streaming enabled.
  const response = await fetch(OPENAI_API_URL, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${process.env.OPENAI_API_KEY}`,
    },
    body: JSON.stringify({
      model: "gpt-4.1",
      messages,
      temperature: 0,
      stream: true,
    }),
  });

  if (!response.ok || !response.body) {
    const error = await response.text();
    return new NextResponse(error, { status: response.status });
  }

  // Pipe the upstream SSE body straight through to the client.
  return new NextResponse(response.body, {
    headers: {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
      "Connection": "keep-alive",
    },
  });
}
So this very basic API route, which simply opens a response stream from OpenAI and returns it, uses a lot of CPU time for long responses. When you prompt it with "hi" the reply is short and it takes around 10ms of CPU time, but if you ask it to write a long story it uses 170ms of CPU time, which is more than Cloudflare allows. I have no idea how to optimize this code further to reduce CPU usage, other than disabling streaming, but that's no fun.
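For reference, a minimal sketch of the most pared-down version of this proxy: it drops NextResponse for the plain web-standard Response and hands the upstream ReadableStream back without ever reading a chunk in JavaScript. The assumption (not verified on Pages Functions) is that an untouched body gives the Cloudflare runtime its best chance to pump bytes natively rather than in metered JS; if CPU time still scales with response length after this, the cost is coming from the platform's stream plumbing, not from this handler.

// Pass-through sketch. Assumptions: edge runtime, OPENAI_API_KEY is set,
// and the runtime can proxy an unread fetch() body natively.
// No per-chunk JavaScript runs in this handler at all.
export const runtime = "edge";

const OPENAI_API_URL = "https://api.openai.com/v1/chat/completions";

export async function POST(req: Request) {
  const { messages } = await req.json();

  const upstream = await fetch(OPENAI_API_URL, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${process.env.OPENAI_API_KEY}`,
    },
    body: JSON.stringify({ model: "gpt-4.1", messages, temperature: 0, stream: true }),
  });

  if (!upstream.ok || !upstream.body) {
    return new Response(await upstream.text(), { status: upstream.status });
  }

  // Return the upstream SSE stream as-is; only the headers are replaced.
  return new Response(upstream.body, {
    headers: {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
    },
  });
}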
GamerPath (OP) • 7mo ago
Also, I'm a bit concerned that there is a wall-time limit on Pages Functions, because with big models like o3 it can take minutes before anything comes back on the stream. But I can't find anything about a wall-time limit for Pages Functions; I only find figures for Workers.

Okay, so I found a solution: I deleted my Cloudflare Pages app and used Railway instead 🙃 I think there's a 5-minute limit on Railway, but I haven't hit it.
