GamerPath

Reaching CPU time limit when streaming AI responses

import { NextRequest, NextResponse } from 'next/server';

// Run this route on the Edge runtime so the response can stream
export const runtime = "edge";

const OPENAI_API_URL = "https://api.openai.com/v1/chat/completions";

export async function POST(req: NextRequest) {
    const { messages } = await req.json();

    // Forward the chat request to OpenAI with streaming enabled
    const response = await fetch(OPENAI_API_URL, {
        method: "POST",
        headers: {
            "Content-Type": "application/json",
            "Authorization": `Bearer ${process.env.OPENAI_API_KEY}`,
        },
        body: JSON.stringify({
            model: "gpt-4.1",
            messages,
            temperature: 0,
            stream: true,
        }),
    });

    // If OpenAI returned an error (or no body), relay it instead of streaming
    if (!response.ok || !response.body) {
        const error = await response.text();
        return new NextResponse(error, { status: response.status });
    }

    // Pass the upstream SSE body through to the client unchanged
    return new NextResponse(response.body, {
        headers: {
            "Content-Type": "text/event-stream",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
        },
    });
}


This very basic API route simply opens a response stream from OpenAI and returns it, yet it uses a lot of CPU time for long responses. When I prompt it with "hi" the reply is short and the route takes roughly 10ms of CPU time, but if I ask it to write a long story it uses about 170ms of CPU time, which is more than what Cloudflare allows. I have no idea how to optimize this code further to reduce CPU usage, other than disabling streaming, but that's no fun.
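
One thing that might be worth trying, purely as a guess: hand the untouched upstream body back in a plain `Response` instead of a `NextResponse`, so that no JavaScript (framework or otherwise) touches the stream per chunk. Workers can pipe a native, unread body through with very little isolate CPU, and this sketch assumes that pass-through still applies here; nothing in this route is verified to actually cut CPU time, and the model name and headers are just carried over from the code above.

export const runtime = "edge";

import { NextRequest } from 'next/server';

const OPENAI_API_URL = "https://api.openai.com/v1/chat/completions";

export async function POST(req: NextRequest) {
    const { messages } = await req.json();

    const upstream = await fetch(OPENAI_API_URL, {
        method: "POST",
        headers: {
            "Content-Type": "application/json",
            "Authorization": `Bearer ${process.env.OPENAI_API_KEY}`,
        },
        body: JSON.stringify({
            model: "gpt-4.1",
            messages,
            temperature: 0,
            stream: true,
        }),
    });

    if (!upstream.ok || !upstream.body) {
        // Relay upstream errors without streaming
        return new Response(await upstream.text(), { status: upstream.status });
    }

    // Re-wrap the native body without reading it in JS, so the runtime
    // can (hopefully) pump bytes through without per-chunk JS work.
    return new Response(upstream.body, {
        headers: {
            "Content-Type": "text/event-stream",
            "Cache-Control": "no-cache",
        },
    });
}

If this still burns the same CPU, the overhead is probably coming from the framework layer rather than the handler itself, and comparing against a bare Worker that does the same proxying would show where the time actually goes.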