[Critical] Load balancing endpoints have a payload limit of ~1MB

The connection is killed by RunPod for all requests above ~1MB. Here's how to reproduce it:

```python
#!/usr/bin/env python3
"""
FastAPI test server - JSON with base64 encoded image
"""
import asyncio
import base64
import logging

from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel
import uvicorn

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI()


class ImageRequest(BaseModel):
    """Request model with base64 encoded image"""
    image: str
    prompt: str = "test"


@app.post("/predict")
async def predict(request: ImageRequest):
    """
    Accept JSON with base64 image, wait 5 seconds, return base64 image
    """
    try:
        # Decode base64 image
        try:
            image_bytes = base64.b64decode(request.image)
        except Exception as e:
            raise HTTPException(status_code=400, detail=f"Invalid base64 image data: {str(e)}")

        if len(image_bytes) == 0:
            raise HTTPException(status_code=400, detail="Image data is empty")

        logger.info(f"Received image: {len(image_bytes)} bytes, prompt: {request.prompt}")

        # Wait 5 seconds to simulate processing
        await asyncio.sleep(5)

        # Encode image back to base64
        image_b64 = base64.b64encode(image_bytes).decode('utf-8')

        return JSONResponse(
            content={
                "image": image_b64,
                "success": True
            },
            status_code=200
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error processing request: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Server error: {str(e)}")


@app.get("/ping")
async def health_check():
    """Health check endpoint"""
    return {"status": "healthy"}


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
```
4 Replies
Unknown User
Unknown User2mo ago
Message Not Public
Sign In & Join Server To View
Poddy
Poddy2mo ago
@emilwallner
Escalated To Zendesk
The thread has been escalated to Zendesk!
Unknown User
Unknown User2mo ago
Message Not Public
Sign In & Join Server To View
emilwallner
emilwallnerOP2mo ago
Most likely a bug, since their regular endpoints have 10MB / 20MB limits. They most likely left the default values in their Cloudflare proxy service, not realizing the default values are rather limiting, or they went for the new Cloudflare Python Workers, which can't be used for anything but simple test cases.

Did you find this page helpful?