[Critical] Load balancing endpoints have a payload limit of ~1MB
RunPod kills the connection for all requests above ~1MB.
Here's how to reproduce it:
#!/usr/bin/env python3
"""
FastAPI test server - JSON with base64 encoded image
"""
import asyncio
import base64
import logging
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel
import uvicorn
# Configure root logging at INFO level so the handlers' info/error messages are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by the request handlers in this script.
logger = logging.getLogger(__name__)
# FastAPI application object; the route decorators in this script register
# handlers on it, and it is passed to uvicorn.run() when run as a script.
app = FastAPI()
class ImageRequest(BaseModel):
    """JSON request body carrying a base64-encoded image and a prompt."""

    # Base64-encoded image payload (required field).
    image: str
    # Prompt text accompanying the image; defaults to "test" when omitted.
    prompt: str = "test"
@app.post("/predict")
async def predict(request: ImageRequest):
    """
    Accept JSON with a base64 image, wait 5 seconds, return the image as base64.

    The image is decoded, its size and the prompt are logged, processing is
    simulated with a 5-second sleep, and the same bytes are re-encoded and
    returned under the "image" key together with "success": True.

    Raises:
        HTTPException: 400 if the image is not valid base64 or decodes to
            zero bytes; 500 for any other unexpected error.
    """
    try:
        # Decode base64 image. base64.b64decode signals bad input with
        # binascii.Error (a ValueError subclass) or ValueError for non-ASCII
        # text, so ValueError is the narrowest catch that covers both.
        try:
            image_bytes = base64.b64decode(request.image)
        except ValueError as exc:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid base64 image data: {exc}",
            ) from exc

        if not image_bytes:
            raise HTTPException(status_code=400, detail="Image data is empty")

        logger.info(
            "Received image: %d bytes, prompt: %s",
            len(image_bytes),
            request.prompt,
        )

        # Wait 5 seconds to simulate processing
        await asyncio.sleep(5)

        # Encode image back to base64
        image_b64 = base64.b64encode(image_bytes).decode("utf-8")
        return JSONResponse(
            content={
                "image": image_b64,
                "success": True,
            },
            status_code=200,
        )
    except HTTPException:
        # Deliberate client-facing errors pass through untouched.
        raise
    except Exception as exc:
        # logger.exception records the traceback along with the message.
        logger.exception("Error processing request: %s", exc)
        raise HTTPException(status_code=500, detail=f"Server error: {exc}") from exc
@app.get("/ping")
async def health_check():
    """Report service health; always responds with a "healthy" status."""
    status_payload = {"status": "healthy"}
    return status_payload
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)

4 Replies
Unknown User•2mo ago
Message Not Public
Sign In & Join Server To View
@emilwallner
Escalated To Zendesk
The thread has been escalated to Zendesk!
Unknown User•2mo ago
Message Not Public
Sign In & Join Server To View
Most likely a bug, since their regular endpoints have 10MB / 20MB limits. They most likely left the default values in their Cloudflare proxy service, not realizing that those defaults are rather limiting, or they went for the new Cloudflare Python Workers, which can't be used for anything but simple test cases.