How to Deploy a vLLM Serverless Endpoint Programmatically (via API)
Hello, how can we deploy serverless vLLM instances using an API rather than going through the UI?
url: https://api.runpod.io/graphql?api_key=YOUR_API_KEY
# Mutation that saves (creates/updates) a serverless endpoint on RunPod.
# Takes a single $input variable of type EndpointInput! (the JSON object below
# is bound to it). The selection set lists the endpoint fields returned after
# the save — the response echoes the saved configuration plus server-assigned
# values (id, userId, templateId).
mutation saveEndpoint($input: EndpointInput!) {
saveEndpoint(input: $input) {
gpuIds
id
idleTimeout
locations
name
networkVolumeId
scalerType
scalerValue
templateId
userId
workersMax
workersMin
gpuCount
}
}
// input — the variables object bound to $input (EndpointInput!) in the
// saveEndpoint mutation above.
{
"gpuIds": "AMPERE_16", // presumably a RunPod GPU pool identifier (16 GB Ampere class) — TODO confirm against RunPod docs
"gpuCount": 1, // GPUs per worker
"allowedCudaVersions": "", // empty string — presumably means "no restriction"; verify against API docs
"idleTimeout": 5, // idle timeout before a worker scales down; units not shown here (likely seconds) — TODO confirm
"locations": null, // null — presumably lets RunPod choose the datacenter; verify
"name": "broken_violet_tarantula -fb", // NOTE(review): the " -fb" suffix may be a FlashBoot flag encoded in the name — confirm
"networkVolumeId": null, // no network volume attached
"scalerType": "QUEUE_DELAY", // autoscaling strategy
"scalerValue": 4, // scaler parameter; meaning depends on scalerType (for QUEUE_DELAY, presumably a delay threshold) — TODO confirm
"workersMax": 3, // upper bound on concurrent workers
"workersMin": 0, // 0 = scale to zero when idle
"executionTimeoutMs": 600000, // per-request execution timeout: 600000 ms = 10 minutes
"template": { // inline template defining the worker container
"containerDiskInGb": 5,
"containerRegistryAuthId": "", // empty — public registry image, no auth
"dockerArgs": "", // no container start-command override
"env": [], // no environment variables set
"imageName": "test/test", // placeholder image name — replace with a real vLLM worker image
"startScript": "",
"ports": "", // no exposed ports
"name": "broken_violet_tarantula__template__6oknzg"
}
}