Lists all model templates accessible to the user.

This API can be used to list model templates. If a user has access to multiple accounts, all model templates from all accounts the user is associated with will be returned.

Request example:

import SGPClient from 'sgp';

const client = new SGPClient({
  apiKey: 'My API Key',
});

// Automatically fetches more pages as needed.
for await (const modelTemplate of client.modelTemplates.list()) {
  console.log(modelTemplate.id);
}

Response example:

{
"items": [
{
"name": "<string>",
"endpoint_type": "SYNC",
"model_type": "COMPLETION",
"vendor_configuration": {
"bundle_config": {
"registry": "<string>",
"image": "<string>",
"tag": "<string>",
"command": [
"<string>"
],
"env": {},
"streaming_command": [
"<string>"
],
"readiness_initial_delay_seconds": 120,
"healthcheck_route": "/readyz",
"predict_route": "/predict",
"streaming_predict_route": "/generate_streaming"
},
"vendor": "LAUNCH",
"endpoint_config": {
"cpus": 3,
"memory": "8Gi",
"storage": "16Gi",
"gpus": 0,
"gpu_type": "nvidia-tesla-t4",
"min_workers": 0,
"max_workers": 1,
"per_worker": 10,
"endpoint_type": "ASYNC",
"high_priority": false
},
"fine_tuning_job_bundle_config": {
"registry": "<string>",
"image": "<string>",
"tag": "<string>",
"command": [
"<string>"
],
"env": {},
"mount_location": "/workspace/launch_specific/config.json",
"training_dataset_schema_type": "GENERATION",
"resources": {
"cpus": 3,
"memory": "8Gi",
"storage": "16Gi",
"gpus": 0,
"gpu_type": "nvidia-tesla-t4"
}
}
},
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"account_id": "<string>",
"created_by_user_id": "<string>",
"created_by_identity_type": "user",
"model_creation_parameters_schema": {
"parameters": [
{
"name": "<string>",
"type": "<string>",
"description": "<string>",
"required": true
}
]
},
"model_request_parameters_schema": {
"parameters": [
{
"name": "<string>",
"type": "<string>",
"description": "<string>",
"required": true
}
]
},
"endpoint_protocol": "SGP"
}
],
"total_item_count": 123,
"current_page": 123,
"items_per_page": 123
}
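For explicit page-by-page control instead of the auto-paginating iterator, a single page can be requested directly. The following is only a sketch: the page and limit parameter names and the items accessor are inferred from the query parameters and response shape documented below, not confirmed SDK surface.

import SGPClient from 'sgp';

const client = new SGPClient({
  apiKey: 'My API Key',
});

// Assumed parameter names: `page` (page number, starts at 1) and `limit`
// (items per page) mirror the query parameters documented below.
const firstPage = await client.modelTemplates.list({ page: 1, limit: 100 });

// Each page is assumed to expose the response body's `items` array.
for (const modelTemplate of firstPage.items) {
  console.log(modelTemplate.id);
}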
Query parameters:

Page number for pagination to be returned by the given endpoint. Starts at page 1.
Constraint: x >= 1

Maximum number of artifacts to be returned by the given endpoint. Defaults to 100 and cannot be greater than 10k.
Constraint: 1 <= x <= 10000

Successful Response

items: The data returned for the current page.
items[].endpoint_type: An enum representing the different types of model endpoint types supported.
Attributes: SYNC denotes that the model endpoint type is sync; ASYNC denotes async; STREAMING denotes streaming; BATCH denotes batch.
Allowed values: SYNC, ASYNC, STREAMING, BATCH

items[].model_type: An enum representing the different types of models supported.
Attributes: COMPLETION denotes that the model type is completion; CHAT_COMPLETION denotes chat completion; AGENT denotes agent; EMBEDDING denotes embedding; RERANKING denotes reranking; GENERIC denotes generic; BUNDLE is not to be used directly and exists to surface a model bundle in the UI (TODO: explicitly type a bundle).
Allowed values: COMPLETION, CHAT_COMPLETION, AGENT, EMBEDDING, RERANKING, GENERIC, BUNDLE

items[].vendor_configuration: Configuration for launching a model using the Launch service, an internal, self-hosted service developed by Scale that deploys models on Kubernetes.
Attributes: vendor (the vendor of the model template), bundle_config (the bundle configuration of the model template), endpoint_config (the endpoint configuration of the model template).

items[].vendor_configuration.vendor: Allowed value: "LAUNCH"
items[].vendor_configuration.endpoint_config.gpu_type: Allowed values: nvidia-tesla-t4, nvidia-ampere-a10, nvidia-ampere-a100, nvidia-ampere-a100e, nvidia-hopper-h100, nvidia-hopper-h100-1g20gb, nvidia-hopper-h100-3g40gb

items[].vendor_configuration.endpoint_config.per_worker: The maximum number of concurrent requests that an individual worker can service. Launch automatically scales the number of workers for the endpoint so that each worker is processing per_worker requests, subject to the limits defined by min_workers and max_workers:
- If the average number of concurrent requests per worker is lower than per_worker, then the number of workers will be reduced.
- Otherwise, if the average number of concurrent requests per worker is higher than per_worker, then the number of workers will be increased to meet the elevated traffic.
Here is our recommendation for computing per_worker (a worked example follows below):
1. Compute min_workers and max_workers per your minimum and maximum throughput requirements.
2. Determine a value for the maximum number of concurrent requests in the workload. Divide this number by max_workers. Doing this ensures that the number of workers will "climb" to max_workers.
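A worked instance of that recommendation, with purely illustrative numbers (none of these values are defaults):

// Worked example for the per_worker recommendation above.
const minWorkers = 1;              // step 1: minimum throughput requirement
const maxWorkers = 5;              // step 1: maximum throughput requirement
const maxConcurrentRequests = 50;  // step 2: peak concurrency in the workload

// per_worker = peak concurrency / max_workers, so sustained peak traffic
// drives the autoscaler all the way up to max_workers.
const perWorker = Math.ceil(maxConcurrentRequests / maxWorkers); // 10

console.log({ minWorkers, maxWorkers, perWorker });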
items[].vendor_configuration.endpoint_config.endpoint_type: An enum representing the different types of model endpoint types supported.
Attributes: SYNC denotes that the model endpoint type is sync; ASYNC denotes async; STREAMING denotes streaming; BATCH denotes batch.
Allowed values: SYNC, ASYNC, STREAMING, BATCH
items[].vendor_configuration.fine_tuning_job_bundle_config.mount_location: The filesystem location where the fine-tuning job's configuration will be available when it is started.

items[].vendor_configuration.fine_tuning_job_bundle_config.training_dataset_schema_type: Optionally sets the required training and validation dataset schema.
Allowed values: GENERATION, RERANKING_QUESTIONS

items[].vendor_configuration.fine_tuning_job_bundle_config.resources.gpu_type: Allowed values: nvidia-tesla-t4, nvidia-ampere-a10, nvidia-ampere-a100, nvidia-ampere-a100e, nvidia-hopper-h100, nvidia-hopper-h100-1g20gb, nvidia-hopper-h100-3g40gb

items[].id: The unique identifier of the entity.
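As a concrete illustration of mount_location, a fine-tuning job entrypoint could read its configuration from that path at startup. This minimal sketch assumes a Node.js entrypoint, which is purely illustrative; the path matches the default shown in the sample response.

import { readFileSync } from 'node:fs';

// Inside the job container, the configuration is mounted at mount_location.
// This path is the default from the sample response; real jobs should use
// whatever mount_location their template specifies.
const mountLocation = '/workspace/launch_specific/config.json';
const jobConfig = JSON.parse(readFileSync(mountLocation, 'utf8'));
console.log(jobConfig);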
items[].created_at: The date and time when the entity was created, in ISO format.

items[].account_id: The ID of the account that owns the given entity.

items[].created_by_user_id: The user who originally created the entity.

items[].created_by_identity_type: The type of identity that created the entity.
Allowed values: user, service_account

items[].model_creation_parameters_schema: The field names and types of available parameter fields which may be specified during model creation.
items[].model_creation_parameters_schema.parameters[].type: Allowed value: "str"

items[].model_request_parameters_schema: The field names and types of available parameter fields which may be specified in a model execution API's model_request_parameters field.
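For illustration, a parameters schema declaring one required string parameter might look like the sketch below; the parameter name and description are invented for the example, and "str" is the only type value shown above.

// Hypothetical model_creation_parameters_schema value.
const creationSchema = {
  parameters: [
    {
      name: 'system_prompt',  // assumed example name
      type: 'str',
      description: 'Prompt prepended to every request.',
      required: true,
    },
  ],
};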
items[].endpoint_protocol: The name of the calling convention expected by the Launch model endpoint.
Allowed values: SGP, COHERE, VLLM

total_item_count: The total number of items matching the query.

current_page: The current page number.

items_per_page: The number of items per page.