> ## Documentation Index
> Fetch the complete documentation index at: https://docs.gp.scale.com/llms.txt
> Use this file to discover all available pages before exploring further.

# List All Model Deployments

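> Returns a paginated list of model deployments. Results can be ordered with `sort_by` and paged with `page` and `limit`.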



## OpenAPI

````yaml https://app.stainlessapi.com/api/spec/documented/sgp/openapi.yml get /v4/model-deployments
# OpenAPI 3.1 document for the EGP API V4.
openapi: 3.1.0
info:
  title: EGP API V4
  description: >-
    This is the parent API for all EGP APIs. If you are looking for the EGP API,
    please go to https://api.egp.scale.com/docs.
  contact:
    name: Scale Generative AI Platform
    url: https://scale.com/genai-platform
  version: 0.1.0
# Base URL that the paths in this document are resolved against.
servers:
  - url: https://api.egp.scale.com
# NOTE(review): the global security requirement list is empty, yet the code
# samples under `x-codeSamples` pass an API key to the client. Confirm whether
# a security scheme should be declared and referenced here.
security: []
# Tag used to group the model-deployment operation.
tags:
  - name: Models
    description: Model API.
paths:
  /v4/model-deployments:
    # Lists model deployments one page at a time.
    get:
      tags:
        - Models
      summary: List All Model Deployments
      description: >-
        Returns a paginated list of model deployments. Use `sort_by` to order
        the results and `page` / `limit` to control pagination.
      operationId: GET-V4-/model-deployments
      parameters:
        # Optional ordering; accepts a list of enum values or null.
        - name: sort_by
          in: query
          required: false
          schema:
            anyOf:
              - type: array
                items:
                  $ref: '#/components/schemas/SortByEnumForModelDeployment'
              - type: 'null'
            title: Sort By
          description: >-
            Fields to sort the results by. Each value is a field name followed
            by `:asc` or `:desc`.
        # 1-based page index; defaults to the first page.
        - name: page
          in: query
          required: false
          schema:
            type: integer
            minimum: 1
            description: >-
              Page number for pagination to be returned by the given endpoint.
              Starts at page 1
            default: 1
            title: Page
          description: >-
            Page number for pagination to be returned by the given endpoint.
            Starts at page 1
        # Page size; bounded to the range 1..10000, default 100.
        - name: limit
          in: query
          required: false
          schema:
            type: integer
            maximum: 10000
            minimum: 1
            description: >-
              Maximum number of artifacts to be returned by the given endpoint.
              Defaults to 100 and cannot be greater than 10k.
            default: 100
            title: Limit
          description: >-
            Maximum number of artifacts to be returned by the given endpoint.
            Defaults to 100 and cannot be greater than 10k.
      responses:
        # One page of deployments plus paging counters.
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PaginatedModelDeploymentResponse'
        # Returned when the query parameters fail validation.
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
      # SDK usage examples rendered next to the operation.
      x-codeSamples:
        - lang: Python
          source: |-
            import os
            from scale_gp import SGPClient

            client = SGPClient(
                api_key=os.environ.get("SGP_API_KEY"),  # This is the default and can be omitted
            )
            page = client.models.deployments.list_all()
            page = page.items[0]
            print(page.id)
        - lang: Go
          source: "package main\n\nimport (\n\t\"context\"\n\t\"fmt\"\n\n\t\"github.com/stainless-sdks/sgp-go\"\n\t\"github.com/stainless-sdks/sgp-go/option\"\n)\n\nfunc main() {\n\tclient := sgp.NewClient(\n\t\toption.WithAPIKey(\"My API Key\"),\n\t)\n\tpage, err := client.Models.Deployments.ListAll(context.TODO(), sgp.ModelDeploymentListAllParams{})\n\tif err != nil {\n\t\tpanic(err.Error())\n\t}\n\tfmt.Printf(\"%+v\\n\", page)\n}\n"
components:
  schemas:
    # Allowed `sort_by` values: a field name followed by `:asc` or `:desc`.
    # Referenced by the `sort_by` query parameter of GET /v4/model-deployments.
    SortByEnumForModelDeployment:
      type: string
      enum:
        - model_creation_parameters:asc
        - model_creation_parameters:desc
        - model_endpoint_id:asc
        - model_endpoint_id:desc
        - model_instance_id:asc
        - model_instance_id:desc
        - vendor_configuration:asc
        - vendor_configuration:desc
        - deployment_metadata:asc
        - deployment_metadata:desc
        - status:asc
        - status:desc
        - id:asc
        - id:desc
        - created_at:asc
        - created_at:desc
        - account_id:asc
        - account_id:desc
        - created_by_user_id:asc
        - created_by_user_id:desc
        - created_by_identity_type:asc
        - created_by_identity_type:desc
        - name:asc
        - name:desc
      title: SortByEnumForModelDeployment
    # Response body of the 200 response: one page of deployments together
    # with the paging counters. All four fields are required.
    PaginatedModelDeploymentResponse:
      title: PaginatedModelDeploymentResponse
      type: object
      required:
        - items
        - total_item_count
        - current_page
        - items_per_page
      properties:
        # Entries of the current page.
        items:
          title: Items
          type: array
          items:
            $ref: '#/components/schemas/ModelDeploymentResponse'
          description: The data returned for the current page.
        total_item_count:
          title: Total Item Count
          type: integer
          description: The total number of items of the query
        current_page:
          title: Current Page
          type: integer
          description: The current page number.
        items_per_page:
          title: Items Per Page
          type: integer
          description: The number of items per page.
    # Response body of the 422 response: a list of individual validation
    # errors. `detail` is not marked as required.
    HTTPValidationError:
      title: HTTPValidationError
      type: object
      properties:
        detail:
          title: Detail
          type: array
          items:
            $ref: '#/components/schemas/ValidationError'
    # A single model deployment as returned inside
    # PaginatedModelDeploymentResponse.items.
    ModelDeploymentResponse:
      title: ModelDeploymentResponse
      type: object
      required:
        - name
        - id
        - created_at
        - account_id
        - created_by_user_id
        - created_by_identity_type
        - status
      properties:
        name:
          title: Name
          type: string
        # Free-form object; any additional properties are accepted.
        model_creation_parameters:
          title: Model Creation Parameters
          type: object
          additionalProperties: true
        model_endpoint_id:
          title: Model Endpoint Id
          type: string
        model_instance_id:
          title: Model Instance Id
          type: string
        # Vendor-specific settings; see DeploymentVendorConfiguration.
        vendor_configuration:
          $ref: '#/components/schemas/DeploymentVendorConfiguration'
        # Free-form object; any additional properties are accepted.
        deployment_metadata:
          title: Deployment Metadata
          type: object
          additionalProperties: true
        id:
          title: Id
          type: string
          description: The unique identifier of the entity.
        created_at:
          title: Created At
          type: string
          format: date-time
          description: The date and time when the entity was created in ISO format.
        # NOTE(review): `can_patch` and `optional_in_request` are not standard
        # JSON Schema keywords; presumably generator annotations. Confirm.
        account_id:
          title: Account Id
          type: string
          description: The ID of the account that owns the given entity.
          can_patch: false
          optional_in_request: true
        created_by_user_id:
          title: Created By User Id
          type: string
          description: The user who originally created the entity.
        created_by_identity_type:
          description: The type of identity that created the entity.
          $ref: '#/components/schemas/IdentifierTypeEnum'
        status:
          title: Status
          type: string
          description: Status of the model's deployment.
    # One validation failure, as listed in HTTPValidationError.detail.
    # `loc`, `msg` and `type` are required; `input` and `ctx` are optional.
    ValidationError:
      title: ValidationError
      type: object
      required:
        - loc
        - msg
        - type
      properties:
        # Each element is either a string or an integer.
        loc:
          title: Location
          type: array
          items:
            anyOf:
              - type: string
              - type: integer
        msg:
          title: Message
          type: string
        type:
          title: Error Type
          type: string
        # No type constraint is declared for this property.
        input:
          title: Input
        ctx:
          title: Context
          type: object
          additionalProperties: true
    # Vendor-specific deployment settings: exactly one of the two vendor
    # schemas, selected by the value of the `vendor` property.
    DeploymentVendorConfiguration:
      oneOf:
        - $ref: '#/components/schemas/LaunchDeploymentVendorConfiguration'
        - $ref: '#/components/schemas/LLMEngineDeploymentVendorConfiguration'
      title: DeploymentVendorConfiguration
      discriminator:
        propertyName: vendor
        # Mapping values must be plain strings (schema names or URI
        # references), not `$ref` objects.
        mapping:
          LAUNCH: '#/components/schemas/LaunchDeploymentVendorConfiguration'
          LLMENGINE: '#/components/schemas/LLMEngineDeploymentVendorConfiguration'
    # Kind of identity; referenced by
    # ModelDeploymentResponse.created_by_identity_type.
    IdentifierTypeEnum:
      type: string
      enum:
        - user
        - service_account
      title: IdentifierTypeEnum
    # Deployment settings for the LAUNCH vendor. One of the two alternatives
    # of DeploymentVendorConfiguration; no property is marked as required.
    LaunchDeploymentVendorConfiguration:
      properties:
        min_workers:
          type: integer
          title: Min Workers
          default: 0
        max_workers:
          type: integer
          title: Max Workers
          default: 1
        # Literal block scalar so the Markdown lists keep one item per line.
        per_worker:
          type: integer
          title: Per Worker
          description: |-
            The maximum number of concurrent requests that an individual worker
            can service. Launch automatically scales the number of workers for
            the endpoint so that each worker is processing ``per_worker``
            requests, subject to the limits defined by ``min_workers`` and
            ``max_workers``.

            - If the average number of concurrent requests per worker is lower
              than ``per_worker``, then the number of workers will be reduced.
            - Otherwise, if the average number of concurrent requests per worker
              is higher than ``per_worker``, then the number of workers will be
              increased to meet the elevated traffic.

            Here is our recommendation for computing ``per_worker``:

            1. Compute ``min_workers`` and ``max_workers`` per your minimum and
               maximum throughput requirements.
            2. Determine a value for the maximum number of concurrent requests
               in the workload. Divide this number by ``max_workers``. Doing
               this ensures that the number of workers will "climb" to
               ``max_workers``.
          default: 10
        # Discriminator property named by DeploymentVendorConfiguration;
        # fixed to LAUNCH for this schema.
        vendor:
          type: string
          const: LAUNCH
          title: Vendor
          default: LAUNCH
      type: object
      title: LaunchDeploymentVendorConfiguration
    # Deployment settings for the LLMENGINE vendor. One of the two
    # alternatives of DeploymentVendorConfiguration; no property is marked as
    # required.
    LLMEngineDeploymentVendorConfiguration:
      properties:
        cpus:
          type: integer
          title: Cpus
          default: 3
        memory:
          type: string
          title: Memory
          default: 8Gi
        storage:
          type: string
          title: Storage
          default: 16Gi
        gpus:
          type: integer
          title: Gpus
          default: 0
        gpu_type:
          $ref: '#/components/schemas/GPUType'
        min_workers:
          type: integer
          title: Min Workers
          default: 0
        max_workers:
          type: integer
          title: Max Workers
          default: 1
        # Literal block scalar so the Markdown lists keep one item per line.
        # NOTE(review): the text says "Launch" although this is the LLMEngine
        # schema; presumably shared wording. Confirm.
        per_worker:
          type: integer
          title: Per Worker
          description: |-
            The maximum number of concurrent requests that an individual worker
            can service. Launch automatically scales the number of workers for
            the endpoint so that each worker is processing ``per_worker``
            requests, subject to the limits defined by ``min_workers`` and
            ``max_workers``.

            - If the average number of concurrent requests per worker is lower
              than ``per_worker``, then the number of workers will be reduced.
            - Otherwise, if the average number of concurrent requests per worker
              is higher than ``per_worker``, then the number of workers will be
              increased to meet the elevated traffic.

            Here is our recommendation for computing ``per_worker``:

            1. Compute ``min_workers`` and ``max_workers`` per your minimum and
               maximum throughput requirements.
            2. Determine a value for the maximum number of concurrent requests
               in the workload. Divide this number by ``max_workers``. Doing
               this ensures that the number of workers will "climb" to
               ``max_workers``.
          default: 10
        # Discriminator property named by DeploymentVendorConfiguration;
        # fixed to LLMENGINE for this schema.
        vendor:
          type: string
          const: LLMENGINE
          title: Vendor
          default: LLMENGINE
        high_priority:
          title: High Priority
          default: false
          type: boolean
        num_shards:
          type: integer
          title: Num Shards
          default: 4
        checkpoint_path:
          title: Checkpoint Path
          type: string
        model_name:
          title: Model Name
          type: string
        base_model_name:
          title: Base Model Name
          type: string
        inference_framework_image_tag:
          title: Inference Framework Image Tag
          type: string
      type: object
      title: LLMEngineDeploymentVendorConfiguration
    # Allowed values for LLMEngineDeploymentVendorConfiguration.gpu_type.
    GPUType:
      type: string
      enum:
        - nvidia-tesla-t4
        - nvidia-ampere-a10
        - nvidia-ampere-a100
        - nvidia-ampere-a100e
        - nvidia-hopper-h100
        - nvidia-hopper-h100-1g20gb
        - nvidia-hopper-h100-3g40gb
      title: GPUType

````