Resume an Application
Resume a paused application by scaling back to the original number of replicas
curl --request PATCH \
--url https://{controlPlaneURL}/api/svc/v1/apps/{id}/scale-to-original \
--header 'Authorization: Bearer <token>'
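The same request can be made from Python with the requests library; this is a minimal sketch, and the control-plane URL, application id, and token are placeholders you supply:
import requests

CONTROL_PLANE_URL = "https://<controlPlaneURL>"  # your control plane URL
APP_ID = "<id>"                                  # id of the application to resume
TOKEN = "<token>"                                # your auth token

response = requests.patch(
    f"{CONTROL_PLANE_URL}/api/svc/v1/apps/{APP_ID}/scale-to-original",
    headers={"Authorization": f"Bearer {TOKEN}"},
)
response.raise_for_status()
deployment = response.json()  # response body, shaped like the example below
print(deployment["id"], deployment["currentStatus"]["status"])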
{
"id": "<string>",
"version": 123,
"fqn": "<string>",
"applicationId": "<string>",
"manifest": {
"name": "<string>",
"image": {
"type": "build",
"docker_registry": "<string>",
"build_source": {
"type": "remote",
"remote_uri": "<string>"
},
"build_spec": {
"type": "dockerfile",
"dockerfile_path": "<string>",
"build_context_path": "<string>",
"command": "<string>",
"build_args": {}
}
},
"artifacts_download": {
"cache_volume": {
"storage_class": "<string>",
"cache_size": 500
},
"artifacts": [
{
"type": "truefoundry-artifact",
"artifact_version_fqn": "<string>",
"download_path_env_variable": "<string>"
}
]
},
"resources": {
"cpu_request": 128.0005,
"cpu_limit": 128.0005,
"memory_request": 1000000,
"memory_limit": 1000000,
"ephemeral_storage_request": 1000000,
"ephemeral_storage_limit": 1000000,
"shared_memory_size": 1000032,
"node": {
"type": "node_selector",
"instance_families": [
"<string>"
],
"capacity_type": "spot_fallback_on_demand"
},
"devices": [
{
"type": "nvidia_gpu",
"name": "<string>",
"count": 8
}
]
},
"env": null,
"ports": [
{
"port": 32768,
"protocol": "TCP",
"expose": true,
"app_protocol": "http",
"host": "<string>",
"path": "<string>",
"rewrite_path_to": "<string>",
"auth": {
"type": "basic_auth",
"username": "<string>",
"password": "<string>"
}
}
],
"service_account": "<string>",
"mounts": [
{
"type": "secret",
"mount_path": "<string>",
"secret_fqn": "<string>"
}
],
"labels": {},
"kustomize": {
"patch": {},
"additions": [
{}
]
},
"liveness_probe": {
"config": {
"type": "http",
"path": "<string>",
"port": 32767,
"host": "<string>",
"scheme": "<string>"
},
"initial_delay_seconds": 18000,
"period_seconds": 18000,
"timeout_seconds": 18000,
"success_threshold": 50,
"failure_threshold": 50
},
"readiness_probe": {
"config": {
"type": "http",
"path": "<string>",
"port": 32767,
"host": "<string>",
"scheme": "<string>"
},
"initial_delay_seconds": 18000,
"period_seconds": 18000,
"timeout_seconds": 18000,
"success_threshold": 50,
"failure_threshold": 50
},
"workspace_fqn": "<string>",
"type": "service",
"replicas": 250,
"auto_shutdown": {
"wait_time": 1
},
"allow_interception": false,
"rollout_strategy": {
"type": "rolling_update",
"max_unavailable_percentage": 50,
"max_surge_percentage": 50
}
},
"application": {
"id": "<string>",
"fqn": "<string>",
"name": "<string>",
"type": "async-service",
"createdBySubject": {
"subjectId": "<string>",
"subjectType": "user",
"subjectSlug": "<string>",
"subjectDisplayName": "<string>"
},
"tenantName": "<string>",
"metadata": {
"paused": true
},
"lifecycleStage": "active",
"workspaceId": "<string>",
"lastVersion": 123,
"activeVersion": 123,
"createdAt": "2023-11-07T05:31:56Z",
"updatedAt": "2023-11-07T05:31:56Z",
"recommendations": [
{
"id": "<string>",
"clusterId": "<string>",
"applicationId": "<string>",
"deploymentId": "<string>",
"applicationVersion": 123,
"recommendationData": {},
"recommendationType": "<string>",
"appliedDeploymentId": "<string>",
"expiryTimestamp": "2023-11-07T05:31:56Z",
"createdAt": "2023-11-07T05:31:56Z",
"updatedAt": "2023-11-07T05:31:56Z"
}
],
"alerts": [
{
"id": "<string>",
"name": "<string>",
"timestamps": [
"2023-11-07T05:31:56Z"
],
"startTime": "2023-11-07T05:31:56Z",
"resolvedTime": "2023-11-07T05:31:56Z",
"applicationId": "<string>",
"clusterId": "<string>",
"tenantName": "<string>",
"fingerprint": "<string>",
"updatedAt": "<string>",
"createdAt": "<string>",
"applicationDebugInfoId": "<string>"
}
],
"alertsSummary": {},
"applicationDebugInfos": [
{
"id": "<string>",
"applicationId": "<string>",
"application": {},
"debugInfo": {},
"createdAt": "2023-11-07T05:31:56Z",
"updatedAt": "2023-11-07T05:31:56Z"
}
],
"potentialProblems": [
{
"name": "<string>",
"description": "<string>"
}
],
"autopilot": {},
"createdBy": "<string>",
"deployment": {},
"activeDeploymentId": "<string>",
"lastDeploymentId": "<string>"
},
"createdBySubject": {
"subjectId": "<string>",
"subjectType": "user",
"subjectSlug": "<string>",
"subjectDisplayName": "<string>"
},
"createdAt": "2023-11-07T05:31:56Z",
"updatedAt": "2023-11-07T05:31:56Z",
"deploymentBuilds": [
{
"name": "<string>",
"status": 20
}
],
"deploymentStatuses": [
{
"id": "<string>",
"deploymentId": "<string>",
"status": "INITIALIZED",
"state": {},
"transition": "BUILDING",
"message": "<string>",
"retryCount": 123,
"createdAt": "2023-11-07T05:31:56Z",
"updatedAt": "2023-11-07T05:31:56Z"
}
],
"currentStatusId": "<string>",
"currentStatus": {
"id": "<string>",
"deploymentId": "<string>",
"status": "INITIALIZED",
"state": {},
"transition": "BUILDING",
"message": "<string>",
"retryCount": 123,
"createdAt": "2023-11-07T05:31:56Z",
"updatedAt": "2023-11-07T05:31:56Z"
},
"appliedRecommendations": [
{
"id": "<string>",
"clusterId": "<string>",
"applicationId": "<string>",
"deploymentId": "<string>",
"applicationVersion": 123,
"recommendationData": {},
"recommendationType": "<string>",
"appliedDeploymentId": "<string>",
"expiryTimestamp": "2023-11-07T05:31:56Z",
"createdAt": "2023-11-07T05:31:56Z",
"updatedAt": "2023-11-07T05:31:56Z"
}
],
"createdBy": "<string>"
}
Authorizations
Authorization: Bearer authentication header of the form Bearer <token>, where <token> is your auth token.
Path Parameters
id: ID of the application.
Response
manifest: Describes the configuration for the service.
manifest.name: Name of the service. This uniquely identifies the service in the workspace. Can only contain alphanumeric characters and '-'; must be 3 to 32 lowercase characters, may contain '-' in between, and cannot start with a number.
manifest.image: Specify whether you want to deploy a Docker image or build and deploy from source code. type: build.
manifest.image.build_source: Source code location; the source code is fetched to build and deploy.
manifest.image.build_spec: Instructions to build a container image out of the build source, using a Dockerfile or a Buildpack. type: dockerfile.
manifest.image.build_spec.dockerfile_path: File path of the Dockerfile relative to the project root path.
manifest.image.build_spec.build_context_path: Build context path for the Dockerfile relative to the project root path.
manifest.image.build_spec.command: Override the command to run when the container starts. When deploying a Job, the command can be templatized by defining params and referencing them in the command, e.g. python main.py --learning_rate {{learning_rate}}.
manifest.image.docker_registry: FQN of the container registry. You can find the FQN of your desired container registry (or add one) on the Integrations page. If you can't find your registry here, add it through the Integrations page.
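For illustration, a build-from-source image block in Python dict form; the registry FQN, repository URI, paths, and command are placeholders, not values from your account:
# Illustrative build-from-source image block (placeholder FQNs and paths).
image = {
    "type": "build",
    "docker_registry": "<docker-registry-fqn>",          # from the Integrations page
    "build_source": {
        "type": "remote",
        "remote_uri": "https://github.com/example/repo",  # placeholder source repository
    },
    "build_spec": {
        "type": "dockerfile",
        "dockerfile_path": "./Dockerfile",
        "build_context_path": "./",
        "command": "python main.py",                      # optional command override
    },
}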
manifest.ports: Configure the ports and endpoints used to route traffic to the service. Expose the deployment to make it accessible over the internet or keep it private, and implement authentication to restrict access.
manifest.ports[].port: Port number to expose (1 <= x <= 65535).
manifest.ports[].protocol: Protocol for the port. One of TCP, UDP.
manifest.ports[].expose: Whether to expose the port.
manifest.ports[].app_protocol: Application protocol for the port. Select the application protocol used by your service; for most use cases this should be http (HTTP/1.1). If you are running a gRPC server, select the grpc option. Only applicable if expose=true. One of http, grpc, tcp.
manifest.ports[].host: Host, e.g. ai.example.com, app.truefoundry.com. Up to 253 characters; each part of the host can be at most 63 characters, can contain letters, digits and hyphens, must begin and end with an alphanumeric character, and parts must be separated by periods (.).
manifest.ports[].path: Path, e.g. /v1/api/ml/, /v2/docs/. Should begin and end with a forward slash (/); each part can contain letters, digits and hyphens, must begin and end with an alphanumeric character, and parts should be separated by forward slashes (/).
manifest.ports[].rewrite_path_to: Rewrite the path prefix to a different path. If path is /v1/api and rewrite_path_to is /api, the URI http://0.0.0.0:8080/v1/api/houses in an HTTP request will be rewritten to http://0.0.0.0:8080/api/houses before the request is forwarded to your service. Defaults to /. Only applicable if path is given. Follows the same format rules as path.
manifest.ports[].auth: Authentication method for inbound traffic. type: basic_auth.
manifest.ports[].auth.username: Username for the user to authenticate with.
manifest.ports[].auth.password: Password for the user to authenticate with.
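An illustrative ports entry putting the fields above together: plain HTTP behind basic auth, with /v1/api/ rewritten to /api/ before forwarding. The host, paths, and credentials are placeholders:
# Illustrative ports entry (placeholder host, paths, and credentials).
port_entry = {
    "port": 8080,
    "protocol": "TCP",
    "expose": True,
    "app_protocol": "http",        # use "grpc" for a gRPC server
    "host": "ai.example.com",      # placeholder hostname
    "path": "/v1/api/",
    "rewrite_path_to": "/api/",    # /v1/api/houses is forwarded to your service as /api/houses
    "auth": {
        "type": "basic_auth",
        "username": "example-user",   # placeholder credentials
        "password": "example-pass",
    },
}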
manifest.type: service.
manifest.replicas: Number of replicas. Deploy multiple instances of your pods to distribute incoming traffic across them, ensuring effective load balancing (0 <= x <= 500).
manifest.artifacts_download: Describes the configuration for the artifacts cache. Download and cache models in a volume to improve loading speeds and reduce costs by avoiding repeated downloads.
manifest.artifacts_download.artifacts: List of artifacts to be cached.
manifest.artifacts_download.artifacts[]: Input for an artifact from the TrueFoundry Artifact Registry. type: truefoundry-artifact.
manifest.artifacts_download.artifacts[].artifact_version_fqn: Artifact or Model Version FQN of the artifact to be downloaded.
manifest.artifacts_download.artifacts[].download_path_env_variable: Environment variable that will contain the download path of the artifact.
manifest.artifacts_download.cache_volume: Describes the volume that will be used to cache the artifacts.
manifest.artifacts_download.cache_volume.storage_class: Storage class of the volume where artifacts will be cached.
manifest.artifacts_download.cache_volume.cache_size: Size of the volume (in GB) where artifacts will be cached. Should be greater than twice the size of the artifacts being cached (1 <= x <= 1000).
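A sketch of an artifacts_download block; the storage class, artifact version FQN, and environment variable name are placeholders for values from your own cluster and registry:
# Illustrative artifacts_download block (placeholder storage class and FQN).
artifacts_download = {
    "cache_volume": {
        "storage_class": "<storage-class>",  # a storage class available in your cluster
        "cache_size": 50,                    # GB; should exceed twice the cached artifacts' size
    },
    "artifacts": [
        {
            "type": "truefoundry-artifact",
            "artifact_version_fqn": "<artifact-version-fqn>",      # placeholder FQN
            "download_path_env_variable": "MODEL_DOWNLOAD_PATH",   # placeholder env var name
        }
    ],
}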
manifest.resources: Describes the resource constraints for the application so that it can be deployed accordingly on the cluster. Configure resource allocations and specify node constraints and capacity types to improve performance and reduce expenses.
manifest.resources.cpu_request: Requested CPU, which determines the minimum cost incurred. CPU usage can exceed the requested amount, but not the value specified in the limit. 1 CPU means 1 CPU core; fractional CPU can be requested, like 0.5 or 0.05 (0.001 <= x <= 256).
manifest.resources.cpu_limit: CPU limit beyond which usage cannot go. 1 CPU means 1 CPU core; fractional CPU can be requested, like 0.5. The CPU limit should be >= the CPU request (0.001 <= x <= 256).
manifest.resources.memory_request: Requested memory, which determines the minimum cost incurred. The unit is megabytes (MB), so 1 means 1 MB and 2000 means 2 GB (1 <= x <= 2000000).
manifest.resources.memory_limit: Memory limit after which the application will be killed with an OOM error. The unit is megabytes (MB), so 1 means 1 MB and 2000 means 2 GB. The memory limit should be greater than the memory request (1 <= x <= 2000000).
manifest.resources.ephemeral_storage_request: Requested disk storage in megabytes (MB). This is ephemeral storage and will be wiped out on pod restarts or eviction (1 <= x <= 2000000).
manifest.resources.ephemeral_storage_limit: Disk storage limit in megabytes (MB). Exceeding this limit will result in eviction; it should be greater than the request. This is ephemeral storage and will be wiped out on pod restarts or eviction (1 <= x <= 2000000).
manifest.resources.shared_memory_size: Shared memory requirements for your workload, in MB. Machine learning libraries like PyTorch can use shared memory for inter-process communication; if you use this, a tmpfs-backed volume is mounted at the /dev/shm directory. Any usage also counts against the workload's memory limit (resources.memory_limit) along with the workload's memory usage; if the overall usage goes above resources.memory_limit, the user process may get killed. The shared memory size cannot be more than the defined memory limit for the workload (64 <= x <= 2000000).
manifest.resources.node: Determines how the underlying node resource is to be utilized. type: node_selector.
manifest.resources.node.instance_families: Instance family of the underlying machine to use. Multiple instance families can be supplied; the workload is guaranteed to be scheduled on one of them.
manifest.resources.node.capacity_type: Configure what type of nodes to run the app on. By default no placement logic is applied. "spot_fallback_on_demand" will try to place the application on spot nodes but fall back to on-demand when spot nodes are not available; "spot" will strictly place the application on spot nodes; "on_demand" will strictly place the application on on-demand nodes. One of spot_fallback_on_demand, spot, on_demand.
manifest.resources.devices: Define custom device or accelerator requirements for your workload. NVIDIA GPUs, AWS Inferentia accelerators, and single-host TPU slices are currently supported. type: nvidia_gpu.
manifest.resources.devices[].count: Count of GPUs to provide to the application. Note that the exact count and maximum count available for a given GPU type depend on the cloud provider and cluster type (1 <= x <= 16).
manifest.resources.devices[].name: Name of the NVIDIA GPU, one of [P4, P100, V100, T4, A10G, A100_40GB, A100_80GB]. This field is required for Node Selector and can be ignored for Nodepool Selector. One instance of the card contains the following amount of memory - P4: 8 GB, P100: 16 GB, V100: 16 GB, T4: 16 GB, A10G: 24 GB, A100_40GB: 40 GB, A100_80GB: 80 GB.
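As a quick sanity check on the unit and ordering rules above (request <= limit, shared memory <= memory limit, memory in MB), here is an illustrative resources block with the documented constraints asserted; the values themselves are placeholders:
# Illustrative resources block; memory values are in MB, CPU in cores.
resources = {
    "cpu_request": 0.5,
    "cpu_limit": 1.0,
    "memory_request": 1000,            # 1000 MB = 1 GB
    "memory_limit": 2000,              # 2000 MB = 2 GB
    "ephemeral_storage_request": 500,
    "ephemeral_storage_limit": 1000,
    "shared_memory_size": 256,         # mounted at /dev/shm, counted against memory_limit
}

assert resources["cpu_request"] <= resources["cpu_limit"]
assert resources["memory_request"] <= resources["memory_limit"]
assert resources["ephemeral_storage_request"] <= resources["ephemeral_storage_limit"]
assert resources["shared_memory_size"] <= resources["memory_limit"]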
manifest.mounts: Configure data to be mounted to the service pod(s) as a string, secret, or volume.
manifest.mounts[]: type: secret.
manifest.mounts[].mount_path: Absolute file path where the file will be created.
manifest.mounts[].secret_fqn: The TrueFoundry secret whose value will be the file content.
manifest.labels: Labels.
manifest.kustomize.patch: Content of kustomization.yaml to perform the kustomize operation. Please do not include the resources section; it is filled in automatically.
manifest.kustomize.additions: Additional Kubernetes manifests to be included in the application.
manifest.liveness_probe, manifest.readiness_probe: Describe the configuration for the health probes. Both probes share the same fields:
  config: Instructions for assessing container health by executing an HTTP GET request. type: http.
  config.path: Path to the health check endpoint.
  config.port: Listening port for the health check endpoint (0 <= x <= 65535).
  config.host: Host name to connect to; defaults to the pod IP.
  config.scheme: Scheme to use for connecting to the host.
  initial_delay_seconds: Time to wait after the container has started before checking the endpoint (0 <= x <= 36000).
  period_seconds: How often to check the endpoint (1 <= x <= 36000).
  timeout_seconds: Time to wait for a response from the endpoint before considering it down (1 <= x <= 36000).
  success_threshold: Number of successful responses from the endpoint before the container is considered healthy (1 <= x <= 100).
  failure_threshold: Number of consecutive failures before the container is considered down (1 <= x <= 100).
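An illustrative probe block; the same shape is used for both liveness_probe and readiness_probe, and the endpoint path, port, and timings below are placeholders:
# Illustrative HTTP health probe (placeholder path, port, and timings).
probe = {
    "config": {
        "type": "http",
        "path": "/health",   # placeholder health check endpoint
        "port": 8080,        # placeholder listening port
    },
    "initial_delay_seconds": 10,
    "period_seconds": 10,
    "timeout_seconds": 2,
    "success_threshold": 1,
    "failure_threshold": 3,
}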
manifest.workspace_fqn: Fully qualified name of the workspace.
manifest.auto_shutdown.wait_time: The period to wait after the last received request before scaling the replicas to 0 (x >= 0).
manifest.allow_interception: Whether to allow intercepts to be applied for this service. This would inject an additional sidecar into each pod of the service. Not recommended in production.
manifest.rollout_strategy: Strategy that dictates how a rollout should happen when a new release of this service is made. type: rolling_update.
manifest.rollout_strategy.max_unavailable_percentage: Percentage of total replicas that can be brought down at one time. For a value of 25 with replicas set to 12, a minimum of (25% of 12) = 3 pods might be unavailable during the deployment. Setting this to a higher value can help speed up the deployment process (0 <= x <= 100). See the sketch after this section.
manifest.rollout_strategy.max_surge_percentage: Percentage of total replicas of the updated image that can be brought up over the total replica count. For a value of 25 with replicas set to 12, up to (12 + 25% of 12) = 15 pods might be running at one time. Setting this to a higher value can help speed up the deployment process (0 <= x <= 100).
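To make the percentage math above concrete, a small sketch that reproduces the 12-replica example from the field descriptions; the rounding direction is an assumption:
import math

replicas = 12
max_unavailable_pct = 25
max_surge_pct = 25

# Assumption: fractional results round the usual Kubernetes way -
# max unavailable down, max surge up. The documented example (25% of 12)
# is an exact integer, so no rounding is needed here.
max_unavailable = math.floor(replicas * max_unavailable_pct / 100)   # 3 pods may be down
max_surge = math.ceil(replicas * max_surge_pct / 100)                # 3 extra pods may be created

min_available_during_rollout = replicas - max_unavailable            # 12 - 3 = 9
max_running_during_rollout = replicas + max_surge                    # 12 + 3 = 15
print(min_available_during_rollout, max_running_during_rollout)      # 9 15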
createdBySubject (present at the top level of the response and within application):
  subjectId: Subject ID.
  subjectType: Subject type. One of user, team, serviceaccount.
  subjectSlug: Subject slug.
  subjectDisplayName: Subject display name.
application.lifecycleStage: One of active, deleting, deletion_failed.
application.type: One of async-service, service, job, spark-job, helm, notebook, codeserver, rstudio, ssh-server, volume, application, application-set, intercept, workflow.
application.recommendations: Recommendations for this application.
application.alerts: Alerts for this application.
application.alertsSummary: Summary of alerts for this application.
application.applicationDebugInfos: Debug infos for this application.
The nested application and deployment objects in this response reuse the createdBySubject, lifecycleStage, type, and service manifest definitions documented above; their fields are not repeated here.
+docs=Describes the configuration for the service
+usage=Name of the service. This uniquely identifies this service in the workspace.
Name can only contain alphanumeric characters and '-' and can be atmost 25 characters long +sort=1 +message=3 to 32 lower case characters long alphanumeric word, may contain - in between, cannot start with a number
+docs=Specify whether you want to deploy a Docker image or build and deploy from source code +label=Deploy a Docker image or build and deploy from source code +icon=fa-solid fa-cloud-arrow-up:#21B6A8 +sort=2
+value=build
build
+docs=Source code location. +label=Fetch source code to build and deploy +icon=fa-code +sort=1
+docs=Instructions to build a container image out of the build source +label=Build using DockerFile or using Buildpack +icon=fa-wrench +sort=2
+docs=FQN of the container registry. You can the FQN of your desired container registry (or add one) in the Integrations pageIntegrations page +label=Docker Registry +usage=FQN of the container registry. If you can't find your registry here, add it through the Integrations page
+docs=Specify the ports you want the service to be exposed to +label=Configure ports and endpoints to route customer traffic +usage=Expose the deployment to make it accessible over the internet or keep it private. Implement authentication to restrict access. Docs +icon=fa-plug +sort=4
+docs=Describes the ports the service should be exposed to.
+usage=Port number to expose.
1 <= x <= 65535
+usage=Protocol for the port.
TCP
, UDP
+usage=Expose the port
+label=Application Protocol
+usage=Application Protocol for the port.
Select the application protocol used by your service. For most use cases, this should be http
(HTTP/1.1).
If you are running a gRPC server, select the grpc
option.
This is only applicable if expose=true
.
http
, grpc
, tcp
+usage=Host e.g. ai.example.com, app.truefoundry.com +message=Upto 253 characters, each part of host should be at most 63 characters long, can contain alphabets, digits and hypen, must begin and end with an alphanumeric characters. Parts must be separated by periods (.)
+usage=Path e.g. /v1/api/ml/, /v2/docs/ +message=Should begin and end with a forward slash (/). Each part can can contain alphabets, digits and hypen, must begin and end with an alphanumeric characters. Parts should be separated by forward slashes (/)
+label=Rewrite Path to
+usage=Rewrite the path prefix to a different path.
If path
is /v1/api
and rewrite_path_to
is /api
. The URI in the HTTP request http://0.0.0.0:8080/v1/api/houses
will be rewritten to http://0.0.0.0:8080/api/houses
before the request is forwarded your service.
Defaults to /
.
This is only applicable if path
is given.
+message=Should begin and end with a forward slash (/). Each part can can contain alphabets, digits and hypen, must begin and end with an alphanumeric characters. Parts should be separated by forward slashes (/)
+usage=Authentication method for inbound traffic
+value=service
service
+label=Replicas +usage=Deploy multiple instances of your pods to distribute incoming traffic across them, ensuring effective load balancing. +icon=fa-clone +sort=4
0 <= x <= 500
+docs=Describes the configuration for the artifacts cache +label=Artifacts Download +usage=Download and cache models in a volume to enhance loading speeds and reduce costs by avoiding repeated downloads. Docs
+label=Artifacts +usage=List of artifacts to be cached
+docs=Input for Artifact from TrueFoundry Artifact Registry +label=TrueFoundry Artifact Source
+docs=Describes the volume that will be used to cache the models +label=Artifacts Cache Volume
+docs=Describes the resource constraints for the application so that it can be deployed accordingly on the cluster To learn more you can go here +icon=fa-microchip +label=Resources +usage=Configure resource allocations, specify node constraints and capacity types to improve performance and reduce expenses. Docs
+label=CPU Request
+sort=1
+usage=Requested CPU which determines the minimum cost incurred. The CPU usage can exceed the requested
amount, but not the value specified in the limit. 1 CPU means 1 CPU core. Fractional CPU can be requested
like 0.5
or 0.05
0.001 <= x <= 256
+label=CPU Limit
+usage=CPU limit beyond which the usage cannot be exceeded. 1 CPU means 1 CPU core. Fractional CPU can be requested
like 0.5
. CPU limit should be >= cpu request.
+sort=2
0.001 <= x <= 256
+label=Memory Request +usage=Requested memory which determines the minimum cost incurred. The unit of memory is in megabytes(MB). So 1 means 1 MB and 2000 means 2GB. +sort=3
1 <= x <= 2000000
+label=Memory Limit +usage=Memory limit after which the application will be killed with an OOM error. The unit of memory is in megabytes(MB). So 1 means 1 MB and 2000 means 2GB. MemoryLimit should be greater than memory request. +sort=4
1 <= x <= 2000000
+label=Storage Request +usage=Requested disk storage. The unit of memory is in megabytes(MB). This is ephemeral storage and will be wiped out on pod restarts or eviction +sort=5
1 <= x <= 2000000
+label=Storage Limit +usage=Disk storage limit. The unit of memory is in megabytes(MB). Exceeding this limit will result in eviction. It should be greater than the request. This is ephemeral storage and will be wiped out on pod restarts or eviction +sort=6
1 <= x <= 2000000
+label=Shared Memory Size (MB)
+usage=Define the shared memory requirements for your workload. Machine learning libraries like Pytorch can use Shared Memory
for inter-process communication. If you use this, we will mount a tmpfs
backed volume at the /dev/shm
directory.
Any usage will also count against the workload's memory limit (resources.memory_limit
) along with your workload's memory usage.
If the overall usage goes above resources.memory_limit
the user process may get killed.
Shared Memory Size cannot be more than the defined Memory Limit for the workload.
64 <= x <= 2000000
+label=Node +usage=This field determines how the underlying node resource is to be utilized
+label=Devices +usage=Define custom device or accelerator requirements for your workload. We currently support NVIDIA GPUs, AWS Inferentia Accelerators, Single Host TPU Slices.
+usage=Configure data to be mounted to service pod(s) as a string, secret or volume. Docs +sort=10011
+value=secret
secret
+label=File path +usage=Absolute file path where the file will be created. +message=Please enter a valid file path
+label=Secret +usage=The TrueFoundry secret whose value will be the file content.
+label=Labels
+label=Patch
+usage=Content of kustomization.yaml to perform kustomize operation. Please do not include the resources
section. It is filled in automatically
+label=Additional Manifests +usage=Additional kubernetes manifests to be included in the application
+docs=Describes the configuration for the Health Probe's To learn more you can go here +icon=fa-heart +uiType=HealthProbe
+docs=Describes the Instructions for assessing container health by executing an HTTP GET request. To learn more you can go here +label=Instructions for assessing container health by executing an HTTP GET request.
+usage=Time to wait after container has started before checking the endpoint
0 <= x <= 36000
+usage=How often to check the endpoint
1 <= x <= 36000
+usage=Time to wait for a response from the endpoint before considering it down
1 <= x <= 36000
+usage=Number of successful responses from the endpoint before container is considered healthy
1 <= x <= 100
+usage=Number of consecutive failures before the container is considered down
1 <= x <= 100
+docs=Describes the configuration for the Health Probe's To learn more you can go here +icon=fa-heart +uiType=HealthProbe
+docs=Describes the Instructions for assessing container health by executing an HTTP GET request. To learn more you can go here +label=Instructions for assessing container health by executing an HTTP GET request.
+usage=Time to wait after container has started before checking the endpoint
0 <= x <= 36000
+usage=How often to check the endpoint
1 <= x <= 36000
+usage=Time to wait for a response from the endpoint before considering it down
1 <= x <= 36000
+usage=Number of successful responses from the endpoint before container is considered healthy
1 <= x <= 100
+usage=Number of consecutive failures before the container is considered down
1 <= x <= 100
+label=Workspace FQN +docs=Fully qualified name of the workspace +uiType=Hidden
+label=Wait Time +usage=The period to wait after the last received request before scaling the replicas to 0
x >= 0
+label=Allow intercepts +usage=Whether to allow intercepts to be applied for this service. This would inject an additional sidecar in each pod of the service. Not recommended on production
+label=Rollout strategy +usage=Strategy to dictate how a rollout should happen when a new release for this service is made Docs
+value=rolling_update
rolling_update
+label=Max unavailable(%) +usage=Percentage of total replicas that can be brought down at one time. For a value of 25 when replicas are set to 12 this would mean minimum (25% of 12) = 3 pods might be unavailable during the deployment. Setting this to a higher value can help in speeding up the deployment process.
0 <= x <= 100
+label=Max Surge(%) +usage=Percentage of total replicas of updated image that can be brought up over the total replicas count. For a value of 25 when replicas are set to 12 this would mean (12+(25% of 12) = 15) pods might be running at one time. Setting this to a higher value can help in speeding up the deployment process.
0 <= x <= 100
Subject ID
Subject type
user
, team
, serviceaccount
Subject slug
Subject display name
Subject ID
Subject type
user
, team
, serviceaccount
Subject slug
Subject display name
active
, deleting
, deletion_failed
async-service
, service
, job
, spark-job
, helm
, notebook
, codeserver
, rstudio
, ssh-server
, volume
, application
, application-set
, intercept
, workflow
Recommendations for this application
Alerts for this application
Summary of alerts for this application
Debug infos for this application
+docs=Describes the configuration for the service
Applied recommendations for this deployment
deploymentStatuses[].status, currentStatus.status: One of INITIALIZED, BUILD_SUCCESS, BUILD_FAILED, DEPLOY_FAILED, DEPLOY_FAILED_WITH_RETRY, DEPLOY_SUCCESS, ROLLOUT_STARTED, SET_TRAFFIC, PAUSED, FAILED, CANCELLED, REDEPLOY_STARTED, BUILDING.
deploymentStatuses[].transition, currentStatus.transition: One of BUILDING, DEPLOYING, REUSING_EXISTING_BUILD, COMPONENTS_DEPLOYING, WAITING, or empty.
appliedRecommendations: Applied recommendations for this deployment.
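As a sketch of consuming these enums, assuming the deployment dict returned by the Python example near the top of this page; the grouping of values into "success" and "failure" sets is illustrative:
# Inspect the deployment returned by the resume call above.
SUCCESS = {"DEPLOY_SUCCESS"}
FAILURE = {"BUILD_FAILED", "DEPLOY_FAILED", "DEPLOY_FAILED_WITH_RETRY", "FAILED", "CANCELLED"}

current = deployment["currentStatus"]
if current["status"] in SUCCESS:
    print("application resumed:", deployment["applicationId"])
elif current["status"] in FAILURE:
    print("resume failed:", current.get("message"))
else:
    # Still in progress, e.g. INITIALIZED or ROLLOUT_STARTED, with a
    # transition such as DEPLOYING or WAITING.
    print("in progress:", current["status"], current.get("transition"))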