import logging

from servicefoundry import (
    Service, Build, LocalSource,
    DockerFileBuild, Port, ArtifactsDownload,
    TrueFoundryArtifactSource,
    Resources, GPUType, NvidiaGPU,
    HealthProbe, HttpProbe,
)

logging.basicConfig(level=logging.INFO, format=logging.BASIC_FORMAT)
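
# Replace the "<...>" placeholders below with values for your own deployment.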
def main():
    service = Service(
        name="<service_name>",
        image=Build(
            build_source=LocalSource(local_build=False),
            build_spec=DockerFileBuild(
                dockerfile_path="./Dockerfile",
                command="python main.py",
            ),
        ),
        ports=[Port(port=8080, host="<host.app.example.com>", path="/")],
        env={
            # This should be the `entry_point` argument: the code file
            # containing model_fn, predict_fn, etc.
            "SAGEMAKER_PROGRAM": "<inference.py>",
        },
        artifacts_download=ArtifactsDownload(
            artifacts=[
                TrueFoundryArtifactSource(
                    # This should be the model version FQN obtained by running `upload_model.py`
                    artifact_version_fqn="<model_version_fqn>",
                    download_path_env_variable="MODEL_DIR",
                ),
            ],
        ),
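        # A minimal sketch of how the serving code can locate the downloaded
        # model: the artifact is pulled before the container command runs, and
        # its local path is exposed via the MODEL_DIR environment variable,
        # e.g. `model_dir = os.environ["MODEL_DIR"]` inside the entry point.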
        resources=Resources(
            cpu_request=1,
            cpu_limit=4,
            memory_request=8000,
            memory_limit=16000,
            ephemeral_storage_request=10000,
            ephemeral_storage_limit=16000,
            devices=[
                NvidiaGPU(name=GPUType.T4, count=1),
            ],
        ),
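        # Note: CPU values are in cores and memory/ephemeral storage values
        # are in MB; the single T4 above is an assumption, so pick the GPU
        # type and count that fit your model.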
        liveness_probe=HealthProbe(
            config=HttpProbe(path="/ping", port=8080),
            initial_delay_seconds=30,
            period_seconds=10,
            timeout_seconds=1,
            success_threshold=1,
            failure_threshold=5,
        ),
        readiness_probe=HealthProbe(
            config=HttpProbe(path="/ping", port=8080),
            initial_delay_seconds=30,
            period_seconds=10,
            timeout_seconds=1,
            success_threshold=1,
            failure_threshold=5,
        ),
    )
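    # Both probes target GET /ping on port 8080, the health endpoint that
    # SageMaker-compatible inference containers are expected to serve.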
    service.deploy(workspace_fqn="<workspace_fqn>", wait=False)


if __name__ == '__main__':
    main()
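
# Usage (a sketch, assuming this file is saved as deploy.py and you have
# already authenticated with the TrueFoundry CLI, e.g. via `sfy login`):
#   python deploy.py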