🚧

Note

This feature is in Beta - the input parameters and Python APIs may change.
We also plan to add support for more model formats and frameworks, more model stores, and easier inference and testing tools.

We would love to hear from you if you have any feedback or run into any issues.

📘

Note

This is a full example that involves a TorchServe custom handler and extra files for reference. Keep an eye out for a simplified example coming soon 👀

In this example, we will train a model on the CIFAR-10 image classification dataset and then deploy it as a Service that can be used for inference.

Training a Model

We can also use Truefoundry Jobs to train this model in the cloud 😉

Our initial code structure looks as follows:

.
├── requirements.txt
├── transforms.py
├── model.py
├── train.py
├── handler.py

requirements.txt

--extra-index-url https://download.pytorch.org/whl/cpu
torch==1.12.1
torchvision==0.13.1

# For logging the trained model
mlfoundry>=0.5.5,<0.6.0

# For deploying it as a Service
servicefoundry>=0.6.1,<0.7.0

transforms.py

import torchvision.transforms as transforms

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

model.py

import torch
import torch.nn as nn
import torch.nn.functional as F

CLASSES = (
    'plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'
)

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

handler.py

import os
import io
import base64
import logging

import torch
import torch.nn.functional as F
from PIL import Image
from ts.torch_handler.base_handler import BaseHandler
from transforms import transform

logging.basicConfig(level=logging.DEBUG, format='[%(asctime)s] [%(levelname)s] -- %(message)s')
logger = logging.getLogger(__name__)
CLASSES = (
    'plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'
)

class CNNCIFAR10Handler(BaseHandler):
    def __init__(self):
        super().__init__()
        self.initialized = False

    def initialize(self, context):
        properties = context.system_properties
        has_gpu = torch.cuda.is_available() and properties.get("gpu_id") is not None
        self.map_location = "cuda" if has_gpu else "cpu"
        self.device = torch.device(
            self.map_location + ":" + str(properties.get("gpu_id"))
            if has_gpu else self.map_location
        )
        self.manifest = context.manifest
        model_dir = properties.get("model_dir")
        # Resolve the serialized model file path from the model archive manifest
        serialized_file = self.manifest["model"]["serializedFile"]
        model_pt_path = os.path.join(model_dir, serialized_file)
        logger.debug("Loading eager model without state dict")
        self.model = torch.load(model_pt_path, map_location=self.map_location)
        self.model.to(self.device)
        self.model.eval()
        logger.debug("Model file %s loaded successfully", model_pt_path)

        self.initialized = True

    def preprocess(self, data):
        images = []
        logger.debug(data)
        for row in data:
            # Compat layer: normally the envelope should just return the data
            # directly, but older versions of Torchserve didn't have envelope.
            image = row.get("data") or row.get("body")
            if isinstance(image, str):
                # if the image is a base64 encoded string
                image = base64.b64decode(image)

            # If the image is sent as bytesarray
            if isinstance(image, (bytearray, bytes)):
                image = Image.open(io.BytesIO(image))
            else:
                # if the image is a list
                image = torch.FloatTensor(image)
            image = transform(image)
            images.append(image)

        images = torch.stack(images).to(self.device)
        return images

    def postprocess(self, data):
        data = F.softmax(data, dim=1).cpu().numpy().tolist()  # B x C
        return [dict(zip(CLASSES, probs)) for probs in data]

train.py

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision

from model import Net
from transforms import transform

import mlfoundry

def main():
    batch_size = 4
    trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

    net = Net()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    for epoch in range(2):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if i % 2000 == 1999:
                print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
                running_loss = 0.0
    print('Finished Training')
    client = mlfoundry.get_client()
    run = client.create_run(project_name="cifar-clf", run_name="cifar-pt")
    
    model_version = run.log_model(
        name="cifar-pt",
        model=net,
        framework="pytorch",
        model_save_kwargs=dict(
            extra_files=["./model.py", "./transforms.py"],
            torchserve_handler="./handler.py"
        )
    )
    print("Model Logged as:", model_version.fqn)
    


if __name__ == '__main__':
    main()

We can now run this script as python train.py. This will train a small CNN, log it to the Model Registry, and print a Model Version FQN (Fully Qualified Name) that we will use later.

A Model Version FQN looks like model:{org_name}/{username}/{project_name}/{model_name}:{version}
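
To make the format concrete, here is a small illustrative snippet (the FQN value is just an example from this guide) that splits a Model Version FQN into its parts:

# Illustrative only - split an example Model Version FQN into its components
fqn = "model:truefoundry/user/cifar-clf/cifar-pt:1"
prefix, rest = fqn.split(":", 1)      # "model", "truefoundry/user/cifar-clf/cifar-pt:1"
path, version = rest.rsplit(":", 1)   # "truefoundry/user/cifar-clf/cifar-pt", "1"
org_name, username, project_name, model_name = path.split("/")
print(org_name, username, project_name, model_name, version)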

Deploying the model

Before we proceed, make sure you have completed the setup steps.

In short, you should have:
  1. Signed up on Truefoundry Platform
  2. Installed the servicefoundry SDK: pip install -U "servicefoundry>=0.6.0,<0.7.0"
  3. Logged in: sfy login
  4. Have a Workspace FQN you can deploy the model to. You can find all workspaces here

We will now deploy the model we logged to Truefoundry's Model Registry, using its Model Version FQN.

A Model Version FQN looks like model:{org_name}/{username}/{project_name}/{model_name}:{version}, e.g. model:truefoundry/user/cifar-clf/cifar-pt:1

We can deploy our model using either Python code or a YAML file with the servicefoundry CLI.

Deploy using Python code

Create a deploy.py file with the following code and replace <YOUR_MODEL_VERSION_FQN> and <YOUR_WORKSPACE_FQN> with your values.

import logging
from servicefoundry import ModelDeployment, TruefoundryModelRegistry, Resources

logging.basicConfig(level=logging.INFO, format=logging.BASIC_FORMAT)

# Replace these with your values
MODEL_VERSION_FQN = "<YOUR_MODEL_VERSION_FQN>" # E.g. model:truefoundry/user/cifar-clf/cifar-pt:1
WORKSPACE = "<YOUR_WORKSPACE_FQN>" # E.g. tfy-ctl-euwe1:model-deployment-test

model_deployment = ModelDeployment(
    name="cifar-svc",
    model_source=TruefoundryModelRegistry(
        model_version_fqn=MODEL_VERSION_FQN
    ),
    resources=Resources(cpu_request=0.2, cpu_limit=0.5, memory_request=500, memory_limit=1000)
)
deployment = model_deployment.deploy(workspace_fqn=WORKSPACE)

Run this as python deploy.py from a shell.

Deploy using CLI

Create a servicefoundry.yaml file with the following spec and replace <YOUR_MODEL_VERSION_FQN> with your model version FQN.

# Replace <YOUR_MODEL_VERSION_FQN> with your model version FQN - E.g. model:truefoundry/user/cifar-clf/cifar-pt:1
name: cifar-svc
type: model-deployment
model_source:
  type: tfy-model-registry
  model_version_fqn: <YOUR_MODEL_VERSION_FQN>
resources:
  cpu_request: 0.2
  cpu_limit: 0.5
  memory_request: 500
  memory_limit: 1000

Run this as sfy deploy --workspace-fqn <YOUR_WORKSPACE_FQN> from a shell.

Testing the model

Once the deployment goes through (via either the Python script or the CLI), an Endpoint for your model will be available on the UI Dashboard.

We will send a couple of images, encoded as base64 strings, to our model.

📘

Note

Behind the scenes, our model is deployed using TorchServe and KServe's V2 Dataplane protocol.

The general format of the Inference URL thus looks like:

{endpoint_url}/v2/models/{model_name}/infer
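
If you want to build the request payload from your own image files, here is a minimal sketch of how an image can be base64 encoded into the V2 "inputs" format used in the requests below. The file name plane.jpg is just a placeholder, and the "name" field here is simply a generated identifier:

import base64
import uuid

# Placeholder path - replace with your own image file
IMAGE_PATH = "plane.jpg"

with open(IMAGE_PATH, "rb") as f:
    encoded_image = base64.b64encode(f.read()).decode("utf-8")

# Each image becomes one entry in the "inputs" list of the V2 request body
inputs = [
    {
        "data": encoded_image,
        "datatype": "BYTES",
        "name": str(uuid.uuid4()),
        "shape": -1,
    }
]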

Here is an example Python code snippet to send a request with the above data.

import json
from urllib.parse import urljoin

import requests

# Replace these with your endpoint URL and model name
ENDPOINT_URL = "https://truefoundry.tfy-ctl-euwe1.truefoundry.com/cifar-svc-model-deployment-test"
MODEL_NAME = "cifar-pt"

response = requests.post(
    # Ensure a trailing slash so urljoin keeps the service path before appending the route
    urljoin(ENDPOINT_URL.rstrip("/") + "/", f"v2/models/{MODEL_NAME}/infer"),
    json={
      "inputs": [
          {
              "data": "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAAgACADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwCjb3MU8kkSsS8Rw4IxjIqW4mhtrQyDzXmUktGqggL2Oc9c1pajDFAsn7sRzT3c21lHXbkDP4AflVmRItX0OW9NtFaK0aRnYu1XkA5H5H9K8n6mlVcW9D0FiG6d+pyay3N5ayXCkQqj7dpG7n3qrFd+fNJbSbfNUZyvQ1q6fZRW2k6tK58x/NWNATgBdv8AiawJLSRzDdwfJIhwOfvcfyrqnh6fJZK1jnjWnzas9M1JBq9nbSWab3mnjkjCc4LdQfzOayNR0pdI12Sw1PUbaG2Uh9izE5ySeBjjORwemKxvDuvnw7rkc8iGW1SQs0Q7HGAw/OsW+1Iaje3FzPM3nSOWZn5yfrXrywnv2ex5scV7t1uWNQv5YVa1sHD/AHl85XIUgnuPXGOaXT5IbSzHmBpJu5Jzj6VVWHZGJiA6n0OauTb7C2juJo43Mh+RD/CPXHeqeApTjysj67Ui7o//2Q==","datatype":"BYTES","name":"312a4eb0-0ca7-4803-a101-a6d2c18486fd","shape":-1},{"data":"/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAAgACADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwCjb3MU8kkSsS8Rw4IxjIqW4mhtrQyDzXmUktGqggL2Oc9c1pajDFAsn7sRzT3c21lHXbkDP4AflVmRItX0OW9NtFaK0aRnYu1XkA5H5H9K8n6mlVcW9D0FiG6d+pyay3N5ayXCkQqj7dpG7n3qrFd+fNJbSbfNUZyvQ1q6fZRW2k6tK58x/NWNATgBdv8AiawJLSRzDdwfJIhwOfvcfyrqnh6fJZK1jnjWnzas9M1JBq9nbSWab3mnjkjCc4LdQfzOayNR0pdI12Sw1PUbaG2Uh9izE5ySeBjjORwemKxvDuvnw7rkc8iGW1SQs0Q7HGAw/OsW+1Iaje3FzPM3nSOWZn5yfrXrywnv2ex5scV7t1uWNQv5YVa1sHD/AHl85XIUgnuPXGOaXT5IbSzHmBpJu5Jzj6VVWHZGJiA6n0OauTb7C2juJo43Mh+RD/CPXHeqeApTjysj67Ui7o//2Q==",
              "datatype": "BYTES",
              "name": "312a4eb0-0ca7-4803-a101-a6d2c18486fe",
              "shape": -1
          }
      ]
    }
)

result = response.json()
print(json.dumps(result, indent=4))
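
The exact response structure can vary with the TorchServe and KServe versions in use. As a rough sketch, assuming a V2-style response where each prediction produced by postprocess() in handler.py lands under an "outputs" entry's "data" field, the top class could be pulled out like this:

# Rough sketch - assumes a KServe V2 style response with an "outputs" list,
# where each output's "data" carries the {class: probability} mapping returned
# by postprocess() in handler.py. Adjust to the response shape you actually get.
for output in result.get("outputs", []):
    data = output.get("data")
    # The mapping may be wrapped in a list depending on the serving layer
    probs = data[0] if isinstance(data, list) and data and isinstance(data[0], dict) else data
    if isinstance(probs, dict):
        top_class = max(probs, key=probs.get)
        print(f"{output.get('name')}: {top_class} ({probs[top_class]:.3f})")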
You can send the same request with curl from a shell:

# Replace these two with your values
ENDPOINT_URL="https://truefoundry.tfy-ctl-euwe1.truefoundry.com/cifar-svc-example-model-deployment-test"
MODEL_NAME="cifar-pt"

curl -X POST \
     -H 'Content-Type: application/json' \
     -d '{"inputs":[{"data":"/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAAgACADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwCjb3MU8kkSsS8Rw4IxjIqW4mhtrQyDzXmUktGqggL2Oc9c1pajDFAsn7sRzT3c21lHXbkDP4AflVmRItX0OW9NtFaK0aRnYu1XkA5H5H9K8n6mlVcW9D0FiG6d+pyay3N5ayXCkQqj7dpG7n3qrFd+fNJbSbfNUZyvQ1q6fZRW2k6tK58x/NWNATgBdv8AiawJLSRzDdwfJIhwOfvcfyrqnh6fJZK1jnjWnzas9M1JBq9nbSWab3mnjkjCc4LdQfzOayNR0pdI12Sw1PUbaG2Uh9izE5ySeBjjORwemKxvDuvnw7rkc8iGW1SQs0Q7HGAw/OsW+1Iaje3FzPM3nSOWZn5yfrXrywnv2ex5scV7t1uWNQv5YVa1sHD/AHl85XIUgnuPXGOaXT5IbSzHmBpJu5Jzj6VVWHZGJiA6n0OauTb7C2juJo43Mh+RD/CPXHeqeApTjysj67Ui7o//2Q==","datatype":"BYTES","name":"312a4eb0-0ca7-4803-a101-a6d2c18486fd","shape":-1},{"data":"/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAAgACADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwCjb3MU8kkSsS8Rw4IxjIqW4mhtrQyDzXmUktGqggL2Oc9c1pajDFAsn7sRzT3c21lHXbkDP4AflVmRItX0OW9NtFaK0aRnYu1XkA5H5H9K8n6mlVcW9D0FiG6d+pyay3N5ayXCkQqj7dpG7n3qrFd+fNJbSbfNUZyvQ1q6fZRW2k6tK58x/NWNATgBdv8AiawJLSRzDdwfJIhwOfvcfyrqnh6fJZK1jnjWnzas9M1JBq9nbSWab3mnjkjCc4LdQfzOayNR0pdI12Sw1PUbaG2Uh9izE5ySeBjjORwemKxvDuvnw7rkc8iGW1SQs0Q7HGAw/OsW+1Iaje3FzPM3nSOWZn5yfrXrywnv2ex5scV7t1uWNQv5YVa1sHD/AHl85XIUgnuPXGOaXT5IbSzHmBpJu5Jzj6VVWHZGJiA6n0OauTb7C2juJo43Mh+RD/CPXHeqeApTjysj67Ui7o//2Q==","datatype":"BYTES","name":"312a4eb0-0ca7-4803-a101-a6d2c18486fe","shape":-1}]}' \
     "${ENDPOINT_URL}/v2/models/${MODEL_NAME}/infer"