Documentation Index
Fetch the complete documentation index at: https://docs.gp.scale.com/llms.txt
Use this file to discover all available pages before exploring further.
1. Setup Client and Test Data
Initialize the SGP Client and setup translation test data.
from uuid import uuid4

from scale_gp import SGPClient

# Client pointed at the multi-tenant production environment.
client = SGPClient(environment="production-multitenant")

# Translation examples: source text, target language, and a reference translation.
test_data = [
    {
        "origin_text": "Artificial intelligence (AI) is the simulation...",
        "language": "Spanish",
        "expected_translation": "La inteligencia artificial (IA) es la si...",
    },
    # Additional test data...
]
2. Create Translation Dataset
Define translation test cases and create the dataset.
from scale_gp.lib.dataset_builder import DatasetBuilder
# FIX: TranslationTestCaseSchema was used below but never imported in this or
# any earlier snippet, which raises NameError when run as shown.
from scale_gp.lib.types.translation import TranslationTestCaseSchema

# Build one structured test case per raw test-data record.
test_cases = [
    TranslationTestCaseSchema(
        origin_text=data["origin_text"],
        language=data["language"],
        expected_translation=data["expected_translation"],
    )
    for data in test_data
]

# Dataset creation — the uuid4 suffix keeps repeated runs from colliding on name.
dataset = DatasetBuilder(client).initialize(
    account_id="account_id_placeholder",
    name=f"translation Dataset {uuid4()}",
    test_cases=test_cases
)
print(dataset)
3. Define External Application
Implement a custom translation application for the evaluation.
# FIX: datetime was used below but never imported in this or any earlier
# snippet, which raises NameError when run as shown.
from datetime import datetime

from scale_gp.lib.external_applications import ExternalApplication, ExternalApplicationOutputFlexible

def my_translation_app(prompt, test_case):
    """Stub application: returns a canned translation plus one trace span
    and a sample metric, in the flexible output format the SDK expects."""
    # Fixed microsecond value so demo timestamps look uniform across runs.
    start = datetime.now().replace(microsecond=5000)
    return ExternalApplicationOutputFlexible(
        generation_output={
            "generated_translation": "Sample Translation HERE"
        },
        trace_spans=[
            {
                "node_id": "formatting",
                # isoformat() already returns a str; the redundant str() wrapper
                # from the original was dropped.
                "start_timestamp": start.isoformat(),
                "operation_input": {
                    "document": "EXAMPLE INPUT TEXT"
                },
                "operation_output": {
                    "formatted_document": "EXAMPLE INPUT TEXT FORMATTED"
                },
                "duration_ms": 1000,
            }
        ],
        metrics={"grammar": 0.5}
    )

# Initialize application
app = ExternalApplication(client)
app.initialize(application_variant_id="variant_id_placeholder", application=my_translation_app)
app.generate_outputs(evaluation_dataset_id=dataset.id, evaluation_dataset_version='1')
4. Setup Evaluation Questions and Configurations
Create evaluation questions and setup evaluation configuration.
# Question definitions: a categorical check (and any additional questions).
question_requests = [
    {
        "type": "categorical",
        "title": "Test Question 1",
        "prompt": "Does the translation have punctuation issues",
        "choices": [{"label": "No", "value": 0}, {"label": "Yes", "value": 1}],
        "account_id": "account_id_placeholder",
    },
    # Additional questions...
]

# Register each question and collect its id for the question set.
question_ids = []
for request in question_requests:
    created = client.questions.create(**request)
    question_ids.append(created.id)
    print(created)

# Group the registered questions into a named set.
q_set = client.question_sets.create(
    name="translation question set",
    question_ids=question_ids,
    account_id="account_id_placeholder",
)
print(q_set)

# Human-annotated evaluation configuration backed by the question set above.
config = client.evaluation_configs.create(
    account_id="account_id_placeholder",
    question_set_id=q_set.id,
    evaluation_type='human',
)
print(config)
5. Initialize and Start Evaluation
Set up annotation configuration and start the evaluation.
from scale_gp.types import TranslationAnnotationConfigParam
from scale_gp.lib.types import data_locator

# Map the annotation UI fields onto locations in the test-case data and the
# application's output.
annotation_config_dict = TranslationAnnotationConfigParam(
    original_text_loc=data_locator.test_case_data.input["origin_text"],
    translation_loc=data_locator.test_case_output.output["generated_translation"],
    expected_translation_loc=data_locator.test_case_data.expected_output["expected_translation"],
)

# Create the evaluation; annotators see the fields wired up above.
evaluation = client.evaluations.create(
    account_id="account_id_placeholder",
    application_variant_id="variant_id_placeholder",
    application_spec_id="spec_id_placeholder",
    description="Demo Evaluation",
    name="Translation Evaluation",
    evaluation_config_id=config.id,
    annotation_config=annotation_config_dict,
    evaluation_dataset_id=dataset.id,
    type="builder",
)
print(evaluation)
import os
from uuid import uuid4
from datetime import datetime
from typing import List
import httpx
from scale_gp import SGPClient
from scale_gp.lib.types.translation import TranslationTestCaseSchema
from scale_gp.lib.dataset_builder import DatasetBuilder
from scale_gp.lib.external_applications import ExternalApplication, ExternalApplicationOutputFlexible
from scale_gp.types import TranslationAnnotationConfigParam
from scale_gp.lib.types import data_locator
# Initialize the client against the multi-tenant production environment.
client = SGPClient(environment="production-multitenant")

# Test data for translation: source text, target language, reference translation.
test_data = [
    {
        "origin_text": "Artificial intelligence (AI) is the simulation of human intelligence processes by machines, especially computer systems.",
        "language": "Spanish",
        "expected_translation": "La inteligencia artificial (IA) es la simulación de procesos de inteligencia humana por máquinas, especialmente sistemas informáticos."
    },
    {
        "origin_text": "Machine learning is a subset of AI that focuses on the development of computer programs that can access data and use it to learn for themselves.",
        "language": "French",
        "expected_translation": "L'apprentissage automatique est un sous-ensemble de l'IA qui se concentre sur le développement de programmes informatiques capables d'accéder aux données et de les utiliser pour apprendre par eux-mêmes."
    },
    {
        "origin_text": "Natural Language Processing (NLP) is a branch of AI that helps computers understand, interpret, and manipulate human language.",
        "language": "German",
        "expected_translation": "Die Verarbeitung natürlicher Sprache (NLP) ist ein Zweig der KI, der Computern hilft, menschliche Sprache zu verstehen, zu interpretieren und zu manipulieren."
    },
    {
        "origin_text": "Deep learning is part of a broader family of machine learning methods based on artificial neural networks with representation learning.",
        "language": "Italian",
        "expected_translation": "Il deep learning fa parte di una più ampia famiglia di metodi di apprendimento automatico basati su reti neurali artificiali con apprendimento della rappresentazione."
    },
    {
        "origin_text": "Robotics is a field of engineering that involves the design, construction, and operation of robots, often incorporating AI for decision-making and task execution.",
        "language": "Portuguese",
        "expected_translation": "A robótica é um campo da engenharia que envolve o design, construção e operação de robôs, frequentemente incorporando IA para tomada de decisões e execução de tarefas."
    }
]

# Create test cases — a comprehension replaces the original manual append loop.
test_cases = [
    TranslationTestCaseSchema(
        origin_text=data["origin_text"],
        language=data["language"],
        expected_translation=data["expected_translation"],
    )
    for data in test_data
]

# Dataset creation — the uuid4 suffix keeps repeated runs from colliding on name.
dataset = DatasetBuilder(client).initialize(
    account_id="account_id_placeholder",
    name=f"translation Dataset {uuid4()}",
    test_cases=test_cases
)
print(dataset)
# Define external application
def my_translation_app(prompt, test_case):
    """Stub application: logs the incoming source text and returns a canned
    translation with one trace span and a sample metric."""
    # Show which input is being handled (first 50 chars of the source text).
    print(prompt['origin_text'][:50])
    started_at = datetime.now().replace(microsecond=5000)
    formatting_span = {
        "node_id": "formatting",
        "start_timestamp": str(started_at.isoformat()),
        "operation_input": {
            "document": "EXAMPLE INPUT TEXT"
        },
        "operation_output": {
            "formatted_document": "EXAMPLE INPUT TEXT FORMATTED"
        },
        "duration_ms": 1000,
    }
    return ExternalApplicationOutputFlexible(
        generation_output={"generated_translation": "Sample Translation HERE"},
        trace_spans=[formatting_span],
        metrics={"grammar": 0.5},
    )
# Initialize application
app = ExternalApplication(client)
app.initialize(application_variant_id="variant_id_placeholder", application=my_translation_app)
app.generate_outputs(evaluation_dataset_id=dataset.id, evaluation_dataset_version='1')

# Evaluation setup: two categorical checks plus one free-text question.
question_requests = [
    {
        "type": "categorical",
        "title": "Test Question 1",
        "prompt": "Does the translation have punctuation issues",
        "choices": [{"label": "No", "value": 0}, {"label": "Yes", "value": 1}],
        "account_id": "account_id_placeholder",
    },
    {
        "type": "categorical",
        "title": "Test Question 2",
        "prompt": "Does the translation have grammatical issues",
        "choices": [{"label": "No", "value": 0}, {"label": "Yes", "value": 1}],
        "account_id": "account_id_placeholder",
    },
    {
        "type": "free_text",
        "title": "Test Question 3",
        "prompt": "List all translation issues",
        "account_id": "account_id_placeholder",
    }
]

# Register each question and collect its id for the question set.
question_ids = []
for request in question_requests:
    created = client.questions.create(**request)
    question_ids.append(created.id)
    print(created)

# Group the registered questions into a named set.
question_set = client.question_sets.create(
    name="translation question set",
    question_ids=question_ids,
    account_id="account_id_placeholder",
)
print(question_set)

# Human-annotated evaluation configuration backed by the question set above.
eval_config = client.evaluation_configs.create(
    account_id="account_id_placeholder",
    question_set_id=question_set.id,
    evaluation_type='human',
)
print(eval_config)

# Map the annotation UI fields onto locations in the test-case data and output.
annotation_config = TranslationAnnotationConfigParam(
    original_text_loc=data_locator.test_case_data.input["origin_text"],
    translation_loc=data_locator.test_case_output.output["generated_translation"],
    expected_translation_loc=data_locator.test_case_data.expected_output["expected_translation"],
)

# Create and start the evaluation over the generated outputs.
evaluation = client.evaluations.create(
    account_id="account_id_placeholder",
    application_variant_id="variant_id_placeholder",
    application_spec_id="spec_id_placeholder",
    description="Demo Evaluation",
    name="Translation Evaluation",
    evaluation_config_id=eval_config.id,
    annotation_config=annotation_config,
    evaluation_dataset_id=dataset.id,
    type="builder",
)
print(evaluation)
EvaluationDataset(
id='32f3862e-75e1-4b69-ab08-638ae6ae3829',
account_id='f4b2a52e-29ff-4225-961e-378e23e67524',
created_at=datetime.datetime(2024, 10, 18, 0, 29, 30, 684934),
created_by_user_id='6f655fda-0492-494b-bc1d-8d02bcb42c89',
name='translation Dataset 2024-10-17 20:29:30 3926b308-d14b-41c8-a53f-7511fb906d13',
schema_type='FLEXIBLE',
updated_at=datetime.datetime(2024, 10, 18, 0, 29, 30, 684934),
archived_at=None,
evaluation_dataset_metadata=None,
knowledge_base_id=None,
out_of_date=None,
schema_sub_type=None,
vendor=None
)

