Evaluations
Generate an Application Report Card
Recipes
- Evaluations
- Applications
- Datasets
- Inference
Evaluations
Generate an Application Report Card
Generate a report card for an application variant
import os
import time
from scale_gp import SGPClient
# Build the SGP client. `api_key`, `account_id`, `spec_id`, `variant_id`,
# `manual_evaluation_dataset` and `evaluation_config` must already be defined
# before this snippet runs.
client = SGPClient(api_key=api_key)

# Create a "builder" evaluation of the application variant on the dataset.
evaluation = client.evaluations.create(
    type="builder",
    account_id=account_id,
    application_spec_id=spec_id,
    application_variant_id=variant_id,
    description="description",
    evaluation_dataset_id=manual_evaluation_dataset.id,
    name="Report card evaluation",
    evaluation_config_id=evaluation_config.id,
)

# Request a report card for the same variant that was passed to
# `evaluations.create` above, so the report refers to the evaluated variant.
create_response = client.application_variant_reports.create(
    application_variant_id=variant_id,
    evaluation_dataset_ids=[manual_evaluation_dataset.id],
    account_id=account_id,
)
print(create_response)

# retrieve the application variant report, will still be PENDING if immediately retrieved
application_variant_report = client.application_variant_reports.retrieve(
    application_variant_report_id=create_response.id,
    view=["AsyncJobs"],
)
print(application_variant_report)
ApplicationVariantWithScores(
id='cba0f08d-cb98-4221-a96d-d3eacb1c7a38',
account_id='66049ada2fc77c99ef015be7',
application_spec_id='ef0f2d50-1304-4b40-a0cc-1faf449b9555',
application_variant_id='37d7d6ed-b7e8-4c3b-a488-03c96bb23d5d',
created_at=datetime.datetime(2024, 9, 26, 20, 34, 6, 45000),
updated_at=datetime.datetime(2024, 9, 26, 20, 34, 6, 45000),
category_scores=None,
evaluation_datasets=None,
score=None
)
Follow the instructions in the Quickstart Guide to set up the SGP Client.
from scale_gp import SGPClient
# Instantiate the SGP client; `api_key` must be defined before this line runs.
client = SGPClient(
    api_key=api_key,
)
Create an evaluation by following any previous evaluation recipe, such as Multistage Evaluation.
# Create a "builder" evaluation of the application variant.
# The dataset is `manual_evaluation_dataset`, the same object whose ID is
# passed to `application_variant_reports.create` in the next step.
evaluation = client.evaluations.create(
    type="builder",
    account_id=account_id,
    application_spec_id=spec_id,
    application_variant_id=variant_id,
    description="description",
    evaluation_dataset_id=manual_evaluation_dataset.id,
    name="Report card evaluation",
    evaluation_config_id=evaluation_config.id,
)
A report card will provide a summary of the performance of the application variant against an evaluation dataset. Three main scores will be produced: Accuracy, Quality, and Trust & Safety. The performance in each category will contribute to an overall Scale Confidence Score for the variant.
You can view generated report cards in the UI at: https://egp.dashboard.scale.com/applications/{application_spec_id}/{application_variant_id}/report-card/overview
# Ask the service to generate a report card for the variant against the
# listed evaluation dataset(s).
create_response = client.application_variant_reports.create(
    account_id=account_id,
    application_variant_id=variant_id,
    evaluation_dataset_ids=[manual_evaluation_dataset.id],
)

# retrieve the application variant report, will still be PENDING if immediately retrieved
application_variant_report = client.application_variant_reports.retrieve(
    view=["AsyncJobs"],
    application_variant_report_id=create_response.id,
)
import os
import time
from scale_gp import SGPClient
# Build the SGP client. `api_key`, `account_id`, `spec_id`, `variant_id`,
# `manual_evaluation_dataset` and `evaluation_config` must already be defined
# before this snippet runs.
client = SGPClient(api_key=api_key)

# Create a "builder" evaluation of the application variant on the dataset.
evaluation = client.evaluations.create(
    type="builder",
    account_id=account_id,
    application_spec_id=spec_id,
    application_variant_id=variant_id,
    description="description",
    evaluation_dataset_id=manual_evaluation_dataset.id,
    name="Report card evaluation",
    evaluation_config_id=evaluation_config.id,
)

# Request a report card for the same variant that was passed to
# `evaluations.create` above, so the report refers to the evaluated variant.
create_response = client.application_variant_reports.create(
    application_variant_id=variant_id,
    evaluation_dataset_ids=[manual_evaluation_dataset.id],
    account_id=account_id,
)
print(create_response)

# retrieve the application variant report, will still be PENDING if immediately retrieved
application_variant_report = client.application_variant_reports.retrieve(
    application_variant_report_id=create_response.id,
    view=["AsyncJobs"],
)
print(application_variant_report)
ApplicationVariantWithScores(
id='cba0f08d-cb98-4221-a96d-d3eacb1c7a38',
account_id='66049ada2fc77c99ef015be7',
application_spec_id='ef0f2d50-1304-4b40-a0cc-1faf449b9555',
application_variant_id='37d7d6ed-b7e8-4c3b-a488-03c96bb23d5d',
created_at=datetime.datetime(2024, 9, 26, 20, 34, 6, 45000),
updated_at=datetime.datetime(2024, 9, 26, 20, 34, 6, 45000),
category_scores=None,
evaluation_datasets=None,
score=None
)
import os
import time
from scale_gp import SGPClient
# Build the SGP client. `api_key`, `account_id`, `spec_id`, `variant_id`,
# `manual_evaluation_dataset` and `evaluation_config` must already be defined
# before this snippet runs.
client = SGPClient(api_key=api_key)

# Create a "builder" evaluation of the application variant on the dataset.
evaluation = client.evaluations.create(
    type="builder",
    account_id=account_id,
    application_spec_id=spec_id,
    application_variant_id=variant_id,
    description="description",
    evaluation_dataset_id=manual_evaluation_dataset.id,
    name="Report card evaluation",
    evaluation_config_id=evaluation_config.id,
)

# Request a report card for the same variant that was passed to
# `evaluations.create` above, so the report refers to the evaluated variant.
create_response = client.application_variant_reports.create(
    application_variant_id=variant_id,
    evaluation_dataset_ids=[manual_evaluation_dataset.id],
    account_id=account_id,
)
print(create_response)

# retrieve the application variant report, will still be PENDING if immediately retrieved
application_variant_report = client.application_variant_reports.retrieve(
    application_variant_report_id=create_response.id,
    view=["AsyncJobs"],
)
print(application_variant_report)
ApplicationVariantWithScores(
id='cba0f08d-cb98-4221-a96d-d3eacb1c7a38',
account_id='66049ada2fc77c99ef015be7',
application_spec_id='ef0f2d50-1304-4b40-a0cc-1faf449b9555',
application_variant_id='37d7d6ed-b7e8-4c3b-a488-03c96bb23d5d',
created_at=datetime.datetime(2024, 9, 26, 20, 34, 6, 45000),
updated_at=datetime.datetime(2024, 9, 26, 20, 34, 6, 45000),
category_scores=None,
evaluation_datasets=None,
score=None
)