import SGPClient from 'sgp';
const client = new SGPClient({
apiKey: 'My API Key',
});
const testCaseResultWithViews = await client.evaluations.testCaseResults.retrieve(
'evaluation_id',
'test_case_result_id',
);
console.log(testCaseResultWithViews);
{
"label_status": "PENDING",
"application_spec_id": "<string>",
"evaluation_id": "<string>",
"evaluation_dataset_id": "<string>",
"evaluation_dataset_version_num": "<string>",
"test_case_id": "<string>",
"test_case_evaluation_data": {
"generation_output": "<string>",
"generation_extra_info": {
"chunks": [
{
"text": "<string>",
"metadata": {}
}
],
"schema_type": "CHUNKS"
}
},
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"updated_at": "2023-11-07T05:31:56Z",
"account_id": "<string>",
"created_by_user_id": "<string>",
"created_by_identity_type": "user",
"application_test_case_output": {
"application_variant_id": "<string>",
"evaluation_dataset_id": "<string>",
"test_case_version_id": "<string>",
"test_case_id": "<string>",
"output": {
"generation_output": "<string>",
"generation_extra_info": {
"chunks": [
{
"text": "<string>",
"metadata": {}
}
],
"schema_type": "CHUNKS"
}
},
"schema_type": "GENERATION",
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"account_id": "<string>",
"application_interaction_id": "<string>",
"application_test_case_output_group_id": "<string>",
"interaction": {
"id": "<string>",
"input": {},
"output": {},
"aggregated": true,
"application_spec_id": "<string>",
"application_variant_id": "<string>",
"start_timestamp": "2023-11-07T05:31:56Z",
"duration_ms": 123,
"operation_status": "SUCCESS",
"created_at": "2023-11-07T05:31:56Z",
"operation_metadata": {},
"chat_thread_id": "<string>",
"interaction_source": "EXTERNAL_AI",
"trace_spans": [
{
"id": "<string>",
"node_id": "<string>",
"operation_type": "TEXT_INPUT",
"operation_status": "SUCCESS",
"operation_input": {},
"operation_output": {},
"start_timestamp": "2023-11-07T05:31:56Z",
"duration_ms": 123,
"application_interaction_id": "<string>",
"application_variant_id": "<string>",
"operation_expected": {},
"operation_metadata": {},
"end_timestamp": "2023-11-07T05:31:56Z",
"trace_id": "<string>",
"parent_id": "<string>",
"group_id": "<string>",
"account_id": "<string>",
"created_by_user_id": "<string>",
"created_by_identity_type": "user"
}
]
}
},
"test_case_version": {
"evaluation_dataset_id": "<string>",
"test_case_data": {
"input": "<string>",
"artifact_ids_filter": [
"<string>"
],
"expected_output": "<string>",
"expected_extra_info": {
"chunks": [
{
"text": "<string>",
"metadata": {}
}
],
"schema_type": "CHUNKS"
}
},
"autogenerated": true,
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"updated_at": "2023-11-07T05:31:56Z",
"account_id": "<string>",
"created_by_user_id": "<string>",
"created_by_identity_type": "user",
"schema_type": "GENERATION",
"chat_history": {},
"test_case_metadata": {},
"invalidated_at": "2023-11-07T05:31:56Z",
"archived_at": "2023-11-07T05:31:56Z"
},
"task": {
"id": "<string>",
"task_type": "EVALUATION_ANNOTATION",
"task_entity_parent_id": "<string>",
"task_entity_id": "<string>",
"account_id": "<string>",
"status": "NOT_READY",
"priority": 123,
"assigned_to": {
"id": "<string>",
"email": "<string>",
"first_name": "<string>",
"last_name": "<string>",
"preferences": {}
},
"assignment_expires_at": "2023-11-07T05:31:56Z",
"task_entity": {
"label_status": "PENDING",
"application_spec_id": "<string>",
"evaluation_id": "<string>",
"evaluation_dataset_id": "<string>",
"evaluation_dataset_version_num": "<string>",
"test_case_id": "<string>",
"test_case_evaluation_data": {
"generation_output": "<string>",
"generation_extra_info": {
"chunks": [
{
"text": "<string>",
"metadata": {}
}
],
"schema_type": "CHUNKS"
}
},
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"updated_at": "2023-11-07T05:31:56Z",
"account_id": "<string>",
"created_by_user_id": "<string>",
"created_by_identity_type": "user",
"audit_status": "UNAUDITED",
"audit_required": true,
"audit_comment": "<string>",
"test_case_evaluation_data_schema": "GENERATION",
"result": {},
"completed_at": "2023-11-07T05:31:56Z",
"time_spent_labeling_s": 123,
"application_test_case_output_id": "<string>",
"invalidated_at": "2023-11-07T05:31:56Z",
"annotated_by_user_id": "<string>",
"annotated_by_identity_type": "user",
"edited_by_user_id": "<string>",
"edited_by_identity_type": "user",
"archived_at": "2023-11-07T05:31:56Z",
"edited_by": {
"id": "<string>",
"email": "<string>",
"first_name": "<string>",
"last_name": "<string>",
"preferences": {}
}
}
},
"metrics": {},
"custom_metrics": [
{
"id": "<string>",
"type": "<string>",
"name": "<string>",
"output": 123
}
],
"audit_tasks": [
{
"id": "<string>",
"task_type": "EVALUATION_ANNOTATION",
"task_entity_parent_id": "<string>",
"task_entity_id": "<string>",
"account_id": "<string>",
"status": "NOT_READY",
"priority": 123,
"assigned_to": {
"id": "<string>",
"email": "<string>",
"first_name": "<string>",
"last_name": "<string>",
"preferences": {}
},
"assignment_expires_at": "2023-11-07T05:31:56Z",
"task_entity": {
"label_status": "PENDING",
"application_spec_id": "<string>",
"evaluation_id": "<string>",
"evaluation_dataset_id": "<string>",
"evaluation_dataset_version_num": "<string>",
"test_case_id": "<string>",
"test_case_evaluation_data": {
"generation_output": "<string>",
"generation_extra_info": {
"chunks": [
{
"text": "<string>",
"metadata": {}
}
],
"schema_type": "CHUNKS"
}
},
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"updated_at": "2023-11-07T05:31:56Z",
"account_id": "<string>",
"created_by_user_id": "<string>",
"created_by_identity_type": "user",
"audit_status": "UNAUDITED",
"audit_required": true,
"audit_comment": "<string>",
"test_case_evaluation_data_schema": "GENERATION",
"result": {},
"completed_at": "2023-11-07T05:31:56Z",
"time_spent_labeling_s": 123,
"application_test_case_output_id": "<string>",
"invalidated_at": "2023-11-07T05:31:56Z",
"annotated_by_user_id": "<string>",
"annotated_by_identity_type": "user",
"edited_by_user_id": "<string>",
"edited_by_identity_type": "user",
"archived_at": "2023-11-07T05:31:56Z",
"edited_by": {
"id": "<string>",
"email": "<string>",
"first_name": "<string>",
"last_name": "<string>",
"preferences": {}
}
}
}
],
"annotation_results": [
{
"question_id": "<string>",
"annotation_type": "llm_auto",
"test_case_result_lineage_id": "<string>",
"selected_choice": {
"label": "<string>",
"value": "<string>",
"audit_required": false
},
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"llm_auto_eval_metadata": {
"prompt_tokens": 123,
"completion_tokens": 123,
"time_elapsed_s": 123,
"llm_reasoning": "<string>",
"annotation_result_id": "<string>",
"cost": 123
}
}
],
"audit_status": "UNAUDITED",
"audit_required": true,
"audit_comment": "<string>",
"test_case_evaluation_data_schema": "GENERATION",
"result": {},
"completed_at": "2023-11-07T05:31:56Z",
"time_spent_labeling_s": 123,
"application_test_case_output_id": "<string>",
"invalidated_at": "2023-11-07T05:31:56Z",
"annotated_by_user_id": "<string>",
"annotated_by_identity_type": "user",
"edited_by_user_id": "<string>",
"edited_by_identity_type": "user",
"archived_at": "2023-11-07T05:31:56Z",
"edited_by": {
"id": "<string>",
"email": "<string>",
"first_name": "<string>",
"last_name": "<string>",
"preferences": {}
}
}
Gets the details of a test case result
This API can be used to get information about a single test case result by ID. To use this API, pass in the id that was returned from your Create Test Case Result API call as a path parameter.
Review the response schema to see the fields that will be returned.
import SGPClient from 'sgp';
const client = new SGPClient({
apiKey: 'My API Key',
});
const testCaseResultWithViews = await client.evaluations.testCaseResults.retrieve(
'evaluation_id',
'test_case_result_id',
);
console.log(testCaseResultWithViews);
{
"label_status": "PENDING",
"application_spec_id": "<string>",
"evaluation_id": "<string>",
"evaluation_dataset_id": "<string>",
"evaluation_dataset_version_num": "<string>",
"test_case_id": "<string>",
"test_case_evaluation_data": {
"generation_output": "<string>",
"generation_extra_info": {
"chunks": [
{
"text": "<string>",
"metadata": {}
}
],
"schema_type": "CHUNKS"
}
},
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"updated_at": "2023-11-07T05:31:56Z",
"account_id": "<string>",
"created_by_user_id": "<string>",
"created_by_identity_type": "user",
"application_test_case_output": {
"application_variant_id": "<string>",
"evaluation_dataset_id": "<string>",
"test_case_version_id": "<string>",
"test_case_id": "<string>",
"output": {
"generation_output": "<string>",
"generation_extra_info": {
"chunks": [
{
"text": "<string>",
"metadata": {}
}
],
"schema_type": "CHUNKS"
}
},
"schema_type": "GENERATION",
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"account_id": "<string>",
"application_interaction_id": "<string>",
"application_test_case_output_group_id": "<string>",
"interaction": {
"id": "<string>",
"input": {},
"output": {},
"aggregated": true,
"application_spec_id": "<string>",
"application_variant_id": "<string>",
"start_timestamp": "2023-11-07T05:31:56Z",
"duration_ms": 123,
"operation_status": "SUCCESS",
"created_at": "2023-11-07T05:31:56Z",
"operation_metadata": {},
"chat_thread_id": "<string>",
"interaction_source": "EXTERNAL_AI",
"trace_spans": [
{
"id": "<string>",
"node_id": "<string>",
"operation_type": "TEXT_INPUT",
"operation_status": "SUCCESS",
"operation_input": {},
"operation_output": {},
"start_timestamp": "2023-11-07T05:31:56Z",
"duration_ms": 123,
"application_interaction_id": "<string>",
"application_variant_id": "<string>",
"operation_expected": {},
"operation_metadata": {},
"end_timestamp": "2023-11-07T05:31:56Z",
"trace_id": "<string>",
"parent_id": "<string>",
"group_id": "<string>",
"account_id": "<string>",
"created_by_user_id": "<string>",
"created_by_identity_type": "user"
}
]
}
},
"test_case_version": {
"evaluation_dataset_id": "<string>",
"test_case_data": {
"input": "<string>",
"artifact_ids_filter": [
"<string>"
],
"expected_output": "<string>",
"expected_extra_info": {
"chunks": [
{
"text": "<string>",
"metadata": {}
}
],
"schema_type": "CHUNKS"
}
},
"autogenerated": true,
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"updated_at": "2023-11-07T05:31:56Z",
"account_id": "<string>",
"created_by_user_id": "<string>",
"created_by_identity_type": "user",
"schema_type": "GENERATION",
"chat_history": {},
"test_case_metadata": {},
"invalidated_at": "2023-11-07T05:31:56Z",
"archived_at": "2023-11-07T05:31:56Z"
},
"task": {
"id": "<string>",
"task_type": "EVALUATION_ANNOTATION",
"task_entity_parent_id": "<string>",
"task_entity_id": "<string>",
"account_id": "<string>",
"status": "NOT_READY",
"priority": 123,
"assigned_to": {
"id": "<string>",
"email": "<string>",
"first_name": "<string>",
"last_name": "<string>",
"preferences": {}
},
"assignment_expires_at": "2023-11-07T05:31:56Z",
"task_entity": {
"label_status": "PENDING",
"application_spec_id": "<string>",
"evaluation_id": "<string>",
"evaluation_dataset_id": "<string>",
"evaluation_dataset_version_num": "<string>",
"test_case_id": "<string>",
"test_case_evaluation_data": {
"generation_output": "<string>",
"generation_extra_info": {
"chunks": [
{
"text": "<string>",
"metadata": {}
}
],
"schema_type": "CHUNKS"
}
},
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"updated_at": "2023-11-07T05:31:56Z",
"account_id": "<string>",
"created_by_user_id": "<string>",
"created_by_identity_type": "user",
"audit_status": "UNAUDITED",
"audit_required": true,
"audit_comment": "<string>",
"test_case_evaluation_data_schema": "GENERATION",
"result": {},
"completed_at": "2023-11-07T05:31:56Z",
"time_spent_labeling_s": 123,
"application_test_case_output_id": "<string>",
"invalidated_at": "2023-11-07T05:31:56Z",
"annotated_by_user_id": "<string>",
"annotated_by_identity_type": "user",
"edited_by_user_id": "<string>",
"edited_by_identity_type": "user",
"archived_at": "2023-11-07T05:31:56Z",
"edited_by": {
"id": "<string>",
"email": "<string>",
"first_name": "<string>",
"last_name": "<string>",
"preferences": {}
}
}
},
"metrics": {},
"custom_metrics": [
{
"id": "<string>",
"type": "<string>",
"name": "<string>",
"output": 123
}
],
"audit_tasks": [
{
"id": "<string>",
"task_type": "EVALUATION_ANNOTATION",
"task_entity_parent_id": "<string>",
"task_entity_id": "<string>",
"account_id": "<string>",
"status": "NOT_READY",
"priority": 123,
"assigned_to": {
"id": "<string>",
"email": "<string>",
"first_name": "<string>",
"last_name": "<string>",
"preferences": {}
},
"assignment_expires_at": "2023-11-07T05:31:56Z",
"task_entity": {
"label_status": "PENDING",
"application_spec_id": "<string>",
"evaluation_id": "<string>",
"evaluation_dataset_id": "<string>",
"evaluation_dataset_version_num": "<string>",
"test_case_id": "<string>",
"test_case_evaluation_data": {
"generation_output": "<string>",
"generation_extra_info": {
"chunks": [
{
"text": "<string>",
"metadata": {}
}
],
"schema_type": "CHUNKS"
}
},
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"updated_at": "2023-11-07T05:31:56Z",
"account_id": "<string>",
"created_by_user_id": "<string>",
"created_by_identity_type": "user",
"audit_status": "UNAUDITED",
"audit_required": true,
"audit_comment": "<string>",
"test_case_evaluation_data_schema": "GENERATION",
"result": {},
"completed_at": "2023-11-07T05:31:56Z",
"time_spent_labeling_s": 123,
"application_test_case_output_id": "<string>",
"invalidated_at": "2023-11-07T05:31:56Z",
"annotated_by_user_id": "<string>",
"annotated_by_identity_type": "user",
"edited_by_user_id": "<string>",
"edited_by_identity_type": "user",
"archived_at": "2023-11-07T05:31:56Z",
"edited_by": {
"id": "<string>",
"email": "<string>",
"first_name": "<string>",
"last_name": "<string>",
"preferences": {}
}
}
}
],
"annotation_results": [
{
"question_id": "<string>",
"annotation_type": "llm_auto",
"test_case_result_lineage_id": "<string>",
"selected_choice": {
"label": "<string>",
"value": "<string>",
"audit_required": false
},
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"llm_auto_eval_metadata": {
"prompt_tokens": 123,
"completion_tokens": 123,
"time_elapsed_s": 123,
"llm_reasoning": "<string>",
"annotation_result_id": "<string>",
"cost": 123
}
}
],
"audit_status": "UNAUDITED",
"audit_required": true,
"audit_comment": "<string>",
"test_case_evaluation_data_schema": "GENERATION",
"result": {},
"completed_at": "2023-11-07T05:31:56Z",
"time_spent_labeling_s": 123,
"application_test_case_output_id": "<string>",
"invalidated_at": "2023-11-07T05:31:56Z",
"annotated_by_user_id": "<string>",
"annotated_by_identity_type": "user",
"edited_by_user_id": "<string>",
"edited_by_identity_type": "user",
"archived_at": "2023-11-07T05:31:56Z",
"edited_by": {
"id": "<string>",
"email": "<string>",
"first_name": "<string>",
"last_name": "<string>",
"preferences": {}
}
}
AnnotationResults, AuditTasks, CustomMetrics, Metrics, Task, TestCaseVersion, Trace
Successful Response
The status of the test case result. This should not be explicitly set when creating a test case result. When patching a test case result, this field can be set to 'PENDING' to prevent the test case result from being marked 'COMPLETED'.
PENDING, COMPLETED, FAILED Show child attributes
Show child attributes
"CHUNKS"
The unique identifier of the entity.
The date and time when the entity was created in ISO format.
The date and time when the entity was last updated in ISO format.
The ID of the account that owns the given entity.
The user who originally created the entity.
The type of identity that created the entity.
user, service_account Show child attributes
Show child attributes
Show child attributes
"CHUNKS"
GENERATION, FLEXIBLE
The unique identifier of the entity.
The date and time when the entity was created in ISO format.
The ID of the account that owns the given entity.
Show child attributes
Boolean indicating whether this interaction has been uploaded to the S3 bucket yet; the default is false
How much time the step took, in milliseconds (ms)
The outcome of the operation
SUCCESS, ERROR, CANCELED
The date and time when the entity was created in ISO format.
The JSON representation of the metadata insights emitted through the execution. This can differ based on different types of operations
EXTERNAL_AI, EVALUATION, SGP_CHAT, AGENTS_SERVICE Show child attributes
Identifies the application step
The id of the node in the application_variant config that emitted this insight
Type of the operation, e.g. RERANKING
TEXT_INPUT, TEXT_OUTPUT, COMPLETION_INPUT, COMPLETION, KB_RETRIEVAL, KB_INPUT, RERANKING, EXTERNAL_ENDPOINT, PROMPT_ENGINEERING, DOCUMENT_INPUT, MAP_REDUCE, DOCUMENT_SEARCH, DOCUMENT_PROMPT, CUSTOM, CODE_EXECUTION, DATA_MANIPULATION, EVALUATION, FILE_RETRIEVAL, KB_ADD_CHUNK, KB_MANAGEMENT, GUARDRAIL, TRACER, AGENT_TRACER, AGENT_WORKFLOW, STANDALONE
The outcome of the operation
SUCCESS, ERROR, CANCELED
The start time of the step
How much time the step took, in milliseconds (ms)
The interaction ID this span belongs to
The id of the application variant this span belongs to
The JSON representation of the metadata insights emitted through the execution. This can differ based on different types of operations
The end time of the step, nullable, since it can be set to done at a later point in time.
The root-level ID that this span belongs to
The parent span of the current span; null if this span is the root.
The ID of the group this span belongs to
The ID of the account that owns the given entity.
The user who originally created the entity.
The type of identity that created the entity.
user, service_account Show child attributes
The ID of the associated evaluation dataset.
Show child attributes
Show child attributes
"CHUNKS"
Boolean to track whether or not the test case is autogenerated
The unique identifier of the entity.
The date and time when the entity was created in ISO format.
The date and time when the entity was last updated in ISO format.
The ID of the account that owns the given entity.
The user who originally created the entity.
The type of identity that created the entity.
user, service_account
"GENERATION"
Used for tracking previous chat interactions for multi-chat test cases
Metadata for the test case
The date and time when the entity was overwritten or deleted in ISO format.
The date and time when the entity was archived in ISO format.
Show child attributes
EVALUATION_ANNOTATION, EVALUATION_AUDIT, CONTRIBUTOR_ANNOTATION, CONTRIBUTOR_AUDIT The ID of the account that owns the given entity.
NOT_READY, PENDING, PENDING_REDO, COMPLETED, FIXED The date and time when the task assignment expires in ISO format.
The entity that the task is associated with.
Show child attributes
The status of the test case result. This should not be explicitly set when creating a test case result. When patching a test case result, this field can be set to 'PENDING' to prevent the test case result from being marked 'COMPLETED'.
PENDING, COMPLETED, FAILED Show child attributes
Show child attributes
"CHUNKS"
The unique identifier of the entity.
The date and time when the entity was created in ISO format.
The date and time when the entity was last updated in ISO format.
The ID of the account that owns the given entity.
The user who originally created the entity.
The type of identity that created the entity.
user, service_account
UNAUDITED, FIXED, APPROVED
"GENERATION"
The time spent labeling in seconds.
The date and time when the entity was overwritten or deleted in ISO format.
The user who annotated the task.
The type of identity that annotated the entity.
user, service_account The user who edited the task.
The type of identity that edited the entity.
user, service_account The date and time when the entity was archived in ISO format.
Show child attributes
EVALUATION_ANNOTATION, EVALUATION_AUDIT, CONTRIBUTOR_ANNOTATION, CONTRIBUTOR_AUDIT The ID of the account that owns the given entity.
NOT_READY, PENDING, PENDING_REDO, COMPLETED, FIXED The date and time when the task assignment expires in ISO format.
The entity that the task is associated with.
Show child attributes
The status of the test case result. This should not be explicitly set when creating a test case result. When patching a test case result, this field can be set to 'PENDING' to prevent the test case result from being marked 'COMPLETED'.
PENDING, COMPLETED, FAILED Show child attributes
Show child attributes
"CHUNKS"
The unique identifier of the entity.
The date and time when the entity was created in ISO format.
The date and time when the entity was last updated in ISO format.
The ID of the account that owns the given entity.
The user who originally created the entity.
The type of identity that created the entity.
user, service_account
UNAUDITED, FIXED, APPROVED
"GENERATION"
The time spent labeling in seconds.
The date and time when the entity was overwritten or deleted in ISO format.
The user who annotated the task.
The type of identity that annotated the entity.
user, service_account The user who edited the task.
The type of identity that edited the entity.
user, service_account The date and time when the entity was archived in ISO format.
Show child attributes
The type of annotation result.
llm_auto, human
The selected choice(s) for the annotation result, in JSON form. For categorical questions, this is an object or list of objects (depending on whether multiple selections are allowed). For free text questions, this is a string. For numeric or rating questions, this is a number.
The unique identifier of the entity.
The date and time when the entity was created in ISO format.
Show child attributes
The time elapsed to generate this annotation in seconds.
The reasoning the LLM gave for the annotation it provided.
The ID of the associated annotation result.
The cost of the annotation in cents.
UNAUDITED, FIXED, APPROVED
"GENERATION"
The time spent labeling in seconds.
The date and time when the entity was overwritten or deleted in ISO format.
The user who annotated the task.
The type of identity that annotated the entity.
user, service_account The user who edited the task.
The type of identity that edited the entity.
user, service_account The date and time when the entity was archived in ISO format.