import SGPClient from 'sgp';
const client = new SGPClient({
apiKey: 'My API Key',
});
// Automatically fetches more pages as needed.
for await (const testCaseResultWithViews of client.evaluations.testCaseResults.list('evaluation_id')) {
console.log(testCaseResultWithViews);
}{
"items": [
{
"label_status": "PENDING",
"application_spec_id": "<string>",
"evaluation_id": "<string>",
"evaluation_dataset_id": "<string>",
"evaluation_dataset_version_num": "<string>",
"test_case_id": "<string>",
"test_case_evaluation_data": {
"generation_output": "<string>",
"generation_extra_info": {
"chunks": [
{
"text": "<string>",
"metadata": {}
}
],
"schema_type": "CHUNKS"
}
},
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"updated_at": "2023-11-07T05:31:56Z",
"account_id": "<string>",
"created_by_user_id": "<string>",
"created_by_identity_type": "user",
"application_test_case_output": {
"application_variant_id": "<string>",
"evaluation_dataset_id": "<string>",
"test_case_version_id": "<string>",
"test_case_id": "<string>",
"output": {
"generation_output": "<string>",
"generation_extra_info": {
"chunks": [
{
"text": "<string>",
"metadata": {}
}
],
"schema_type": "CHUNKS"
}
},
"schema_type": "GENERATION",
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"account_id": "<string>",
"application_interaction_id": "<string>",
"application_test_case_output_group_id": "<string>",
"interaction": {
"id": "<string>",
"input": {},
"output": {},
"aggregated": true,
"application_spec_id": "<string>",
"application_variant_id": "<string>",
"start_timestamp": "2023-11-07T05:31:56Z",
"duration_ms": 123,
"operation_status": "SUCCESS",
"created_at": "2023-11-07T05:31:56Z",
"operation_metadata": {},
"chat_thread_id": "<string>",
"interaction_source": "EXTERNAL_AI",
"trace_spans": [
{
"id": "<string>",
"node_id": "<string>",
"operation_type": "TEXT_INPUT",
"operation_status": "SUCCESS",
"operation_input": {},
"operation_output": {},
"start_timestamp": "2023-11-07T05:31:56Z",
"duration_ms": 123,
"application_interaction_id": "<string>",
"application_variant_id": "<string>",
"operation_expected": {},
"operation_metadata": {},
"end_timestamp": "2023-11-07T05:31:56Z",
"trace_id": "<string>",
"parent_id": "<string>",
"group_id": "<string>",
"account_id": "<string>",
"created_by_user_id": "<string>",
"created_by_identity_type": "user"
}
]
}
},
"test_case_version": {
"evaluation_dataset_id": "<string>",
"test_case_data": {
"input": "<string>",
"artifact_ids_filter": [
"<string>"
],
"expected_output": "<string>",
"expected_extra_info": {
"chunks": [
{
"text": "<string>",
"metadata": {}
}
],
"schema_type": "CHUNKS"
}
},
"autogenerated": true,
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"updated_at": "2023-11-07T05:31:56Z",
"account_id": "<string>",
"created_by_user_id": "<string>",
"created_by_identity_type": "user",
"schema_type": "GENERATION",
"chat_history": {},
"test_case_metadata": {},
"invalidated_at": "2023-11-07T05:31:56Z",
"archived_at": "2023-11-07T05:31:56Z"
},
"task": {
"id": "<string>",
"task_type": "EVALUATION_ANNOTATION",
"task_entity_parent_id": "<string>",
"task_entity_id": "<string>",
"account_id": "<string>",
"status": "NOT_READY",
"priority": 123,
"assigned_to": {
"id": "<string>",
"email": "<string>",
"first_name": "<string>",
"last_name": "<string>",
"preferences": {}
},
"assignment_expires_at": "2023-11-07T05:31:56Z",
"task_entity": {
"label_status": "PENDING",
"application_spec_id": "<string>",
"evaluation_id": "<string>",
"evaluation_dataset_id": "<string>",
"evaluation_dataset_version_num": "<string>",
"test_case_id": "<string>",
"test_case_evaluation_data": {
"generation_output": "<string>",
"generation_extra_info": {
"chunks": [
{
"text": "<string>",
"metadata": {}
}
],
"schema_type": "CHUNKS"
}
},
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"updated_at": "2023-11-07T05:31:56Z",
"account_id": "<string>",
"created_by_user_id": "<string>",
"created_by_identity_type": "user",
"audit_status": "UNAUDITED",
"audit_required": true,
"audit_comment": "<string>",
"test_case_evaluation_data_schema": "GENERATION",
"result": {},
"completed_at": "2023-11-07T05:31:56Z",
"time_spent_labeling_s": 123,
"application_test_case_output_id": "<string>",
"invalidated_at": "2023-11-07T05:31:56Z",
"annotated_by_user_id": "<string>",
"annotated_by_identity_type": "user",
"edited_by_user_id": "<string>",
"edited_by_identity_type": "user",
"archived_at": "2023-11-07T05:31:56Z",
"edited_by": {
"id": "<string>",
"email": "<string>",
"first_name": "<string>",
"last_name": "<string>",
"preferences": {}
}
}
},
"metrics": {},
"custom_metrics": [
{
"id": "<string>",
"type": "<string>",
"name": "<string>",
"output": 123
}
],
"audit_tasks": [
{
"id": "<string>",
"task_type": "EVALUATION_ANNOTATION",
"task_entity_parent_id": "<string>",
"task_entity_id": "<string>",
"account_id": "<string>",
"status": "NOT_READY",
"priority": 123,
"assigned_to": {
"id": "<string>",
"email": "<string>",
"first_name": "<string>",
"last_name": "<string>",
"preferences": {}
},
"assignment_expires_at": "2023-11-07T05:31:56Z",
"task_entity": {
"label_status": "PENDING",
"application_spec_id": "<string>",
"evaluation_id": "<string>",
"evaluation_dataset_id": "<string>",
"evaluation_dataset_version_num": "<string>",
"test_case_id": "<string>",
"test_case_evaluation_data": {
"generation_output": "<string>",
"generation_extra_info": {
"chunks": [
{
"text": "<string>",
"metadata": {}
}
],
"schema_type": "CHUNKS"
}
},
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"updated_at": "2023-11-07T05:31:56Z",
"account_id": "<string>",
"created_by_user_id": "<string>",
"created_by_identity_type": "user",
"audit_status": "UNAUDITED",
"audit_required": true,
"audit_comment": "<string>",
"test_case_evaluation_data_schema": "GENERATION",
"result": {},
"completed_at": "2023-11-07T05:31:56Z",
"time_spent_labeling_s": 123,
"application_test_case_output_id": "<string>",
"invalidated_at": "2023-11-07T05:31:56Z",
"annotated_by_user_id": "<string>",
"annotated_by_identity_type": "user",
"edited_by_user_id": "<string>",
"edited_by_identity_type": "user",
"archived_at": "2023-11-07T05:31:56Z",
"edited_by": {
"id": "<string>",
"email": "<string>",
"first_name": "<string>",
"last_name": "<string>",
"preferences": {}
}
}
}
],
"annotation_results": [
{
"question_id": "<string>",
"annotation_type": "llm_auto",
"test_case_result_lineage_id": "<string>",
"selected_choice": {
"label": "<string>",
"value": "<string>",
"audit_required": false
},
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"llm_auto_eval_metadata": {
"prompt_tokens": 123,
"completion_tokens": 123,
"time_elapsed_s": 123,
"llm_reasoning": "<string>",
"annotation_result_id": "<string>",
"cost": 123
}
}
],
"audit_status": "UNAUDITED",
"audit_required": true,
"audit_comment": "<string>",
"test_case_evaluation_data_schema": "GENERATION",
"result": {},
"completed_at": "2023-11-07T05:31:56Z",
"time_spent_labeling_s": 123,
"application_test_case_output_id": "<string>",
"invalidated_at": "2023-11-07T05:31:56Z",
"annotated_by_user_id": "<string>",
"annotated_by_identity_type": "user",
"edited_by_user_id": "<string>",
"edited_by_identity_type": "user",
"archived_at": "2023-11-07T05:31:56Z",
"edited_by": {
"id": "<string>",
"email": "<string>",
"first_name": "<string>",
"last_name": "<string>",
"preferences": {}
}
}
],
"total_item_count": 123,
"current_page": 123,
"items_per_page": 123
}
Lists all test case results accessible to the user.
This API can be used to list test case results. If a user has access to multiple accounts, all test case results from all accounts the user is associated with will be returned.
import SGPClient from 'sgp';
const client = new SGPClient({
apiKey: 'My API Key',
});
// Automatically fetches more pages as needed.
for await (const testCaseResultWithViews of client.evaluations.testCaseResults.list('evaluation_id')) {
console.log(testCaseResultWithViews);
}{
"items": [
{
"label_status": "PENDING",
"application_spec_id": "<string>",
"evaluation_id": "<string>",
"evaluation_dataset_id": "<string>",
"evaluation_dataset_version_num": "<string>",
"test_case_id": "<string>",
"test_case_evaluation_data": {
"generation_output": "<string>",
"generation_extra_info": {
"chunks": [
{
"text": "<string>",
"metadata": {}
}
],
"schema_type": "CHUNKS"
}
},
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"updated_at": "2023-11-07T05:31:56Z",
"account_id": "<string>",
"created_by_user_id": "<string>",
"created_by_identity_type": "user",
"application_test_case_output": {
"application_variant_id": "<string>",
"evaluation_dataset_id": "<string>",
"test_case_version_id": "<string>",
"test_case_id": "<string>",
"output": {
"generation_output": "<string>",
"generation_extra_info": {
"chunks": [
{
"text": "<string>",
"metadata": {}
}
],
"schema_type": "CHUNKS"
}
},
"schema_type": "GENERATION",
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"account_id": "<string>",
"application_interaction_id": "<string>",
"application_test_case_output_group_id": "<string>",
"interaction": {
"id": "<string>",
"input": {},
"output": {},
"aggregated": true,
"application_spec_id": "<string>",
"application_variant_id": "<string>",
"start_timestamp": "2023-11-07T05:31:56Z",
"duration_ms": 123,
"operation_status": "SUCCESS",
"created_at": "2023-11-07T05:31:56Z",
"operation_metadata": {},
"chat_thread_id": "<string>",
"interaction_source": "EXTERNAL_AI",
"trace_spans": [
{
"id": "<string>",
"node_id": "<string>",
"operation_type": "TEXT_INPUT",
"operation_status": "SUCCESS",
"operation_input": {},
"operation_output": {},
"start_timestamp": "2023-11-07T05:31:56Z",
"duration_ms": 123,
"application_interaction_id": "<string>",
"application_variant_id": "<string>",
"operation_expected": {},
"operation_metadata": {},
"end_timestamp": "2023-11-07T05:31:56Z",
"trace_id": "<string>",
"parent_id": "<string>",
"group_id": "<string>",
"account_id": "<string>",
"created_by_user_id": "<string>",
"created_by_identity_type": "user"
}
]
}
},
"test_case_version": {
"evaluation_dataset_id": "<string>",
"test_case_data": {
"input": "<string>",
"artifact_ids_filter": [
"<string>"
],
"expected_output": "<string>",
"expected_extra_info": {
"chunks": [
{
"text": "<string>",
"metadata": {}
}
],
"schema_type": "CHUNKS"
}
},
"autogenerated": true,
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"updated_at": "2023-11-07T05:31:56Z",
"account_id": "<string>",
"created_by_user_id": "<string>",
"created_by_identity_type": "user",
"schema_type": "GENERATION",
"chat_history": {},
"test_case_metadata": {},
"invalidated_at": "2023-11-07T05:31:56Z",
"archived_at": "2023-11-07T05:31:56Z"
},
"task": {
"id": "<string>",
"task_type": "EVALUATION_ANNOTATION",
"task_entity_parent_id": "<string>",
"task_entity_id": "<string>",
"account_id": "<string>",
"status": "NOT_READY",
"priority": 123,
"assigned_to": {
"id": "<string>",
"email": "<string>",
"first_name": "<string>",
"last_name": "<string>",
"preferences": {}
},
"assignment_expires_at": "2023-11-07T05:31:56Z",
"task_entity": {
"label_status": "PENDING",
"application_spec_id": "<string>",
"evaluation_id": "<string>",
"evaluation_dataset_id": "<string>",
"evaluation_dataset_version_num": "<string>",
"test_case_id": "<string>",
"test_case_evaluation_data": {
"generation_output": "<string>",
"generation_extra_info": {
"chunks": [
{
"text": "<string>",
"metadata": {}
}
],
"schema_type": "CHUNKS"
}
},
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"updated_at": "2023-11-07T05:31:56Z",
"account_id": "<string>",
"created_by_user_id": "<string>",
"created_by_identity_type": "user",
"audit_status": "UNAUDITED",
"audit_required": true,
"audit_comment": "<string>",
"test_case_evaluation_data_schema": "GENERATION",
"result": {},
"completed_at": "2023-11-07T05:31:56Z",
"time_spent_labeling_s": 123,
"application_test_case_output_id": "<string>",
"invalidated_at": "2023-11-07T05:31:56Z",
"annotated_by_user_id": "<string>",
"annotated_by_identity_type": "user",
"edited_by_user_id": "<string>",
"edited_by_identity_type": "user",
"archived_at": "2023-11-07T05:31:56Z",
"edited_by": {
"id": "<string>",
"email": "<string>",
"first_name": "<string>",
"last_name": "<string>",
"preferences": {}
}
}
},
"metrics": {},
"custom_metrics": [
{
"id": "<string>",
"type": "<string>",
"name": "<string>",
"output": 123
}
],
"audit_tasks": [
{
"id": "<string>",
"task_type": "EVALUATION_ANNOTATION",
"task_entity_parent_id": "<string>",
"task_entity_id": "<string>",
"account_id": "<string>",
"status": "NOT_READY",
"priority": 123,
"assigned_to": {
"id": "<string>",
"email": "<string>",
"first_name": "<string>",
"last_name": "<string>",
"preferences": {}
},
"assignment_expires_at": "2023-11-07T05:31:56Z",
"task_entity": {
"label_status": "PENDING",
"application_spec_id": "<string>",
"evaluation_id": "<string>",
"evaluation_dataset_id": "<string>",
"evaluation_dataset_version_num": "<string>",
"test_case_id": "<string>",
"test_case_evaluation_data": {
"generation_output": "<string>",
"generation_extra_info": {
"chunks": [
{
"text": "<string>",
"metadata": {}
}
],
"schema_type": "CHUNKS"
}
},
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"updated_at": "2023-11-07T05:31:56Z",
"account_id": "<string>",
"created_by_user_id": "<string>",
"created_by_identity_type": "user",
"audit_status": "UNAUDITED",
"audit_required": true,
"audit_comment": "<string>",
"test_case_evaluation_data_schema": "GENERATION",
"result": {},
"completed_at": "2023-11-07T05:31:56Z",
"time_spent_labeling_s": 123,
"application_test_case_output_id": "<string>",
"invalidated_at": "2023-11-07T05:31:56Z",
"annotated_by_user_id": "<string>",
"annotated_by_identity_type": "user",
"edited_by_user_id": "<string>",
"edited_by_identity_type": "user",
"archived_at": "2023-11-07T05:31:56Z",
"edited_by": {
"id": "<string>",
"email": "<string>",
"first_name": "<string>",
"last_name": "<string>",
"preferences": {}
}
}
}
],
"annotation_results": [
{
"question_id": "<string>",
"annotation_type": "llm_auto",
"test_case_result_lineage_id": "<string>",
"selected_choice": {
"label": "<string>",
"value": "<string>",
"audit_required": false
},
"id": "<string>",
"created_at": "2023-11-07T05:31:56Z",
"llm_auto_eval_metadata": {
"prompt_tokens": 123,
"completion_tokens": 123,
"time_elapsed_s": 123,
"llm_reasoning": "<string>",
"annotation_result_id": "<string>",
"cost": 123
}
}
],
"audit_status": "UNAUDITED",
"audit_required": true,
"audit_comment": "<string>",
"test_case_evaluation_data_schema": "GENERATION",
"result": {},
"completed_at": "2023-11-07T05:31:56Z",
"time_spent_labeling_s": 123,
"application_test_case_output_id": "<string>",
"invalidated_at": "2023-11-07T05:31:56Z",
"annotated_by_user_id": "<string>",
"annotated_by_identity_type": "user",
"edited_by_user_id": "<string>",
"edited_by_identity_type": "user",
"archived_at": "2023-11-07T05:31:56Z",
"edited_by": {
"id": "<string>",
"email": "<string>",
"first_name": "<string>",
"last_name": "<string>",
"preferences": {}
}
}
],
"total_item_count": 123,
"current_page": 123,
"items_per_page": 123
}
AnnotationResults, AuditTasks, CustomMetrics, Metrics, Task, TestCaseVersion, Trace
Page number for pagination to be returned by the given endpoint. Starts at page 1.
x >= 1
Maximum number of artifacts to be returned by the given endpoint. Defaults to 100 and cannot be greater than 10k.
1 <= x <= 10000
Successful Response
The data returned for the current page.
Show child attributes
The status of the test case result. This should not be explicitly set when creating a test case result. When patching a test case result, this field can be set to 'PENDING' to prevent the test case result from being marked 'COMPLETED'.
PENDING, COMPLETED, FAILED
Show child attributes
Show child attributes
"CHUNKS"
The unique identifier of the entity.
The date and time when the entity was created in ISO format.
The date and time when the entity was last updated in ISO format.
The ID of the account that owns the given entity.
The user who originally created the entity.
The type of identity that created the entity.
user, service_account
Show child attributes
Show child attributes
Show child attributes
"CHUNKS"
GENERATION, FLEXIBLE
The unique identifier of the entity.
The date and time when the entity was created in ISO format.
The ID of the account that owns the given entity.
Show child attributes
Whether this interaction has been uploaded to the S3 bucket yet. Defaults to false.
How much time the step took in milliseconds (ms)
The outcome of the operation
SUCCESS, ERROR, CANCELED
The date and time when the entity was created in ISO format.
The JSON representation of the metadata insights emitted through the execution. This can differ based on different types of operations
EXTERNAL_AI, EVALUATION, SGP_CHAT, AGENTS_SERVICE
Show child attributes
Identifies the application step
The id of the node in the application_variant config that emitted this insight
Type of the operation, e.g. RERANKING
TEXT_INPUT, TEXT_OUTPUT, COMPLETION_INPUT, COMPLETION, KB_RETRIEVAL, KB_INPUT, RERANKING, EXTERNAL_ENDPOINT, PROMPT_ENGINEERING, DOCUMENT_INPUT, MAP_REDUCE, DOCUMENT_SEARCH, DOCUMENT_PROMPT, CUSTOM, CODE_EXECUTION, DATA_MANIPULATION, EVALUATION, FILE_RETRIEVAL, KB_ADD_CHUNK, KB_MANAGEMENT, GUARDRAIL, TRACER, AGENT_TRACER, AGENT_WORKFLOW, STANDALONE
The outcome of the operation
SUCCESS, ERROR, CANCELED
The start time of the step
How much time the step took in milliseconds (ms)
The interaction ID this span belongs to
The id of the application variant this span belongs to
The JSON representation of the metadata insights emitted through the execution. This can differ based on different types of operations
The end time of the step. Nullable, since the step can be marked as done at a later point in time.
The root-level ID that this span belongs to.
The parent span of the current span; null if this span is the root.
The ID of the group this span belongs to
The ID of the account that owns the given entity.
The user who originally created the entity.
The type of identity that created the entity.
user, service_account
Show child attributes
The ID of the associated evaluation dataset.
Show child attributes
Show child attributes
"CHUNKS"
Boolean to track whether or not the test case is autogenerated
The unique identifier of the entity.
The date and time when the entity was created in ISO format.
The date and time when the entity was last updated in ISO format.
The ID of the account that owns the given entity.
The user who originally created the entity.
The type of identity that created the entity.
user, service_account
"GENERATION"
Used for tracking previous chat interactions for multi-chat test cases
Metadata for the test case
The date and time when the entity was overwritten or deleted in ISO format.
The date and time when the entity was archived in ISO format.
Show child attributes
EVALUATION_ANNOTATION, EVALUATION_AUDIT, CONTRIBUTOR_ANNOTATION, CONTRIBUTOR_AUDIT
The ID of the account that owns the given entity.
NOT_READY, PENDING, PENDING_REDO, COMPLETED, FIXED
The date and time when the task assignment expires in ISO format.
The entity that the task is associated with.
Show child attributes
The status of the test case result. This should not be explicitly set when creating a test case result. When patching a test case result, this field can be set to 'PENDING' to prevent the test case result from being marked 'COMPLETED'.
PENDING, COMPLETED, FAILED
Show child attributes
Show child attributes
"CHUNKS"
The unique identifier of the entity.
The date and time when the entity was created in ISO format.
The date and time when the entity was last updated in ISO format.
The ID of the account that owns the given entity.
The user who originally created the entity.
The type of identity that created the entity.
user, service_account
UNAUDITED, FIXED, APPROVED
"GENERATION"
The time spent labeling in seconds.
The date and time when the entity was overwritten or deleted in ISO format.
The user who annotated the task.
The type of identity that annotated the entity.
user, service_account
The user who edited the task.
The type of identity that edited the entity.
user, service_account
The date and time when the entity was archived in ISO format.
Show child attributes
EVALUATION_ANNOTATION, EVALUATION_AUDIT, CONTRIBUTOR_ANNOTATION, CONTRIBUTOR_AUDIT
The ID of the account that owns the given entity.
NOT_READY, PENDING, PENDING_REDO, COMPLETED, FIXED
The date and time when the task assignment expires in ISO format.
The entity that the task is associated with.
Show child attributes
The status of the test case result. This should not be explicitly set when creating a test case result. When patching a test case result, this field can be set to 'PENDING' to prevent the test case result from being marked 'COMPLETED'.
PENDING, COMPLETED, FAILED
Show child attributes
Show child attributes
"CHUNKS"
The unique identifier of the entity.
The date and time when the entity was created in ISO format.
The date and time when the entity was last updated in ISO format.
The ID of the account that owns the given entity.
The user who originally created the entity.
The type of identity that created the entity.
user, service_account
UNAUDITED, FIXED, APPROVED
"GENERATION"
The time spent labeling in seconds.
The date and time when the entity was overwritten or deleted in ISO format.
The user who annotated the task.
The type of identity that annotated the entity.
user, service_account
The user who edited the task.
The type of identity that edited the entity.
user, service_account
The date and time when the entity was archived in ISO format.
Show child attributes
Show child attributes
The type of annotation result.
llm_auto, human
The selected choice(s) for the annotation result, in JSON form. For categorical questions, this is an object or list of objects (depending on whether multiple selections are allowed). For free text questions, this is a string. For numeric or rating questions, this is a number.
The unique identifier of the entity.
The date and time when the entity was created in ISO format.
Show child attributes
The time elapsed to generate this annotation in seconds.
The reasoning the LLM gave for the annotation it provided.
The ID of the associated annotation result.
The cost of the annotation in cents.
UNAUDITED, FIXED, APPROVED
"GENERATION"
The time spent labeling in seconds.
The date and time when the entity was overwritten or deleted in ISO format.
The user who annotated the task.
The type of identity that annotated the entity.
user, service_account
The user who edited the task.
The type of identity that edited the entity.
user, service_account
The date and time when the entity was archived in ISO format.
The total number of items matching the query.
The current page number.
The number of items per page.