Agentex outputs are generated before any other evaluation tasks run, so you can chain the agent's output into downstream tasks such as auto evaluations or contributor evaluations. Downstream tasks reference that output through the Agentex task's alias (for example, {{item.agent_response.output}} in the example below).
evaluation = client.evaluations.create(
    name="Agentex with Judge Evaluation",
    data=[
        {
            "input": "What is the capital of France?",
            "expected_output": "Paris"
        },
        ...
    ],
    tasks=[
        # First, generate output from Agentex agent
        {
            "task_type": "agentex_output",
            "alias": "agent_response",
            "configuration": {
                "agentex_agent_id": "your-agent-id",
                "input_column": "item.input",
                "include_traces": False
            }
        },
        # Then, evaluate the output with an LLM judge
        {
            "task_type": "auto_evaluation.guided_decoding",
            "alias": "correctness",
            "configuration": {
                "model": "openai/gpt-4o",
                "prompt": """
                    Given the user's query: {{item.input}},
                    The agent's response was: {{item.agent_response.output}}
                    The expected response is: {{item.expected_output}}

                    Did the agent's response correctly answer the question?
                """,
                "choices": ["Yes", "No"]
            }
        }
    ]
)
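The chaining works because the first task's alias ("agent_response") becomes a field on each row, which later task templates can reference as {{item.agent_response.output}}. The sketch below is only an illustration of that substitution, not the platform's actual rendering: it assumes Jinja-style placeholders and uses a hypothetical row with a made-up agent answer.

# Illustrative only: renders the judge prompt for one row, assuming
# Jinja-style {{ ... }} templating. The row contents are hypothetical.
from jinja2 import Template

row = {
    "item": {
        "input": "What is the capital of France?",
        "expected_output": "Paris",
        # Populated from the task aliased "agent_response" (made-up value here)
        "agent_response": {"output": "The capital of France is Paris."},
    }
}

judge_prompt = Template(
    "Given the user's query: {{ item.input }},\n"
    "The agent's response was: {{ item.agent_response.output }}\n"
    "The expected response is: {{ item.expected_output }}\n\n"
    "Did the agent's response correctly answer the question?"
)

print(judge_prompt.render(**row))

The guided_decoding task then constrains the judge model to answer with one of the listed choices ("Yes" or "No"), giving a clean per-row correctness label.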