# 1. Instantiate Client
from scale_gp import SGPClient

ACCOUNT_ID = ...  # fill in here
SGP_API_KEY = ...  # fill in here

client = SGPClient(
    api_key=SGP_API_KEY,
    account_id=ACCOUNT_ID,
)
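
# In real code, prefer reading credentials from the environment instead of
# hardcoding them; a minimal sketch using the standard library (the variable
# names below are hypothetical):
#
#   import os
#   SGP_API_KEY = os.environ["SGP_API_KEY"]
#   ACCOUNT_ID = os.environ["SGP_ACCOUNT_ID"]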

# 2. Set up an Application and Evaluation Configuration
math_app = client.application_specs.create(
    account_id=ACCOUNT_ID,
    name="Mathbot",
    description="An application that can answer math/finance questions.",
)

question_requests = [
    {
        "type": "categorical",
        "title": "Final Answer Accuracy",
        "prompt": "Rate the accuracy of the Final Answer",
        "choices": [{"label": "1 - Inaccurate", "value": "1"}, {"label": "2 - Partial", "value": "2"}, {"label": "3 - Perfect", "value": "3"}],
        "account_id": ACCOUNT_ID,
    },
    {
        "type": "categorical",
        "title": "Reasoning Quality",
        "prompt": "Rate the quality of the Reasoning to Get to the Final Answer",
        "choices": [{"label": "0 - Not Applicable", "value": "0"}, {"label": "1 - Low Quality", "value": "1"}, {"label": "2 - Partial", "value": "2"}, {"label": "3 - Perfect", "value": "3"}],
        "account_id": ACCOUNT_ID,
    },
]

question_ids = []
for question in question_requests:
    q = client.questions.create(**question)
    question_ids.append(q.id)
    print(q)

q_set = client.question_sets.create(
    name="test question set",
    question_ids=question_ids,
    account_id=ACCOUNT_ID,
)

evaluation_config = client.evaluation_configs.create(
    account_id=ACCOUNT_ID,
    question_set_id=q_set.id,
    evaluation_type="human",
)
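
# With evaluation_type="human", answers to the question set are collected from
# human annotators rather than an automated judge.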

# 4. V1: An application with multiple inputs - Create a `FLEXIBLE` evaluation dataset
from scale_gp.types.evaluation_datasets import FlexibleTestCaseSchema
from scale_gp.lib.dataset_builder import DatasetBuilder

flexible_eval_dataset = DatasetBuilder(client).initialize(
    account_id=ACCOUNT_ID,
    name="Math questions with external data",
    test_cases=[
        FlexibleTestCaseSchema(input={"stock_prices": [100, 101, 102, 100, 110], "query": "what is 2 plus 2"}, expected_output="4"),
        FlexibleTestCaseSchema(input={"stock_prices": [100, 101, 102, 100, 110], "query": "what is the square root of 64959212 modulo 99 to 3 decimal places?"}, expected_output="40.727"),
        FlexibleTestCaseSchema(input={"stock_prices": [100, 101, 102, 100, 110], "query": "what is the percent gain of APPL's stock since yesteday?"}, expected_output="10%"),
        FlexibleTestCaseSchema(input={"stock_prices": [110, 101, 102, 100, 110], "query": "what is the percent gain of APPL's stock since 5 days ago?"}, expected_output="0%"),
    ]
)
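
# A FLEXIBLE dataset accepts arbitrary JSON per test case, which is what lets
# each case carry both the query and the external stock-price context above.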

# 6. V2: A tool-calling bot that generates a trace
from scale_gp.lib.external_applications import ExternalApplication, ExternalApplicationOutputCompletion

mathbot_with_python_variant = client.application_variants.create(
    account_id=ACCOUNT_ID,
    application_spec_id=math_app.id,
    name="Mathbot with python",
    description="A variant that can answer math questions with python.",
    configuration={},
    version="OFFLINE", # since we're running this variant locally, we set the version to OFFLINE
)

import json
from datetime import datetime

def mathbot_with_python(input: dict):
    trace = []

    # STEP 1: Generate a python script that can solve the problem
    start = datetime.now()
    system_prompt = f"Here are APPL's stock prices for the last 5 days (the last value is today's price): {json.dumps(input["stock_prices"])}"    
    response = client.chat_completions.create(
        messages=[
            {
                "role": "system",
                "content": system_prompt
            },
            {
                "role": "user",
                "content": input["query"]
            },
            {
                "role": "user",
                "content": "Return a python statement that can solve the problem, formatted as a markdown code block (WITHOUT the language specifier). Please make sure it is a python statement meaning it has no imports or assignments inside of it. It should not have any equals signs!"
            }
        ],
        model="gpt-4o-mini",
    )
    python_statement = response.chat_completion.message.content.split("```")[1].strip()

    trace.append({
        "node_id": "create_python_script",
        "start_timestamp": start.isoformat(),
        "operation_input": {
            "stock_prices": input["stock_prices"],
            "query": input["query"]
        },
        "operation_output": {
            "python_script": python_statement
        },
    })
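    # Note: the node_id "create_python_script" is referenced later by the
    # per-question annotation config (data_loc ["trace", "create_python_script", ...]).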

    # STEP 2: Run the python script
    start = datetime.now()
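    # WARNING: eval() on model-generated code is unsafe outside a trusted demo;
    # in production, run generated code in a sandboxed interpreter instead.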
    value = eval(python_statement)

    trace.append({
        "node_id": "run_python_script",
        "start_timestamp": start.isoformat(),
        "operation_input": {
            "python_script": python_statement
        },
        "operation_output": {
            "value": value
        },
    })
    
    # STEP 3: Summarize the answer
    start = datetime.now()
    response = client.chat_completions.create(
        messages=[
            {
                "role": "system",
                "content": system_prompt
            },
            {
                "role": "user",
                "content": input["query"]
            },
            {
                "role": "assistant",
                "content": python_statement
            },
            {
                "role": "user",
                "content": f"This is the output of the script: {value}. What is the answer to the question?"
            }
        ],
        model="gpt-4o-mini",
    )

    trace.append({
        "node_id": "summarize_output",
        "start_timestamp": start.isoformat(),
        "operation_input": {
            "python_script": python_statement,
            "value": value
        },
        "operation_output": {
            "answer": response.chat_completion.message.content
        },
    })


    print("Question:", input["query"], "Stock Prices:", input["stock_prices"], "Answer:", response.chat_completion.message.content)

    return ExternalApplicationOutputCompletion(generation_output=response.chat_completion.message.content, trace_spans=trace)

runner = ExternalApplication(
    client,
).initialize(application_variant_id=mathbot_with_python_variant.id, application=mathbot_with_python)

runner.generate_outputs(
    evaluation_dataset_id=flexible_eval_dataset.id,
    evaluation_dataset_version=1, # we only have 1 version of the dataset
)
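
# generate_outputs calls mathbot_with_python once per test case in the dataset
# version and uploads each completion together with its trace spans.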

# 7. Showing the trace to annotators and changing the annotation configuration based on the question
annotation_config = {
    "components": [
        [
            # Let's put the query and the output side by side since that's what we care about most
            { "data_loc": ["test_case_data", "input", "query"], "label": "Query" },
            { "data_loc": ["test_case_output", "output"] }
        ],
        [
            { "data_loc": ["test_case_data", "expected_output"] }
        ],
        [
            { "data_loc": ["test_case_data", "input", "stock_prices"] }
        ],
    ]
}
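
# Each inner list above is one row of the annotation UI: entries within a row
# render side by side, and "data_loc" is a path into the task payload
# (test case data, test case output, or trace).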

mathbot_with_python_evaluation = client.evaluations.create(
    account_id=ACCOUNT_ID,
    application_variant_id=mathbot_with_python_variant.id,
    application_spec_id=math_app.id,
    name="Mathbot with python evaluation",
    description="Mathbot with python evaluation",
    evaluation_config_id=evaluation_config.id,
    type="builder",
    evaluation_dataset_id=flexible_eval_dataset.id,
    evaluation_dataset_version=1,
    annotation_config=annotation_config,
    # For the second question, we only need to look at the python script to understand
    # the reasoning behind the answer, so let's use a different annotation config there.
    question_id_to_annotation_config={
        question_ids[1]: {
            "components": [
                [ { "data_loc": ["trace", "create_python_script", "output"] } ]
            ]
        }
    }
)
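
# A minimal sketch of checking on the evaluation afterwards. This assumes the
# SDK exposes a standard retrieve method (typical for generated clients);
# verify the exact call against your scale_gp version:
#
#   evaluation = client.evaluations.retrieve(mathbot_with_python_evaluation.id)
#   print(evaluation.status)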