# Source code for aymara_ai.resources.evals.evals

# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from typing import Union, Iterable, Optional
from datetime import datetime
from typing_extensions import Literal

import httpx

from .runs import (
    RunsResource,
    AsyncRunsResource,
    RunsResourceWithRawResponse,
    AsyncRunsResourceWithRawResponse,
    RunsResourceWithStreamingResponse,
    AsyncRunsResourceWithStreamingResponse,
)
from ...types import (
    eval_get_params,
    eval_list_params,
    eval_create_params,
    eval_delete_params,
    eval_update_params,
    eval_analyze_params,
    eval_list_prompts_params,
)
from ..._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
from ..._utils import maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import (
    to_raw_response_wrapper,
    to_streamed_response_wrapper,
    async_to_raw_response_wrapper,
    async_to_streamed_response_wrapper,
)
from ...pagination import SyncOffsetPage, AsyncOffsetPage
from ...types.eval import Eval
from ..._base_client import AsyncPaginator, make_request_options
from ...types.eval_prompt import EvalPrompt
from ...types.shared.status import Status
from ...types.shared.content_type import ContentType
from ...types.prompt_example_param import PromptExampleParam
from ...types.eval_analyze_response import EvalAnalyzeResponse

__all__ = ["EvalsResource", "AsyncEvalsResource"]


class EvalsResource(SyncAPIResource):
    @cached_property
    def runs(self) -> RunsResource:
        return RunsResource(self._client)

    @cached_property
    def with_raw_response(self) -> EvalsResourceWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/aymara-ai/aymara-sdk-python#accessing-raw-response-data-eg-headers
        """
        return EvalsResourceWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> EvalsResourceWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/aymara-ai/aymara-sdk-python#with_streaming_response
        """
        return EvalsResourceWithStreamingResponse(self)
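
    # Illustrative sketch (not part of the generated code): how the
    # `.with_raw_response` and `.with_streaming_response` prefixes are used.
    # Assumes a configured client instance named `client`; the `.headers` and
    # `.parse()` accessors follow the raw-response pattern documented at the
    # URLs above and are an assumption here.
    #
    #     response = client.evals.with_raw_response.get("eval_...")
    #     print(response.headers)  # raw HTTP headers
    #     eval = response.parse()  # parsed `Eval` object
    #
    #     with client.evals.with_streaming_response.get("eval_...") as response:
    #         print(response.headers)  # body is not read until requested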

    def create(
        self,
        *,
        ai_description: str,
        eval_type: str,
        ai_instructions: Optional[str] | Omit = omit,
        created_at: Union[str, datetime, None] | Omit = omit,
        created_by: Optional[str] | Omit = omit,
        eval_instructions: Optional[str] | Omit = omit,
        eval_uuid: Optional[str] | Omit = omit,
        ground_truth: Optional[eval_create_params.GroundTruth] | Omit = omit,
        is_jailbreak: bool | Omit = omit,
        is_sandbox: bool | Omit = omit,
        language: Optional[str] | Omit = omit,
        modality: ContentType | Omit = omit,
        name: Optional[str] | Omit = omit,
        num_prompts: Optional[int] | Omit = omit,
        prompt_examples: Optional[Iterable[PromptExampleParam]] | Omit = omit,
        status: Optional[Status] | Omit = omit,
        updated_at: Union[str, datetime, None] | Omit = omit,
        workspace_uuid: Optional[str] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Eval:
        """Create a new eval using an eval type configuration.

        Example:
            POST /v2/evals
            {"eval_type": "...", "workspace_uuid": "...", ...}

        Args:
            ai_description: Description of the AI under evaluation.
            eval_type: Type of the eval (safety, accuracy, etc.).
            ai_instructions: Instructions the AI should follow.
            created_at: Timestamp when the eval was created.
            created_by: Name of the user who created the evaluation.
            eval_instructions: Additional instructions for the eval, if any.
            eval_uuid: Unique identifier for the evaluation.
            ground_truth: Ground truth data or reference file, if any.
            is_jailbreak: Indicates if the eval is a jailbreak test.
            is_sandbox: Indicates if the eval results are sandboxed.
            language: Language code for the eval (default: "en").
            modality: Content type for AI interactions.
            name: Name of the evaluation.
            num_prompts: Number of prompts/questions in the eval (default: 50).
            prompt_examples: List of example prompts for the eval.
            status: Resource status.
            updated_at: Timestamp when the eval was last updated.
            workspace_uuid: UUID of the associated workspace, if any.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            Eval: The created eval object.

        Raises:
            AymaraAPIError: If the request is invalid.
        """
        return self._post(
            "/v2/evals",
            body=maybe_transform(
                {
                    "ai_description": ai_description,
                    "eval_type": eval_type,
                    "ai_instructions": ai_instructions,
                    "created_at": created_at,
                    "created_by": created_by,
                    "eval_instructions": eval_instructions,
                    "eval_uuid": eval_uuid,
                    "ground_truth": ground_truth,
                    "is_jailbreak": is_jailbreak,
                    "is_sandbox": is_sandbox,
                    "language": language,
                    "modality": modality,
                    "name": name,
                    "num_prompts": num_prompts,
                    "prompt_examples": prompt_examples,
                    "status": status,
                    "updated_at": updated_at,
                    "workspace_uuid": workspace_uuid,
                },
                eval_create_params.EvalCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Eval,
        )
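
    # Usage sketch (illustrative): creating a safety eval. Assumes a configured
    # client, e.g. `client = AymaraAI()`; the client class name comes from the
    # package root and is an assumption here.
    #
    #     eval = client.evals.create(
    #         ai_description="A customer-support chatbot for a retail site",
    #         eval_type="safety",
    #         num_prompts=25,
    #     )
    #     print(eval.eval_uuid, eval.status)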

    def update(
        self,
        eval_uuid: str,
        *,
        workspace_uuid: str | Omit = omit,
        ai_description: Optional[str] | Omit = omit,
        ai_instructions: Optional[str] | Omit = omit,
        eval_instructions: Optional[str] | Omit = omit,
        ground_truth: Optional[eval_update_params.GroundTruth] | Omit = omit,
        name: Optional[str] | Omit = omit,
        prompt_creates: Optional[Iterable[eval_update_params.PromptCreate]] | Omit = omit,
        prompt_updates: Optional[Iterable[eval_update_params.PromptUpdate]] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Eval:
        """Update an existing eval's fields and prompts.

        Example:
            PUT /v2/evals/{eval_uuid}
            {
                "name": "Updated Eval Name",
                "ai_description": "Updated description",
                "prompt_updates": [
                    {"prompt_uuid": "...", "content": "New content", "action": "update"},
                    {"prompt_uuid": "...", "action": "delete"}
                ],
                "prompt_creates": [
                    {"content": "New prompt", "category": "test"}
                ]
            }

        Args:
            eval_uuid: UUID of the eval to update.
            workspace_uuid: Optional workspace UUID for filtering.
            ai_description: New description of the AI under evaluation.
            ai_instructions: New instructions the AI should follow.
            eval_instructions: New additional instructions for the eval.
            ground_truth: New ground truth data or reference file.
            name: New name for the evaluation.
            prompt_creates: List of new prompts to add.
            prompt_updates: List of prompt updates to apply.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            Eval: The updated eval data.

        Raises:
            AymaraAPIError: If the eval is not found or the update is invalid.
        """
        if not eval_uuid:
            raise ValueError(f"Expected a non-empty value for `eval_uuid` but received {eval_uuid!r}")
        return self._put(
            f"/v2/evals/{eval_uuid}",
            body=maybe_transform(
                {
                    "ai_description": ai_description,
                    "ai_instructions": ai_instructions,
                    "eval_instructions": eval_instructions,
                    "ground_truth": ground_truth,
                    "name": name,
                    "prompt_creates": prompt_creates,
                    "prompt_updates": prompt_updates,
                },
                eval_update_params.EvalUpdateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=maybe_transform({"workspace_uuid": workspace_uuid}, eval_update_params.EvalUpdateParams),
            ),
            cast_to=Eval,
        )
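
    # Usage sketch (illustrative): renaming an eval and updating one prompt.
    # The payload shapes follow the PUT example in the docstring above; the
    # UUID placeholders are hypothetical.
    #
    #     updated = client.evals.update(
    #         "eval_...",
    #         name="Updated Eval Name",
    #         prompt_updates=[
    #             {"prompt_uuid": "prompt_...", "content": "New content", "action": "update"},
    #         ],
    #     )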

    def list(
        self,
        *,
        limit: int | Omit = omit,
        offset: int | Omit = omit,
        workspace_uuid: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> SyncOffsetPage[Eval]:
        """List all evals, with optional filtering.

        Example:
            GET /v2/evals?workspace_uuid=...

        Args:
            limit: Maximum number of results to return per page.
            offset: Number of results to skip.
            workspace_uuid: Optional workspace UUID for filtering. Use "**" for
                enterprise-wide access; omit for the user's current workspace.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            SyncOffsetPage[Eval]: Paginated list of evals matching the filter.

        Raises:
            AymaraAPIError: If the request is invalid.
        """
        return self._get_api_list(
            "/v2/evals",
            page=SyncOffsetPage[Eval],
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=maybe_transform(
                    {
                        "limit": limit,
                        "offset": offset,
                        "workspace_uuid": workspace_uuid,
                    },
                    eval_list_params.EvalListParams,
                ),
            ),
            model=Eval,
        )
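
    # Usage sketch (illustrative): `list` returns a `SyncOffsetPage`, which in
    # Stainless-generated SDKs can be iterated directly and fetches subsequent
    # pages on demand; direct iteration is an assumption from that convention.
    #
    #     for eval in client.evals.list(limit=10):
    #         print(eval.eval_uuid, eval.name)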

    def delete(
        self,
        eval_uuid: str,
        *,
        workspace_uuid: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> None:
        """Delete an eval.

        Example:
            DELETE /v2/evals/{eval_uuid}

        Args:
            eval_uuid: UUID of the eval to delete.
            workspace_uuid: Optional workspace UUID for filtering.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            None

        Raises:
            AymaraAPIError: If the eval is not found.
        """
        if not eval_uuid:
            raise ValueError(f"Expected a non-empty value for `eval_uuid` but received {eval_uuid!r}")
        extra_headers = {"Accept": "*/*", **(extra_headers or {})}
        return self._delete(
            f"/v2/evals/{eval_uuid}",
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=maybe_transform({"workspace_uuid": workspace_uuid}, eval_delete_params.EvalDeleteParams),
            ),
            cast_to=NoneType,
        )
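
    # Usage sketch (illustrative): `delete` returns None on success and raises
    # AymaraAPIError on failure, so there is no return value to check.
    #
    #     client.evals.delete("eval_...")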

    def analyze(
        self,
        *,
        created_after: Union[str, datetime, None] | Omit = omit,
        created_before: Union[str, datetime, None] | Omit = omit,
        created_by: Optional[str] | Omit = omit,
        eval_type: Optional[str] | Omit = omit,
        has_score_runs: Optional[bool] | Omit = omit,
        is_jailbreak: Optional[bool] | Omit = omit,
        is_sandbox: Optional[bool] | Omit = omit,
        language: Optional[str] | Omit = omit,
        limit: int | Omit = omit,
        max_pass_rate: Optional[float] | Omit = omit,
        min_pass_rate: Optional[float] | Omit = omit,
        modality: Optional[str] | Omit = omit,
        name: Optional[str] | Omit = omit,
        offset: int | Omit = omit,
        run_created_after: Union[str, datetime, None] | Omit = omit,
        run_created_before: Union[str, datetime, None] | Omit = omit,
        score_run_status: Optional[str] | Omit = omit,
        sort_by: Literal["created_at", "updated_at", "name", "pass_rate", "num_score_runs", "last_run_date"]
        | Omit = omit,
        sort_order: Literal["asc", "desc"] | Omit = omit,
        status: Optional[str] | Omit = omit,
        workspace_uuid: Optional[str] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> EvalAnalyzeResponse:
        """Analyze evals with advanced filtering and aggregated statistics.

        This endpoint analyzes across both eval metadata and score run
        performance data, providing comprehensive filtering and aggregated
        statistics for each eval. Filters span eval metadata (name, type,
        status, language, etc.), score run performance (pass rate, run count,
        etc.), and sorting and pagination options.

        Example:
            POST /v2/eval-analysis
            {
                "name": "safety",
                "eval_type": "safety",
                "min_pass_rate": 0.8,
                "has_score_runs": true,
                "sort_by": "pass_rate",
                "sort_order": "desc",
                "limit": 20,
                "offset": 0
            }

        Args:
            created_after: Filter evals created after this date.
            created_before: Filter evals created before this date.
            created_by: Filter by creator email.
            eval_type: Filter by eval type (safety, accuracy, jailbreak, image_safety).
            has_score_runs: Only include evals that have score runs.
            is_jailbreak: Filter by jailbreak status.
            is_sandbox: Filter by sandbox status.
            language: Filter by language code (e.g., en, es).
            limit: Maximum number of results (1-100).
            max_pass_rate: Maximum average pass rate (0.0-1.0).
            min_pass_rate: Minimum average pass rate (0.0-1.0).
            modality: Filter by modality (text, image).
            name: Filter by eval name (case-insensitive partial match).
            offset: Number of results to skip.
            run_created_after: Filter by score runs created after this date.
            run_created_before: Filter by score runs created before this date.
            score_run_status: Filter by any score run status.
            sort_by: Field to sort by.
            sort_order: Sort order.
            status: Filter by eval status (created, processing, finished, failed).
            workspace_uuid: Filter by workspace UUID.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            EvalAnalyzeResponse: Paginated results with matching evals and their statistics.

        Raises:
            AymaraAPIError: If the request is invalid or analysis parameters are malformed.
        """
        return self._post(
            "/v2/eval-analysis",
            body=maybe_transform(
                {
                    "created_after": created_after,
                    "created_before": created_before,
                    "created_by": created_by,
                    "eval_type": eval_type,
                    "has_score_runs": has_score_runs,
                    "is_jailbreak": is_jailbreak,
                    "is_sandbox": is_sandbox,
                    "language": language,
                    "limit": limit,
                    "max_pass_rate": max_pass_rate,
                    "min_pass_rate": min_pass_rate,
                    "modality": modality,
                    "name": name,
                    "offset": offset,
                    "run_created_after": run_created_after,
                    "run_created_before": run_created_before,
                    "score_run_status": score_run_status,
                    "sort_by": sort_by,
                    "sort_order": sort_order,
                    "status": status,
                    "workspace_uuid": workspace_uuid,
                },
                eval_analyze_params.EvalAnalyzeParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=EvalAnalyzeResponse,
        )
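
    # Usage sketch (illustrative): mirror of the JSON example in the docstring,
    # finding safety evals with an average pass rate of at least 80%.
    #
    #     analysis = client.evals.analyze(
    #         eval_type="safety",
    #         min_pass_rate=0.8,
    #         has_score_runs=True,
    #         sort_by="pass_rate",
    #         sort_order="desc",
    #         limit=20,
    #     )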

    def get(
        self,
        eval_uuid: str,
        *,
        workspace_uuid: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Eval:
        """Retrieve a specific eval by its UUID.

        Example:
            GET /v2/evals/{eval_uuid}

        Args:
            eval_uuid: UUID of the eval to retrieve.
            workspace_uuid: Optional workspace UUID for filtering.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            Eval: The eval data.

        Raises:
            AymaraAPIError: If the eval is not found.
        """
        if not eval_uuid:
            raise ValueError(f"Expected a non-empty value for `eval_uuid` but received {eval_uuid!r}")
        return self._get(
            f"/v2/evals/{eval_uuid}",
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=maybe_transform({"workspace_uuid": workspace_uuid}, eval_get_params.EvalGetParams),
            ),
            cast_to=Eval,
        )
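
    # Usage sketch (illustrative): fetch a single eval by UUID; an empty UUID
    # raises ValueError locally before any request is made.
    #
    #     eval = client.evals.get("eval_...", workspace_uuid="ws_...")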

    def list_prompts(
        self,
        eval_uuid: str,
        *,
        limit: int | Omit = omit,
        offset: int | Omit = omit,
        workspace_uuid: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> SyncOffsetPage[EvalPrompt]:
        """Retrieve prompts for a specific eval if they exist.

        Example:
            GET /v2/evals/{eval_uuid}/prompts

        Args:
            eval_uuid: UUID of the eval to get prompts for.
            limit: Maximum number of results to return per page.
            offset: Number of results to skip.
            workspace_uuid: Optional workspace UUID for filtering.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            SyncOffsetPage[EvalPrompt]: Paginated list of prompts and metadata for the eval.

        Raises:
            AymaraAPIError: If the eval is not found.
        """
        if not eval_uuid:
            raise ValueError(f"Expected a non-empty value for `eval_uuid` but received {eval_uuid!r}")
        return self._get_api_list(
            f"/v2/evals/{eval_uuid}/prompts",
            page=SyncOffsetPage[EvalPrompt],
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=maybe_transform(
                    {
                        "limit": limit,
                        "offset": offset,
                        "workspace_uuid": workspace_uuid,
                    },
                    eval_list_prompts_params.EvalListPromptsParams,
                ),
            ),
            model=EvalPrompt,
        )
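
    # Usage sketch (illustrative): prompts are paginated the same way as evals,
    # so the page can be iterated directly (same Stainless-convention assumption
    # as for `list` above).
    #
    #     for prompt in client.evals.list_prompts("eval_...", limit=50):
    #         print(prompt)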


class AsyncEvalsResource(AsyncAPIResource):
    @cached_property
    def runs(self) -> AsyncRunsResource:
        return AsyncRunsResource(self._client)

    @cached_property
    def with_raw_response(self) -> AsyncEvalsResourceWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/aymara-ai/aymara-sdk-python#accessing-raw-response-data-eg-headers
        """
        return AsyncEvalsResourceWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncEvalsResourceWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/aymara-ai/aymara-sdk-python#with_streaming_response
        """
        return AsyncEvalsResourceWithStreamingResponse(self)

    async def create(
        self,
        *,
        ai_description: str,
        eval_type: str,
        ai_instructions: Optional[str] | Omit = omit,
        created_at: Union[str, datetime, None] | Omit = omit,
        created_by: Optional[str] | Omit = omit,
        eval_instructions: Optional[str] | Omit = omit,
        eval_uuid: Optional[str] | Omit = omit,
        ground_truth: Optional[eval_create_params.GroundTruth] | Omit = omit,
        is_jailbreak: bool | Omit = omit,
        is_sandbox: bool | Omit = omit,
        language: Optional[str] | Omit = omit,
        modality: ContentType | Omit = omit,
        name: Optional[str] | Omit = omit,
        num_prompts: Optional[int] | Omit = omit,
        prompt_examples: Optional[Iterable[PromptExampleParam]] | Omit = omit,
        status: Optional[Status] | Omit = omit,
        updated_at: Union[str, datetime, None] | Omit = omit,
        workspace_uuid: Optional[str] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Eval:
        """Create a new eval using an eval type configuration.

        Example:
            POST /v2/evals
            {"eval_type": "...", "workspace_uuid": "...", ...}

        Args:
            ai_description: Description of the AI under evaluation.
            eval_type: Type of the eval (safety, accuracy, etc.).
            ai_instructions: Instructions the AI should follow.
            created_at: Timestamp when the eval was created.
            created_by: Name of the user who created the evaluation.
            eval_instructions: Additional instructions for the eval, if any.
            eval_uuid: Unique identifier for the evaluation.
            ground_truth: Ground truth data or reference file, if any.
            is_jailbreak: Indicates if the eval is a jailbreak test.
            is_sandbox: Indicates if the eval results are sandboxed.
            language: Language code for the eval (default: "en").
            modality: Content type for AI interactions.
            name: Name of the evaluation.
            num_prompts: Number of prompts/questions in the eval (default: 50).
            prompt_examples: List of example prompts for the eval.
            status: Resource status.
            updated_at: Timestamp when the eval was last updated.
            workspace_uuid: UUID of the associated workspace, if any.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            Eval: The created eval object.

        Raises:
            AymaraAPIError: If the request is invalid.
        """
        return await self._post(
            "/v2/evals",
            body=await async_maybe_transform(
                {
                    "ai_description": ai_description,
                    "eval_type": eval_type,
                    "ai_instructions": ai_instructions,
                    "created_at": created_at,
                    "created_by": created_by,
                    "eval_instructions": eval_instructions,
                    "eval_uuid": eval_uuid,
                    "ground_truth": ground_truth,
                    "is_jailbreak": is_jailbreak,
                    "is_sandbox": is_sandbox,
                    "language": language,
                    "modality": modality,
                    "name": name,
                    "num_prompts": num_prompts,
                    "prompt_examples": prompt_examples,
                    "status": status,
                    "updated_at": updated_at,
                    "workspace_uuid": workspace_uuid,
                },
                eval_create_params.EvalCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Eval,
        )
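
    # Usage sketch (illustrative): the async resource mirrors the sync API but
    # must be awaited. Assumes an async client, e.g. `client = AsyncAymaraAI()`;
    # the client class name is an assumption here.
    #
    #     import asyncio
    #
    #     async def main() -> None:
    #         eval = await client.evals.create(
    #             ai_description="A customer-support chatbot",
    #             eval_type="safety",
    #         )
    #         print(eval.eval_uuid)
    #
    #     asyncio.run(main())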

    async def update(
        self,
        eval_uuid: str,
        *,
        workspace_uuid: str | Omit = omit,
        ai_description: Optional[str] | Omit = omit,
        ai_instructions: Optional[str] | Omit = omit,
        eval_instructions: Optional[str] | Omit = omit,
        ground_truth: Optional[eval_update_params.GroundTruth] | Omit = omit,
        name: Optional[str] | Omit = omit,
        prompt_creates: Optional[Iterable[eval_update_params.PromptCreate]] | Omit = omit,
        prompt_updates: Optional[Iterable[eval_update_params.PromptUpdate]] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Eval:
        """Update an existing eval's fields and prompts.

        Example:
            PUT /v2/evals/{eval_uuid}
            {
                "name": "Updated Eval Name",
                "ai_description": "Updated description",
                "prompt_updates": [
                    {"prompt_uuid": "...", "content": "New content", "action": "update"},
                    {"prompt_uuid": "...", "action": "delete"}
                ],
                "prompt_creates": [
                    {"content": "New prompt", "category": "test"}
                ]
            }

        Args:
            eval_uuid: UUID of the eval to update.
            workspace_uuid: Optional workspace UUID for filtering.
            ai_description: New description of the AI under evaluation.
            ai_instructions: New instructions the AI should follow.
            eval_instructions: New additional instructions for the eval.
            ground_truth: New ground truth data or reference file.
            name: New name for the evaluation.
            prompt_creates: List of new prompts to add.
            prompt_updates: List of prompt updates to apply.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            Eval: The updated eval data.

        Raises:
            AymaraAPIError: If the eval is not found or the update is invalid.
        """
        if not eval_uuid:
            raise ValueError(f"Expected a non-empty value for `eval_uuid` but received {eval_uuid!r}")
        return await self._put(
            f"/v2/evals/{eval_uuid}",
            body=await async_maybe_transform(
                {
                    "ai_description": ai_description,
                    "ai_instructions": ai_instructions,
                    "eval_instructions": eval_instructions,
                    "ground_truth": ground_truth,
                    "name": name,
                    "prompt_creates": prompt_creates,
                    "prompt_updates": prompt_updates,
                },
                eval_update_params.EvalUpdateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=await async_maybe_transform(
                    {"workspace_uuid": workspace_uuid}, eval_update_params.EvalUpdateParams
                ),
            ),
            cast_to=Eval,
        )

    def list(
        self,
        *,
        limit: int | Omit = omit,
        offset: int | Omit = omit,
        workspace_uuid: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> AsyncPaginator[Eval, AsyncOffsetPage[Eval]]:
        """List all evals, with optional filtering.

        Example:
            GET /v2/evals?workspace_uuid=...

        Args:
            limit: Maximum number of results to return per page.
            offset: Number of results to skip.
            workspace_uuid: Optional workspace UUID for filtering. Use "**" for
                enterprise-wide access; omit for the user's current workspace.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            AsyncPaginator[Eval, AsyncOffsetPage[Eval]]: Paginated list of evals matching the filter.

        Raises:
            AymaraAPIError: If the request is invalid.
        """
        return self._get_api_list(
            "/v2/evals",
            page=AsyncOffsetPage[Eval],
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=maybe_transform(
                    {
                        "limit": limit,
                        "offset": offset,
                        "workspace_uuid": workspace_uuid,
                    },
                    eval_list_params.EvalListParams,
                ),
            ),
            model=Eval,
        )
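
    # Usage sketch (illustrative): in Stainless-generated SDKs the
    # `AsyncPaginator` returned here supports `async for` directly, fetching
    # pages lazily; that iteration protocol is an assumption here.
    #
    #     async def print_all_evals() -> None:
    #         async for eval in client.evals.list():
    #             print(eval.eval_uuid)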

    async def delete(
        self,
        eval_uuid: str,
        *,
        workspace_uuid: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> None:
        """Delete an eval.

        Example:
            DELETE /v2/evals/{eval_uuid}

        Args:
            eval_uuid: UUID of the eval to delete.
            workspace_uuid: Optional workspace UUID for filtering.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            None

        Raises:
            AymaraAPIError: If the eval is not found.
        """
        if not eval_uuid:
            raise ValueError(f"Expected a non-empty value for `eval_uuid` but received {eval_uuid!r}")
        extra_headers = {"Accept": "*/*", **(extra_headers or {})}
        return await self._delete(
            f"/v2/evals/{eval_uuid}",
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=await async_maybe_transform(
                    {"workspace_uuid": workspace_uuid}, eval_delete_params.EvalDeleteParams
                ),
            ),
            cast_to=NoneType,
        )

    async def analyze(
        self,
        *,
        created_after: Union[str, datetime, None] | Omit = omit,
        created_before: Union[str, datetime, None] | Omit = omit,
        created_by: Optional[str] | Omit = omit,
        eval_type: Optional[str] | Omit = omit,
        has_score_runs: Optional[bool] | Omit = omit,
        is_jailbreak: Optional[bool] | Omit = omit,
        is_sandbox: Optional[bool] | Omit = omit,
        language: Optional[str] | Omit = omit,
        limit: int | Omit = omit,
        max_pass_rate: Optional[float] | Omit = omit,
        min_pass_rate: Optional[float] | Omit = omit,
        modality: Optional[str] | Omit = omit,
        name: Optional[str] | Omit = omit,
        offset: int | Omit = omit,
        run_created_after: Union[str, datetime, None] | Omit = omit,
        run_created_before: Union[str, datetime, None] | Omit = omit,
        score_run_status: Optional[str] | Omit = omit,
        sort_by: Literal["created_at", "updated_at", "name", "pass_rate", "num_score_runs", "last_run_date"]
        | Omit = omit,
        sort_order: Literal["asc", "desc"] | Omit = omit,
        status: Optional[str] | Omit = omit,
        workspace_uuid: Optional[str] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> EvalAnalyzeResponse:
        """Analyze evals with advanced filtering and aggregated statistics.

        This endpoint analyzes across both eval metadata and score run
        performance data, providing comprehensive filtering and aggregated
        statistics for each eval. Filters span eval metadata (name, type,
        status, language, etc.), score run performance (pass rate, run count,
        etc.), and sorting and pagination options.

        Example:
            POST /v2/eval-analysis
            {
                "name": "safety",
                "eval_type": "safety",
                "min_pass_rate": 0.8,
                "has_score_runs": true,
                "sort_by": "pass_rate",
                "sort_order": "desc",
                "limit": 20,
                "offset": 0
            }

        Args:
            created_after: Filter evals created after this date.
            created_before: Filter evals created before this date.
            created_by: Filter by creator email.
            eval_type: Filter by eval type (safety, accuracy, jailbreak, image_safety).
            has_score_runs: Only include evals that have score runs.
            is_jailbreak: Filter by jailbreak status.
            is_sandbox: Filter by sandbox status.
            language: Filter by language code (e.g., en, es).
            limit: Maximum number of results (1-100).
            max_pass_rate: Maximum average pass rate (0.0-1.0).
            min_pass_rate: Minimum average pass rate (0.0-1.0).
            modality: Filter by modality (text, image).
            name: Filter by eval name (case-insensitive partial match).
            offset: Number of results to skip.
            run_created_after: Filter by score runs created after this date.
            run_created_before: Filter by score runs created before this date.
            score_run_status: Filter by any score run status.
            sort_by: Field to sort by.
            sort_order: Sort order.
            status: Filter by eval status (created, processing, finished, failed).
            workspace_uuid: Filter by workspace UUID.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            EvalAnalyzeResponse: Paginated results with matching evals and their statistics.

        Raises:
            AymaraAPIError: If the request is invalid or analysis parameters are malformed.
        """
        return await self._post(
            "/v2/eval-analysis",
            body=await async_maybe_transform(
                {
                    "created_after": created_after,
                    "created_before": created_before,
                    "created_by": created_by,
                    "eval_type": eval_type,
                    "has_score_runs": has_score_runs,
                    "is_jailbreak": is_jailbreak,
                    "is_sandbox": is_sandbox,
                    "language": language,
                    "limit": limit,
                    "max_pass_rate": max_pass_rate,
                    "min_pass_rate": min_pass_rate,
                    "modality": modality,
                    "name": name,
                    "offset": offset,
                    "run_created_after": run_created_after,
                    "run_created_before": run_created_before,
                    "score_run_status": score_run_status,
                    "sort_by": sort_by,
                    "sort_order": sort_order,
                    "status": status,
                    "workspace_uuid": workspace_uuid,
                },
                eval_analyze_params.EvalAnalyzeParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=EvalAnalyzeResponse,
        )

    async def get(
        self,
        eval_uuid: str,
        *,
        workspace_uuid: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Eval:
        """Retrieve a specific eval by its UUID.

        Example:
            GET /v2/evals/{eval_uuid}

        Args:
            eval_uuid: UUID of the eval to retrieve.
            workspace_uuid: Optional workspace UUID for filtering.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            Eval: The eval data.

        Raises:
            AymaraAPIError: If the eval is not found.
        """
        if not eval_uuid:
            raise ValueError(f"Expected a non-empty value for `eval_uuid` but received {eval_uuid!r}")
        return await self._get(
            f"/v2/evals/{eval_uuid}",
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=await async_maybe_transform({"workspace_uuid": workspace_uuid}, eval_get_params.EvalGetParams),
            ),
            cast_to=Eval,
        )

    def list_prompts(
        self,
        eval_uuid: str,
        *,
        limit: int | Omit = omit,
        offset: int | Omit = omit,
        workspace_uuid: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> AsyncPaginator[EvalPrompt, AsyncOffsetPage[EvalPrompt]]:
        """Retrieve prompts for a specific eval if they exist.

        Example:
            GET /v2/evals/{eval_uuid}/prompts

        Args:
            eval_uuid: UUID of the eval to get prompts for.
            limit: Maximum number of results to return per page.
            offset: Number of results to skip.
            workspace_uuid: Optional workspace UUID for filtering.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            AsyncPaginator[EvalPrompt, AsyncOffsetPage[EvalPrompt]]: Paginated list of prompts
                and metadata for the eval.

        Raises:
            AymaraAPIError: If the eval is not found.
        """
        if not eval_uuid:
            raise ValueError(f"Expected a non-empty value for `eval_uuid` but received {eval_uuid!r}")
        return self._get_api_list(
            f"/v2/evals/{eval_uuid}/prompts",
            page=AsyncOffsetPage[EvalPrompt],
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=maybe_transform(
                    {
                        "limit": limit,
                        "offset": offset,
                        "workspace_uuid": workspace_uuid,
                    },
                    eval_list_prompts_params.EvalListPromptsParams,
                ),
            ),
            model=EvalPrompt,
        )


class EvalsResourceWithRawResponse:
    def __init__(self, evals: EvalsResource) -> None:
        self._evals = evals

        self.create = to_raw_response_wrapper(evals.create)
        self.update = to_raw_response_wrapper(evals.update)
        self.list = to_raw_response_wrapper(evals.list)
        self.delete = to_raw_response_wrapper(evals.delete)
        self.analyze = to_raw_response_wrapper(evals.analyze)
        self.get = to_raw_response_wrapper(evals.get)
        self.list_prompts = to_raw_response_wrapper(evals.list_prompts)

    @cached_property
    def runs(self) -> RunsResourceWithRawResponse:
        return RunsResourceWithRawResponse(self._evals.runs)


class AsyncEvalsResourceWithRawResponse:
    def __init__(self, evals: AsyncEvalsResource) -> None:
        self._evals = evals

        self.create = async_to_raw_response_wrapper(evals.create)
        self.update = async_to_raw_response_wrapper(evals.update)
        self.list = async_to_raw_response_wrapper(evals.list)
        self.delete = async_to_raw_response_wrapper(evals.delete)
        self.analyze = async_to_raw_response_wrapper(evals.analyze)
        self.get = async_to_raw_response_wrapper(evals.get)
        self.list_prompts = async_to_raw_response_wrapper(evals.list_prompts)

    @cached_property
    def runs(self) -> AsyncRunsResourceWithRawResponse:
        return AsyncRunsResourceWithRawResponse(self._evals.runs)


class EvalsResourceWithStreamingResponse:
    def __init__(self, evals: EvalsResource) -> None:
        self._evals = evals

        self.create = to_streamed_response_wrapper(evals.create)
        self.update = to_streamed_response_wrapper(evals.update)
        self.list = to_streamed_response_wrapper(evals.list)
        self.delete = to_streamed_response_wrapper(evals.delete)
        self.analyze = to_streamed_response_wrapper(evals.analyze)
        self.get = to_streamed_response_wrapper(evals.get)
        self.list_prompts = to_streamed_response_wrapper(evals.list_prompts)

    @cached_property
    def runs(self) -> RunsResourceWithStreamingResponse:
        return RunsResourceWithStreamingResponse(self._evals.runs)


class AsyncEvalsResourceWithStreamingResponse:
    def __init__(self, evals: AsyncEvalsResource) -> None:
        self._evals = evals

        self.create = async_to_streamed_response_wrapper(evals.create)
        self.update = async_to_streamed_response_wrapper(evals.update)
        self.list = async_to_streamed_response_wrapper(evals.list)
        self.delete = async_to_streamed_response_wrapper(evals.delete)
        self.analyze = async_to_streamed_response_wrapper(evals.analyze)
        self.get = async_to_streamed_response_wrapper(evals.get)
        self.list_prompts = async_to_streamed_response_wrapper(evals.list_prompts)

    @cached_property
    def runs(self) -> AsyncRunsResourceWithStreamingResponse:
        return AsyncRunsResourceWithStreamingResponse(self._evals.runs)