# Source code for aymara_ai.resources.evals.evals

# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from typing import Union, Iterable, Optional
from datetime import datetime
from typing_extensions import Literal

import httpx

from .runs import (
    RunsResource,
    AsyncRunsResource,
    RunsResourceWithRawResponse,
    AsyncRunsResourceWithRawResponse,
    RunsResourceWithStreamingResponse,
    AsyncRunsResourceWithStreamingResponse,
)
from ...types import (
    eval_get_params,
    eval_list_params,
    eval_create_params,
    eval_delete_params,
    eval_update_params,
    eval_analyze_params,
    eval_list_prompts_params,
)
from ..._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
from ..._utils import maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import (
    to_raw_response_wrapper,
    to_streamed_response_wrapper,
    async_to_raw_response_wrapper,
    async_to_streamed_response_wrapper,
)
from ...pagination import SyncOffsetPage, AsyncOffsetPage
from ...types.eval import Eval
from ..._base_client import AsyncPaginator, make_request_options
from ...types.eval_prompt import EvalPrompt
from ...types.shared.status import Status
from ...types.shared.content_type import ContentType
from ...types.prompt_example_param import PromptExampleParam
from ...types.eval_analyze_response import EvalAnalyzeResponse

__all__ = ["EvalsResource", "AsyncEvalsResource"]


class EvalsResource(SyncAPIResource):
    @cached_property
    def runs(self) -> RunsResource:
        return RunsResource(self._client)

    @cached_property
    def with_raw_response(self) -> EvalsResourceWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/aymara-ai/aymara-sdk-python#accessing-raw-response-data-eg-headers
        """
        return EvalsResourceWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> EvalsResourceWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/aymara-ai/aymara-sdk-python#with_streaming_response
        """
        return EvalsResourceWithStreamingResponse(self)
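
    # Illustrative sketch (not part of the generated code): how the
    # `.with_raw_response` and `.with_streaming_response` prefixes are used.
    # Assumes a configured client instance named `client`; the `.headers` and
    # `.parse()` accessors follow the raw-response pattern documented at the
    # URLs above and are an assumption here.
    #
    #     response = client.evals.with_raw_response.get("eval_...")
    #     print(response.headers)  # raw HTTP headers
    #     eval = response.parse()  # parsed `Eval` object
    #
    #     with client.evals.with_streaming_response.get("eval_...") as response:
    #         print(response.headers)  # body is not read until requested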

    def create(
        self,
        *,
        ai_description: str,
        eval_type: str,
        ai_instructions: Optional[str] | Omit = omit,
        created_at: Union[str, datetime, None] | Omit = omit,
        created_by: Optional[str] | Omit = omit,
        eval_instructions: Optional[str] | Omit = omit,
        eval_uuid: Optional[str] | Omit = omit,
        ground_truth: Optional[eval_create_params.GroundTruth] | Omit = omit,
        is_jailbreak: bool | Omit = omit,
        is_sandbox: bool | Omit = omit,
        language: Optional[str] | Omit = omit,
        modality: ContentType | Omit = omit,
        name: Optional[str] | Omit = omit,
        num_prompts: Optional[int] | Omit = omit,
        prompt_examples: Optional[Iterable[PromptExampleParam]] | Omit = omit,
        status: Optional[Status] | Omit = omit,
        updated_at: Union[str, datetime, None] | Omit = omit,
        workspace_uuid: Optional[str] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Eval:
        """Create a new eval using an eval type configuration.

        Example:
            POST /v2/evals
            {"eval_type": "...", "workspace_uuid": "...", ...}

        Args:
            ai_description: Description of the AI under evaluation.
            eval_type: Type of the eval (safety, accuracy, etc.).
            ai_instructions: Instructions the AI should follow.
            created_at: Timestamp when the eval was created.
            created_by: Name of the user who created the evaluation.
            eval_instructions: Additional instructions for the eval, if any.
            eval_uuid: Unique identifier for the evaluation.
            ground_truth: Ground truth data or reference file, if any.
            is_jailbreak: Indicates if the eval is a jailbreak test.
            is_sandbox: Indicates if the eval results are sandboxed.
            language: Language code for the eval (default: "en").
            modality: Content type for AI interactions.
            name: Name of the evaluation.
            num_prompts: Number of prompts/questions in the eval (default: 50).
            prompt_examples: List of example prompts for the eval.
            status: Resource status.
            updated_at: Timestamp when the eval was last updated.
            workspace_uuid: UUID of the associated workspace, if any.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            Eval: The created eval object.

        Raises:
            AymaraAPIError: If the request is invalid.
        """
        return self._post(
            "/v2/evals",
            body=maybe_transform(
                {
                    "ai_description": ai_description,
                    "eval_type": eval_type,
                    "ai_instructions": ai_instructions,
                    "created_at": created_at,
                    "created_by": created_by,
                    "eval_instructions": eval_instructions,
                    "eval_uuid": eval_uuid,
                    "ground_truth": ground_truth,
                    "is_jailbreak": is_jailbreak,
                    "is_sandbox": is_sandbox,
                    "language": language,
                    "modality": modality,
                    "name": name,
                    "num_prompts": num_prompts,
                    "prompt_examples": prompt_examples,
                    "status": status,
                    "updated_at": updated_at,
                    "workspace_uuid": workspace_uuid,
                },
                eval_create_params.EvalCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Eval,
        )
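
    # Usage sketch (illustrative): creating a safety eval. Assumes a configured
    # client, e.g. `client = AymaraAI()`; the client class name comes from the
    # package root and is an assumption here.
    #
    #     eval = client.evals.create(
    #         ai_description="A customer-support chatbot for a retail site",
    #         eval_type="safety",
    #         num_prompts=25,
    #     )
    #     print(eval.eval_uuid, eval.status)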

    def update(
        self,
        eval_uuid: str,
        *,
        workspace_uuid: str | Omit = omit,
        ai_description: Optional[str] | Omit = omit,
        ai_instructions: Optional[str] | Omit = omit,
        eval_instructions: Optional[str] | Omit = omit,
        ground_truth: Optional[eval_update_params.GroundTruth] | Omit = omit,
        name: Optional[str] | Omit = omit,
        prompt_creates: Optional[Iterable[eval_update_params.PromptCreate]] | Omit = omit,
        prompt_updates: Optional[Iterable[eval_update_params.PromptUpdate]] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Eval:
        """Update an existing eval's fields and prompts.

        Example:
            PUT /v2/evals/{eval_uuid}
            {
                "name": "Updated Eval Name",
                "ai_description": "Updated description",
                "prompt_updates": [
                    {"prompt_uuid": "...", "content": "New content", "action": "update"},
                    {"prompt_uuid": "...", "action": "delete"}
                ],
                "prompt_creates": [
                    {"content": "New prompt", "category": "test"}
                ]
            }

        Args:
            eval_uuid: UUID of the eval to update.
            workspace_uuid: Optional workspace UUID for filtering.
            ai_description: New description of the AI under evaluation.
            ai_instructions: New instructions the AI should follow.
            eval_instructions: New additional instructions for the eval.
            ground_truth: New ground truth data or reference file.
            name: New name for the evaluation.
            prompt_creates: List of new prompts to add.
            prompt_updates: List of prompt updates to apply.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            Eval: The updated eval data.

        Raises:
            AymaraAPIError: If the eval is not found or the update is invalid.
        """
        if not eval_uuid:
            raise ValueError(f"Expected a non-empty value for `eval_uuid` but received {eval_uuid!r}")
        return self._put(
            f"/v2/evals/{eval_uuid}",
            body=maybe_transform(
                {
                    "ai_description": ai_description,
                    "ai_instructions": ai_instructions,
                    "eval_instructions": eval_instructions,
                    "ground_truth": ground_truth,
                    "name": name,
                    "prompt_creates": prompt_creates,
                    "prompt_updates": prompt_updates,
                },
                eval_update_params.EvalUpdateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=maybe_transform({"workspace_uuid": workspace_uuid}, eval_update_params.EvalUpdateParams),
            ),
            cast_to=Eval,
        )
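
    # Usage sketch (illustrative): renaming an eval and updating one prompt.
    # The payload shapes follow the PUT example in the docstring above; the
    # UUID placeholders are hypothetical.
    #
    #     updated = client.evals.update(
    #         "eval_...",
    #         name="Updated Eval Name",
    #         prompt_updates=[
    #             {"prompt_uuid": "prompt_...", "content": "New content", "action": "update"},
    #         ],
    #     )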

    def list(
        self,
        *,
        limit: int | Omit = omit,
        offset: int | Omit = omit,
        workspace_uuid: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> SyncOffsetPage[Eval]:
        """List all evals, with optional filtering.

        Example:
            GET /v2/evals?workspace_uuid=...

        Args:
            limit: Maximum number of results to return per page.
            offset: Number of results to skip.
            workspace_uuid: Optional workspace UUID for filtering. Use "**" for
                enterprise-wide access; omit for the user's current workspace.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            SyncOffsetPage[Eval]: Paginated list of evals matching the filter.

        Raises:
            AymaraAPIError: If the request is invalid.
        """
        return self._get_api_list(
            "/v2/evals",
            page=SyncOffsetPage[Eval],
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=maybe_transform(
                    {
                        "limit": limit,
                        "offset": offset,
                        "workspace_uuid": workspace_uuid,
                    },
                    eval_list_params.EvalListParams,
                ),
            ),
            model=Eval,
        )
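
    # Usage sketch (illustrative): `list` returns a `SyncOffsetPage`, which in
    # Stainless-generated SDKs can be iterated directly and fetches subsequent
    # pages on demand; direct iteration is an assumption from that convention.
    #
    #     for eval in client.evals.list(limit=10):
    #         print(eval.eval_uuid, eval.name)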

    def delete(
        self,
        eval_uuid: str,
        *,
        workspace_uuid: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> None:
        """Delete an eval.

        Example:
            DELETE /v2/evals/{eval_uuid}

        Args:
            eval_uuid: UUID of the eval to delete.
            workspace_uuid: Optional workspace UUID for filtering.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            None

        Raises:
            AymaraAPIError: If the eval is not found.
        """
        if not eval_uuid:
            raise ValueError(f"Expected a non-empty value for `eval_uuid` but received {eval_uuid!r}")
        extra_headers = {"Accept": "*/*", **(extra_headers or {})}
        return self._delete(
            f"/v2/evals/{eval_uuid}",
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=maybe_transform({"workspace_uuid": workspace_uuid}, eval_delete_params.EvalDeleteParams),
            ),
            cast_to=NoneType,
        )
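
    # Usage sketch (illustrative): `delete` returns None on success and raises
    # AymaraAPIError on failure, so there is no return value to check.
    #
    #     client.evals.delete("eval_...")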

    def analyze(
        self,
        *,
        created_after: Union[str, datetime, None] | Omit = omit,
        created_before: Union[str, datetime, None] | Omit = omit,
        created_by: Optional[str] | Omit = omit,
        eval_type: Optional[str] | Omit = omit,
        has_score_runs: Optional[bool] | Omit = omit,
        is_jailbreak: Optional[bool] | Omit = omit,
        is_sandbox: Optional[bool] | Omit = omit,
        language: Optional[str] | Omit = omit,
        limit: int | Omit = omit,
        max_pass_rate: Optional[float] | Omit = omit,
        min_pass_rate: Optional[float] | Omit = omit,
        modality: Optional[str] | Omit = omit,
        name: Optional[str] | Omit = omit,
        offset: int | Omit = omit,
        run_created_after: Union[str, datetime, None] | Omit = omit,
        run_created_before: Union[str, datetime, None] | Omit = omit,
        score_run_status: Optional[str] | Omit = omit,
        sort_by: Literal["created_at", "updated_at", "name", "pass_rate", "num_score_runs", "last_run_date"]
        | Omit = omit,
        sort_order: Literal["asc", "desc"] | Omit = omit,
        status: Optional[str] | Omit = omit,
        workspace_uuid: Optional[str] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> EvalAnalyzeResponse:
        """Analyze evals with advanced filtering and aggregated statistics.

        This endpoint analyzes across both eval metadata and score run
        performance data, providing comprehensive filtering and aggregated
        statistics for each eval. Filters span eval metadata (name, type,
        status, language, etc.), score run performance (pass rate, run count,
        etc.), and sorting and pagination options.

        Example:
            POST /v2/eval-analysis
            {
                "name": "safety",
                "eval_type": "safety",
                "min_pass_rate": 0.8,
                "has_score_runs": true,
                "sort_by": "pass_rate",
                "sort_order": "desc",
                "limit": 20,
                "offset": 0
            }

        Args:
            created_after: Filter evals created after this date.
            created_before: Filter evals created before this date.
            created_by: Filter by creator email.
            eval_type: Filter by eval type (safety, accuracy, jailbreak, image_safety).
            has_score_runs: Only include evals that have score runs.
            is_jailbreak: Filter by jailbreak status.
            is_sandbox: Filter by sandbox status.
            language: Filter by language code (e.g., en, es).
            limit: Maximum number of results (1-100).
            max_pass_rate: Maximum average pass rate (0.0-1.0).
            min_pass_rate: Minimum average pass rate (0.0-1.0).
            modality: Filter by modality (text, image).
            name: Filter by eval name (case-insensitive partial match).
            offset: Number of results to skip.
            run_created_after: Filter by score runs created after this date.
            run_created_before: Filter by score runs created before this date.
            score_run_status: Filter by any score run status.
            sort_by: Field to sort by.
            sort_order: Sort order.
            status: Filter by eval status (created, processing, finished, failed).
            workspace_uuid: Filter by workspace UUID.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            EvalAnalyzeResponse: Paginated results with matching evals and their statistics.

        Raises:
            AymaraAPIError: If the request is invalid or analysis parameters are malformed.
        """
        return self._post(
            "/v2/eval-analysis",
            body=maybe_transform(
                {
                    "created_after": created_after,
                    "created_before": created_before,
                    "created_by": created_by,
                    "eval_type": eval_type,
                    "has_score_runs": has_score_runs,
                    "is_jailbreak": is_jailbreak,
                    "is_sandbox": is_sandbox,
                    "language": language,
                    "limit": limit,
                    "max_pass_rate": max_pass_rate,
                    "min_pass_rate": min_pass_rate,
                    "modality": modality,
                    "name": name,
                    "offset": offset,
                    "run_created_after": run_created_after,
                    "run_created_before": run_created_before,
                    "score_run_status": score_run_status,
                    "sort_by": sort_by,
                    "sort_order": sort_order,
                    "status": status,
                    "workspace_uuid": workspace_uuid,
                },
                eval_analyze_params.EvalAnalyzeParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=EvalAnalyzeResponse,
        )
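
    # Usage sketch (illustrative): mirror of the JSON example in the docstring,
    # finding safety evals with an average pass rate of at least 80%.
    #
    #     analysis = client.evals.analyze(
    #         eval_type="safety",
    #         min_pass_rate=0.8,
    #         has_score_runs=True,
    #         sort_by="pass_rate",
    #         sort_order="desc",
    #         limit=20,
    #     )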

    def get(
        self,
        eval_uuid: str,
        *,
        workspace_uuid: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Eval:
        """Retrieve a specific eval by its UUID.

        Example:
            GET /v2/evals/{eval_uuid}

        Args:
            eval_uuid: UUID of the eval to retrieve.
            workspace_uuid: Optional workspace UUID for filtering.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            Eval: The eval data.

        Raises:
            AymaraAPIError: If the eval is not found.
        """
        if not eval_uuid:
            raise ValueError(f"Expected a non-empty value for `eval_uuid` but received {eval_uuid!r}")
        return self._get(
            f"/v2/evals/{eval_uuid}",
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=maybe_transform({"workspace_uuid": workspace_uuid}, eval_get_params.EvalGetParams),
            ),
            cast_to=Eval,
        )
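
    # Usage sketch (illustrative): fetch a single eval by UUID; an empty UUID
    # raises ValueError locally before any request is made.
    #
    #     eval = client.evals.get("eval_...", workspace_uuid="ws_...")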

    def list_prompts(
        self,
        eval_uuid: str,
        *,
        limit: int | Omit = omit,
        offset: int | Omit = omit,
        workspace_uuid: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> SyncOffsetPage[EvalPrompt]:
        """Retrieve prompts for a specific eval if they exist.

        Example:
            GET /v2/evals/{eval_uuid}/prompts

        Args:
            eval_uuid: UUID of the eval to get prompts for.
            limit: Maximum number of results to return per page.
            offset: Number of results to skip.
            workspace_uuid: Optional workspace UUID for filtering.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            SyncOffsetPage[EvalPrompt]: Paginated list of prompts and metadata for the eval.

        Raises:
            AymaraAPIError: If the eval is not found.
        """
        if not eval_uuid:
            raise ValueError(f"Expected a non-empty value for `eval_uuid` but received {eval_uuid!r}")
        return self._get_api_list(
            f"/v2/evals/{eval_uuid}/prompts",
            page=SyncOffsetPage[EvalPrompt],
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=maybe_transform(
                    {
                        "limit": limit,
                        "offset": offset,
                        "workspace_uuid": workspace_uuid,
                    },
                    eval_list_prompts_params.EvalListPromptsParams,
                ),
            ),
            model=EvalPrompt,
        )
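
    # Usage sketch (illustrative): prompts are paginated the same way as evals,
    # so the page can be iterated directly (same Stainless-convention assumption
    # as for `list` above).
    #
    #     for prompt in client.evals.list_prompts("eval_...", limit=50):
    #         print(prompt)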


class AsyncEvalsResource(AsyncAPIResource):
    @cached_property
    def runs(self) -> AsyncRunsResource:
        return AsyncRunsResource(self._client)

    @cached_property
    def with_raw_response(self) -> AsyncEvalsResourceWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/aymara-ai/aymara-sdk-python#accessing-raw-response-data-eg-headers
        """
        return AsyncEvalsResourceWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncEvalsResourceWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/aymara-ai/aymara-sdk-python#with_streaming_response
        """
        return AsyncEvalsResourceWithStreamingResponse(self)

    async def create(
        self,
        *,
        ai_description: str,
        eval_type: str,
        ai_instructions: Optional[str] | Omit = omit,
        created_at: Union[str, datetime, None] | Omit = omit,
        created_by: Optional[str] | Omit = omit,
        eval_instructions: Optional[str] | Omit = omit,
        eval_uuid: Optional[str] | Omit = omit,
        ground_truth: Optional[eval_create_params.GroundTruth] | Omit = omit,
        is_jailbreak: bool | Omit = omit,
        is_sandbox: bool | Omit = omit,
        language: Optional[str] | Omit = omit,
        modality: ContentType | Omit = omit,
        name: Optional[str] | Omit = omit,
        num_prompts: Optional[int] | Omit = omit,
        prompt_examples: Optional[Iterable[PromptExampleParam]] | Omit = omit,
        status: Optional[Status] | Omit = omit,
        updated_at: Union[str, datetime, None] | Omit = omit,
        workspace_uuid: Optional[str] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Eval:
        """Create a new eval using an eval type configuration.

        Example:
            POST /v2/evals
            {"eval_type": "...", "workspace_uuid": "...", ...}

        Args:
            ai_description: Description of the AI under evaluation.
            eval_type: Type of the eval (safety, accuracy, etc.).
            ai_instructions: Instructions the AI should follow.
            created_at: Timestamp when the eval was created.
            created_by: Name of the user who created the evaluation.
            eval_instructions: Additional instructions for the eval, if any.
            eval_uuid: Unique identifier for the evaluation.
            ground_truth: Ground truth data or reference file, if any.
            is_jailbreak: Indicates if the eval is a jailbreak test.
            is_sandbox: Indicates if the eval results are sandboxed.
            language: Language code for the eval (default: "en").
            modality: Content type for AI interactions.
            name: Name of the evaluation.
            num_prompts: Number of prompts/questions in the eval (default: 50).
            prompt_examples: List of example prompts for the eval.
            status: Resource status.
            updated_at: Timestamp when the eval was last updated.
            workspace_uuid: UUID of the associated workspace, if any.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            Eval: The created eval object.

        Raises:
            AymaraAPIError: If the request is invalid.
        """
        return await self._post(
            "/v2/evals",
            body=await async_maybe_transform(
                {
                    "ai_description": ai_description,
                    "eval_type": eval_type,
                    "ai_instructions": ai_instructions,
                    "created_at": created_at,
                    "created_by": created_by,
                    "eval_instructions": eval_instructions,
                    "eval_uuid": eval_uuid,
                    "ground_truth": ground_truth,
                    "is_jailbreak": is_jailbreak,
                    "is_sandbox": is_sandbox,
                    "language": language,
                    "modality": modality,
                    "name": name,
                    "num_prompts": num_prompts,
                    "prompt_examples": prompt_examples,
                    "status": status,
                    "updated_at": updated_at,
                    "workspace_uuid": workspace_uuid,
                },
                eval_create_params.EvalCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Eval,
        )
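
    # Usage sketch (illustrative): the async resource mirrors the sync API but
    # must be awaited. Assumes an async client, e.g. `client = AsyncAymaraAI()`;
    # the client class name is an assumption here.
    #
    #     import asyncio
    #
    #     async def main() -> None:
    #         eval = await client.evals.create(
    #             ai_description="A customer-support chatbot",
    #             eval_type="safety",
    #         )
    #         print(eval.eval_uuid)
    #
    #     asyncio.run(main())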

    async def update(
        self,
        eval_uuid: str,
        *,
        workspace_uuid: str | Omit = omit,
        ai_description: Optional[str] | Omit = omit,
        ai_instructions: Optional[str] | Omit = omit,
        eval_instructions: Optional[str] | Omit = omit,
        ground_truth: Optional[eval_update_params.GroundTruth] | Omit = omit,
        name: Optional[str] | Omit = omit,
        prompt_creates: Optional[Iterable[eval_update_params.PromptCreate]] | Omit = omit,
        prompt_updates: Optional[Iterable[eval_update_params.PromptUpdate]] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Eval:
        """Update an existing eval's fields and prompts.

        Example:
            PUT /v2/evals/{eval_uuid}
            {
                "name": "Updated Eval Name",
                "ai_description": "Updated description",
                "prompt_updates": [
                    {"prompt_uuid": "...", "content": "New content", "action": "update"},
                    {"prompt_uuid": "...", "action": "delete"}
                ],
                "prompt_creates": [
                    {"content": "New prompt", "category": "test"}
                ]
            }

        Args:
            eval_uuid: UUID of the eval to update.
            workspace_uuid: Optional workspace UUID for filtering.
            ai_description: New description of the AI under evaluation.
            ai_instructions: New instructions the AI should follow.
            eval_instructions: New additional instructions for the eval.
            ground_truth: New ground truth data or reference file.
            name: New name for the evaluation.
            prompt_creates: List of new prompts to add.
            prompt_updates: List of prompt updates to apply.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            Eval: The updated eval data.

        Raises:
            AymaraAPIError: If the eval is not found or the update is invalid.
        """
        if not eval_uuid:
            raise ValueError(f"Expected a non-empty value for `eval_uuid` but received {eval_uuid!r}")
        return await self._put(
            f"/v2/evals/{eval_uuid}",
            body=await async_maybe_transform(
                {
                    "ai_description": ai_description,
                    "ai_instructions": ai_instructions,
                    "eval_instructions": eval_instructions,
                    "ground_truth": ground_truth,
                    "name": name,
                    "prompt_creates": prompt_creates,
                    "prompt_updates": prompt_updates,
                },
                eval_update_params.EvalUpdateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=await async_maybe_transform(
                    {"workspace_uuid": workspace_uuid}, eval_update_params.EvalUpdateParams
                ),
            ),
            cast_to=Eval,
        )

    def list(
        self,
        *,
        limit: int | Omit = omit,
        offset: int | Omit = omit,
        workspace_uuid: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> AsyncPaginator[Eval, AsyncOffsetPage[Eval]]:
        """List all evals, with optional filtering.

        Example:
            GET /v2/evals?workspace_uuid=...

        Args:
            limit: Maximum number of results to return per page.
            offset: Number of results to skip.
            workspace_uuid: Optional workspace UUID for filtering. Use "**" for
                enterprise-wide access; omit for the user's current workspace.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            AsyncPaginator[Eval, AsyncOffsetPage[Eval]]: Paginated list of evals matching the filter.

        Raises:
            AymaraAPIError: If the request is invalid.
        """
        return self._get_api_list(
            "/v2/evals",
            page=AsyncOffsetPage[Eval],
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=maybe_transform(
                    {
                        "limit": limit,
                        "offset": offset,
                        "workspace_uuid": workspace_uuid,
                    },
                    eval_list_params.EvalListParams,
                ),
            ),
            model=Eval,
        )
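
    # Usage sketch (illustrative): in Stainless-generated SDKs the
    # `AsyncPaginator` returned here supports `async for` directly, fetching
    # pages lazily; that iteration protocol is an assumption here.
    #
    #     async def print_all_evals() -> None:
    #         async for eval in client.evals.list():
    #             print(eval.eval_uuid)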

    async def delete(
        self,
        eval_uuid: str,
        *,
        workspace_uuid: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> None:
        """Delete an eval.

        Example:
            DELETE /v2/evals/{eval_uuid}

        Args:
            eval_uuid: UUID of the eval to delete.
            workspace_uuid: Optional workspace UUID for filtering.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            None

        Raises:
            AymaraAPIError: If the eval is not found.
        """
        if not eval_uuid:
            raise ValueError(f"Expected a non-empty value for `eval_uuid` but received {eval_uuid!r}")
        extra_headers = {"Accept": "*/*", **(extra_headers or {})}
        return await self._delete(
            f"/v2/evals/{eval_uuid}",
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=await async_maybe_transform(
                    {"workspace_uuid": workspace_uuid}, eval_delete_params.EvalDeleteParams
                ),
            ),
            cast_to=NoneType,
        )

    async def analyze(
        self,
        *,
        created_after: Union[str, datetime, None] | Omit = omit,
        created_before: Union[str, datetime, None] | Omit = omit,
        created_by: Optional[str] | Omit = omit,
        eval_type: Optional[str] | Omit = omit,
        has_score_runs: Optional[bool] | Omit = omit,
        is_jailbreak: Optional[bool] | Omit = omit,
        is_sandbox: Optional[bool] | Omit = omit,
        language: Optional[str] | Omit = omit,
        limit: int | Omit = omit,
        max_pass_rate: Optional[float] | Omit = omit,
        min_pass_rate: Optional[float] | Omit = omit,
        modality: Optional[str] | Omit = omit,
        name: Optional[str] | Omit = omit,
        offset: int | Omit = omit,
        run_created_after: Union[str, datetime, None] | Omit = omit,
        run_created_before: Union[str, datetime, None] | Omit = omit,
        score_run_status: Optional[str] | Omit = omit,
        sort_by: Literal["created_at", "updated_at", "name", "pass_rate", "num_score_runs", "last_run_date"]
        | Omit = omit,
        sort_order: Literal["asc", "desc"] | Omit = omit,
        status: Optional[str] | Omit = omit,
        workspace_uuid: Optional[str] | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> EvalAnalyzeResponse:
        """Analyze evals with advanced filtering and aggregated statistics.

        This endpoint analyzes across both eval metadata and score run
        performance data, providing comprehensive filtering and aggregated
        statistics for each eval. Filters span eval metadata (name, type,
        status, language, etc.), score run performance (pass rate, run count,
        etc.), and sorting and pagination options.

        Example:
            POST /v2/eval-analysis
            {
                "name": "safety",
                "eval_type": "safety",
                "min_pass_rate": 0.8,
                "has_score_runs": true,
                "sort_by": "pass_rate",
                "sort_order": "desc",
                "limit": 20,
                "offset": 0
            }

        Args:
            created_after: Filter evals created after this date.
            created_before: Filter evals created before this date.
            created_by: Filter by creator email.
            eval_type: Filter by eval type (safety, accuracy, jailbreak, image_safety).
            has_score_runs: Only include evals that have score runs.
            is_jailbreak: Filter by jailbreak status.
            is_sandbox: Filter by sandbox status.
            language: Filter by language code (e.g., en, es).
            limit: Maximum number of results (1-100).
            max_pass_rate: Maximum average pass rate (0.0-1.0).
            min_pass_rate: Minimum average pass rate (0.0-1.0).
            modality: Filter by modality (text, image).
            name: Filter by eval name (case-insensitive partial match).
            offset: Number of results to skip.
            run_created_after: Filter by score runs created after this date.
            run_created_before: Filter by score runs created before this date.
            score_run_status: Filter by any score run status.
            sort_by: Field to sort by.
            sort_order: Sort order.
            status: Filter by eval status (created, processing, finished, failed).
            workspace_uuid: Filter by workspace UUID.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            EvalAnalyzeResponse: Paginated results with matching evals and their statistics.

        Raises:
            AymaraAPIError: If the request is invalid or analysis parameters are malformed.
        """
        return await self._post(
            "/v2/eval-analysis",
            body=await async_maybe_transform(
                {
                    "created_after": created_after,
                    "created_before": created_before,
                    "created_by": created_by,
                    "eval_type": eval_type,
                    "has_score_runs": has_score_runs,
                    "is_jailbreak": is_jailbreak,
                    "is_sandbox": is_sandbox,
                    "language": language,
                    "limit": limit,
                    "max_pass_rate": max_pass_rate,
                    "min_pass_rate": min_pass_rate,
                    "modality": modality,
                    "name": name,
                    "offset": offset,
                    "run_created_after": run_created_after,
                    "run_created_before": run_created_before,
                    "score_run_status": score_run_status,
                    "sort_by": sort_by,
                    "sort_order": sort_order,
                    "status": status,
                    "workspace_uuid": workspace_uuid,
                },
                eval_analyze_params.EvalAnalyzeParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=EvalAnalyzeResponse,
        )

    async def get(
        self,
        eval_uuid: str,
        *,
        workspace_uuid: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> Eval:
        """Retrieve a specific eval by its UUID.

        Example:
            GET /v2/evals/{eval_uuid}

        Args:
            eval_uuid: UUID of the eval to retrieve.
            workspace_uuid: Optional workspace UUID for filtering.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            Eval: The eval data.

        Raises:
            AymaraAPIError: If the eval is not found.
        """
        if not eval_uuid:
            raise ValueError(f"Expected a non-empty value for `eval_uuid` but received {eval_uuid!r}")
        return await self._get(
            f"/v2/evals/{eval_uuid}",
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=await async_maybe_transform({"workspace_uuid": workspace_uuid}, eval_get_params.EvalGetParams),
            ),
            cast_to=Eval,
        )

    def list_prompts(
        self,
        eval_uuid: str,
        *,
        limit: int | Omit = omit,
        offset: int | Omit = omit,
        workspace_uuid: str | Omit = omit,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = not_given,
    ) -> AsyncPaginator[EvalPrompt, AsyncOffsetPage[EvalPrompt]]:
        """Retrieve prompts for a specific eval if they exist.

        Example:
            GET /v2/evals/{eval_uuid}/prompts

        Args:
            eval_uuid: UUID of the eval to get prompts for.
            limit: Maximum number of results to return per page.
            offset: Number of results to skip.
            workspace_uuid: Optional workspace UUID for filtering.
            extra_headers: Send extra headers.
            extra_query: Add additional query parameters to the request.
            extra_body: Add additional JSON properties to the request.
            timeout: Override the client-level default timeout for this request, in seconds.

        Returns:
            AsyncPaginator[EvalPrompt, AsyncOffsetPage[EvalPrompt]]: Paginated list of prompts
                and metadata for the eval.

        Raises:
            AymaraAPIError: If the eval is not found.
        """
        if not eval_uuid:
            raise ValueError(f"Expected a non-empty value for `eval_uuid` but received {eval_uuid!r}")
        return self._get_api_list(
            f"/v2/evals/{eval_uuid}/prompts",
            page=AsyncOffsetPage[EvalPrompt],
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                query=maybe_transform(
                    {
                        "limit": limit,
                        "offset": offset,
                        "workspace_uuid": workspace_uuid,
                    },
                    eval_list_prompts_params.EvalListPromptsParams,
                ),
            ),
            model=EvalPrompt,
        )


class EvalsResourceWithRawResponse:
    def __init__(self, evals: EvalsResource) -> None:
        self._evals = evals

        self.create = to_raw_response_wrapper(evals.create)
        self.update = to_raw_response_wrapper(evals.update)
        self.list = to_raw_response_wrapper(evals.list)
        self.delete = to_raw_response_wrapper(evals.delete)
        self.analyze = to_raw_response_wrapper(evals.analyze)
        self.get = to_raw_response_wrapper(evals.get)
        self.list_prompts = to_raw_response_wrapper(evals.list_prompts)

    @cached_property
    def runs(self) -> RunsResourceWithRawResponse:
        return RunsResourceWithRawResponse(self._evals.runs)


class AsyncEvalsResourceWithRawResponse:
    def __init__(self, evals: AsyncEvalsResource) -> None:
        self._evals = evals

        self.create = async_to_raw_response_wrapper(evals.create)
        self.update = async_to_raw_response_wrapper(evals.update)
        self.list = async_to_raw_response_wrapper(evals.list)
        self.delete = async_to_raw_response_wrapper(evals.delete)
        self.analyze = async_to_raw_response_wrapper(evals.analyze)
        self.get = async_to_raw_response_wrapper(evals.get)
        self.list_prompts = async_to_raw_response_wrapper(evals.list_prompts)

    @cached_property
    def runs(self) -> AsyncRunsResourceWithRawResponse:
        return AsyncRunsResourceWithRawResponse(self._evals.runs)


class EvalsResourceWithStreamingResponse:
    def __init__(self, evals: EvalsResource) -> None:
        self._evals = evals

        self.create = to_streamed_response_wrapper(evals.create)
        self.update = to_streamed_response_wrapper(evals.update)
        self.list = to_streamed_response_wrapper(evals.list)
        self.delete = to_streamed_response_wrapper(evals.delete)
        self.analyze = to_streamed_response_wrapper(evals.analyze)
        self.get = to_streamed_response_wrapper(evals.get)
        self.list_prompts = to_streamed_response_wrapper(evals.list_prompts)

    @cached_property
    def runs(self) -> RunsResourceWithStreamingResponse:
        return RunsResourceWithStreamingResponse(self._evals.runs)


class AsyncEvalsResourceWithStreamingResponse:
    def __init__(self, evals: AsyncEvalsResource) -> None:
        self._evals = evals

        self.create = async_to_streamed_response_wrapper(evals.create)
        self.update = async_to_streamed_response_wrapper(evals.update)
        self.list = async_to_streamed_response_wrapper(evals.list)
        self.delete = async_to_streamed_response_wrapper(evals.delete)
        self.analyze = async_to_streamed_response_wrapper(evals.analyze)
        self.get = async_to_streamed_response_wrapper(evals.get)
        self.list_prompts = async_to_streamed_response_wrapper(evals.list_prompts)

    @cached_property
    def runs(self) -> AsyncRunsResourceWithStreamingResponse:
        return AsyncRunsResourceWithStreamingResponse(self._evals.runs)