From c2dd867a25286421fd055a1e7ef02f873c6cb46d Mon Sep 17 00:00:00 2001 From: langfuse-bot Date: Tue, 3 Feb 2026 11:20:21 +0000 Subject: [PATCH 1/3] feat(api): update API spec from langfuse/langfuse 966662e --- langfuse/api/__init__.py | 8 +++ langfuse/api/reference.md | 15 ++++- langfuse/api/resources/__init__.py | 8 +++ langfuse/api/resources/commons/__init__.py | 4 ++ .../api/resources/commons/types/__init__.py | 11 ++- .../commons/types/correction_score.py | 53 +++++++++++++++ langfuse/api/resources/commons/types/score.py | 67 ++++++++++++++++++- .../api/resources/dataset_items/client.py | 22 +++++- .../types/create_dataset_run_item_request.py | 10 +++ langfuse/api/resources/score_v_2/__init__.py | 4 ++ .../api/resources/score_v_2/types/__init__.py | 4 ++ .../types/get_scores_response_data.py | 67 +++++++++++++++++++ .../get_scores_response_data_correction.py | 46 +++++++++++++ 13 files changed, 314 insertions(+), 5 deletions(-) create mode 100644 langfuse/api/resources/commons/types/correction_score.py create mode 100644 langfuse/api/resources/score_v_2/types/get_scores_response_data_correction.py diff --git a/langfuse/api/__init__.py b/langfuse/api/__init__.py index 835bdfefa..d1a6414ed 100644 --- a/langfuse/api/__init__.py +++ b/langfuse/api/__init__.py @@ -36,6 +36,7 @@ Comment, CommentObjectType, ConfigCategory, + CorrectionScore, CreateAnnotationQueueAssignmentResponse, CreateAnnotationQueueItemRequest, CreateAnnotationQueueRequest, @@ -85,9 +86,11 @@ GetScoresResponseData, GetScoresResponseDataBoolean, GetScoresResponseDataCategorical, + GetScoresResponseDataCorrection, GetScoresResponseDataNumeric, GetScoresResponseData_Boolean, GetScoresResponseData_Categorical, + GetScoresResponseData_Correction, GetScoresResponseData_Numeric, GetScoresResponseTraceData, HealthResponse, @@ -199,6 +202,7 @@ ScoreV1_Numeric, Score_Boolean, Score_Categorical, + Score_Correction, Score_Numeric, SdkLogBody, SdkLogEvent, @@ -293,6 +297,7 @@ "Comment", "CommentObjectType", "ConfigCategory", + "CorrectionScore", "CreateAnnotationQueueAssignmentResponse", "CreateAnnotationQueueItemRequest", "CreateAnnotationQueueRequest", @@ -342,9 +347,11 @@ "GetScoresResponseData", "GetScoresResponseDataBoolean", "GetScoresResponseDataCategorical", + "GetScoresResponseDataCorrection", "GetScoresResponseDataNumeric", "GetScoresResponseData_Boolean", "GetScoresResponseData_Categorical", + "GetScoresResponseData_Correction", "GetScoresResponseData_Numeric", "GetScoresResponseTraceData", "HealthResponse", @@ -456,6 +463,7 @@ "ScoreV1_Numeric", "Score_Boolean", "Score_Categorical", + "Score_Correction", "Score_Numeric", "SdkLogBody", "SdkLogEvent", diff --git a/langfuse/api/reference.md b/langfuse/api/reference.md index 19870d547..5f6371b51 100644 --- a/langfuse/api/reference.md +++ b/langfuse/api/reference.md @@ -1519,7 +1519,8 @@ client.dataset_items.get(
-Get dataset items +Get dataset items. Optionally specify a version to get the items as they existed at that point in time. +Note: If version parameter is provided, datasetName must also be provided.
@@ -1584,6 +1585,18 @@ client.dataset_items.list()
+**version:** `typing.Optional[dt.datetime]` + +ISO 8601 timestamp (RFC 3339, Section 5.6) in UTC (e.g., "2026-01-21T14:35:42Z"). +If provided, returns state of dataset at this timestamp. +If not provided, returns the latest version. Requires datasetName to be specified. + +
+
+ +
+
+ **page:** `typing.Optional[int]` — page number, starts at 1
diff --git a/langfuse/api/resources/__init__.py b/langfuse/api/resources/__init__.py index 55c4e012a..0de0a56a5 100644 --- a/langfuse/api/resources/__init__.py +++ b/langfuse/api/resources/__init__.py @@ -67,6 +67,7 @@ Comment, CommentObjectType, ConfigCategory, + CorrectionScore, CreateScoreValue, Dataset, DatasetItem, @@ -101,6 +102,7 @@ ScoreV1_Numeric, Score_Boolean, Score_Categorical, + Score_Correction, Score_Numeric, Session, SessionWithTraces, @@ -268,9 +270,11 @@ GetScoresResponseData, GetScoresResponseDataBoolean, GetScoresResponseDataCategorical, + GetScoresResponseDataCorrection, GetScoresResponseDataNumeric, GetScoresResponseData_Boolean, GetScoresResponseData_Categorical, + GetScoresResponseData_Correction, GetScoresResponseData_Numeric, GetScoresResponseTraceData, ) @@ -313,6 +317,7 @@ "Comment", "CommentObjectType", "ConfigCategory", + "CorrectionScore", "CreateAnnotationQueueAssignmentResponse", "CreateAnnotationQueueItemRequest", "CreateAnnotationQueueRequest", @@ -362,9 +367,11 @@ "GetScoresResponseData", "GetScoresResponseDataBoolean", "GetScoresResponseDataCategorical", + "GetScoresResponseDataCorrection", "GetScoresResponseDataNumeric", "GetScoresResponseData_Boolean", "GetScoresResponseData_Categorical", + "GetScoresResponseData_Correction", "GetScoresResponseData_Numeric", "GetScoresResponseTraceData", "HealthResponse", @@ -476,6 +483,7 @@ "ScoreV1_Numeric", "Score_Boolean", "Score_Categorical", + "Score_Correction", "Score_Numeric", "SdkLogBody", "SdkLogEvent", diff --git a/langfuse/api/resources/commons/__init__.py b/langfuse/api/resources/commons/__init__.py index 9e522548e..7105b22c5 100644 --- a/langfuse/api/resources/commons/__init__.py +++ b/langfuse/api/resources/commons/__init__.py @@ -10,6 +10,7 @@ Comment, CommentObjectType, ConfigCategory, + CorrectionScore, CreateScoreValue, Dataset, DatasetItem, @@ -41,6 +42,7 @@ ScoreV1_Numeric, Score_Boolean, Score_Categorical, + Score_Correction, Score_Numeric, Session, SessionWithTraces, @@ -68,6 +70,7 @@ "Comment", "CommentObjectType", "ConfigCategory", + "CorrectionScore", "CreateScoreValue", "Dataset", "DatasetItem", @@ -102,6 +105,7 @@ "ScoreV1_Numeric", "Score_Boolean", "Score_Categorical", + "Score_Correction", "Score_Numeric", "Session", "SessionWithTraces", diff --git a/langfuse/api/resources/commons/types/__init__.py b/langfuse/api/resources/commons/types/__init__.py index b9063f3fb..df87680b7 100644 --- a/langfuse/api/resources/commons/types/__init__.py +++ b/langfuse/api/resources/commons/types/__init__.py @@ -9,6 +9,7 @@ from .comment import Comment from .comment_object_type import CommentObjectType from .config_category import ConfigCategory +from .correction_score import CorrectionScore from .create_score_value import CreateScoreValue from .dataset import Dataset from .dataset_item import DatasetItem @@ -29,7 +30,13 @@ from .pricing_tier_condition import PricingTierCondition from .pricing_tier_input import PricingTierInput from .pricing_tier_operator import PricingTierOperator -from .score import Score, Score_Boolean, Score_Categorical, Score_Numeric +from .score import ( + Score, + Score_Boolean, + Score_Categorical, + Score_Correction, + Score_Numeric, +) from .score_config import ScoreConfig from .score_config_data_type import ScoreConfigDataType from .score_data_type import ScoreDataType @@ -52,6 +59,7 @@ "Comment", "CommentObjectType", "ConfigCategory", + "CorrectionScore", "CreateScoreValue", "Dataset", "DatasetItem", @@ -83,6 +91,7 @@ "ScoreV1_Numeric", "Score_Boolean", "Score_Categorical", + "Score_Correction", "Score_Numeric", "Session", "SessionWithTraces", diff --git a/langfuse/api/resources/commons/types/correction_score.py b/langfuse/api/resources/commons/types/correction_score.py new file mode 100644 index 000000000..26abeae49 --- /dev/null +++ b/langfuse/api/resources/commons/types/correction_score.py @@ -0,0 +1,53 @@ +# This file was auto-generated by Fern from our API Definition. + +import datetime as dt +import typing + +from ....core.datetime_utils import serialize_datetime +from ....core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1 +from .base_score import BaseScore + + +class CorrectionScore(BaseScore): + value: float = pydantic_v1.Field() + """ + The numeric value of the score. Always 0 for correction scores. + """ + + string_value: str = pydantic_v1.Field(alias="stringValue") + """ + The string representation of the correction content + """ + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + allow_population_by_field_name = True + populate_by_name = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} diff --git a/langfuse/api/resources/commons/types/score.py b/langfuse/api/resources/commons/types/score.py index 8d54b6575..dab6eee43 100644 --- a/langfuse/api/resources/commons/types/score.py +++ b/langfuse/api/resources/commons/types/score.py @@ -204,4 +204,69 @@ class Config: json_encoders = {dt.datetime: serialize_datetime} -Score = typing.Union[Score_Numeric, Score_Categorical, Score_Boolean] +class Score_Correction(pydantic_v1.BaseModel): + value: float + string_value: str = pydantic_v1.Field(alias="stringValue") + id: str + trace_id: typing.Optional[str] = pydantic_v1.Field(alias="traceId", default=None) + session_id: typing.Optional[str] = pydantic_v1.Field( + alias="sessionId", default=None + ) + observation_id: typing.Optional[str] = pydantic_v1.Field( + alias="observationId", default=None + ) + dataset_run_id: typing.Optional[str] = pydantic_v1.Field( + alias="datasetRunId", default=None + ) + name: str + source: ScoreSource + timestamp: dt.datetime + created_at: dt.datetime = pydantic_v1.Field(alias="createdAt") + updated_at: dt.datetime = pydantic_v1.Field(alias="updatedAt") + author_user_id: typing.Optional[str] = pydantic_v1.Field( + alias="authorUserId", default=None + ) + comment: typing.Optional[str] = None + metadata: typing.Any + config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None) + queue_id: typing.Optional[str] = pydantic_v1.Field(alias="queueId", default=None) + environment: str + data_type: typing.Literal["CORRECTION"] = pydantic_v1.Field( + alias="dataType", default="CORRECTION" + ) + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + allow_population_by_field_name = True + populate_by_name = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} + + +Score = typing.Union[Score_Numeric, Score_Categorical, Score_Boolean, Score_Correction] diff --git a/langfuse/api/resources/dataset_items/client.py b/langfuse/api/resources/dataset_items/client.py index 8ece3a790..f557c5eab 100644 --- a/langfuse/api/resources/dataset_items/client.py +++ b/langfuse/api/resources/dataset_items/client.py @@ -1,10 +1,12 @@ # This file was auto-generated by Fern from our API Definition. +import datetime as dt import typing from json.decoder import JSONDecodeError from ...core.api_error import ApiError from ...core.client_wrapper import AsyncClientWrapper, SyncClientWrapper +from ...core.datetime_utils import serialize_datetime from ...core.jsonable_encoder import jsonable_encoder from ...core.pydantic_utilities import pydantic_v1 from ...core.request_options import RequestOptions @@ -168,12 +170,14 @@ def list( dataset_name: typing.Optional[str] = None, source_trace_id: typing.Optional[str] = None, source_observation_id: typing.Optional[str] = None, + version: typing.Optional[dt.datetime] = None, page: typing.Optional[int] = None, limit: typing.Optional[int] = None, request_options: typing.Optional[RequestOptions] = None, ) -> PaginatedDatasetItems: """ - Get dataset items + Get dataset items. Optionally specify a version to get the items as they existed at that point in time. + Note: If version parameter is provided, datasetName must also be provided. Parameters ---------- @@ -183,6 +187,11 @@ def list( source_observation_id : typing.Optional[str] + version : typing.Optional[dt.datetime] + ISO 8601 timestamp (RFC 3339, Section 5.6) in UTC (e.g., "2026-01-21T14:35:42Z"). + If provided, returns state of dataset at this timestamp. + If not provided, returns the latest version. Requires datasetName to be specified. + page : typing.Optional[int] page number, starts at 1 @@ -217,6 +226,7 @@ def list( "datasetName": dataset_name, "sourceTraceId": source_trace_id, "sourceObservationId": source_observation_id, + "version": serialize_datetime(version) if version is not None else None, "page": page, "limit": limit, }, @@ -477,12 +487,14 @@ async def list( dataset_name: typing.Optional[str] = None, source_trace_id: typing.Optional[str] = None, source_observation_id: typing.Optional[str] = None, + version: typing.Optional[dt.datetime] = None, page: typing.Optional[int] = None, limit: typing.Optional[int] = None, request_options: typing.Optional[RequestOptions] = None, ) -> PaginatedDatasetItems: """ - Get dataset items + Get dataset items. Optionally specify a version to get the items as they existed at that point in time. + Note: If version parameter is provided, datasetName must also be provided. Parameters ---------- @@ -492,6 +504,11 @@ async def list( source_observation_id : typing.Optional[str] + version : typing.Optional[dt.datetime] + ISO 8601 timestamp (RFC 3339, Section 5.6) in UTC (e.g., "2026-01-21T14:35:42Z"). + If provided, returns state of dataset at this timestamp. + If not provided, returns the latest version. Requires datasetName to be specified. + page : typing.Optional[int] page number, starts at 1 @@ -534,6 +551,7 @@ async def main() -> None: "datasetName": dataset_name, "sourceTraceId": source_trace_id, "sourceObservationId": source_observation_id, + "version": serialize_datetime(version) if version is not None else None, "page": page, "limit": limit, }, diff --git a/langfuse/api/resources/dataset_run_items/types/create_dataset_run_item_request.py b/langfuse/api/resources/dataset_run_items/types/create_dataset_run_item_request.py index 0a643b835..091f34e7e 100644 --- a/langfuse/api/resources/dataset_run_items/types/create_dataset_run_item_request.py +++ b/langfuse/api/resources/dataset_run_items/types/create_dataset_run_item_request.py @@ -30,6 +30,16 @@ class CreateDatasetRunItemRequest(pydantic_v1.BaseModel): traceId should always be provided. For compatibility with older SDK versions it can also be inferred from the provided observationId. """ + dataset_version: typing.Optional[dt.datetime] = pydantic_v1.Field( + alias="datasetVersion", default=None + ) + """ + ISO 8601 timestamp (RFC 3339, Section 5.6) in UTC (e.g., "2026-01-21T14:35:42Z"). + Specifies the dataset version to use for this experiment run. + If provided, the experiment will use dataset items as they existed at or before this timestamp. + If not provided, uses the latest version of dataset items. + """ + def json(self, **kwargs: typing.Any) -> str: kwargs_with_defaults: typing.Any = { "by_alias": True, diff --git a/langfuse/api/resources/score_v_2/__init__.py b/langfuse/api/resources/score_v_2/__init__.py index 40599eec1..4e333a693 100644 --- a/langfuse/api/resources/score_v_2/__init__.py +++ b/langfuse/api/resources/score_v_2/__init__.py @@ -5,9 +5,11 @@ GetScoresResponseData, GetScoresResponseDataBoolean, GetScoresResponseDataCategorical, + GetScoresResponseDataCorrection, GetScoresResponseDataNumeric, GetScoresResponseData_Boolean, GetScoresResponseData_Categorical, + GetScoresResponseData_Correction, GetScoresResponseData_Numeric, GetScoresResponseTraceData, ) @@ -17,9 +19,11 @@ "GetScoresResponseData", "GetScoresResponseDataBoolean", "GetScoresResponseDataCategorical", + "GetScoresResponseDataCorrection", "GetScoresResponseDataNumeric", "GetScoresResponseData_Boolean", "GetScoresResponseData_Categorical", + "GetScoresResponseData_Correction", "GetScoresResponseData_Numeric", "GetScoresResponseTraceData", ] diff --git a/langfuse/api/resources/score_v_2/types/__init__.py b/langfuse/api/resources/score_v_2/types/__init__.py index 480ed3406..d08e687ef 100644 --- a/langfuse/api/resources/score_v_2/types/__init__.py +++ b/langfuse/api/resources/score_v_2/types/__init__.py @@ -5,10 +5,12 @@ GetScoresResponseData, GetScoresResponseData_Boolean, GetScoresResponseData_Categorical, + GetScoresResponseData_Correction, GetScoresResponseData_Numeric, ) from .get_scores_response_data_boolean import GetScoresResponseDataBoolean from .get_scores_response_data_categorical import GetScoresResponseDataCategorical +from .get_scores_response_data_correction import GetScoresResponseDataCorrection from .get_scores_response_data_numeric import GetScoresResponseDataNumeric from .get_scores_response_trace_data import GetScoresResponseTraceData @@ -17,9 +19,11 @@ "GetScoresResponseData", "GetScoresResponseDataBoolean", "GetScoresResponseDataCategorical", + "GetScoresResponseDataCorrection", "GetScoresResponseDataNumeric", "GetScoresResponseData_Boolean", "GetScoresResponseData_Categorical", + "GetScoresResponseData_Correction", "GetScoresResponseData_Numeric", "GetScoresResponseTraceData", ] diff --git a/langfuse/api/resources/score_v_2/types/get_scores_response_data.py b/langfuse/api/resources/score_v_2/types/get_scores_response_data.py index 965a01c80..4f73fbcae 100644 --- a/langfuse/api/resources/score_v_2/types/get_scores_response_data.py +++ b/langfuse/api/resources/score_v_2/types/get_scores_response_data.py @@ -208,8 +208,75 @@ class Config: json_encoders = {dt.datetime: serialize_datetime} +class GetScoresResponseData_Correction(pydantic_v1.BaseModel): + trace: typing.Optional[GetScoresResponseTraceData] = None + value: float + string_value: str = pydantic_v1.Field(alias="stringValue") + id: str + trace_id: typing.Optional[str] = pydantic_v1.Field(alias="traceId", default=None) + session_id: typing.Optional[str] = pydantic_v1.Field( + alias="sessionId", default=None + ) + observation_id: typing.Optional[str] = pydantic_v1.Field( + alias="observationId", default=None + ) + dataset_run_id: typing.Optional[str] = pydantic_v1.Field( + alias="datasetRunId", default=None + ) + name: str + source: ScoreSource + timestamp: dt.datetime + created_at: dt.datetime = pydantic_v1.Field(alias="createdAt") + updated_at: dt.datetime = pydantic_v1.Field(alias="updatedAt") + author_user_id: typing.Optional[str] = pydantic_v1.Field( + alias="authorUserId", default=None + ) + comment: typing.Optional[str] = None + metadata: typing.Any + config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None) + queue_id: typing.Optional[str] = pydantic_v1.Field(alias="queueId", default=None) + environment: str + data_type: typing.Literal["CORRECTION"] = pydantic_v1.Field( + alias="dataType", default="CORRECTION" + ) + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + allow_population_by_field_name = True + populate_by_name = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} + + GetScoresResponseData = typing.Union[ GetScoresResponseData_Numeric, GetScoresResponseData_Categorical, GetScoresResponseData_Boolean, + GetScoresResponseData_Correction, ] diff --git a/langfuse/api/resources/score_v_2/types/get_scores_response_data_correction.py b/langfuse/api/resources/score_v_2/types/get_scores_response_data_correction.py new file mode 100644 index 000000000..0c59f29a8 --- /dev/null +++ b/langfuse/api/resources/score_v_2/types/get_scores_response_data_correction.py @@ -0,0 +1,46 @@ +# This file was auto-generated by Fern from our API Definition. + +import datetime as dt +import typing + +from ....core.datetime_utils import serialize_datetime +from ....core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1 +from ...commons.types.correction_score import CorrectionScore +from .get_scores_response_trace_data import GetScoresResponseTraceData + + +class GetScoresResponseDataCorrection(CorrectionScore): + trace: typing.Optional[GetScoresResponseTraceData] = None + + def json(self, **kwargs: typing.Any) -> str: + kwargs_with_defaults: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + return super().json(**kwargs_with_defaults) + + def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]: + kwargs_with_defaults_exclude_unset: typing.Any = { + "by_alias": True, + "exclude_unset": True, + **kwargs, + } + kwargs_with_defaults_exclude_none: typing.Any = { + "by_alias": True, + "exclude_none": True, + **kwargs, + } + + return deep_union_pydantic_dicts( + super().dict(**kwargs_with_defaults_exclude_unset), + super().dict(**kwargs_with_defaults_exclude_none), + ) + + class Config: + frozen = True + smart_union = True + allow_population_by_field_name = True + populate_by_name = True + extra = pydantic_v1.Extra.allow + json_encoders = {dt.datetime: serialize_datetime} From 2156ee8e329fc0742a2e569c1a34e1cbfa101b2b Mon Sep 17 00:00:00 2001 From: Marlies Mayerhofer <74332854+marliessophie@users.noreply.github.com> Date: Thu, 5 Feb 2026 17:23:47 +0100 Subject: [PATCH 2/3] chore: support dataset versioning via SDK --- langfuse/_client/client.py | 10 +++++++++- tests/test_datasets.py | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/langfuse/_client/client.py b/langfuse/_client/client.py index ce7d7437d..f7ee28e36 100644 --- a/langfuse/_client/client.py +++ b/langfuse/_client/client.py @@ -2442,13 +2442,20 @@ def get_trace_url(self, *, trace_id: Optional[str] = None) -> Optional[str]: ) def get_dataset( - self, name: str, *, fetch_items_page_size: Optional[int] = 50 + self, + name: str, + *, + fetch_items_page_size: Optional[int] = 50, + version: Optional[datetime] = None, ) -> "DatasetClient": """Fetch a dataset by its name. Args: name (str): The name of the dataset to fetch. fetch_items_page_size (Optional[int]): All items of the dataset will be fetched in chunks of this size. Defaults to 50. + version (Optional[datetime]): Retrieve dataset items as they existed at this specific point in time (UTC). + If provided, returns the state of items at the specified UTC timestamp. + If not provided, returns the latest version. Must be a timezone-aware datetime object in UTC. Returns: DatasetClient: The dataset with the given name. @@ -2465,6 +2472,7 @@ def get_dataset( dataset_name=self._url_encode(name, is_url_param=True), page=page, limit=fetch_items_page_size, + version=version, ) dataset_items.extend(new_items.data) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 051dcfbf6..fcc38402f 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -527,3 +527,41 @@ def test_delete_dataset_run_with_folder_names(): # Verify the run is deleted runs_after = langfuse.get_dataset_runs(dataset_name=folder_name) assert len(runs_after.data) == 0 + + +def test_get_dataset_with_version(): + """Test that get_dataset correctly filters items by version timestamp.""" + from datetime import datetime, timezone + import time + + langfuse = Langfuse(debug=False) + + # Create dataset + name = create_uuid() + langfuse.create_dataset(name=name) + + # Create first item + item1 = langfuse.create_dataset_item(dataset_name=name, input={"version": "v1"}) + langfuse.flush() + time.sleep(3) # Ensure persistence and clear temporal separation + + # Capture timestamp AFTER first item, BEFORE second item + query_timestamp = datetime.now(timezone.utc) + time.sleep(3) # Ensure second item is created AFTER query_timestamp + + # Create second item + langfuse.create_dataset_item(dataset_name=name, input={"version": "v2"}) + langfuse.flush() + time.sleep(3) # Ensure persistence + + # Fetch at the query_timestamp (should only return first item) + dataset = langfuse.get_dataset(name, version=query_timestamp) + + # Verify only first item is retrieved + assert len(dataset.items) == 1 + assert dataset.items[0].input == {"version": "v1"} + assert dataset.items[0].id == item1.id + + # Verify fetching without version returns both items (latest) + dataset_latest = langfuse.get_dataset(name) + assert len(dataset_latest.items) == 2 From 5ac81d10fb619de89ab0456f578209cec943f175 Mon Sep 17 00:00:00 2001 From: Marlies Mayerhofer <74332854+marliessophie@users.noreply.github.com> Date: Thu, 5 Feb 2026 22:11:19 +0100 Subject: [PATCH 3/3] chore: support dataset versioning via SDK --- tests/test_datasets.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index fcc38402f..f86812138 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -1,5 +1,6 @@ import json import time +from datetime import timedelta from concurrent.futures import ThreadPoolExecutor from typing import Sequence @@ -531,8 +532,6 @@ def test_delete_dataset_run_with_folder_names(): def test_get_dataset_with_version(): """Test that get_dataset correctly filters items by version timestamp.""" - from datetime import datetime, timezone - import time langfuse = Langfuse(debug=False) @@ -543,11 +542,16 @@ def test_get_dataset_with_version(): # Create first item item1 = langfuse.create_dataset_item(dataset_name=name, input={"version": "v1"}) langfuse.flush() - time.sleep(3) # Ensure persistence and clear temporal separation + time.sleep(3) # Ensure persistence + + # Fetch dataset to get the actual server-assigned timestamp of item1 + dataset_after_item1 = langfuse.get_dataset(name) + assert len(dataset_after_item1.items) == 1 + item1_created_at = dataset_after_item1.items[0].created_at - # Capture timestamp AFTER first item, BEFORE second item - query_timestamp = datetime.now(timezone.utc) - time.sleep(3) # Ensure second item is created AFTER query_timestamp + # Use a timestamp 1 second after item1's actual creation time + query_timestamp = item1_created_at + timedelta(seconds=1) + time.sleep(3) # Ensure temporal separation # Create second item langfuse.create_dataset_item(dataset_name=name, input={"version": "v2"})