diff --git a/langfuse/_client/client.py b/langfuse/_client/client.py
index ce7d7437d..f7ee28e36 100644
--- a/langfuse/_client/client.py
+++ b/langfuse/_client/client.py
@@ -2442,13 +2442,20 @@ def get_trace_url(self, *, trace_id: Optional[str] = None) -> Optional[str]:
)
def get_dataset(
- self, name: str, *, fetch_items_page_size: Optional[int] = 50
+ self,
+ name: str,
+ *,
+ fetch_items_page_size: Optional[int] = 50,
+ version: Optional[datetime] = None,
) -> "DatasetClient":
"""Fetch a dataset by its name.
Args:
name (str): The name of the dataset to fetch.
fetch_items_page_size (Optional[int]): All items of the dataset will be fetched in chunks of this size. Defaults to 50.
+ version (Optional[datetime]): Retrieve dataset items as they existed at this specific point in time (UTC).
+ If provided, returns the state of items at the specified UTC timestamp.
+ If not provided, returns the latest version. Must be a timezone-aware datetime object in UTC.
Returns:
DatasetClient: The dataset with the given name.
@@ -2465,6 +2472,7 @@ def get_dataset(
dataset_name=self._url_encode(name, is_url_param=True),
page=page,
limit=fetch_items_page_size,
+ version=version,
)
dataset_items.extend(new_items.data)
diff --git a/langfuse/api/__init__.py b/langfuse/api/__init__.py
index 835bdfefa..d1a6414ed 100644
--- a/langfuse/api/__init__.py
+++ b/langfuse/api/__init__.py
@@ -36,6 +36,7 @@
Comment,
CommentObjectType,
ConfigCategory,
+ CorrectionScore,
CreateAnnotationQueueAssignmentResponse,
CreateAnnotationQueueItemRequest,
CreateAnnotationQueueRequest,
@@ -85,9 +86,11 @@
GetScoresResponseData,
GetScoresResponseDataBoolean,
GetScoresResponseDataCategorical,
+ GetScoresResponseDataCorrection,
GetScoresResponseDataNumeric,
GetScoresResponseData_Boolean,
GetScoresResponseData_Categorical,
+ GetScoresResponseData_Correction,
GetScoresResponseData_Numeric,
GetScoresResponseTraceData,
HealthResponse,
@@ -199,6 +202,7 @@
ScoreV1_Numeric,
Score_Boolean,
Score_Categorical,
+ Score_Correction,
Score_Numeric,
SdkLogBody,
SdkLogEvent,
@@ -293,6 +297,7 @@
"Comment",
"CommentObjectType",
"ConfigCategory",
+ "CorrectionScore",
"CreateAnnotationQueueAssignmentResponse",
"CreateAnnotationQueueItemRequest",
"CreateAnnotationQueueRequest",
@@ -342,9 +347,11 @@
"GetScoresResponseData",
"GetScoresResponseDataBoolean",
"GetScoresResponseDataCategorical",
+ "GetScoresResponseDataCorrection",
"GetScoresResponseDataNumeric",
"GetScoresResponseData_Boolean",
"GetScoresResponseData_Categorical",
+ "GetScoresResponseData_Correction",
"GetScoresResponseData_Numeric",
"GetScoresResponseTraceData",
"HealthResponse",
@@ -456,6 +463,7 @@
"ScoreV1_Numeric",
"Score_Boolean",
"Score_Categorical",
+ "Score_Correction",
"Score_Numeric",
"SdkLogBody",
"SdkLogEvent",
diff --git a/langfuse/api/reference.md b/langfuse/api/reference.md
index 19870d547..5f6371b51 100644
--- a/langfuse/api/reference.md
+++ b/langfuse/api/reference.md
@@ -1519,7 +1519,8 @@ client.dataset_items.get(
-
-Get dataset items
+Get dataset items. Optionally specify a version to get the items as they existed at that point in time.
+Note: If the version parameter is provided, datasetName must also be provided.
@@ -1584,6 +1585,18 @@ client.dataset_items.list()
-
+**version:** `typing.Optional[dt.datetime]`
+
+ISO 8601 timestamp (RFC 3339, Section 5.6) in UTC (e.g., "2026-01-21T14:35:42Z").
+If provided, returns the state of the dataset at this timestamp.
+If not provided, returns the latest version. Requires datasetName to be specified.
+
+
+
+
+
+-
+
**page:** `typing.Optional[int]` — page number, starts at 1
diff --git a/langfuse/api/resources/__init__.py b/langfuse/api/resources/__init__.py
index 55c4e012a..0de0a56a5 100644
--- a/langfuse/api/resources/__init__.py
+++ b/langfuse/api/resources/__init__.py
@@ -67,6 +67,7 @@
Comment,
CommentObjectType,
ConfigCategory,
+ CorrectionScore,
CreateScoreValue,
Dataset,
DatasetItem,
@@ -101,6 +102,7 @@
ScoreV1_Numeric,
Score_Boolean,
Score_Categorical,
+ Score_Correction,
Score_Numeric,
Session,
SessionWithTraces,
@@ -268,9 +270,11 @@
GetScoresResponseData,
GetScoresResponseDataBoolean,
GetScoresResponseDataCategorical,
+ GetScoresResponseDataCorrection,
GetScoresResponseDataNumeric,
GetScoresResponseData_Boolean,
GetScoresResponseData_Categorical,
+ GetScoresResponseData_Correction,
GetScoresResponseData_Numeric,
GetScoresResponseTraceData,
)
@@ -313,6 +317,7 @@
"Comment",
"CommentObjectType",
"ConfigCategory",
+ "CorrectionScore",
"CreateAnnotationQueueAssignmentResponse",
"CreateAnnotationQueueItemRequest",
"CreateAnnotationQueueRequest",
@@ -362,9 +367,11 @@
"GetScoresResponseData",
"GetScoresResponseDataBoolean",
"GetScoresResponseDataCategorical",
+ "GetScoresResponseDataCorrection",
"GetScoresResponseDataNumeric",
"GetScoresResponseData_Boolean",
"GetScoresResponseData_Categorical",
+ "GetScoresResponseData_Correction",
"GetScoresResponseData_Numeric",
"GetScoresResponseTraceData",
"HealthResponse",
@@ -476,6 +483,7 @@
"ScoreV1_Numeric",
"Score_Boolean",
"Score_Categorical",
+ "Score_Correction",
"Score_Numeric",
"SdkLogBody",
"SdkLogEvent",
diff --git a/langfuse/api/resources/commons/__init__.py b/langfuse/api/resources/commons/__init__.py
index 9e522548e..7105b22c5 100644
--- a/langfuse/api/resources/commons/__init__.py
+++ b/langfuse/api/resources/commons/__init__.py
@@ -10,6 +10,7 @@
Comment,
CommentObjectType,
ConfigCategory,
+ CorrectionScore,
CreateScoreValue,
Dataset,
DatasetItem,
@@ -41,6 +42,7 @@
ScoreV1_Numeric,
Score_Boolean,
Score_Categorical,
+ Score_Correction,
Score_Numeric,
Session,
SessionWithTraces,
@@ -68,6 +70,7 @@
"Comment",
"CommentObjectType",
"ConfigCategory",
+ "CorrectionScore",
"CreateScoreValue",
"Dataset",
"DatasetItem",
@@ -102,6 +105,7 @@
"ScoreV1_Numeric",
"Score_Boolean",
"Score_Categorical",
+ "Score_Correction",
"Score_Numeric",
"Session",
"SessionWithTraces",
diff --git a/langfuse/api/resources/commons/types/__init__.py b/langfuse/api/resources/commons/types/__init__.py
index b9063f3fb..df87680b7 100644
--- a/langfuse/api/resources/commons/types/__init__.py
+++ b/langfuse/api/resources/commons/types/__init__.py
@@ -9,6 +9,7 @@
from .comment import Comment
from .comment_object_type import CommentObjectType
from .config_category import ConfigCategory
+from .correction_score import CorrectionScore
from .create_score_value import CreateScoreValue
from .dataset import Dataset
from .dataset_item import DatasetItem
@@ -29,7 +30,13 @@
from .pricing_tier_condition import PricingTierCondition
from .pricing_tier_input import PricingTierInput
from .pricing_tier_operator import PricingTierOperator
-from .score import Score, Score_Boolean, Score_Categorical, Score_Numeric
+from .score import (
+ Score,
+ Score_Boolean,
+ Score_Categorical,
+ Score_Correction,
+ Score_Numeric,
+)
from .score_config import ScoreConfig
from .score_config_data_type import ScoreConfigDataType
from .score_data_type import ScoreDataType
@@ -52,6 +59,7 @@
"Comment",
"CommentObjectType",
"ConfigCategory",
+ "CorrectionScore",
"CreateScoreValue",
"Dataset",
"DatasetItem",
@@ -83,6 +91,7 @@
"ScoreV1_Numeric",
"Score_Boolean",
"Score_Categorical",
+ "Score_Correction",
"Score_Numeric",
"Session",
"SessionWithTraces",
diff --git a/langfuse/api/resources/commons/types/correction_score.py b/langfuse/api/resources/commons/types/correction_score.py
new file mode 100644
index 000000000..26abeae49
--- /dev/null
+++ b/langfuse/api/resources/commons/types/correction_score.py
@@ -0,0 +1,53 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ....core.datetime_utils import serialize_datetime
+from ....core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1
+from .base_score import BaseScore
+
+
+class CorrectionScore(BaseScore):
+ value: float = pydantic_v1.Field()
+ """
+ The numeric value of the score. Always 0 for correction scores.
+ """
+
+ string_value: str = pydantic_v1.Field(alias="stringValue")
+ """
+ The string representation of the correction content
+ """
+
+ def json(self, **kwargs: typing.Any) -> str:
+ kwargs_with_defaults: typing.Any = {
+ "by_alias": True,
+ "exclude_unset": True,
+ **kwargs,
+ }
+ return super().json(**kwargs_with_defaults)
+
+ def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+ kwargs_with_defaults_exclude_unset: typing.Any = {
+ "by_alias": True,
+ "exclude_unset": True,
+ **kwargs,
+ }
+ kwargs_with_defaults_exclude_none: typing.Any = {
+ "by_alias": True,
+ "exclude_none": True,
+ **kwargs,
+ }
+
+ return deep_union_pydantic_dicts(
+ super().dict(**kwargs_with_defaults_exclude_unset),
+ super().dict(**kwargs_with_defaults_exclude_none),
+ )
+
+ class Config:
+ frozen = True
+ smart_union = True
+ allow_population_by_field_name = True
+ populate_by_name = True
+ extra = pydantic_v1.Extra.allow
+ json_encoders = {dt.datetime: serialize_datetime}
diff --git a/langfuse/api/resources/commons/types/score.py b/langfuse/api/resources/commons/types/score.py
index 8d54b6575..dab6eee43 100644
--- a/langfuse/api/resources/commons/types/score.py
+++ b/langfuse/api/resources/commons/types/score.py
@@ -204,4 +204,69 @@ class Config:
json_encoders = {dt.datetime: serialize_datetime}
-Score = typing.Union[Score_Numeric, Score_Categorical, Score_Boolean]
+class Score_Correction(pydantic_v1.BaseModel):
+ value: float
+ string_value: str = pydantic_v1.Field(alias="stringValue")
+ id: str
+ trace_id: typing.Optional[str] = pydantic_v1.Field(alias="traceId", default=None)
+ session_id: typing.Optional[str] = pydantic_v1.Field(
+ alias="sessionId", default=None
+ )
+ observation_id: typing.Optional[str] = pydantic_v1.Field(
+ alias="observationId", default=None
+ )
+ dataset_run_id: typing.Optional[str] = pydantic_v1.Field(
+ alias="datasetRunId", default=None
+ )
+ name: str
+ source: ScoreSource
+ timestamp: dt.datetime
+ created_at: dt.datetime = pydantic_v1.Field(alias="createdAt")
+ updated_at: dt.datetime = pydantic_v1.Field(alias="updatedAt")
+ author_user_id: typing.Optional[str] = pydantic_v1.Field(
+ alias="authorUserId", default=None
+ )
+ comment: typing.Optional[str] = None
+ metadata: typing.Any
+ config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None)
+ queue_id: typing.Optional[str] = pydantic_v1.Field(alias="queueId", default=None)
+ environment: str
+ data_type: typing.Literal["CORRECTION"] = pydantic_v1.Field(
+ alias="dataType", default="CORRECTION"
+ )
+
+ def json(self, **kwargs: typing.Any) -> str:
+ kwargs_with_defaults: typing.Any = {
+ "by_alias": True,
+ "exclude_unset": True,
+ **kwargs,
+ }
+ return super().json(**kwargs_with_defaults)
+
+ def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+ kwargs_with_defaults_exclude_unset: typing.Any = {
+ "by_alias": True,
+ "exclude_unset": True,
+ **kwargs,
+ }
+ kwargs_with_defaults_exclude_none: typing.Any = {
+ "by_alias": True,
+ "exclude_none": True,
+ **kwargs,
+ }
+
+ return deep_union_pydantic_dicts(
+ super().dict(**kwargs_with_defaults_exclude_unset),
+ super().dict(**kwargs_with_defaults_exclude_none),
+ )
+
+ class Config:
+ frozen = True
+ smart_union = True
+ allow_population_by_field_name = True
+ populate_by_name = True
+ extra = pydantic_v1.Extra.allow
+ json_encoders = {dt.datetime: serialize_datetime}
+
+
+Score = typing.Union[Score_Numeric, Score_Categorical, Score_Boolean, Score_Correction]
diff --git a/langfuse/api/resources/dataset_items/client.py b/langfuse/api/resources/dataset_items/client.py
index 8ece3a790..f557c5eab 100644
--- a/langfuse/api/resources/dataset_items/client.py
+++ b/langfuse/api/resources/dataset_items/client.py
@@ -1,10 +1,12 @@
# This file was auto-generated by Fern from our API Definition.
+import datetime as dt
import typing
from json.decoder import JSONDecodeError
from ...core.api_error import ApiError
from ...core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
+from ...core.datetime_utils import serialize_datetime
from ...core.jsonable_encoder import jsonable_encoder
from ...core.pydantic_utilities import pydantic_v1
from ...core.request_options import RequestOptions
@@ -168,12 +170,14 @@ def list(
dataset_name: typing.Optional[str] = None,
source_trace_id: typing.Optional[str] = None,
source_observation_id: typing.Optional[str] = None,
+ version: typing.Optional[dt.datetime] = None,
page: typing.Optional[int] = None,
limit: typing.Optional[int] = None,
request_options: typing.Optional[RequestOptions] = None,
) -> PaginatedDatasetItems:
"""
- Get dataset items
+ Get dataset items. Optionally specify a version to get the items as they existed at that point in time.
+ Note: If the version parameter is provided, dataset_name must also be provided.
Parameters
----------
@@ -183,6 +187,11 @@ def list(
source_observation_id : typing.Optional[str]
+ version : typing.Optional[dt.datetime]
+ ISO 8601 timestamp (RFC 3339, Section 5.6) in UTC (e.g., "2026-01-21T14:35:42Z").
+ If provided, returns the state of the dataset at this timestamp.
+ If not provided, returns the latest version. Requires dataset_name to be specified.
+
page : typing.Optional[int]
page number, starts at 1
@@ -217,6 +226,7 @@ def list(
"datasetName": dataset_name,
"sourceTraceId": source_trace_id,
"sourceObservationId": source_observation_id,
+ "version": serialize_datetime(version) if version is not None else None,
"page": page,
"limit": limit,
},
@@ -477,12 +487,14 @@ async def list(
dataset_name: typing.Optional[str] = None,
source_trace_id: typing.Optional[str] = None,
source_observation_id: typing.Optional[str] = None,
+ version: typing.Optional[dt.datetime] = None,
page: typing.Optional[int] = None,
limit: typing.Optional[int] = None,
request_options: typing.Optional[RequestOptions] = None,
) -> PaginatedDatasetItems:
"""
- Get dataset items
+ Get dataset items. Optionally specify a version to get the items as they existed at that point in time.
+ Note: If the version parameter is provided, dataset_name must also be provided.
Parameters
----------
@@ -492,6 +504,11 @@ async def list(
source_observation_id : typing.Optional[str]
+ version : typing.Optional[dt.datetime]
+ ISO 8601 timestamp (RFC 3339, Section 5.6) in UTC (e.g., "2026-01-21T14:35:42Z").
+ If provided, returns the state of the dataset at this timestamp.
+ If not provided, returns the latest version. Requires dataset_name to be specified.
+
page : typing.Optional[int]
page number, starts at 1
@@ -534,6 +551,7 @@ async def main() -> None:
"datasetName": dataset_name,
"sourceTraceId": source_trace_id,
"sourceObservationId": source_observation_id,
+ "version": serialize_datetime(version) if version is not None else None,
"page": page,
"limit": limit,
},
diff --git a/langfuse/api/resources/dataset_run_items/types/create_dataset_run_item_request.py b/langfuse/api/resources/dataset_run_items/types/create_dataset_run_item_request.py
index 0a643b835..091f34e7e 100644
--- a/langfuse/api/resources/dataset_run_items/types/create_dataset_run_item_request.py
+++ b/langfuse/api/resources/dataset_run_items/types/create_dataset_run_item_request.py
@@ -30,6 +30,16 @@ class CreateDatasetRunItemRequest(pydantic_v1.BaseModel):
traceId should always be provided. For compatibility with older SDK versions it can also be inferred from the provided observationId.
"""
+ dataset_version: typing.Optional[dt.datetime] = pydantic_v1.Field(
+ alias="datasetVersion", default=None
+ )
+ """
+ ISO 8601 timestamp (RFC 3339, Section 5.6) in UTC (e.g., "2026-01-21T14:35:42Z").
+ Specifies the dataset version to use for this experiment run.
+ If provided, the experiment will use dataset items as they existed at or before this timestamp.
+ If not provided, uses the latest version of dataset items.
+ """
+
def json(self, **kwargs: typing.Any) -> str:
kwargs_with_defaults: typing.Any = {
"by_alias": True,
diff --git a/langfuse/api/resources/score_v_2/__init__.py b/langfuse/api/resources/score_v_2/__init__.py
index 40599eec1..4e333a693 100644
--- a/langfuse/api/resources/score_v_2/__init__.py
+++ b/langfuse/api/resources/score_v_2/__init__.py
@@ -5,9 +5,11 @@
GetScoresResponseData,
GetScoresResponseDataBoolean,
GetScoresResponseDataCategorical,
+ GetScoresResponseDataCorrection,
GetScoresResponseDataNumeric,
GetScoresResponseData_Boolean,
GetScoresResponseData_Categorical,
+ GetScoresResponseData_Correction,
GetScoresResponseData_Numeric,
GetScoresResponseTraceData,
)
@@ -17,9 +19,11 @@
"GetScoresResponseData",
"GetScoresResponseDataBoolean",
"GetScoresResponseDataCategorical",
+ "GetScoresResponseDataCorrection",
"GetScoresResponseDataNumeric",
"GetScoresResponseData_Boolean",
"GetScoresResponseData_Categorical",
+ "GetScoresResponseData_Correction",
"GetScoresResponseData_Numeric",
"GetScoresResponseTraceData",
]
diff --git a/langfuse/api/resources/score_v_2/types/__init__.py b/langfuse/api/resources/score_v_2/types/__init__.py
index 480ed3406..d08e687ef 100644
--- a/langfuse/api/resources/score_v_2/types/__init__.py
+++ b/langfuse/api/resources/score_v_2/types/__init__.py
@@ -5,10 +5,12 @@
GetScoresResponseData,
GetScoresResponseData_Boolean,
GetScoresResponseData_Categorical,
+ GetScoresResponseData_Correction,
GetScoresResponseData_Numeric,
)
from .get_scores_response_data_boolean import GetScoresResponseDataBoolean
from .get_scores_response_data_categorical import GetScoresResponseDataCategorical
+from .get_scores_response_data_correction import GetScoresResponseDataCorrection
from .get_scores_response_data_numeric import GetScoresResponseDataNumeric
from .get_scores_response_trace_data import GetScoresResponseTraceData
@@ -17,9 +19,11 @@
"GetScoresResponseData",
"GetScoresResponseDataBoolean",
"GetScoresResponseDataCategorical",
+ "GetScoresResponseDataCorrection",
"GetScoresResponseDataNumeric",
"GetScoresResponseData_Boolean",
"GetScoresResponseData_Categorical",
+ "GetScoresResponseData_Correction",
"GetScoresResponseData_Numeric",
"GetScoresResponseTraceData",
]
diff --git a/langfuse/api/resources/score_v_2/types/get_scores_response_data.py b/langfuse/api/resources/score_v_2/types/get_scores_response_data.py
index 965a01c80..4f73fbcae 100644
--- a/langfuse/api/resources/score_v_2/types/get_scores_response_data.py
+++ b/langfuse/api/resources/score_v_2/types/get_scores_response_data.py
@@ -208,8 +208,75 @@ class Config:
json_encoders = {dt.datetime: serialize_datetime}
+class GetScoresResponseData_Correction(pydantic_v1.BaseModel):
+ trace: typing.Optional[GetScoresResponseTraceData] = None
+ value: float
+ string_value: str = pydantic_v1.Field(alias="stringValue")
+ id: str
+ trace_id: typing.Optional[str] = pydantic_v1.Field(alias="traceId", default=None)
+ session_id: typing.Optional[str] = pydantic_v1.Field(
+ alias="sessionId", default=None
+ )
+ observation_id: typing.Optional[str] = pydantic_v1.Field(
+ alias="observationId", default=None
+ )
+ dataset_run_id: typing.Optional[str] = pydantic_v1.Field(
+ alias="datasetRunId", default=None
+ )
+ name: str
+ source: ScoreSource
+ timestamp: dt.datetime
+ created_at: dt.datetime = pydantic_v1.Field(alias="createdAt")
+ updated_at: dt.datetime = pydantic_v1.Field(alias="updatedAt")
+ author_user_id: typing.Optional[str] = pydantic_v1.Field(
+ alias="authorUserId", default=None
+ )
+ comment: typing.Optional[str] = None
+ metadata: typing.Any
+ config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None)
+ queue_id: typing.Optional[str] = pydantic_v1.Field(alias="queueId", default=None)
+ environment: str
+ data_type: typing.Literal["CORRECTION"] = pydantic_v1.Field(
+ alias="dataType", default="CORRECTION"
+ )
+
+ def json(self, **kwargs: typing.Any) -> str:
+ kwargs_with_defaults: typing.Any = {
+ "by_alias": True,
+ "exclude_unset": True,
+ **kwargs,
+ }
+ return super().json(**kwargs_with_defaults)
+
+ def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+ kwargs_with_defaults_exclude_unset: typing.Any = {
+ "by_alias": True,
+ "exclude_unset": True,
+ **kwargs,
+ }
+ kwargs_with_defaults_exclude_none: typing.Any = {
+ "by_alias": True,
+ "exclude_none": True,
+ **kwargs,
+ }
+
+ return deep_union_pydantic_dicts(
+ super().dict(**kwargs_with_defaults_exclude_unset),
+ super().dict(**kwargs_with_defaults_exclude_none),
+ )
+
+ class Config:
+ frozen = True
+ smart_union = True
+ allow_population_by_field_name = True
+ populate_by_name = True
+ extra = pydantic_v1.Extra.allow
+ json_encoders = {dt.datetime: serialize_datetime}
+
+
GetScoresResponseData = typing.Union[
GetScoresResponseData_Numeric,
GetScoresResponseData_Categorical,
GetScoresResponseData_Boolean,
+ GetScoresResponseData_Correction,
]
diff --git a/langfuse/api/resources/score_v_2/types/get_scores_response_data_correction.py b/langfuse/api/resources/score_v_2/types/get_scores_response_data_correction.py
new file mode 100644
index 000000000..0c59f29a8
--- /dev/null
+++ b/langfuse/api/resources/score_v_2/types/get_scores_response_data_correction.py
@@ -0,0 +1,46 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ....core.datetime_utils import serialize_datetime
+from ....core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1
+from ...commons.types.correction_score import CorrectionScore
+from .get_scores_response_trace_data import GetScoresResponseTraceData
+
+
+class GetScoresResponseDataCorrection(CorrectionScore):
+ trace: typing.Optional[GetScoresResponseTraceData] = None
+
+ def json(self, **kwargs: typing.Any) -> str:
+ kwargs_with_defaults: typing.Any = {
+ "by_alias": True,
+ "exclude_unset": True,
+ **kwargs,
+ }
+ return super().json(**kwargs_with_defaults)
+
+ def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+ kwargs_with_defaults_exclude_unset: typing.Any = {
+ "by_alias": True,
+ "exclude_unset": True,
+ **kwargs,
+ }
+ kwargs_with_defaults_exclude_none: typing.Any = {
+ "by_alias": True,
+ "exclude_none": True,
+ **kwargs,
+ }
+
+ return deep_union_pydantic_dicts(
+ super().dict(**kwargs_with_defaults_exclude_unset),
+ super().dict(**kwargs_with_defaults_exclude_none),
+ )
+
+ class Config:
+ frozen = True
+ smart_union = True
+ allow_population_by_field_name = True
+ populate_by_name = True
+ extra = pydantic_v1.Extra.allow
+ json_encoders = {dt.datetime: serialize_datetime}
diff --git a/tests/test_datasets.py b/tests/test_datasets.py
index 051dcfbf6..f86812138 100644
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -1,5 +1,6 @@
import json
import time
+from datetime import timedelta
from concurrent.futures import ThreadPoolExecutor
from typing import Sequence
@@ -527,3 +528,44 @@ def test_delete_dataset_run_with_folder_names():
# Verify the run is deleted
runs_after = langfuse.get_dataset_runs(dataset_name=folder_name)
assert len(runs_after.data) == 0
+
+
+def test_get_dataset_with_version():
+ """Test that get_dataset correctly filters items by version timestamp."""
+
+ langfuse = Langfuse(debug=False)
+
+ # Create dataset
+ name = create_uuid()
+ langfuse.create_dataset(name=name)
+
+ # Create first item
+ item1 = langfuse.create_dataset_item(dataset_name=name, input={"version": "v1"})
+ langfuse.flush()
+ time.sleep(3) # Ensure persistence
+
+ # Fetch dataset to get the actual server-assigned timestamp of item1
+ dataset_after_item1 = langfuse.get_dataset(name)
+ assert len(dataset_after_item1.items) == 1
+ item1_created_at = dataset_after_item1.items[0].created_at
+
+ # Use a timestamp 1 second after item1's actual creation time
+ query_timestamp = item1_created_at + timedelta(seconds=1)
+ time.sleep(3) # Ensure temporal separation
+
+ # Create second item
+ langfuse.create_dataset_item(dataset_name=name, input={"version": "v2"})
+ langfuse.flush()
+ time.sleep(3) # Ensure persistence
+
+ # Fetch at the query_timestamp (should only return first item)
+ dataset = langfuse.get_dataset(name, version=query_timestamp)
+
+ # Verify only first item is retrieved
+ assert len(dataset.items) == 1
+ assert dataset.items[0].input == {"version": "v1"}
+ assert dataset.items[0].id == item1.id
+
+ # Verify fetching without version returns both items (latest)
+ dataset_latest = langfuse.get_dataset(name)
+ assert len(dataset_latest.items) == 2