From c2dd867a25286421fd055a1e7ef02f873c6cb46d Mon Sep 17 00:00:00 2001
From: langfuse-bot <langfuse-bot@langfuse.com>
Date: Tue, 3 Feb 2026 11:20:21 +0000
Subject: [PATCH 1/3] feat(api): update API spec from langfuse/langfuse 966662e

---
 langfuse/api/__init__.py                      |  8 +++
 langfuse/api/reference.md                     | 15 ++++-
 langfuse/api/resources/__init__.py            |  8 +++
 langfuse/api/resources/commons/__init__.py    |  4 ++
 .../api/resources/commons/types/__init__.py   | 11 ++-
 .../commons/types/correction_score.py         | 53 +++++++++++++++
 langfuse/api/resources/commons/types/score.py | 67 ++++++++++++++++++-
 .../api/resources/dataset_items/client.py     | 22 +++++-
 .../types/create_dataset_run_item_request.py  | 10 +++
 langfuse/api/resources/score_v_2/__init__.py  |  4 ++
 .../api/resources/score_v_2/types/__init__.py |  4 ++
 .../types/get_scores_response_data.py         | 67 +++++++++++++++++++
 .../get_scores_response_data_correction.py    | 46 +++++++++++++
 13 files changed, 314 insertions(+), 5 deletions(-)
 create mode 100644 langfuse/api/resources/commons/types/correction_score.py
 create mode 100644 langfuse/api/resources/score_v_2/types/get_scores_response_data_correction.py
diff --git a/langfuse/api/__init__.py b/langfuse/api/__init__.py
index 835bdfefa..d1a6414ed 100644
--- a/langfuse/api/__init__.py
+++ b/langfuse/api/__init__.py
@@ -36,6 +36,7 @@
     Comment,
     CommentObjectType,
     ConfigCategory,
+    CorrectionScore,
     CreateAnnotationQueueAssignmentResponse,
     CreateAnnotationQueueItemRequest,
     CreateAnnotationQueueRequest,
@@ -85,9 +86,11 @@
     GetScoresResponseData,
     GetScoresResponseDataBoolean,
     GetScoresResponseDataCategorical,
+    GetScoresResponseDataCorrection,
     GetScoresResponseDataNumeric,
     GetScoresResponseData_Boolean,
     GetScoresResponseData_Categorical,
+    GetScoresResponseData_Correction,
     GetScoresResponseData_Numeric,
     GetScoresResponseTraceData,
     HealthResponse,
@@ -199,6 +202,7 @@
     ScoreV1_Numeric,
     Score_Boolean,
     Score_Categorical,
+    Score_Correction,
     Score_Numeric,
     SdkLogBody,
     SdkLogEvent,
@@ -293,6 +297,7 @@
     "Comment",
     "CommentObjectType",
     "ConfigCategory",
+    "CorrectionScore",
     "CreateAnnotationQueueAssignmentResponse",
     "CreateAnnotationQueueItemRequest",
     "CreateAnnotationQueueRequest",
@@ -342,9 +347,11 @@
     "GetScoresResponseData",
     "GetScoresResponseDataBoolean",
     "GetScoresResponseDataCategorical",
+    "GetScoresResponseDataCorrection",
     "GetScoresResponseDataNumeric",
     "GetScoresResponseData_Boolean",
     "GetScoresResponseData_Categorical",
+    "GetScoresResponseData_Correction",
     "GetScoresResponseData_Numeric",
     "GetScoresResponseTraceData",
     "HealthResponse",
@@ -456,6 +463,7 @@
     "ScoreV1_Numeric",
     "Score_Boolean",
     "Score_Categorical",
+    "Score_Correction",
     "Score_Numeric",
     "SdkLogBody",
     "SdkLogEvent",
diff --git a/langfuse/api/reference.md b/langfuse/api/reference.md
index 19870d547..5f6371b51 100644
--- a/langfuse/api/reference.md
+++ b/langfuse/api/reference.md
@@ -1519,7 +1519,8 @@ client.dataset_items.get(
 <dl>
 <dd>
 
-Get dataset items
+Get dataset items. Optionally specify a version to get the items as they existed at that point in time.
+Note: If version parameter is provided, datasetName must also be provided.
 </dd>
 </dl>
 </dd>
@@ -1584,6 +1585,18 @@ client.dataset_items.list()
 <dl>
 <dd>
 
+**version:** `typing.Optional[dt.datetime]` 
+
+ISO 8601 timestamp (RFC 3339, Section 5.6) in UTC (e.g., "2026-01-21T14:35:42Z").
+If provided, returns state of dataset at this timestamp.
+If not provided, returns the latest version. Requires datasetName to be specified.
+    
+</dd>
+</dl>
+
+<dl>
+<dd>
+
 **page:** `typing.Optional[int]` — page number, starts at 1
     
 </dd>
diff --git a/langfuse/api/resources/__init__.py b/langfuse/api/resources/__init__.py
index 55c4e012a..0de0a56a5 100644
--- a/langfuse/api/resources/__init__.py
+++ b/langfuse/api/resources/__init__.py
@@ -67,6 +67,7 @@
     Comment,
     CommentObjectType,
     ConfigCategory,
+    CorrectionScore,
     CreateScoreValue,
     Dataset,
     DatasetItem,
@@ -101,6 +102,7 @@
     ScoreV1_Numeric,
     Score_Boolean,
     Score_Categorical,
+    Score_Correction,
     Score_Numeric,
     Session,
     SessionWithTraces,
@@ -268,9 +270,11 @@
     GetScoresResponseData,
     GetScoresResponseDataBoolean,
     GetScoresResponseDataCategorical,
+    GetScoresResponseDataCorrection,
     GetScoresResponseDataNumeric,
     GetScoresResponseData_Boolean,
     GetScoresResponseData_Categorical,
+    GetScoresResponseData_Correction,
     GetScoresResponseData_Numeric,
     GetScoresResponseTraceData,
 )
@@ -313,6 +317,7 @@
     "Comment",
     "CommentObjectType",
     "ConfigCategory",
+    "CorrectionScore",
     "CreateAnnotationQueueAssignmentResponse",
     "CreateAnnotationQueueItemRequest",
     "CreateAnnotationQueueRequest",
@@ -362,9 +367,11 @@
     "GetScoresResponseData",
     "GetScoresResponseDataBoolean",
     "GetScoresResponseDataCategorical",
+    "GetScoresResponseDataCorrection",
     "GetScoresResponseDataNumeric",
     "GetScoresResponseData_Boolean",
     "GetScoresResponseData_Categorical",
+    "GetScoresResponseData_Correction",
     "GetScoresResponseData_Numeric",
     "GetScoresResponseTraceData",
     "HealthResponse",
@@ -476,6 +483,7 @@
     "ScoreV1_Numeric",
     "Score_Boolean",
     "Score_Categorical",
+    "Score_Correction",
     "Score_Numeric",
     "SdkLogBody",
     "SdkLogEvent",
diff --git a/langfuse/api/resources/commons/__init__.py b/langfuse/api/resources/commons/__init__.py
index 9e522548e..7105b22c5 100644
--- a/langfuse/api/resources/commons/__init__.py
+++ b/langfuse/api/resources/commons/__init__.py
@@ -10,6 +10,7 @@
     Comment,
     CommentObjectType,
     ConfigCategory,
+    CorrectionScore,
     CreateScoreValue,
     Dataset,
     DatasetItem,
@@ -41,6 +42,7 @@
     ScoreV1_Numeric,
     Score_Boolean,
     Score_Categorical,
+    Score_Correction,
     Score_Numeric,
     Session,
     SessionWithTraces,
@@ -68,6 +70,7 @@
     "Comment",
     "CommentObjectType",
     "ConfigCategory",
+    "CorrectionScore",
     "CreateScoreValue",
     "Dataset",
     "DatasetItem",
@@ -102,6 +105,7 @@
     "ScoreV1_Numeric",
     "Score_Boolean",
     "Score_Categorical",
+    "Score_Correction",
     "Score_Numeric",
     "Session",
     "SessionWithTraces",
diff --git a/langfuse/api/resources/commons/types/__init__.py b/langfuse/api/resources/commons/types/__init__.py
index b9063f3fb..df87680b7 100644
--- a/langfuse/api/resources/commons/types/__init__.py
+++ b/langfuse/api/resources/commons/types/__init__.py
@@ -9,6 +9,7 @@
 from .comment import Comment
 from .comment_object_type import CommentObjectType
 from .config_category import ConfigCategory
+from .correction_score import CorrectionScore
 from .create_score_value import CreateScoreValue
 from .dataset import Dataset
 from .dataset_item import DatasetItem
@@ -29,7 +30,13 @@
 from .pricing_tier_condition import PricingTierCondition
 from .pricing_tier_input import PricingTierInput
 from .pricing_tier_operator import PricingTierOperator
-from .score import Score, Score_Boolean, Score_Categorical, Score_Numeric
+from .score import (
+    Score,
+    Score_Boolean,
+    Score_Categorical,
+    Score_Correction,
+    Score_Numeric,
+)
 from .score_config import ScoreConfig
 from .score_config_data_type import ScoreConfigDataType
 from .score_data_type import ScoreDataType
@@ -52,6 +59,7 @@
     "Comment",
     "CommentObjectType",
     "ConfigCategory",
+    "CorrectionScore",
     "CreateScoreValue",
     "Dataset",
     "DatasetItem",
@@ -83,6 +91,7 @@
     "ScoreV1_Numeric",
     "Score_Boolean",
     "Score_Categorical",
+    "Score_Correction",
     "Score_Numeric",
     "Session",
     "SessionWithTraces",
diff --git a/langfuse/api/resources/commons/types/correction_score.py b/langfuse/api/resources/commons/types/correction_score.py
new file mode 100644
index 000000000..26abeae49
--- /dev/null
+++ b/langfuse/api/resources/commons/types/correction_score.py
@@ -0,0 +1,53 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ....core.datetime_utils import serialize_datetime
+from ....core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1
+from .base_score import BaseScore
+
+
+class CorrectionScore(BaseScore):
+    value: float = pydantic_v1.Field()
+    """
+    The numeric value of the score. Always 0 for correction scores.
+    """
+
+    string_value: str = pydantic_v1.Field(alias="stringValue")
+    """
+    The string representation of the correction content
+    """
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {
+            "by_alias": True,
+            "exclude_unset": True,
+            **kwargs,
+        }
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults_exclude_unset: typing.Any = {
+            "by_alias": True,
+            "exclude_unset": True,
+            **kwargs,
+        }
+        kwargs_with_defaults_exclude_none: typing.Any = {
+            "by_alias": True,
+            "exclude_none": True,
+            **kwargs,
+        }
+
+        return deep_union_pydantic_dicts(
+            super().dict(**kwargs_with_defaults_exclude_unset),
+            super().dict(**kwargs_with_defaults_exclude_none),
+        )
+
+    class Config:
+        frozen = True
+        smart_union = True
+        allow_population_by_field_name = True
+        populate_by_name = True
+        extra = pydantic_v1.Extra.allow
+        json_encoders = {dt.datetime: serialize_datetime}
diff --git a/langfuse/api/resources/commons/types/score.py b/langfuse/api/resources/commons/types/score.py
index 8d54b6575..dab6eee43 100644
--- a/langfuse/api/resources/commons/types/score.py
+++ b/langfuse/api/resources/commons/types/score.py
@@ -204,4 +204,69 @@ class Config:
         json_encoders = {dt.datetime: serialize_datetime}
 
 
-Score = typing.Union[Score_Numeric, Score_Categorical, Score_Boolean]
+class Score_Correction(pydantic_v1.BaseModel):
+    value: float
+    string_value: str = pydantic_v1.Field(alias="stringValue")
+    id: str
+    trace_id: typing.Optional[str] = pydantic_v1.Field(alias="traceId", default=None)
+    session_id: typing.Optional[str] = pydantic_v1.Field(
+        alias="sessionId", default=None
+    )
+    observation_id: typing.Optional[str] = pydantic_v1.Field(
+        alias="observationId", default=None
+    )
+    dataset_run_id: typing.Optional[str] = pydantic_v1.Field(
+        alias="datasetRunId", default=None
+    )
+    name: str
+    source: ScoreSource
+    timestamp: dt.datetime
+    created_at: dt.datetime = pydantic_v1.Field(alias="createdAt")
+    updated_at: dt.datetime = pydantic_v1.Field(alias="updatedAt")
+    author_user_id: typing.Optional[str] = pydantic_v1.Field(
+        alias="authorUserId", default=None
+    )
+    comment: typing.Optional[str] = None
+    metadata: typing.Any
+    config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None)
+    queue_id: typing.Optional[str] = pydantic_v1.Field(alias="queueId", default=None)
+    environment: str
+    data_type: typing.Literal["CORRECTION"] = pydantic_v1.Field(
+        alias="dataType", default="CORRECTION"
+    )
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {
+            "by_alias": True,
+            "exclude_unset": True,
+            **kwargs,
+        }
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults_exclude_unset: typing.Any = {
+            "by_alias": True,
+            "exclude_unset": True,
+            **kwargs,
+        }
+        kwargs_with_defaults_exclude_none: typing.Any = {
+            "by_alias": True,
+            "exclude_none": True,
+            **kwargs,
+        }
+
+        return deep_union_pydantic_dicts(
+            super().dict(**kwargs_with_defaults_exclude_unset),
+            super().dict(**kwargs_with_defaults_exclude_none),
+        )
+
+    class Config:
+        frozen = True
+        smart_union = True
+        allow_population_by_field_name = True
+        populate_by_name = True
+        extra = pydantic_v1.Extra.allow
+        json_encoders = {dt.datetime: serialize_datetime}
+
+
+Score = typing.Union[Score_Numeric, Score_Categorical, Score_Boolean, Score_Correction]
diff --git a/langfuse/api/resources/dataset_items/client.py b/langfuse/api/resources/dataset_items/client.py
index 8ece3a790..f557c5eab 100644
--- a/langfuse/api/resources/dataset_items/client.py
+++ b/langfuse/api/resources/dataset_items/client.py
@@ -1,10 +1,12 @@
 # This file was auto-generated by Fern from our API Definition.
 
+import datetime as dt
 import typing
 from json.decoder import JSONDecodeError
 
 from ...core.api_error import ApiError
 from ...core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
+from ...core.datetime_utils import serialize_datetime
 from ...core.jsonable_encoder import jsonable_encoder
 from ...core.pydantic_utilities import pydantic_v1
 from ...core.request_options import RequestOptions
@@ -168,12 +170,14 @@ def list(
         dataset_name: typing.Optional[str] = None,
         source_trace_id: typing.Optional[str] = None,
         source_observation_id: typing.Optional[str] = None,
+        version: typing.Optional[dt.datetime] = None,
         page: typing.Optional[int] = None,
         limit: typing.Optional[int] = None,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> PaginatedDatasetItems:
         """
-        Get dataset items
+        Get dataset items. Optionally specify a version to get the items as they existed at that point in time.
+        Note: If version parameter is provided, datasetName must also be provided.
 
         Parameters
         ----------
@@ -183,6 +187,11 @@ def list(
 
         source_observation_id : typing.Optional[str]
 
+        version : typing.Optional[dt.datetime]
+            ISO 8601 timestamp (RFC 3339, Section 5.6) in UTC (e.g., "2026-01-21T14:35:42Z").
+            If provided, returns state of dataset at this timestamp.
+            If not provided, returns the latest version. Requires datasetName to be specified.
+
         page : typing.Optional[int]
             page number, starts at 1
 
@@ -217,6 +226,7 @@ def list(
                 "datasetName": dataset_name,
                 "sourceTraceId": source_trace_id,
                 "sourceObservationId": source_observation_id,
+                "version": serialize_datetime(version) if version is not None else None,
                 "page": page,
                 "limit": limit,
             },
@@ -477,12 +487,14 @@ async def list(
         dataset_name: typing.Optional[str] = None,
         source_trace_id: typing.Optional[str] = None,
         source_observation_id: typing.Optional[str] = None,
+        version: typing.Optional[dt.datetime] = None,
         page: typing.Optional[int] = None,
         limit: typing.Optional[int] = None,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> PaginatedDatasetItems:
         """
-        Get dataset items
+        Get dataset items. Optionally specify a version to get the items as they existed at that point in time.
+        Note: If version parameter is provided, datasetName must also be provided.
 
         Parameters
         ----------
@@ -492,6 +504,11 @@ async def list(
 
         source_observation_id : typing.Optional[str]
 
+        version : typing.Optional[dt.datetime]
+            ISO 8601 timestamp (RFC 3339, Section 5.6) in UTC (e.g., "2026-01-21T14:35:42Z").
+            If provided, returns state of dataset at this timestamp.
+            If not provided, returns the latest version. Requires datasetName to be specified.
+
         page : typing.Optional[int]
             page number, starts at 1
 
@@ -534,6 +551,7 @@ async def main() -> None:
                 "datasetName": dataset_name,
                 "sourceTraceId": source_trace_id,
                 "sourceObservationId": source_observation_id,
+                "version": serialize_datetime(version) if version is not None else None,
                 "page": page,
                 "limit": limit,
             },
diff --git a/langfuse/api/resources/dataset_run_items/types/create_dataset_run_item_request.py b/langfuse/api/resources/dataset_run_items/types/create_dataset_run_item_request.py
index 0a643b835..091f34e7e 100644
--- a/langfuse/api/resources/dataset_run_items/types/create_dataset_run_item_request.py
+++ b/langfuse/api/resources/dataset_run_items/types/create_dataset_run_item_request.py
@@ -30,6 +30,16 @@ class CreateDatasetRunItemRequest(pydantic_v1.BaseModel):
     traceId should always be provided. For compatibility with older SDK versions it can also be inferred from the provided observationId.
     """
 
+    dataset_version: typing.Optional[dt.datetime] = pydantic_v1.Field(
+        alias="datasetVersion", default=None
+    )
+    """
+    ISO 8601 timestamp (RFC 3339, Section 5.6) in UTC (e.g., "2026-01-21T14:35:42Z").
+    Specifies the dataset version to use for this experiment run. 
+    If provided, the experiment will use dataset items as they existed at or before this timestamp.
+    If not provided, uses the latest version of dataset items.
+    """
+
     def json(self, **kwargs: typing.Any) -> str:
         kwargs_with_defaults: typing.Any = {
             "by_alias": True,
diff --git a/langfuse/api/resources/score_v_2/__init__.py b/langfuse/api/resources/score_v_2/__init__.py
index 40599eec1..4e333a693 100644
--- a/langfuse/api/resources/score_v_2/__init__.py
+++ b/langfuse/api/resources/score_v_2/__init__.py
@@ -5,9 +5,11 @@
     GetScoresResponseData,
     GetScoresResponseDataBoolean,
     GetScoresResponseDataCategorical,
+    GetScoresResponseDataCorrection,
     GetScoresResponseDataNumeric,
     GetScoresResponseData_Boolean,
     GetScoresResponseData_Categorical,
+    GetScoresResponseData_Correction,
     GetScoresResponseData_Numeric,
     GetScoresResponseTraceData,
 )
@@ -17,9 +19,11 @@
     "GetScoresResponseData",
     "GetScoresResponseDataBoolean",
     "GetScoresResponseDataCategorical",
+    "GetScoresResponseDataCorrection",
     "GetScoresResponseDataNumeric",
     "GetScoresResponseData_Boolean",
     "GetScoresResponseData_Categorical",
+    "GetScoresResponseData_Correction",
     "GetScoresResponseData_Numeric",
     "GetScoresResponseTraceData",
 ]
diff --git a/langfuse/api/resources/score_v_2/types/__init__.py b/langfuse/api/resources/score_v_2/types/__init__.py
index 480ed3406..d08e687ef 100644
--- a/langfuse/api/resources/score_v_2/types/__init__.py
+++ b/langfuse/api/resources/score_v_2/types/__init__.py
@@ -5,10 +5,12 @@
     GetScoresResponseData,
     GetScoresResponseData_Boolean,
     GetScoresResponseData_Categorical,
+    GetScoresResponseData_Correction,
     GetScoresResponseData_Numeric,
 )
 from .get_scores_response_data_boolean import GetScoresResponseDataBoolean
 from .get_scores_response_data_categorical import GetScoresResponseDataCategorical
+from .get_scores_response_data_correction import GetScoresResponseDataCorrection
 from .get_scores_response_data_numeric import GetScoresResponseDataNumeric
 from .get_scores_response_trace_data import GetScoresResponseTraceData
 
@@ -17,9 +19,11 @@
     "GetScoresResponseData",
     "GetScoresResponseDataBoolean",
     "GetScoresResponseDataCategorical",
+    "GetScoresResponseDataCorrection",
     "GetScoresResponseDataNumeric",
     "GetScoresResponseData_Boolean",
     "GetScoresResponseData_Categorical",
+    "GetScoresResponseData_Correction",
     "GetScoresResponseData_Numeric",
     "GetScoresResponseTraceData",
 ]
diff --git a/langfuse/api/resources/score_v_2/types/get_scores_response_data.py b/langfuse/api/resources/score_v_2/types/get_scores_response_data.py
index 965a01c80..4f73fbcae 100644
--- a/langfuse/api/resources/score_v_2/types/get_scores_response_data.py
+++ b/langfuse/api/resources/score_v_2/types/get_scores_response_data.py
@@ -208,8 +208,75 @@ class Config:
         json_encoders = {dt.datetime: serialize_datetime}
 
 
+class GetScoresResponseData_Correction(pydantic_v1.BaseModel):
+    trace: typing.Optional[GetScoresResponseTraceData] = None
+    value: float
+    string_value: str = pydantic_v1.Field(alias="stringValue")
+    id: str
+    trace_id: typing.Optional[str] = pydantic_v1.Field(alias="traceId", default=None)
+    session_id: typing.Optional[str] = pydantic_v1.Field(
+        alias="sessionId", default=None
+    )
+    observation_id: typing.Optional[str] = pydantic_v1.Field(
+        alias="observationId", default=None
+    )
+    dataset_run_id: typing.Optional[str] = pydantic_v1.Field(
+        alias="datasetRunId", default=None
+    )
+    name: str
+    source: ScoreSource
+    timestamp: dt.datetime
+    created_at: dt.datetime = pydantic_v1.Field(alias="createdAt")
+    updated_at: dt.datetime = pydantic_v1.Field(alias="updatedAt")
+    author_user_id: typing.Optional[str] = pydantic_v1.Field(
+        alias="authorUserId", default=None
+    )
+    comment: typing.Optional[str] = None
+    metadata: typing.Any
+    config_id: typing.Optional[str] = pydantic_v1.Field(alias="configId", default=None)
+    queue_id: typing.Optional[str] = pydantic_v1.Field(alias="queueId", default=None)
+    environment: str
+    data_type: typing.Literal["CORRECTION"] = pydantic_v1.Field(
+        alias="dataType", default="CORRECTION"
+    )
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {
+            "by_alias": True,
+            "exclude_unset": True,
+            **kwargs,
+        }
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults_exclude_unset: typing.Any = {
+            "by_alias": True,
+            "exclude_unset": True,
+            **kwargs,
+        }
+        kwargs_with_defaults_exclude_none: typing.Any = {
+            "by_alias": True,
+            "exclude_none": True,
+            **kwargs,
+        }
+
+        return deep_union_pydantic_dicts(
+            super().dict(**kwargs_with_defaults_exclude_unset),
+            super().dict(**kwargs_with_defaults_exclude_none),
+        )
+
+    class Config:
+        frozen = True
+        smart_union = True
+        allow_population_by_field_name = True
+        populate_by_name = True
+        extra = pydantic_v1.Extra.allow
+        json_encoders = {dt.datetime: serialize_datetime}
+
+
 GetScoresResponseData = typing.Union[
     GetScoresResponseData_Numeric,
     GetScoresResponseData_Categorical,
     GetScoresResponseData_Boolean,
+    GetScoresResponseData_Correction,
 ]
diff --git a/langfuse/api/resources/score_v_2/types/get_scores_response_data_correction.py b/langfuse/api/resources/score_v_2/types/get_scores_response_data_correction.py
new file mode 100644
index 000000000..0c59f29a8
--- /dev/null
+++ b/langfuse/api/resources/score_v_2/types/get_scores_response_data_correction.py
@@ -0,0 +1,46 @@
+# This file was auto-generated by Fern from our API Definition.
+
+import datetime as dt
+import typing
+
+from ....core.datetime_utils import serialize_datetime
+from ....core.pydantic_utilities import deep_union_pydantic_dicts, pydantic_v1
+from ...commons.types.correction_score import CorrectionScore
+from .get_scores_response_trace_data import GetScoresResponseTraceData
+
+
+class GetScoresResponseDataCorrection(CorrectionScore):
+    trace: typing.Optional[GetScoresResponseTraceData] = None
+
+    def json(self, **kwargs: typing.Any) -> str:
+        kwargs_with_defaults: typing.Any = {
+            "by_alias": True,
+            "exclude_unset": True,
+            **kwargs,
+        }
+        return super().json(**kwargs_with_defaults)
+
+    def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
+        kwargs_with_defaults_exclude_unset: typing.Any = {
+            "by_alias": True,
+            "exclude_unset": True,
+            **kwargs,
+        }
+        kwargs_with_defaults_exclude_none: typing.Any = {
+            "by_alias": True,
+            "exclude_none": True,
+            **kwargs,
+        }
+
+        return deep_union_pydantic_dicts(
+            super().dict(**kwargs_with_defaults_exclude_unset),
+            super().dict(**kwargs_with_defaults_exclude_none),
+        )
+
+    class Config:
+        frozen = True
+        smart_union = True
+        allow_population_by_field_name = True
+        populate_by_name = True
+        extra = pydantic_v1.Extra.allow
+        json_encoders = {dt.datetime: serialize_datetime}

From 2156ee8e329fc0742a2e569c1a34e1cbfa101b2b Mon Sep 17 00:00:00 2001
From: Marlies Mayerhofer <74332854+marliessophie@users.noreply.github.com>
Date: Thu, 5 Feb 2026 17:23:47 +0100
Subject: [PATCH 2/3] chore: support dataset versioning via SDK

---
 langfuse/_client/client.py | 10 +++++++++-
 tests/test_datasets.py     | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/langfuse/_client/client.py b/langfuse/_client/client.py
index ce7d7437d..f7ee28e36 100644
--- a/langfuse/_client/client.py
+++ b/langfuse/_client/client.py
@@ -2442,13 +2442,20 @@ def get_trace_url(self, *, trace_id: Optional[str] = None) -> Optional[str]:
         )
 
     def get_dataset(
-        self, name: str, *, fetch_items_page_size: Optional[int] = 50
+        self,
+        name: str,
+        *,
+        fetch_items_page_size: Optional[int] = 50,
+        version: Optional[datetime] = None,
     ) -> "DatasetClient":
         """Fetch a dataset by its name.
 
         Args:
             name (str): The name of the dataset to fetch.
             fetch_items_page_size (Optional[int]): All items of the dataset will be fetched in chunks of this size. Defaults to 50.
+            version (Optional[datetime]): Retrieve dataset items as they existed at this specific point in time (UTC).
+                If provided, returns the state of items at the specified UTC timestamp.
+                If not provided, returns the latest version. Must be a timezone-aware datetime object in UTC.
 
         Returns:
             DatasetClient: The dataset with the given name.
@@ -2465,6 +2472,7 @@ def get_dataset(
                     dataset_name=self._url_encode(name, is_url_param=True),
                     page=page,
                     limit=fetch_items_page_size,
+                    version=version,
                 )
                 dataset_items.extend(new_items.data)
 
diff --git a/tests/test_datasets.py b/tests/test_datasets.py
index 051dcfbf6..fcc38402f 100644
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -527,3 +527,41 @@ def test_delete_dataset_run_with_folder_names():
     # Verify the run is deleted
     runs_after = langfuse.get_dataset_runs(dataset_name=folder_name)
     assert len(runs_after.data) == 0
+
+
+def test_get_dataset_with_version():
+    """Test that get_dataset correctly filters items by version timestamp."""
+    from datetime import datetime, timezone
+    import time
+
+    langfuse = Langfuse(debug=False)
+
+    # Create dataset
+    name = create_uuid()
+    langfuse.create_dataset(name=name)
+
+    # Create first item
+    item1 = langfuse.create_dataset_item(dataset_name=name, input={"version": "v1"})
+    langfuse.flush()
+    time.sleep(3)  # Ensure persistence and clear temporal separation
+
+    # Capture timestamp AFTER first item, BEFORE second item
+    query_timestamp = datetime.now(timezone.utc)
+    time.sleep(3)  # Ensure second item is created AFTER query_timestamp
+
+    # Create second item
+    langfuse.create_dataset_item(dataset_name=name, input={"version": "v2"})
+    langfuse.flush()
+    time.sleep(3)  # Ensure persistence
+
+    # Fetch at the query_timestamp (should only return first item)
+    dataset = langfuse.get_dataset(name, version=query_timestamp)
+
+    # Verify only first item is retrieved
+    assert len(dataset.items) == 1
+    assert dataset.items[0].input == {"version": "v1"}
+    assert dataset.items[0].id == item1.id
+
+    # Verify fetching without version returns both items (latest)
+    dataset_latest = langfuse.get_dataset(name)
+    assert len(dataset_latest.items) == 2

From 5ac81d10fb619de89ab0456f578209cec943f175 Mon Sep 17 00:00:00 2001
From: Marlies Mayerhofer <74332854+marliessophie@users.noreply.github.com>
Date: Thu, 5 Feb 2026 22:11:19 +0100
Subject: [PATCH 3/3] test: derive dataset version timestamp from server-assigned created_at

---
 tests/test_datasets.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/tests/test_datasets.py b/tests/test_datasets.py
index fcc38402f..f86812138 100644
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -1,5 +1,6 @@
 import json
 import time
+from datetime import timedelta
 from concurrent.futures import ThreadPoolExecutor
 from typing import Sequence
 
@@ -531,8 +532,6 @@ def test_delete_dataset_run_with_folder_names():
 
 def test_get_dataset_with_version():
     """Test that get_dataset correctly filters items by version timestamp."""
-    from datetime import datetime, timezone
-    import time
 
     langfuse = Langfuse(debug=False)
 
@@ -543,11 +542,16 @@ def test_get_dataset_with_version():
     # Create first item
     item1 = langfuse.create_dataset_item(dataset_name=name, input={"version": "v1"})
     langfuse.flush()
-    time.sleep(3)  # Ensure persistence and clear temporal separation
+    time.sleep(3)  # Ensure persistence
+
+    # Fetch dataset to get the actual server-assigned timestamp of item1
+    dataset_after_item1 = langfuse.get_dataset(name)
+    assert len(dataset_after_item1.items) == 1
+    item1_created_at = dataset_after_item1.items[0].created_at
 
-    # Capture timestamp AFTER first item, BEFORE second item
-    query_timestamp = datetime.now(timezone.utc)
-    time.sleep(3)  # Ensure second item is created AFTER query_timestamp
+    # Use a timestamp 1 second after item1's actual creation time
+    query_timestamp = item1_created_at + timedelta(seconds=1)
+    time.sleep(3)  # Extra margin so item2 is created well after query_timestamp
 
     # Create second item
     langfuse.create_dataset_item(dataset_name=name, input={"version": "v2"})