diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index fbf915747bfe3..7fd09b05c8e6f 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -6027,6 +6027,12 @@ ], "sqlState" : "42836" }, + "RECURSIVE_FILE_LOOKUP_NOT_SUPPORTED_FOR_PARTITIONED_DATA_SOURCE" : { + "message" : [ + "Recursive file loading is not supported when the data source has explicit partition columns. Either remove the option \"recursiveFileLookup\", or read the data without supplying partition columns (for example, do not read a partitioned table)." + ], + "sqlState" : "0A000" + }, "RECURSIVE_PROTOBUF_SCHEMA" : { "message" : [ "Found recursive reference in Protobuf schema, which can not be processed by Spark by default: <fieldDescriptor>. try setting the option `recursive.fields.max.depth` 1 to 10. Going beyond 10 levels of recursion is not allowed." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 6f1486c1f1dbd..f72a0bfe6598d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -3426,6 +3426,12 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat "newPath" -> newPath.map(toSQLId).mkString(" -> "))) } + def recursiveFileLookupNotSupportedForPartitionedDataSourceError(): Throwable = { + new AnalysisException( + errorClass = "RECURSIVE_FILE_LOOKUP_NOT_SUPPORTED_FOR_PARTITIONED_DATA_SOURCE", + messageParameters = Map.empty) + } + def notAllowedToCreatePermanentViewWithoutAssigningAliasForExpressionError( viewNameParts: Seq[String], attr: Attribute): Throwable = { diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala index 1bf0d2f0301f2..8cea2c95e6940 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.{expressions, InternalRow} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.types.StructType import org.apache.spark.util.ArrayImplicits._ @@ -89,8 +90,7 @@ abstract class PartitioningAwareFileIndex( PartitionDirectory(InternalRow.empty, allFiles().toArray.filter(isNonEmptyFile))) :: Nil } else { if (recursiveFileLookup) { - throw new IllegalArgumentException( - "Datasource with partition do not allow recursive file loading.") + throw QueryCompilationErrors.recursiveFileLookupNotSupportedForPartitionedDataSourceError() } prunePartitions(partitionFilters, partitionSpec()).map { case PartitionPath(values, path) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index 8aa6f5a5d0e6e..1fc45e9703f9e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -878,6 +878,29 @@ class FileBasedDataSourceSuite extends SharedSparkSession assert(fileList.toSet === expectedFileList.toSet) } + test("recursiveFileLookup with a partitioned catalog table is rejected") { + withTable("part_tbl") { + sql( + """ + |CREATE TABLE part_tbl (id INT, value STRING) + 
|USING parquet + |PARTITIONED BY (year INT) + |""".stripMargin) + sql("INSERT INTO part_tbl PARTITION (year = 2024) VALUES (1, 'a')") + sql("INSERT INTO part_tbl PARTITION (year = 2025) VALUES (2, 'b')") + checkError( + exception = intercept[AnalysisException] { + spark.read + .option("recursiveFileLookup", "true") + .table("part_tbl") + .collect() + }, + condition = "RECURSIVE_FILE_LOOKUP_NOT_SUPPORTED_FOR_PARTITIONED_DATA_SOURCE", + parameters = Map.empty[String, String] + ) + } + } + test("Return correct results when data columns overlap with partition columns") { Seq("parquet", "orc", "json").foreach { format => withTempPath { path =>