Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,10 @@ singleTableSchema
: colTypeList EOF
;

// Entry rule for parsing a standalone SQL PATH element list — the right-hand side of
// `SET PATH = ...` — as one or more comma-separated pathElement, consuming all input (EOF).
singlePathElementList
: pathElement (COMMA pathElement)* EOF
;

singleRoutineParamList
: colDefinitionList EOF
;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1981,14 +1981,15 @@ class Analyzer(
* This is used for special syntax transformations (e.g., COUNT(*) -> COUNT(1)) that
* should only apply to builtin functions, not to user-defined functions.
*
* In legacy mode (sessionOrder="first"), temp functions shadow builtins, so an
* unqualified name that matches a temp function should NOT be treated as builtin.
* When the effective SQL PATH puts `system.session` before `system.builtin`, temp
* functions shadow builtins, so an unqualified name that matches a temp function
* should NOT be treated as builtin.
*/
private def matchesFunctionName(nameParts: Seq[String], expectedName: String): Boolean = {
if (!FunctionResolution.isUnqualifiedOrBuiltinFunctionName(nameParts, expectedName)) {
return false
}
if (nameParts.size == 1 && conf.sessionFunctionResolutionOrder == "first") {
if (nameParts.size == 1 && functionResolution.isSessionBeforeBuiltinInPath) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Single-pass resolver lacks the path-driven gate. This gate (isSessionBeforeBuiltinInPath) is now path-driven in the iterative analyzer, so a SET PATH = system.session, system.builtin, ... correctly skips count(*)→count(1) rewrite for a temp count. The single-pass equivalent -- FunctionResolverUtils.normalizeCountExpression, called from FunctionResolver / HigherOrderFunctionResolver -- has no such gate; isCount only checks the name. Pre-existing divergence (single-pass also didn't honor sessionFunctionResolutionOrder == "first"), but the PR makes the iterative gate easier to trip via SET PATH, so the asymmetry becomes more reachable. Out of scope to fix here, or worth a follow-up?

val v1Catalog = catalogManager.v1SessionCatalog
!v1Catalog.isTemporaryFunction(FunctionIdentifier(nameParts.head))
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,22 @@ class FunctionResolution(
nameParts.length == 3 &&
nameParts.head.equalsIgnoreCase(CatalogManager.SYSTEM_CATALOG_NAME)

/**
 * True iff `system.session` is searched before `system.builtin` in the effective SQL PATH.
 *
 * Drives the `count(*) -> count(1)` rewrite (which must skip the transformation when a temp
 * `count` shadows the builtin) and the `SessionCatalog` security check that blocks creating
 * a temp function with a builtin's name. The live PATH is read through `CatalogManager`,
 * and the same kinds extraction that feeds `SessionCatalog`'s fast-path provider is applied
 * here, so this predicate stays in sync with the lookup loop's actual order.
 */
def isSessionBeforeBuiltinInPath: Boolean = {
  val catalogName = catalogManager.currentCatalog.name()
  val namespace = catalogManager.currentNamespace.toSeq
  val pathEntries = catalogManager.sqlResolutionPathEntries(catalogName, namespace)
  val orderedKinds = CatalogManager.systemFunctionKindsFromPath(pathEntries)
  orderedKinds.headOption.exists(
    _ == org.apache.spark.sql.catalyst.catalog.SessionCatalog.Temp)
}

/**
* Produces the ordered list of candidate names for resolution. Expansion happens in two cases:
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,18 +113,47 @@ class SessionCatalog(
identifier.copy(funcName = "") == SESSION_NAMESPACE_TEMPLATE

/**
* Session function kinds in resolution order for unqualified lookups.
* Matches [[SQLConf.sessionFunctionResolutionOrder]]: "first" (session first),
* "second" (default), "last" (builtin only; session tried after persistent).
* Provider for the ordered `system.builtin` / `system.session` entries on the effective SQL
* PATH (mapped to [[SessionFunctionKind]]). Wired by
* [[org.apache.spark.sql.connector.catalog.CatalogManager]] via
* [[setSessionFunctionKindsProvider]] so unqualified function lookups and the security check
* that blocks temp functions from shadowing builtins read the live PATH (post-`SET PATH`,
* with [[SQLConf.DEFAULT_PATH]] and [[SQLConf.defaultPathOrder]] fallbacks already applied).
*
* The default derives kinds from [[SQLConf.systemPathOrder]] -- which is itself the seeded
* default path -- so this code makes no direct assumption about the legacy resolution-order
* conf beyond using it to seed `defaultPathOrder`. Tests that construct a [[SessionCatalog]]
* standalone without [[CatalogManager]] get sensible default kinds; tests that need other
* orderings should call [[setSessionFunctionKindsProvider]] explicitly.
*/
private def sessionFunctionKindsInResolutionOrder: Seq[SessionFunctionKind] = {
conf.sessionFunctionResolutionOrder match {
case "first" => Seq(Temp, Builtin)
case "last" => Seq(Builtin)
case _ => Seq(Builtin, Temp) // "second" (default)
}
// Default provider: derive the kinds from the conf-seeded system path order. CatalogManager
// is expected to replace this via setSessionFunctionKindsProvider with a live, PATH-driven
// provider; the default only covers catalogs constructed standalone (e.g. in tests).
@volatile private var sessionFunctionKindsProvider: () => Seq[SessionFunctionKind] =
() => CatalogManager.systemFunctionKindsFromPath(conf.systemPathOrder)

/**
* Override the session-function-kinds provider. Called once by
* [[org.apache.spark.sql.connector.catalog.CatalogManager]] from its constructor to wire in
* path-driven kinds; intended for that single writer plus tests that bypass `CatalogManager`.
* The backing field is `@volatile`, so the swap is visible to concurrent lookups.
*/
private[sql] def setSessionFunctionKindsProvider(
provider: () => Seq[SessionFunctionKind]): Unit = {
sessionFunctionKindsProvider = provider
}

/**
* Session function kinds in resolution order for unqualified lookups, derived from the
* effective SQL PATH via [[sessionFunctionKindsProvider]].
*/
private def sessionFunctionKindsInResolutionOrder: Seq[SessionFunctionKind] =
sessionFunctionKindsProvider()

/**
 * True iff the effective SQL PATH searches `system.session` before `system.builtin`. Used
 * to gate the security check that blocks temporary functions from silently shadowing a
 * builtin of the same name.
 */
private def sessionFirstInPath: Boolean =
  sessionFunctionKindsInResolutionOrder match {
    case head +: _ => head == Temp
    case _ => false
  }

/**
* Checks if a namespace represents temporary functions.
*/
Expand Down Expand Up @@ -2080,12 +2109,11 @@ class SessionCatalog(
qualifyIdentifier(func)
}

// Security check: When legacy mode is enabled, block SQL-created temporary functions
// from shadowing builtin functions (to preserve master behavior)
// Scala UDFs are still allowed to shadow in legacy mode
// We throw ROUTINE_ALREADY_EXISTS to indicate the builtin function already exists
val sessionFirst = conf.sessionFunctionResolutionOrder == "first"
if (func.database.isEmpty && sessionFirst && !overrideIfExists) {
// Security check: when the effective SQL PATH searches `system.session` before
// `system.builtin`, block creating an unqualified temporary function whose name
// collides with a builtin so it cannot silently shadow that builtin via unqualified
// resolution. We throw ROUTINE_ALREADY_EXISTS to indicate the conflict.
if (func.database.isEmpty && sessionFirstInPath && !overrideIfExists) {
val funcName = func.funcName
// Check if function exists in builtin namespace (extensions are stored as builtins)
val builtinIdent = FunctionRegistry.builtinFunctionIdentifier(funcName)
Expand Down Expand Up @@ -2195,10 +2223,11 @@ class SessionCatalog(
// Use FunctionIdentifier with session namespace for temporary functions
val tempIdentifier = tempFunctionIdentifier(function.name.funcName)

// Security check: When legacy mode is enabled, block SQL-created temporary functions
// from shadowing builtin functions (including extensions) as a safeguard
// We throw ROUTINE_ALREADY_EXISTS to indicate the builtin function already exists
if ((conf.sessionFunctionResolutionOrder == "first") && !overrideIfExists) {
// Security check: when the effective SQL PATH searches `system.session` before
// `system.builtin`, block creating an unqualified temporary function whose name
// collides with a builtin (including extensions) so it cannot silently shadow that
// builtin via unqualified resolution.
if (sessionFirstInPath && !overrideIfExists) {
val funcName = function.name.funcName
// Check if function exists in builtin namespace (extensions are stored as builtins)
val builtinIdent = FunctionRegistry.builtinFunctionIdentifier(funcName)
Expand Down Expand Up @@ -2499,7 +2528,11 @@ class SessionCatalog(
* Look up the `ExpressionInfo` of the given function by name.
* Resolution order follows the configured path (e.g. builtin then session).
*/
def lookupBuiltinOrTempTableFunction(name: String): Option[ExpressionInfo] = synchronized {
def lookupBuiltinOrTempTableFunction(name: String): Option[ExpressionInfo] = {
// Intentionally not `synchronized` on this [[SessionCatalog]]. Resolution order calls
// [[sessionFunctionKindsProvider]], which may synchronize on [[CatalogManager]]; another
// thread can hold that lock and call into this catalog (e.g. via `setCurrentNamespace`),
// which would deadlock if this method also synchronized on `this`.
// checkBuiltinOperators = false — presumably operator names should not resolve as table
// functions here; NOTE(review): confirm against lookupFunctionWithShadowing's contract.
lookupFunctionWithShadowing(name, tableFunctionRegistry, checkBuiltinOperators = false)
}

Expand Down Expand Up @@ -2650,7 +2683,10 @@ class SessionCatalog(
/**
* Look up the [[ExpressionInfo]] associated with the specified function, assuming it exists.
*/
def lookupFunctionInfo(name: FunctionIdentifier): ExpressionInfo = synchronized {
def lookupFunctionInfo(name: FunctionIdentifier): ExpressionInfo = {
// Intentionally not `synchronized` on this [[SessionCatalog]] (see
// [[lookupBuiltinOrTempTableFunction]]): unqualified builtin/temp resolution uses
// [[sessionFunctionKindsProvider]] and must not run under this catalog's intrinsic lock.
if (name.database.isEmpty) {
lookupBuiltinOrTempFunction(name.funcName)
.orElse(lookupBuiltinOrTempTableFunction(name.funcName))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.parser.ParserUtils.withOrigin
import org.apache.spark.sql.catalyst.plans.logical.{CompoundPlanStatement, LogicalPlan}
import org.apache.spark.sql.catalyst.trees.Origin
import org.apache.spark.sql.connector.catalog.PathElement
import org.apache.spark.sql.errors.QueryParsingErrors
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.StructType
Expand Down Expand Up @@ -110,6 +111,18 @@ abstract class AbstractSqlParser extends AbstractParser with ParserInterface {
}
}

/**
 * Parses the comma-separated path-element list that forms the right-hand side of
 * `SET PATH = ...`. Used by [[org.apache.spark.sql.connector.catalog.CatalogManager]] to
 * interpret the [[SQLConf.DEFAULT_PATH]] conf with the exact SET PATH grammar instead of
 * re-implementing it.
 */
private[sql] def parsePathElements(sqlText: String): Seq[PathElement] = {
  parse(sqlText) { parser =>
    val listCtx = parser.singlePathElementList()
    withErrorHandling(listCtx, Some(sqlText)) {
      astBuilder.visitSinglePathElementList(listCtx)
    }
  }
}

def withErrorHandling[T](ctx: ParserRuleContext, sqlText: Option[String])(toResult: => T): T = {
withOrigin(ctx, sqlText) {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ import org.apache.spark.sql.catalyst.trees.TreePattern.PARAMETER
import org.apache.spark.sql.catalyst.types.DataTypeUtils
import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, CollationFactory, DateTimeUtils, EvaluateUnresolvedInlineTable, IntervalUtils}
import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ, getZoneId, stringToDate, stringToTime, stringToTimestamp, stringToTimestampWithoutTimeZone}
import org.apache.spark.sql.connector.catalog.{CatalogV2Util, ChangelogInfo, SupportsNamespaces, TableCatalog, TableWritePrivilege}
import org.apache.spark.sql.connector.catalog.{CatalogV2Util, ChangelogInfo, PathElement, SupportsNamespaces, TableCatalog, TableWritePrivilege}
import org.apache.spark.sql.connector.catalog.ChangelogRange.{TimestampRange, UnboundedRange, VersionRange}
import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition
import org.apache.spark.sql.connector.expressions.{ApplyTransform, BucketTransform, DaysTransform, Expression => V2Expression, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, YearsTransform}
Expand Down Expand Up @@ -708,6 +708,26 @@ class AstBuilder extends DataTypeAstBuilder
visitMultipartIdentifier(ctx.multipartIdentifier)
}

/**
 * Translates the standalone path-element list rule (the `SET PATH` right-hand side) into
 * the sequence of [[PathElement]]s, preserving source order.
 */
override def visitSinglePathElementList(
    ctx: SinglePathElementListContext): Seq[PathElement] = withOrigin(ctx) {
  val elementCtxs = ctx.pathElement().asScala.toSeq
  elementCtxs.map(visitPathElement)
}

/**
 * Translates one parsed path element into its [[PathElement]] representation. The keyword
 * forms (DEFAULT PATH, SYSTEM PATH, PATH, CURRENT_DATABASE / CURRENT_SCHEMA) map to their
 * dedicated markers; anything else must be a multipart schema reference with at least two
 * name parts, otherwise an invalid-schema-reference error is raised.
 */
override def visitPathElement(ctx: PathElementContext): PathElement = withOrigin(ctx) {
  // Check the keyword alternatives first, in the same order as the original dispatch.
  val keywordForm: Option[PathElement] =
    if (ctx.DEFAULT_PATH() != null) {
      Some(PathElement.DefaultPath)
    } else if (ctx.SYSTEM_PATH() != null) {
      Some(PathElement.SystemPath)
    } else if (ctx.PATH() != null) {
      Some(PathElement.PathRef)
    } else if (ctx.CURRENT_DATABASE() != null || ctx.CURRENT_SCHEMA() != null) {
      Some(PathElement.CurrentSchema)
    } else {
      None
    }
  keywordForm.getOrElse {
    // Bare identifier list: require a qualified schema reference (>= 2 parts).
    val nameParts = visitMultipartIdentifier(ctx.multipartIdentifier())
    if (nameParts.length < 2) {
      throw QueryCompilationErrors.invalidSqlPathSchemaReferenceError(nameParts.mkString("."))
    }
    PathElement.SchemaInPath(nameParts)
  }
}

// Entry point for parsing a standalone data type string into a Catalyst [[DataType]].
override def visitSingleDataType(ctx: SingleDataTypeContext): DataType = withOrigin(ctx) {
typedVisit[DataType](ctx.dataType)
}
Expand Down
Loading