Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,6 @@ private HCatConstants() { // restrict instantiation

public static final String HCAT_DYNAMIC_PTN_JOBID = HCAT_KEY_OUTPUT_BASE + ".dynamic.jobid";
public static final boolean HCAT_IS_DYNAMIC_MAX_PTN_CHECK_ENABLED = false;
public static final String HCAT_DYNAMIC_CUSTOM_PATTERN = "hcat.dynamic.partitioning.custom.pattern";

// Message Bus related properties.
public static final String HCAT_DEFAULT_TOPIC_PREFIX = "hcat";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,12 @@
import java.util.HashSet;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.fs.Path;

public class HCatFileUtil {

// regex of the form: ${column name}. Following characters are not allowed in column name:
// whitespace characters, /, {, }, \
private static final Pattern customPathPattern = Pattern.compile("(\\$\\{)([^\\s/\\{\\}\\\\]+)(\\})");
import static org.apache.hadoop.hive.metastore.utils.DynamicPartitioningCustomPattern.customPathPattern;

public class HCatFileUtil {
// This method parses the custom dynamic path and replaces each occurrence
// of column name within regex pattern with its corresponding value, if provided
public static String resolveCustomPath(OutputJobInfo jobInfo,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.HCAT_CUSTOM_DYNAMIC_PATTERN;

/** The OutputFormat to use to write data to HCatalog. The key value is ignored and
* should be given as null. The value is the HCatRecord to write.*/
@InterfaceAudience.Public
Expand Down Expand Up @@ -163,7 +165,7 @@ public static void setOutput(Configuration conf, Credentials credentials,
conf.set(HCatConstants.HCAT_DYNAMIC_PTN_JOBID, dynHash);

// if custom pattern is set in case of dynamic partitioning, configure custom path
String customPattern = conf.get(HCatConstants.HCAT_DYNAMIC_CUSTOM_PATTERN);
String customPattern = conf.get(HCAT_CUSTOM_DYNAMIC_PATTERN);
if (customPattern != null) {
HCatFileUtil.setCustomPath(customPattern, outputJobInfo);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@

import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertTrue;
import static org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.HCAT_CUSTOM_DYNAMIC_PATTERN;

public class TestHCatDynamicPartitioned extends HCatMapReduceTest {

Expand Down Expand Up @@ -125,7 +126,7 @@ protected void runHCatDynamicPartitionedTable(boolean asSingleMapTask,
generateWriteRecords(NUM_RECORDS, NUM_TOP_PARTITIONS, 0);
HashMap<String, String> properties = new HashMap<String, String>();
if (customDynamicPathPattern != null) {
properties.put(HCatConstants.HCAT_DYNAMIC_CUSTOM_PATTERN, customDynamicPathPattern);
properties.put(HCAT_CUSTOM_DYNAMIC_PATTERN, customDynamicPathPattern);
}
runMRCreate(null, dataColumns, writeRecords.subList(0,NUM_RECORDS/2), NUM_RECORDS/2,
true, asSingleMapTask, properties);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.parse.SemanticException;

import static org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.HCAT_CUSTOM_DYNAMIC_PATTERN;

/**
* Analyzer for add partition commands.
*/
Expand Down Expand Up @@ -70,7 +72,7 @@ protected void analyzeCommand(TableName tableName, Map<String, String> partition
}

AlterTableAddPartitionDesc desc = new AlterTableAddPartitionDesc(table.getDbName(), table.getTableName(),
ifNotExists, partitions);
ifNotExists, partitions, conf.get(HCAT_CUSTOM_DYNAMIC_PATTERN));
Task<DDLWork> ddlTask = TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc));
rootTasks.add(ddlTask);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,12 +178,20 @@ public long getWriteId() {

private ReplicationSpec replicationSpec = null; // TODO: make replicationSpec final too

private final String customPattern;

public AlterTableAddPartitionDesc(String dbName, String tableName, boolean ifNotExists,
List<PartitionDesc> partitions) {
this(dbName, tableName, ifNotExists, partitions, null);
}

public AlterTableAddPartitionDesc(String dbName, String tableName, boolean ifNotExists,
List<PartitionDesc> partitions) {
List<PartitionDesc> partitions, String customPattern) {
this.dbName = dbName;
this.tableName = tableName;
this.ifNotExists = ifNotExists;
this.partitions = partitions;
this.customPattern = customPattern;
}

@Explain(displayName = "db name", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
Expand Down Expand Up @@ -241,4 +249,8 @@ public boolean mayNeedWriteId() {
return true;
}

/**
 * Returns the custom dynamic-partitioning path pattern supplied at
 * construction time (e.g. {@code "const/${partstate}/${partid}"}),
 * or {@code null} when no custom pattern was configured.
 */
public String getCustomPattern() {
  return customPattern;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ private long getWriteId(Table table) throws LockException {
private List<Partition> getPartitions(Table table, long writeId) throws HiveException {
List<Partition> partitions = new ArrayList<>(desc.getPartitions().size());
for (AlterTableAddPartitionDesc.PartitionDesc partitionDesc : desc.getPartitions()) {
Partition partition = convertPartitionSpecToMetaPartition(table, partitionDesc);
Partition partition = convertPartitionSpecToMetaPartition(table, partitionDesc, desc.getCustomPattern());
if (partition != null && writeId > 0) {
partition.setWriteId(writeId);
}
Expand All @@ -91,15 +91,15 @@ private List<Partition> getPartitions(Table table, long writeId) throws HiveExce
}

private Partition convertPartitionSpecToMetaPartition(Table table,
AlterTableAddPartitionDesc.PartitionDesc partitionSpec) throws HiveException {
AlterTableAddPartitionDesc.PartitionDesc partitionSpec, String customPattern) throws HiveException {
Path location = partitionSpec.getLocation() != null ? new Path(table.getPath(), partitionSpec.getLocation()) : null;
if (location != null) {
// Ensure that it is a full qualified path (in most cases it will be since tbl.getPath() is full qualified)
location = new Path(Utilities.getQualifiedPath(context.getConf(), location));
}

Partition partition = org.apache.hadoop.hive.ql.metadata.Partition.createMetaPartitionObject(
table, partitionSpec.getPartSpec(), location);
table, partitionSpec.getPartSpec(), location, customPattern);

if (partitionSpec.getPartParams() != null) {
partition.setParameters(partitionSpec.getPartParams());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import java.util.List;
import java.util.Map;

import static org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.HCAT_CUSTOM_DYNAMIC_PATTERN;

public class FSTableEvent implements TableEvent {
private final Path fromPathMetadata;
Expand Down Expand Up @@ -179,7 +180,7 @@ private AlterTableAddPartitionDesc addPartitionDesc(Path fromPath, ImportTableDe
sd.getBucketCols(), sd.getSortCols(), columnStatistics, writeId);

AlterTableAddPartitionDesc addPartitionDesc = new AlterTableAddPartitionDesc(tblDesc.getDatabaseName(),
tblDesc.getTableName(), true, ImmutableList.of(partitionDesc));
tblDesc.getTableName(), true, ImmutableList.of(partitionDesc), hiveConf.get(HCAT_CUSTOM_DYNAMIC_PATTERN));
addPartitionDesc.setReplicationSpec(replicationSpec());
return addPartitionDesc;
} catch (Exception e) {
Expand Down
21 changes: 19 additions & 2 deletions ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import java.util.Map;
import java.util.Objects;
import java.util.Properties;
import java.util.regex.Matcher;

import org.apache.hadoop.hive.common.StringInternUtils;
import org.apache.hadoop.hive.metastore.HiveMetaStoreUtils;
Expand All @@ -51,6 +52,8 @@
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.OutputFormat;

import static org.apache.hadoop.hive.metastore.utils.DynamicPartitioningCustomPattern.customPathPattern;

/**
* A Hive Table Partition: is a fundamental storage unit within a Table.
*
Expand Down Expand Up @@ -119,11 +122,16 @@ public Partition(Table tbl, org.apache.hadoop.hive.metastore.api.Partition tp)
* Thrown if we could not create the partition.
*/
public Partition(Table tbl, Map<String, String> partSpec, Path location) throws HiveException {
initialize(tbl, createMetaPartitionObject(tbl, partSpec, location));
initialize(tbl, createMetaPartitionObject(tbl, partSpec, location, null));
}

public static org.apache.hadoop.hive.metastore.api.Partition createMetaPartitionObject(
Table tbl, Map<String, String> partSpec, Path location) throws HiveException {
return createMetaPartitionObject(tbl, partSpec, location, null);
}

public static org.apache.hadoop.hive.metastore.api.Partition createMetaPartitionObject(
Table tbl, Map<String, String> partSpec, Path location) throws HiveException {
Table tbl, Map<String, String> partSpec, Path location, String customPattern) throws HiveException {
List<String> pvals = new ArrayList<String>();
for (FieldSchema field : tbl.getPartCols()) {
String val = partSpec.get(field.getName());
Expand All @@ -142,6 +150,15 @@ public static org.apache.hadoop.hive.metastore.api.Partition createMetaPartition

if (!tbl.isView()) {
tpart.setSd(tbl.getSd().deepCopy());
if (location == null && customPattern != null) { //should only happen for msck repair table with custom pattern
Matcher m = customPathPattern.matcher(customPattern);
StringBuffer sb = new StringBuffer();
while (m.find()) {
m.appendReplacement(sb, partSpec.get(m.group(2)));
Copy link

Copilot AI Feb 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The code partSpec.get(m.group(2)) can return null if the partition spec doesn't contain a value for the column name extracted from the pattern. This would result in appendReplacement inserting "null" as a string into the path. Add a null check and throw an appropriate exception if a required partition column is missing from the spec.

Suggested change
m.appendReplacement(sb, partSpec.get(m.group(2)));
String partColName = m.group(2);
String partValue = partSpec.get(partColName);
if (partValue == null) {
throw new HiveException("partition spec is invalid; required partition column '"
+ partColName + "' is missing for custom pattern: " + customPattern);
}
m.appendReplacement(sb, partValue);

Copilot uses AI. Check for mistakes.
}
m.appendTail(sb);
location = new Path(tbl.getDataLocation(), sb.toString());
}
tpart.getSd().setLocation((location != null) ? location.toString() : null);
}
return tpart;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.ReplChangeManager;
import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils;
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.ErrorMsg;
Expand Down Expand Up @@ -414,7 +415,8 @@ private static AlterTableAddPartitionDesc getBaseAddPartitionDescFromPartition(P
partitionSpec, location, partition.getParameters(), sd.getInputFormat(), sd.getOutputFormat(),
sd.getNumBuckets(), sd.getCols(), sd.getSerdeInfo().getSerializationLib(), sd.getSerdeInfo().getParameters(),
sd.getBucketCols(), sd.getSortCols(), null, writeId);
return new AlterTableAddPartitionDesc(dbName, tblDesc.getTableName(), true, ImmutableList.of(partitionDesc));
return new AlterTableAddPartitionDesc(dbName, tblDesc.getTableName(), true, ImmutableList.of(partitionDesc),
conf.get(MetaStoreServerUtils.HCAT_CUSTOM_DYNAMIC_PATTERN));
}

private static ImportTableDesc getBaseCreateTableDescFromTable(String dbName,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.apache.hadoop.hive.metastore.HiveMetaStoreChecker;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
Expand All @@ -56,6 +57,8 @@

import com.google.common.collect.Lists;

import static org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.HCAT_CUSTOM_DYNAMIC_PATTERN;

/**
* TestHiveMetaStoreChecker.
*
Expand All @@ -66,15 +69,22 @@ public class TestHiveMetaStoreChecker {
private IMetaStoreClient msc;
private FileSystem fs;
private HiveMetaStoreChecker checker = null;
private HiveMetaStoreChecker customChecker = null;

private final String catName = "hive";
private final String dbName = "testhivemetastorechecker_db";
private final String tableName = "testhivemetastorechecker_table";
private final String externalTableName = "testhivemetastorechecker_table_external";

private final String partDateName = "partdate";
private final String partCityName = "partcity";

private final String partIdName = "partid";

private final String partStateName = "partstate";

private List<FieldSchema> partCols;
private List<FieldSchema> partColsExt;
private List<Map<String, String>> parts;

@Before
Expand All @@ -92,10 +102,19 @@ public void setUp() throws Exception {
SessionState ss = SessionState.start(conf);
ss.initTxnMgr(conf);

HiveConf customConf = new HiveConf(conf);
customConf.set(HCAT_CUSTOM_DYNAMIC_PATTERN, "const/${partstate}/const2/${partid}-${partcity}");
customChecker = new HiveMetaStoreChecker(msc, customConf);

partCols = new ArrayList<>();
partCols.add(new FieldSchema(partDateName, serdeConstants.STRING_TYPE_NAME, ""));
partCols.add(new FieldSchema(partCityName, serdeConstants.STRING_TYPE_NAME, ""));

partColsExt = new ArrayList<FieldSchema>();
partColsExt.add(new FieldSchema(partCityName, serdeConstants.STRING_TYPE_NAME, ""));
partColsExt.add(new FieldSchema(partStateName, serdeConstants.STRING_TYPE_NAME, ""));
partColsExt.add(new FieldSchema(partIdName, serdeConstants.STRING_TYPE_NAME, ""));

parts = new ArrayList<>();
Map<String, String> part1 = new HashMap<>();
part1.put(partDateName, "2008-01-01");
Expand Down Expand Up @@ -342,6 +361,33 @@ public void testAddPartitionCompactedBase() throws Exception {
assertEquals(200, result.getPartitionsNotInMs().iterator().next().getMaxTxnId());
}

@Test
public void testCustomPartitionPatternCheck()
    throws HiveException, AlreadyExistsException, IOException, MetastoreException {
  Table table = createTestExternalTable();
  Path tablePath = table.getDataLocation();
  fs = tablePath.getFileSystem(hive.getConf());

  // Partition directories laid out according to the custom pattern
  // configured on customChecker: const/${partstate}/const2/${partid}-${partcity}
  String[] partitionDirs = {
      "const/CO/const2/1-denver",
      "const/CA/const2/1-sanjose",
      "const/CA/const2/2-sanfrancisco",
      "const/IL/const2/1-chicago",
      "const/TX/const2/1-dallas",
      "const/TX/const2/2-austin",
      "const/NY/const2/1-newyork"
  };
  for (String dir : partitionDirs) {
    Path partDir = new Path(tablePath, dir);
    fs.mkdirs(partDir);
    fs.createNewFile(new Path(partDir, "datafile"));
  }

  // None of the on-disk partitions exist in the metastore, so the checker
  // must report every one of them as missing from the metastore.
  CheckResult result = customChecker.checkMetastore(null, dbName, externalTableName, null, null);
  assertEquals(partitionDirs.length, result.getPartitionsNotInMs().size());

  //cleanup
  hive.dropTable(dbName, externalTableName);
  fs.delete(tablePath, true);
}

@Test
public void testAddPartitionMMBase() throws Exception {
Table table = createTestTable(true);
Expand Down Expand Up @@ -812,6 +858,23 @@ private Table createTestTable(boolean transactional) throws HiveException, Alrea
}
return table;
}

/**
 * Creates (if needed) the test database and an EXTERNAL, partitioned test
 * table used by the custom-pattern checker tests, then returns the table
 * as re-fetched from the metastore.
 *
 * @return the table object fetched back from the metastore after creation
 * @throws AlreadyExistsException if the table already exists in the metastore
 * @throws HiveException on any other metastore/DDL failure
 */
private Table createTestExternalTable() throws AlreadyExistsException, HiveException {
  Database db = new Database();
  db.setName(dbName);
  // second arg = ifNotExists, so repeated setup calls are harmless
  hive.createDatabase(db, true);

  Table table = new Table(dbName, externalTableName);
  table.setDbName(dbName);
  table.setInputFormatClass(TextInputFormat.class);
  table.setOutputFormatClass(HiveIgnoreKeyTextOutputFormat.class);
  table.setPartCols(partColsExt);
  // BUG FIX: the table type must be set BEFORE createTable(); previously it
  // was applied only to the locally re-fetched object, so the metastore
  // entry was registered as a managed table instead of an external one.
  table.setTableType(TableType.EXTERNAL_TABLE);

  hive.createTable(table);
  return hive.getTable(dbName, externalTableName);
}
private Table createNonPartitionedTable() throws Exception {
Database db = new Database();
db.setName(dbName);
Expand Down
Loading