From 1eb2cbce587b820924f5753ae45588982893c2e5 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Tue, 10 Mar 2026 10:34:42 +0900 Subject: [PATCH 1/2] DAOS-18190 container: Split CONT_DESTROY RDB TX CONT_DESTROY handling currently employs one RDB TX and invokes a CONT_TGT_DESTROY CoRPC synchronously during the RDB TX. This is related to the following problems: - DAOS-18190: A partially-destroyed container remains openable, leading to false-positive "corruptions". - DAOS-18301: The CONT_TGT_DESTROY CoRPC may trigger deadlocks on cs_lock. This patch addresses both problems by splitting CONT_DESTROY handling such that a first RDB TX marks the container as DESTROYING (i.e., having entered the state of being destroyed), the CONT_TGT_DESTROY CoRPC is then sent without holding cs_lock, and a second RDB TX removes the container from the RDB. Although not openable, DESTROYING containers still appear in container list results, and may need to be destroyed again. This is to minimize changes and focus on the most important problems first; further improvements will come next. 
Test-tag: pr test_ec_online_rebuild Signed-off-by: Li Wei --- src/container/srv_container.c | 317 +++++++++++++++++++++++++------ src/container/srv_layout.h | 14 +- src/container/srv_target.c | 7 - src/gurt/tests/test_gurt.c | 12 +- src/include/daos/common.h | 5 +- src/include/daos_errno.h | 5 +- src/include/daos_srv/container.h | 6 +- src/pool/srv_pool.c | 4 +- src/tests/suite/daos_container.c | 40 +++- 9 files changed, 324 insertions(+), 86 deletions(-) diff --git a/src/container/srv_container.c b/src/container/srv_container.c index 42b0702f782..8d8b40c2368 100644 --- a/src/container/srv_container.c +++ b/src/container/srv_container.c @@ -1280,6 +1280,26 @@ cont_create(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont_svc *sv return rc; } +/* Magic number for cont_dbl_op_val.cdv_magic */ +#define CONT_DBL_OP_VAL_MAGIC 0xd23f93ba + +/* + * CONT_DESTROY_BYLABEL ds_pool_svc_op_val.ov_resvd type + * + * A duplicate CONT_DESTROY_BYLABEL request must use cdv_uuid instead of looking + * up the container UUID by label again, for the original container may have + * been destroyed already and the label reused by another container. + * + * We need cdv_magic because the reserved bytes (ov_resvd) used to store this + * struct were not zeroed properly in previous versions. 
+ */ +struct cont_dbl_op_val { + uint32_t cdv_magic; + uuid_t cdv_uuid; +}; +D_CASSERT(sizeof(struct cont_dbl_op_val) <= sizeof(((struct ds_pool_svc_op_val *)0)->ov_resvd)); +D_CASSERT(offsetof(struct cont_dbl_op_val, cdv_uuid) == sizeof(uint32_t)); + static int cont_destroy_bcast(crt_context_t ctx, struct cont_svc *svc, const uuid_t cont_uuid) @@ -1515,13 +1535,13 @@ find_hdls_by_cont_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *varg) return 0; } -static int cont_close_hdls(struct cont_svc *svc, - struct cont_tgt_close_rec *recs, int nrecs, - crt_context_t ctx); +static int +cont_close_hdls(struct cont_svc *svc, struct cont_tgt_close_rec *recs, int nrecs, crt_context_t ctx, + bool remotely); static int evict_hdls(struct rdb_tx *tx, struct cont *cont, bool force, struct ds_pool_hdl *pool_hdl, - crt_context_t ctx) + crt_context_t ctx, bool remotely) { struct find_hdls_by_cont_arg arg; int rc; @@ -1543,11 +1563,12 @@ evict_hdls(struct rdb_tx *tx, struct cont *cont, bool force, struct ds_pool_hdl if (!force) { rc = -DER_BUSY; - D_WARN("Not evicting handles, "DF_RC"\n", DP_RC(rc)); + DL_INFO(rc, DF_CONT ": not evicting handles", + DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid)); goto out; } - rc = cont_close_hdls(cont->c_svc, arg.fha_buf.rb_recs, arg.fha_buf.rb_nrecs, ctx); + rc = cont_close_hdls(cont->c_svc, arg.fha_buf.rb_recs, arg.fha_buf.rb_nrecs, ctx, remotely); out: recs_buf_fini(&arg.fha_buf); @@ -1561,20 +1582,29 @@ static int cont_destroy(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont, crt_rpc_t *rpc, int cont_proto_ver) { - d_iov_t key; d_iov_t val; int rc; - daos_prop_t *prop = NULL; - struct daos_prop_entry *lbl_ent; + container_flags_t container_flags; + daos_prop_t *prop = NULL; struct d_ownership owner; uint32_t force; - struct daos_acl *acl; - bool need_destroy_oid_oit_kvs = false; + struct daos_acl *acl; cont_destroy_in_get_data(rpc, opc_get(rpc->cr_opc), cont_proto_ver, &force, NULL); D_DEBUG(DB_MD, DF_CONT ": 
processing rpc: %p force=%u\n", DP_CONT(pool_hdl->sph_pool->sp_uuid, cont->c_uuid), rpc, force); + d_iov_set(&val, &container_flags, sizeof(container_flags)); + rc = rdb_tx_lookup(tx, &cont->c_prop, &ds_cont_prop_ghce, &val); + if (rc != 0) + goto out; + if (container_flags & CONTAINER_F_DESTROYING) { + D_DEBUG(DB_MD, DF_CONT ": already destroying\n", + DP_CONT(pool_hdl->sph_pool->sp_uuid, cont->c_uuid)); + rc = 0; + goto out; + } + /* Reset recov_cont prop to notify on flight pool_recov_cont to retry. */ rc = ds_pool_prop_recov_cont_reset(tx, cont->c_svc->cs_rsvc); if (rc != 0) @@ -1605,13 +1635,68 @@ cont_destroy(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont, D_GOTO(out_prop, rc = -DER_NO_PERM); } - rc = evict_hdls(tx, cont, force, NULL /* pool_hdl */, rpc->cr_ctx); + /* + * Delete the handles from the DB. The remote ds_cont_hdl objects will + * be destroyed by the CONT_TGT_DESTROY CoRPC. + * + * Replacing this generic call with a container-destroy-specific one + * could optimize away things like the tracking of the number of + * handles and the deletion of the entries in the handle index KVS. 
+ */ + rc = evict_hdls(tx, cont, force, NULL /* pool_hdl */, rpc->cr_ctx, false /* remotely */); if (rc != 0) goto out_prop; - rc = cont_destroy_bcast(rpc->cr_ctx, cont->c_svc, cont->c_uuid); + container_flags |= CONTAINER_F_DESTROYING; + d_iov_set(&val, &container_flags, sizeof(container_flags)); + rc = rdb_tx_update(tx, &cont->c_prop, &ds_cont_prop_ghce, &val); + +out_prop: + daos_prop_free(prop); +out: + return rc; +} + +static int +cont_destroy_post(struct ds_pool_hdl *pool_hdl, struct cont_svc *svc, uuid_t uuid, crt_rpc_t *rpc, + int cont_proto_ver) +{ + struct rdb_tx tx; + struct cont *cont; + d_iov_t key; + d_iov_t val; + int rc; + daos_prop_t *prop = NULL; + struct daos_prop_entry *lbl_ent; + bool need_destroy_oid_oit_kvs = false; + + if (DAOS_FAIL_CHECK(DAOS_CONT_DESTROY_FAIL_POST)) { + rc = -DER_NOMEM; + goto out; + } + + rc = cont_destroy_bcast(rpc->cr_ctx, svc, uuid); if (rc != 0) - goto out_prop; + goto out; + + rc = rdb_tx_begin(svc->cs_rsvc->s_db, svc->cs_rsvc->s_term, &tx); + if (rc != 0) + goto out; + ABT_rwlock_wrlock(svc->cs_lock); + + rc = cont_lookup(&tx, svc, uuid, &cont); + if (rc != 0) { + if (rc == -DER_NONEXIST) { + /* + * Because we released and reacquired svc->cs_lock when + * calling cont_destroy_bcast above... 
+ */ + D_DEBUG(DB_MD, DF_CONT ": already destroyed\n", + DP_CONT(svc->cs_pool_uuid, uuid)); + rc = 0; + } + goto out_lock; + } cont_track_eph_leader_delete(cont->c_svc, cont->c_uuid); @@ -1621,50 +1706,53 @@ cont_destroy(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont, d_iov_t value; d_iov_set(&value, NULL, 0); - rc = rdb_tx_lookup(tx, &cont->c_prop, &ds_cont_prop_oit_oids, &value); - if (rc && rc != -DER_NONEXIST) { - DL_ERROR(rc, "failed to lookup oit oid kvs pool/cont: " DF_CONTF, - DP_CONT(pool_hdl->sph_pool->sp_uuid, cont->c_uuid)); - goto out_prop; - } - /* There was a bug that oit oids might be created already see DAOS-14799 */ + rc = rdb_tx_lookup(&tx, &cont->c_prop, &ds_cont_prop_oit_oids, &value); + if (rc && rc != -DER_NONEXIST) { + DL_ERROR(rc, "failed to lookup oit oid kvs pool/cont: " DF_CONTF, + DP_CONT(pool_hdl->sph_pool->sp_uuid, cont->c_uuid)); + goto out_cont; + } + /* There was a bug that oit oids might be created already see DAOS-14799 */ if (rc == 0) need_destroy_oid_oit_kvs = true; } /* Destroy oit oids index KVS. */ if (need_destroy_oid_oit_kvs) { - rc = rdb_tx_destroy_kvs(tx, &cont->c_prop, &ds_cont_prop_oit_oids); + rc = rdb_tx_destroy_kvs(&tx, &cont->c_prop, &ds_cont_prop_oit_oids); if (rc != 0) - goto out_prop; + goto out_cont; } /* Destroy the handle index KVS. */ - rc = rdb_tx_destroy_kvs(tx, &cont->c_prop, &ds_cont_prop_handles); + rc = rdb_tx_destroy_kvs(&tx, &cont->c_prop, &ds_cont_prop_handles); if (rc != 0) - goto out_prop; + goto out_cont; /* Destroy the user attribute KVS. */ - rc = rdb_tx_destroy_kvs(tx, &cont->c_prop, &ds_cont_attr_user); + rc = rdb_tx_destroy_kvs(&tx, &cont->c_prop, &ds_cont_attr_user); if (rc != 0) - goto out_prop; + goto out_cont; /* Destroy the snapshot KVS. 
*/ - rc = rdb_tx_destroy_kvs(tx, &cont->c_prop, &ds_cont_prop_snapshots); + rc = rdb_tx_destroy_kvs(&tx, &cont->c_prop, &ds_cont_prop_snapshots); if (rc != 0) - goto out_prop; + goto out_cont; /* Delete entry in container UUIDs KVS (if added during create) */ + rc = cont_prop_read(&tx, cont, DAOS_CO_QUERY_PROP_LABEL, &prop, true); + if (rc != 0) + goto out_cont; + D_ASSERT(prop != NULL); lbl_ent = daos_prop_entry_get(prop, DAOS_PROP_CO_LABEL); if (lbl_ent) { d_iov_set(&key, lbl_ent->dpe_str, strnlen(lbl_ent->dpe_str, DAOS_PROP_MAX_LABEL_BUF_LEN)); d_iov_set(&val, NULL, 0); - rc = rdb_tx_lookup(tx, &cont->c_svc->cs_uuids, &key, &val); + rc = rdb_tx_lookup(&tx, &cont->c_svc->cs_uuids, &key, &val); if (rc != -DER_NONEXIST) { if (rc == 0) { - rc = rdb_tx_delete(tx, &cont->c_svc->cs_uuids, - &key); + rc = rdb_tx_delete(&tx, &cont->c_svc->cs_uuids, &key); if (rc != 0) goto out_prop; D_DEBUG(DB_MD, DF_CONT": deleted label: %s\n", @@ -1679,13 +1767,22 @@ cont_destroy(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont, /* Destroy the container attribute KVS. 
*/ d_iov_set(&key, cont->c_uuid, sizeof(uuid_t)); - rc = rdb_tx_destroy_kvs(tx, &cont->c_svc->cs_conts, &key); + rc = rdb_tx_destroy_kvs(&tx, &cont->c_svc->cs_conts, &key); + if (rc != 0) + goto out_prop; + + rc = rdb_tx_commit(&tx); out_prop: daos_prop_free(prop); +out_cont: + cont_put(cont); +out_lock: + ABT_rwlock_unlock(svc->cs_lock); + rdb_tx_end(&tx); out: - D_DEBUG(DB_MD, DF_CONT ": replying rpc: %p " DF_RC "\n", - DP_CONT(pool_hdl->sph_pool->sp_uuid, cont->c_uuid), rpc, DP_RC(rc)); + D_DEBUG(DB_MD, DF_CONT ": replying: rpc=%p: " DF_RC "\n", + DP_CONT(pool_hdl->sph_pool->sp_uuid, uuid), rpc, DP_RC(rc)); return rc; } @@ -2294,8 +2391,9 @@ cont_svc_eph_track_leader_stop(struct cont_svc *svc) svc->cs_cont_ephs_leader_req = NULL; } -int -cont_lookup(struct rdb_tx *tx, const struct cont_svc *svc, const uuid_t uuid, struct cont **cont) +static int +cont_lookup_internal(struct rdb_tx *tx, const struct cont_svc *svc, const uuid_t uuid, + bool include_destroying, struct cont **cont) { struct cont *p; d_iov_t key; @@ -2321,15 +2419,36 @@ cont_lookup(struct rdb_tx *tx, const struct cont_svc *svc, const uuid_t uuid, st rc = rdb_path_clone(&svc->cs_conts, &p->c_prop); if (rc != 0) D_GOTO(err_p, rc); - rc = rdb_path_push(&p->c_prop, &key); if (rc != 0) - D_GOTO(err_attrs, rc); + goto err_prop; + + if (!include_destroying) { + container_flags_t flags; + + d_iov_set(&tmp, &flags, sizeof(flags)); + rc = rdb_tx_lookup(tx, &p->c_prop, &ds_cont_prop_ghce, &tmp); + if (rc != 0) { + if (rc == -DER_NONEXIST) { + D_ERROR(DF_CONT ": container property ghce not found\n", + DP_CONT(svc->cs_pool_uuid, p->c_uuid)); + /* It is the property that doesn't exist, not the container. 
*/ + rc = -DER_IO; + } + goto err_prop; + } + if (flags & CONTAINER_F_DESTROYING) { + D_DEBUG(DB_MD, DF_CONT ": ignore destroying\n", + DP_CONT(svc->cs_pool_uuid, p->c_uuid)); + rc = -DER_NONEXIST; + goto err_prop; + } + } /* c_snaps */ rc = rdb_path_clone(&p->c_prop, &p->c_snaps); if (rc != 0) - D_GOTO(err_attrs, rc); + goto err_prop; rc = rdb_path_push(&p->c_snaps, &ds_cont_prop_snapshots); if (rc != 0) D_GOTO(err_snaps, rc); @@ -2369,7 +2488,7 @@ cont_lookup(struct rdb_tx *tx, const struct cont_svc *svc, const uuid_t uuid, st rdb_path_fini(&p->c_user); err_snaps: rdb_path_fini(&p->c_snaps); -err_attrs: +err_prop: rdb_path_fini(&p->c_prop); err_p: D_FREE(p); @@ -2377,6 +2496,12 @@ cont_lookup(struct rdb_tx *tx, const struct cont_svc *svc, const uuid_t uuid, st return rc; } +int +cont_lookup(struct rdb_tx *tx, const struct cont_svc *svc, const uuid_t uuid, struct cont **cont) +{ + return cont_lookup_internal(tx, svc, uuid, true /* include_destroying */, cont); +} + static int cont_lookup_bylabel(struct rdb_tx *tx, const struct cont_svc *svc, const char *label, struct cont **cont) @@ -2506,6 +2631,7 @@ cont_open(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont, cr d_iov_t value; daos_prop_t *prop = NULL; struct container_hdl chdl; + container_flags_t container_flags; char zero = 0; int rc = 0; struct d_ownership owner; @@ -2590,10 +2716,23 @@ cont_open(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont, cr D_ERROR(DF_CONT ": refusing attempt to open with new layout: " DF_X64 "\n", DP_CONT(cont->c_svc->cs_pool_uuid, cont->c_uuid), entry->dpe_val); rc = -DER_NOTSUPPORTED; + daos_prop_free(prop); D_GOTO(out, rc); } } + d_iov_set(&value, &container_flags, sizeof(container_flags)); + rc = rdb_tx_lookup(tx, &cont->c_prop, &ds_cont_prop_ghce, &value); + if (rc != 0) { + daos_prop_free(prop); + goto out; + } + if (container_flags & CONTAINER_F_DESTROYING) { + rc = -DER_CONT_DESTROYING; + daos_prop_free(prop); + goto out; + } + 
get_cont_prop_access_info(prop, &owner, &acl); rc = ds_sec_cont_get_capabilities(flags, &pool_hdl->sph_cred, &owner, acl, &sec_capas); @@ -2639,7 +2778,8 @@ cont_open(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont, cr if (flags & (DAOS_COO_EVICT | DAOS_COO_EVICT_ALL)) { rc = evict_hdls(tx, cont, true /* force */, - (flags & DAOS_COO_EVICT_ALL) ? NULL : pool_hdl, rpc->cr_ctx); + (flags & DAOS_COO_EVICT_ALL) ? NULL : pool_hdl, rpc->cr_ctx, + true /* remotely */); if (rc != 0) { daos_prop_free(prop); goto out; @@ -2889,8 +3029,8 @@ cont_close_one_hdl(struct rdb_tx *tx, struct d_hash_table *nhc, struct cont_svc /* Close an array of handles, possibly belonging to different containers. */ static int -cont_close_hdls(struct cont_svc *svc, struct cont_tgt_close_rec *recs, - int nrecs, crt_context_t ctx) +cont_close_hdls(struct cont_svc *svc, struct cont_tgt_close_rec *recs, int nrecs, crt_context_t ctx, + bool remotely) { struct rdb_tx tx; struct d_hash_table txs_nhc; /* TX per-container number of handles cache (HT). 
*/ @@ -2904,11 +3044,13 @@ cont_close_hdls(struct cont_svc *svc, struct cont_tgt_close_rec *recs, " recs[0].hce="DF_U64"\n", DP_CONT(svc->cs_pool_uuid, NULL), nrecs, DP_UUID(recs[0].tcr_hdl), recs[0].tcr_hce); - rc = cont_close_recs(ctx, svc, recs, nrecs); - if (rc != 0) { - D_ERROR(DF_CONT": failed to close %d recs: "DF_RC"\n", - DP_CONT(svc->cs_pool_uuid, NULL), nrecs, DP_RC(rc)); - D_GOTO(out, rc); + if (remotely) { + rc = cont_close_recs(ctx, svc, recs, nrecs); + if (rc != 0) { + DL_ERROR(rc, DF_CONT ": failed to close %d recs", + DP_CONT(svc->cs_pool_uuid, NULL), nrecs); + goto out; + } } rc = rdb_tx_begin(svc->cs_rsvc->s_db, svc->cs_rsvc->s_term, &tx); @@ -4419,8 +4561,8 @@ ds_cont_close_by_pool_hdls(uuid_t pool_uuid, uuid_t *pool_hdls, int n_pool_hdls, goto out_buf; if (arg.cia_buf.rb_nrecs > 0) - rc = cont_close_hdls(svc, arg.cia_buf.rb_recs, - arg.cia_buf.rb_nrecs, ctx); + rc = cont_close_hdls(svc, arg.cia_buf.rb_recs, arg.cia_buf.rb_nrecs, ctx, + true /* remotely */); out_buf: recs_buf_fini(&arg.cia_buf); @@ -4444,6 +4586,7 @@ struct list_cont_iter_args { struct daos_pool_cont_info *conts; struct cont_svc *svc; struct rdb_tx *tx; + bool include_destroying; }; /* callback function for list containers iteration. */ @@ -4481,16 +4624,15 @@ enum_cont_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *varg) ap->conts_len = realloc_elems; } - cinfo = &ap->conts[ap->ncont]; - ap->ncont++; - uuid_copy(cinfo->pci_uuid, cont_uuid); - /* Get the label property. FIXME: cont_lookup no need to search * in cs_conts, since we're iterating that KVS already. * Isn't val the container properties KVS? Can it be used directly? */ - rc = cont_lookup(ap->tx, ap->svc, cont_uuid, &cont); - if (rc != 0) { + rc = cont_lookup_internal(ap->tx, ap->svc, cont_uuid, ap->include_destroying, &cont); + if (rc == -DER_NONEXIST && !ap->include_destroying) { + /* Continue iterating. 
*/ + return 0; + } else if (rc != 0) { D_ERROR(DF_CONT": lookup cont failed, "DF_RC"\n", DP_CONT(ap->pool_uuid, cont_uuid), DP_RC(rc)); return rc; @@ -4502,6 +4644,9 @@ enum_cont_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *varg) DP_CONT(ap->pool_uuid, cont_uuid), DP_RC(rc)); return rc; } + cinfo = &ap->conts[ap->ncont]; + ap->ncont++; + uuid_copy(cinfo->pci_uuid, cont_uuid); if (prop->dpp_entries[0].dpe_str) { strncpy(cinfo->pci_label, prop->dpp_entries[0].dpe_str, DAOS_PROP_LABEL_MAX_LEN); @@ -4517,14 +4662,15 @@ enum_cont_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *varg) /** * List all containers in a pool. * - * \param[in] pool_uuid Pool UUID. - * \param[out] conts Array of container info structures - * to be allocated. Caller must free. - * \param[out] ncont Number of containers in the pool - * (number of items populated in conts[]). + * \param[in] pool_uuid Pool UUID. + * \param[in] include_destroying Whether to include containers that are being destroyed. + * \param[out] conts Array of container info structures + * to be allocated. Caller must free. + * \param[out] ncont Number of containers in the pool + * (number of items populated in conts[]). 
*/ int -ds_cont_list(uuid_t pool_uuid, struct daos_pool_cont_info **conts, +ds_cont_list(uuid_t pool_uuid, bool include_destroying, struct daos_pool_cont_info **conts, uint64_t *ncont) { int rc; @@ -4538,6 +4684,7 @@ ds_cont_list(uuid_t pool_uuid, struct daos_pool_cont_info **conts, args.ncont = 0; /* number of containers in the pool */ args.conts_len = 0; /* allocated length of conts[] */ args.conts = NULL; + args.include_destroying = include_destroying; uuid_copy(args.pool_uuid, pool_uuid); @@ -5690,6 +5837,7 @@ cont_op_with_svc(struct ds_pool_hdl *pool_hdl, struct cont_svc *svc, const char *clbl = NULL; bool dup_op = false; struct ds_pool_svc_op_val op_val = {0}; + struct cont_dbl_op_val *cdbl_op_val = NULL; bool fi_pass_noreply = DAOS_FAIL_CHECK(DAOS_MD_OP_PASS_NOREPLY); bool fi_fail_noreply = DAOS_FAIL_CHECK(DAOS_MD_OP_FAIL_NOREPLY); bool fi_pass_nl_noreply; @@ -5737,12 +5885,15 @@ cont_op_with_svc(struct ds_pool_hdl *pool_hdl, struct cont_svc *svc, uuid_copy(olbl_out->colo_uuid, cont->c_uuid); break; case CONT_DESTROY_BYLABEL: + cdbl_op_val = (struct cont_dbl_op_val *)op_val.ov_resvd; if (dup_op) goto out_commit; cont_op_in_get_label(rpc, opc, cont_proto_ver, &clbl); rc = cont_lookup_bylabel(&tx, svc, clbl, &cont); if (rc != 0) goto out_commit; + cdbl_op_val->cdv_magic = CONT_DBL_OP_VAL_MAGIC; + uuid_copy(cdbl_op_val->cdv_uuid, cont->c_uuid); rc = cont_op_with_cont(&tx, pool_hdl, cont, rpc, &update_mtime, cont_proto_ver, dup_op, &op_val); break; @@ -5788,6 +5939,46 @@ cont_op_with_svc(struct ds_pool_hdl *pool_hdl, struct cont_svc *svc, out_lock: ABT_rwlock_unlock(svc->cs_lock); rdb_tx_end(&tx); + if (rc != 0) + goto out; + + if (opc == CONT_DESTROY_BYLABEL || opc == CONT_DESTROY) { + uuid_t *uuid; + + /* Determine the UUID of the container. */ + if (opc == CONT_DESTROY_BYLABEL) { + /* + * Looking up by label again might be unsafe. See the comment for + * cont_dbl_op_val. 
+ */ + if (dup_op) { + if (cdbl_op_val->cdv_magic != CONT_DBL_OP_VAL_MAGIC) { + /* + * From a previous version, which should have finished + * destroying the container already. + */ + D_DEBUG(DB_MD, DF_CONT ": invalid cdv_magic: label=%s\n", + DP_CONT(pool_hdl->sph_pool->sp_uuid, in->ci_uuid), + clbl); + rc = 0; + goto out; + } + if (uuid_is_null(cdbl_op_val->cdv_uuid)) { + D_ERROR(DF_CONT ": invalid cdv_uuid: label=%s\n", + DP_CONT(pool_hdl->sph_pool->sp_uuid, in->ci_uuid), + clbl); + rc = -DER_IO; + goto out; + } + } + uuid = &cdbl_op_val->cdv_uuid; + } else { + uuid = &in->ci_uuid; + } + + rc = cont_destroy_post(pool_hdl, svc, *uuid, rpc, cont_proto_ver); + } + out: if ((rc == 0) && !dup_op) { /* Propagate new snapshot list by IV */ diff --git a/src/container/srv_layout.h b/src/container/srv_layout.h index 7f10d948368..90546f39095 100644 --- a/src/container/srv_layout.h +++ b/src/container/srv_layout.h @@ -1,6 +1,6 @@ /* * (C) Copyright 2016-2023 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -96,6 +96,9 @@ struct container_hdl { * * All keys are strings. Value types are specified for each key below. * + * The ds_cont_prop_ghce property had always been uint64_t value 0 before being + * repurposed for container_flags_t. + * * IMPORTANT! Please add new keys to this KVS like this: * * extern d_iov_t ds_cont_prop_new_key; comment_on_value_type @@ -104,7 +107,8 @@ struct container_hdl { * usage shall be described above in this comment following existing * examples. If the value is another KVS, its type shall be the KVS name. 
*/ -extern d_iov_t ds_cont_prop_ghce; /* uint64_t */ +/* clang-format off */ +extern d_iov_t ds_cont_prop_ghce; /* container_flags_t */ extern d_iov_t ds_cont_prop_alloced_oid; /* uint64_t */ extern d_iov_t ds_cont_prop_label; /* string */ extern d_iov_t ds_cont_prop_layout_type; /* uint64_t */ @@ -139,8 +143,14 @@ extern d_iov_t ds_cont_prop_cont_obj_version; /* uint32_t */ extern d_iov_t ds_cont_prop_nhandles; /* uint32_t */ extern d_iov_t ds_cont_prop_oit_oids; /* snapshot OIT OID KVS */ extern d_iov_t ds_cont_prop_ec_agg_eph; /* uint64_t */ +/* clang-format on */ /* Please read the IMPORTANT notes above before adding new keys. */ +/* Container flags (e.g., CONTAINER_F_DESTROYING) */ +typedef uint64_t container_flags_t; + +#define CONTAINER_F_DESTROYING (1ULL << 0) /* being destroyed */ + struct co_md_times { uint64_t otime; /* container open time */ uint64_t mtime; /* container metadata modify time */ diff --git a/src/container/srv_target.c b/src/container/srv_target.c index 6bfce2f6397..fbc640283b3 100644 --- a/src/container/srv_target.c +++ b/src/container/srv_target.c @@ -1357,13 +1357,6 @@ cont_child_destroy_one(void *vin) if (rc != 0) D_GOTO(out_pool, rc); - if (cont->sc_open > 0) { - D_ERROR(DF_CONT": Container is still in open(%d)\n", - DP_CONT(cont->sc_pool->spc_uuid, cont->sc_uuid), cont->sc_open); - cont_child_put(tls->dt_cont_cache, cont); - D_GOTO(out_pool, rc = -DER_BUSY); - } - if (cont->sc_destroying) { D_DEBUG(DB_MD, DF_CONT ": Container is already being destroyed\n", DP_CONT(cont->sc_pool->spc_uuid, cont->sc_uuid)); diff --git a/src/gurt/tests/test_gurt.c b/src/gurt/tests/test_gurt.c index 3b9e7329e13..94122210822 100644 --- a/src/gurt/tests/test_gurt.c +++ b/src/gurt/tests/test_gurt.c @@ -1,6 +1,6 @@ /* * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -130,11 +130,11 @@ test_d_errstr(void **state) assert_string_equal(value, "DER_UNKNOWN"); /* Check the end of the DAOS error numbers. */ - value = d_errstr(-DER_CONT_NONEXIST); - assert_string_equal(value, "DER_CONT_NONEXIST"); - value = d_errstr(-2050); - assert_string_equal(value, "DER_CONT_NONEXIST"); - value = d_errstr(-(DER_CONT_NONEXIST + 1)); + value = d_errstr(-DER_CONT_DESTROYING); + assert_string_equal(value, "DER_CONT_DESTROYING"); + value = d_errstr(-2051); + assert_string_equal(value, "DER_CONT_DESTROYING"); + value = d_errstr(-(DER_CONT_DESTROYING + 1)); assert_string_equal(value, "DER_UNKNOWN"); } diff --git a/src/include/daos/common.h b/src/include/daos/common.h index a31cf34dfa7..8068736d0dd 100644 --- a/src/include/daos/common.h +++ b/src/include/daos/common.h @@ -638,7 +638,9 @@ daos_der2errno(int err) case -DER_SUCCESS: return 0; case -DER_NO_PERM: case -DER_EP_RO: - case -DER_EP_OLD: return EPERM; + case -DER_EP_OLD: + case -DER_CONT_DESTROYING: + return EPERM; case -DER_ENOENT: case -DER_NONEXIST: return ENOENT; case -DER_INVAL: @@ -915,6 +917,7 @@ enum { #define DAOS_POOL_EVICT_FAIL (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xa0) #define DAOS_POOL_RFCHECK_FAIL (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xa1) #define DAOS_POOL_REINT_SLOW (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xa2) +#define DAOS_CONT_DESTROY_FAIL_POST (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xa3) #define DAOS_CHK_CONT_ORPHAN (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xb0) #define DAOS_CHK_CONT_BAD_LABEL (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0xb1) diff --git a/src/include/daos_errno.h b/src/include/daos_errno.h index 3bbc94a8711..a221bd7b286 100644 --- a/src/include/daos_errno.h +++ b/src/include/daos_errno.h @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. 
- * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -213,7 +213,8 @@ extern "C" { /** Target is overload, retry RPC */ \ ACTION(DER_OVERLOAD_RETRY, retry later because of overloaded service) \ ACTION(DER_NOT_RESUME, Cannot resume former DAOS check instance) \ - ACTION(DER_CONT_NONEXIST, The specified container does not exist) + ACTION(DER_CONT_NONEXIST, The specified container does not exist) \ + ACTION(DER_CONT_DESTROYING, The specified container is being destroyed) /* clang-format on */ diff --git a/src/include/daos_srv/container.h b/src/include/daos_srv/container.h index 510ffc70ac4..0480e6603b6 100644 --- a/src/include/daos_srv/container.h +++ b/src/include/daos_srv/container.h @@ -1,7 +1,7 @@ /* * (C) Copyright 2015-2024 Intel Corporation. * (C) Copyright 2025 Google LLC - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -39,7 +39,9 @@ int daos_prop_t *prop); int ds_cont_svc_refresh_agg_eph(uuid_t pool_uuid); -int ds_cont_list(uuid_t pool_uuid, struct daos_pool_cont_info **conts, uint64_t *ncont); +int + ds_cont_list(uuid_t pool_uuid, bool include_destroying, struct daos_pool_cont_info **conts, + uint64_t *ncont); int ds_cont_filter(uuid_t pool_uuid, daos_pool_cont_filter_t *filt, struct daos_pool_cont_info2 **conts, uint64_t *ncont); int ds_cont_upgrade(uuid_t pool_uuid, struct cont_svc *svc); diff --git a/src/pool/srv_pool.c b/src/pool/srv_pool.c index 9f05aa9457b..aad4f5955b8 100644 --- a/src/pool/srv_pool.c +++ b/src/pool/srv_pool.c @@ -5144,7 +5144,7 @@ pool_list_cont_handler(crt_rpc_t *rpc, int handler_version) } /* Call container service to get the list */ - rc = ds_cont_list(in->plci_op.pi_uuid, &cont_buf, &ncont); + rc = ds_cont_list(in->plci_op.pi_uuid, true /* 
include_destroying */, &cont_buf, &ncont); if (rc != 0) { D_GOTO(out_svc, rc); } else if ((ncont_in > 0) && (ncont > ncont_in)) { @@ -8117,7 +8117,7 @@ pool_recov_cont(crt_context_t ctx, struct pool_svc *svc, struct pool_target_addr if (rc != 0) goto out; - rc = ds_cont_list(svc->ps_uuid, &dpci, &cont_nr); + rc = ds_cont_list(svc->ps_uuid, false /* include_destroying */, &dpci, &cont_nr); if (rc != 0) D_GOTO(out, rc); diff --git a/src/tests/suite/daos_container.c b/src/tests/suite/daos_container.c index 187c5ab5f03..898d60990e7 100644 --- a/src/tests/suite/daos_container.c +++ b/src/tests/suite/daos_container.c @@ -1,6 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -4039,6 +4039,43 @@ co_op_dup_timing(void **state) test_teardown((void **)&arg); } +/* Opening a DESTROYING container should fail. 
*/ +static void +co_open_destroying(void **state) +{ + test_arg_t *arg = *state; + char *label = "c_open_destroying"; + uuid_t uuid; + daos_handle_t coh; + int rc; + + par_barrier(PAR_COMM_WORLD); + + if (arg->myrank != 0) + goto out; + + rc = daos_cont_create_with_label(arg->pool.poh, label, NULL, &uuid, NULL); + assert_rc_equal(rc, 0); + print_message("created container '%s' (" DF_UUIDF ")\n", label, DP_UUID(uuid)); + + print_message("destroying container '%s' with fault injection\n", label); + test_set_engine_fail_loc(arg, CRT_NO_RANK, DAOS_CONT_DESTROY_FAIL_POST | DAOS_FAIL_ALWAYS); + rc = daos_cont_destroy(arg->pool.poh, label, 1 /* force */, NULL); + test_set_engine_fail_loc(arg, CRT_NO_RANK, 0); + assert_rc_equal(rc, -DER_NOMEM); + + print_message("attempting to open DESTROYING container '%s'\n", label); + rc = daos_cont_open(arg->pool.poh, label, DAOS_COO_RW, &coh, NULL, NULL); + assert_rc_equal(rc, -DER_CONT_DESTROYING); + + print_message("destroying container '%s'\n", label); + rc = daos_cont_destroy(arg->pool.poh, label, 1 /* force */, NULL); + assert_rc_equal(rc, 0); + +out: + par_barrier(PAR_COMM_WORLD); +} + static int co_setup_sync(void **state) { @@ -4111,6 +4148,7 @@ static const struct CMUnitTest co_tests[] = { {"CONT33: exclusive open", co_exclusive_open, NULL, test_case_teardown}, {"CONT34: evict handles", co_evict_hdls, NULL, test_case_teardown}, {"CONT35: container duplicate op detection timing", co_op_dup_timing, NULL, test_case_teardown}, + {"CONT36: open DESTROYING", co_open_destroying, NULL, test_case_teardown}, }; int From c7301f6f573dd09a45495327bd7876d3cbb62de3 Mon Sep 17 00:00:00 2001 From: Li Wei Date: Thu, 12 Mar 2026 17:23:43 +0900 Subject: [PATCH 2/2] DAOS-? 
container: Hide DESTROYING containers Signed-off-by: Li Wei --- src/container/srv_container.c | 125 +++++++++++++++++++++---------- src/container/srv_epoch.c | 5 +- src/pool/srv_pool.c | 4 +- src/tests/suite/daos_container.c | 4 +- 4 files changed, 91 insertions(+), 47 deletions(-) diff --git a/src/container/srv_container.c b/src/container/srv_container.c index 8d8b40c2368..77d70d49958 100644 --- a/src/container/srv_container.c +++ b/src/container/srv_container.c @@ -1578,6 +1578,43 @@ evict_hdls(struct rdb_tx *tx, struct cont *cont, bool force, struct ds_pool_hdl static void cont_track_eph_leader_delete(struct cont_svc *svc, uuid_t cont_uuid); +/* Delete the entry in the container UUIDs KVS (if added during create). */ +static int +cont_delete_label(struct rdb_tx *tx, struct cont *cont, const char *label, uuid_t pool_uuid, + uuid_t cont_uuid) +{ + d_iov_t key; + d_iov_t val; + int rc; + + d_iov_set(&key, (char *)label, strnlen(label, DAOS_PROP_MAX_LABEL_BUF_LEN)); + d_iov_set(&val, NULL, 0); + rc = rdb_tx_lookup(tx, &cont->c_svc->cs_uuids, &key, &val); + if (rc == -DER_NONEXIST) + return 0; + else if (rc != 0) + return rc; + + /* Ensure the label is indeed ours. 
*/ + if (val.iov_len != sizeof(uuid_t)) { + D_ERROR(DF_CONT ": invalid UUID value: label=%s len=%zu\n", + DP_CONT(pool_uuid, cont_uuid), label, val.iov_len); + return -DER_IO; + } + if (uuid_compare(val.iov_buf, cont_uuid) != 0) { + D_DEBUG(DB_MD, DF_CONT ": not our label: label=%s uuid=" DF_UUID "\n", + DP_CONT(pool_uuid, cont_uuid), label, DP_UUID(val.iov_buf)); + return 0; + } + + rc = rdb_tx_delete(tx, &cont->c_svc->cs_uuids, &key); + if (rc != 0) + return rc; + + D_DEBUG(DB_MD, DF_CONT ": deleted label: %s\n", DP_CONT(pool_uuid, cont_uuid), label); + return 0; +} + static int cont_destroy(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont, crt_rpc_t *rpc, int cont_proto_ver) @@ -1589,6 +1626,7 @@ cont_destroy(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont, struct d_ownership owner; uint32_t force; struct daos_acl *acl; + struct daos_prop_entry *lbl_ent; cont_destroy_in_get_data(rpc, opc_get(rpc->cr_opc), cont_proto_ver, &force, NULL); D_DEBUG(DB_MD, DF_CONT ": processing rpc: %p force=%u\n", @@ -1647,6 +1685,15 @@ cont_destroy(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont, if (rc != 0) goto out_prop; + /* Delete the label (if any) in the container UUIDs KVS. 
*/ + lbl_ent = daos_prop_entry_get(prop, DAOS_PROP_CO_LABEL); + if (lbl_ent != NULL) { + rc = cont_delete_label(tx, cont, lbl_ent->dpe_str, pool_hdl->sph_pool->sp_uuid, + cont->c_uuid); + if (rc != 0) + goto out_prop; + } + container_flags |= CONTAINER_F_DESTROYING; d_iov_set(&val, &container_flags, sizeof(container_flags)); rc = rdb_tx_update(tx, &cont->c_prop, &ds_cont_prop_ghce, &val); @@ -1657,17 +1704,20 @@ cont_destroy(struct rdb_tx *tx, struct ds_pool_hdl *pool_hdl, struct cont *cont, return rc; } +static int +cont_lookup_internal(struct rdb_tx *tx, const struct cont_svc *svc, const uuid_t uuid, + bool include_destroying, struct cont **cont); + static int cont_destroy_post(struct ds_pool_hdl *pool_hdl, struct cont_svc *svc, uuid_t uuid, crt_rpc_t *rpc, int cont_proto_ver) { struct rdb_tx tx; struct cont *cont; - d_iov_t key; - d_iov_t val; - int rc; daos_prop_t *prop = NULL; struct daos_prop_entry *lbl_ent; + d_iov_t key; + int rc; bool need_destroy_oid_oit_kvs = false; if (DAOS_FAIL_CHECK(DAOS_CONT_DESTROY_FAIL_POST)) { @@ -1684,7 +1734,7 @@ cont_destroy_post(struct ds_pool_hdl *pool_hdl, struct cont_svc *svc, uuid_t uui goto out; ABT_rwlock_wrlock(svc->cs_lock); - rc = cont_lookup(&tx, svc, uuid, &cont); + rc = cont_lookup_internal(&tx, svc, uuid, true /* include_destroying */, &cont); if (rc != 0) { if (rc == -DER_NONEXIST) { /* @@ -1739,37 +1789,30 @@ cont_destroy_post(struct ds_pool_hdl *pool_hdl, struct cont_svc *svc, uuid_t uui if (rc != 0) goto out_cont; - /* Delete entry in container UUIDs KVS (if added during create) */ + /* + * Previous versions do not delete the label in cont_destroy. On the + * other hand, if the label _has_ been deleted in cont_destroy, and + * reused by a different container, the cont_delete_label call will + * detect that by checking the container UUID, and keep the label + * intact. 
+ */ rc = cont_prop_read(&tx, cont, DAOS_CO_QUERY_PROP_LABEL, &prop, true); if (rc != 0) goto out_cont; D_ASSERT(prop != NULL); lbl_ent = daos_prop_entry_get(prop, DAOS_PROP_CO_LABEL); if (lbl_ent) { - d_iov_set(&key, lbl_ent->dpe_str, - strnlen(lbl_ent->dpe_str, DAOS_PROP_MAX_LABEL_BUF_LEN)); - d_iov_set(&val, NULL, 0); - rc = rdb_tx_lookup(&tx, &cont->c_svc->cs_uuids, &key, &val); - if (rc != -DER_NONEXIST) { - if (rc == 0) { - rc = rdb_tx_delete(&tx, &cont->c_svc->cs_uuids, &key); - if (rc != 0) - goto out_prop; - D_DEBUG(DB_MD, DF_CONT": deleted label: %s\n", - DP_CONT(pool_hdl->sph_pool->sp_uuid, - cont->c_uuid), - lbl_ent->dpe_str); - } else { - goto out_prop; - } - } + rc = cont_delete_label(&tx, cont, lbl_ent->dpe_str, pool_hdl->sph_pool->sp_uuid, + cont->c_uuid); + if (rc != 0) + goto out_prop; } /* Destroy the container attribute KVS. */ d_iov_set(&key, cont->c_uuid, sizeof(uuid_t)); rc = rdb_tx_destroy_kvs(&tx, &cont->c_svc->cs_conts, &key); if (rc != 0) - goto out_prop; + goto out_cont; rc = rdb_tx_commit(&tx); @@ -2402,7 +2445,6 @@ cont_lookup_internal(struct rdb_tx *tx, const struct cont_svc *svc, const uuid_t d_iov_set(&key, (void *)uuid, sizeof(uuid_t)); d_iov_set(&tmp, NULL, 0); - /* check if the container exists or not */ rc = rdb_tx_lookup(tx, &svc->cs_conts, &key, &tmp); if (rc != 0) D_GOTO(err, rc); @@ -2499,7 +2541,7 @@ cont_lookup_internal(struct rdb_tx *tx, const struct cont_svc *svc, const uuid_t int cont_lookup(struct rdb_tx *tx, const struct cont_svc *svc, const uuid_t uuid, struct cont **cont) { - return cont_lookup_internal(tx, svc, uuid, true /* include_destroying */, cont); + return cont_lookup_internal(tx, svc, uuid, false /* include_destroying */, cont); } static int @@ -4602,7 +4644,8 @@ enum_cont_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *varg) (void)val; if (key->iov_len != sizeof(uuid_t)) { - D_ERROR("invalid key size: key="DF_U64"\n", key->iov_len); + D_ERROR(DF_UUID ": invalid key size: key=" DF_U64 "\n", 
DP_UUID(ap->pool_uuid), + key->iov_len); return -DER_IO; } @@ -4906,9 +4949,11 @@ filter_cont_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *varg) /* Lookup container, see if it matches filter specification before adding to ap->conts[] */ rc = cont_lookup(ap->tx, ap->svc, cont_uuid, &cont); - if (rc != 0) { - D_ERROR(DF_CONT": lookup cont failed, "DF_RC"\n", - DP_CONT(ap->pool_uuid, cont_uuid), DP_RC(rc)); + if (rc == -DER_NONEXIST) { + /* Continue iterating. */ + return 0; + } else if (rc != 0) { + DL_INFO(rc, DF_CONT ": look up container", DP_CONT(ap->pool_uuid, cont_uuid)); return rc; } @@ -4937,7 +4982,6 @@ filter_cont_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *varg) } pcinfo = &ap->conts[ap->ncont]; - ap->ncont++; uuid_copy(pcinfo->pci_id.pci_uuid, cont_uuid); /* TODO: Specify client cont_proto_version. This is invoked from a pool client RPC */ @@ -4953,6 +4997,7 @@ filter_cont_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *varg) if (rc != 0) { D_ERROR(DF_CONT": cont_prop_read() failed, "DF_RC"\n", DP_CONT(ap->pool_uuid, cont_uuid), DP_RC(rc)); + memset(&pcinfo->pci_cinfo, 0, sizeof(pcinfo->pci_cinfo)); goto out_cont; } @@ -4963,6 +5008,8 @@ filter_cont_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *varg) pcinfo->pci_id.pci_label[DAOS_PROP_LABEL_MAX_LEN] = '\0'; } + ap->ncont++; + daos_prop_free(prop); out_cont: @@ -5079,8 +5126,7 @@ upgrade_cont_cb(daos_handle_t ih, d_iov_t *key, d_iov_t *val, void *varg) rc = cont_lookup(ap->tx, ap->svc, cont_uuid, &cont); if (rc != 0) { - D_ERROR(DF_CONT": lookup cont failed, "DF_RC"\n", - DP_CONT(ap->pool_uuid, cont_uuid), DP_RC(rc)); + DL_INFO(rc, DF_CONT ": look up container", DP_CONT(ap->pool_uuid, cont_uuid)); return rc; } @@ -5414,8 +5460,7 @@ ds_cont_rf_check(uuid_t pool_uuid, uuid_t cont_uuid, struct rdb_tx *tx) rc = cont_lookup(tx, svc, cont_uuid, &cont); if (rc != 0) { - D_ERROR(DF_CONT": lookup cont failed, "DF_RC"\n", - DP_CONT(pool_uuid, cont_uuid), DP_RC(rc)); + DL_INFO(rc, 
DF_CONT ": look up container", DP_CONT(pool_uuid, cont_uuid)); D_GOTO(out, rc); } @@ -5718,8 +5763,7 @@ ds_cont_prop_iv_update(struct cont_svc *svc, uuid_t cont_uuid) ABT_rwlock_rdlock(svc->cs_lock); rc = cont_lookup(&tx, svc, cont_uuid, &cont); if (rc != 0) { - D_ERROR(DF_CONT": Failed to look container: %d\n", - DP_CONT(svc->cs_pool_uuid, cont_uuid), rc); + DL_INFO(rc, DF_CONT ": look up container", DP_CONT(svc->cs_pool_uuid, cont_uuid)); D_GOTO(out_lock, rc); } @@ -5876,7 +5920,8 @@ cont_op_with_svc(struct ds_pool_hdl *pool_hdl, struct cont_svc *svc, case CONT_OPEN_BYLABEL: cont_op_in_get_label(rpc, opc, cont_proto_ver, &clbl); olbl_out = crt_reply_get(rpc); - rc = cont_lookup_bylabel(&tx, svc, clbl, &cont); + /* FIXME: We should avoid looking up the container UUID if dup_op. */ + rc = cont_lookup_bylabel(&tx, svc, clbl, &cont); if (rc != 0) goto out_commit; /* NB: call common cont_op_with_cont() same as CONT_OPEN case */ @@ -6353,8 +6398,8 @@ ds_cont_set_prop_srv_handler(crt_rpc_t *rpc) else /* CONT_PROP_SET_BYLABEL */ rc = cont_lookup_bylabel(&tx, svc, cont_label, &cont); if (rc != 0) { - DL_ERROR(rc, DF_UUID ": failed to look up container '%s'", DP_UUID(pool_uuid), - cont_id); + DL_INFO(rc, DF_UUID ": failed to look up container '%s'", DP_UUID(pool_uuid), + cont_id); D_GOTO(out_lock, rc); } @@ -6410,7 +6455,7 @@ ds_cont_get_prop(uuid_t pool_uuid, uuid_t cont_uuid, daos_prop_t **prop_out) ABT_rwlock_rdlock(svc->cs_lock); rc = cont_lookup(&tx, svc, cont_uuid, &cont); if (rc != 0) { - DL_ERROR(rc, DF_CONT " cont_lookup failed", DP_CONT(pool_uuid, cont_uuid)); + DL_INFO(rc, DF_CONT ": look up container", DP_CONT(pool_uuid, cont_uuid)); D_GOTO(out_lock, rc); } diff --git a/src/container/srv_epoch.c b/src/container/srv_epoch.c index 5d5e01e4469..7ed0d70accd 100644 --- a/src/container/srv_epoch.c +++ b/src/container/srv_epoch.c @@ -694,7 +694,7 @@ ds_cont_get_snapshots(uuid_t pool_uuid, uuid_t cont_uuid, ABT_rwlock_rdlock(svc->cs_lock); rc = cont_lookup(&tx, svc, 
cont_uuid, &cont); if (rc != 0) { - DL_ERROR(rc, DF_CONT " cont_lookup failed", DP_CONT(pool_uuid, cont_uuid)); + DL_INFO(rc, DF_CONT ": cont_lookup", DP_CONT(pool_uuid, cont_uuid)); D_GOTO(out_lock, rc); } @@ -741,8 +741,7 @@ ds_cont_update_snap_iv(struct cont_svc *svc, uuid_t cont_uuid) ABT_rwlock_rdlock(svc->cs_lock); rc = cont_lookup(&tx, svc, cont_uuid, &cont); if (rc != 0) { - D_ERROR(DF_CONT": Failed to look container: %d\n", - DP_CONT(svc->cs_pool_uuid, cont_uuid), rc); + DL_INFO(rc, DF_CONT ": cont_lookup", DP_CONT(svc->cs_pool_uuid, cont_uuid)); goto out_lock; } diff --git a/src/pool/srv_pool.c b/src/pool/srv_pool.c index aad4f5955b8..c6a9f881164 100644 --- a/src/pool/srv_pool.c +++ b/src/pool/srv_pool.c @@ -5144,7 +5144,7 @@ pool_list_cont_handler(crt_rpc_t *rpc, int handler_version) } /* Call container service to get the list */ - rc = ds_cont_list(in->plci_op.pi_uuid, true /* include_destroying */, &cont_buf, &ncont); + rc = ds_cont_list(in->plci_op.pi_uuid, false /* include_destroying */, &cont_buf, &ncont); if (rc != 0) { D_GOTO(out_svc, rc); } else if ((ncont_in > 0) && (ncont > ncont_in)) { @@ -5161,7 +5161,7 @@ pool_list_cont_handler(crt_rpc_t *rpc, int handler_version) DP_UUID(in->plci_op.pi_uuid), DP_UUID(in->plci_op.pi_hdl), ncont); /* Send any results only if client provided a handle */ - if (cont_buf && (ncont_in > 0) && (bulk != CRT_BULK_NULL)) + if (ncont > 0 && (ncont_in > 0) && (bulk != CRT_BULK_NULL)) rc = transfer_cont_buf(cont_buf, nbytes, svc, rpc, bulk); } diff --git a/src/tests/suite/daos_container.c b/src/tests/suite/daos_container.c index 898d60990e7..08bda8295d3 100644 --- a/src/tests/suite/daos_container.c +++ b/src/tests/suite/daos_container.c @@ -4061,14 +4061,14 @@ co_open_destroying(void **state) print_message("destroying container '%s' with fault injection\n", label); test_set_engine_fail_loc(arg, CRT_NO_RANK, DAOS_CONT_DESTROY_FAIL_POST | DAOS_FAIL_ALWAYS); rc = daos_cont_destroy(arg->pool.poh, label, 1 /* force */, NULL); 
- test_set_engine_fail_loc(arg, CRT_NO_RANK, 0); assert_rc_equal(rc, -DER_NOMEM); print_message("attempting to open DESTROYING container '%s'\n", label); rc = daos_cont_open(arg->pool.poh, label, DAOS_COO_RW, &coh, NULL, NULL); - assert_rc_equal(rc, -DER_CONT_DESTROYING); + assert_rc_equal(rc, -DER_NONEXIST); print_message("destroying container '%s'\n", label); + test_set_engine_fail_loc(arg, CRT_NO_RANK, 0); rc = daos_cont_destroy(arg->pool.poh, label, 1 /* force */, NULL); assert_rc_equal(rc, 0);