diff --git a/src/chk/chk_common.c b/src/chk/chk_common.c index adf5d068523..8790e4c02ed 100644 --- a/src/chk/chk_common.c +++ b/src/chk/chk_common.c @@ -76,6 +76,7 @@ chk_pool_alloc(struct btr_instance *tins, d_iov_t *key_iov, d_iov_t *val_iov, D_INIT_LIST_HEAD(&cpr->cpr_shutdown_link); D_INIT_LIST_HEAD(&cpr->cpr_shard_list); D_INIT_LIST_HEAD(&cpr->cpr_pending_list); + D_INIT_LIST_HEAD(&cpr->cpr_ult_list); cpr->cpr_refs = 1; uuid_copy(cpr->cpr_uuid, cpb->cpb_uuid); cpr->cpr_thread = ABT_THREAD_NULL; @@ -930,6 +931,27 @@ chk_pool_shard_cleanup(struct chk_instance *ins) } } +int +chk_pending_lookup(struct chk_instance *ins, uint64_t seq, struct chk_pending_rec **cpr) +{ + d_iov_t kiov; + d_iov_t riov; + int rc; + + d_iov_set(&riov, NULL, 0); + d_iov_set(&kiov, &seq, sizeof(seq)); + + ABT_rwlock_rdlock(ins->ci_abt_lock); + rc = dbtree_lookup(ins->ci_pending_hdl, &kiov, &riov); + ABT_rwlock_unlock(ins->ci_abt_lock); + if (rc == 0) + *cpr = (struct chk_pending_rec *)riov.iov_buf; + else + *cpr = NULL; + + return rc; +} + int chk_pending_add(struct chk_instance *ins, d_list_t *pool_head, d_list_t *rank_head, uuid_t uuid, uint64_t seq, uint32_t rank, uint32_t cla, uint32_t option_nr, uint32_t *options, @@ -985,12 +1007,14 @@ chk_pending_del(struct chk_instance *ins, uint64_t seq, struct chk_pending_rec * d_iov_set(&kiov, &seq, sizeof(seq)); ABT_rwlock_wrlock(ins->ci_abt_lock); - rc = dbtree_delete(ins->ci_pending_hdl, BTR_PROBE_EQ, &kiov, &riov); + rc = dbtree_delete(ins->ci_pending_hdl, BTR_PROBE_EQ, &kiov, cpr == NULL ? NULL : &riov); ABT_rwlock_unlock(ins->ci_abt_lock); - if (rc == 0) - *cpr = (struct chk_pending_rec *)riov.iov_buf; - else - *cpr = NULL; + if (cpr != NULL) { + if (rc == 0) + *cpr = (struct chk_pending_rec *)riov.iov_buf; + else + *cpr = NULL; + } D_CDEBUG(rc != 0, DLOG_ERR, DLOG_DBG, "Del pending record with gen "DF_X64", seq "DF_X64": "DF_RC"\n", diff --git a/src/chk/chk_engine.c b/src/chk/chk_engine.c index 3dfb7b9b705..9c8064ecb0a 100644 --- a/src/chk/chk_engine.c +++ b/src/chk/chk_engine.c @@ -262,28 +262,41 @@ chk_engine_post_repair(struct chk_pool_rec *cpr, int *result, bool update) return *result != 0 ? *result : rc; } -static int -chk_engine_pm_orphan(struct chk_pool_rec *cpr, d_rank_t rank, int index, uint32_t status) +struct chk_pm_orphan_args { + struct chk_pool_rec *cpoa_pool; + d_rank_t cpoa_rank; + int cpoa_index; + uint32_t cpoa_status; +}; + +static void +chk_engine_pm_orphan_ult(void *args) { - struct chk_instance *ins = cpr->cpr_ins; - struct chk_property *prop = &ins->ci_prop; - struct chk_bookmark *cbk = &cpr->cpr_bk; - d_rank_list_t ranks = { 0 }; - struct chk_report_unit cru = { 0 }; - char *strs[2]; - d_iov_t iovs[2]; - d_sg_list_t sgl; - d_sg_list_t *details = NULL; - Chk__CheckInconsistClass cla; - Chk__CheckInconsistAction act; - char msg[CHK_MSG_BUFLEN] = { 0 }; - uint64_t seq = 0; - uint32_t options[2]; - uint32_t option_nr = 0; - uint32_t detail_nr = 0; - int decision = -1; - int result = 0; - int rc = 0; + struct chk_engine_ult *ult; + struct chk_pm_orphan_args *cpoa = args; + struct chk_pool_rec *cpr = cpoa->cpoa_pool; + struct chk_instance *ins = cpr->cpr_ins; + struct chk_property *prop = &ins->ci_prop; + struct chk_bookmark *cbk = &cpr->cpr_bk; + d_rank_list_t ranks = {0}; + struct chk_report_unit cru = {0}; + char *strs[2]; + d_iov_t iovs[2]; + d_sg_list_t sgl; + d_sg_list_t *details = NULL; + Chk__CheckInconsistClass cla; + Chk__CheckInconsistAction act; + char msg[CHK_MSG_BUFLEN] = {0}; + uint64_t seq = 0; + uint32_t options[2]; + uint32_t option_nr = 0; + uint32_t detail_nr = 0; + uint32_t status = cpoa->cpoa_status; + d_rank_t rank = cpoa->cpoa_rank; + int index = cpoa->cpoa_index; + int decision = -1; + int result = 0; + int rc = 0; /* * NOTE: The subsequent check after handling orphan pm entry will not access the @@ -453,32 +466,78 @@ chk_engine_pm_orphan(struct chk_pool_rec *cpr, d_rank_t rank, int index, uint32_ goto report; out: - return chk_engine_post_repair(cpr, &result, rc <= 0); + ult = container_of(args, struct chk_engine_ult, ceu_data); + ult->ceu_result = chk_engine_post_repair(cpr, &result, rc <= 0); } static int -chk_engine_pm_dangling(struct chk_pool_rec *cpr, struct pool_map *map, struct pool_component *comp, - uint32_t status) +chk_engine_pm_orphan(struct chk_pool_rec *cpr, d_rank_t rank, int index, uint32_t status) { - struct chk_instance *ins = cpr->cpr_ins; - struct chk_property *prop = &ins->ci_prop; - struct chk_bookmark *cbk = &cpr->cpr_bk; - struct chk_report_unit cru = { 0 }; - char *strs[2]; - d_iov_t iovs[2]; - d_sg_list_t sgl; - d_sg_list_t *details = NULL; - Chk__CheckInconsistClass cla; - Chk__CheckInconsistAction act; - char suggested[CHK_MSG_BUFLEN] = { 0 }; - char msg[CHK_MSG_BUFLEN] = { 0 }; - uint64_t seq = 0; - uint32_t options[2]; - uint32_t option_nr = 0; - uint32_t detail_nr = 0; - int decision = -1; - int result = 0; - int rc = 0; + struct chk_engine_ult *ult; + struct chk_pm_orphan_args *args; + int rc; + + D_ALLOC(ult, sizeof(*ult) + sizeof(*args)); + if (ult == NULL) + return -DER_NOMEM; + + args = (struct chk_pm_orphan_args *)&ult->ceu_data[0]; + args->cpoa_pool = cpr; + args->cpoa_rank = rank; + args->cpoa_index = index; + args->cpoa_status = status; + + ult->ceu_ult = ABT_THREAD_NULL; + d_list_add_tail(&ult->ceu_link, &cpr->cpr_ult_list); + + rc = dss_ult_create(chk_engine_pm_orphan_ult, args, DSS_XS_SYS, 0, 0, &ult->ceu_ult); + if (rc != 0) { + d_list_del(&ult->ceu_link); + D_FREE(ult); + D_ERROR(DF_ENGINE " failed to create ULT to handle PM orphan for pool " DF_UUID + ", rank %u, index %d, status %s: " DF_RC "\n", + DP_ENGINE(cpr->cpr_ins), DP_UUID(cpr->cpr_uuid), rank, index, + pool_map_status2name(status), DP_RC(rc)); + } + + return rc; +} + +struct chk_pm_dangling_args { + struct chk_pool_rec *cpda_pool; + struct pool_map *cpda_map; + struct pool_component *cpda_comp; + uint32_t cpda_status; +}; + +static void +chk_engine_pm_dangling_ult(void *args) +{ + struct chk_engine_ult *ult; + struct chk_pm_dangling_args *cpda = args; + struct pool_map *map = cpda->cpda_map; + struct pool_component *comp = cpda->cpda_comp; + struct chk_pool_rec *cpr = cpda->cpda_pool; + struct chk_instance *ins = cpr->cpr_ins; + struct chk_property *prop = &ins->ci_prop; + struct chk_bookmark *cbk = &cpr->cpr_bk; + struct chk_report_unit cru = {0}; + char *strs[2]; + d_iov_t iovs[2]; + d_sg_list_t sgl; + d_sg_list_t *details = NULL; + Chk__CheckInconsistClass cla; + Chk__CheckInconsistAction act; + char suggested[CHK_MSG_BUFLEN] = {0}; + char msg[CHK_MSG_BUFLEN] = {0}; + uint64_t seq = 0; + uint32_t options[2]; + uint32_t option_nr = 0; + uint32_t detail_nr = 0; + uint32_t status = cpda->cpda_status; + int decision = -1; + int result = 0; + int rc = 0; D_ASSERTF(status == PO_COMP_ST_DOWNOUT || status == PO_COMP_ST_DOWN, "Unexpected pool map status %u\n", status); @@ -635,20 +694,67 @@ chk_engine_pm_dangling(struct chk_pool_rec *cpr, struct pool_map *map, struct po goto report; out: - return chk_engine_post_repair(cpr, &result, rc <= 0); + ult = container_of(args, struct chk_engine_ult, ceu_data); + ult->ceu_result = chk_engine_post_repair(cpr, &result, rc <= 0); } static int -chk_engine_pm_unknown_target(struct chk_pool_rec *cpr, struct pool_component *comp) +chk_engine_pm_dangling(struct chk_pool_rec *cpr, struct pool_map *map, struct pool_component *comp, + uint32_t status) { - struct chk_instance *ins = cpr->cpr_ins; - struct chk_bookmark *cbk = &cpr->cpr_bk; - struct chk_report_unit cru = { 0 }; - Chk__CheckInconsistClass cla; - Chk__CheckInconsistAction act; - char msg[CHK_MSG_BUFLEN] = { 0 }; - uint64_t seq = 0; - int rc; + struct chk_engine_ult *ult; + struct chk_pm_dangling_args *args; + int rc; + + D_ALLOC(ult, sizeof(*ult) + sizeof(*args)); + if (ult == NULL) + return -DER_NOMEM; + + args = (struct chk_pm_dangling_args *)&ult->ceu_data[0]; + args->cpda_pool = cpr; + args->cpda_map = map; + args->cpda_comp = comp; + args->cpda_status = status; + + ult->ceu_ult = ABT_THREAD_NULL; + d_list_add_tail(&ult->ceu_link, &cpr->cpr_ult_list); + + rc = dss_ult_create(chk_engine_pm_dangling_ult, args, DSS_XS_SYS, 0, 0, &ult->ceu_ult); + if (rc != 0) { + d_list_del(&ult->ceu_link); + D_FREE(ult); + D_ERROR(DF_ENGINE " failed to create ULT to handle PM dangling for pool " DF_UUID + ", type %x, index %d, id %u, status %s: " DF_RC "\n", + DP_ENGINE(cpr->cpr_ins), DP_UUID(cpr->cpr_uuid), comp->co_type, + comp->co_index, comp->co_id, pool_map_status2name(status), DP_RC(rc)); + } + + return rc; +} + +struct chk_engine_unknown_args { + struct chk_pool_rec *ceua_pool; + struct chk_cont_rec *ceua_cont; + struct pool_component *ceua_comp; + uint32_t ceua_exp_tgt_nr; +}; + +static void +chk_engine_handle_unknown_ult(void *args) +{ + struct chk_engine_ult *ult; + struct chk_engine_unknown_args *ceua = args; + struct chk_pool_rec *cpr = ceua->ceua_pool; + struct chk_cont_rec *ccr = ceua->ceua_cont; + struct pool_component *comp = ceua->ceua_comp; + struct chk_instance *ins = cpr->cpr_ins; + struct chk_bookmark *cbk = &cpr->cpr_bk; + struct chk_report_unit cru = {0}; + Chk__CheckInconsistClass cla; + Chk__CheckInconsistAction act; + char msg[CHK_MSG_BUFLEN] = {0}; + uint64_t seq = 0; + int rc; cla = CHK__CHECK_INCONSIST_CLASS__CIC_UNKNOWN; act = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; @@ -663,22 +769,89 @@ chk_engine_pm_unknown_target(struct chk_pool_rec *cpr, struct pool_component *co cru.cru_rank = dss_self_rank(); cru.cru_pool = (uuid_t *)&cpr->cpr_uuid; cru.cru_pool_label = cpr->cpr_label; - snprintf(msg, CHK_MSG_BUFLEN - 1, - "Check engine detects unknown target entry in pool map for pool " - DF_UUIDF", rank %u, index %u, status %u, skip it. You can change " - "its status via DAOS debug tool if it is not for downgraded case.\n", - DP_UUID(cpr->cpr_uuid), comp->co_rank, comp->co_index, comp->co_status); + + if (ccr != NULL) { + cru.cru_cont = (uuid_t *)&ccr->ccr_uuid; + if (ccr->ccr_label_prop != NULL && ccr->ccr_label_prop->dpp_entries != NULL) + cru.cru_cont_label = ccr->ccr_label_prop->dpp_entries[0].dpe_str; + snprintf(msg, CHK_MSG_BUFLEN - 1, + "The container " DF_UUID " in the pool " DF_UUID + " lost some shards: %u vs %u\n", + DP_UUID(ccr->ccr_uuid), DP_UUID(cpr->cpr_uuid), ccr->ccr_tgt_nr, + ceua->ceua_exp_tgt_nr); + } else { + D_ASSERT(comp != NULL); + + snprintf( + msg, CHK_MSG_BUFLEN - 1, + "Check engine detects unknown target entry in pool map for pool " DF_UUIDF + ", rank %u, index %u, status %u, skip it. You can change its status via DAOS " + "debug tool if it is not for downgraded case.\n", + DP_UUID(cpr->cpr_uuid), comp->co_rank, comp->co_index, comp->co_status); + } + cru.cru_msg = msg; cru.cru_result = 0; rc = chk_engine_report(&cru, &seq, NULL); - D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, - DF_ENGINE" detects unknown target entry in pool map for pool "DF_UUIDF", rank %u, " - "target %u, action %u (no interact), handle_rc 0, report_rc %d, decision 0\n", - DP_ENGINE(ins), DP_UUID(cpr->cpr_uuid), comp->co_rank, comp->co_index, act, rc); + if (ccr != NULL) + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_ENGINE " detects incomplete container " DF_UUIDF "/" DF_UUID "shards " + "%u vs %u, ignore it.\n", + DP_ENGINE(ins), DP_UUID(cpr->cpr_uuid), DP_UUID(ccr->ccr_uuid), + ccr->ccr_tgt_nr, ceua->ceua_exp_tgt_nr); + else + D_CDEBUG(rc != 0, DLOG_ERR, DLOG_INFO, + DF_ENGINE " detects unknown target entry in pool map for pool " DF_UUIDF + ", rank %u, target %u, ignore it, report_rc %d\n", + DP_ENGINE(ins), DP_UUID(cpr->cpr_uuid), comp->co_rank, comp->co_index, rc); - return chk_engine_post_repair(cpr, &rc, rc <= 0); + ult = container_of(args, struct chk_engine_ult, ceu_data); + ult->ceu_result = chk_engine_post_repair(cpr, &rc, rc <= 0); +} + +static int +chk_engine_handle_unknown(struct chk_pool_rec *cpr, struct chk_cont_rec *ccr, + struct pool_component *comp, uint32_t exp_tgt_nr) +{ + struct chk_engine_ult *ult; + struct chk_engine_unknown_args *args; + int rc; + + D_ALLOC(ult, sizeof(*ult) + sizeof(*args)); + if (ult == NULL) + return -DER_NOMEM; + + args = (struct chk_engine_unknown_args *)&ult->ceu_data[0]; + args->ceua_pool = cpr; + args->ceua_cont = ccr; + args->ceua_comp = comp; + args->ceua_exp_tgt_nr = exp_tgt_nr; + + ult->ceu_ult = ABT_THREAD_NULL; + d_list_add_tail(&ult->ceu_link, &cpr->cpr_ult_list); + + rc = dss_ult_create(chk_engine_handle_unknown_ult, args, DSS_XS_SYS, 0, 0, &ult->ceu_ult); + if (rc != 0) { + d_list_del(&ult->ceu_link); + D_FREE(ult); + if (ccr != NULL) + D_ERROR(DF_ENGINE + " failed to create ULT to handle incomplete container " DF_UUID + "/" DF_UUID ", shards %d/%d: " DF_RC "\n", + DP_ENGINE(cpr->cpr_ins), DP_UUID(cpr->cpr_uuid), + DP_UUID(ccr->ccr_uuid), ccr->ccr_tgt_nr, exp_tgt_nr, DP_RC(rc)); + else + D_ERROR(DF_ENGINE + " failed to create ULT to handle PM unknown for pool " DF_UUID + ", type %x, index %d, id %u, status %s: " DF_RC "\n", + DP_ENGINE(cpr->cpr_ins), DP_UUID(cpr->cpr_uuid), comp->co_type, + comp->co_index, comp->co_id, pool_map_status2name(comp->co_status), + DP_RC(rc)); + } + + return rc; } static int @@ -750,7 +923,7 @@ chk_engine_pool_mbs_one(struct chk_pool_rec *cpr, struct pool_map *map, struct c * layout? It is better to keep it there with reporting it * to admin who can adjust the status via DAOS debug tool. */ - rc = chk_engine_pm_unknown_target(cpr, comp); + rc = chk_engine_handle_unknown(cpr, NULL, comp, 0); break; } @@ -947,6 +1120,45 @@ chk_engine_bad_pool_label(struct chk_pool_rec *cpr, struct ds_pool_svc *svc) return chk_engine_post_repair(cpr, &result, rc <= 0); } +struct chk_cont_cleanup_args { + struct chk_pool_rec *ccca_pool; + struct chk_cont_rec *ccca_cont; + struct cont_svc *ccca_svc; +}; + +static int +chk_engine_cont_cleanup_one(struct chk_pool_rec *cpr, struct chk_cont_rec *ccr, + struct cont_svc *svc, void (*func)(void *)) +{ + struct chk_engine_ult *ult; + struct chk_cont_cleanup_args *args; + int rc; + + D_ALLOC(ult, sizeof(*ult) + sizeof(*args)); + if (ult == NULL) + return -DER_NOMEM; + + args = (struct chk_cont_cleanup_args *)&ult->ceu_data[0]; + args->ccca_pool = cpr; + args->ccca_cont = ccr; + args->ccca_svc = svc; + + ult->ceu_ult = ABT_THREAD_NULL; + d_list_add_tail(&ult->ceu_link, &cpr->cpr_ult_list); + + rc = dss_ult_create(func, args, DSS_XS_SYS, 0, 0, &ult->ceu_ult); + if (rc != 0) { + d_list_del(&ult->ceu_link); + D_FREE(ult); + D_ERROR(DF_ENGINE " failed to create ULT to cleanup container " DF_UUID "/" DF_UUID + ": " DF_RC "\n", + DP_ENGINE(cpr->cpr_ins), DP_UUID(cpr->cpr_uuid), DP_UUID(ccr->ccr_uuid), + DP_RC(rc)); + } + + return rc; +} + static int chk_engine_cont_list_init(uuid_t pool, struct chk_cont_list_aggregator *aggregator) { @@ -1004,27 +1216,32 @@ chk_engine_cont_list_remote_cb(struct chk_co_rpc_cb_args *cb_args) cb_args->cb_nr); } -static int -chk_engine_cont_orphan(struct chk_pool_rec *cpr, struct chk_cont_rec *ccr, struct cont_svc *svc) +static void +chk_engine_cont_orphan_ult(void *args) { - struct chk_instance *ins = cpr->cpr_ins; - struct chk_property *prop = &ins->ci_prop; - struct chk_bookmark *cbk = &cpr->cpr_bk; - struct chk_report_unit cru = { 0 }; - char *strs[2]; - d_iov_t iovs[2]; - d_sg_list_t sgl; - d_sg_list_t *details = NULL; - Chk__CheckInconsistClass cla; - Chk__CheckInconsistAction act; - char msg[CHK_MSG_BUFLEN] = { 0 }; - uint64_t seq = 0; - uint32_t options[2]; - uint32_t option_nr = 0; - uint32_t detail_nr = 0; - int decision = -1; - int result = 0; - int rc = 0; + struct chk_engine_ult *ult; + struct chk_cont_cleanup_args *ccca = args; + struct chk_pool_rec *cpr = ccca->ccca_pool; + struct chk_cont_rec *ccr = ccca->ccca_cont; + struct cont_svc *svc = ccca->ccca_svc; + struct chk_instance *ins = cpr->cpr_ins; + struct chk_property *prop = &ins->ci_prop; + struct chk_bookmark *cbk = &cpr->cpr_bk; + struct chk_report_unit cru = {0}; + char *strs[2]; + d_iov_t iovs[2]; + d_sg_list_t sgl; + d_sg_list_t *details = NULL; + Chk__CheckInconsistClass cla; + Chk__CheckInconsistAction act; + char msg[CHK_MSG_BUFLEN] = {0}; + uint64_t seq = 0; + uint32_t options[2]; + uint32_t option_nr = 0; + uint32_t detail_nr = 0; + int decision = -1; + int result = 0; + int rc = 0; cla = CHK__CHECK_INCONSIST_CLASS__CIC_CONT_NONEXIST_ON_PS; act = prop->cp_policies[cla]; @@ -1167,7 +1384,8 @@ chk_engine_cont_orphan(struct chk_pool_rec *cpr, struct chk_cont_rec *ccr, struc /* NOTE: For orphan container, mark it as 'skip' since we do not support to add it back. */ ccr->ccr_skip = 1; - return chk_engine_post_repair(cpr, &result, rc <= 0); + ult = container_of(args, struct chk_engine_ult, ceu_data); + ult->ceu_result = chk_engine_post_repair(cpr, &result, rc <= 0); } static daos_prop_t * @@ -1267,29 +1485,34 @@ chk_engine_ccr2label(struct chk_cont_rec *ccr, bool prefer_target) return NULL; } -static int -chk_engine_cont_set_label(struct chk_pool_rec *cpr, struct chk_cont_rec *ccr, struct cont_svc *svc) +static void +chk_engine_cont_set_label_ult(void *args) { - struct chk_instance *ins = cpr->cpr_ins; - struct chk_property *prop = &ins->ci_prop; - struct chk_bookmark *cbk = &cpr->cpr_bk; - daos_prop_t *prop_tmp = NULL; - struct chk_report_unit cru = { 0 }; - char strs[3][CHK_MSG_BUFLEN]; - d_iov_t iovs[3]; - d_sg_list_t sgl; - d_sg_list_t *details = NULL; - char *label = NULL; - Chk__CheckInconsistClass cla; - Chk__CheckInconsistAction act; - char msg[CHK_MSG_BUFLEN] = { 0 }; - uint64_t seq = 0; - uint32_t options[3]; - uint32_t option_nr = 0; - uint32_t detail_nr = 0; - int decision = -1; - int result = 0; - int rc = 0; + struct chk_engine_ult *ult; + struct chk_cont_cleanup_args *ccca = args; + struct chk_pool_rec *cpr = ccca->ccca_pool; + struct chk_cont_rec *ccr = ccca->ccca_cont; + struct cont_svc *svc = ccca->ccca_svc; + struct chk_instance *ins = cpr->cpr_ins; + struct chk_property *prop = &ins->ci_prop; + struct chk_bookmark *cbk = &cpr->cpr_bk; + daos_prop_t *prop_tmp = NULL; + struct chk_report_unit cru = {0}; + char strs[3][CHK_MSG_BUFLEN]; + d_iov_t iovs[3]; + d_sg_list_t sgl; + d_sg_list_t *details = NULL; + char *label = NULL; + Chk__CheckInconsistClass cla; + Chk__CheckInconsistAction act; + char msg[CHK_MSG_BUFLEN] = {0}; + uint64_t seq = 0; + uint32_t options[3]; + uint32_t option_nr = 0; + uint32_t detail_nr = 0; + int decision = -1; + int result = 0; + int rc = 0; cla = CHK__CHECK_INCONSIST_CLASS__CIC_CONT_BAD_LABEL; act = prop->cp_policies[cla]; @@ -1550,7 +1773,8 @@ chk_engine_cont_set_label(struct chk_pool_rec *cpr, struct chk_cont_rec *ccr, st daos_prop_free(prop_tmp); - return chk_engine_post_repair(cpr, &result, rc <= 0); + ult = container_of(args, struct chk_engine_ult, ceu_data); + ult->ceu_result = chk_engine_post_repair(cpr, &result, rc <= 0); } static int @@ -1599,10 +1823,7 @@ chk_engine_cont_cleanup(struct chk_pool_rec *cpr, struct ds_pool_svc *ds_svc, struct chk_instance *ins = cpr->cpr_ins; struct cont_svc *svc; struct chk_cont_rec *ccr; - char msg[CHK_MSG_BUFLEN]; - struct chk_cont_label_cb_args cclca = {0}; - struct chk_report_unit cru = {0}; - uint64_t seq; + struct chk_cont_label_cb_args cclca = {0}; uint32_t exp_tgt_nr = 0; int rc = 0; bool failout; @@ -1635,7 +1856,7 @@ chk_engine_cont_cleanup(struct chk_pool_rec *cpr, struct ds_pool_svc *ds_svc, continue; } - rc = chk_engine_cont_orphan(cpr, ccr, svc); + rc = chk_engine_cont_cleanup_one(cpr, ccr, svc, chk_engine_cont_orphan_ult); if (rc != 0) goto out; } @@ -1652,7 +1873,8 @@ chk_engine_cont_cleanup(struct chk_pool_rec *cpr, struct ds_pool_svc *ds_svc, continue; if (!ccr->ccr_label_checked) { - rc = chk_engine_cont_set_label(cpr, ccr, svc); + rc = chk_engine_cont_cleanup_one(cpr, ccr, svc, + chk_engine_cont_set_label_ult); if (rc != 0) goto out; } @@ -1660,24 +1882,7 @@ chk_engine_cont_cleanup(struct chk_pool_rec *cpr, struct ds_pool_svc *ds_svc, if (likely(ccr->ccr_tgt_nr >= exp_tgt_nr)) continue; - snprintf( - msg, CHK_MSG_BUFLEN - 1, - "The container " DF_UUID " in the pool " DF_UUID " lost some shards: %u/%u\n", - DP_UUID(ccr->ccr_uuid), DP_UUID(cpr->cpr_uuid), ccr->ccr_tgt_nr, exp_tgt_nr); - - cru.cru_msg = msg; - cru.cru_gen = cpr->cpr_bk.cb_gen; - cru.cru_cla = CHK__CHECK_INCONSIST_CLASS__CIC_UNKNOWN; - cru.cru_act = CHK__CHECK_INCONSIST_ACTION__CIA_IGNORE; - cru.cru_rank = dss_self_rank(); - cru.cru_pool = (uuid_t *)&cpr->cpr_uuid; - cru.cru_pool_label = cpr->cpr_label; - cru.cru_cont = (uuid_t *)&ccr->ccr_uuid; - if (ccr->ccr_label_prop != NULL && ccr->ccr_label_prop->dpp_entries != NULL) - cru.cru_cont_label = ccr->ccr_label_prop->dpp_entries[0].dpe_str; - - seq = 0; - chk_engine_report(&cru, &seq, NULL); + chk_engine_handle_unknown(cpr, ccr, NULL, exp_tgt_nr); } out: @@ -1728,6 +1933,26 @@ chk_engine_pool_notify(struct chk_pool_rec *cpr) } } +static int +chk_engine_wait_ults(struct chk_pool_rec *cpr) +{ + struct chk_engine_ult *ult; + int rc = 0; + + while ((ult = d_list_pop_entry(&cpr->cpr_ult_list, struct chk_engine_ult, ceu_link)) != + NULL) { + if (ult->ceu_ult != ABT_THREAD_NULL) + ABT_thread_free(&ult->ceu_ult); + + if (rc == 0) + rc = ult->ceu_result; + + D_FREE(ult); + } + + return rc; +} + static void chk_engine_pool_ult(void *args) { @@ -1788,6 +2013,11 @@ chk_engine_pool_ult(void *args) if (rc != 0 || cpr->cpr_skip || cpr->cpr_stop) goto out; + /* Wait for the ULTs that handle orphan, dangling and unknown pool map components. */ + rc = chk_engine_wait_ults(cpr); + if (rc != 0) + goto out; + if (cpr->cpr_map_refreshed) { /* * Under dryrun mode, we cannot make the changed pool map to be used by @@ -1867,6 +2097,10 @@ chk_engine_pool_ult(void *args) rc = ds_pool_svc_schedule_reconf(svc); out: + rc1 = chk_engine_wait_ults(cpr); + if (rc == 0) + rc = rc1; + chk_engine_cont_list_fini(&aggregator); if (map != NULL) pool_map_decref(map); @@ -3177,13 +3411,13 @@ chk_engine_set_policy(uint64_t gen, uint32_t policy_nr, struct chk_policy *polic static int chk_engine_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) { - struct chk_instance *ins = chk_engine; - struct chk_pending_rec *cpr = NULL; - struct chk_pending_rec *tmp = NULL; - struct chk_pool_rec *pool = NULL; - d_iov_t kiov; - d_iov_t riov; - int rc; + struct chk_instance *ins = chk_engine; + struct chk_pending_rec *cpr = NULL; + struct chk_pool_rec *pool = NULL; + d_iov_t kiov; + d_iov_t riov; + int rc; + bool report_fail = false; D_ASSERT(cru->cru_pool != NULL); @@ -3220,22 +3454,19 @@ chk_engine_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) cru->cru_detail_nr, cru->cru_details, *seq); if (unlikely(rc == -DER_AGAIN)) { D_ASSERT(cru->cru_act == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT); + D_ASSERT(cpr != NULL); - rc = chk_pending_del(ins, *seq, &tmp); - if (rc == 0) - D_ASSERT(tmp == NULL); - else if (rc != -DER_NONEXIST) - goto log; - - chk_pending_destroy(cpr); + cpr->cpr_busy = 0; + chk_pending_del(ins, *seq, NULL); cpr = NULL; goto new_seq; } - /* Check cpr->cpr_action for the case of "dmg check repair" by race. */ - if (rc == 0 && pool != NULL && - likely(cpr->cpr_action == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT)) + if (rc != 0) + report_fail = true; + else if (pool != NULL && cpr->cpr_action == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT) + /* Check cpr->cpr_action for the case of "dmg check repair" by race. */ pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_PENDING; log: @@ -3273,11 +3504,18 @@ chk_engine_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) goto again; out: - if (pool != NULL && pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING) - pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; + if (cpr != NULL) { + if (report_fail) { + cpr->cpr_busy = 0; + chk_pending_del(ins, *seq, NULL); + } else { + chk_pending_destroy(cpr); + } + } - if (cpr != NULL) - chk_pending_destroy(cpr); + if (pool != NULL && pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING && + d_list_empty(&pool->cpr_pending_list)) + pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; return rc; } diff --git a/src/chk/chk_internal.h b/src/chk/chk_internal.h index e4d6d52f3fd..34de268d959 100644 --- a/src/chk/chk_internal.h +++ b/src/chk/chk_internal.h @@ -591,6 +591,14 @@ struct chk_pool_shard { chk_pool_free_data_t cps_free_cb; }; +struct chk_engine_ult { + /* Link into chk_pool_rec::cpr_ult_list. */ + d_list_t ceu_link; + ABT_thread ceu_ult; + int ceu_result; + char ceu_data[0]; +}; + /* Check engine uses it to trace pools. Query logic uses it to organize the result. */ struct chk_pool_rec { /* Link into chk_instance::ci_pool_list. */ @@ -601,6 +609,8 @@ struct chk_pool_rec { d_list_t cpr_shard_list; /* The list of chk_pending_rec. */ d_list_t cpr_pending_list; + /* The list of active ULTs that are handling some inconsistency and maybe blocked. */ + d_list_t cpr_ult_list; uint32_t cpr_shard_nr; uint32_t cpr_started:1, cpr_start_post:1, @@ -742,6 +752,8 @@ int chk_pool_add_shard(daos_handle_t hdl, d_list_t *head, uuid_t uuid, d_rank_t void chk_pool_shard_cleanup(struct chk_instance *ins); +int chk_pending_lookup(struct chk_instance *ins, uint64_t seq, struct chk_pending_rec **cpr); + int chk_pending_add(struct chk_instance *ins, d_list_t *pool_head, d_list_t *rank_head, uuid_t uuid, uint64_t seq, uint32_t rank, uint32_t cla, uint32_t option_nr, uint32_t *options, struct chk_pending_rec **cpr); @@ -1039,6 +1051,7 @@ chk_pool_put(struct chk_pool_rec *cpr) D_ASSERT(cpr->cpr_thread == ABT_THREAD_NULL); D_ASSERT(d_list_empty(&cpr->cpr_pending_list)); D_ASSERT(d_list_empty(&cpr->cpr_shutdown_link)); + D_ASSERT(d_list_empty(&cpr->cpr_ult_list)); while ((cps = d_list_pop_entry(&cpr->cpr_shard_list, struct chk_pool_shard, cps_link)) != NULL) { diff --git a/src/chk/chk_leader.c b/src/chk/chk_leader.c index 3c381346663..f6bece1ce03 100644 --- a/src/chk/chk_leader.c +++ b/src/chk/chk_leader.c @@ -3462,12 +3462,10 @@ chk_leader_act_internal(struct chk_instance *ins, uint64_t seq, uint32_t act) d_iov_t riov; int rc; - rc = chk_pending_del(ins, seq, &pending); + rc = chk_pending_lookup(ins, seq, &pending); if (rc != 0) goto out; - D_ASSERT(pending->cpr_busy); - if (pending->cpr_on_leader) { ABT_mutex_lock(pending->cpr_mutex); /* @@ -3477,20 +3475,25 @@ chk_leader_act_internal(struct chk_instance *ins, uint64_t seq, uint32_t act) pending->cpr_action = act; ABT_cond_broadcast(pending->cpr_cond); ABT_mutex_unlock(pending->cpr_mutex); + chk_pending_del(ins, seq, &pending); } else { d_iov_set(&riov, NULL, 0); d_iov_set(&kiov, pending->cpr_uuid, sizeof(uuid_t)); rc = dbtree_lookup(ins->ci_pool_hdl, &kiov, &riov); - if (rc == 0) { + if (rc == 0) pool = (struct chk_pool_rec *)riov.iov_buf; - if (pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING) - pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; - } rc = chk_act_remote(ins->ci_ranks, ins->ci_bk.cb_gen, seq, pending->cpr_class, act, pending->cpr_rank); + if (rc == 0) { + pending->cpr_busy = 0; + chk_pending_del(ins, seq, NULL); - chk_pending_destroy(pending); + if (pool != NULL && + pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING && + d_list_empty(&pool->cpr_pending_list)) + pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; + } } out: @@ -3584,14 +3587,15 @@ chk_leader_set_policy(uint32_t policy_nr, struct chk_policy *policies) int chk_leader_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) { - struct chk_instance *ins = chk_leader; - struct chk_bookmark *cbk = &ins->ci_bk; - struct chk_pending_rec *cpr = NULL; - struct chk_pool_rec *pool = NULL; - struct chk_rank_rec *crr = NULL; - d_iov_t kiov; - d_iov_t riov; - int rc; + struct chk_instance *ins = chk_leader; + struct chk_bookmark *cbk = &ins->ci_bk; + struct chk_pending_rec *cpr = NULL; + struct chk_pool_rec *pool = NULL; + struct chk_rank_rec *crr = NULL; + d_iov_t kiov; + d_iov_t riov; + int rc; + bool upcall_fail = false; CHK_IS_READY(ins); @@ -3658,9 +3662,10 @@ chk_leader_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) cru->cru_cont, cru->cru_cont_label, cru->cru_obj, cru->cru_dkey, cru->cru_akey, cru->cru_msg, cru->cru_option_nr, cru->cru_options, cru->cru_detail_nr, cru->cru_details); - /* Check cpr->cpr_action for the case of "dmg check repair" by race. */ - if (rc == 0 && pool != NULL && - likely(cpr->cpr_action == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT)) + if (rc != 0) + upcall_fail = true; + else if (pool != NULL && cpr->cpr_action == CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT) + /* Check cpr->cpr_action for the case of "dmg check repair" by race. */ pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_PENDING; log: @@ -3700,14 +3705,19 @@ chk_leader_report(struct chk_report_unit *cru, uint64_t *seq, int *decision) goto again; out: + if (cpr != NULL) { + if (upcall_fail) { + cpr->cpr_busy = 0; + chk_pending_del(ins, *seq, NULL); + } else if (rc != 0 || decision != NULL) { + chk_pending_destroy(cpr); + } + } + if (pool != NULL && pool->cpr_bk.cb_pool_status == CHK__CHECK_POOL_STATUS__CPS_PENDING && - (rc != 0 || (cpr != NULL && - cpr->cpr_action != CHK__CHECK_INCONSIST_ACTION__CIA_INTERACT))) + d_list_empty(&pool->cpr_pending_list)) pool->cpr_bk.cb_pool_status = CHK__CHECK_POOL_STATUS__CPS_CHECKING; - if ((rc != 0 || decision != NULL) && cpr != NULL) - chk_pending_destroy(cpr); - return rc; } diff --git a/src/tests/suite/daos_cr.c b/src/tests/suite/daos_cr.c index f2be8fbc056..e791ec7372e 100644 --- a/src/tests/suite/daos_cr.c +++ b/src/tests/suite/daos_cr.c @@ -3903,6 +3903,119 @@ cr_lost_rank0(void **state) cr_cleanup(arg, &pool, 1); } +/* + * 1. Create two pools and three containers for each. + * 2. Fault injection to make all containers' label inconsistent. + * 3. Start checker with CONT_BAD_LABEL:CIA_INTERACT + * 4. Query checker, should show six interactions. + * 5. Check repair the container label with trust PS (pool/container service). + * 6. Query checker, container label should have been repaired. + * 7. Switch to normal mode and verify the container label. + * 8. Cleanup. + */ +static void +cr_scan_cont_parallel(void **state) +{ + test_arg_t *arg = *state; + struct test_pool pools[2] = {0}; + struct test_cont conts[6] = {0}; + struct daos_check_info dcis[2] = {0}; + char *label = NULL; + uint32_t classes[3]; + uint32_t actions[3]; + int rc; + int i; + int j; + bool once; + + FAULT_INJECTION_REQUIRED(); + + print_message("CR30: scan multiple containers in parallel\n"); + + for (i = 0; i < 2; i++) { + rc = cr_pool_create(state, &pools[i], true, TCC_NONE); + assert_rc_equal(rc, 0); + + for (j = 0; j < 3; j++) { + rc = cr_cont_create(state, &pools[i], &conts[i * 3 + j], 1); + assert_rc_equal(rc, 0); + } + } + + rc = cr_system_stop(false); + assert_rc_equal(rc, 0); + + rc = cr_mode_switch(true); + assert_rc_equal(rc, 0); + + rc = cr_check_start(TCSF_RESET, 0, NULL, "CONT_BAD_LABEL:CIA_INTERACT"); + assert_rc_equal(rc, 0); + + for (i = 0; i < 3; i++) { + classes[i] = TCC_CONT_BAD_LABEL; + actions[i] = TCA_INTERACT; + } + + for (i = 0; i < 2; i++, once = false) { +again: + cr_pool_wait(1, &pools[i].pool_uuid, &dcis[i]); + + rc = cr_ins_verify(&dcis[i], TCIS_RUNNING); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dcis[i], pools[i].pool_uuid, TCPS_PENDING, 3, classes, actions, + NULL); + /* Report for different containers maybe asynchronously, wait 3 seconds and retry */ + if (rc == -DER_INVAL && !once) { + sleep(3); + once = true; + goto again; + } + + assert_rc_equal(rc, 0); + } + + rc = cr_check_set_policy(TCPF_NONE, "CONT_BAD_LABEL:CIA_TRUST_PS"); + assert_rc_equal(rc, 0); + + for (i = 0; i < 3; i++) + actions[i] = TCA_TRUST_PS; + + for (i = 0; i < 2; i++) { + cr_ins_wait(1, &pools[i].pool_uuid, &dcis[i]); + + rc = cr_ins_verify(&dcis[i], TCIS_COMPLETED); + assert_rc_equal(rc, 0); + + rc = cr_pool_verify(&dcis[i], pools[i].pool_uuid, TCPS_CHECKED, 3, classes, actions, + NULL); + assert_rc_equal(rc, 0); + } + + rc = cr_mode_switch(false); + assert_rc_equal(rc, 0); + + rc = cr_system_start(); + assert_rc_equal(rc, 0); + + for (i = 0; i < 2; i++) { + for (j = 0; j < 3; j++) { + rc = cr_cont_get_label(state, &pools[i], &conts[i * 3 + j], j == 0, &label); + assert_rc_equal(rc, 0); + + D_ASSERTF(strcmp(label, conts[i * 3 + j].label) == 0, + "Cont (" DF_UUID ") label is not repaired: %s vs %s\n", + DP_UUID(conts[i * 3 + j].uuid), label, conts[i * 3 + j].label); + + D_FREE(label); + } + + cr_dci_fini(&dcis[i]); + } + + cr_cleanup(arg, pools, 2); +} + /* clang-format off */ static const struct CMUnitTest cr_tests[] = { { "CR1: start checker for specified pools", @@ -3963,6 +4076,8 @@ static const struct CMUnitTest cr_tests[] = { cr_maintenance_mode, async_disable, test_case_teardown}, { "CR29: CR with rank 0 excluded at the beginning", cr_lost_rank0, async_disable, test_case_teardown}, + { "CR30: scan multiple containers in parallel", + cr_scan_cont_parallel, async_disable, test_case_teardown}, }; /* clang-format on */