From b2803315bca717fd69b194c6347303ae2b159aa5 Mon Sep 17 00:00:00 2001 From: Liang Zhen Date: Sat, 24 Jan 2026 10:58:30 +0800 Subject: [PATCH 1/2] DAOS-18487 object: control EC rebuild resource consumption A degraded EC read will allocate and register an extra buffer to recover data, which may cause ENOMEM in some cases. While this workaround does not prevent dynamic buffer allocation and registration, it does provide relatively precise control over the resources consumed by degraded EC reads. Signed-off-by: Liang Zhen --- src/object/srv_obj_migrate.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index 09d7af29e8f..07c2357f6cb 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -1978,6 +1978,21 @@ migrate_one_ult(void *arg) } data_size = daos_iods_len(mrone->mo_iods, mrone->mo_iod_num); + if (daos_oclass_is_ec(&mrone->mo_oca)) { + /* NB: this is a workaround for EC object: + * The fetch buffer is taken from a pre-registered (R)DMA buffer; + * however, a degraded EC read will allocate and register an extra + * buffer to recover data. + * + * Currently, the resource manager cannot control this extra allocation, + * which can lead to increased memory consumption. + * + * While this workaround does not prevent dynamic buffer allocation and + * registration, it does provide relatively precise control over the + * resources consumed by degraded EC reads. 
+ */ + data_size *= MIN(8, obj_ec_data_tgt_nr(&mrone->mo_oca)); + } data_size += daos_iods_len(mrone->mo_iods_from_parity, mrone->mo_iods_num_from_parity); From 987dc5cdd979af952c25b83ea90e1947cbe89901 Mon Sep 17 00:00:00 2001 From: Liang Zhen Date: Mon, 26 Jan 2026 20:55:20 +0800 Subject: [PATCH 2/2] DAOS-18487 object: degraded buffer size only impacts resource control Signed-off-by: Liang Zhen --- src/object/srv_obj_migrate.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/object/srv_obj_migrate.c b/src/object/srv_obj_migrate.c index 07c2357f6cb..a94083b2bb8 100644 --- a/src/object/srv_obj_migrate.c +++ b/src/object/srv_obj_migrate.c @@ -1966,6 +1966,7 @@ migrate_one_ult(void *arg) struct migrate_one *mrone = arg; struct migrate_pool_tls *tls; daos_size_t data_size; + daos_size_t degraded_size = 0; int rc = 0; while (daos_fail_check(DAOS_REBUILD_TGT_REBUILD_HANG)) @@ -1991,7 +1992,7 @@ migrate_one_ult(void *arg) * registration, it does provide relatively precise control over the * resources consumed by degraded EC reads. */ - data_size *= MIN(8, obj_ec_data_tgt_nr(&mrone->mo_oca)); + degraded_size = data_size * MIN(8, obj_ec_data_tgt_nr(&mrone->mo_oca)); } data_size += daos_iods_len(mrone->mo_iods_from_parity, mrone->mo_iods_num_from_parity); @@ -2001,13 +2002,13 @@ migrate_one_ult(void *arg) D_ASSERT(data_size != (daos_size_t)-1); - rc = migrate_res_hold(tls, MIGR_DATA, data_size, NULL); + rc = migrate_res_hold(tls, MIGR_DATA, data_size + degraded_size, NULL); if (rc) D_GOTO(out, rc); rc = migrate_dkey(tls, mrone, data_size); - migrate_res_release(tls, MIGR_DATA, data_size); + migrate_res_release(tls, MIGR_DATA, data_size + degraded_size); D_DEBUG(DB_REBUILD, DF_UOID" layout %u migrate dkey "DF_KEY" inflight_size "DF_U64": " DF_RC"\n", DP_UOID(mrone->mo_oid), mrone->mo_oid.id_layout_ver,