From da89f7f58d6e0a662dfdeaf7df7aea9e3e7c02b5 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Mon, 29 Dec 2025 20:26:00 +0900 Subject: [PATCH 1/8] Prefer `ALLOCV` over `ALLOCA` for unknown size `ALLOCA` with too large size may result in stack overflow. Incidentally, this suppresses the GCC false maybe-uninitialized warning in `product_each`. Also shrink `struct product_state` when `sizeof(int) < sizeof(VALUE)`. --- enumerator.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/enumerator.c b/enumerator.c index 89ec5035305753..eb241a3b58dab7 100644 --- a/enumerator.c +++ b/enumerator.c @@ -3581,9 +3581,9 @@ enum_product_enum_size(VALUE obj, VALUE args, VALUE eobj) struct product_state { VALUE obj; VALUE block; + int index; int argc; VALUE *argv; - int index; }; static VALUE product_each(VALUE, struct product_state *); @@ -3622,15 +3622,23 @@ enum_product_run(VALUE obj, VALUE block) { struct enum_product *ptr = enum_product_ptr(obj); int argc = RARRAY_LENINT(ptr->enums); + if (argc == 0) { /* no need to allocate state.argv */ + rb_funcall(block, id_call, 1, rb_ary_new()); + return obj; + } + + VALUE argsbuf = 0; struct product_state state = { .obj = obj, .block = block, .index = 0, .argc = argc, - .argv = ALLOCA_N(VALUE, argc), + .argv = ALLOCV_N(VALUE, argsbuf, argc), }; - return product_each(obj, &state); + VALUE ret = product_each(obj, &state); + ALLOCV_END(argsbuf); + return ret; } /* From 56147001ec439a7d6b887402c8a66d3ee625e598 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Mon, 22 Dec 2025 19:38:47 -0500 Subject: [PATCH 2/8] Move MEMO_NEW to imemo.c and rename to rb_imemo_memo_new --- enum.c | 36 ++++++++++++++++++------------------ enumerator.c | 4 ++-- imemo.c | 11 +++++++++++ internal/imemo.h | 12 +----------- re.c | 4 ++-- 5 files changed, 34 insertions(+), 33 deletions(-) diff --git a/enum.c b/enum.c index ba0dc94744cc3b..765c093da79ae6 100644 --- a/enum.c +++ b/enum.c @@ -127,7 +127,7 @@ static VALUE enum_grep0(VALUE obj, VALUE pat, VALUE test) { VALUE ary = rb_ary_new(); - struct MEMO *memo = MEMO_NEW(pat, ary, test); + struct MEMO *memo = rb_imemo_memo_new(pat, ary, test); rb_block_call_func_t fn; if (rb_block_given_p()) { fn = grep_iter_i; @@ -317,7 +317,7 @@ enum_count(int argc, VALUE *argv, VALUE obj) func = count_i; } - memo = MEMO_NEW(item, 0, 0); + memo = rb_imemo_memo_new(item, 0, 0); rb_block_call(obj, id_each, 0, 0, func, (VALUE)memo); return imemo_count_value(memo); } @@ -382,7 +382,7 @@ enum_find(int argc, VALUE *argv, VALUE obj) if_none = rb_check_arity(argc, 0, 1) ? argv[0] : Qnil; RETURN_ENUMERATOR(obj, argc, argv); - memo = MEMO_NEW(Qundef, 0, 0); + memo = rb_imemo_memo_new(Qundef, 0, 0); if (rb_block_pair_yield_optimizable()) rb_block_call2(obj, id_each, 0, 0, find_i_fast, (VALUE)memo, RB_BLOCK_NO_USE_PACKED_ARGS); else @@ -467,7 +467,7 @@ enum_find_index(int argc, VALUE *argv, VALUE obj) func = find_index_i; } - memo = MEMO_NEW(Qnil, condition_value, 0); + memo = rb_imemo_memo_new(Qnil, condition_value, 0); rb_block_call(obj, id_each, 0, 0, func, (VALUE)memo); return memo->v1; } @@ -1084,7 +1084,7 @@ enum_inject(int argc, VALUE *argv, VALUE obj) return ary_inject_op(obj, init, op); } - memo = MEMO_NEW(init, Qnil, op); + memo = rb_imemo_memo_new(init, Qnil, op); rb_block_call(obj, id_each, 0, 0, iter, (VALUE)memo); if (UNDEF_P(memo->v1)) return Qnil; return memo->v1; @@ -1142,7 +1142,7 @@ enum_partition(VALUE obj) RETURN_SIZED_ENUMERATOR(obj, 0, 0, enum_size); - memo = MEMO_NEW(rb_ary_new(), rb_ary_new(), 0); + memo = rb_imemo_memo_new(rb_ary_new(), rb_ary_new(), 0); rb_block_call(obj, id_each, 0, 0, partition_i, (VALUE)memo); return rb_assoc_new(memo->v1, memo->v2); @@ -1345,7 +1345,7 @@ enum_first(int argc, VALUE *argv, VALUE obj) return enum_take(obj, argv[0]); } else { - memo = MEMO_NEW(Qnil, 0, 0); + memo = rb_imemo_memo_new(Qnil, 0, 0); rb_block_call(obj, id_each, 0, 0, first_i, (VALUE)memo); return memo->v1; } @@ -1722,7 +1722,7 @@ enum_sort_by(VALUE obj) RBASIC_CLEAR_CLASS(ary); buf = rb_ary_hidden_new(SORT_BY_BUFSIZE*2); rb_ary_store(buf, SORT_BY_BUFSIZE*2-1, Qnil); - memo = MEMO_NEW(0, 0, 0); + memo = rb_imemo_memo_new(0, 0, 0); data = (struct sort_by_data *)&memo->v1; RB_OBJ_WRITE(memo, &data->ary, ary); RB_OBJ_WRITE(memo, &data->buf, buf); @@ -1766,7 +1766,7 @@ enum_sort_by(VALUE obj) #define ENUM_BLOCK_CALL(name) \ rb_block_call2(obj, id_each, 0, 0, ENUMFUNC(name), (VALUE)memo, rb_block_given_p() && rb_block_pair_yield_optimizable() ? RB_BLOCK_NO_USE_PACKED_ARGS : 0); -#define MEMO_ENUM_NEW(v1) (rb_check_arity(argc, 0, 1), MEMO_NEW((v1), (argc ? *argv : 0), 0)) +#define MEMO_ENUM_NEW(v1) (rb_check_arity(argc, 0, 1), rb_imemo_memo_new((v1), (argc ? *argv : 0), 0)) #define DEFINE_ENUMFUNCS(name) \ static VALUE enum_##name##_func(VALUE result, struct MEMO *memo); \ @@ -2754,7 +2754,7 @@ enum_min_by(int argc, VALUE *argv, VALUE obj) if (argc && !NIL_P(num = argv[0])) return rb_nmin_run(obj, num, 1, 0, 0); - memo = MEMO_NEW(Qundef, Qnil, 0); + memo = rb_imemo_memo_new(Qundef, Qnil, 0); rb_block_call(obj, id_each, 0, 0, min_by_i, (VALUE)memo); return memo->v2; } @@ -2828,7 +2828,7 @@ enum_max_by(int argc, VALUE *argv, VALUE obj) if (argc && !NIL_P(num = argv[0])) return rb_nmin_run(obj, num, 1, 1, 0); - memo = MEMO_NEW(Qundef, Qnil, 0); + memo = rb_imemo_memo_new(Qundef, Qnil, 0); rb_block_call(obj, id_each, 0, 0, max_by_i, (VALUE)memo); return memo->v2; } @@ -2979,7 +2979,7 @@ member_i(RB_BLOCK_CALL_FUNC_ARGLIST(iter, args)) static VALUE enum_member(VALUE obj, VALUE val) { - struct MEMO *memo = MEMO_NEW(val, Qfalse, 0); + struct MEMO *memo = rb_imemo_memo_new(val, Qfalse, 0); rb_block_call(obj, id_each, 0, 0, member_i, (VALUE)memo); return memo->v2; @@ -3231,7 +3231,7 @@ enum_each_slice(VALUE obj, VALUE n) size = limit_by_enum_size(obj, size); ary = rb_ary_new2(size); arity = rb_block_arity(); - memo = MEMO_NEW(ary, dont_recycle_block_arg(arity), size); + memo = rb_imemo_memo_new(ary, dont_recycle_block_arg(arity), size); rb_block_call(obj, id_each, 0, 0, each_slice_i, (VALUE)memo); ary = memo->v1; if (RARRAY_LEN(ary) > 0) rb_yield(ary); @@ -3307,7 +3307,7 @@ enum_each_cons(VALUE obj, VALUE n) RETURN_SIZED_ENUMERATOR(obj, 1, &n, enum_each_cons_size); arity = rb_block_arity(); if (enum_size_over_p(obj, size)) return obj; - memo = MEMO_NEW(rb_ary_new2(size), dont_recycle_block_arg(arity), size); + memo = rb_imemo_memo_new(rb_ary_new2(size), dont_recycle_block_arg(arity), size); rb_block_call(obj, id_each, 0, 0, each_cons_i, (VALUE)memo); return obj; @@ -3536,7 +3536,7 @@ enum_zip(int argc, VALUE *argv, VALUE obj) } /* TODO: use NODE_DOT2 as memo(v, v, -) */ - memo = MEMO_NEW(result, args, 0); + memo = rb_imemo_memo_new(result, args, 0); rb_block_call(obj, id_each, 0, 0, allary ? zip_ary : zip_i, (VALUE)memo); return result; @@ -3579,7 +3579,7 @@ enum_take(VALUE obj, VALUE n) if (len == 0) return rb_ary_new2(0); result = rb_ary_new2(len); - memo = MEMO_NEW(result, 0, len); + memo = rb_imemo_memo_new(result, 0, len); rb_block_call(obj, id_each, 0, 0, take_i, (VALUE)memo); return result; } @@ -3667,7 +3667,7 @@ enum_drop(VALUE obj, VALUE n) } result = rb_ary_new(); - memo = MEMO_NEW(result, 0, len); + memo = rb_imemo_memo_new(result, 0, len); rb_block_call(obj, id_each, 0, 0, drop_i, (VALUE)memo); return result; } @@ -3726,7 +3726,7 @@ enum_drop_while(VALUE obj) RETURN_ENUMERATOR(obj, 0, 0); result = rb_ary_new(); - memo = MEMO_NEW(result, 0, FALSE); + memo = rb_imemo_memo_new(result, 0, FALSE); rb_block_call(obj, id_each, 0, 0, drop_while_i, (VALUE)memo); return result; } diff --git a/enumerator.c b/enumerator.c index eb241a3b58dab7..8580c605dffc16 100644 --- a/enumerator.c +++ b/enumerator.c @@ -671,7 +671,7 @@ enumerator_with_index(int argc, VALUE *argv, VALUE obj) rb_check_arity(argc, 0, 1); RETURN_SIZED_ENUMERATOR(obj, argc, argv, enumerator_enum_size); memo = (!argc || NIL_P(memo = argv[0])) ? INT2FIX(0) : rb_to_int(memo); - return enumerator_block_call(obj, enumerator_with_index_i, (VALUE)MEMO_NEW(memo, 0, 0)); + return enumerator_block_call(obj, enumerator_with_index_i, (VALUE)rb_imemo_memo_new(memo, 0, 0)); } /* @@ -1613,7 +1613,7 @@ lazy_init_yielder(RB_BLOCK_CALL_FUNC_ARGLIST(_, m)) VALUE memos = rb_attr_get(yielder, id_memo); struct MEMO *result; - result = MEMO_NEW(m, rb_enum_values_pack(argc, argv), + result = rb_imemo_memo_new(m, rb_enum_values_pack(argc, argv), argc > 1 ? LAZY_MEMO_PACKED : 0); return lazy_yielder_result(result, yielder, procs_array, memos, 0); } diff --git a/imemo.c b/imemo.c index 2d63b7cd99549a..58029293764aef 100644 --- a/imemo.c +++ b/imemo.c @@ -97,6 +97,17 @@ rb_free_tmp_buffer(volatile VALUE *store) } } +struct MEMO * +rb_imemo_memo_new(VALUE a, VALUE b, VALUE c) +{ + struct MEMO *memo = IMEMO_NEW(struct MEMO, imemo_memo, 0); + *((VALUE *)&memo->v1) = a; + *((VALUE *)&memo->v2) = b; + *((VALUE *)&memo->u3.value) = c; + + return memo; +} + static VALUE imemo_fields_new(VALUE owner, size_t capa, bool shareable) { diff --git a/internal/imemo.h b/internal/imemo.h index f8bda26f0b50f9..7192631e92a5fc 100644 --- a/internal/imemo.h +++ b/internal/imemo.h @@ -134,6 +134,7 @@ typedef struct rb_imemo_tmpbuf_struct rb_imemo_tmpbuf_t; #endif VALUE rb_imemo_new(enum imemo_type type, VALUE v0, size_t size, bool is_shareable); VALUE rb_imemo_tmpbuf_new(void); +struct MEMO *rb_imemo_memo_new(VALUE a, VALUE b, VALUE c); struct vm_ifunc *rb_vm_ifunc_new(rb_block_call_func_t func, const void *data, int min_argc, int max_argc); static inline enum imemo_type imemo_type(VALUE imemo); static inline int imemo_type_p(VALUE imemo, enum imemo_type imemo_type); @@ -152,17 +153,6 @@ RUBY_SYMBOL_EXPORT_BEGIN const char *rb_imemo_name(enum imemo_type type); RUBY_SYMBOL_EXPORT_END -static inline struct MEMO * -MEMO_NEW(VALUE a, VALUE b, VALUE c) -{ - struct MEMO *memo = IMEMO_NEW(struct MEMO, imemo_memo, 0); - *((VALUE *)&memo->v1) = a; - *((VALUE *)&memo->v2) = b; - *((VALUE *)&memo->u3.value) = c; - - return memo; -} - static inline enum imemo_type imemo_type(VALUE imemo) { diff --git a/re.c b/re.c index 19bf674c268665..621af13cd73f16 100644 --- a/re.c +++ b/re.c @@ -2469,7 +2469,7 @@ match_named_captures(int argc, VALUE *argv, VALUE match) } hash = rb_hash_new(); - memo = MEMO_NEW(hash, match, symbolize_names); + memo = rb_imemo_memo_new(hash, match, symbolize_names); onig_foreach_name(RREGEXP(RMATCH(match)->regexp)->ptr, match_named_captures_iter, (void*)memo); @@ -2508,7 +2508,7 @@ match_deconstruct_keys(VALUE match, VALUE keys) h = rb_hash_new_with_size(onig_number_of_names(RREGEXP_PTR(RMATCH(match)->regexp))); struct MEMO *memo; - memo = MEMO_NEW(h, match, 1); + memo = rb_imemo_memo_new(h, match, 1); onig_foreach_name(RREGEXP_PTR(RMATCH(match)->regexp), match_named_captures_iter, (void*)memo); From 01cd9c9fade0c1c6f13f11c86e86c0caeefd38bc Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Tue, 23 Dec 2025 11:45:16 -0500 Subject: [PATCH 3/8] Add rb_gc_register_pinning_obj --- gc.c | 13 +++++++++++++ gc/default/default.c | 6 ++++++ gc/gc_impl.h | 1 + hash.c | 4 ++++ imemo.c | 5 +++++ internal/gc.h | 1 + proc.c | 3 +++ string.c | 1 + 8 files changed, 34 insertions(+) diff --git a/gc.c b/gc.c index 60860d1e5fe398..a1dc878dc6fafe 100644 --- a/gc.c +++ b/gc.c @@ -638,6 +638,7 @@ typedef struct gc_function_map { void (*declare_weak_references)(void *objspace_ptr, VALUE obj); bool (*handle_weak_references_alive_p)(void *objspace_ptr, VALUE obj); // Compaction + void (*register_pinning_obj)(void *objspace_ptr, VALUE obj); bool (*object_moved_p)(void *objspace_ptr, VALUE obj); VALUE (*location)(void *objspace_ptr, VALUE value); // Write barriers @@ -813,6 +814,7 @@ ruby_modular_gc_init(void) load_modular_gc_func(declare_weak_references); load_modular_gc_func(handle_weak_references_alive_p); // Compaction + load_modular_gc_func(register_pinning_obj); load_modular_gc_func(object_moved_p); load_modular_gc_func(location); // Write barriers @@ -894,6 +896,7 @@ ruby_modular_gc_init(void) # define rb_gc_impl_declare_weak_references rb_gc_functions.declare_weak_references # define rb_gc_impl_handle_weak_references_alive_p rb_gc_functions.handle_weak_references_alive_p // Compaction +# define rb_gc_impl_register_pinning_obj rb_gc_functions.register_pinning_obj # define rb_gc_impl_object_moved_p rb_gc_functions.object_moved_p # define rb_gc_impl_location rb_gc_functions.location // Write barriers @@ -1049,6 +1052,12 @@ rb_wb_protected_newobj_of(rb_execution_context_t *ec, VALUE klass, VALUE flags, return newobj_of(rb_ec_ractor_ptr(ec), klass, flags, shape_id, TRUE, size); } +void +rb_gc_register_pinning_obj(VALUE obj) +{ + rb_gc_impl_register_pinning_obj(rb_gc_get_objspace(), obj); +} + #define UNEXPECTED_NODE(func) \ rb_bug(#func"(): GC does not handle T_NODE 0x%x(%p) 0x%"PRIxVALUE, \ BUILTIN_TYPE(obj), (void*)(obj), RBASIC(obj)->flags) @@ -1069,6 +1078,8 @@ rb_data_object_wrap(VALUE klass, void *datap, RUBY_DATA_FUNC dmark, RUBY_DATA_FU if (klass) rb_data_object_check(klass); VALUE obj = newobj_of(GET_RACTOR(), klass, T_DATA, ROOT_SHAPE_ID, !dmark, sizeof(struct RTypedData)); + rb_gc_register_pinning_obj(obj); + struct RData *data = (struct RData *)obj; data->dmark = dmark; data->dfree = dfree; @@ -1093,6 +1104,8 @@ typed_data_alloc(VALUE klass, VALUE typed_flag, void *datap, const rb_data_type_ bool wb_protected = (type->flags & RUBY_FL_WB_PROTECTED) || !type->function.dmark; VALUE obj = newobj_of(GET_RACTOR(), klass, T_DATA | RUBY_TYPED_FL_IS_TYPED_DATA, ROOT_SHAPE_ID, wb_protected, size); + rb_gc_register_pinning_obj(obj); + struct RTypedData *data = (struct RTypedData *)obj; data->fields_obj = 0; *(VALUE *)&data->type = ((VALUE)type) | typed_flag; diff --git a/gc/default/default.c b/gc/default/default.c index 0e92c35598f09f..c33570f920f647 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -7059,6 +7059,12 @@ gc_sort_heap_by_compare_func(rb_objspace_t *objspace, gc_compact_compare_func co } #endif +void +rb_gc_impl_register_pinning_obj(void *objspace_ptr, VALUE obj) +{ + /* no-op */ +} + bool rb_gc_impl_object_moved_p(void *objspace_ptr, VALUE obj) { diff --git a/gc/gc_impl.h b/gc/gc_impl.h index bb97fa4c09a4be..7898316a75e536 100644 --- a/gc/gc_impl.h +++ b/gc/gc_impl.h @@ -86,6 +86,7 @@ GC_IMPL_FN void rb_gc_impl_mark_maybe(void *objspace_ptr, VALUE obj); GC_IMPL_FN void rb_gc_impl_declare_weak_references(void *objspace_ptr, VALUE obj); GC_IMPL_FN bool rb_gc_impl_handle_weak_references_alive_p(void *objspace_ptr, VALUE obj); // Compaction +GC_IMPL_FN void rb_gc_impl_register_pinning_obj(void *objspace_ptr, VALUE obj); GC_IMPL_FN bool rb_gc_impl_object_moved_p(void *objspace_ptr, VALUE obj); GC_IMPL_FN VALUE rb_gc_impl_location(void *objspace_ptr, VALUE value); // Write barriers diff --git a/hash.c b/hash.c index 3669f55d5024d0..c4e8512f64c458 100644 --- a/hash.c +++ b/hash.c @@ -4673,6 +4673,8 @@ rb_hash_compare_by_id(VALUE hash) RHASH_ST_CLEAR(tmp); } + rb_gc_register_pinning_obj(hash); + return hash; } @@ -4702,6 +4704,7 @@ rb_ident_hash_new(void) { VALUE hash = rb_hash_new(); hash_st_table_init(hash, &identhash, 0); + rb_gc_register_pinning_obj(hash); return hash; } @@ -4710,6 +4713,7 @@ rb_ident_hash_new_with_size(st_index_t size) { VALUE hash = rb_hash_new(); hash_st_table_init(hash, &identhash, size); + rb_gc_register_pinning_obj(hash); return hash; } diff --git a/imemo.c b/imemo.c index 58029293764aef..8b3018523f0155 100644 --- a/imemo.c +++ b/imemo.c @@ -54,6 +54,8 @@ rb_imemo_tmpbuf_new(void) VALUE flags = T_IMEMO | (imemo_tmpbuf << FL_USHIFT); NEWOBJ_OF(obj, rb_imemo_tmpbuf_t, 0, flags, sizeof(rb_imemo_tmpbuf_t), NULL); + rb_gc_register_pinning_obj((VALUE)obj); + obj->ptr = NULL; obj->cnt = 0; @@ -101,6 +103,9 @@ struct MEMO * rb_imemo_memo_new(VALUE a, VALUE b, VALUE c) { struct MEMO *memo = IMEMO_NEW(struct MEMO, imemo_memo, 0); + + rb_gc_register_pinning_obj((VALUE)memo); + *((VALUE *)&memo->v1) = a; *((VALUE *)&memo->v2) = b; *((VALUE *)&memo->u3.value) = c; diff --git a/internal/gc.h b/internal/gc.h index 8fff2e83361192..ee1f390e104cff 100644 --- a/internal/gc.h +++ b/internal/gc.h @@ -204,6 +204,7 @@ static inline void *ruby_sized_xrealloc_inlined(void *ptr, size_t new_size, size static inline void *ruby_sized_xrealloc2_inlined(void *ptr, size_t new_count, size_t elemsiz, size_t old_count) RUBY_ATTR_RETURNS_NONNULL RUBY_ATTR_ALLOC_SIZE((2, 3)); static inline void ruby_sized_xfree_inlined(void *ptr, size_t size); void rb_gc_obj_id_moved(VALUE obj); +void rb_gc_register_pinning_obj(VALUE obj); void *rb_gc_ractor_cache_alloc(rb_ractor_t *ractor); void rb_gc_ractor_cache_free(void *cache); diff --git a/proc.c b/proc.c index d72f8d952e9465..a0fc3a7e0837f2 100644 --- a/proc.c +++ b/proc.c @@ -898,6 +898,9 @@ rb_vm_ifunc_new(rb_block_call_func_t func, const void *data, int min_argc, int m rb_execution_context_t *ec = GET_EC(); struct vm_ifunc *ifunc = IMEMO_NEW(struct vm_ifunc, imemo_ifunc, (VALUE)rb_vm_svar_lep(ec, ec->cfp)); + + rb_gc_register_pinning_obj((VALUE)ifunc); + ifunc->func = func; ifunc->data = data; ifunc->argc.min = min_argc; diff --git a/string.c b/string.c index b70cee020de6bd..b6c6d3f6268caf 100644 --- a/string.c +++ b/string.c @@ -205,6 +205,7 @@ str_enc_fastpath(VALUE str) RUBY_ASSERT(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \ RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \ FL_SET((str), STR_SHARED); \ + rb_gc_register_pinning_obj(str); \ FL_SET((shared_str), STR_SHARED_ROOT); \ if (RBASIC_CLASS((shared_str)) == 0) /* for CoW-friendliness */ \ FL_SET_RAW((shared_str), STR_BORROWED); \ From 7902ae34d01a142fcbc0f669d93b1af5664ece42 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Wed, 24 Dec 2025 20:38:22 -0500 Subject: [PATCH 4/8] Add rb_gc_move_obj_during_marking --- gc.c | 8 ++++++++ gc/gc.h | 1 + 2 files changed, 9 insertions(+) diff --git a/gc.c b/gc.c index a1dc878dc6fafe..8a18a129786f33 100644 --- a/gc.c +++ b/gc.c @@ -3173,6 +3173,14 @@ gc_mark_classext_iclass(rb_classext_t *ext, bool prime, VALUE box_value, void *a #define TYPED_DATA_REFS_OFFSET_LIST(d) (size_t *)(uintptr_t)RTYPEDDATA_TYPE(d)->function.dmark +void +rb_gc_move_obj_during_marking(VALUE from, VALUE to) +{ + if (rb_obj_gen_fields_p(to)) { + rb_mark_generic_ivar(from); + } +} + void rb_gc_mark_children(void *objspace, VALUE obj) { diff --git a/gc/gc.h b/gc/gc.h index 69dacf488f7db1..a5edc266e75b1a 100644 --- a/gc/gc.h +++ b/gc/gc.h @@ -107,6 +107,7 @@ MODULAR_GC_FN void *rb_gc_get_ractor_newobj_cache(void); MODULAR_GC_FN void rb_gc_initialize_vm_context(struct rb_gc_vm_context *context); MODULAR_GC_FN void rb_gc_worker_thread_set_vm_context(struct rb_gc_vm_context *context); MODULAR_GC_FN void rb_gc_worker_thread_unset_vm_context(struct rb_gc_vm_context *context); +MODULAR_GC_FN void rb_gc_move_obj_during_marking(VALUE from, VALUE to); #endif #if USE_MODULAR_GC From 782d959f674b5088377191a4b34ed2f5bbb7c022 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 26 Dec 2025 11:55:42 -0500 Subject: [PATCH 5/8] Implement moving Immix in MMTk This commit implements moving Immix in MMTk, which allows objects to move in the GC. The performance of this implementation is not yet amazing. It is very similar to non-moving Immix in many of them and slightly slower in others. The benchmark results is shown below. -------------- ----------------- ---------- --------- bench Moving Immix (ms) stddev (%) RSS (MiB) activerecord 241.9 0.5 86.6 chunky-png 447.8 0.8 74.9 erubi-rails 1183.9 0.8 136.1 hexapdf 1607.9 2.6 402.3 liquid-c 45.4 6.7 44.9 liquid-compile 44.1 9.3 53.0 liquid-render 105.4 4.5 55.9 lobsters 650.1 9.7 418.4 mail 115.4 2.1 64.4 psych-load 1656.8 0.8 43.6 railsbench 1653.5 1.3 149.8 rubocop 127.0 15.6 142.1 ruby-lsp 130.7 10.5 99.4 sequel 52.8 7.2 45.6 shipit 1187.0 3.9 311.0 -------------- ----------------- ---------- --------- -------------- --------------------- ---------- --------- bench Non-moving Immix (ms) stddev (%) RSS (MiB) activerecord 218.9 2.7 86.1 chunky-png 464.6 0.8 66.7 erubi-rails 1119.0 4.3 132.7 hexapdf 1539.8 1.8 425.2 liquid-c 40.6 6.9 45.2 liquid-compile 40.6 8.1 52.9 liquid-render 99.3 2.3 48.3 mail 107.4 5.3 65.4 psych-load 1535.6 1.0 39.5 railsbench 1565.6 1.1 149.6 rubocop 122.5 14.3 146.7 ruby-lsp 128.4 10.7 106.4 sequel 44.1 4.0 45.7 shipit 1154.5 2.7 358.5 -------------- --------------------- ---------- --------- --- gc/mmtk/Cargo.toml | 2 +- gc/mmtk/mmtk.c | 112 ++++++++++++++++---- gc/mmtk/mmtk.h | 9 +- gc/mmtk/src/abi.rs | 7 +- gc/mmtk/src/api.rs | 7 ++ gc/mmtk/src/binding.rs | 3 + gc/mmtk/src/collection.rs | 19 +++- gc/mmtk/src/lib.rs | 1 + gc/mmtk/src/object_model.rs | 40 ++++++- gc/mmtk/src/pinning_registry.rs | 179 ++++++++++++++++++++++++++++++++ gc/mmtk/src/scanning.rs | 13 ++- gc/mmtk/src/weak_proc.rs | 39 ++++++- 12 files changed, 393 insertions(+), 38 deletions(-) create mode 100644 gc/mmtk/src/pinning_registry.rs diff --git a/gc/mmtk/Cargo.toml b/gc/mmtk/Cargo.toml index d0bc5b3ff5d9f8..d856122900333d 100644 --- a/gc/mmtk/Cargo.toml +++ b/gc/mmtk/Cargo.toml @@ -21,7 +21,7 @@ probe = "0.5" sysinfo = "0.32.0" [dependencies.mmtk] -features = ["is_mmtk_object", "object_pinning", "sticky_immix_non_moving_nursery", "immix_non_moving"] +features = ["is_mmtk_object", "object_pinning", "sticky_immix_non_moving_nursery"] # Uncomment the following lines to use mmtk-core from the official repository. git = "https://github.com/mmtk/mmtk-core.git" diff --git a/gc/mmtk/mmtk.c b/gc/mmtk/mmtk.c index 634abc068a44f5..b507e337b8a132 100644 --- a/gc/mmtk/mmtk.c +++ b/gc/mmtk/mmtk.c @@ -183,6 +183,18 @@ rb_mmtk_block_for_gc(MMTk_VMMutatorThread mutator) RB_GC_VM_UNLOCK(lock_lev); } +static void +rb_mmtk_before_updating_jit_code(void) +{ + rb_gc_before_updating_jit_code(); +} + +static void +rb_mmtk_after_updating_jit_code(void) +{ + rb_gc_after_updating_jit_code(); +} + static size_t rb_mmtk_number_of_mutators(void) { @@ -247,9 +259,19 @@ rb_mmtk_scan_objspace(void) } static void -rb_mmtk_scan_object_ruby_style(MMTk_ObjectReference object) +rb_mmtk_move_obj_during_marking(MMTk_ObjectReference from, MMTk_ObjectReference to) { - rb_gc_mark_children(rb_gc_get_objspace(), (VALUE)object); + rb_gc_move_obj_during_marking((VALUE)from, (VALUE)to); +} + +static void +rb_mmtk_update_object_references(MMTk_ObjectReference mmtk_object) +{ + VALUE object = (VALUE)mmtk_object; + + if (!RB_FL_TEST(object, RUBY_FL_WEAK_REFERENCE)) { + rb_gc_update_object_references(rb_gc_get_objspace(), object); + } } static void @@ -259,9 +281,15 @@ rb_mmtk_call_gc_mark_children(MMTk_ObjectReference object) } static void -rb_mmtk_handle_weak_references(MMTk_ObjectReference object) +rb_mmtk_handle_weak_references(MMTk_ObjectReference mmtk_object, bool moving) { - rb_gc_handle_weak_references((VALUE)object); + VALUE object = (VALUE)mmtk_object; + + rb_gc_handle_weak_references(object); + + if (moving) { + rb_gc_update_object_references(rb_gc_get_objspace(), object); + } } static void @@ -332,19 +360,35 @@ rb_mmtk_update_finalizer_table(void) } static int -rb_mmtk_update_table_i(VALUE val, void *data) +rb_mmtk_global_tables_count(void) +{ + return RB_GC_VM_WEAK_TABLE_COUNT; +} + +static inline VALUE rb_mmtk_call_object_closure(VALUE obj, bool pin); + +static int +rb_mmtk_update_global_tables_i(VALUE val, void *data) { if (!mmtk_is_reachable((MMTk_ObjectReference)val)) { return ST_DELETE; } + // TODO: check only if in moving GC + if (rb_mmtk_call_object_closure(val, false) != val) { + return ST_REPLACE; + } + return ST_CONTINUE; } static int -rb_mmtk_global_tables_count(void) +rb_mmtk_update_global_tables_replace_i(VALUE *ptr, void *data) { - return RB_GC_VM_WEAK_TABLE_COUNT; + // TODO: cache the new location so we don't call rb_mmtk_call_object_closure twice + *ptr = rb_mmtk_call_object_closure(*ptr, false); + + return ST_CONTINUE; } static void @@ -352,7 +396,14 @@ rb_mmtk_update_global_tables(int table) { RUBY_ASSERT(table < RB_GC_VM_WEAK_TABLE_COUNT); - rb_gc_vm_weak_table_foreach(rb_mmtk_update_table_i, NULL, NULL, true, (enum rb_gc_vm_weak_tables)table); + // TODO: set weak_only to true for non-moving GC + rb_gc_vm_weak_table_foreach( + rb_mmtk_update_global_tables_i, + rb_mmtk_update_global_tables_replace_i, + NULL, + false, + (enum rb_gc_vm_weak_tables)table + ); } static bool @@ -376,11 +427,14 @@ MMTk_RubyUpcalls ruby_upcalls = { rb_mmtk_stop_the_world, rb_mmtk_resume_mutators, rb_mmtk_block_for_gc, + rb_mmtk_before_updating_jit_code, + rb_mmtk_after_updating_jit_code, rb_mmtk_number_of_mutators, rb_mmtk_get_mutators, rb_mmtk_scan_gc_roots, rb_mmtk_scan_objspace, - rb_mmtk_scan_object_ruby_style, + rb_mmtk_move_obj_during_marking, + rb_mmtk_update_object_references, rb_mmtk_call_gc_mark_children, rb_mmtk_handle_weak_references, rb_mmtk_call_obj_free, @@ -738,15 +792,23 @@ rb_gc_impl_free(void *objspace_ptr, void *ptr, size_t old_size) void rb_gc_impl_adjust_memory_usage(void *objspace_ptr, ssize_t diff) { } // Marking +static inline VALUE +rb_mmtk_call_object_closure(VALUE obj, bool pin) +{ + return (VALUE)rb_mmtk_gc_thread_tls->object_closure.c_function( + rb_mmtk_gc_thread_tls->object_closure.rust_closure, + rb_mmtk_gc_thread_tls->gc_context, + (MMTk_ObjectReference)obj, + pin + ); +} + void rb_gc_impl_mark(void *objspace_ptr, VALUE obj) { if (RB_SPECIAL_CONST_P(obj)) return; - rb_mmtk_gc_thread_tls->object_closure.c_function(rb_mmtk_gc_thread_tls->object_closure.rust_closure, - rb_mmtk_gc_thread_tls->gc_context, - (MMTk_ObjectReference)obj, - false); + rb_mmtk_call_object_closure(obj, false); } void @@ -754,8 +816,10 @@ rb_gc_impl_mark_and_move(void *objspace_ptr, VALUE *ptr) { if (RB_SPECIAL_CONST_P(*ptr)) return; - // TODO: make it movable - rb_gc_impl_mark(objspace_ptr, *ptr); + VALUE new_obj = rb_mmtk_call_object_closure(*ptr, false); + if (new_obj != *ptr) { + *ptr = new_obj; + } } void @@ -763,8 +827,7 @@ rb_gc_impl_mark_and_pin(void *objspace_ptr, VALUE obj) { if (RB_SPECIAL_CONST_P(obj)) return; - // TODO: also pin - rb_gc_impl_mark(objspace_ptr, obj); + rb_mmtk_call_object_closure(obj, true); } void @@ -775,11 +838,10 @@ rb_gc_impl_mark_maybe(void *objspace_ptr, VALUE obj) } } -// Weak references - void rb_gc_impl_declare_weak_references(void *objspace_ptr, VALUE obj) { + RB_FL_SET(obj, RUBY_FL_WEAK_REFERENCE); mmtk_declare_weak_references((MMTk_ObjectReference)obj); } @@ -790,16 +852,22 @@ rb_gc_impl_handle_weak_references_alive_p(void *objspace_ptr, VALUE obj) } // Compaction +void +rb_gc_impl_register_pinning_obj(void *objspace_ptr, VALUE obj) +{ + mmtk_register_pinning_obj((MMTk_ObjectReference)obj); +} + bool rb_gc_impl_object_moved_p(void *objspace_ptr, VALUE obj) { - rb_bug("unimplemented"); + return rb_mmtk_call_object_closure(obj, false) != obj; } VALUE -rb_gc_impl_location(void *objspace_ptr, VALUE value) +rb_gc_impl_location(void *objspace_ptr, VALUE obj) { - rb_bug("unimplemented"); + return rb_mmtk_call_object_closure(obj, false); } // Write barriers diff --git a/gc/mmtk/mmtk.h b/gc/mmtk/mmtk.h index 77e82cf06e5614..f7da0f95f0214d 100644 --- a/gc/mmtk/mmtk.h +++ b/gc/mmtk/mmtk.h @@ -62,13 +62,16 @@ typedef struct MMTk_RubyUpcalls { void (*stop_the_world)(void); void (*resume_mutators)(void); void (*block_for_gc)(MMTk_VMMutatorThread tls); + void (*before_updating_jit_code)(void); + void (*after_updating_jit_code)(void); size_t (*number_of_mutators)(void); void (*get_mutators)(void (*visit_mutator)(MMTk_Mutator*, void*), void *data); void (*scan_gc_roots)(void); void (*scan_objspace)(void); - void (*scan_object_ruby_style)(MMTk_ObjectReference object); + void (*move_obj_during_marking)(MMTk_ObjectReference from, MMTk_ObjectReference to); + void (*update_object_references)(MMTk_ObjectReference object); void (*call_gc_mark_children)(MMTk_ObjectReference object); - void (*handle_weak_references)(MMTk_ObjectReference object); + void (*handle_weak_references)(MMTk_ObjectReference object, bool moving); void (*call_obj_free)(MMTk_ObjectReference object); size_t (*vm_live_bytes)(void); void (*update_global_tables)(int tbl_idx); @@ -125,6 +128,8 @@ void mmtk_declare_weak_references(MMTk_ObjectReference object); bool mmtk_weak_references_alive_p(MMTk_ObjectReference object); +void mmtk_register_pinning_obj(MMTk_ObjectReference obj); + void mmtk_object_reference_write_post(MMTk_Mutator *mutator, MMTk_ObjectReference object); void mmtk_register_wb_unprotected_object(MMTk_ObjectReference object); diff --git a/gc/mmtk/src/abi.rs b/gc/mmtk/src/abi.rs index fed4d82f4e31e6..1d7dc62a8753b4 100644 --- a/gc/mmtk/src/abi.rs +++ b/gc/mmtk/src/abi.rs @@ -299,6 +299,8 @@ pub struct RubyUpcalls { pub stop_the_world: extern "C" fn(), pub resume_mutators: extern "C" fn(), pub block_for_gc: extern "C" fn(tls: VMMutatorThread), + pub before_updating_jit_code: extern "C" fn(), + pub after_updating_jit_code: extern "C" fn(), pub number_of_mutators: extern "C" fn() -> usize, pub get_mutators: extern "C" fn( visit_mutator: extern "C" fn(*mut RubyMutator, *mut libc::c_void), @@ -306,9 +308,10 @@ pub struct RubyUpcalls { ), pub scan_gc_roots: extern "C" fn(), pub scan_objspace: extern "C" fn(), - pub scan_object_ruby_style: extern "C" fn(object: ObjectReference), + pub move_obj_during_marking: extern "C" fn(from: ObjectReference, to: ObjectReference), + pub update_object_references: extern "C" fn(object: ObjectReference), pub call_gc_mark_children: extern "C" fn(object: ObjectReference), - pub handle_weak_references: extern "C" fn(object: ObjectReference), + pub handle_weak_references: extern "C" fn(object: ObjectReference, moving: bool), pub call_obj_free: extern "C" fn(object: ObjectReference), pub vm_live_bytes: extern "C" fn() -> usize, pub update_global_tables: extern "C" fn(tbl_idx: c_int), diff --git a/gc/mmtk/src/api.rs b/gc/mmtk/src/api.rs index 0449d3959d7c1e..ec0e5bafe2289a 100644 --- a/gc/mmtk/src/api.rs +++ b/gc/mmtk/src/api.rs @@ -312,6 +312,13 @@ pub extern "C" fn mmtk_weak_references_alive_p(object: ObjectReference) -> bool object.is_reachable() } +// =============== Compaction =============== + +#[no_mangle] +pub extern "C" fn mmtk_register_pinning_obj(obj: ObjectReference) { + crate::binding().pinning_registry.register(obj); +} + // =============== Write barriers =============== #[no_mangle] diff --git a/gc/mmtk/src/binding.rs b/gc/mmtk/src/binding.rs index ddbaafb0767b1f..36d4a992fda79b 100644 --- a/gc/mmtk/src/binding.rs +++ b/gc/mmtk/src/binding.rs @@ -9,6 +9,7 @@ use mmtk::MMTK; use crate::abi; use crate::abi::RubyBindingOptions; +use crate::pinning_registry::PinningRegistry; use crate::weak_proc::WeakProcessor; use crate::Ruby; @@ -54,6 +55,7 @@ pub struct RubyBinding { pub upcalls: *const abi::RubyUpcalls, pub plan_name: Mutex>, pub weak_proc: WeakProcessor, + pub pinning_registry: PinningRegistry, pub gc_thread_join_handles: Mutex>>, pub wb_unprotected_objects: Mutex>, } @@ -77,6 +79,7 @@ impl RubyBinding { upcalls, plan_name: Mutex::new(None), weak_proc: WeakProcessor::new(), + pinning_registry: PinningRegistry::new(), gc_thread_join_handles: Default::default(), wb_unprotected_objects: Default::default(), } diff --git a/gc/mmtk/src/collection.rs b/gc/mmtk/src/collection.rs index 824747b04c4051..0b1221204c1468 100644 --- a/gc/mmtk/src/collection.rs +++ b/gc/mmtk/src/collection.rs @@ -8,9 +8,12 @@ use mmtk::scheduler::*; use mmtk::util::heap::GCTriggerPolicy; use mmtk::util::{VMMutatorThread, VMThread, VMWorkerThread}; use mmtk::vm::{Collection, GCThreadContext}; +use std::sync::atomic::AtomicBool; use std::sync::atomic::Ordering; use std::thread; +static CURRENT_GC_MAY_MOVE: AtomicBool = AtomicBool::new(false); + pub struct VMCollection {} impl Collection for VMCollection { @@ -18,11 +21,21 @@ impl Collection for VMCollection { crate::CONFIGURATION.gc_enabled.load(Ordering::Relaxed) } - fn stop_all_mutators(_tls: VMWorkerThread, mut mutator_visitor: F) + fn stop_all_mutators(tls: VMWorkerThread, mut mutator_visitor: F) where F: FnMut(&'static mut mmtk::Mutator), { (upcalls().stop_the_world)(); + + if crate::mmtk().get_plan().current_gc_may_move_object() { + CURRENT_GC_MAY_MOVE.store(true, Ordering::Relaxed); + (upcalls().before_updating_jit_code)(); + } else { + CURRENT_GC_MAY_MOVE.store(false, Ordering::Relaxed); + } + + crate::binding().pinning_registry.pin_children(tls); + (upcalls().get_mutators)( Self::notify_mutator_ready::, &mut mutator_visitor as *mut F as *mut _, @@ -30,6 +43,10 @@ impl Collection for VMCollection { } fn resume_mutators(_tls: VMWorkerThread) { + if CURRENT_GC_MAY_MOVE.load(Ordering::Relaxed) { + (upcalls().after_updating_jit_code)(); + } + (upcalls().resume_mutators)(); } diff --git a/gc/mmtk/src/lib.rs b/gc/mmtk/src/lib.rs index 8647793522ed82..0da6121883747e 100644 --- a/gc/mmtk/src/lib.rs +++ b/gc/mmtk/src/lib.rs @@ -27,6 +27,7 @@ pub mod binding; pub mod collection; pub mod heap; pub mod object_model; +pub mod pinning_registry; pub mod reference_glue; pub mod scanning; pub mod utils; diff --git a/gc/mmtk/src/object_model.rs b/gc/mmtk/src/object_model.rs index 93b6063a05d506..dd27f83612cb9a 100644 --- a/gc/mmtk/src/object_model.rs +++ b/gc/mmtk/src/object_model.rs @@ -1,4 +1,6 @@ -use crate::abi::{RubyObjectAccess, OBJREF_OFFSET}; +use std::ptr::copy_nonoverlapping; + +use crate::abi::{RubyObjectAccess, MIN_OBJ_ALIGN, OBJREF_OFFSET}; use crate::{abi, Ruby}; use mmtk::util::constants::BITS_IN_BYTE; use mmtk::util::copy::{CopySemantics, GCWorkerCopyContext}; @@ -36,11 +38,39 @@ impl ObjectModel for VMObjectModel { const NEED_VO_BITS_DURING_TRACING: bool = true; fn copy( - _from: ObjectReference, - _semantics: CopySemantics, - _copy_context: &mut GCWorkerCopyContext, + from: ObjectReference, + semantics: CopySemantics, + copy_context: &mut GCWorkerCopyContext, ) -> ObjectReference { - unimplemented!("Copying GC not currently supported") + let from_acc = RubyObjectAccess::from_objref(from); + let from_start = from_acc.obj_start(); + let object_size = from_acc.object_size(); + let to_start = copy_context.alloc_copy(from, object_size, MIN_OBJ_ALIGN, 0, semantics); + debug_assert!(!to_start.is_zero()); + let to_payload = to_start.add(OBJREF_OFFSET); + unsafe { + copy_nonoverlapping::(from_start.to_ptr(), to_start.to_mut_ptr(), object_size); + } + let to_obj = unsafe { ObjectReference::from_raw_address_unchecked(to_payload) }; + copy_context.post_copy(to_obj, object_size, semantics); + trace!("Copied object from {} to {}", from, to_obj); + + (crate::binding().upcalls().move_obj_during_marking)(from, to_obj); + + #[cfg(feature = "clear_old_copy")] + { + trace!( + "Clearing old copy {} ({}-{})", + from, + from_start, + from_start + object_size + ); + // For debug purpose, we clear the old copy so that if the Ruby VM reads from the old + // copy again, it will likely result in an error. + unsafe { std::ptr::write_bytes::(from_start.to_mut_ptr(), 0, object_size) } + } + + to_obj } fn copy_to(_from: ObjectReference, _to: ObjectReference, _region: Address) -> Address { diff --git a/gc/mmtk/src/pinning_registry.rs b/gc/mmtk/src/pinning_registry.rs new file mode 100644 index 00000000000000..7f67bbeef5e4a3 --- /dev/null +++ b/gc/mmtk/src/pinning_registry.rs @@ -0,0 +1,179 @@ +use std::sync::Mutex; + +use mmtk::{ + memory_manager, + scheduler::{GCWork, GCWorker, WorkBucketStage}, + util::{ObjectReference, VMWorkerThread}, + MMTK, +}; + +use crate::{abi::GCThreadTLS, upcalls, Ruby}; + +pub struct PinningRegistry { + pinning_objs: Mutex>, + pinned_objs: Mutex>, +} + +impl PinningRegistry { + pub fn new() -> Self { + Self { + pinning_objs: Default::default(), + pinned_objs: Default::default(), + } + } + + pub fn register(&self, object: ObjectReference) { + let mut pinning_objs = self.pinning_objs.lock().unwrap(); + pinning_objs.push(object); + } + + pub fn pin_children(&self, tls: VMWorkerThread) { + if !crate::mmtk().get_plan().current_gc_may_move_object() { + log::debug!("The current GC is non-moving, skipping pinning children."); + return; + } + + let gc_tls = unsafe { GCThreadTLS::from_vwt_check(tls) }; + let worker = gc_tls.worker(); + + let pinning_objs = self + .pinning_objs + .try_lock() + .expect("PinningRegistry should not have races during GC."); + + let packet_size = 512; + let work_packets = pinning_objs + .chunks(packet_size) + .map(|chunk| { + Box::new(PinPinningChildren { + pinning_objs: chunk.to_vec(), + }) as _ + }) + .collect(); + + worker.scheduler().work_buckets[WorkBucketStage::Prepare].bulk_add(work_packets); + } + + pub fn cleanup(&self, worker: &mut GCWorker) { + worker.scheduler().work_buckets[WorkBucketStage::VMRefClosure].add(RemoveDeadPinnings); + if crate::mmtk().get_plan().current_gc_may_move_object() { + let packet = { + let mut pinned_objs = self + .pinned_objs + .try_lock() + .expect("Unexpected contention on pinned_objs"); + UnpinPinnedObjects { + objs: std::mem::take(&mut pinned_objs), + } + }; + + worker.scheduler().work_buckets[WorkBucketStage::VMRefClosure].add(packet); + } else { + debug!("The current GC is non-moving, skipping unpinning objects."); + debug_assert_eq!( + { + let pinned_objs = self + .pinned_objs + .try_lock() + .expect("Unexpected contention on pinned_objs"); + pinned_objs.len() + }, + 0 + ); + } + } +} + +impl Default for PinningRegistry { + fn default() -> Self { + Self::new() + } +} + +struct PinPinningChildren { + pinning_objs: Vec, +} + +impl GCWork for PinPinningChildren { + fn do_work(&mut self, worker: &mut GCWorker, _mmtk: &'static MMTK) { + let gc_tls = unsafe { GCThreadTLS::from_vwt_check(worker.tls) }; + let mut pinned_objs = vec![]; + let mut newly_pinned_objs = vec![]; + + let visit_object = |_worker, target_object: ObjectReference, pin| { + log::trace!( + " -> {} {}", + if pin { "(pin)" } else { " " }, + target_object + ); + if pin { + pinned_objs.push(target_object); + } + target_object + }; + + gc_tls + .object_closure + .set_temporarily_and_run_code(visit_object, || { + for obj in self.pinning_objs.iter().cloned() { + log::trace!(" Pinning: {}", obj); + (upcalls().call_gc_mark_children)(obj); + } + }); + + for target_object in pinned_objs { + if memory_manager::pin_object(target_object) { + newly_pinned_objs.push(target_object); + } + } + + let mut pinned_objs = crate::binding() + .pinning_registry + .pinned_objs + .lock() + .unwrap(); + pinned_objs.append(&mut newly_pinned_objs); + } +} + +struct RemoveDeadPinnings; + +impl GCWork for RemoveDeadPinnings { + fn do_work(&mut self, _worker: &mut GCWorker, _mmtk: &'static MMTK) { + log::debug!("Removing dead Pinnings..."); + + let registry = &crate::binding().pinning_registry; + { + let mut pinning_objs = registry + .pinning_objs + .try_lock() + .expect("PinningRegistry should not have races during GC."); + + pinning_objs.retain_mut(|obj| { + if obj.is_live() { + let new_obj = obj.get_forwarded_object().unwrap_or(*obj); + *obj = new_obj; + true + } else { + log::trace!(" Dead Pinning removed: {}", *obj); + false + } + }); + } + } +} + +struct UnpinPinnedObjects { + objs: Vec, +} + +impl GCWork for UnpinPinnedObjects { + fn do_work(&mut self, _worker: &mut GCWorker, _mmtk: &'static MMTK) { + log::debug!("Unpinning pinned objects..."); + + for obj in self.objs.iter() { + let unpinned = memory_manager::unpin_object(*obj); + debug_assert!(unpinned); + } + } +} diff --git a/gc/mmtk/src/scanning.rs b/gc/mmtk/src/scanning.rs index af435813770a8a..eca4769e2f9d15 100644 --- a/gc/mmtk/src/scanning.rs +++ b/gc/mmtk/src/scanning.rs @@ -54,7 +54,11 @@ impl Scanning for VMScanning { gc_tls .object_closure .set_temporarily_and_run_code(visit_object, || { - (upcalls().scan_object_ruby_style)(object); + (upcalls().call_gc_mark_children)(object); + + if crate::mmtk().get_plan().current_gc_may_move_object() { + (upcalls().update_object_references)(object); + } }); } @@ -131,6 +135,7 @@ impl Scanning for VMScanning { crate::binding() .weak_proc .process_weak_stuff(worker, tracer_context); + crate::binding().pinning_registry.cleanup(worker); false } @@ -248,7 +253,11 @@ impl> GCWork for ScanWbUnprotectedRoots { for object in self.objects.iter().copied() { if object.is_reachable() { debug!("[wb_unprot_roots] Visiting WB-unprotected object (parent): {object}"); - (upcalls().scan_object_ruby_style)(object); + (upcalls().call_gc_mark_children)(object); + + if crate::mmtk().get_plan().current_gc_may_move_object() { + (upcalls().update_object_references)(object); + } } else { debug!( "[wb_unprot_roots] Skipping young WB-unprotected object (parent): {object}" diff --git a/gc/mmtk/src/weak_proc.rs b/gc/mmtk/src/weak_proc.rs index ce80d323495e73..3184c4f6d183f2 100644 --- a/gc/mmtk/src/weak_proc.rs +++ b/gc/mmtk/src/weak_proc.rs @@ -116,16 +116,49 @@ impl GCWork for ProcessObjFreeCandidates { struct ProcessWeakReferences; impl GCWork for ProcessWeakReferences { - fn do_work(&mut self, _worker: &mut GCWorker, _mmtk: &'static mmtk::MMTK) { + fn do_work(&mut self, worker: &mut GCWorker, _mmtk: &'static mmtk::MMTK) { + if crate::mmtk().get_plan().current_gc_may_move_object() { + let gc_tls: &mut GCThreadTLS = unsafe { GCThreadTLS::from_vwt_check(worker.tls) }; + + let visit_object = |_worker, target_object: ObjectReference, _pin| { + debug_assert!( + mmtk::memory_manager::is_mmtk_object(target_object.to_raw_address()).is_some(), + "Destination is not an MMTk object" + ); + + target_object + .get_forwarded_object() + .unwrap_or(target_object) + }; + + gc_tls + .object_closure + .set_temporarily_and_run_code(visit_object, || { + self.process_weak_references(true); + }) + } else { + self.process_weak_references(false); + } + } +} + +impl ProcessWeakReferences { + fn process_weak_references(&mut self, moving_gc: bool) { let mut weak_references = crate::binding() .weak_proc .weak_references .try_lock() .expect("Mutators should not be holding the lock."); - weak_references.retain(|&object| { + weak_references.retain_mut(|object_ptr| { + let object = object_ptr.get_forwarded_object().unwrap_or(*object_ptr); + + if object != *object_ptr { + *object_ptr = object; + } + if object.is_reachable() { - (upcalls().handle_weak_references)(object); + (upcalls().handle_weak_references)(object, moving_gc); true } else { From 8afd4fade69b3a53a5e309499595f7b192f87726 Mon Sep 17 00:00:00 2001 From: Thomas Marshall Date: Mon, 29 Dec 2025 12:54:28 +0000 Subject: [PATCH 6/8] [ruby/prism] Add unterminated construct tests https://github.com/ruby/prism/commit/166764f794 --- test/prism/errors/unterminated_begin.txt | 4 ++++ test/prism/errors/unterminated_begin_upcase.txt | 4 ++++ test/prism/errors/unterminated_block_do_end.txt | 4 ++++ test/prism/errors/unterminated_class.txt | 4 ++++ test/prism/errors/unterminated_def.txt | 5 +++++ test/prism/errors/unterminated_end_upcase.txt | 4 ++++ test/prism/errors/unterminated_for.txt | 5 +++++ test/prism/errors/unterminated_if.txt | 5 +++++ test/prism/errors/unterminated_if_else.txt | 5 +++++ test/prism/errors/unterminated_lambda_brace.txt | 4 ++++ test/prism/errors/unterminated_module.txt | 4 ++++ test/prism/errors/unterminated_pattern_bracket.txt | 7 +++++++ test/prism/errors/unterminated_pattern_paren.txt | 7 +++++++ test/prism/errors/unterminated_until.txt | 5 +++++ 14 files changed, 67 insertions(+) create mode 100644 test/prism/errors/unterminated_begin.txt create mode 100644 test/prism/errors/unterminated_begin_upcase.txt create mode 100644 test/prism/errors/unterminated_block_do_end.txt create mode 100644 test/prism/errors/unterminated_class.txt create mode 100644 test/prism/errors/unterminated_def.txt create mode 100644 test/prism/errors/unterminated_end_upcase.txt create mode 100644 test/prism/errors/unterminated_for.txt create mode 100644 test/prism/errors/unterminated_if.txt create mode 100644 test/prism/errors/unterminated_if_else.txt create mode 100644 test/prism/errors/unterminated_lambda_brace.txt create mode 100644 test/prism/errors/unterminated_module.txt create mode 100644 test/prism/errors/unterminated_pattern_bracket.txt create mode 100644 test/prism/errors/unterminated_pattern_paren.txt create mode 100644 test/prism/errors/unterminated_until.txt diff --git a/test/prism/errors/unterminated_begin.txt b/test/prism/errors/unterminated_begin.txt new file mode 100644 index 00000000000000..6217f80a0b5e97 --- /dev/null +++ b/test/prism/errors/unterminated_begin.txt @@ -0,0 +1,4 @@ +begin + ^ unexpected end-of-input, assuming it is closing the parent top level context + ^ expected an `end` to close the `begin` statement + diff --git a/test/prism/errors/unterminated_begin_upcase.txt b/test/prism/errors/unterminated_begin_upcase.txt new file mode 100644 index 00000000000000..92b975bf766d61 --- /dev/null +++ b/test/prism/errors/unterminated_begin_upcase.txt @@ -0,0 +1,4 @@ +BEGIN { + ^ unexpected end-of-input, assuming it is closing the parent top level context + ^ expected a `}` to close the `BEGIN` statement + diff --git a/test/prism/errors/unterminated_block_do_end.txt b/test/prism/errors/unterminated_block_do_end.txt new file mode 100644 index 00000000000000..fb7ca53d6aff5f --- /dev/null +++ b/test/prism/errors/unterminated_block_do_end.txt @@ -0,0 +1,4 @@ +foo do + ^ unexpected end-of-input, assuming it is closing the parent top level context + ^ expected a block beginning with `do` to end with `end` + diff --git a/test/prism/errors/unterminated_class.txt b/test/prism/errors/unterminated_class.txt new file mode 100644 index 00000000000000..ea80ab8fc71a33 --- /dev/null +++ b/test/prism/errors/unterminated_class.txt @@ -0,0 +1,4 @@ +class Foo + ^ unexpected end-of-input, assuming it is closing the parent top level context + ^ expected an `end` to close the `class` statement + diff --git a/test/prism/errors/unterminated_def.txt b/test/prism/errors/unterminated_def.txt new file mode 100644 index 00000000000000..83ec939feaec63 --- /dev/null +++ b/test/prism/errors/unterminated_def.txt @@ -0,0 +1,5 @@ +def foo + ^ expected a delimiter to close the parameters + ^ unexpected end-of-input, assuming it is closing the parent top level context + ^ expected an `end` to close the `def` statement + diff --git a/test/prism/errors/unterminated_end_upcase.txt b/test/prism/errors/unterminated_end_upcase.txt new file mode 100644 index 00000000000000..42ccd22bd5d3aa --- /dev/null +++ b/test/prism/errors/unterminated_end_upcase.txt @@ -0,0 +1,4 @@ +END { + ^ unexpected end-of-input, assuming it is closing the parent top level context + ^ expected a `}` to close the `END` statement + diff --git a/test/prism/errors/unterminated_for.txt b/test/prism/errors/unterminated_for.txt new file mode 100644 index 00000000000000..cbd17f0b84d97b --- /dev/null +++ b/test/prism/errors/unterminated_for.txt @@ -0,0 +1,5 @@ +for x in y + ^ unexpected end-of-input; expected a 'do', newline, or ';' after the 'for' loop collection + ^ unexpected end-of-input, assuming it is closing the parent top level context + ^ expected an `end` to close the `for` loop + diff --git a/test/prism/errors/unterminated_if.txt b/test/prism/errors/unterminated_if.txt new file mode 100644 index 00000000000000..559a00602210f6 --- /dev/null +++ b/test/prism/errors/unterminated_if.txt @@ -0,0 +1,5 @@ +if true + ^ expected `then` or `;` or '\n' + ^ unexpected end-of-input, assuming it is closing the parent top level context + ^ expected an `end` to close the conditional clause + diff --git a/test/prism/errors/unterminated_if_else.txt b/test/prism/errors/unterminated_if_else.txt new file mode 100644 index 00000000000000..35a181e844b4ac --- /dev/null +++ b/test/prism/errors/unterminated_if_else.txt @@ -0,0 +1,5 @@ +if true +else + ^ unexpected end-of-input, assuming it is closing the parent top level context + ^ expected an `end` to close the `else` clause + diff --git a/test/prism/errors/unterminated_lambda_brace.txt b/test/prism/errors/unterminated_lambda_brace.txt new file mode 100644 index 00000000000000..bb8c1090abdd24 --- /dev/null +++ b/test/prism/errors/unterminated_lambda_brace.txt @@ -0,0 +1,4 @@ +-> { + ^ unexpected end-of-input, assuming it is closing the parent top level context + ^ expected a lambda block beginning with `{` to end with `}` + diff --git a/test/prism/errors/unterminated_module.txt b/test/prism/errors/unterminated_module.txt new file mode 100644 index 00000000000000..cf207c06a72dac --- /dev/null +++ b/test/prism/errors/unterminated_module.txt @@ -0,0 +1,4 @@ +module Foo + ^ unexpected end-of-input, assuming it is closing the parent top level context + ^ expected an `end` to close the `module` statement + diff --git a/test/prism/errors/unterminated_pattern_bracket.txt b/test/prism/errors/unterminated_pattern_bracket.txt new file mode 100644 index 00000000000000..cc2630f8e98850 --- /dev/null +++ b/test/prism/errors/unterminated_pattern_bracket.txt @@ -0,0 +1,7 @@ +case x +in [1 + ^ expected a `]` to close the pattern expression + ^ expected a delimiter after the patterns of an `in` clause + ^ unexpected end-of-input, assuming it is closing the parent top level context + ^ expected an `end` to close the `case` statement + diff --git a/test/prism/errors/unterminated_pattern_paren.txt b/test/prism/errors/unterminated_pattern_paren.txt new file mode 100644 index 00000000000000..162a12854675da --- /dev/null +++ b/test/prism/errors/unterminated_pattern_paren.txt @@ -0,0 +1,7 @@ +case x +in (1 + ^ expected a `)` to close the pattern expression + ^ expected a delimiter after the patterns of an `in` clause + ^ unexpected end-of-input, assuming it is closing the parent top level context + ^ expected an `end` to close the `case` statement + diff --git a/test/prism/errors/unterminated_until.txt b/test/prism/errors/unterminated_until.txt new file mode 100644 index 00000000000000..b9d7eee40ff062 --- /dev/null +++ b/test/prism/errors/unterminated_until.txt @@ -0,0 +1,5 @@ +until true + ^ expected a predicate expression for the `until` statement + ^ unexpected end-of-input, assuming it is closing the parent top level context + ^ expected an `end` to close the `until` statement + From 14fbcf0e6ed37c4a0d15fd3f016778465f774f2c Mon Sep 17 00:00:00 2001 From: Thomas Marshall Date: Mon, 29 Dec 2025 12:55:18 +0000 Subject: [PATCH 7/8] [ruby/prism] Report missing end errors at opening token This commit adds an `expect1_opening` function that expects a token and attaches the error to the opening token location rather than the current position. This is useful for errors about missing closing tokens, where we want to point to the line with the opening token rather than the end of the file. For example: ```ruby def foo def bar def baz ^ expected an `end` to close the `def` statement ^ expected an `end` to close the `def` statement ^ expected an `end` to close the `def` statement ``` This would previously produce three identical errors at the end of the file. After this commit, they would be reported at the opening token location: ```ruby def foo ^~~ expected an `end` to close the `def` statement def bar ^~~ expected an `end` to close the `def` statement def baz ^~~ expected an `end` to close the `def` statement ``` I considered using the end of the line where the opening token is located, but in some cases that would be less useful than the opening token location itself. For example: ```ruby def foo def bar def baz ``` Here the end of the line where the opening token is located would be the same for each of the unclosed `def` nodes. https://github.com/ruby/prism/commit/2d7829f060 --- prism/prism.c | 67 ++++++++++++------- ...ginning_with_brace_and_ending_with_end.txt | 2 +- test/prism/errors/command_calls_2.txt | 2 +- test/prism/errors/command_calls_24.txt | 2 +- test/prism/errors/command_calls_25.txt | 2 +- test/prism/errors/heredoc_unterminated.txt | 2 +- test/prism/errors/infix_after_label.txt | 2 +- .../errors/label_in_interpolated_string.txt | 2 +- test/prism/errors/pattern_string_key.txt | 2 +- test/prism/errors/shadow_args_in_lambda.txt | 2 +- test/prism/errors/unterminated_begin.txt | 2 +- .../errors/unterminated_begin_upcase.txt | 2 +- test/prism/errors/unterminated_block.txt | 2 +- .../errors/unterminated_block_do_end.txt | 2 +- test/prism/errors/unterminated_class.txt | 2 +- test/prism/errors/unterminated_def.txt | 2 +- test/prism/errors/unterminated_end_upcase.txt | 2 +- test/prism/errors/unterminated_for.txt | 2 +- test/prism/errors/unterminated_if.txt | 2 +- test/prism/errors/unterminated_if_else.txt | 2 +- .../errors/unterminated_lambda_brace.txt | 2 +- test/prism/errors/unterminated_module.txt | 2 +- .../errors/unterminated_pattern_bracket.txt | 4 +- .../errors/unterminated_pattern_paren.txt | 4 +- test/prism/errors/unterminated_until.txt | 2 +- test/prism/errors/while_endless_method.txt | 2 +- 26 files changed, 69 insertions(+), 52 deletions(-) diff --git a/prism/prism.c b/prism/prism.c index 4c8ab91f0ef4e6..1a8cdf7568fe98 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -12422,6 +12422,22 @@ expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ide } } +/** + * A special expect1 that attaches the error to the opening token location + * rather than the current position. This is useful for errors about missing + * closing tokens, where we want to point to the line with the opening token + * (e.g., `def`, `class`, `if`, `{`) rather than the end of the file. + */ +static void +expect1_opening(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id, const pm_token_t *opening) { + if (accept1(parser, type)) return; + + pm_parser_err(parser, opening->start, opening->end, diag_id); + + parser->previous.start = opening->end; + parser->previous.type = PM_TOKEN_MISSING; +} + static pm_node_t * parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth); @@ -14764,7 +14780,7 @@ parse_block(pm_parser_t *parser, uint16_t depth) { statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1))); } - expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE); + expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE, &opening); } else { if (!match1(parser, PM_TOKEN_KEYWORD_END)) { if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) { @@ -14779,7 +14795,7 @@ parse_block(pm_parser_t *parser, uint16_t depth) { } } - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END, &opening); } pm_constant_id_list_t locals; @@ -15204,7 +15220,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON); parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false); - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE, &keyword); pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous); @@ -15221,7 +15237,7 @@ parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newl } } else { parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false); - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM, &keyword); } // Set the appropriate end location for all of the nodes in the subtree. @@ -16202,7 +16218,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) { inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1)); accept1(parser, PM_TOKEN_NEWLINE); - expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET); + expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening); } closing = parser->previous; @@ -16214,7 +16230,7 @@ parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) { inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1)); accept1(parser, PM_TOKEN_NEWLINE); - expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN); + expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening); } closing = parser->previous; @@ -16594,7 +16610,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1)); accept1(parser, PM_TOKEN_NEWLINE); - expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET); + expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening); pm_token_t closing = parser->previous; switch (PM_NODE_TYPE(inner)) { @@ -16672,7 +16688,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1)); accept1(parser, PM_TOKEN_NEWLINE); - expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE); + expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE, &opening); pm_token_t closing = parser->previous; node->base.location.start = opening.start; @@ -16798,7 +16814,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm parser->pattern_matching_newlines = previous_pattern_matching_newlines; accept1(parser, PM_TOKEN_NEWLINE); - expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN); + expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &lparen); return UP(pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous)); } default: { @@ -16896,7 +16912,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, p pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1)); accept1(parser, PM_TOKEN_NEWLINE); - expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN); + expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening); pm_node_t *right = UP(pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0)); if (!alternation) { @@ -17748,7 +17764,8 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_accepts_block_stack_push(parser, true); parser_lex(parser); - pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous); + pm_token_t opening = parser->previous; + pm_hash_node_t *node = pm_hash_node_create(parser, &opening); if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) { if (current_hash_keys != NULL) { @@ -17763,7 +17780,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } pm_accepts_block_stack_pop(parser); - expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM); + expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM, &opening); pm_hash_node_closing_loc_set(node, &parser->previous); return UP(node); @@ -18380,7 +18397,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false); - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM, &case_keyword); if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) { pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous); @@ -18413,7 +18430,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements); parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1)); - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM, &begin_keyword); begin_node->base.location.end = parser->previous.end; pm_begin_node_end_keyword_set(begin_node, &parser->previous); @@ -18438,7 +18455,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_token_t opening = parser->previous; pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1)); - expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM); + expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM, &opening); pm_context_t context = parser->current_context->context; if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) { pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL); @@ -18568,7 +18585,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false); } - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword); pm_constant_id_list_t locals; pm_locals_order(parser, &parser->current_scope->locals, &locals, false); @@ -18626,7 +18643,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false); } - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword); if (context_def_p(parser)) { pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD); @@ -18936,7 +18953,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_accepts_block_stack_pop(parser); pm_do_loop_stack_pop(parser); - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM, &def_keyword); end_keyword = parser->previous; } @@ -19030,7 +19047,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_token_t opening = parser->previous; pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1)); - expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM); + expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM, &opening); return UP(pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous)); } case PM_TOKEN_KEYWORD_FALSE: @@ -19094,7 +19111,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false); - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM, &for_keyword); return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous)); } @@ -19245,7 +19262,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_locals_order(parser, &parser->current_scope->locals, &locals, false); pm_parser_scope_pop(parser); - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM, &module_keyword); if (context_def_p(parser)) { pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD); @@ -19311,7 +19328,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false); - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM, &keyword); return UP(pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0)); } @@ -19345,7 +19362,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false); - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM, &keyword); return UP(pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0)); } @@ -20091,7 +20108,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b } parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false); - expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE); + expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE, &opening); } else { expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN); opening = parser->previous; @@ -20109,7 +20126,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false); } - expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END); + expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END, &operator); } pm_constant_id_list_t locals; diff --git a/test/prism/errors/block_beginning_with_brace_and_ending_with_end.txt b/test/prism/errors/block_beginning_with_brace_and_ending_with_end.txt index 16af8200ec8930..1184b38ce840df 100644 --- a/test/prism/errors/block_beginning_with_brace_and_ending_with_end.txt +++ b/test/prism/errors/block_beginning_with_brace_and_ending_with_end.txt @@ -1,5 +1,5 @@ x.each { x end ^~~ unexpected 'end', expecting end-of-input ^~~ unexpected 'end', ignoring it - ^ expected a block beginning with `{` to end with `}` + ^ expected a block beginning with `{` to end with `}` diff --git a/test/prism/errors/command_calls_2.txt b/test/prism/errors/command_calls_2.txt index b0983c015bb49d..13e10f7ebfd2c7 100644 --- a/test/prism/errors/command_calls_2.txt +++ b/test/prism/errors/command_calls_2.txt @@ -1,5 +1,5 @@ {a: b c} - ^ expected a `}` to close the hash literal +^ expected a `}` to close the hash literal ^ unexpected local variable or method, expecting end-of-input ^ unexpected '}', expecting end-of-input ^ unexpected '}', ignoring it diff --git a/test/prism/errors/command_calls_24.txt b/test/prism/errors/command_calls_24.txt index 3046b36dc1256f..27a32ea3bf1235 100644 --- a/test/prism/errors/command_calls_24.txt +++ b/test/prism/errors/command_calls_24.txt @@ -1,5 +1,5 @@ ->a=b c{} ^ expected a `do` keyword or a `{` to open the lambda block ^ unexpected end-of-input, assuming it is closing the parent top level context - ^ expected a lambda block beginning with `do` to end with `end` +^~ expected a lambda block beginning with `do` to end with `end` diff --git a/test/prism/errors/command_calls_25.txt b/test/prism/errors/command_calls_25.txt index 5fddd90fdd09ae..cf04508f87d8a7 100644 --- a/test/prism/errors/command_calls_25.txt +++ b/test/prism/errors/command_calls_25.txt @@ -4,5 +4,5 @@ ^ unexpected ')', expecting end-of-input ^ unexpected ')', ignoring it ^ unexpected end-of-input, assuming it is closing the parent top level context - ^ expected a lambda block beginning with `do` to end with `end` +^~ expected a lambda block beginning with `do` to end with `end` diff --git a/test/prism/errors/heredoc_unterminated.txt b/test/prism/errors/heredoc_unterminated.txt index 3c6aeaeb81fdb5..56bd1629987ce8 100644 --- a/test/prism/errors/heredoc_unterminated.txt +++ b/test/prism/errors/heredoc_unterminated.txt @@ -3,7 +3,7 @@ a=>{< 1 } ^ unexpected '.'; expected a value in the hash literal - ^ expected a `}` to close the hash literal +^ expected a `}` to close the hash literal ^ unexpected '}', expecting end-of-input ^ unexpected '}', ignoring it diff --git a/test/prism/errors/label_in_interpolated_string.txt b/test/prism/errors/label_in_interpolated_string.txt index e8f40dd2a8aa69..29af5310a15b1a 100644 --- a/test/prism/errors/label_in_interpolated_string.txt +++ b/test/prism/errors/label_in_interpolated_string.txt @@ -2,6 +2,7 @@ case in el""Q ^~~~ expected a predicate for a case matching statement ^ expected a delimiter after the patterns of an `in` clause ^ unexpected constant, expecting end-of-input +^~~~ expected an `end` to close the `case` statement !"""#{in el"":Q ^~ unexpected 'in', assuming it is closing the parent 'in' clause ^ expected a `}` to close the embedded expression @@ -10,5 +11,4 @@ case in el""Q ^ cannot parse the string part ^~~~~~~~~~~ unexpected label ^~~~~~~~~~~ expected a string for concatenation - ^ expected an `end` to close the `case` statement diff --git a/test/prism/errors/pattern_string_key.txt b/test/prism/errors/pattern_string_key.txt index 9f28feddb96dea..41bc1fa57b95f4 100644 --- a/test/prism/errors/pattern_string_key.txt +++ b/test/prism/errors/pattern_string_key.txt @@ -1,8 +1,8 @@ case:a +^~~~ expected an `end` to close the `case` statement in b:"","#{}" ^~~~~ expected a label after the `,` in the hash pattern ^ expected a pattern expression after the key ^ expected a delimiter after the patterns of an `in` clause ^ unexpected end-of-input, assuming it is closing the parent top level context - ^ expected an `end` to close the `case` statement diff --git a/test/prism/errors/shadow_args_in_lambda.txt b/test/prism/errors/shadow_args_in_lambda.txt index 2399a0ebd5443c..7fc78d7d8f5102 100644 --- a/test/prism/errors/shadow_args_in_lambda.txt +++ b/test/prism/errors/shadow_args_in_lambda.txt @@ -1,5 +1,5 @@ ->a;b{} ^ expected a `do` keyword or a `{` to open the lambda block ^ unexpected end-of-input, assuming it is closing the parent top level context - ^ expected a lambda block beginning with `do` to end with `end` +^~ expected a lambda block beginning with `do` to end with `end` diff --git a/test/prism/errors/unterminated_begin.txt b/test/prism/errors/unterminated_begin.txt index 6217f80a0b5e97..2733f830c9b0fe 100644 --- a/test/prism/errors/unterminated_begin.txt +++ b/test/prism/errors/unterminated_begin.txt @@ -1,4 +1,4 @@ begin ^ unexpected end-of-input, assuming it is closing the parent top level context - ^ expected an `end` to close the `begin` statement +^~~~~ expected an `end` to close the `begin` statement diff --git a/test/prism/errors/unterminated_begin_upcase.txt b/test/prism/errors/unterminated_begin_upcase.txt index 92b975bf766d61..5512f2089e9cc9 100644 --- a/test/prism/errors/unterminated_begin_upcase.txt +++ b/test/prism/errors/unterminated_begin_upcase.txt @@ -1,4 +1,4 @@ BEGIN { ^ unexpected end-of-input, assuming it is closing the parent top level context - ^ expected a `}` to close the `BEGIN` statement + ^ expected a `}` to close the `BEGIN` statement diff --git a/test/prism/errors/unterminated_block.txt b/test/prism/errors/unterminated_block.txt index 8cc772db162206..db6a4aa56c05f4 100644 --- a/test/prism/errors/unterminated_block.txt +++ b/test/prism/errors/unterminated_block.txt @@ -1,4 +1,4 @@ foo { ^ unexpected end-of-input, assuming it is closing the parent top level context - ^ expected a block beginning with `{` to end with `}` + ^ expected a block beginning with `{` to end with `}` diff --git a/test/prism/errors/unterminated_block_do_end.txt b/test/prism/errors/unterminated_block_do_end.txt index fb7ca53d6aff5f..0b7c64965fd162 100644 --- a/test/prism/errors/unterminated_block_do_end.txt +++ b/test/prism/errors/unterminated_block_do_end.txt @@ -1,4 +1,4 @@ foo do ^ unexpected end-of-input, assuming it is closing the parent top level context - ^ expected a block beginning with `do` to end with `end` + ^~ expected a block beginning with `do` to end with `end` diff --git a/test/prism/errors/unterminated_class.txt b/test/prism/errors/unterminated_class.txt index ea80ab8fc71a33..f47a3aa7dfe918 100644 --- a/test/prism/errors/unterminated_class.txt +++ b/test/prism/errors/unterminated_class.txt @@ -1,4 +1,4 @@ class Foo ^ unexpected end-of-input, assuming it is closing the parent top level context - ^ expected an `end` to close the `class` statement +^~~~~ expected an `end` to close the `class` statement diff --git a/test/prism/errors/unterminated_def.txt b/test/prism/errors/unterminated_def.txt index 83ec939feaec63..a6212e3a21cd55 100644 --- a/test/prism/errors/unterminated_def.txt +++ b/test/prism/errors/unterminated_def.txt @@ -1,5 +1,5 @@ def foo ^ expected a delimiter to close the parameters ^ unexpected end-of-input, assuming it is closing the parent top level context - ^ expected an `end` to close the `def` statement +^~~ expected an `end` to close the `def` statement diff --git a/test/prism/errors/unterminated_end_upcase.txt b/test/prism/errors/unterminated_end_upcase.txt index 42ccd22bd5d3aa..ef01caa0ca6068 100644 --- a/test/prism/errors/unterminated_end_upcase.txt +++ b/test/prism/errors/unterminated_end_upcase.txt @@ -1,4 +1,4 @@ END { ^ unexpected end-of-input, assuming it is closing the parent top level context - ^ expected a `}` to close the `END` statement + ^ expected a `}` to close the `END` statement diff --git a/test/prism/errors/unterminated_for.txt b/test/prism/errors/unterminated_for.txt index cbd17f0b84d97b..75978a7cae8e53 100644 --- a/test/prism/errors/unterminated_for.txt +++ b/test/prism/errors/unterminated_for.txt @@ -1,5 +1,5 @@ for x in y ^ unexpected end-of-input; expected a 'do', newline, or ';' after the 'for' loop collection ^ unexpected end-of-input, assuming it is closing the parent top level context - ^ expected an `end` to close the `for` loop +^~~ expected an `end` to close the `for` loop diff --git a/test/prism/errors/unterminated_if.txt b/test/prism/errors/unterminated_if.txt index 559a00602210f6..1697931773e1fa 100644 --- a/test/prism/errors/unterminated_if.txt +++ b/test/prism/errors/unterminated_if.txt @@ -1,5 +1,5 @@ if true ^ expected `then` or `;` or '\n' ^ unexpected end-of-input, assuming it is closing the parent top level context - ^ expected an `end` to close the conditional clause +^~ expected an `end` to close the conditional clause diff --git a/test/prism/errors/unterminated_if_else.txt b/test/prism/errors/unterminated_if_else.txt index 35a181e844b4ac..db7828cce8565e 100644 --- a/test/prism/errors/unterminated_if_else.txt +++ b/test/prism/errors/unterminated_if_else.txt @@ -1,5 +1,5 @@ if true +^~ expected an `end` to close the `else` clause else ^ unexpected end-of-input, assuming it is closing the parent top level context - ^ expected an `end` to close the `else` clause diff --git a/test/prism/errors/unterminated_lambda_brace.txt b/test/prism/errors/unterminated_lambda_brace.txt index bb8c1090abdd24..75474c7534f59e 100644 --- a/test/prism/errors/unterminated_lambda_brace.txt +++ b/test/prism/errors/unterminated_lambda_brace.txt @@ -1,4 +1,4 @@ -> { ^ unexpected end-of-input, assuming it is closing the parent top level context - ^ expected a lambda block beginning with `{` to end with `}` + ^ expected a lambda block beginning with `{` to end with `}` diff --git a/test/prism/errors/unterminated_module.txt b/test/prism/errors/unterminated_module.txt index cf207c06a72dac..4c50ba5f63ccab 100644 --- a/test/prism/errors/unterminated_module.txt +++ b/test/prism/errors/unterminated_module.txt @@ -1,4 +1,4 @@ module Foo ^ unexpected end-of-input, assuming it is closing the parent top level context - ^ expected an `end` to close the `module` statement +^~~~~~ expected an `end` to close the `module` statement diff --git a/test/prism/errors/unterminated_pattern_bracket.txt b/test/prism/errors/unterminated_pattern_bracket.txt index cc2630f8e98850..4f35cd84af66f4 100644 --- a/test/prism/errors/unterminated_pattern_bracket.txt +++ b/test/prism/errors/unterminated_pattern_bracket.txt @@ -1,7 +1,7 @@ case x +^~~~ expected an `end` to close the `case` statement in [1 - ^ expected a `]` to close the pattern expression + ^ expected a `]` to close the pattern expression ^ expected a delimiter after the patterns of an `in` clause ^ unexpected end-of-input, assuming it is closing the parent top level context - ^ expected an `end` to close the `case` statement diff --git a/test/prism/errors/unterminated_pattern_paren.txt b/test/prism/errors/unterminated_pattern_paren.txt index 162a12854675da..426d614e61719f 100644 --- a/test/prism/errors/unterminated_pattern_paren.txt +++ b/test/prism/errors/unterminated_pattern_paren.txt @@ -1,7 +1,7 @@ case x +^~~~ expected an `end` to close the `case` statement in (1 - ^ expected a `)` to close the pattern expression + ^ expected a `)` to close the pattern expression ^ expected a delimiter after the patterns of an `in` clause ^ unexpected end-of-input, assuming it is closing the parent top level context - ^ expected an `end` to close the `case` statement diff --git a/test/prism/errors/unterminated_until.txt b/test/prism/errors/unterminated_until.txt index b9d7eee40ff062..42a0545200109b 100644 --- a/test/prism/errors/unterminated_until.txt +++ b/test/prism/errors/unterminated_until.txt @@ -1,5 +1,5 @@ until true ^ expected a predicate expression for the `until` statement ^ unexpected end-of-input, assuming it is closing the parent top level context - ^ expected an `end` to close the `until` statement +^~~~~ expected an `end` to close the `until` statement diff --git a/test/prism/errors/while_endless_method.txt b/test/prism/errors/while_endless_method.txt index 6f062d89d0f850..cdd7ba9abab3aa 100644 --- a/test/prism/errors/while_endless_method.txt +++ b/test/prism/errors/while_endless_method.txt @@ -1,5 +1,5 @@ while def f = g do end ^ expected a predicate expression for the `while` statement ^ unexpected end-of-input, assuming it is closing the parent top level context - ^ expected an `end` to close the `while` statement +^~~~~ expected an `end` to close the `while` statement From 65634d8df57ea1636efffb5f040fa31c156d307f Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Tue, 23 Dec 2025 14:41:09 +0100 Subject: [PATCH 8/8] [ruby/prism] Optimize ruby visitor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `compact_child_nodes` allocates an array. We can skip that step by simply yielding the nodes. Benchmark for visiting the rails codebase: ```rb require "prism" require "benchmark/ips" files = Dir.glob("../rails/**/*.rb") results = files.map { Prism.parse_file(it) } visitor = Prism::Visitor.new Benchmark.ips do |x| x.config(warmup: 3, time: 10) x.report do results.each do visitor.visit(it.value) end end end RubyVM::YJIT.enable Benchmark.ips do |x| x.config(warmup: 3, time: 10) x.report do results.each do visitor.visit(it.value) end end end ``` Before: ``` ruby 3.4.8 (2025-12-17 revision https://github.com/ruby/prism/commit/995b59f666) +PRISM [x86_64-linux] Warming up -------------------------------------- 1.000 i/100ms Calculating ------------------------------------- 2.691 (± 0.0%) i/s (371.55 ms/i) - 27.000 in 10.089422s ruby 3.4.8 (2025-12-17 revision https://github.com/ruby/prism/commit/995b59f666) +YJIT +PRISM [x86_64-linux] Warming up -------------------------------------- 1.000 i/100ms Calculating ------------------------------------- 7.278 (±13.7%) i/s (137.39 ms/i) - 70.000 in 10.071568s ``` After: ``` ruby 3.4.8 (2025-12-17 revision https://github.com/ruby/prism/commit/995b59f666) +PRISM [x86_64-linux] Warming up -------------------------------------- 1.000 i/100ms Calculating ------------------------------------- 3.429 (± 0.0%) i/s (291.65 ms/i) - 35.000 in 10.208580s ruby 3.4.8 (2025-12-17 revision https://github.com/ruby/prism/commit/995b59f666) +YJIT +PRISM [x86_64-linux] Warming up -------------------------------------- 1.000 i/100ms Calculating ------------------------------------- 16.815 (± 0.0%) i/s (59.47 ms/i) - 169.000 in 10.054668s ``` ~21% faster on the interpreter, ~56% with YJIT https://github.com/ruby/prism/commit/bf631750cf --- lib/prism/translation/ruby_parser.rb | 2 +- prism/templates/lib/prism/compiler.rb.erb | 4 ++-- prism/templates/lib/prism/node.rb.erb | 25 ++++++++++++++++++++++- prism/templates/lib/prism/visitor.rb.erb | 4 ++-- 4 files changed, 29 insertions(+), 6 deletions(-) diff --git a/lib/prism/translation/ruby_parser.rb b/lib/prism/translation/ruby_parser.rb index 1fb0e278462eeb..c026c4ad9c57af 100644 --- a/lib/prism/translation/ruby_parser.rb +++ b/lib/prism/translation/ruby_parser.rb @@ -1422,7 +1422,7 @@ def visit_or_node(node) # ``` def visit_parameters_node(node) children = - node.compact_child_nodes.map do |element| + node.each_child_node.map do |element| if element.is_a?(MultiTargetNode) visit_destructured_parameter(element) else diff --git a/prism/templates/lib/prism/compiler.rb.erb b/prism/templates/lib/prism/compiler.rb.erb index 9102025c20bdbe..66dbe666b90934 100644 --- a/prism/templates/lib/prism/compiler.rb.erb +++ b/prism/templates/lib/prism/compiler.rb.erb @@ -29,14 +29,14 @@ module Prism # Visit the child nodes of the given node. def visit_child_nodes(node) - node.compact_child_nodes.map { |node| node.accept(self) } + node.each_child_node.map { |node| node.accept(self) } end <%- nodes.each_with_index do |node, index| -%> <%= "\n" if index != 0 -%> # Compile a <%= node.name %> node def visit_<%= node.human %>(node) - node.compact_child_nodes.map { |node| node.accept(self) } + node.each_child_node.map { |node| node.accept(self) } end <%- end -%> end diff --git a/prism/templates/lib/prism/node.rb.erb b/prism/templates/lib/prism/node.rb.erb index ceee2b0ffec820..c97c029d3bf410 100644 --- a/prism/templates/lib/prism/node.rb.erb +++ b/prism/templates/lib/prism/node.rb.erb @@ -187,7 +187,7 @@ module Prism while (node = queue.shift) result << node - node.compact_child_nodes.each do |child_node| + node.each_child_node do |child_node| child_location = child_node.location start_line = child_location.start_line @@ -259,6 +259,13 @@ module Prism alias deconstruct child_nodes + # With a block given, yields each child node. Without a block, returns + # an enumerator that contains each child node. Excludes any `nil`s in + # the place of optional nodes that were not present. + def each_child_node + raise NoMethodError, "undefined method `each_child_node' for #{inspect}" + end + # Returns an array of child nodes, excluding any `nil`s in the place of # optional nodes that were not present. def compact_child_nodes @@ -335,6 +342,22 @@ module Prism }.compact.join(", ") %>] end + # def each_child_node: () { (Prism::node) -> void } -> void | () -> Enumerator[Prism::node] + def each_child_node + return to_enum(:each_child_node) unless block_given? + + <%- node.fields.each do |field| -%> + <%- case field -%> + <%- when Prism::Template::NodeField -%> + yield <%= field.name %> + <%- when Prism::Template::OptionalNodeField -%> + yield <%= field.name %> if <%= field.name %> + <%- when Prism::Template::NodeListField -%> + <%= field.name %>.each {|node| yield node } + <%- end -%> + <%- end -%> + end + # def compact_child_nodes: () -> Array[Node] def compact_child_nodes <%- if node.fields.any? { |field| field.is_a?(Prism::Template::OptionalNodeField) } -%> diff --git a/prism/templates/lib/prism/visitor.rb.erb b/prism/templates/lib/prism/visitor.rb.erb index b1a03c3f1ac9e3..76f907724fd5da 100644 --- a/prism/templates/lib/prism/visitor.rb.erb +++ b/prism/templates/lib/prism/visitor.rb.erb @@ -20,7 +20,7 @@ module Prism # Visits the child nodes of `node` by calling `accept` on each one. def visit_child_nodes(node) # @type self: _Visitor - node.compact_child_nodes.each { |node| node.accept(self) } + node.each_child_node { |node| node.accept(self) } end end @@ -48,7 +48,7 @@ module Prism <%= "\n" if index != 0 -%> # Visit a <%= node.name %> node def visit_<%= node.human %>(node) - node.compact_child_nodes.each { |node| node.accept(self) } + node.each_child_node { |node| node.accept(self) } end <%- end -%> end