From 4d377c8c2eedc42f274b3b2a841fa24d5b4c5541 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 3 Dec 2025 14:41:35 +0100 Subject: [PATCH 01/15] [ruby/json] Improve `JSON.load` and `JSON.unsafe_load` to allow passing options as second argument Otherwise it's very error prone. https://github.com/ruby/json/commit/c54de70f90 --- ext/json/lib/json/common.rb | 21 +++++++++++++++++++-- test/json/json_common_interface_test.rb | 7 +++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/ext/json/lib/json/common.rb b/ext/json/lib/json/common.rb index 233b8c7e62d628..fcdc0d9f0be3e1 100644 --- a/ext/json/lib/json/common.rb +++ b/ext/json/lib/json/common.rb @@ -550,6 +550,7 @@ def pretty_generate(obj, opts = nil) :create_additions => nil, } # :call-seq: + # JSON.unsafe_load(source, options = {}) -> object # JSON.unsafe_load(source, proc = nil, options = {}) -> object # # Returns the Ruby objects created by parsing the given +source+. @@ -681,7 +682,12 @@ def pretty_generate(obj, opts = nil) # def unsafe_load(source, proc = nil, options = nil) opts = if options.nil? - _unsafe_load_default_options + if proc && proc.is_a?(Hash) + options, proc = proc, nil + options + else + _unsafe_load_default_options + end else _unsafe_load_default_options.merge(options) end @@ -709,6 +715,7 @@ def unsafe_load(source, proc = nil, options = nil) end # :call-seq: + # JSON.load(source, options = {}) -> object # JSON.load(source, proc = nil, options = {}) -> object # # Returns the Ruby objects created by parsing the given +source+. @@ -845,8 +852,18 @@ def unsafe_load(source, proc = nil, options = nil) # @attributes={"type"=>"Admin", "password"=>"0wn3d"}>} # def load(source, proc = nil, options = nil) + if proc && options.nil? && proc.is_a?(Hash) + options = proc + proc = nil + end + opts = if options.nil? 
- _load_default_options + if proc && proc.is_a?(Hash) + options, proc = proc, nil + options + else + _load_default_options + end else _load_default_options.merge(options) end diff --git a/test/json/json_common_interface_test.rb b/test/json/json_common_interface_test.rb index 37fa439575cd87..13e2ca062a92bf 100644 --- a/test/json/json_common_interface_test.rb +++ b/test/json/json_common_interface_test.rb @@ -149,6 +149,7 @@ def test_load_with_proc def test_load_with_options json = '{ "foo": NaN }' assert JSON.load(json, nil, :allow_nan => true)['foo'].nan? + assert JSON.load(json, :allow_nan => true)['foo'].nan? end def test_load_null @@ -215,6 +216,12 @@ def test_unsafe_load_with_proc assert_equal expected, visited end + def test_unsafe_load_with_options + json = '{ "foo": NaN }' + assert JSON.unsafe_load(json, nil, :allow_nan => true)['foo'].nan? + assert JSON.unsafe_load(json, :allow_nan => true)['foo'].nan? + end + def test_unsafe_load_default_options too_deep = '[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]' assert JSON.unsafe_load(too_deep, nil).is_a?(Array) From 54a73a57a292d8d7e88cadd3fd8454a3084a60e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Barri=C3=A9?= Date: Thu, 27 Nov 2025 16:12:17 +0100 Subject: [PATCH 02/15] [ruby/json] Test and restore behavior around to_json changing depth When serializing an Array, and one of the elements of the Array requires calling `to_json`, if the depth is changed, it will be used for the next entries, which wasn't the case before https://github.com/ruby/json/commit/5abd43490714, and is not the case with TruffleRuby and JRuby. Additionally, with TruffleRuby and JRuby the state's depth after the `to_json` call is used to close the Array, which isn't the case with CRuby. 
https://github.com/ruby/json/commit/386b36fde5 --- ext/json/generator/generator.c | 2 ++ test/json/json_generator_test.rb | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c index 35d543a7a768fa..9e6e617a59a7a8 100644 --- a/ext/json/generator/generator.c +++ b/ext/json/generator/generator.c @@ -1294,6 +1294,8 @@ static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *d VALUE tmp; if (rb_respond_to(obj, i_to_json)) { tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data)); + GET_STATE(data->vstate); + data->depth = state->depth; Check_Type(tmp, T_STRING); fbuffer_append_str(buffer, tmp); } else { diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb index 9600f4be8d15d0..ab3a6807d65fec 100755 --- a/test/json/json_generator_test.rb +++ b/test/json/json_generator_test.rb @@ -321,6 +321,30 @@ def test_allow_nan end end + def test_depth_bad_to_json + obj = Object.new + def obj.to_json(state) + state.depth += 1 + "{#{state.object_nl}"\ + "#{state.indent * state.depth}\"foo\":#{state.space}1#{state.object_nl}"\ + "#{state.indent * (state.depth - 1)}}" + end + indent = " " * 2 if RUBY_ENGINE != "ruby" + assert_equal <<~JSON.chomp, JSON.pretty_generate([obj] * 2) + [ + { + "foo": 1 + }, + { + "foo": 1 + } + #{indent}] + JSON + state = JSON::State.new(object_nl: "\n", array_nl: "\n", space: " ", indent: " ") + state.generate(obj) + assert_equal 1, state.depth + end + def test_depth pretty = { object_nl: "\n", array_nl: "\n", space: " ", indent: " " } state = JSON.state.new(**pretty) From 32c7c3c19aa7c9c3fda10a9520d29e244baeaa6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Barri=C3=A9?= Date: Fri, 28 Nov 2025 15:53:58 +0100 Subject: [PATCH 03/15] [ruby/json] Reproduce C ext behavior of ignoring mutated depth in arrays https://github.com/ruby/json/commit/e0257b9f82 --- test/json/json_generator_test.rb | 8 +++++--- 1 file changed, 5 
insertions(+), 3 deletions(-) diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb index ab3a6807d65fec..e623e05409ee6c 100755 --- a/test/json/json_generator_test.rb +++ b/test/json/json_generator_test.rb @@ -329,7 +329,6 @@ def obj.to_json(state) "#{state.indent * state.depth}\"foo\":#{state.space}1#{state.object_nl}"\ "#{state.indent * (state.depth - 1)}}" end - indent = " " * 2 if RUBY_ENGINE != "ruby" assert_equal <<~JSON.chomp, JSON.pretty_generate([obj] * 2) [ { @@ -338,11 +337,14 @@ def obj.to_json(state) { "foo": 1 } - #{indent}] + ] JSON state = JSON::State.new(object_nl: "\n", array_nl: "\n", space: " ", indent: " ") state.generate(obj) - assert_equal 1, state.depth + assert_equal 1, state.depth # FIXME + state.depth = 0 + state.generate([obj]) + assert_equal 0, state.depth end def test_depth From 05383a1de2f2afe263ab894e851eca51e40bb543 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 3 Dec 2025 15:13:00 +0100 Subject: [PATCH 04/15] [ruby/json] Fix duplicated test_unsafe_load_with_options test case https://github.com/ruby/json/commit/7b62fac525 --- test/json/json_common_interface_test.rb | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/test/json/json_common_interface_test.rb b/test/json/json_common_interface_test.rb index 13e2ca062a92bf..3dfd0623cd98bc 100644 --- a/test/json/json_common_interface_test.rb +++ b/test/json/json_common_interface_test.rb @@ -216,12 +216,6 @@ def test_unsafe_load_with_proc assert_equal expected, visited end - def test_unsafe_load_with_options - json = '{ "foo": NaN }' - assert JSON.unsafe_load(json, nil, :allow_nan => true)['foo'].nan? - assert JSON.unsafe_load(json, :allow_nan => true)['foo'].nan? 
- end - def test_unsafe_load_default_options too_deep = '[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]' assert JSON.unsafe_load(too_deep, nil).is_a?(Array) @@ -237,6 +231,7 @@ def test_unsafe_load_with_options assert_raise(JSON::ParserError) { JSON.unsafe_load(nan_json, nil, :allow_nan => false)['foo'].nan? } # make sure it still uses the defaults when something is provided assert JSON.unsafe_load(nan_json, nil, :allow_blank => true)['foo'].nan? + assert JSON.unsafe_load(nan_json, :allow_nan => true)['foo'].nan? end def test_unsafe_load_null From 5770c186d1e9d8e7202c83763c9619faa1f4c97c Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 3 Dec 2025 10:42:50 +0100 Subject: [PATCH 05/15] Rename `rb_obj_exivar_p` -> `rb_obj_gen_fields_p` The "EXIVAR" terminology has been replaced by "gen fields" AKA "generic fields". Exivar implies variable, but generic fields include more than just variables, e.g. `object_id`. 
--- ext/-test-/tracepoint/tracepoint.c | 2 +- gc.c | 4 ++-- hash.c | 4 ++-- ractor.c | 6 +++--- shape.h | 2 +- string.c | 2 +- variable.c | 4 ++-- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/ext/-test-/tracepoint/tracepoint.c b/ext/-test-/tracepoint/tracepoint.c index e0bd182d18278b..03887b1d5b6549 100644 --- a/ext/-test-/tracepoint/tracepoint.c +++ b/ext/-test-/tracepoint/tracepoint.c @@ -93,7 +93,7 @@ on_newobj_event(VALUE tpval, void *data) { VALUE obj = rb_tracearg_object(rb_tracearg_from_tracepoint(tpval)); if (RB_TYPE_P(obj, T_STRING)) { - // Would fail !rb_obj_exivar_p(str) assertion in fstring_concurrent_set_create + // Would fail !rb_obj_gen_fields_p(str) assertion in fstring_concurrent_set_create return; } if (!rb_objspace_internal_object_p(obj)) rb_obj_id(obj); diff --git a/gc.c b/gc.c index 557a3cbff4017d..9ccaffdc0b371c 100644 --- a/gc.c +++ b/gc.c @@ -2061,7 +2061,7 @@ rb_gc_obj_free_vm_weak_references(VALUE obj) { obj_free_object_id(obj); - if (rb_obj_exivar_p(obj)) { + if (rb_obj_gen_fields_p(obj)) { rb_free_generic_ivar(obj); } @@ -3116,7 +3116,7 @@ rb_gc_mark_children(void *objspace, VALUE obj) { struct gc_mark_classext_foreach_arg foreach_args; - if (rb_obj_exivar_p(obj)) { + if (rb_obj_gen_fields_p(obj)) { rb_mark_generic_ivar(obj); } diff --git a/hash.c b/hash.c index 0b98b68d169b44..ac9a71794c8430 100644 --- a/hash.c +++ b/hash.c @@ -1554,7 +1554,7 @@ rb_hash_dup(VALUE hash) const VALUE flags = RBASIC(hash)->flags; VALUE ret = hash_dup(hash, rb_obj_class(hash), flags & RHASH_PROC_DEFAULT); - if (rb_obj_exivar_p(hash)) { + if (rb_obj_gen_fields_p(hash)) { rb_copy_generic_ivar(ret, hash); } return ret; @@ -2876,7 +2876,7 @@ hash_aset(st_data_t *key, st_data_t *val, struct update_arg *arg, int existing) VALUE rb_hash_key_str(VALUE key) { - if (!rb_obj_exivar_p(key) && RBASIC_CLASS(key) == rb_cString) { + if (!rb_obj_gen_fields_p(key) && RBASIC_CLASS(key) == rb_cString) { return rb_fstring(key); } else { diff --git a/ractor.c 
b/ractor.c index 8238d9a456e233..ea09583c5f7f7c 100644 --- a/ractor.c +++ b/ractor.c @@ -1137,7 +1137,7 @@ rb_obj_set_shareable_no_assert(VALUE obj) { FL_SET_RAW(obj, FL_SHAREABLE); - if (rb_obj_exivar_p(obj)) { + if (rb_obj_gen_fields_p(obj)) { VALUE fields = rb_obj_fields_no_ractor_check(obj); if (imemo_type_p(fields, imemo_fields)) { // no recursive mark @@ -1750,7 +1750,7 @@ obj_traverse_replace_i(VALUE obj, struct obj_traverse_replace_data *data) else if (data->replacement != _val) { RB_OBJ_WRITE(parent_obj, &v, data->replacement); } \ } while (0) - if (UNLIKELY(rb_obj_exivar_p(obj))) { + if (UNLIKELY(rb_obj_gen_fields_p(obj))) { VALUE fields_obj = rb_obj_fields_no_ractor_check(obj); if (UNLIKELY(rb_shape_obj_too_complex_p(obj))) { @@ -1984,7 +1984,7 @@ move_leave(VALUE obj, struct obj_traverse_replace_data *data) rb_gc_writebarrier_remember(data->replacement); void rb_replace_generic_ivar(VALUE clone, VALUE obj); // variable.c - if (UNLIKELY(rb_obj_exivar_p(obj))) { + if (UNLIKELY(rb_obj_gen_fields_p(obj))) { rb_replace_generic_ivar(data->replacement, obj); } diff --git a/shape.h b/shape.h index b0bb4db0bfce1b..9478d4b3a95dc6 100644 --- a/shape.h +++ b/shape.h @@ -437,7 +437,7 @@ rb_shape_obj_has_fields(VALUE obj) } static inline bool -rb_obj_exivar_p(VALUE obj) +rb_obj_gen_fields_p(VALUE obj) { switch (TYPE(obj)) { case T_NONE: diff --git a/string.c b/string.c index c794b36748e6e6..56c83ca2d53cd6 100644 --- a/string.c +++ b/string.c @@ -549,7 +549,7 @@ fstring_concurrent_set_create(VALUE str, void *data) RUBY_ASSERT(RB_TYPE_P(str, T_STRING)); RUBY_ASSERT(OBJ_FROZEN(str)); RUBY_ASSERT(!FL_TEST_RAW(str, STR_FAKESTR)); - RUBY_ASSERT(!rb_obj_exivar_p(str)); + RUBY_ASSERT(!rb_obj_gen_fields_p(str)); RUBY_ASSERT(RBASIC_CLASS(str) == rb_cString); RUBY_ASSERT(!rb_objspace_garbage_object_p(str)); diff --git a/variable.c b/variable.c index d4c5d91e25d6dc..9a7e11850dcef5 100644 --- a/variable.c +++ b/variable.c @@ -1291,7 +1291,7 @@ rb_obj_fields(VALUE obj, ID 
field_name) void rb_free_generic_ivar(VALUE obj) { - if (rb_obj_exivar_p(obj)) { + if (rb_obj_gen_fields_p(obj)) { st_data_t key = (st_data_t)obj, value; switch (BUILTIN_TYPE(obj)) { case T_DATA: @@ -2218,7 +2218,7 @@ rb_copy_generic_ivar(VALUE dest, VALUE obj) rb_check_frozen(dest); - if (!rb_obj_exivar_p(obj)) { + if (!rb_obj_gen_fields_p(obj)) { return; } From b78db63be4d078b7ac29c8e9fcb40cb20d232265 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 3 Dec 2025 10:47:28 +0100 Subject: [PATCH 06/15] fstring_concurrent_set_create: only assert the string has no ivars The NEWOBJ tracepoint can generate an object_id, that's alright, what we don't want is actual instance variables. --- ext/-test-/tracepoint/tracepoint.c | 4 ---- string.c | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/ext/-test-/tracepoint/tracepoint.c b/ext/-test-/tracepoint/tracepoint.c index 03887b1d5b6549..7f7aa246628858 100644 --- a/ext/-test-/tracepoint/tracepoint.c +++ b/ext/-test-/tracepoint/tracepoint.c @@ -92,10 +92,6 @@ static void on_newobj_event(VALUE tpval, void *data) { VALUE obj = rb_tracearg_object(rb_tracearg_from_tracepoint(tpval)); - if (RB_TYPE_P(obj, T_STRING)) { - // Would fail !rb_obj_gen_fields_p(str) assertion in fstring_concurrent_set_create - return; - } if (!rb_objspace_internal_object_p(obj)) rb_obj_id(obj); } diff --git a/string.c b/string.c index 56c83ca2d53cd6..0370fc5d1e02e6 100644 --- a/string.c +++ b/string.c @@ -549,7 +549,7 @@ fstring_concurrent_set_create(VALUE str, void *data) RUBY_ASSERT(RB_TYPE_P(str, T_STRING)); RUBY_ASSERT(OBJ_FROZEN(str)); RUBY_ASSERT(!FL_TEST_RAW(str, STR_FAKESTR)); - RUBY_ASSERT(!rb_obj_gen_fields_p(str)); + RUBY_ASSERT(!rb_shape_obj_has_ivars(str)); RUBY_ASSERT(RBASIC_CLASS(str) == rb_cString); RUBY_ASSERT(!rb_objspace_garbage_object_p(str)); From 208271e3723653cd4cb9cd2eb4a6c631eee0b09c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89tienne=20Barri=C3=A9?= Date: Wed, 3 Dec 2025 15:43:28 +0100 Subject: [PATCH 
07/15] [ruby/json] Fix handling of depth https://github.com/ruby/json/commit/ccca602274 --- ext/json/generator/generator.c | 50 +++------------------ ext/json/lib/json/common.rb | 2 +- test/json/json_generator_test.rb | 77 ++++++++++++++++++++++++++------ 3 files changed, 72 insertions(+), 57 deletions(-) diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c index 9e6e617a59a7a8..d202e97ea18156 100644 --- a/ext/json/generator/generator.c +++ b/ext/json/generator/generator.c @@ -968,14 +968,16 @@ static void vstate_spill(struct generate_json_data *data) RB_OBJ_WRITTEN(vstate, Qundef, state->as_json); } -static inline VALUE vstate_get(struct generate_json_data *data) +static inline VALUE json_call_to_json(struct generate_json_data *data, VALUE obj) { if (RB_UNLIKELY(!data->vstate)) { vstate_spill(data); } GET_STATE(data->vstate); state->depth = data->depth; - return data->vstate; + VALUE tmp = rb_funcall(obj, i_to_json, 1, data->vstate); + // no need to restore state->depth, vstate is just a temporary State + return tmp; } static VALUE @@ -1293,9 +1295,7 @@ static void generate_json_fallback(FBuffer *buffer, struct generate_json_data *d { VALUE tmp; if (rb_respond_to(obj, i_to_json)) { - tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data)); - GET_STATE(data->vstate); - data->depth = state->depth; + tmp = json_call_to_json(data, obj); Check_Type(tmp, T_STRING); fbuffer_append_str(buffer, tmp); } else { @@ -1477,16 +1477,6 @@ static VALUE generate_json_try(VALUE d) return fbuffer_finalize(data->buffer); } -// Preserves the deprecated behavior of State#depth being set. 
-static VALUE generate_json_ensure_deprecated(VALUE d) -{ - struct generate_json_data *data = (struct generate_json_data *)d; - fbuffer_free(data->buffer); - data->state->depth = data->depth; - - return Qundef; -} - static VALUE generate_json_ensure(VALUE d) { struct generate_json_data *data = (struct generate_json_data *)d; @@ -1507,13 +1497,13 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, struct generate_json_data data = { .buffer = &buffer, - .vstate = self, + .vstate = Qfalse, // don't use self as it may be frozen and its depth is mutated when calling to_json .state = state, .depth = state->depth, .obj = obj, .func = func }; - return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure_deprecated, (VALUE)&data); + return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data); } /* call-seq: @@ -1532,31 +1522,6 @@ static VALUE cState_generate(int argc, VALUE *argv, VALUE self) return cState_partial_generate(self, obj, generate_json, io); } -static VALUE cState_generate_new(int argc, VALUE *argv, VALUE self) -{ - rb_check_arity(argc, 1, 2); - VALUE obj = argv[0]; - VALUE io = argc > 1 ? argv[1] : Qnil; - - GET_STATE(self); - - char stack_buffer[FBUFFER_STACK_SIZE]; - FBuffer buffer = { - .io = RTEST(io) ? io : Qfalse, - }; - fbuffer_stack_init(&buffer, state->buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE); - - struct generate_json_data data = { - .buffer = &buffer, - .vstate = Qfalse, - .state = state, - .depth = state->depth, - .obj = obj, - .func = generate_json - }; - return rb_ensure(generate_json_try, (VALUE)&data, generate_json_ensure, (VALUE)&data); -} - static VALUE cState_initialize(int argc, VALUE *argv, VALUE self) { rb_warn("The json gem extension was loaded with the stdlib ruby code. 
You should upgrade rubygems with `gem update --system`"); @@ -2145,7 +2110,6 @@ void Init_generator(void) rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0); rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1); rb_define_method(cState, "generate", cState_generate, -1); - rb_define_method(cState, "generate_new", cState_generate_new, -1); // :nodoc: rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0); diff --git a/ext/json/lib/json/common.rb b/ext/json/lib/json/common.rb index fcdc0d9f0be3e1..877b96814e8bca 100644 --- a/ext/json/lib/json/common.rb +++ b/ext/json/lib/json/common.rb @@ -1074,7 +1074,7 @@ def initialize(options = nil, &as_json) # # Serialize the given object into a \JSON document. def dump(object, io = nil) - @state.generate_new(object, io) + @state.generate(object, io) end alias_method :generate, :dump diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb index e623e05409ee6c..9f8b35de093271 100755 --- a/test/json/json_generator_test.rb +++ b/test/json/json_generator_test.rb @@ -321,7 +321,8 @@ def test_allow_nan end end - def test_depth_bad_to_json + # An object that changes state.depth when it receives to_json(state) + def bad_to_json obj = Object.new def obj.to_json(state) state.depth += 1 @@ -329,21 +330,44 @@ def obj.to_json(state) "#{state.indent * state.depth}\"foo\":#{state.space}1#{state.object_nl}"\ "#{state.indent * (state.depth - 1)}}" end - assert_equal <<~JSON.chomp, JSON.pretty_generate([obj] * 2) + obj + end + + def test_depth_restored_bad_to_json + state = JSON::State.new + state.generate(bad_to_json) + assert_equal 0, state.depth + end + + def test_depth_restored_bad_to_json_in_Array + assert_equal <<~JSON.chomp, JSON.pretty_generate([bad_to_json] * 2) [ { "foo": 1 }, { - "foo": 1 - } + "foo": 1 + } ] JSON - state = JSON::State.new(object_nl: "\n", array_nl: "\n", space: " ", indent: " ") - 
state.generate(obj) - assert_equal 1, state.depth # FIXME - state.depth = 0 - state.generate([obj]) + state = JSON::State.new + state.generate([bad_to_json]) + assert_equal 0, state.depth + end + + def test_depth_restored_bad_to_json_in_Hash + assert_equal <<~JSON.chomp, JSON.pretty_generate(a: bad_to_json, b: bad_to_json) + { + "a": { + "foo": 1 + }, + "b": { + "foo": 1 + } + } + JSON + state = JSON::State.new + state.generate(a: bad_to_json) assert_equal 0, state.depth end @@ -361,10 +385,36 @@ def test_depth_nesting_error ary = []; ary << ary assert_raise(JSON::NestingError) { generate(ary) } assert_raise(JSON::NestingError) { JSON.pretty_generate(ary) } - s = JSON.state.new - assert_equal 0, s.depth + end + + def test_depth_nesting_error_to_json + ary = []; ary << ary + s = JSON.state.new(depth: 1) assert_raise(JSON::NestingError) { ary.to_json(s) } - assert_equal 100, s.depth + assert_equal 1, s.depth + end + + def test_depth_nesting_error_Hash_to_json + hash = {}; hash[:a] = hash + s = JSON.state.new(depth: 1) + assert_raise(JSON::NestingError) { hash.to_json(s) } + assert_equal 1, s.depth + end + + def test_depth_nesting_error_generate + ary = []; ary << ary + s = JSON.state.new(depth: 1) + assert_raise(JSON::NestingError) { s.generate(ary) } + assert_equal 1, s.depth + end + + def test_depth_exception_calling_to_json + def (obj = Object.new).to_json(*) + raise + end + s = JSON.state.new(depth: 1).freeze + assert_raise(RuntimeError) { s.generate([{ hash: obj }]) } + assert_equal 1, s.depth end def test_buffer_initial_length @@ -1006,7 +1056,8 @@ def test_nesting_recovery state = JSON::State.new ary = [] ary << ary - assert_raise(JSON::NestingError) { state.generate_new(ary) } + assert_raise(JSON::NestingError) { state.generate(ary) } + assert_equal 0, state.depth assert_equal '{"a":1}', state.generate({ a: 1 }) end end From 94581b1ffde5e2afeba4631152955c18ec52ccf0 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 3 Dec 2025 16:23:14 +0100 Subject: 
[PATCH 08/15] [ruby/json] Release 2.17.0 https://github.com/ruby/json/commit/4bdb2d14fe --- ext/json/lib/json/version.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/json/lib/json/version.rb b/ext/json/lib/json/version.rb index cc25a0453e20c9..b7de7c27e21446 100644 --- a/ext/json/lib/json/version.rb +++ b/ext/json/lib/json/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module JSON - VERSION = '2.16.0' + VERSION = '2.17.0' end From 20fc8aff05ce857f3d3b759d92f1941132398b65 Mon Sep 17 00:00:00 2001 From: git Date: Wed, 3 Dec 2025 15:27:19 +0000 Subject: [PATCH 09/15] Update default gems list at 94581b1ffde5e2afeba4631152955c [ci skip] --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index bec7ee9db45036..f3e7f1ae0e3a6e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -198,7 +198,7 @@ The following default gems are updated. * io-console 0.8.1 * io-nonblock 0.3.2 * io-wait 0.4.0.dev -* json 2.16.0 +* json 2.17.0 * net-http 0.8.0 * openssl 4.0.0.pre * optparse 0.8.0 From d7dffcdbeeee81bb3bbe63b86620cb682eb3ab23 Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Wed, 3 Dec 2025 15:41:23 +0100 Subject: [PATCH 10/15] [ruby/prism] Follow repo move from oracle/truffleruby to truffleruby/truffleruby https://github.com/ruby/prism/commit/c8e1b11120 --- prism/prism.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prism/prism.h b/prism/prism.h index dc31f26e786a5c..c468db18bef3c2 100644 --- a/prism/prism.h +++ b/prism/prism.h @@ -314,7 +314,7 @@ PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint * dependencies. It is currently being integrated into * [CRuby](https://github.com/ruby/ruby), * [JRuby](https://github.com/jruby/jruby), - * [TruffleRuby](https://github.com/oracle/truffleruby), + * [TruffleRuby](https://github.com/truffleruby/truffleruby), * [Sorbet](https://github.com/sorbet/sorbet), and * [Syntax Tree](https://github.com/ruby-syntax-tree/syntax_tree). 
* From fcf3939780972d587b18afc26c4abd2da2c0b7ec Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Wed, 3 Dec 2025 14:50:35 +0100 Subject: [PATCH 11/15] Speedup TypedData_Get_Struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While profiling `Monitor#synchronize` and `Mutex#synchronize` I noticed a fairly significant amount of time spent in `rb_check_typeddata`. By implementing a fast path that assumes the object is valid and that can be inlined, it does make a significant difference: Before: ``` Mutex 13.548M (± 3.6%) i/s (73.81 ns/i) - 68.566M in 5.067444 Monitor 10.497M (± 6.5%) i/s (95.27 ns/i) - 52.529M in 5.032698s ``` After: ``` Mutex 20.887M (± 0.3%) i/s (47.88 ns/i) - 106.021M in 5.075989s Monitor 16.245M (±13.3%) i/s (61.56 ns/i) - 80.705M in 5.099680s ``` ```ruby require 'bundler/inline' gemfile do gem "benchmark-ips" end mutex = Mutex.new require "monitor" monitor = Monitor.new Benchmark.ips do |x| x.report("Mutex") { mutex.synchronize { } } x.report("Monitor") { monitor.synchronize { } } end ``` --- include/ruby/internal/core/rtypeddata.h | 45 ++++++++++++++++++------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/include/ruby/internal/core/rtypeddata.h b/include/ruby/internal/core/rtypeddata.h index 24e87e63f979f9..aaf8f7997c8459 100644 --- a/include/ruby/internal/core/rtypeddata.h +++ b/include/ruby/internal/core/rtypeddata.h @@ -507,19 +507,6 @@ RBIMPL_SYMBOL_EXPORT_END() sizeof(type)) #endif -/** - * Obtains a C struct from inside of a wrapper Ruby object. - * - * @param obj An instance of ::RTypedData. - * @param type Type name of the C struct. - * @param data_type The data type describing `type`. - * @param sval Variable name of obtained C struct. - * @exception rb_eTypeError `obj` is not a kind of `data_type`. - * @return Unwrapped C struct that `obj` holds. 
- */ -#define TypedData_Get_Struct(obj,type,data_type,sval) \ - ((sval) = RBIMPL_CAST((type *)rb_check_typeddata((obj), (data_type)))) - static inline bool RTYPEDDATA_EMBEDDED_P(VALUE obj) { @@ -614,6 +601,38 @@ RTYPEDDATA_TYPE(VALUE obj) return (const struct rb_data_type_struct *)(RTYPEDDATA(obj)->type & TYPED_DATA_PTR_MASK); } +RBIMPL_ATTR_PURE_UNLESS_DEBUG() +RBIMPL_ATTR_ARTIFICIAL() +/** + * @private + * + * This is an implementation detail of TypedData_Get_Struct(). Don't use it + * directly. + */ +static inline void * +rbimpl_check_typeddata(VALUE obj, const rb_data_type_t *type) +{ + if (RB_LIKELY(RB_TYPE_P(obj, T_DATA) && RTYPEDDATA_P(obj) && RTYPEDDATA_TYPE(obj) == type)) { + return RTYPEDDATA_GET_DATA(obj); + } + + return rb_check_typeddata(obj, type); +} + + +/** + * Obtains a C struct from inside of a wrapper Ruby object. + * + * @param obj An instance of ::RTypedData. + * @param type Type name of the C struct. + * @param data_type The data type describing `type`. + * @param sval Variable name of obtained C struct. + * @exception rb_eTypeError `obj` is not a kind of `data_type`. + * @return Unwrapped C struct that `obj` holds. 
+ */ +#define TypedData_Get_Struct(obj,type,data_type,sval) \ + ((sval) = RBIMPL_CAST((type *)rbimpl_check_typeddata((obj), (data_type)))) + /** * While we don't stop you from using this function, it seems to be an * implementation detail of #TypedData_Make_Struct, which is preferred over From f9cd94f17d6fef49f1ee5cbb8f66839f0d7a5db9 Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Mon, 1 Dec 2025 15:15:42 -0800 Subject: [PATCH 12/15] wb-protect autoload_const --- variable.c | 45 +++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/variable.c b/variable.c index 9a7e11850dcef5..faeffec660a9f2 100644 --- a/variable.c +++ b/variable.c @@ -2734,7 +2734,7 @@ autoload_const_free(void *ptr) static const rb_data_type_t autoload_const_type = { "autoload_const", {autoload_const_mark_and_move, autoload_const_free, autoload_const_memsize, autoload_const_mark_and_move,}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED }; static struct autoload_data * @@ -2778,12 +2778,12 @@ autoload_copy_table_for_box_i(st_data_t key, st_data_t value, st_data_t arg) struct autoload_data *autoload_data = rb_check_typeddata(autoload_data_value, &autoload_data_type); VALUE new_value = TypedData_Make_Struct(0, struct autoload_const, &autoload_const_type, autoload_const); - autoload_const->box_value = rb_get_box_object((rb_box_t *)box); - autoload_const->module = src_const->module; + RB_OBJ_WRITE(new_value, &autoload_const->box_value, rb_get_box_object((rb_box_t *)box)); + RB_OBJ_WRITE(new_value, &autoload_const->module, src_const->module); autoload_const->name = src_const->name; - autoload_const->value = src_const->value; + RB_OBJ_WRITE(new_value, &autoload_const->value, src_const->value); autoload_const->flag = src_const->flag; - autoload_const->autoload_data_value = autoload_data_value; + RB_OBJ_WRITE(new_value, &autoload_const->autoload_data_value, autoload_data_value); 
ccan_list_add_tail(&autoload_data->constants, &autoload_const->cnode); st_insert(tbl, (st_data_t)autoload_const->name, (st_data_t)new_value); @@ -2907,12 +2907,12 @@ autoload_synchronized(VALUE _arguments) { struct autoload_const *autoload_const; VALUE autoload_const_value = TypedData_Make_Struct(0, struct autoload_const, &autoload_const_type, autoload_const); - autoload_const->box_value = arguments->box_value; - autoload_const->module = arguments->module; + RB_OBJ_WRITE(autoload_const_value, &autoload_const->box_value, arguments->box_value); + RB_OBJ_WRITE(autoload_const_value, &autoload_const->module, arguments->module); autoload_const->name = arguments->name; autoload_const->value = Qundef; autoload_const->flag = CONST_PUBLIC; - autoload_const->autoload_data_value = autoload_data_value; + RB_OBJ_WRITE(autoload_const_value, &autoload_const->autoload_data_value, autoload_data_value); ccan_list_add_tail(&autoload_data->constants, &autoload_const->cnode); st_insert(autoload_table, (st_data_t)arguments->name, (st_data_t)autoload_const_value); RB_OBJ_WRITTEN(autoload_table_value, Qundef, autoload_const_value); @@ -3920,21 +3920,21 @@ rb_const_set(VALUE klass, ID id, VALUE val) const_added(klass, id); } -static struct autoload_data * -autoload_data_for_named_constant(VALUE module, ID name, struct autoload_const **autoload_const_pointer) +static VALUE +autoload_const_value_for_named_constant(VALUE module, ID name, struct autoload_const **autoload_const_pointer) { - VALUE autoload_data_value = autoload_data(module, name); - if (!autoload_data_value) return 0; + VALUE autoload_const_value = autoload_data(module, name); + if (!autoload_const_value) return Qfalse; - struct autoload_data *autoload_data = get_autoload_data(autoload_data_value, autoload_const_pointer); - if (!autoload_data) return 0; + struct autoload_data *autoload_data = get_autoload_data(autoload_const_value, autoload_const_pointer); + if (!autoload_data) return Qfalse; /* for autoloading thread, keep the 
defined value to autoloading storage */ if (autoload_by_current(autoload_data)) { - return autoload_data; + return autoload_const_value; } - return 0; + return Qfalse; } static void @@ -3954,13 +3954,13 @@ const_tbl_update(struct autoload_const *ac, int autoload_force) RUBY_ASSERT_CRITICAL_SECTION_ENTER(); VALUE file = ac->file; int line = ac->line; - struct autoload_data *ele = autoload_data_for_named_constant(klass, id, &ac); + VALUE autoload_const_value = autoload_const_value_for_named_constant(klass, id, &ac); - if (!autoload_force && ele) { + if (!autoload_force && autoload_const_value) { rb_clear_constant_cache_for_id(id); - ac->value = val; /* autoload_data is non-WB-protected */ - ac->file = rb_source_location(&ac->line); + RB_OBJ_WRITE(autoload_const_value, &ac->value, val); + RB_OBJ_WRITE(autoload_const_value, &ac->file, rb_source_location(&ac->line)); } else { /* otherwise autoloaded constant, allow to override */ @@ -4054,10 +4054,7 @@ set_const_visibility(VALUE mod, int argc, const VALUE *argv, ce->flag &= ~mask; ce->flag |= flag; if (UNDEF_P(ce->value)) { - struct autoload_data *ele; - - ele = autoload_data_for_named_constant(mod, id, &ac); - if (ele) { + if (autoload_const_value_for_named_constant(mod, id, &ac)) { ac->flag &= ~mask; ac->flag |= flag; } From ed31a0caa88006afa507fd387e3f84ad8b8ddb00 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Wed, 3 Dec 2025 09:05:13 +0100 Subject: [PATCH 13/15] [ruby/prism] Correctly handle line continuations in %w/i% interrupted by heredocs See https://bugs.ruby-lang.org/issues/21756. Ripper fails to parse this, but prism actually also doesn't handle it correctly. When heredocs are used, even in lowercase percent arrays there can be multiple `STRING_CONTENT` tokens. We need to concat them. Luckily we don't need to handle as many cases as in uppercase arrays where interpolation is allowed. 
https://github.com/ruby/prism/commit/211677000e --- prism/prism.c | 71 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 9 deletions(-) diff --git a/prism/prism.c b/prism/prism.c index cd4d166a124ef1..291d1d85218c1e 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -19299,18 +19299,52 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parser_lex(parser); pm_token_t opening = parser->previous; pm_array_node_t *array = pm_array_node_create(parser, &opening); + pm_node_t *current = NULL; while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) { accept1(parser, PM_TOKEN_WORDS_SEP); if (match1(parser, PM_TOKEN_STRING_END)) break; - if (match1(parser, PM_TOKEN_STRING_CONTENT)) { + // Interpolation is not possible but nested heredocs can still lead to + // consecutive (disjoint) string tokens when the final newline is escaped. + while (match1(parser, PM_TOKEN_STRING_CONTENT)) { pm_token_t opening = not_provided(parser); pm_token_t closing = not_provided(parser); - pm_array_node_elements_append(array, UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing))); + + // Record the string node, moving to interpolation if needed. 
+ if (current == NULL) { + current = UP(pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing)); + parser_lex(parser); + } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) { + pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing)); + parser_lex(parser); + pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string); + } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) { + pm_symbol_node_t *cast = (pm_symbol_node_t *) current; + pm_token_t bounds = not_provided(parser); + + pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end }; + pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped)); + pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing)); + parser_lex(parser); + + pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing); + pm_interpolated_symbol_node_append(interpolated, first_string); + pm_interpolated_symbol_node_append(interpolated, second_string); + + xfree(current); + current = UP(interpolated); + } else { + assert(false && "unreachable"); + } } - expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT); + if (current) { + pm_array_node_elements_append(array, current); + current = NULL; + } else { + expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT); + } } pm_token_t closing = parser->current; @@ -19489,23 +19523,42 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b parser_lex(parser); pm_token_t opening = parser->previous; pm_array_node_t *array = pm_array_node_create(parser, &opening); - - // skip all leading whitespaces - accept1(parser, PM_TOKEN_WORDS_SEP); + pm_node_t *current = NULL; while (!match2(parser, PM_TOKEN_STRING_END, 
PM_TOKEN_EOF)) { accept1(parser, PM_TOKEN_WORDS_SEP); if (match1(parser, PM_TOKEN_STRING_END)) break; - if (match1(parser, PM_TOKEN_STRING_CONTENT)) { + // Interpolation is not possible but nested heredocs can still lead to + // consecutive (disjoint) string tokens when the final newline is escaped. + while (match1(parser, PM_TOKEN_STRING_CONTENT)) { pm_token_t opening = not_provided(parser); pm_token_t closing = not_provided(parser); pm_node_t *string = UP(pm_string_node_create_current_string(parser, &opening, &parser->current, &closing)); - pm_array_node_elements_append(array, string); + + // Record the string node, moving to interpolation if needed. + if (current == NULL) { + current = string; + } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) { + pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string); + } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) { + pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing); + pm_interpolated_string_node_append(interpolated, current); + pm_interpolated_string_node_append(interpolated, string); + current = UP(interpolated); + } else { + assert(false && "unreachable"); + } + parser_lex(parser); } - expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT); + if (current) { + pm_array_node_elements_append(array, current); + current = NULL; + } else { + expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT); + } } pm_token_t closing = parser->current; From d5c7cf0a1a1d2a72421b9a166e19442f89b99868 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Wed, 3 Dec 2025 09:06:22 +0100 Subject: [PATCH 14/15] [ruby/prism] Fix wrong error message for lower percent i arrays Not so sure how to trigger it but this is definitely more correct. 
https://github.com/ruby/prism/commit/1bc8ec5e5d --- prism/prism.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prism/prism.c b/prism/prism.c index 291d1d85218c1e..02247734e2ace7 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -19343,7 +19343,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b pm_array_node_elements_append(array, current); current = NULL; } else { - expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT); + expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT); } } From fd02356e36198b5bb0bb64f303a716a4ada9ed15 Mon Sep 17 00:00:00 2001 From: Goshanraj Govindaraj Date: Wed, 3 Dec 2025 13:37:43 -0500 Subject: [PATCH 15/15] ZJIT: Optimize NewArray to use rb_ec_ary_new_from_values (#15391) --- zjit/src/codegen.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index bb19d7d8209ff8..57ee65e91b754b 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -1451,15 +1451,16 @@ fn gen_new_array( ) -> lir::Opnd { gen_prepare_leaf_call_with_gc(asm, state); - let length: c_long = elements.len().try_into().expect("Unable to fit length of elements into c_long"); + let num: c_long = elements.len().try_into().expect("Unable to fit length of elements into c_long"); - let new_array = asm_ccall!(asm, rb_ary_new_capa, length.into()); - - for val in elements { - asm_ccall!(asm, rb_ary_push, new_array, val); + if elements.is_empty() { + asm_ccall!(asm, rb_ec_ary_new_from_values, EC, 0i64.into(), Opnd::UImm(0)) + } else { + let argv = gen_push_opnds(asm, &elements); + let new_array = asm_ccall!(asm, rb_ec_ary_new_from_values, EC, num.into(), argv); + gen_pop_opnds(asm, &elements); + new_array } - - new_array } /// Compile array access (`array[index]`)