From 2547051b1cc1d5ffe2a709e88827807925ffefe0 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 11 Dec 2025 10:42:23 -0600 Subject: [PATCH 1/2] Invoke initialize from Ruby, not native code JRuby does not expose the `initialize` method for a given Data instance to native code, so there's no equivalent to calling `rb_struct_initialize`. This change moves that initialization to a Ruby `__send__` and removes the native stub method `init_data`. Fixes broken JRuby `Data` logic after #692. --- ext/psych/psych_to_ruby.c | 7 ------- lib/psych/visitors/to_ruby.rb | 4 +--- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/ext/psych/psych_to_ruby.c b/ext/psych/psych_to_ruby.c index 0132b2c9..3ab0138b 100644 --- a/ext/psych/psych_to_ruby.c +++ b/ext/psych/psych_to_ruby.c @@ -28,12 +28,6 @@ static VALUE path2class(VALUE self, VALUE path) return rb_path_to_class(path); } -static VALUE init_data(VALUE self, VALUE data, VALUE values) -{ - rb_struct_initialize(data, values); - return data; -} - void Init_psych_to_ruby(void) { VALUE psych = rb_define_module("Psych"); @@ -43,7 +37,6 @@ void Init_psych_to_ruby(void) VALUE visitor = rb_define_class_under(visitors, "Visitor", rb_cObject); cPsychVisitorsToRuby = rb_define_class_under(visitors, "ToRuby", visitor); - rb_define_private_method(cPsychVisitorsToRuby, "init_data", init_data, 2); rb_define_private_method(cPsychVisitorsToRuby, "build_exception", build_exception, 2); rb_define_private_method(class_loader, "path2class", path2class, 1); } diff --git a/lib/psych/visitors/to_ruby.rb b/lib/psych/visitors/to_ruby.rb index e62311ae..d27c9226 100644 --- a/lib/psych/visitors/to_ruby.rb +++ b/lib/psych/visitors/to_ruby.rb @@ -219,9 +219,7 @@ def visit_Psych_Nodes_Mapping o revive_data_members(members, o) end data ||= allocate_anon_data(o, members) - values = data.members.map { |m| members[m] } - init_data(data, values) - data.freeze + data.__send__ :initialize, **members data when /^!ruby\/object:?(.*)?$/ From a91280ccbeffea0827ed928722f4cf5620088770 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 11 Dec 2025 11:16:19 -0600 Subject: [PATCH 2/2] Eliminate support for anon Data or Data with ivars This commit removes support for the following features: * Dumping an anonymous Data. This is unsupported for the same reasons as dumping anonymous modules or classes: there's no way to reference the proper Data class, and defining a new anonymous Data for every such element would not round-trip properly. * Dumping a Data with instance variables. Instance variables on a Data can only be set during the initialize chain when first constructed, and such a chain is custom for each such Data class. As there is no way to modify a Data after constructed and no way to accurately invoke the custom initialize method, all Data with instance variables are rejected. Data is also no longer registered while loading, since it's not possible to have a self-referential Data instance without a custom initialize that sets an instance variable to self. The combination of these removed features simplifies Data dumping and loading and eliminates all allocate+initialize hacks. --- lib/psych/visitors/to_ruby.rb | 32 ++++------------- lib/psych/visitors/yaml_tree.rb | 41 +++++----------------- test/psych/test_coder.rb | 6 ++-- test/psych/test_data.rb | 46 ++++--------------------- test/psych/test_date_time.rb | 2 +- test/psych/test_encoding.rb | 10 +++--- test/psych/test_object_references.rb | 5 --- test/psych/test_parser.rb | 8 ++--- test/psych/test_safe_load.rb | 22 ------------ test/psych/test_serialize_subclasses.rb | 3 +- test/psych/test_string.rb | 2 +- test/psych/visitors/test_yaml_tree.rb | 14 -------- 12 files changed, 37 insertions(+), 154 deletions(-) diff --git a/lib/psych/visitors/to_ruby.rb b/lib/psych/visitors/to_ruby.rb index d27c9226..11203d05 100644 --- a/lib/psych/visitors/to_ruby.rb +++ b/lib/psych/visitors/to_ruby.rb @@ -197,30 +197,11 @@ def visit_Psych_Nodes_Mapping o s end - when /^!ruby\/data(-with-ivars)?(?::(.*))?$/ - data = register(o, resolve_class($2).allocate) if $2 - members = {} - - if $1 # data-with-ivars - ivars = {} - o.children.each_slice(2) do |type, vars| - case accept(type) - when 'members' - revive_data_members(members, vars) - data ||= allocate_anon_data(o, members) - when 'ivars' - revive_hash(ivars, vars) - end - end - ivars.each do |ivar, v| - data.instance_variable_set ivar, v - end - else - revive_data_members(members, o) - end - data ||= allocate_anon_data(o, members) - data.__send__ :initialize, **members - data + when /^!ruby\/data?(?::(.*))?$/ + data_class = resolve_class($1) if $1 + members = revive_data_members(o) + data = (data_class || Data.define(*members.keys)).new(**members) + register(o, data) # replace temp with actual when /^!ruby\/object:?(.*)?$/ name = $1 || 'Object' @@ -370,7 +351,8 @@ def allocate_anon_data node, members register(node, klass.allocate) end - def revive_data_members hash, o + def revive_data_members o + hash = {} o.children.each_slice(2) do |k,v| name = accept(k) value = accept(v) diff --git a/lib/psych/visitors/yaml_tree.rb b/lib/psych/visitors/yaml_tree.rb index b6c86f4c..19104780 100644 --- a/lib/psych/visitors/yaml_tree.rb +++ b/lib/psych/visitors/yaml_tree.rb @@ -163,41 +163,16 @@ def visit_Object o alias :visit_Delegator :visit_Object def visit_Data o - ivars = o.instance_variables - if ivars.empty? - tag = ['!ruby/data', o.class.name].compact.join(':') - register o, @emitter.start_mapping(nil, tag, false, Nodes::Mapping::BLOCK) - o.members.each do |member| - @emitter.scalar member.to_s, nil, nil, true, false, Nodes::Scalar::ANY - accept o.send member - end - @emitter.end_mapping + raise TypeError, "can't dump Data with ivars: #{o}" unless o.instance_variables.empty? + raise TypeError, "can't dump anonymous Data: #{o}" unless o.class.name - else - tag = ['!ruby/data-with-ivars', o.class.name].compact.join(':') - node = @emitter.start_mapping(nil, tag, false, Psych::Nodes::Mapping::BLOCK) - register(o, node) - - # Dump the members - accept 'members' - @emitter.start_mapping nil, nil, true, Nodes::Mapping::BLOCK - o.members.each do |member| - @emitter.scalar member.to_s, nil, nil, true, false, Nodes::Scalar::ANY - accept o.send member - end - @emitter.end_mapping - - # Dump the ivars - accept 'ivars' - @emitter.start_mapping nil, nil, true, Nodes::Mapping::BLOCK - ivars.each do |ivar| - accept ivar.to_s - accept o.instance_variable_get ivar - end - @emitter.end_mapping - - @emitter.end_mapping + tag = ['!ruby/data', o.class.name].compact.join(':') + register o, @emitter.start_mapping(nil, tag, false, Nodes::Mapping::BLOCK) + o.members.each do |member| + @emitter.scalar member.to_s, nil, nil, true, false, Nodes::Scalar::ANY + accept o.send member end + @emitter.end_mapping end unless RUBY_VERSION < "3.2" def visit_Struct o diff --git a/test/psych/test_coder.rb b/test/psych/test_coder.rb index a6f5ad7f..11151421 100644 --- a/test/psych/test_coder.rb +++ b/test/psych/test_coder.rb @@ -220,7 +220,7 @@ def test_coder_style_map_default end def test_coder_style_map_any - pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ + # pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ foo = Psych.dump CustomEncode.new \ map: {a: 1, b: 2}, @@ -230,7 +230,7 @@ def test_coder_style_map_any end def test_coder_style_map_block - pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ + # pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ foo = Psych.dump CustomEncode.new \ map: {a: 1, b: 2}, @@ -240,7 +240,7 @@ def test_coder_style_map_block end def test_coder_style_map_flow - pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ + # pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ foo = Psych.dump CustomEncode.new \ map: { a: 1, b: 2 }, diff --git a/test/psych/test_data.rb b/test/psych/test_data.rb index a67a037b..6350e758 100644 --- a/test/psych/test_data.rb +++ b/test/psych/test_data.rb @@ -1,67 +1,35 @@ # frozen_string_literal: true require_relative 'helper' -class PsychDataWithIvar < Data.define(:foo) - attr_reader :bar - def initialize(**) - @bar = 'hello' - super - end -end unless RUBY_VERSION < "3.2" +PsychData = Data.define(:foo) unless RUBY_VERSION < "3.2" module Psych class TestData < TestCase - class SelfReferentialData < Data.define(:foo) - attr_accessor :ref - def initialize(foo:) - @ref = self - super - end - end unless RUBY_VERSION < "3.2" - def setup omit "Data requires ruby >= 3.2" if RUBY_VERSION < "3.2" end # TODO: move to another test? def test_dump_data - assert_equal <<~eoyml, Psych.dump(PsychDataWithIvar["bar"]) - --- !ruby/data-with-ivars:PsychDataWithIvar - members: - foo: bar - ivars: - "@bar": hello + assert_equal <<~eoyml, Psych.dump(PsychData["bar"]) + --- !ruby/data:PsychData + foo: bar eoyml end - def test_self_referential_data - circular = SelfReferentialData.new("foo") - - loaded = Psych.unsafe_load(Psych.dump(circular)) - assert_instance_of(SelfReferentialData, loaded.ref) - - assert_equal(circular, loaded) - assert_same(loaded, loaded.ref) - end - def test_roundtrip - thing = PsychDataWithIvar.new("bar") + thing = PsychData.new("bar") data = Psych.unsafe_load(Psych.dump(thing)) - assert_equal "hello", data.bar assert_equal "bar", data.foo end def test_load obj = Psych.unsafe_load(<<~eoyml) - --- !ruby/data-with-ivars:PsychDataWithIvar - members: - foo: bar - ivars: - "@bar": hello + --- !ruby/data:PsychData + foo: bar eoyml - assert_equal "hello", obj.bar assert_equal "bar", obj.foo end end diff --git a/test/psych/test_date_time.rb b/test/psych/test_date_time.rb index 79a48e24..51164d08 100644 --- a/test/psych/test_date_time.rb +++ b/test/psych/test_date_time.rb @@ -87,7 +87,7 @@ def test_alias_with_time end def test_overwritten_to_s - pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ + # pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ s = Psych.dump(Date.new(2023, 9, 2), permitted_classes: [Date]) assert_separately(%W[-rpsych -rdate - #{s}], "#{<<~"begin;"}\n#{<<~'end;'}") class Date diff --git a/test/psych/test_encoding.rb b/test/psych/test_encoding.rb index 1867d59e..42a38648 100644 --- a/test/psych/test_encoding.rb +++ b/test/psych/test_encoding.rb @@ -119,7 +119,7 @@ def test_io_utf8_read_as_binary end def test_emit_alias - pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ + # pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ @emitter.start_stream Psych::Parser::UTF8 @emitter.start_document [], [], true @@ -153,7 +153,7 @@ def test_start_mapping @emitter.end_mapping @emitter.end_document false @emitter.end_stream - pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ + # pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ @parser.parse @buffer.string assert_encodings @utf8, @handler.strings @@ -173,7 +173,7 @@ def test_start_sequence @emitter.end_sequence @emitter.end_document false @emitter.end_stream - pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ + # pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ @parser.parse @buffer.string assert_encodings @utf8, @handler.strings @@ -191,7 +191,7 @@ def test_doc_tag_encoding @emitter.scalar 'foo', nil, nil, true, false, Nodes::Scalar::ANY @emitter.end_document false @emitter.end_stream - pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ + # pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ @parser.parse @buffer.string assert_encodings @utf8, @handler.strings @@ -268,7 +268,7 @@ def test_doc_tag end def test_dump_non_ascii_string_to_file - pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ + # pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ Tempfile.create(['utf8', 'yml'], :encoding => 'UTF-8') do |t| h = {'one' => 'いち'} diff --git a/test/psych/test_object_references.rb b/test/psych/test_object_references.rb index 0498d54e..86bb9034 100644 --- a/test/psych/test_object_references.rb +++ b/test/psych/test_object_references.rb @@ -31,11 +31,6 @@ def test_struct_has_references assert_reference_trip Struct.new(:foo).new(1) end - def test_data_has_references - omit "Data requires ruby >= 3.2" if RUBY_VERSION < "3.2" - assert_reference_trip Data.define(:foo).new(1) - end - def assert_reference_trip obj yml = Psych.dump([obj, obj]) assert_match(/\*-?\d+/, yml) diff --git a/test/psych/test_parser.rb b/test/psych/test_parser.rb index c1e0abb8..e8d9d325 100644 --- a/test/psych/test_parser.rb +++ b/test/psych/test_parser.rb @@ -85,7 +85,7 @@ def test_filename def test_line_numbers assert_equal 0, @parser.mark.line - pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ + # pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ @parser.parse "---\n- hello\n- world" line_calls = @handler.marks.map(&:line).zip(@handler.calls.map(&:first)) @@ -112,7 +112,7 @@ def test_line_numbers def test_column_numbers assert_equal 0, @parser.mark.column - pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ + # pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ @parser.parse "---\n- hello\n- world" col_calls = @handler.marks.map(&:column).zip(@handler.calls.map(&:first)) @@ -139,7 +139,7 @@ def test_column_numbers def test_index_numbers assert_equal 0, @parser.mark.index - pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ + # pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ @parser.parse "---\n- hello\n- world" idx_calls = @handler.marks.map(&:index).zip(@handler.calls.map(&:first)) @@ -358,7 +358,7 @@ def test_start_document_tag end def test_event_location - pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ + # pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ @parser.parse "foo:\n" \ " barbaz: [1, 2]" diff --git a/test/psych/test_safe_load.rb b/test/psych/test_safe_load.rb index e6ca1e14..81ab6b4b 100644 --- a/test/psych/test_safe_load.rb +++ b/test/psych/test_safe_load.rb @@ -124,28 +124,6 @@ def test_data_depends_on_sym end end - def test_anon_data - omit "Data requires ruby >= 3.2" if RUBY_VERSION < "3.2" - assert Psych.safe_load(<<-eoyml, permitted_classes: [Data, Symbol]) ---- !ruby/data - foo: bar - eoyml - - assert_raise(Psych::DisallowedClass) do - Psych.safe_load(<<-eoyml, permitted_classes: [Data]) ---- !ruby/data - foo: bar - eoyml - end - - assert_raise(Psych::DisallowedClass) do - Psych.safe_load(<<-eoyml, permitted_classes: [Symbol]) ---- !ruby/data - foo: bar - eoyml - end - end - def test_safe_load_default_fallback assert_nil Psych.safe_load("") end diff --git a/test/psych/test_serialize_subclasses.rb b/test/psych/test_serialize_subclasses.rb index 640c3313..c388da30 100644 --- a/test/psych/test_serialize_subclasses.rb +++ b/test/psych/test_serialize_subclasses.rb @@ -38,12 +38,11 @@ def test_struct_subclass class DataSubclass < Data.define(:foo) def initialize(foo:) - @bar = "hello #{foo}" super(foo: foo) end def == other - super(other) && @bar == other.instance_eval{ @bar } + super(other) end end unless RUBY_VERSION < "3.2" diff --git a/test/psych/test_string.rb b/test/psych/test_string.rb index cfd235a5..4f3178f3 100644 --- a/test/psych/test_string.rb +++ b/test/psych/test_string.rb @@ -51,7 +51,7 @@ def test_doublequotes_when_there_is_a_single end def test_single_quote_when_matching_date - pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ + # pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ lib = File.expand_path("../../../lib", __FILE__) assert_separately(["-I", lib, "-r", "psych"], __FILE__, __LINE__ + 1, <<~'RUBY') diff --git a/test/psych/visitors/test_yaml_tree.rb b/test/psych/visitors/test_yaml_tree.rb index bd3919f8..79554514 100644 --- a/test/psych/visitors/test_yaml_tree.rb +++ b/test/psych/visitors/test_yaml_tree.rb @@ -80,20 +80,6 @@ def test_data assert_cycle D.new('bar') end - def test_data_anon - omit "Data requires ruby >= 3.2" if RUBY_VERSION < "3.2" - d = Data.define(:foo).new('bar') - obj = Psych.unsafe_load(Psych.dump(d)) - assert_equal d.foo, obj.foo - end - - def test_data_override_method - omit "Data requires ruby >= 3.2" if RUBY_VERSION < "3.2" - d = Data.define(:method).new('override') - obj = Psych.unsafe_load(Psych.dump(d)) - assert_equal d.method, obj.method - end - def test_exception ex = Exception.new 'foo' loaded = Psych.unsafe_load(Psych.dump(ex))