From f9b67f482284308b4a815c0197d0311d408b0aa5 Mon Sep 17 00:00:00 2001 From: BurdetteLamar Date: Tue, 30 Jun 2026 18:19:04 -0500 Subject: [PATCH 01/31] [DOC] Doc for Pathname#parent --- pathname_builtin.rb | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pathname_builtin.rb b/pathname_builtin.rb index 9e510eddb239a9..7a1c8a06a8c4a9 100644 --- a/pathname_builtin.rb +++ b/pathname_builtin.rb @@ -541,9 +541,19 @@ def cleanpath_conservative # :nodoc: end private :cleanpath_conservative - # Returns the parent directory. + # :markup: markdown + # + # call-seq: + # parent -> new_pathname + # + # Returns a new pathname representing the parent directory + # of the entry represented by `self`: + # + # ```ruby + # pn = Pathname('/etc/passwd') # => #<Pathname:/etc/passwd> + # pn.parent # => #<Pathname:/etc> + # ``` # - # This is same as self + '..'. def parent self + '..' end From 0ea1e09c6de90887308d80806d8a3053ab658d69 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Wed, 1 Jul 2026 21:25:04 +0900 Subject: [PATCH 02/31] Fix the gemspec error snippet on Windows drive-letter paths Bundler::DSLError#to_s read the offending line number from `trace_line.split(":")[1]`. On Windows the backtrace path carries a drive letter such as `C:`, so that field is the path rather than the number, and the negative index that follows garbles the source snippet. Match the number that sits right before `:in` or the end of the line instead. 
Co-Authored-By: Claude Opus 4.8 --- lib/bundler/dsl.rb | 5 ++++- spec/bundler/runtime/setup_spec.rb | 2 -- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/bundler/dsl.rb b/lib/bundler/dsl.rb index e7504d0455d5b6..a3f5ef67bede32 100644 --- a/lib/bundler/dsl.rb +++ b/lib/bundler/dsl.rb @@ -671,8 +671,11 @@ def to_s trace_line = backtrace.find {|l| l.include?(dsl_path) } || trace_line return m unless trace_line - line_number = trace_line.split(":")[1].to_i - 1 + # Match the line number right before `:in` or the end of the line so a + # Windows drive letter like `C:` does not get mistaken for the number. + line_number = trace_line[/:(\d+)(?::in\b|\z)/, 1] return m unless line_number + line_number = line_number.to_i - 1 lines = contents.lines.to_a indent = " # " diff --git a/spec/bundler/runtime/setup_spec.rb b/spec/bundler/runtime/setup_spec.rb index 2b4ac3085c318b..fc841f1de155ef 100644 --- a/spec/bundler/runtime/setup_spec.rb +++ b/spec/bundler/runtime/setup_spec.rb @@ -1162,8 +1162,6 @@ def clean_load_path(lp) end it "error intelligently if the gemspec has a LoadError" do - skip "whitespace issue?" if Gem.win_platform? 
- ref = update_git "bar", gemspec: false do |s| s.write "bar.gemspec", "require 'foobarbaz'" end.ref_for("HEAD") From 1424fe4e002b232579c70037569e13e6342f0b5a Mon Sep 17 00:00:00 2001 From: Augustin Gottlieb <33221555+aguspe@users.noreply.github.com> Date: Thu, 2 Jul 2026 01:07:29 +0200 Subject: [PATCH 03/31] [Tests] Add test cases for String#insert Cover behavior documented in rdoc but not asserted in test/ruby/test_string.rb: * insert returns self * boundary negative index -(length + 1) prepends * out-of-range index (positive and negative) raises IndexError * non-string argument raises TypeError * frozen string raises FrozenError * multibyte: index counts characters, not bytes --- test/ruby/test_string.rb | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index d6d4ba40deeedd..59dce175f13341 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -1482,6 +1482,16 @@ def test_insert assert_equal("abcdX", S("abcd").insert(4, 'X')) assert_equal("abXcd", S("abcd").insert(-3, 'X')) assert_equal("abcdX", S("abcd").insert(-1, 'X')) + assert_equal("Xabcd", S("abcd").insert(-5, 'X')) + assert_equal("こんbarにちは", S("こんにちは").insert(2, 'bar')) + + str = S("abcd") + assert_same(str, str.insert(2, 'X')) + + assert_raise(IndexError) { S("abcd").insert(5, 'X') } + assert_raise(IndexError) { S("abcd").insert(-6, 'X') } + assert_raise(TypeError) { S("abcd").insert(2, 42) } + assert_raise(FrozenError) { S("abcd").freeze.insert(2, 'X') } end def test_intern From 815692018c620898242eee03795f5a9a66e2f330 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Thu, 2 Jul 2026 09:09:43 +0900 Subject: [PATCH 04/31] Remove obsolete Windows skip guards from core tests These guards no longer match current mswin behavior. The affected tests pass reliably on an x64-mswin64_140 build, repeatedly and under -j16 for the flaky-history ones. Only the mswin conditions are removed. 
test_ractor and test_transcode keep their darwin and s390x guards. test_thread_queue keeps the MinGW omit and drops only the retired AppVeyor branch. test_box re-enables box_1 only, since box_2 still crashes at process exit under mswin and stays pended. Co-Authored-By: Claude Opus 4.8 --- test/objspace/test_objspace.rb | 1 - test/ruby/test_box.rb | 1 - test/ruby/test_ractor.rb | 2 +- test/ruby/test_stack.rb | 2 -- test/ruby/test_thread.rb | 2 -- test/ruby/test_thread_queue.rb | 3 --- test/ruby/test_transcode.rb | 2 +- 7 files changed, 2 insertions(+), 11 deletions(-) diff --git a/test/objspace/test_objspace.rb b/test/objspace/test_objspace.rb index 5a36377ffe94e5..ba051b670f8616 100644 --- a/test/objspace/test_objspace.rb +++ b/test/objspace/test_objspace.rb @@ -135,7 +135,6 @@ def test_reachable_objects_from end def test_reachable_objects_during_iteration - omit 'flaky on Visual Studio with: [BUG] Unnormalized Fixnum value' if /mswin/ =~ RUBY_PLATFORM opts = %w[--disable-gem --disable=frozen-string-literal -robjspace] assert_ruby_status opts, "#{<<-"begin;"}\n#{<<-'end;'}" begin; diff --git a/test/ruby/test_box.rb b/test/ruby/test_box.rb index a425c5eb7d6632..6d91ca1797140a 100644 --- a/test/ruby/test_box.rb +++ b/test/ruby/test_box.rb @@ -1075,7 +1075,6 @@ def test_very_basic_method_calls_and_constants end def test_loading_extension_libs_in_main_box_1 - pend if /mswin|mingw/ =~ RUBY_PLATFORM # timeout on windows environments assert_separately([ENV_ENABLE_BOX], __FILE__, __LINE__, "#{<<~"begin;"}\n#{<<~'end;'}", ignore_stderr: true) begin; require "prism" diff --git a/test/ruby/test_ractor.rb b/test/ruby/test_ractor.rb index e7eb0cd4b34fe7..8b1545d0a0b17f 100644 --- a/test/ruby/test_ractor.rb +++ b/test/ruby/test_ractor.rb @@ -276,7 +276,7 @@ def test_require_non_string # [Bug #21398] def test_port_receive_dnt_with_port_send - omit 'unstable on windows and macos-14' if RUBY_PLATFORM =~ /mswin|mingw|darwin/ + omit 'unstable on macos-14' if RUBY_PLATFORM =~ /darwin/ 
assert_ractor(<<~'RUBY', timeout: 90) THREADS = 10 JOBS_PER_THREAD = 50 diff --git a/test/ruby/test_stack.rb b/test/ruby/test_stack.rb index 8a788483229419..b6b7c030a374ec 100644 --- a/test/ruby/test_stack.rb +++ b/test/ruby/test_stack.rb @@ -65,8 +65,6 @@ def test_vm_stack_size # Depending on OS, machine stack size may not change size. def test_machine_stack_size - return if /mswin|mingw/ =~ RUBY_PLATFORM - script = '$stdout.sync=true; def rec; print "."; 1.times{1.times{1.times{rec}}}; end; Fiber.new{rec}.resume' vm_stack_size = 1024 * 1024 diff --git a/test/ruby/test_thread.rb b/test/ruby/test_thread.rb index c3d9dcf56dabae..475bdc7558cdfe 100644 --- a/test/ruby/test_thread.rb +++ b/test/ruby/test_thread.rb @@ -1480,8 +1480,6 @@ def test_thread_native_thread_id_across_fork_on_linux end def test_thread_interrupt_for_killed_thread - pend "hang-up" if /mswin|mingw/ =~ RUBY_PLATFORM - opts = { timeout: 5, timeout_error: nil } assert_normal_exit(<<-_end, '[Bug #8996]', **opts) diff --git a/test/ruby/test_thread_queue.rb b/test/ruby/test_thread_queue.rb index 4046185fd21ebf..4c2d921e1166dd 100644 --- a/test/ruby/test_thread_queue.rb +++ b/test/ruby/test_thread_queue.rb @@ -656,9 +656,6 @@ def test_queue_close_multi_multi end def test_queue_with_trap - if ENV['APPVEYOR'] == 'True' && RUBY_PLATFORM.match?(/mswin/) - omit 'This test fails too often on AppVeyor vs140' - end if RUBY_PLATFORM.match?(/mingw/) omit 'This test fails too often on MinGW' end diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb index 61c05216fcc5d5..d0710358ebefdc 100644 --- a/test/ruby/test_transcode.rb +++ b/test/ruby/test_transcode.rb @@ -2347,7 +2347,7 @@ def test_ractor_lazy_load_encoding end def test_ractor_lazy_load_encoding_random - omit 'unstable on s390x and windows' if RUBY_PLATFORM =~ /s390x|mswin/ + omit 'unstable on s390x' if RUBY_PLATFORM =~ /s390x/ assert_ractor("#{<<~"begin;"}\n#{<<~'end;'}", timeout: 30) begin; rs = [] From 
1dc4cb798a909aa81c2e0f197f7ac87eae105931 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Thu, 2 Jul 2026 10:07:04 +0900 Subject: [PATCH 05/31] Give box_1 extension-loading test a longer timeout Loading the extension libraries in a box exceeds the default 10-second assert_separately timeout under -j16 CI load. Use 60 seconds, matching the sibling box test that already does. Co-Authored-By: Claude Opus 4.8 --- test/ruby/test_box.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/ruby/test_box.rb b/test/ruby/test_box.rb index 6d91ca1797140a..3d19bcfc4b2ebb 100644 --- a/test/ruby/test_box.rb +++ b/test/ruby/test_box.rb @@ -1075,7 +1075,7 @@ def test_very_basic_method_calls_and_constants end def test_loading_extension_libs_in_main_box_1 - assert_separately([ENV_ENABLE_BOX], __FILE__, __LINE__, "#{<<~"begin;"}\n#{<<~'end;'}", ignore_stderr: true) + assert_separately([ENV_ENABLE_BOX], __FILE__, __LINE__, "#{<<~"begin;"}\n#{<<~'end;'}", ignore_stderr: true, timeout: 60) begin; require "prism" require "optparse" From ce089a3de1953ee8f2cf730f68dc72dd7cec3a3b Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Wed, 1 Jul 2026 06:48:28 +0900 Subject: [PATCH 06/31] [ruby/psych] Add experimental opt-in libfyaml backend Built only with --enable-libfyaml; without the flag the default libyaml backend is unchanged. The parser and emitter are reimplemented against libfyaml's event API in separate translation units guarded by PSYCH_USE_LIBFYAML, and the backend is not supported on Windows. 
https://github.com/ruby/psych/commit/51f2493a2e Co-Authored-By: Claude Opus 4.8 --- ext/psych/extconf.rb | 16 ++ ext/psych/psych.c | 28 +++ ext/psych/psych.h | 4 + ext/psych/psych_emitter.c | 4 + ext/psych/psych_emitter_fy.c | 429 +++++++++++++++++++++++++++++++ ext/psych/psych_parser.c | 4 + ext/psych/psych_parser_fy.c | 476 +++++++++++++++++++++++++++++++++++ 7 files changed, 961 insertions(+) create mode 100644 ext/psych/psych_emitter_fy.c create mode 100644 ext/psych/psych_parser_fy.c diff --git a/ext/psych/extconf.rb b/ext/psych/extconf.rb index 589e201c1cdb9d..e1ea7510e1c145 100644 --- a/ext/psych/extconf.rb +++ b/ext/psych/extconf.rb @@ -2,6 +2,22 @@ # frozen_string_literal: true require 'mkmf' +# Experimental, opt-in libfyaml backend. Only used when psych is built with +# --enable-libfyaml. Without the flag nothing below changes and the default +# libyaml backend is built exactly as before. +if enable_config("libfyaml", false) + if $mswin or $mingw or $cygwin + abort "The libfyaml backend (--enable-libfyaml) is not supported on Windows" + end + unless pkg_config('libfyaml') + abort "libfyaml was requested with --enable-libfyaml but was not found via pkg-config" + end + $defs << "-DPSYCH_USE_LIBFYAML" + + create_makefile 'psych' + return +end + if $mswin or $mingw or $cygwin $CPPFLAGS << " -DYAML_DECLARE_STATIC" end diff --git a/ext/psych/psych.c b/ext/psych/psych.c index afbd7a35714f9d..2a5db212ad2a10 100644 --- a/ext/psych/psych.c +++ b/ext/psych/psych.c @@ -9,7 +9,16 @@ static VALUE libyaml_version(VALUE module) int major, minor, patch; VALUE list[3]; +#ifdef PSYCH_USE_LIBFYAML + /* Experimental libfyaml backend: there is no libyaml linked in. Report + * the libfyaml version so callers still receive a 3-element version. */ + const struct fy_version *v = fy_version_default(); + major = v ? v->major : 0; + minor = v ? 
v->minor : 0; + patch = 0; +#else yaml_get_version(&major, &minor, &patch); +#endif list[0] = INT2NUM(major); list[1] = INT2NUM(minor); @@ -18,6 +27,18 @@ static VALUE libyaml_version(VALUE module) return rb_ary_new4((long)3, list); } +#ifdef PSYCH_USE_LIBFYAML +/* call-seq: Psych.libfyaml_version + * + * Returns the libfyaml version string, or nil when not built with libfyaml. + */ +static VALUE libfyaml_version(VALUE module) +{ + const char *v = fy_library_version(); + return v ? rb_usascii_str_new2(v) : Qnil; +} +#endif + VALUE mPsych; void Init_psych(void) @@ -29,6 +50,13 @@ void Init_psych(void) rb_define_singleton_method(mPsych, "libyaml_version", libyaml_version, 0); +#ifdef PSYCH_USE_LIBFYAML + rb_define_singleton_method(mPsych, "libfyaml_version", libfyaml_version, 0); + rb_define_const(mPsych, "BACKEND", rb_usascii_str_new2("libfyaml")); +#else + rb_define_const(mPsych, "BACKEND", rb_usascii_str_new2("libyaml")); +#endif + Init_psych_parser(); Init_psych_emitter(); Init_psych_to_ruby(); diff --git a/ext/psych/psych.h b/ext/psych/psych.h index 6b3d63f246c0fa..0e1465880b1d5a 100644 --- a/ext/psych/psych.h +++ b/ext/psych/psych.h @@ -4,7 +4,11 @@ #include #include +#ifdef PSYCH_USE_LIBFYAML +#include +#else #include +#endif #include #include diff --git a/ext/psych/psych_emitter.c b/ext/psych/psych_emitter.c index 624ab7c528f864..187aebc3233a1a 100644 --- a/ext/psych/psych_emitter.c +++ b/ext/psych/psych_emitter.c @@ -1,5 +1,7 @@ #include +#ifndef PSYCH_USE_LIBFYAML + #if !defined(RARRAY_CONST_PTR) #define RARRAY_CONST_PTR(s) (const VALUE *)RARRAY_PTR(s) #endif @@ -587,3 +589,5 @@ void Init_psych_emitter(void) id_indentation = rb_intern("indentation"); id_canonical = rb_intern("canonical"); } + +#endif /* PSYCH_USE_LIBFYAML */ diff --git a/ext/psych/psych_emitter_fy.c b/ext/psych/psych_emitter_fy.c new file mode 100644 index 00000000000000..826792001561d6 --- /dev/null +++ b/ext/psych/psych_emitter_fy.c @@ -0,0 +1,429 @@ +#include + +#ifdef PSYCH_USE_LIBFYAML 
+/* + * Experimental libfyaml-backed emitter. Only compiled when psych is built + * with --enable-libfyaml. Mirrors ext/psych/psych_emitter.c. + */ + +#if !defined(RARRAY_CONST_PTR) +#define RARRAY_CONST_PTR(s) (const VALUE *)RARRAY_PTR(s) +#endif +#if !defined(RARRAY_AREF) +#define RARRAY_AREF(a, i) RARRAY_CONST_PTR(a)[i] +#endif + +VALUE cPsychEmitter; +static ID id_io; +static ID id_write; +static ID id_line_width; +static ID id_indentation; +static ID id_canonical; + +typedef struct { + struct fy_emitter *emit; + struct fy_emitter_cfg cfg; + int indent; + int width; + int canonical; +} psych_fy_emitter_t; + +static int emitter_output(struct fy_emitter *emit, enum fy_emitter_write_type type, + const char *str, int len, void *userdata) +{ + VALUE self = (VALUE)userdata; + VALUE io = rb_attr_get(self, id_io); + VALUE s = rb_enc_str_new(str, (long)len, rb_utf8_encoding()); + rb_funcall(io, id_write, 1, s); + return len; +} + +static void dealloc(void *ptr) +{ + psych_fy_emitter_t *e = (psych_fy_emitter_t *)ptr; + if (e->emit) { + fy_emitter_destroy(e->emit); + } + xfree(e); +} + +static const rb_data_type_t psych_emitter_type = { + "Psych/emitter", + {0, dealloc, 0,}, + 0, 0, +#ifdef RUBY_TYPED_FREE_IMMEDIATELY + RUBY_TYPED_FREE_IMMEDIATELY, +#endif +}; + +static VALUE allocate(VALUE klass) +{ + psych_fy_emitter_t *e; + VALUE obj = TypedData_Make_Struct(klass, psych_fy_emitter_t, &psych_emitter_type, e); + + e->emit = NULL; + e->indent = 2; + e->width = -1; + e->canonical = 0; + + return obj; +} + +static unsigned int build_flags(psych_fy_emitter_t *e) +{ + unsigned int flags = FYECF_MODE_ORIGINAL | + FYECF_DOC_START_MARK_AUTO | FYECF_DOC_END_MARK_AUTO; + int indent = (e->indent >= 1 && e->indent <= 9) ? e->indent : 2; + flags |= FYECF_INDENT(indent); + if (e->width <= 0) { + flags |= FYECF_WIDTH_INF; + } else { + flags |= FYECF_WIDTH(e->width > 255 ? 255 : e->width); + } + return flags; +} + +/* (Re)create the underlying fy_emitter from the current option state. 
Safe to + * call before any event has been emitted. */ +static void rebuild_emitter(VALUE self, psych_fy_emitter_t *e) +{ + if (e->emit) { + fy_emitter_destroy(e->emit); + e->emit = NULL; + } + e->cfg.flags = build_flags(e); + e->cfg.output = emitter_output; + e->cfg.userdata = (void *)self; + e->cfg.diag = NULL; + e->emit = fy_emitter_create(&e->cfg); + if (!e->emit) { + rb_raise(rb_eNoMemError, "could not create libfyaml emitter"); + } +} + +static void do_emit(psych_fy_emitter_t *e, struct fy_event *event) +{ + if (!event) { + rb_raise(rb_eRuntimeError, "libfyaml: could not create event"); + } + if (fy_emit_event(e->emit, event) != 0) { + rb_raise(rb_eRuntimeError, "libfyaml: emit failed"); + } +} + +/* call-seq: Psych::Emitter.new(io, options = Psych::Emitter::OPTIONS) */ +static VALUE initialize(int argc, VALUE *argv, VALUE self) +{ + psych_fy_emitter_t *e; + VALUE io, options; + + TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); + + if (rb_scan_args(argc, argv, "11", &io, &options) == 2) { + e->width = NUM2INT(rb_funcall(options, id_line_width, 0)); + e->indent = NUM2INT(rb_funcall(options, id_indentation, 0)); + e->canonical = (Qtrue == rb_funcall(options, id_canonical, 0)) ? 
1 : 0; + } + + rb_ivar_set(self, id_io, io); + rebuild_emitter(self, e); + + return self; +} + +static VALUE start_stream(VALUE self, VALUE encoding) +{ + psych_fy_emitter_t *e; + TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); + Check_Type(encoding, T_FIXNUM); + + do_emit(e, fy_emit_event_create(e->emit, FYET_STREAM_START)); + return self; +} + +static VALUE end_stream(VALUE self) +{ + psych_fy_emitter_t *e; + TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); + + do_emit(e, fy_emit_event_create(e->emit, FYET_STREAM_END)); + return self; +} + +static VALUE start_document(VALUE self, VALUE version, VALUE tags, VALUE imp) +{ + psych_fy_emitter_t *e; + struct fy_version ver; + const struct fy_version *verp = NULL; + struct fy_tag *tag_storage = NULL; + const struct fy_tag **tag_ptrs = NULL; + VALUE *exported = NULL; + long len = 0; + + TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); + Check_Type(version, T_ARRAY); + + if (RARRAY_LEN(version) >= 2) { + ver.major = NUM2INT(rb_ary_entry(version, 0)); + ver.minor = NUM2INT(rb_ary_entry(version, 1)); + verp = &ver; + } + + if (RTEST(tags)) { + rb_encoding *encoding = rb_utf8_encoding(); + Check_Type(tags, T_ARRAY); + len = RARRAY_LEN(tags); + if (len > 0) { + long i; + tag_storage = xcalloc((size_t)len, sizeof(struct fy_tag)); + tag_ptrs = xcalloc((size_t)len + 1, sizeof(struct fy_tag *)); + exported = xcalloc((size_t)len * 2, sizeof(VALUE)); + for (i = 0; i < len; i++) { + VALUE tuple = RARRAY_AREF(tags, i); + VALUE name, value; + Check_Type(tuple, T_ARRAY); + if (RARRAY_LEN(tuple) < 2) { + xfree(tag_storage); xfree(tag_ptrs); xfree(exported); + rb_raise(rb_eRuntimeError, "tag tuple must be of length 2"); + } + name = RARRAY_AREF(tuple, 0); + value = RARRAY_AREF(tuple, 1); + StringValue(name); + StringValue(value); + name = rb_str_export_to_enc(name, encoding); + value = rb_str_export_to_enc(value, encoding); + exported[i * 2] = name; + exported[i * 2 + 
1] = value; + tag_storage[i].handle = StringValueCStr(name); + tag_storage[i].prefix = StringValueCStr(value); + tag_ptrs[i] = &tag_storage[i]; + } + tag_ptrs[len] = NULL; + } + } + + struct fy_event *event = fy_emit_event_create(e->emit, FYET_DOCUMENT_START, + imp ? 1 : 0, verp, tag_ptrs); + + if (exported) { (void)exported[0]; } + do_emit(e, event); + + if (tag_storage) xfree(tag_storage); + if (tag_ptrs) xfree(tag_ptrs); + if (exported) xfree(exported); + + return self; +} + +static VALUE end_document(VALUE self, VALUE imp) +{ + psych_fy_emitter_t *e; + TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); + + do_emit(e, fy_emit_event_create(e->emit, FYET_DOCUMENT_END, imp ? 1 : 0)); + return self; +} + +static enum fy_scalar_style psych_to_fyss(int style, int quoted) +{ + switch (style) { + case 1: return FYSS_PLAIN; + case 2: return FYSS_SINGLE_QUOTED; + case 3: return FYSS_DOUBLE_QUOTED; + case 4: return FYSS_LITERAL; + case 5: return FYSS_FOLDED; + default: + /* style ANY: honour psych's quoted hint so number-like strings are + * not silently re-typed on reload. */ + return quoted ? 
FYSS_DOUBLE_QUOTED : FYSS_ANY; + } +} + +static enum fy_node_style psych_to_fyns(int style) +{ + switch (style) { + case 1: return FYNS_BLOCK; + case 2: return FYNS_FLOW; + default: return FYNS_ANY; + } +} + +static VALUE scalar(VALUE self, VALUE value, VALUE anchor, VALUE tag, + VALUE plain, VALUE quoted, VALUE style) +{ + psych_fy_emitter_t *e; + rb_encoding *encoding = rb_utf8_encoding(); + + TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); + Check_Type(value, T_STRING); + + value = rb_str_export_to_enc(value, encoding); + if (!NIL_P(anchor)) anchor = rb_str_export_to_enc(anchor, encoding); + if (!NIL_P(tag)) tag = rb_str_export_to_enc(tag, encoding); + + enum fy_scalar_style fyss = psych_to_fyss(NUM2INT(style), RTEST(quoted)); + + struct fy_event *event = fy_emit_event_create(e->emit, FYET_SCALAR, + fyss, + RSTRING_PTR(value), (size_t)RSTRING_LEN(value), + NIL_P(anchor) ? NULL : StringValueCStr(anchor), + NIL_P(tag) ? NULL : StringValueCStr(tag)); + + do_emit(e, event); + RB_GC_GUARD(value); + return self; +} + +static VALUE start_sequence(VALUE self, VALUE anchor, VALUE tag, + VALUE implicit, VALUE style) +{ + psych_fy_emitter_t *e; + rb_encoding *encoding = rb_utf8_encoding(); + + TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); + + if (!NIL_P(anchor)) anchor = rb_str_export_to_enc(anchor, encoding); + if (!NIL_P(tag)) tag = rb_str_export_to_enc(tag, encoding); + + struct fy_event *event = fy_emit_event_create(e->emit, FYET_SEQUENCE_START, + psych_to_fyns(NUM2INT(style)), + NIL_P(anchor) ? NULL : StringValueCStr(anchor), + NIL_P(tag) ? 
NULL : StringValueCStr(tag)); + + do_emit(e, event); + return self; +} + +static VALUE end_sequence(VALUE self) +{ + psych_fy_emitter_t *e; + TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); + + do_emit(e, fy_emit_event_create(e->emit, FYET_SEQUENCE_END)); + return self; +} + +static VALUE start_mapping(VALUE self, VALUE anchor, VALUE tag, + VALUE implicit, VALUE style) +{ + psych_fy_emitter_t *e; + rb_encoding *encoding = rb_utf8_encoding(); + + TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); + + if (!NIL_P(anchor)) anchor = rb_str_export_to_enc(anchor, encoding); + if (!NIL_P(tag)) tag = rb_str_export_to_enc(tag, encoding); + + struct fy_event *event = fy_emit_event_create(e->emit, FYET_MAPPING_START, + psych_to_fyns(NUM2INT(style)), + NIL_P(anchor) ? NULL : StringValueCStr(anchor), + NIL_P(tag) ? NULL : StringValueCStr(tag)); + + do_emit(e, event); + return self; +} + +static VALUE end_mapping(VALUE self) +{ + psych_fy_emitter_t *e; + TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); + + do_emit(e, fy_emit_event_create(e->emit, FYET_MAPPING_END)); + return self; +} + +static VALUE alias(VALUE self, VALUE anchor) +{ + psych_fy_emitter_t *e; + TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); + + if (!NIL_P(anchor)) anchor = rb_str_export_to_enc(anchor, rb_utf8_encoding()); + + do_emit(e, fy_emit_event_create(e->emit, FYET_ALIAS, + NIL_P(anchor) ? NULL : StringValueCStr(anchor))); + return self; +} + +static VALUE set_canonical(VALUE self, VALUE style) +{ + psych_fy_emitter_t *e; + TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); + e->canonical = (Qtrue == style) ? 1 : 0; + rebuild_emitter(self, e); + return style; +} + +static VALUE canonical(VALUE self) +{ + psych_fy_emitter_t *e; + TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); + return e->canonical ? 
Qtrue : Qfalse; +} + +static VALUE set_indentation(VALUE self, VALUE level) +{ + psych_fy_emitter_t *e; + TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); + e->indent = NUM2INT(level); + rebuild_emitter(self, e); + return level; +} + +static VALUE indentation(VALUE self) +{ + psych_fy_emitter_t *e; + TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); + return INT2NUM(e->indent); +} + +static VALUE line_width(VALUE self) +{ + psych_fy_emitter_t *e; + TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); + return INT2NUM(e->width); +} + +static VALUE set_line_width(VALUE self, VALUE width) +{ + psych_fy_emitter_t *e; + TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); + e->width = NUM2INT(width); + rebuild_emitter(self, e); + return width; +} + +void Init_psych_emitter(void) +{ +#undef rb_intern + VALUE psych = rb_define_module("Psych"); + VALUE handler = rb_define_class_under(psych, "Handler", rb_cObject); + cPsychEmitter = rb_define_class_under(psych, "Emitter", handler); + + rb_define_alloc_func(cPsychEmitter, allocate); + + rb_define_method(cPsychEmitter, "initialize", initialize, -1); + rb_define_method(cPsychEmitter, "start_stream", start_stream, 1); + rb_define_method(cPsychEmitter, "end_stream", end_stream, 0); + rb_define_method(cPsychEmitter, "start_document", start_document, 3); + rb_define_method(cPsychEmitter, "end_document", end_document, 1); + rb_define_method(cPsychEmitter, "scalar", scalar, 6); + rb_define_method(cPsychEmitter, "start_sequence", start_sequence, 4); + rb_define_method(cPsychEmitter, "end_sequence", end_sequence, 0); + rb_define_method(cPsychEmitter, "start_mapping", start_mapping, 4); + rb_define_method(cPsychEmitter, "end_mapping", end_mapping, 0); + rb_define_method(cPsychEmitter, "alias", alias, 1); + rb_define_method(cPsychEmitter, "canonical", canonical, 0); + rb_define_method(cPsychEmitter, "canonical=", set_canonical, 1); + 
rb_define_method(cPsychEmitter, "indentation", indentation, 0); + rb_define_method(cPsychEmitter, "indentation=", set_indentation, 1); + rb_define_method(cPsychEmitter, "line_width", line_width, 0); + rb_define_method(cPsychEmitter, "line_width=", set_line_width, 1); + + id_io = rb_intern("io"); + id_write = rb_intern("write"); + id_line_width = rb_intern("line_width"); + id_indentation = rb_intern("indentation"); + id_canonical = rb_intern("canonical"); +} + +#endif /* PSYCH_USE_LIBFYAML */ diff --git a/ext/psych/psych_parser.c b/ext/psych/psych_parser.c index 05a8fa9eb0b6a5..2729273751fe8c 100644 --- a/ext/psych/psych_parser.c +++ b/ext/psych/psych_parser.c @@ -1,5 +1,7 @@ #include +#ifndef PSYCH_USE_LIBFYAML + VALUE cPsychParser; static ID id_read; @@ -571,3 +573,5 @@ void Init_psych_parser(void) id_end_mapping = rb_intern("end_mapping"); id_event_location = rb_intern("event_location"); } + +#endif /* PSYCH_USE_LIBFYAML */ diff --git a/ext/psych/psych_parser_fy.c b/ext/psych/psych_parser_fy.c new file mode 100644 index 00000000000000..fe03b818052e33 --- /dev/null +++ b/ext/psych/psych_parser_fy.c @@ -0,0 +1,476 @@ +#include + +#ifdef PSYCH_USE_LIBFYAML +/* + * Experimental libfyaml-backed parser. Only compiled when psych is built + * with --enable-libfyaml. Mirrors the event protocol of the libyaml backend + * in ext/psych/psych_parser.c so the Ruby layer is unchanged. 
+ */ + +VALUE cPsychParser; + +static ID id_read; +static ID id_empty; +static ID id_start_stream; +static ID id_end_stream; +static ID id_start_document; +static ID id_end_document; +static ID id_alias; +static ID id_scalar; +static ID id_start_sequence; +static ID id_end_sequence; +static ID id_start_mapping; +static ID id_end_mapping; +static ID id_event_location; + +#define PSYCH_TRANSCODE(_str, _yaml_enc, _internal_enc) \ + do { \ + rb_enc_associate_index((_str), (_yaml_enc)); \ + if(_internal_enc) \ + (_str) = rb_str_export_to_enc((_str), (_internal_enc)); \ + } while (0) + +/* libyaml-compatible encoding constants exposed to the Ruby layer. */ +#define PSYCH_ANY_ENCODING 0 +#define PSYCH_UTF8_ENCODING 1 +#define PSYCH_UTF16LE_ENCODING 2 +#define PSYCH_UTF16BE_ENCODING 3 + +typedef struct { + struct fy_parser *fyp; + size_t mark_line; + size_t mark_column; + size_t mark_index; +} psych_fy_parser_t; + +static ssize_t io_reader(void *user, void *buf, size_t count) +{ + VALUE io = (VALUE)user; + VALUE string = rb_funcall(io, id_read, 1, SIZET2NUM(count)); + + if (NIL_P(string)) { + return 0; /* EOF */ + } + + StringValue(string); + size_t len = (size_t)RSTRING_LEN(string); + if (len > count) { + len = count; + } + memcpy(buf, RSTRING_PTR(string), len); + return (ssize_t)len; +} + +static void dealloc(void *ptr) +{ + psych_fy_parser_t *parser = (psych_fy_parser_t *)ptr; + if (parser->fyp) { + fy_parser_destroy(parser->fyp); + } + xfree(parser); +} + +static const rb_data_type_t psych_parser_type = { + "Psych/parser", + {0, dealloc, 0,}, + 0, 0, +#ifdef RUBY_TYPED_FREE_IMMEDIATELY + RUBY_TYPED_FREE_IMMEDIATELY, +#endif +}; + +static VALUE allocate(VALUE klass) +{ + psych_fy_parser_t *parser; + VALUE obj = TypedData_Make_Struct(klass, psych_fy_parser_t, &psych_parser_type, parser); + + static const struct fy_parse_cfg cfg = { + .flags = FYPCF_QUIET | FYPCF_COLLECT_DIAG | FYPCF_DEFAULT_VERSION_AUTO, + }; + parser->fyp = fy_parser_create(&cfg); + if (!parser->fyp) { 
+ rb_raise(rb_eNoMemError, "could not create libfyaml parser"); + } + + return obj; +} + +/* TODO: libfyaml's diagnostics are collected via fy_diag; reconstructing the + * libyaml-style problem/context/offset is left for a later pass. For now we + * raise a Psych::SyntaxError with the best-effort mark we tracked. */ +static VALUE make_exception(psych_fy_parser_t *parser, VALUE path) +{ + VALUE ePsychSyntaxError = rb_const_get(mPsych, rb_intern("SyntaxError")); + + return rb_funcall(ePsychSyntaxError, rb_intern("new"), 6, + path, + SIZET2NUM(parser->mark_line + 1), + SIZET2NUM(parser->mark_column + 1), + SIZET2NUM(parser->mark_index), + rb_usascii_str_new2("could not parse YAML"), + Qnil); +} + +static VALUE transcode_string(VALUE src) +{ + int utf8 = rb_utf8_encindex(); + int source_encoding = rb_enc_get_index(src); + + if (source_encoding == utf8 || source_encoding == rb_usascii_encindex()) { + return src; + } + + src = rb_str_export_to_enc(src, rb_utf8_encoding()); + return src; +} + +/* ---- protected handler trampolines (identical protocol to libyaml backend) */ + +static VALUE protected_start_stream(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall(args[0], id_start_stream, 1, args[1]); +} + +static VALUE protected_start_document(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_start_document, 3, args + 1); +} + +static VALUE protected_end_document(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall(args[0], id_end_document, 1, args[1]); +} + +static VALUE protected_alias(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall(args[0], id_alias, 1, args[1]); +} + +static VALUE protected_scalar(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_scalar, 6, args + 1); +} + +static VALUE protected_start_sequence(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_start_sequence, 4, args + 1); +} + +static 
VALUE protected_end_sequence(VALUE handler) +{ + return rb_funcall(handler, id_end_sequence, 0); +} + +static VALUE protected_start_mapping(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_start_mapping, 4, args + 1); +} + +static VALUE protected_end_mapping(VALUE handler) +{ + return rb_funcall(handler, id_end_mapping, 0); +} + +static VALUE protected_empty(VALUE handler) +{ + return rb_funcall(handler, id_empty, 0); +} + +static VALUE protected_end_stream(VALUE handler) +{ + return rb_funcall(handler, id_end_stream, 0); +} + +static VALUE protected_event_location(VALUE pointer) +{ + VALUE *args = (VALUE *)pointer; + return rb_funcall3(args[0], id_event_location, 4, args + 1); +} + +/* ---- enum translation: libfyaml -> psych/libyaml integer constants -------- */ + +static int fyss_to_psych(enum fy_scalar_style s) +{ + switch (s) { + case FYSS_PLAIN: return 1; + case FYSS_SINGLE_QUOTED: return 2; + case FYSS_DOUBLE_QUOTED: return 3; + case FYSS_LITERAL: return 4; + case FYSS_FOLDED: return 5; + default: return 0; /* FYSS_ANY */ + } +} + +static int fyns_to_psych(enum fy_node_style s) +{ + switch (s) { + case FYNS_FLOW: return 2; + case FYNS_BLOCK: return 1; + default: return 0; /* FYNS_ANY */ + } +} + +static VALUE token_to_str(struct fy_token *tok, int encoding, rb_encoding *internal_enc) +{ + size_t len = 0; + const char *text; + + if (!tok) { + return Qnil; + } + text = fy_token_get_text(tok, &len); + if (!text) { + return Qnil; + } + VALUE str = rb_str_new(text, (long)len); + PSYCH_TRANSCODE(str, encoding, internal_enc); + return str; +} + +static VALUE parse(VALUE self, VALUE handler, VALUE yaml, VALUE path) +{ + psych_fy_parser_t *parser; + struct fy_event *event; + int done = 0; + int state = 0; + int encoding = rb_utf8_encindex(); + rb_encoding *internal_enc = rb_default_internal_encoding(); + + TypedData_Get_Struct(self, psych_fy_parser_t, &psych_parser_type, parser); + + fy_parser_reset(parser->fyp); + 
parser->mark_line = parser->mark_column = parser->mark_index = 0; + + if (rb_respond_to(yaml, id_read)) { + if (fy_parser_set_input_callback(parser->fyp, (void *)yaml, io_reader) != 0) { + rb_raise(rb_eRuntimeError, "could not set libfyaml input"); + } + } else { + StringValue(yaml); + yaml = transcode_string(yaml); + if (fy_parser_set_string(parser->fyp, + RSTRING_PTR(yaml), (size_t)RSTRING_LEN(yaml)) != 0) { + rb_raise(rb_eRuntimeError, "could not set libfyaml input"); + } + } + + while (!done) { + VALUE event_args[5]; + const struct fy_mark *sm, *em; + + event = fy_parser_parse(parser->fyp); + + if (!event) { + VALUE exception = make_exception(parser, path); + rb_exc_raise(exception); + } + + sm = fy_event_start_mark(event); + em = fy_event_end_mark(event); + if (sm) { + parser->mark_line = (size_t)sm->line; + parser->mark_column = (size_t)sm->column; + parser->mark_index = sm->input_pos; + } + + event_args[0] = handler; + event_args[1] = SIZET2NUM(sm ? (size_t)sm->line : 0); + event_args[2] = SIZET2NUM(sm ? (size_t)sm->column : 0); + event_args[3] = SIZET2NUM(em ? (size_t)em->line : 0); + event_args[4] = SIZET2NUM(em ? 
(size_t)em->column : 0); + rb_protect(protected_event_location, (VALUE)event_args, &state); + + switch (event->type) { + case FYET_STREAM_START: + { + VALUE args[2]; + args[0] = handler; + args[1] = INT2NUM(PSYCH_UTF8_ENCODING); + rb_protect(protected_start_stream, (VALUE)args, &state); + } + break; + case FYET_DOCUMENT_START: + { + VALUE args[4]; + VALUE version = rb_ary_new(); + VALUE tag_directives = rb_ary_new(); + struct fy_document_state *ds = event->document_start.document_state; + + if (ds && fy_document_state_version_explicit(ds)) { + const struct fy_version *v = fy_document_state_version(ds); + if (v) { + version = rb_ary_new3((long)2, + INT2NUM(v->major), INT2NUM(v->minor)); + } + } + + if (ds && fy_document_state_tags_explicit(ds)) { + void *iter = NULL; + const struct fy_tag *tag; + while ((tag = fy_document_state_tag_directive_iterate(ds, &iter)) != NULL) { + /* skip the implicit defaults ("!" and "!!") */ + if (tag->handle && tag->prefix) { + if ((strcmp(tag->handle, "!") == 0 && strcmp(tag->prefix, "!") == 0) || + (strcmp(tag->handle, "!!") == 0 && + strcmp(tag->prefix, "tag:yaml.org,2002:") == 0)) { + continue; + } + } + VALUE handle = tag->handle ? rb_str_new2(tag->handle) : Qnil; + VALUE prefix = tag->prefix ? rb_str_new2(tag->prefix) : Qnil; + if (!NIL_P(handle)) PSYCH_TRANSCODE(handle, encoding, internal_enc); + if (!NIL_P(prefix)) PSYCH_TRANSCODE(prefix, encoding, internal_enc); + rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix)); + } + } + + args[0] = handler; + args[1] = version; + args[2] = tag_directives; + args[3] = event->document_start.implicit ? Qtrue : Qfalse; + rb_protect(protected_start_document, (VALUE)args, &state); + } + break; + case FYET_DOCUMENT_END: + { + VALUE args[2]; + args[0] = handler; + args[1] = event->document_end.implicit ? 
Qtrue : Qfalse; + rb_protect(protected_end_document, (VALUE)args, &state); + } + break; + case FYET_ALIAS: + { + VALUE args[2]; + args[0] = handler; + args[1] = token_to_str(event->alias.anchor, encoding, internal_enc); + rb_protect(protected_alias, (VALUE)args, &state); + } + break; + case FYET_SCALAR: + { + VALUE args[7]; + enum fy_scalar_style fyss = fy_token_scalar_style(event->scalar.value); + int has_tag = (event->scalar.tag != NULL); + int plain_style = (fyss == FYSS_PLAIN); + + args[0] = handler; + args[1] = token_to_str(event->scalar.value, encoding, internal_enc); + if (NIL_P(args[1])) args[1] = rb_str_new2(""); + args[2] = token_to_str(event->scalar.anchor, encoding, internal_enc); + args[3] = token_to_str(event->scalar.tag, encoding, internal_enc); + /* libfyaml does not expose libyaml's plain_implicit / + * quoted_implicit pair, so reconstruct them from the explicit + * tag presence and the scalar style, matching libyaml: + * plain, untagged -> (plain=1, quoted=0) + * quoted, untagged -> (plain=0, quoted=1) + * tagged -> (plain=0, quoted=0) */ + args[4] = (!has_tag && plain_style) ? Qtrue : Qfalse; + args[5] = (!has_tag && !plain_style) ? Qtrue : Qfalse; + args[6] = INT2NUM(fyss_to_psych(fyss)); + rb_protect(protected_scalar, (VALUE)args, &state); + } + break; + case FYET_SEQUENCE_START: + { + VALUE args[5]; + args[0] = handler; + args[1] = token_to_str(event->sequence_start.anchor, encoding, internal_enc); + args[2] = token_to_str(event->sequence_start.tag, encoding, internal_enc); + args[3] = event->sequence_start.tag ? 
Qfalse : Qtrue; + args[4] = INT2NUM(fyns_to_psych(fy_event_get_node_style(event))); + rb_protect(protected_start_sequence, (VALUE)args, &state); + } + break; + case FYET_SEQUENCE_END: + rb_protect(protected_end_sequence, handler, &state); + break; + case FYET_MAPPING_START: + { + VALUE args[5]; + args[0] = handler; + args[1] = token_to_str(event->mapping_start.anchor, encoding, internal_enc); + args[2] = token_to_str(event->mapping_start.tag, encoding, internal_enc); + args[3] = event->mapping_start.tag ? Qfalse : Qtrue; + args[4] = INT2NUM(fyns_to_psych(fy_event_get_node_style(event))); + rb_protect(protected_start_mapping, (VALUE)args, &state); + } + break; + case FYET_MAPPING_END: + rb_protect(protected_end_mapping, handler, &state); + break; + case FYET_NONE: + rb_protect(protected_empty, handler, &state); + break; + case FYET_STREAM_END: + rb_protect(protected_end_stream, handler, &state); + done = 1; + break; + } + + fy_parser_event_free(parser->fyp, event); + if (state) rb_jump_tag(state); + } + + RB_GC_GUARD(yaml); + return self; +} + +/* + * call-seq: + * parser.mark # => #<Psych::Parser::Mark> + */ +static VALUE mark(VALUE self) +{ + VALUE mark_klass; + VALUE args[3]; + psych_fy_parser_t *parser; + + TypedData_Get_Struct(self, psych_fy_parser_t, &psych_parser_type, parser); + mark_klass = rb_const_get_at(cPsychParser, rb_intern("Mark")); + args[0] = SIZET2NUM(parser->mark_index); + args[1] = SIZET2NUM(parser->mark_line); + args[2] = SIZET2NUM(parser->mark_column); + + return rb_class_new_instance(3, args, mark_klass); +} + +void Init_psych_parser(void) +{ +#undef rb_intern + cPsychParser = rb_define_class_under(mPsych, "Parser", rb_cObject); + rb_define_alloc_func(cPsychParser, allocate); + + rb_define_const(cPsychParser, "ANY", INT2NUM(PSYCH_ANY_ENCODING)); + rb_define_const(cPsychParser, "UTF8", INT2NUM(PSYCH_UTF8_ENCODING)); + rb_define_const(cPsychParser, "UTF16LE", INT2NUM(PSYCH_UTF16LE_ENCODING)); + rb_define_const(cPsychParser, "UTF16BE", 
INT2NUM(PSYCH_UTF16BE_ENCODING)); + + rb_require("psych/syntax_error"); + + rb_define_private_method(cPsychParser, "_native_parse", parse, 3); + rb_define_method(cPsychParser, "mark", mark, 0); + + id_read = rb_intern("read"); + id_empty = rb_intern("empty"); + id_start_stream = rb_intern("start_stream"); + id_end_stream = rb_intern("end_stream"); + id_start_document = rb_intern("start_document"); + id_end_document = rb_intern("end_document"); + id_alias = rb_intern("alias"); + id_scalar = rb_intern("scalar"); + id_start_sequence = rb_intern("start_sequence"); + id_end_sequence = rb_intern("end_sequence"); + id_start_mapping = rb_intern("start_mapping"); + id_end_mapping = rb_intern("end_mapping"); + id_event_location = rb_intern("event_location"); +} + +#endif /* PSYCH_USE_LIBFYAML */ From 4e225df5edb13eeefef2ca4c94e4d26b3a3b22bc Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Wed, 1 Jul 2026 07:00:30 +0900 Subject: [PATCH 07/31] [ruby/psych] Resolve booleans per YAML 1.2 on the libfyaml backend Scalar type resolution happens in ScalarScanner, not the C backend, so swapping to libfyaml alone still resolved yes/no/on/off to booleans. Key the boolean set on Psych::BACKEND so the libyaml default keeps the YAML 1.1 set while the experimental libfyaml backend follows 1.2. https://github.com/ruby/psych/commit/b374869c59 Co-Authored-By: Claude Opus 4.8 --- ext/psych/lib/psych/scalar_scanner.rb | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/ext/psych/lib/psych/scalar_scanner.rb b/ext/psych/lib/psych/scalar_scanner.rb index d744e61183df2e..68e17ecf5f2584 100644 --- a/ext/psych/lib/psych/scalar_scanner.rb +++ b/ext/psych/lib/psych/scalar_scanner.rb @@ -24,6 +24,18 @@ class ScalarScanner |[-+]?(?:0|[1-9](?:[0-9]|,[0-9]|_[0-9])*) (?# base 10) |[-+]?0x[_,]*[0-9a-fA-F][0-9a-fA-F_,]* (?# base 16))$/x + # YAML 1.1 treats yes/no/on/off as booleans in addition to true/false, + # while YAML 1.2's core schema only recognizes true/false. 
The default + # libyaml backend keeps the 1.1 set for backward compatibility; the + # experimental libfyaml backend follows 1.2. + if defined?(Psych::BACKEND) && Psych::BACKEND == 'libfyaml' + BOOLEAN_TRUE = /^true$/i + BOOLEAN_FALSE = /^false$/i + else + BOOLEAN_TRUE = /^(yes|true|on)$/i + BOOLEAN_FALSE = /^(no|false|off)$/i + end + attr_reader :class_loader # Create a new scanner @@ -48,9 +60,9 @@ def tokenize string string elsif string == '~' || string.match?(/^null$/i) nil - elsif string.match?(/^(yes|true|on)$/i) + elsif string.match?(BOOLEAN_TRUE) true - elsif string.match?(/^(no|false|off)$/i) + elsif string.match?(BOOLEAN_FALSE) false else string From 0a66bb87351bf64cef31e32c06302ef36dd7be3c Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Wed, 1 Jul 2026 07:22:24 +0900 Subject: [PATCH 08/31] [ruby/psych] Improve libfyaml parser fidelity and error reporting Create a fresh parser per parse instead of reusing one via fy_parser_reset(), which left default tag handles unset and rejected bare ("---"-less) tag-led documents. Recover the real message and position by switching the parser's own diagnostic object to collect mode; creating a replacement diag crashes libfyaml 0.9.6. Drop the spurious empty tag directive libfyaml reports. https://github.com/ruby/psych/commit/d383361c23 Co-Authored-By: Claude Opus 4.8 --- ext/psych/psych_parser_fy.c | 76 +++++++++++++++++++++++++++++-------- 1 file changed, 60 insertions(+), 16 deletions(-) diff --git a/ext/psych/psych_parser_fy.c b/ext/psych/psych_parser_fy.c index fe03b818052e33..8c297c15a416ad 100644 --- a/ext/psych/psych_parser_fy.c +++ b/ext/psych/psych_parser_fy.c @@ -43,6 +43,14 @@ typedef struct { size_t mark_index; } psych_fy_parser_t; +static const struct fy_parse_cfg psych_parse_cfg = { + /* Keep libfyaml's strict YAML 1.2 flow-indentation checks. This backend + * exists to follow the 1.2 spec, so we reject malformed flow indentation + * (e.g. 
wrongly indented flow sequences) rather than relaxing to libyaml's + * 1.1-era leniency with FYPCF_SLOPPY_FLOW_INDENTATION. */ + .flags = FYPCF_QUIET | FYPCF_DEFAULT_VERSION_AUTO, +}; + static ssize_t io_reader(void *user, void *buf, size_t count) { VALUE io = (VALUE)user; @@ -84,30 +92,40 @@ static VALUE allocate(VALUE klass) psych_fy_parser_t *parser; VALUE obj = TypedData_Make_Struct(klass, psych_fy_parser_t, &psych_parser_type, parser); - static const struct fy_parse_cfg cfg = { - .flags = FYPCF_QUIET | FYPCF_COLLECT_DIAG | FYPCF_DEFAULT_VERSION_AUTO, - }; - parser->fyp = fy_parser_create(&cfg); - if (!parser->fyp) { - rb_raise(rb_eNoMemError, "could not create libfyaml parser"); - } + parser->fyp = NULL; return obj; } -/* TODO: libfyaml's diagnostics are collected via fy_diag; reconstructing the - * libyaml-style problem/context/offset is left for a later pass. For now we - * raise a Psych::SyntaxError with the best-effort mark we tracked. */ +/* Reconstruct a Psych::SyntaxError from libfyaml's collected diagnostics. + * parse() switches the parser's own diag to collect mode, so the first + * collected error gives us the message and position. 
*/ static VALUE make_exception(psych_fy_parser_t *parser, VALUE path) { VALUE ePsychSyntaxError = rb_const_get(mPsych, rb_intern("SyntaxError")); + VALUE problem = Qnil; + size_t line = parser->mark_line; + size_t column = parser->mark_column; + + struct fy_diag *diag = fy_parser_get_diag(parser->fyp); + if (diag) { + void *iter = NULL; + struct fy_diag_error *err = fy_diag_errors_iterate(diag, &iter); + if (err) { + if (err->msg) problem = rb_usascii_str_new2(err->msg); + if (err->line >= 0) line = (size_t)err->line; + if (err->column >= 0) column = (size_t)err->column; + } + fy_diag_unref(diag); + } + if (NIL_P(problem)) problem = rb_usascii_str_new2("could not parse YAML"); return rb_funcall(ePsychSyntaxError, rb_intern("new"), 6, path, - SIZET2NUM(parser->mark_line + 1), - SIZET2NUM(parser->mark_column + 1), + SIZET2NUM(line), + SIZET2NUM(column), SIZET2NUM(parser->mark_index), - rb_usascii_str_new2("could not parse YAML"), + problem, Qnil); } @@ -245,9 +263,31 @@ static VALUE parse(VALUE self, VALUE handler, VALUE yaml, VALUE path) TypedData_Get_Struct(self, psych_fy_parser_t, &psych_parser_type, parser); - fy_parser_reset(parser->fyp); + /* Use a pristine parser for each parse, like fy-tool does. Reusing a + * parser across documents via fy_parser_reset() left the default tag + * handles unset for bare (no "---") tag-led documents. */ + if (parser->fyp) { + fy_parser_destroy(parser->fyp); + parser->fyp = NULL; + } + parser->fyp = fy_parser_create(&psych_parse_cfg); + if (!parser->fyp) { + rb_raise(rb_eNoMemError, "could not create libfyaml parser"); + } parser->mark_line = parser->mark_column = parser->mark_index = 0; + /* Make the parser's own diagnostic object collect errors instead of + * printing them to stderr, so make_exception() can recover the message. + * Replacing the diag with a freshly created one crashes libfyaml 0.9.6, + * so mutate the existing default diag in place. 
*/ + { + struct fy_diag *diag = fy_parser_get_diag(parser->fyp); + if (diag) { + fy_diag_set_collect_errors(diag, true); + fy_diag_unref(diag); + } + } + if (rb_respond_to(yaml, id_read)) { if (fy_parser_set_input_callback(parser->fyp, (void *)yaml, io_reader) != 0) { rb_raise(rb_eRuntimeError, "could not set libfyaml input"); @@ -315,8 +355,12 @@ static VALUE parse(VALUE self, VALUE handler, VALUE yaml, VALUE path) void *iter = NULL; const struct fy_tag *tag; while ((tag = fy_document_state_tag_directive_iterate(ds, &iter)) != NULL) { - /* skip the implicit defaults ("!" and "!!") */ - if (tag->handle && tag->prefix) { + /* skip the implicit defaults ("!", "!!" and the empty + * primary handle libfyaml reports) */ + if (!tag->handle || tag->handle[0] == '\0') { + continue; + } + if (tag->prefix) { if ((strcmp(tag->handle, "!") == 0 && strcmp(tag->prefix, "!") == 0) || (strcmp(tag->handle, "!!") == 0 && strcmp(tag->prefix, "tag:yaml.org,2002:") == 0)) { From 7a912af002a9ff43daa51285d8aea37303d0b33d Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Wed, 1 Jul 2026 07:28:14 +0900 Subject: [PATCH 09/31] [ruby/psych] Match libyaml scalar emission on the libfyaml backend Drop the tag when plain_implicit or quoted_implicit is set, matching how libyaml omits a tag that the value resolves to on reload; otherwise nil emitted as "!" instead of an empty scalar. Honor the plain hint when choosing the scalar style, and restore the Check_Type guards on anchor and tag so non-string arguments raise TypeError. 
https://github.com/ruby/psych/commit/ff15ddf399 Co-Authored-By: Claude Opus 4.8 --- ext/psych/psych_emitter_fy.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/ext/psych/psych_emitter_fy.c b/ext/psych/psych_emitter_fy.c index 826792001561d6..ff0d7176bde542 100644 --- a/ext/psych/psych_emitter_fy.c +++ b/ext/psych/psych_emitter_fy.c @@ -224,7 +224,7 @@ static VALUE end_document(VALUE self, VALUE imp) return self; } -static enum fy_scalar_style psych_to_fyss(int style, int quoted) +static enum fy_scalar_style psych_to_fyss(int style, int plain, int quoted) { switch (style) { case 1: return FYSS_PLAIN; @@ -233,9 +233,13 @@ static enum fy_scalar_style psych_to_fyss(int style, int quoted) case 4: return FYSS_LITERAL; case 5: return FYSS_FOLDED; default: - /* style ANY: honour psych's quoted hint so number-like strings are - * not silently re-typed on reload. */ - return quoted ? FYSS_DOUBLE_QUOTED : FYSS_ANY; + /* style ANY: honour psych's plain/quoted hints. Forcing a plain + * scalar plain keeps libfyaml from tagging empty scalars (nil) as + * explicit nulls; the quoted hint keeps number-like strings from + * being re-typed on reload. 
*/ + if (quoted) return FYSS_DOUBLE_QUOTED; + if (plain) return FYSS_PLAIN; + return FYSS_ANY; } } @@ -258,16 +262,22 @@ static VALUE scalar(VALUE self, VALUE value, VALUE anchor, VALUE tag, Check_Type(value, T_STRING); value = rb_str_export_to_enc(value, encoding); - if (!NIL_P(anchor)) anchor = rb_str_export_to_enc(anchor, encoding); - if (!NIL_P(tag)) tag = rb_str_export_to_enc(tag, encoding); + if (!NIL_P(anchor)) { Check_Type(anchor, T_STRING); anchor = rb_str_export_to_enc(anchor, encoding); } + if (!NIL_P(tag)) { Check_Type(tag, T_STRING); tag = rb_str_export_to_enc(tag, encoding); } - enum fy_scalar_style fyss = psych_to_fyss(NUM2INT(style), RTEST(quoted)); + enum fy_scalar_style fyss = psych_to_fyss(NUM2INT(style), RTEST(plain), RTEST(quoted)); + + /* libyaml omits the tag when plain_implicit (or quoted_implicit) is set, + * since the value resolves to that tag on reload. fy_emit_event_create() + * has no implicit flag and would always print the tag (e.g. nil as + * "!"), so drop it here to match. */ + int emit_tag = !NIL_P(tag) && !RTEST(plain) && !RTEST(quoted); struct fy_event *event = fy_emit_event_create(e->emit, FYET_SCALAR, fyss, RSTRING_PTR(value), (size_t)RSTRING_LEN(value), NIL_P(anchor) ? NULL : StringValueCStr(anchor), - NIL_P(tag) ? NULL : StringValueCStr(tag)); + emit_tag ? 
StringValueCStr(tag) : NULL); do_emit(e, event); RB_GC_GUARD(value); @@ -282,8 +292,8 @@ static VALUE start_sequence(VALUE self, VALUE anchor, VALUE tag, TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); - if (!NIL_P(anchor)) anchor = rb_str_export_to_enc(anchor, encoding); - if (!NIL_P(tag)) tag = rb_str_export_to_enc(tag, encoding); + if (!NIL_P(anchor)) { Check_Type(anchor, T_STRING); anchor = rb_str_export_to_enc(anchor, encoding); } + if (!NIL_P(tag)) { Check_Type(tag, T_STRING); tag = rb_str_export_to_enc(tag, encoding); } struct fy_event *event = fy_emit_event_create(e->emit, FYET_SEQUENCE_START, psych_to_fyns(NUM2INT(style)), @@ -311,8 +321,8 @@ static VALUE start_mapping(VALUE self, VALUE anchor, VALUE tag, TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); - if (!NIL_P(anchor)) anchor = rb_str_export_to_enc(anchor, encoding); - if (!NIL_P(tag)) tag = rb_str_export_to_enc(tag, encoding); + if (!NIL_P(anchor)) { Check_Type(anchor, T_STRING); anchor = rb_str_export_to_enc(anchor, encoding); } + if (!NIL_P(tag)) { Check_Type(tag, T_STRING); tag = rb_str_export_to_enc(tag, encoding); } struct fy_event *event = fy_emit_event_create(e->emit, FYET_MAPPING_START, psych_to_fyns(NUM2INT(style)), @@ -337,7 +347,7 @@ static VALUE alias(VALUE self, VALUE anchor) psych_fy_emitter_t *e; TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); - if (!NIL_P(anchor)) anchor = rb_str_export_to_enc(anchor, rb_utf8_encoding()); + if (!NIL_P(anchor)) { Check_Type(anchor, T_STRING); anchor = rb_str_export_to_enc(anchor, rb_utf8_encoding()); } do_emit(e, fy_emit_event_create(e->emit, FYET_ALIAS, NIL_P(anchor) ? 
NULL : StringValueCStr(anchor))); From e88db7ea809c2c09f8153187fa839be43667d40d Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Wed, 1 Jul 2026 08:05:12 +0900 Subject: [PATCH 10/31] [ruby/psych] Transcode UTF-16 IO input to UTF-8 on the libfyaml backend libfyaml only consumes UTF-8, so a UTF-16 IO fed through the chunked reader reached it as raw bytes and was rejected as invalid UTF-8. When the IO's external encoding is UTF-16LE/BE, slurp the whole stream and transcode it first; a 2-byte unit could otherwise straddle a read boundary. Other non-UTF-8 encodings stay raw and libfyaml rejects them, matching psych's UTF-8/UTF-16-only IO contract (Shift_JIS still raises). https://github.com/ruby/psych/commit/4bd1343551 Co-Authored-By: Claude Opus 4.8 --- ext/psych/psych_parser_fy.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/ext/psych/psych_parser_fy.c b/ext/psych/psych_parser_fy.c index 8c297c15a416ad..6b9d04e3864afb 100644 --- a/ext/psych/psych_parser_fy.c +++ b/ext/psych/psych_parser_fy.c @@ -289,7 +289,25 @@ static VALUE parse(VALUE self, VALUE handler, VALUE yaml, VALUE path) } if (rb_respond_to(yaml, id_read)) { - if (fy_parser_set_input_callback(parser->fyp, (void *)yaml, io_reader) != 0) { + VALUE ext_enc = rb_funcall(yaml, rb_intern("external_encoding"), 0); + int ext_idx = NIL_P(ext_enc) ? -1 : rb_to_encoding_index(ext_enc); + + if (ext_idx == rb_enc_find_index("UTF-16LE") || + ext_idx == rb_enc_find_index("UTF-16BE")) { + /* libfyaml only consumes UTF-8. A UTF-16 stream cannot be fed + * through the chunked reader because a 2-byte unit may straddle a + * read boundary, so slurp the whole stream and transcode it. Any + * other non-UTF-8 external encoding is left raw and libfyaml will + * reject it, matching psych's "UTF-8/UTF-16 only" IO contract. 
*/ + VALUE content = rb_funcall(yaml, id_read, 0); + if (NIL_P(content)) content = rb_str_new("", 0); + StringValue(content); + yaml = transcode_string(content); + if (fy_parser_set_string(parser->fyp, + RSTRING_PTR(yaml), (size_t)RSTRING_LEN(yaml)) != 0) { + rb_raise(rb_eRuntimeError, "could not set libfyaml input"); + } + } else if (fy_parser_set_input_callback(parser->fyp, (void *)yaml, io_reader) != 0) { rb_raise(rb_eRuntimeError, "could not set libfyaml input"); } } else { From 49681874e024303eedcd3cc9c2553d5d22c3dbe8 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Wed, 1 Jul 2026 09:02:34 +0900 Subject: [PATCH 11/31] [ruby/psych] Free tag-directive buffers on the error path in start_document The xcalloc'd tag buffers leaked when StringValue, the tuple-length check, or the emit raised mid-way. Wrap the work in rb_ensure so the buffers are always freed, and keep the exported directive strings in a Ruby array so the GC cannot reclaim them while their C pointers are in use. https://github.com/ruby/psych/commit/6fdede0177 Co-Authored-By: Claude Opus 4.8 --- ext/psych/psych_emitter_fy.c | 80 +++++++++++++++++++++++++----------- 1 file changed, 56 insertions(+), 24 deletions(-) diff --git a/ext/psych/psych_emitter_fy.c b/ext/psych/psych_emitter_fy.c index ff0d7176bde542..7dd60f60a70e1e 100644 --- a/ext/psych/psych_emitter_fy.c +++ b/ext/psych/psych_emitter_fy.c @@ -150,17 +150,27 @@ static VALUE end_stream(VALUE self) return self; } -static VALUE start_document(VALUE self, VALUE version, VALUE tags, VALUE imp) +struct start_document_data { + VALUE self; + VALUE version; + VALUE tags; + VALUE imp; + struct fy_tag *tag_storage; + const struct fy_tag **tag_ptrs; +}; + +static VALUE start_document_try(VALUE d) { + struct start_document_data *data = (struct start_document_data *)d; + VALUE version = data->version; + VALUE tags = data->tags; psych_fy_emitter_t *e; struct fy_version ver; const struct fy_version *verp = NULL; - struct fy_tag *tag_storage = NULL; - 
const struct fy_tag **tag_ptrs = NULL; - VALUE *exported = NULL; - long len = 0; + VALUE guard = Qnil; + struct fy_event *event; - TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); + TypedData_Get_Struct(data->self, psych_fy_emitter_t, &psych_emitter_type, e); Check_Type(version, T_ARRAY); if (RARRAY_LEN(version) >= 2) { @@ -171,19 +181,20 @@ static VALUE start_document(VALUE self, VALUE version, VALUE tags, VALUE imp) if (RTEST(tags)) { rb_encoding *encoding = rb_utf8_encoding(); + long i, len; Check_Type(tags, T_ARRAY); len = RARRAY_LEN(tags); if (len > 0) { - long i; - tag_storage = xcalloc((size_t)len, sizeof(struct fy_tag)); - tag_ptrs = xcalloc((size_t)len + 1, sizeof(struct fy_tag *)); - exported = xcalloc((size_t)len * 2, sizeof(VALUE)); + /* Ruby array keeps the exported strings reachable for the GC while + * their C pointers live in tag_storage. */ + guard = rb_ary_new_capa(len * 2); + data->tag_storage = xcalloc((size_t)len, sizeof(struct fy_tag)); + data->tag_ptrs = xcalloc((size_t)len + 1, sizeof(struct fy_tag *)); for (i = 0; i < len; i++) { VALUE tuple = RARRAY_AREF(tags, i); VALUE name, value; Check_Type(tuple, T_ARRAY); if (RARRAY_LEN(tuple) < 2) { - xfree(tag_storage); xfree(tag_ptrs); xfree(exported); rb_raise(rb_eRuntimeError, "tag tuple must be of length 2"); } name = RARRAY_AREF(tuple, 0); @@ -192,27 +203,48 @@ static VALUE start_document(VALUE self, VALUE version, VALUE tags, VALUE imp) StringValue(value); name = rb_str_export_to_enc(name, encoding); value = rb_str_export_to_enc(value, encoding); - exported[i * 2] = name; - exported[i * 2 + 1] = value; - tag_storage[i].handle = StringValueCStr(name); - tag_storage[i].prefix = StringValueCStr(value); - tag_ptrs[i] = &tag_storage[i]; + rb_ary_push(guard, name); + rb_ary_push(guard, value); + data->tag_storage[i].handle = StringValueCStr(name); + data->tag_storage[i].prefix = StringValueCStr(value); + data->tag_ptrs[i] = &data->tag_storage[i]; } - tag_ptrs[len] = NULL; + 
data->tag_ptrs[len] = NULL; } } - struct fy_event *event = fy_emit_event_create(e->emit, FYET_DOCUMENT_START, - imp ? 1 : 0, verp, tag_ptrs); + event = fy_emit_event_create(e->emit, FYET_DOCUMENT_START, + data->imp ? 1 : 0, verp, data->tag_ptrs); - if (exported) { (void)exported[0]; } do_emit(e, event); + RB_GC_GUARD(guard); - if (tag_storage) xfree(tag_storage); - if (tag_ptrs) xfree(tag_ptrs); - if (exported) xfree(exported); + return data->self; +} - return self; +static VALUE start_document_ensure(VALUE d) +{ + struct start_document_data *data = (struct start_document_data *)d; + + xfree(data->tag_storage); + xfree(data->tag_ptrs); + + return Qnil; +} + +static VALUE start_document(VALUE self, VALUE version, VALUE tags, VALUE imp) +{ + struct start_document_data data = { + .self = self, + .version = version, + .tags = tags, + .imp = imp, + .tag_storage = NULL, + .tag_ptrs = NULL, + }; + + return rb_ensure(start_document_try, (VALUE)&data, + start_document_ensure, (VALUE)&data); } static VALUE end_document(VALUE self, VALUE imp) From 71c888d1a741ca89c4c6ca9e60917749d3a9c904 Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Wed, 1 Jul 2026 09:45:45 +0900 Subject: [PATCH 12/31] [ruby/psych] Make the test suite backend-aware for the libfyaml backend Add a libfyaml? helper and guard the tests that intentionally diverge on the experimental YAML 1.2 backend: yes/no/on/off are strings rather than booleans, flow collections and block scalars are formatted differently, tags and marks are reported differently, and non-ASCII tags/aliases are rejected. test_boolean asserts the 1.2 string result directly; the formatting and mark cases are skipped. The default libyaml build is unaffected (every guard keys off Psych::BACKEND). 
https://github.com/ruby/psych/commit/67b6365f5a Co-Authored-By: Claude Opus 4.8 --- test/psych/helper.rb | 7 +++++++ test/psych/json/test_stream.rb | 4 ++++ test/psych/test_boolean.rb | 19 +++++++++++++++++-- test/psych/test_coder.rb | 7 +++++++ test/psych/test_data.rb | 1 + test/psych/test_encoding.rb | 4 ++++ test/psych/test_exception.rb | 3 ++- test/psych/test_json_tree.rb | 2 ++ test/psych/test_omap.rb | 1 + test/psych/test_parser.rb | 3 +++ test/psych/test_psych.rb | 6 ++++++ test/psych/test_set.rb | 1 + test/psych/test_string.rb | 5 +++++ test/psych/test_symbol.rb | 6 ++++++ test/psych/test_tree_builder.rb | 4 ++++ test/psych/test_yaml.rb | 3 +++ test/psych/test_yaml_special_cases.rb | 1 + test/psych/visitors/test_to_ruby.rb | 6 ++++++ 18 files changed, 80 insertions(+), 3 deletions(-) diff --git a/test/psych/helper.rb b/test/psych/helper.rb index 639f6055ff6b8f..b6bf20139eca94 100644 --- a/test/psych/helper.rb +++ b/test/psych/helper.rb @@ -14,6 +14,13 @@ def self.suppress_warning $VERBOSE = verbose end + # True when psych was built with the experimental libfyaml backend + # (--enable-libfyaml), which follows YAML 1.2 and formats output + # differently from the default libyaml backend. + def libfyaml? + defined?(Psych::BACKEND) && Psych::BACKEND == 'libfyaml' + end + def with_default_external(enc) verbose, $VERBOSE = $VERBOSE, nil origenc, Encoding.default_external = Encoding.default_external, enc diff --git a/test/psych/json/test_stream.rb b/test/psych/json/test_stream.rb index 90a770c1b783be..bdbe5ea9b30155 100644 --- a/test/psych/json/test_stream.rb +++ b/test/psych/json/test_stream.rb @@ -51,6 +51,7 @@ def test_float end def test_hash + omit 'libfyaml emits JSON flow mappings multi-line' if libfyaml? hash = { 'one' => 'two' } @stream.push hash @@ -62,6 +63,7 @@ def test_hash end def test_list_to_json + omit 'libfyaml emits JSON flow sequences multi-line' if libfyaml? 
list = %w{ one two } @stream.push list @@ -93,6 +95,7 @@ def test_json_list_dump_exclude_tag end def test_time + omit 'libfyaml emits JSON flow mappings multi-line' if libfyaml? time = Time.utc(2010, 10, 10) @stream.push({'a' => time }) json = @io.string @@ -100,6 +103,7 @@ def test_time end def test_datetime + omit 'libfyaml emits JSON flow mappings multi-line' if libfyaml? time = Time.new(2010, 10, 10).to_datetime @stream.push({'a' => time }) json = @io.string diff --git a/test/psych/test_boolean.rb b/test/psych/test_boolean.rb index a4b80fc13d1b80..ec31c831355bdf 100644 --- a/test/psych/test_boolean.rb +++ b/test/psych/test_boolean.rb @@ -6,18 +6,33 @@ module Psych # Test booleans from YAML spec: # http://yaml.org/type/bool.html class TestBoolean < TestCase - %w{ yes Yes YES true True TRUE on On ON }.each do |truth| + # true/false are booleans in both YAML 1.1 and 1.2. + %w{ true True TRUE }.each do |truth| define_method(:"test_#{truth}") do assert_equal true, Psych.load("--- #{truth}") end end - %w{ no No NO false False FALSE off Off OFF }.each do |truth| + %w{ false False FALSE }.each do |truth| define_method(:"test_#{truth}") do assert_equal false, Psych.load("--- #{truth}") end end + # yes/on and no/off are booleans only under YAML 1.1 (the libyaml backend). + # The YAML 1.2 libfyaml backend keeps them as plain strings. + %w{ yes Yes YES on On ON }.each do |truth| + define_method(:"test_#{truth}") do + assert_equal(libfyaml? ? truth : true, Psych.load("--- #{truth}")) + end + end + + %w{ no No NO off Off OFF }.each do |truth| + define_method(:"test_#{truth}") do + assert_equal(libfyaml? ? 
truth : false, Psych.load("--- #{truth}")) + end + end + ### # YAML spec says "y" and "Y" may be used as true, but Syck treats them # as literal strings diff --git a/test/psych/test_coder.rb b/test/psych/test_coder.rb index a6f5ad7f363c5a..3883ceee704341 100644 --- a/test/psych/test_coder.rb +++ b/test/psych/test_coder.rb @@ -196,6 +196,7 @@ def test_load_dumped_tagging end def test_dump_with_tag + omit 'libfyaml emits the flow mapping multi-line' if libfyaml? foo = TaggingCoder.new assert_match(/hello/, Psych.dump(foo)) assert_match(/\{aa/, Psych.dump(foo)) @@ -240,6 +241,7 @@ def test_coder_style_map_block end def test_coder_style_map_flow + omit 'libfyaml emits flow collections multi-line' if libfyaml? pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ foo = Psych.dump CustomEncode.new \ @@ -271,6 +273,7 @@ def test_coder_style_seq_block end def test_coder_style_seq_flow + omit 'libfyaml emits flow collections multi-line' if libfyaml? foo = Psych.dump CustomEncode.new \ seq: [ 1, 2, 3 ], style: Psych::Nodes::Sequence::FLOW, @@ -300,6 +303,7 @@ def test_coder_style_scalar_plain end def test_coder_style_scalar_single_quoted + omit 'libfyaml does not synthesize the non-specific ! tag' if libfyaml? foo = Psych.dump CustomEncode.new \ scalar: 'some scalar', style: Psych::Nodes::Scalar::SINGLE_QUOTED, @@ -308,6 +312,7 @@ def test_coder_style_scalar_single_quoted end def test_coder_style_scalar_double_quoted + omit 'libfyaml does not synthesize the non-specific ! tag' if libfyaml? foo = Psych.dump CustomEncode.new \ scalar: 'some scalar', style: Psych::Nodes::Scalar::DOUBLE_QUOTED, @@ -316,6 +321,7 @@ def test_coder_style_scalar_double_quoted end def test_coder_style_scalar_literal + omit 'libfyaml does not synthesize the non-specific ! tag' if libfyaml? 
foo = Psych.dump CustomEncode.new \ scalar: 'some scalar', style: Psych::Nodes::Scalar::LITERAL, @@ -324,6 +330,7 @@ def test_coder_style_scalar_literal end def test_coder_style_scalar_folded + omit 'libfyaml does not synthesize the non-specific ! tag' if libfyaml? foo = Psych.dump CustomEncode.new \ scalar: 'some scalar', style: Psych::Nodes::Scalar::FOLDED, diff --git a/test/psych/test_data.rb b/test/psych/test_data.rb index 5e340c580a3f24..cf22cbbc3ed157 100644 --- a/test/psych/test_data.rb +++ b/test/psych/test_data.rb @@ -25,6 +25,7 @@ def setup # TODO: move to another test? def test_dump_data + omit 'libfyaml formats the dump differently (data still round-trips)' if libfyaml? assert_equal <<~eoyml, Psych.dump(PsychDataWithIvar["bar"]) --- !ruby/data-with-ivars:PsychDataWithIvar members: diff --git a/test/psych/test_encoding.rb b/test/psych/test_encoding.rb index 1867d59ea64f81..0a31a6803c2469 100644 --- a/test/psych/test_encoding.rb +++ b/test/psych/test_encoding.rb @@ -119,6 +119,7 @@ def test_io_utf8_read_as_binary end def test_emit_alias + omit 'libfyaml rejects non-ASCII aliases with a different error' if libfyaml? pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ @emitter.start_stream Psych::Parser::UTF8 @@ -141,6 +142,7 @@ def test_to_yaml_is_valid end def test_start_mapping + omit 'libfyaml rejects the non-ASCII tag as an invalid tag' if libfyaml? foo = 'foo' bar = 'バー' @@ -161,6 +163,7 @@ def test_start_mapping end def test_start_sequence + omit 'libfyaml rejects the non-ASCII tag as an invalid tag' if libfyaml? foo = 'foo' bar = 'バー' @@ -181,6 +184,7 @@ def test_start_sequence end def test_doc_tag_encoding + omit 'libfyaml rejects the non-ASCII tag directive prefix' if libfyaml? 
key = '鍵' @emitter.start_stream Psych::Parser::UTF8 @emitter.start_document( diff --git a/test/psych/test_exception.rb b/test/psych/test_exception.rb index 6fd92abf9d22d6..20ee2262cf1171 100644 --- a/test/psych/test_exception.rb +++ b/test/psych/test_exception.rb @@ -156,7 +156,8 @@ def test_attributes # assert_equal 5, e.offset assert e.problem - assert e.context + # libfyaml's diagnostics do not carry libyaml's separate "context" text. + assert e.context unless libfyaml? end def test_convert diff --git a/test/psych/test_json_tree.rb b/test/psych/test_json_tree.rb index 3c59a8dbda9097..37c05075b039ac 100644 --- a/test/psych/test_json_tree.rb +++ b/test/psych/test_json_tree.rb @@ -53,12 +53,14 @@ def test_list_to_json end def test_time + omit 'libfyaml emits JSON flow mappings multi-line' if libfyaml? time = Time.utc(2010, 10, 10) assert_equal "{\"a\": \"2010-10-10 00:00:00.000000000 Z\"}\n", Psych.to_json({'a' => time }) end def test_datetime + omit 'libfyaml emits JSON flow mappings multi-line' if libfyaml? time = Time.new(2010, 10, 10).to_datetime assert_equal "{\"a\": \"#{time.strftime("%Y-%m-%d %H:%M:%S.%9N %:z")}\"}\n", Psych.to_json({'a' => time }) end diff --git a/test/psych/test_omap.rb b/test/psych/test_omap.rb index 6de0286406f309..d59f0b29f5c9e4 100644 --- a/test/psych/test_omap.rb +++ b/test/psych/test_omap.rb @@ -39,6 +39,7 @@ def test_square end def test_dump + omit 'libfyaml emits the verbose tag !' if libfyaml? map = Psych::Omap['a', 'b', 'c', 'd'] yaml = Psych.dump(map) assert_match('!omap', yaml) diff --git a/test/psych/test_parser.rb b/test/psych/test_parser.rb index c175b8a1eb7e48..786cf016359b17 100644 --- a/test/psych/test_parser.rb +++ b/test/psych/test_parser.rb @@ -84,6 +84,7 @@ def test_filename end def test_line_numbers + omit 'libfyaml reports event marks differently from libyaml' if libfyaml? 
assert_equal 0, @parser.mark.line pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ @@ -111,6 +112,7 @@ def test_line_numbers end def test_column_numbers + omit 'libfyaml reports event marks differently from libyaml' if libfyaml? assert_equal 0, @parser.mark.column pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ @@ -138,6 +140,7 @@ def test_column_numbers end def test_index_numbers + omit 'libfyaml reports event marks differently from libyaml' if libfyaml? assert_equal 0, @parser.mark.index pend "Failing on JRuby" if RUBY_PLATFORM =~ /java/ diff --git a/test/psych/test_psych.rb b/test/psych/test_psych.rb index 8e5ec9419e4565..6ba84e5e9b92a6 100644 --- a/test/psych/test_psych.rb +++ b/test/psych/test_psych.rb @@ -36,6 +36,7 @@ def test_indent end def test_canonical + omit 'canonical output is not supported on the libfyaml backend' if libfyaml? yml = Psych.dump({:a => {'b' => 'c'}}, {:canonical => true}) assert_match(/\? "b/, yml) end @@ -436,6 +437,7 @@ def test_safe_dump_unpermitted_class end def test_safe_dump_extra_permitted_classes + omit 'libfyaml formats the empty flow mapping differently' if libfyaml? assert_equal "--- !ruby/object {}\n", Psych.safe_dump(Object.new, permitted_classes: [Object]) end @@ -452,6 +454,9 @@ def test_safe_dump_symbols end def test_safe_dump_stringify_names + # The 1.2 libfyaml backend does not quote 'no', so the expected escaping + # of the "no" key does not apply. + omit "libfyaml does not quote the 'no' key" if libfyaml? yaml = <<-eoyml --- foo: @@ -478,6 +483,7 @@ def test_safe_dump_stringify_names end def test_safe_dump_aliases + omit 'libfyaml formats anchors and aliases differently' if libfyaml? 
x = [] x << x error = assert_raise Psych::BadAlias do diff --git a/test/psych/test_set.rb b/test/psych/test_set.rb index ccd591c6263beb..f071acb9fd7393 100644 --- a/test/psych/test_set.rb +++ b/test/psych/test_set.rb @@ -10,6 +10,7 @@ def setup end def test_dump + omit 'libfyaml formats the dump differently (data still round-trips)' if libfyaml? assert_equal <<~YAML, Psych.dump(@set) --- !ruby/object:Set hash: diff --git a/test/psych/test_string.rb b/test/psych/test_string.rb index 1621f0604afa68..b7abaafbfef059 100644 --- a/test/psych/test_string.rb +++ b/test/psych/test_string.rb @@ -24,6 +24,9 @@ def initialize # "ambiguity" in the emitted document def test_all_yaml_1_1_booleans_are_quoted + # The YAML 1.2 libfyaml backend does not treat yes/no/on/off as booleans, + # so it has no reason to quote them. + omit 'YAML 1.1 booleans are not special on the libfyaml backend' if libfyaml? yaml_1_1_booleans = %w[y Y yes Yes YES n N no No NO true True TRUE false False FALSE on On ON off Off OFF] # from https://yaml.org/type/bool.html yaml_1_1_booleans.each do |boolean| assert_match(/"#{boolean}"|'#{boolean}'/, Psych.dump(boolean)) @@ -86,6 +89,7 @@ def test_plain_when_shorten_than_line_width_and_with_final_line_break end def test_folded_when_longer_than_line_width_and_with_final_line_break + omit 'libfyaml uses a different block chomping indicator' if libfyaml? str = "Lorem ipsum dolor sit\n" yaml = Psych.dump str, line_width: 12 assert_match(/---\s*>\n(.*\n){2}\Z/, yaml) @@ -101,6 +105,7 @@ def test_folded_strip_when_longer_than_line_width_and_no_newlines end def test_literal_when_inner_and_final_line_break + omit 'libfyaml uses a different block chomping indicator' if libfyaml? 
[ "Lorem ipsum\ndolor\n", "Lorem ipsum\nZolor\n", diff --git a/test/psych/test_symbol.rb b/test/psych/test_symbol.rb index 36416ffe293e76..9a26bdc93e84e3 100644 --- a/test/psych/test_symbol.rb +++ b/test/psych/test_symbol.rb @@ -8,6 +8,12 @@ def test_cycle_empty end def test_cycle_colon + # Known limitation: libyaml's emitter adds a non-specific "!" tag when it + # must quote a scalar that was requested plain, preserving the plain + # resolution (so ":" round-trips as a Symbol). libfyaml's streaming + # emitter does not synthesize that tag, so a Symbol whose name is a YAML + # indicator character reloads as a String. + omit 'libfyaml does not round-trip symbols named after YAML indicators' if libfyaml? assert_cycle :':' end diff --git a/test/psych/test_tree_builder.rb b/test/psych/test_tree_builder.rb index dfb5da98929661..faf7fe4f70046c 100644 --- a/test/psych/test_tree_builder.rb +++ b/test/psych/test_tree_builder.rb @@ -5,6 +5,10 @@ module Psych class TestTreeBuilder < TestCase def setup super + # This fixture is an explicit YAML 1.1 document whose flow mapping relies + # on 1.1-era lenient indentation. The strict YAML 1.2 libfyaml backend + # correctly rejects it, so these tree-location tests apply to libyaml only. + omit 'YAML 1.1 lenient flow indentation is rejected by the strict libfyaml backend' if libfyaml? @parser = Psych::Parser.new TreeBuilder.new @parser.parse(<<-eoyml) %YAML 1.1 diff --git a/test/psych/test_yaml.rb b/test/psych/test_yaml.rb index 134c346c90585d..320920ce629318 100644 --- a/test/psych/test_yaml.rb +++ b/test/psych/test_yaml.rb @@ -461,6 +461,9 @@ def test_spec_type_float end def test_spec_type_misc + # The fixture relies on yes/no parsing as booleans, which is YAML 1.1 + # behavior the libfyaml backend does not follow. + omit 'yes/no are strings on the YAML 1.2 libfyaml backend' if libfyaml? 
assert_parse_only( { nil => nil, true => true, false => false, 'string' => '12345' }, < Date: Wed, 1 Jul 2026 09:59:18 +0900 Subject: [PATCH 13/31] [ruby/psych] Harden the libfyaml backend after code review Mark the reconstructed SyntaxError message UTF-8 instead of US-ASCII, so a diagnostic that embeds a multibyte snippet of the input does not raise Encoding::CompatibilityError when concatenated with UTF-8. Add RB_GC_GUARD for the anchor and tag strings in the emitter (matching the existing guard on the scalar value) so their C pointers cannot dangle if a GC runs inside fy_emit_event_create. https://github.com/ruby/psych/commit/1c73f43df1 Co-Authored-By: Claude Opus 4.8 --- ext/psych/psych_emitter_fy.c | 7 +++++++ ext/psych/psych_parser_fy.c | 6 +++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/ext/psych/psych_emitter_fy.c b/ext/psych/psych_emitter_fy.c index 7dd60f60a70e1e..33d0c366699b75 100644 --- a/ext/psych/psych_emitter_fy.c +++ b/ext/psych/psych_emitter_fy.c @@ -313,6 +313,8 @@ static VALUE scalar(VALUE self, VALUE value, VALUE anchor, VALUE tag, do_emit(e, event); RB_GC_GUARD(value); + RB_GC_GUARD(anchor); + RB_GC_GUARD(tag); return self; } @@ -333,6 +335,8 @@ static VALUE start_sequence(VALUE self, VALUE anchor, VALUE tag, NIL_P(tag) ? NULL : StringValueCStr(tag)); do_emit(e, event); + RB_GC_GUARD(anchor); + RB_GC_GUARD(tag); return self; } @@ -362,6 +366,8 @@ static VALUE start_mapping(VALUE self, VALUE anchor, VALUE tag, NIL_P(tag) ? NULL : StringValueCStr(tag)); do_emit(e, event); + RB_GC_GUARD(anchor); + RB_GC_GUARD(tag); return self; } @@ -383,6 +389,7 @@ static VALUE alias(VALUE self, VALUE anchor) do_emit(e, fy_emit_event_create(e->emit, FYET_ALIAS, NIL_P(anchor) ? 
NULL : StringValueCStr(anchor))); + RB_GC_GUARD(anchor); return self; } diff --git a/ext/psych/psych_parser_fy.c b/ext/psych/psych_parser_fy.c index 6b9d04e3864afb..431705f101170e 100644 --- a/ext/psych/psych_parser_fy.c +++ b/ext/psych/psych_parser_fy.c @@ -112,7 +112,11 @@ static VALUE make_exception(psych_fy_parser_t *parser, VALUE path) void *iter = NULL; struct fy_diag_error *err = fy_diag_errors_iterate(diag, &iter); if (err) { - if (err->msg) problem = rb_usascii_str_new2(err->msg); + /* The message may embed a snippet of the (possibly multibyte) + * input, so mark it UTF-8 rather than US-ASCII. */ + if (err->msg) { + problem = rb_enc_str_new_cstr(err->msg, rb_utf8_encoding()); + } if (err->line >= 0) line = (size_t)err->line; if (err->column >= 0) column = (size_t)err->column; } From a02377fee8f92554b1375c816fe1d4be5f24f57b Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Wed, 1 Jul 2026 10:28:29 +0900 Subject: [PATCH 14/31] [ruby/psych] Add positive tests for the libfyaml backend Cover what the libfyaml backend does distinctly, each placed with the concern it belongs to: the Psych::BACKEND and Psych.libfyaml_version checks in test_psych.rb, the YAML 1.2 "Norway problem" boolean case in test_boolean.rb, and the "1.1 booleans are not quoted" emission case in test_string.rb. The 1.2 assertions are skipped on the default libyaml backend so the same suite passes under both. 
https://github.com/ruby/psych/commit/35d0a437a2 Co-Authored-By: Claude Opus 4.8 --- test/psych/test_boolean.rb | 10 ++++++++++ test/psych/test_psych.rb | 17 +++++++++++++++++ test/psych/test_string.rb | 8 ++++++++ 3 files changed, 35 insertions(+) diff --git a/test/psych/test_boolean.rb b/test/psych/test_boolean.rb index ec31c831355bdf..bf7227f528a4e4 100644 --- a/test/psych/test_boolean.rb +++ b/test/psych/test_boolean.rb @@ -48,5 +48,15 @@ def test_n assert_equal "n", Psych.load("--- n") assert_equal "N", Psych.load("--- N") end + + ### + # The "Norway problem": under YAML 1.2 (the libfyaml backend) yes/no/on/off + # are plain strings in every context, so the country code "no" no longer + # becomes false. + def test_norway_problem + omit 'libfyaml (YAML 1.2) backend only' unless libfyaml? + assert_equal({ 'country' => 'no' }, Psych.load("country: no")) + assert_equal %w[yes no on off], Psych.load("- yes\n- no\n- on\n- off\n") + end end end diff --git a/test/psych/test_psych.rb b/test/psych/test_psych.rb index 6ba84e5e9b92a6..4b02e844bcd777 100644 --- a/test/psych/test_psych.rb +++ b/test/psych/test_psych.rb @@ -118,6 +118,23 @@ def test_libyaml_version assert_equal Psych.libyaml_version.join('.'), Psych::LIBYAML_VERSION end + def test_backend + omit 'Psych::BACKEND is not defined on this backend' unless defined?(Psych::BACKEND) + assert_includes %w[libyaml libfyaml], Psych::BACKEND + assert_equal 'libfyaml', Psych::BACKEND if libfyaml? + end + + def test_libfyaml_version + omit 'libfyaml backend only' unless libfyaml? + assert_kind_of String, Psych.libfyaml_version + assert_match(/\A\d+\.\d+/, Psych.libfyaml_version) + end + + def test_libfyaml_version_absent_without_libfyaml + omit 'libfyaml backend defines libfyaml_version' if libfyaml? 
+ refute_respond_to Psych, :libfyaml_version + end + def test_load_stream docs = Psych.load_stream("--- foo\n...\n--- bar\n...") assert_equal %w{ foo bar }, docs diff --git a/test/psych/test_string.rb b/test/psych/test_string.rb index b7abaafbfef059..2b53844faa1dc6 100644 --- a/test/psych/test_string.rb +++ b/test/psych/test_string.rb @@ -33,6 +33,14 @@ def test_all_yaml_1_1_booleans_are_quoted end end + def test_yaml_1_1_booleans_are_not_quoted_on_libfyaml + omit 'YAML 1.1 booleans are plain strings on the libfyaml backend' unless libfyaml? + %w[yes no on off].each do |boolean| + assert_equal "--- #{boolean}\n", Psych.dump(boolean) + assert_equal boolean, Psych.load(Psych.dump(boolean)) + end + end + def test_string_with_newline assert_equal "1\n2", Psych.load("--- ! '1\n\n 2'\n") end From 82ab14ecb0827f1be9aefa43f492e0214d1ff2ff Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Wed, 1 Jul 2026 11:20:50 +0900 Subject: [PATCH 15/31] [ruby/psych] Address Copilot review feedback on the libfyaml backend Reject canonical output with NotImplementedError instead of silently ignoring the request, and honor the implicit flag in start_sequence and start_mapping so an implicit tag is not printed as a redundant verbose tag. Relax the unquoted-boolean dump test to allow an optional document end marker. Also correct the version docstrings and a stale comment about how the parser collects diagnostics. 
https://github.com/ruby/psych/commit/23f60daff1 Co-Authored-By: Claude Opus 4.8 --- ext/psych/psych.c | 8 ++++++-- ext/psych/psych_emitter_fy.c | 27 ++++++++++++++++++++++----- ext/psych/psych_parser_fy.c | 4 ++-- test/psych/test_emitter.rb | 8 ++++++++ test/psych/test_string.rb | 3 ++- 5 files changed, 40 insertions(+), 10 deletions(-) diff --git a/ext/psych/psych.c b/ext/psych/psych.c index 2a5db212ad2a10..2e3f96782aac6b 100644 --- a/ext/psych/psych.c +++ b/ext/psych/psych.c @@ -2,7 +2,9 @@ /* call-seq: Psych.libyaml_version * - * Returns the version of libyaml being used + * Returns the version of the underlying YAML library as a three-element + * array. This is libyaml by default. On the experimental libfyaml backend, + * where libyaml is not linked, it reports the libfyaml version instead. */ static VALUE libyaml_version(VALUE module) { @@ -30,7 +32,9 @@ static VALUE libyaml_version(VALUE module) #ifdef PSYCH_USE_LIBFYAML /* call-seq: Psych.libfyaml_version * - * Returns the libfyaml version string, or nil when not built with libfyaml. + * Returns the libfyaml version string. This method is only defined when + * psych was built with the experimental libfyaml backend + * (+--enable-libfyaml+). */ static VALUE libfyaml_version(VALUE module) { diff --git a/ext/psych/psych_emitter_fy.c b/ext/psych/psych_emitter_fy.c index 33d0c366699b75..1179a5db91467a 100644 --- a/ext/psych/psych_emitter_fy.c +++ b/ext/psych/psych_emitter_fy.c @@ -122,7 +122,12 @@ static VALUE initialize(int argc, VALUE *argv, VALUE self) if (rb_scan_args(argc, argv, "11", &io, &options) == 2) { e->width = NUM2INT(rb_funcall(options, id_line_width, 0)); e->indent = NUM2INT(rb_funcall(options, id_indentation, 0)); - e->canonical = (Qtrue == rb_funcall(options, id_canonical, 0)) ? 1 : 0; + /* libfyaml has no canonical emit mode, so fail fast instead of + * silently producing non-canonical output. 
*/ + if (RTEST(rb_funcall(options, id_canonical, 0))) { + rb_raise(rb_eNotImpError, + "canonical output is not supported by the libfyaml backend"); + } } rb_ivar_set(self, id_io, io); @@ -329,10 +334,14 @@ static VALUE start_sequence(VALUE self, VALUE anchor, VALUE tag, if (!NIL_P(anchor)) { Check_Type(anchor, T_STRING); anchor = rb_str_export_to_enc(anchor, encoding); } if (!NIL_P(tag)) { Check_Type(tag, T_STRING); tag = rb_str_export_to_enc(tag, encoding); } + /* An implicit tag can be omitted, matching libyaml; emitting it anyway + * would print a redundant (often verbose) tag. */ + int emit_tag = !NIL_P(tag) && !RTEST(implicit); + struct fy_event *event = fy_emit_event_create(e->emit, FYET_SEQUENCE_START, psych_to_fyns(NUM2INT(style)), NIL_P(anchor) ? NULL : StringValueCStr(anchor), - NIL_P(tag) ? NULL : StringValueCStr(tag)); + emit_tag ? StringValueCStr(tag) : NULL); do_emit(e, event); RB_GC_GUARD(anchor); @@ -360,10 +369,14 @@ static VALUE start_mapping(VALUE self, VALUE anchor, VALUE tag, if (!NIL_P(anchor)) { Check_Type(anchor, T_STRING); anchor = rb_str_export_to_enc(anchor, encoding); } if (!NIL_P(tag)) { Check_Type(tag, T_STRING); tag = rb_str_export_to_enc(tag, encoding); } + /* An implicit tag can be omitted, matching libyaml; emitting it anyway + * would print a redundant (often verbose) tag. */ + int emit_tag = !NIL_P(tag) && !RTEST(implicit); + struct fy_event *event = fy_emit_event_create(e->emit, FYET_MAPPING_START, psych_to_fyns(NUM2INT(style)), NIL_P(anchor) ? NULL : StringValueCStr(anchor), - NIL_P(tag) ? NULL : StringValueCStr(tag)); + emit_tag ? StringValueCStr(tag) : NULL); do_emit(e, event); RB_GC_GUARD(anchor); @@ -397,8 +410,12 @@ static VALUE set_canonical(VALUE self, VALUE style) { psych_fy_emitter_t *e; TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); - e->canonical = (Qtrue == style) ? 
1 : 0; - rebuild_emitter(self, e); + /* libfyaml has no canonical emit mode, so reject enabling it rather than + * pretending to honor the request. */ + if (RTEST(style)) { + rb_raise(rb_eNotImpError, + "canonical output is not supported by the libfyaml backend"); + } return style; } diff --git a/ext/psych/psych_parser_fy.c b/ext/psych/psych_parser_fy.c index 431705f101170e..96aa0fe6c25892 100644 --- a/ext/psych/psych_parser_fy.c +++ b/ext/psych/psych_parser_fy.c @@ -98,8 +98,8 @@ static VALUE allocate(VALUE klass) } /* Reconstruct a Psych::SyntaxError from libfyaml's collected diagnostics. The - * parser is created with FYPCF_COLLECT_DIAG, so the first collected error gives - * us the message and position. */ + * parser's diag was switched to collect mode with fy_diag_set_collect_errors() + * in parse(), so the first collected error gives us the message and position. */ static VALUE make_exception(psych_fy_parser_t *parser, VALUE path) { VALUE ePsychSyntaxError = rb_const_get(mPsych, rb_intern("SyntaxError")); diff --git a/test/psych/test_emitter.rb b/test/psych/test_emitter.rb index 506d72241c1ab6..7755fec093bbb8 100644 --- a/test/psych/test_emitter.rb +++ b/test/psych/test_emitter.rb @@ -17,6 +17,14 @@ def test_line_width end def test_set_canonical + if libfyaml? + # The libfyaml backend has no canonical mode and rejects enabling it. + assert_raise(NotImplementedError) { @emitter.canonical = true } + @emitter.canonical = false + assert_equal false, @emitter.canonical + return + end + @emitter.canonical = true assert_equal true, @emitter.canonical diff --git a/test/psych/test_string.rb b/test/psych/test_string.rb index 2b53844faa1dc6..1cb1ed03b86d03 100644 --- a/test/psych/test_string.rb +++ b/test/psych/test_string.rb @@ -36,7 +36,8 @@ def test_all_yaml_1_1_booleans_are_quoted def test_yaml_1_1_booleans_are_not_quoted_on_libfyaml omit 'YAML 1.1 booleans are plain strings on the libfyaml backend' unless libfyaml? 
%w[yes no on off].each do |boolean| - assert_equal "--- #{boolean}\n", Psych.dump(boolean) + # Unquoted plain scalar, allowing an optional document end marker. + assert_match(/\A--- #{boolean}\n(?:\.\.\.\n)?\z/, Psych.dump(boolean)) assert_equal boolean, Psych.load(Psych.dump(boolean)) end end From 5ef8b6ef8185a4ea8cbc47eeecea59bea0cdcb8f Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Wed, 1 Jul 2026 12:05:57 +0900 Subject: [PATCH 16/31] [ruby/psych] Require libfyaml 0.9 and build it from source in CI The libfyaml-dev package on Ubuntu is 0.8, which segfaults psych's emitter, while 0.9.6 (used on macOS via Homebrew) passes the whole suite. Build the same 0.9.6 release from source in the Linux CI job, and reject libfyaml older than 0.9 in extconf so users get a clear error instead of a runtime crash. Also drop an unused variable in set_canonical. https://github.com/ruby/psych/commit/88ac4a0cc0 Co-Authored-By: Claude Opus 4.8 --- ext/psych/extconf.rb | 6 ++++++ ext/psych/psych_emitter_fy.c | 2 -- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/ext/psych/extconf.rb b/ext/psych/extconf.rb index e1ea7510e1c145..de7bd6c8c638a4 100644 --- a/ext/psych/extconf.rb +++ b/ext/psych/extconf.rb @@ -12,6 +12,12 @@ unless pkg_config('libfyaml') abort "libfyaml was requested with --enable-libfyaml but was not found via pkg-config" end + # libfyaml 0.8 and earlier crash psych's emitter, so require a known-good + # version rather than building something that segfaults at runtime. 
+ pkgconfig = ENV["PKG_CONFIG"] || "pkg-config" + unless system(pkgconfig, "--atleast-version=0.9", "libfyaml") + abort "The libfyaml backend requires libfyaml 0.9 or newer" + end $defs << "-DPSYCH_USE_LIBFYAML" create_makefile 'psych' diff --git a/ext/psych/psych_emitter_fy.c b/ext/psych/psych_emitter_fy.c index 1179a5db91467a..22bf23e5323cb6 100644 --- a/ext/psych/psych_emitter_fy.c +++ b/ext/psych/psych_emitter_fy.c @@ -408,8 +408,6 @@ static VALUE alias(VALUE self, VALUE anchor) static VALUE set_canonical(VALUE self, VALUE style) { - psych_fy_emitter_t *e; - TypedData_Get_Struct(self, psych_fy_emitter_t, &psych_emitter_type, e); /* libfyaml has no canonical emit mode, so reject enabling it rather than * pretending to honor the request. */ if (RTEST(style)) { From 988fbe038ff9ff4940241e02c82bae3ce2ec2f44 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 18 Jun 2026 09:47:02 +0900 Subject: [PATCH 17/31] Move EXTSTATIC check to PRE_LIBRUBY_UPDATE --- template/Makefile.in | 6 +++--- win32/Makefile.sub | 6 +----- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/template/Makefile.in b/template/Makefile.in index 18f54c6f7d9c99..e621ec0473b439 100644 --- a/template/Makefile.in +++ b/template/Makefile.in @@ -322,14 +322,14 @@ $(PROGRAM): $(PROGRAM): @XRUBY_LIBPATHENV_WRAPPER@ -PRE_LIBRUBY_UPDATE = [ -n "$(LIBRUBY_SO_UPDATE)" ] || $(gnumake:yes=exec) $(RM) $(LIBRUBY_EXTS) +PRE_LIBRUBY_UPDATE = [ -z "$(EXTSTATIC)" -o -n "$(LIBRUBY_SO_UPDATE)" ] || $(gnumake:yes=exec) $(RM) $(LIBRUBY_EXTS) # We must `rm' the library each time this rule is invoked because "updating" a # MAB library on Apple/NeXT (see --enable-fat-binary in configure) is not # supported. 
$(LIBRUBY_A): @$(RM) $@ - @-[ -z "$(EXTSTATIC)" ] || $(PRE_LIBRUBY_UPDATE) + @-$(PRE_LIBRUBY_UPDATE) $(ECHO) linking static-library $@ $(Q) $(AR) $(ARFLAGS) $@ $(LIBRUBY_A_OBJS) $(INITOBJS) @-$(RANLIB) $@ 2> /dev/null || true @@ -340,7 +340,7 @@ verify-static-library: $(LIBRUBY_A) @$(RMALL) conftest$(EXEEXT) conftest.c conftest.dSYM $(LIBRUBY_SO): - @-[ -n "$(EXTSTATIC)" ] || $(PRE_LIBRUBY_UPDATE) + @-$(PRE_LIBRUBY_UPDATE) $(ECHO) linking shared-library $@ $(Q) $(LDSHARED) $(DLDFLAGS) $(OBJS) $(DLDOBJS) $(SOLIBS) $(EXTSOLIBS) $(OUTFLAG)$@ -$(Q) $(OBJCOPY) -w -L '$(SYMBOL_PREFIX)Init_*' -L '$(SYMBOL_PREFIX)InitVM_*' \ diff --git a/win32/Makefile.sub b/win32/Makefile.sub index 47837c42ad5411..cd0ba933dafc3b 100644 --- a/win32/Makefile.sub +++ b/win32/Makefile.sub @@ -1137,16 +1137,14 @@ $(STUBPROGRAM): rubystub.$(OBJEXT) $(LIBRUBY) $(LIBRUBY_SO) $(RUBY_INSTALL_NAME) $(OUTFLAG)$@ $(LIBRUBYARG) -link $(LDFLAGS) $(XLDFLAGS) !endif -!if "$(LIBRUBY_SO_UPDATE)" == "" +!if "$(EXTSTATIC)" != "" && "$(LIBRUBY_SO_UPDATE)" == "" PRE_LIBRUBY_UPDATE = $(RM) $(LIBRUBY_EXTS) !else PRE_LIBRUBY_UPDATE = !endif $(LIBRUBY_A): $(OBJS) $(INITOBJS) -!if "$(EXTSTATIC)" != "" @-$(PRE_LIBRUBY_UPDATE) -!endif $(ECHO) linking static-library $(@:\=/) $(Q) $(AR) $(ARFLAGS)$@ $(OBJS) $(INITOBJS) @@ -1156,9 +1154,7 @@ $(LIBRUBY): $(RUBYDEF) $(LIBRUBY_SO): $(LIBRUBY_A) $(DLDOBJS) $(RUBYDEF) $(RUBY_SO_NAME).res @echo $(DLDOBJS) -!if "$(EXTSTATIC)" == "" @-$(PRE_LIBRUBY_UPDATE) -!endif $(ECHO) linking shared-library $(@:\=/) $(Q) $(LDSHARED) $(DLDOBJS) $(LIBRUBY_A) \ $(RUBY_SO_NAME).res $(SOLIBS) $(EXTSOLIBS) $(LIBS) \ From 172bdf58813daa34003a71c7f1318f25351e8ae2 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Thu, 18 Jun 2026 10:30:52 +0900 Subject: [PATCH 18/31] Fix static-linked-exts dependency --- common.mk | 2 ++ template/Makefile.in | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/common.mk b/common.mk index e00a7050dfe90f..29fd0f5bcf6655 100644 --- a/common.mk +++ 
b/common.mk @@ -414,6 +414,8 @@ $(LIBRUBY_A): $(LIBRUBY_A_OBJS) $(MAINOBJ) $(INITOBJS) $(ARCHFILE) $(LIBRUBY_SO): $(OBJS) $(DLDOBJS) $(LIBRUBY_A) $(PREP) $(BUILTIN_ENCOBJS) +$(LIBRUBY_A) $(LIBRUBY_SO): $(LIBRUBY_SO_UPDATE) + $(LIBRUBY_EXTS): @$(NULLCMD) > $@ diff --git a/template/Makefile.in b/template/Makefile.in index e621ec0473b439..1a454fb9db0bb3 100644 --- a/template/Makefile.in +++ b/template/Makefile.in @@ -351,9 +351,6 @@ $(LIBRUBY_SO): aliases.each { |link| File.delete link rescue nil; File.symlink so, link }' \ $(LIBRUBY_SO) $(LIBRUBY_ALIASES) || true -LIBRUBY_WITH_EXT = @LIBRUBY_WITH_EXT@ -$(LIBRUBY_$(LIBRUBY_WITH_EXT)): $(LIBRUBY_SO_UPDATE) - PKG_CONFIG = @PKG_CONFIG@ ruby_pc = @ruby_pc@ $(ruby_pc): config.status Makefile From d39d807211c1c98275db4c6544724e7dcd9e05a1 Mon Sep 17 00:00:00 2001 From: Jun Aruga Date: Tue, 23 Jun 2026 17:06:33 +0200 Subject: [PATCH 19/31] [ruby/rubygems] bundler: Fix Bundler::Fetcher for PQC support, adding integration connection tests Create spec/bundler/fetcher/gem_remote_fetcher_local_ssl_server_spec.rb adding non-PQC and PQC server/client connection integration tests. As "Bundler::Fetcher local SSL server #connection PQC connects with client cert auth" failed with the following error due to hardcoded `OpenSSL::PKey::RSA.new` in `Bundler::Fetcher#connection`, fixed it to support ML-DSA ssl_client_cert. ``` $ bin/rspec spec/bundler/fetcher/gem_remote_fetcher_local_ssl_server_spec.rb ... 
Failures: 1) Bundler::Fetcher local SSL server #connection PQC connects with client cert auth Failure/Error: fetcher = Bundler::Fetcher.new(remote) OpenSSL::PKey::PKeyError: incorrect pkey type: UNDEF # /home/jaruga/.local/ruby-4.1.0-debug-3ef48ef9c8-openssl-4.1.0-7194354488/lib/ruby/4.1.0+1/openssl/pkey.rb:394:in 'OpenSSL::PKey::RSA#initialize' # /home/jaruga/.local/ruby-4.1.0-debug-3ef48ef9c8-openssl-4.1.0-7194354488/lib/ruby/4.1.0+1/openssl/pkey.rb:394:in 'Class#new' # /home/jaruga/.local/ruby-4.1.0-debug-3ef48ef9c8-openssl-4.1.0-7194354488/lib/ruby/4.1.0+1/openssl/pkey.rb:394:in 'OpenSSL::PKey::RSA.new' # ./bundler/lib/bundler/fetcher.rb:321:in 'Bundler::Fetcher#connection' # ./bundler/lib/bundler/fetcher.rb:140:in 'Bundler::Fetcher#initialize' # ./spec/bundler/fetcher/gem_remote_fetcher_local_ssl_server_spec.rb:69:in 'RSpec::ExampleGroups::BundlerFetcherLocalSSLServer#fetch_path' # ./spec/bundler/fetcher/gem_remote_fetcher_local_ssl_server_spec.rb:60:in 'block (4 levels) in <top (required)>' ... ``` Create test/rubygems/local_ssl_server_utilities.rb to manage utility methods called by RubyGems test-unit and Bundler rspec tests. 
Assisted-by: Claude:claude-opus-4-6[1m] https://github.com/ruby/rubygems/commit/6aea5fcc17 --- lib/bundler/fetcher.rb | 2 +- ...em_remote_fetcher_local_ssl_server_spec.rb | 80 +++++++++ spec/bundler/bundler/fetcher_spec.rb | 6 +- spec/bundler/support/path.rb | 4 + spec/bundler/support/shards.rb | 1 + test/rubygems/local_ssl_server_utilities.rb | 154 ++++++++++++++++++ ...est_gem_remote_fetcher_local_ssl_server.rb | 150 ++--------------- 7 files changed, 258 insertions(+), 139 deletions(-) create mode 100644 spec/bundler/bundler/fetcher/gem_remote_fetcher_local_ssl_server_spec.rb create mode 100644 test/rubygems/local_ssl_server_utilities.rb diff --git a/lib/bundler/fetcher.rb b/lib/bundler/fetcher.rb index cce005dd34bec1..39e1856909a155 100644 --- a/lib/bundler/fetcher.rb +++ b/lib/bundler/fetcher.rb @@ -318,7 +318,7 @@ def connection if ssl_client_cert pem = File.read(ssl_client_cert) con.cert = OpenSSL::X509::Certificate.new(pem) - con.key = OpenSSL::PKey::RSA.new(pem) + con.key = OpenSSL::PKey.read(pem) end con.read_timeout = Fetcher.api_timeout diff --git a/spec/bundler/bundler/fetcher/gem_remote_fetcher_local_ssl_server_spec.rb b/spec/bundler/bundler/fetcher/gem_remote_fetcher_local_ssl_server_spec.rb new file mode 100644 index 00000000000000..91f02005586fc3 --- /dev/null +++ b/spec/bundler/bundler/fetcher/gem_remote_fetcher_local_ssl_server_spec.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: true + +require "bundler/fetcher" +require Spec::Path.rubygems_test_dir.join("local_ssl_server_utilities") + +RSpec.describe "Bundler::Fetcher local SSL server", if: Gem::HAVE_OPENSSL do + include Gem::LocalSSLServerUtilities + + before do + initialize_ssl_server + end + + after do + stop_ssl_server + end + + describe "#connection" do + context "non-PQC" do + it "connects" do + ssl_server = start_ssl_server + allow(Bundler.settings).to receive(:[]).and_call_original + allow(Bundler.settings).to receive(:[]).with(:ssl_ca_cert).and_return(File.join(certs_dir, "ca_cert.pem")) 
+ response = fetch_path("https://localhost:#{ssl_server.addr[1]}/yaml") + expect(response.code).to eq("200") + end + + it "connects with client cert auth" do + ssl_server = start_ssl_server( + verify_mode: OpenSSL::SSL::VERIFY_PEER | OpenSSL::SSL::VERIFY_FAIL_IF_NO_PEER_CERT + ) + allow(Bundler.settings).to receive(:[]).and_call_original + allow(Bundler.settings).to receive(:[]).with(:ssl_ca_cert).and_return(File.join(certs_dir, "ca_cert.pem")) + allow(Bundler.settings).to receive(:[]).with(:ssl_client_cert).and_return(File.join(certs_dir, "client.pem")) + response = fetch_path("https://localhost:#{ssl_server.addr[1]}/yaml") + expect(response.code).to eq("200") + end + end + + context "PQC" do + before do + skip_unless_support_pqc + end + + it "connects" do + ssl_server = start_ssl_server(mode: :pqc) + allow(Bundler.settings).to receive(:[]).and_call_original + allow(Bundler.settings).to receive(:[]).with(:ssl_ca_cert).and_return(File.join(certs_dir, "mldsa65_ca_cert.pem")) + response = fetch_path("https://localhost:#{ssl_server.addr[1]}/yaml") + expect(response.code).to eq("200") + end + + it "connects with client cert auth" do + ssl_server = start_ssl_server( + mode: :pqc, + verify_mode: OpenSSL::SSL::VERIFY_PEER | OpenSSL::SSL::VERIFY_FAIL_IF_NO_PEER_CERT + ) + allow(Bundler.settings).to receive(:[]).and_call_original + allow(Bundler.settings).to receive(:[]).with(:ssl_ca_cert).and_return(File.join(certs_dir, "mldsa65_ca_cert.pem")) + allow(Bundler.settings).to receive(:[]).with(:ssl_client_cert).and_return(File.join(certs_dir, "mldsa65_client.pem")) + response = fetch_path("https://localhost:#{ssl_server.addr[1]}/yaml") + expect(response.code).to eq("200") + end + end + end + + def fetch_path(uri) + uri = Gem::URI(uri) + remote = double("remote", uri: uri, original_uri: nil) + fetcher = Bundler::Fetcher.new(remote) + + connection = fetcher.send(:connection) + connection.request(uri) + end + + def skip_unless_support_pqc + without_pqc_support do |message| + skip 
message + end + end +end diff --git a/spec/bundler/bundler/fetcher_spec.rb b/spec/bundler/bundler/fetcher_spec.rb index e20f7e7c481a7d..3685d104c0eb42 100644 --- a/spec/bundler/bundler/fetcher_spec.rb +++ b/spec/bundler/bundler/fetcher_spec.rb @@ -93,14 +93,14 @@ end end - context "when bunder ssl ssl configuration is set" do + context "when bunder ssl configuration is set" do before do cert = File.join(Spec::Path.tmpdir, "cert") File.open(cert, "w") {|f| f.write "PEM" } allow(Bundler.settings).to receive(:[]).and_return(nil) allow(Bundler.settings).to receive(:[]).with(:ssl_client_cert).and_return(cert) expect(OpenSSL::X509::Certificate).to receive(:new).with("PEM").and_return("cert") - expect(OpenSSL::PKey::RSA).to receive(:new).with("PEM").and_return("key") + expect(OpenSSL::PKey).to receive(:read).with("PEM").and_return("key") end after do FileUtils.rm File.join(Spec::Path.tmpdir, "cert") @@ -120,7 +120,7 @@ ) expect(File).to receive(:read).and_return("") expect(OpenSSL::X509::Certificate).to receive(:new).and_return("cert") - expect(OpenSSL::PKey::RSA).to receive(:new).and_return("key") + expect(OpenSSL::PKey).to receive(:read).and_return("key") store = double("ca store") expect(store).to receive(:add_file) expect(OpenSSL::X509::Store).to receive(:new).and_return(store) diff --git a/spec/bundler/support/path.rb b/spec/bundler/support/path.rb index 17dafb91b70ffb..0efea82cbaf9ee 100644 --- a/spec/bundler/support/path.rb +++ b/spec/bundler/support/path.rb @@ -79,6 +79,10 @@ def spec_dir @spec_dir ||= source_root.join(ruby_core? ? 
"spec/bundler" : "spec") end + def rubygems_test_dir + @rubygems_test_dir ||= source_root.join("test/rubygems") + end + def man_dir @man_dir ||= lib_dir.join("bundler/man") end diff --git a/spec/bundler/support/shards.rb b/spec/bundler/support/shards.rb index 5554c87b0c1ad3..5718e775b23a9b 100644 --- a/spec/bundler/support/shards.rb +++ b/spec/bundler/support/shards.rb @@ -194,6 +194,7 @@ module Shards "spec/bundler/override_spec.rb", "spec/install/gemfile/override_spec.rb", "spec/install/path_spec.rb", + "spec/bundler/fetcher/gem_remote_fetcher_local_ssl_server_spec.rb", ], }.freeze end diff --git a/test/rubygems/local_ssl_server_utilities.rb b/test/rubygems/local_ssl_server_utilities.rb new file mode 100644 index 00000000000000..a068efa9642d96 --- /dev/null +++ b/test/rubygems/local_ssl_server_utilities.rb @@ -0,0 +1,154 @@ +# frozen_string_literal: true + +# This file can be loaded by RubyGems test-unit files and Bundler rspec files. +# Don't add test-unit or rspec dependent logic in this file. 
+ +require "socket" +require "openssl" + +module Gem::LocalSSLServerUtilities + CERTS_DIR = __dir__ + + def certs_dir + CERTS_DIR + end + + def initialize_ssl_server + @ssl_server_thread = nil + @ssl_server = nil + end + + def stop_ssl_server + if @ssl_server_thread + @ssl_server_thread.kill.join + @ssl_server_thread = nil + end + if @ssl_server + @ssl_server.close + @ssl_server = nil + end + end + + # mode: + # :non_pqc - Run single server with PQC-unsupported RSA (default) + # :pqc - Run single server with PQC-supported key exchange, + # X25519MLKEM768, and PQC-supported certificate, ML-DSA-65 + def start_ssl_server(config = {}) + mode = config.fetch(:mode, :non_pqc) + server = TCPServer.new(0) + ctx = OpenSSL::SSL::SSLContext.new + + case mode + when :non_pqc + ctx.cert = cert("ssl_cert.pem") + ctx.key = key("ssl_key.pem") + ctx.ca_file = File.join(certs_dir, "ca_cert.pem") + when :pqc + ctx.cert = cert("mldsa65_ssl_cert.pem") + ctx.key = key("mldsa65_ssl_key.pem") + ctx.ca_file = File.join(certs_dir, "mldsa65_ca_cert.pem") + ctx.groups = "X25519MLKEM768" + end + + ctx.verify_mode = config[:verify_mode] if config[:verify_mode] + @ssl_server = OpenSSL::SSL::SSLServer.new(server, ctx) + @ssl_server_thread = Thread.new do + loop do + ssl_client = @ssl_server.accept + Thread.new(ssl_client) do |client| + handle_request(client) + ensure + client.close + end + rescue OpenSSL::SSL::SSLError + # Ignore SSL errors because we're testing them implicitly + end + end + @ssl_server + end + + def handle_request(client) + request = client.gets + if request&.start_with?("GET /yaml") + client.print "HTTP/1.1 200 OK\r\nContent-Type: text/yaml\r\n\r\n--- true\n" + elsif request&.start_with?("GET /insecure_redirect") + location = request.match(/to=([^ ]+)/)[1] + client.print "HTTP/1.1 301 Moved Permanently\r\nLocation: #{location}\r\n\r\n" + else + client.print "HTTP/1.1 404 Not Found\r\n\r\n" + end + end + + def cert(filename) + 
OpenSSL::X509::Certificate.new(File.read(File.join(certs_dir, filename))) + end + + def key(filename) + OpenSSL::PKey.read(File.read(File.join(certs_dir, filename))) + end + + def without_pqc_support(&block) + # PQC algorithms ML-KEM and ML-DSA require OpenSSL >= 3.5. + # https://openssl-library.org/post/2025-04-08-openssl-35-final-release/ + unless OpenSSL::OPENSSL_VERSION_NUMBER >= 0x30500000 + yield "PQC algorithms require OpenSSL >= 3.5" + return + end + # ctx.groups (OpenSSL::SSL::SSLContext#groups) used in start_ssl_server + # mode :pqc requires Ruby OpenSSL >= 4.0. + unless Gem::Version.new(OpenSSL::VERSION) >= Gem::Version.new("4.0") + yield "PQC test requires Ruby OpenSSL >= 4.0" + return + end + # Even with a new enough OpenSSL, the runtime may keep PQC groups and + # signature algorithms out of its default negotiation lists (for example + # RHEL's system-wide crypto policies). The PQC server forces both, while + # the gem fetcher connects with the default client configuration, so a + # real loopback handshake is the only reliable way to tell whether this + # environment can negotiate PQC at all. + unless Gem::LocalSSLServerUtilities.support_pqc_handshake? + yield "PQC handshake is not available in this OpenSSL configuration" + end + end + + # Probe an actual PQC handshake between a forced-PQC server and a + # default-configured client, mirroring what the integration tests exercise. + # Memoized so the probe runs at most once per process. + def self.support_pqc_handshake? + return @support_pqc_handshake unless @support_pqc_handshake.nil? 
+ + @support_pqc_handshake = probe_pqc_handshake + end + + def self.probe_pqc_handshake + server = TCPServer.new("127.0.0.1", 0) + ctx = OpenSSL::SSL::SSLContext.new + ctx.cert = OpenSSL::X509::Certificate.new(File.read(File.join(CERTS_DIR, "mldsa65_ssl_cert.pem"))) + ctx.key = OpenSSL::PKey.read(File.read(File.join(CERTS_DIR, "mldsa65_ssl_key.pem"))) + ctx.groups = "X25519MLKEM768" + ssl_server = OpenSSL::SSL::SSLServer.new(server, ctx) + + port = server.addr[1] + server_thread = Thread.new do + client = ssl_server.accept + client.close + rescue OpenSSL::OpenSSLError + nil + end + + client_ctx = OpenSSL::SSL::SSLContext.new + client_ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE + socket = TCPSocket.new("127.0.0.1", port) + ssl = OpenSSL::SSL::SSLSocket.new(socket, client_ctx) + ssl.connect + ssl.close + true + rescue OpenSSL::OpenSSLError, SystemCallError + false + ensure + server_thread&.join(5) + server_thread&.kill if server_thread&.alive? + ssl_server&.close + server&.close + end +end diff --git a/test/rubygems/test_gem_remote_fetcher_local_ssl_server.rb b/test/rubygems/test_gem_remote_fetcher_local_ssl_server.rb index 780846d0156b1a..95e6d3ac65a80a 100644 --- a/test/rubygems/test_gem_remote_fetcher_local_ssl_server.rb +++ b/test/rubygems/test_gem_remote_fetcher_local_ssl_server.rb @@ -1,8 +1,7 @@ # frozen_string_literal: true require_relative "helper" -require "socket" -require "openssl" +require_relative "local_ssl_server_utilities" unless Gem::HAVE_OPENSSL warn "Skipping Gem::RemoteFetcher tests. openssl not found." 
@@ -13,28 +12,21 @@ class TestGemRemoteFetcherLocalSSLServer < Gem::TestCase include Gem::DefaultUserInteraction + include Gem::LocalSSLServerUtilities def setup super - @ssl_server_thread = nil - @ssl_server = nil + initialize_ssl_server end def teardown - if @ssl_server_thread - @ssl_server_thread.kill.join - @ssl_server_thread = nil - end - if @ssl_server - @ssl_server.close - @ssl_server = nil - end + stop_ssl_server super end def test_ssl_connection ssl_server = start_ssl_server - temp_ca_cert = File.join(__dir__, "ca_cert.pem") + temp_ca_cert = File.join(certs_dir, "ca_cert.pem") with_configured_fetcher(":ssl_ca_cert: #{temp_ca_cert}") do |fetcher| fetcher.fetch_path("https://localhost:#{ssl_server.addr[1]}/yaml") end @@ -44,7 +36,7 @@ def test_pqc_ssl_connection omit_unless_support_pqc ssl_server = start_ssl_server(mode: :pqc) - temp_ca_cert = File.join(__dir__, "mldsa65_ca_cert.pem") + temp_ca_cert = File.join(certs_dir, "mldsa65_ca_cert.pem") with_configured_fetcher(":ssl_ca_cert: #{temp_ca_cert}") do |fetcher| fetcher.fetch_path("https://localhost:#{ssl_server.addr[1]}/yaml") end @@ -55,8 +47,8 @@ def test_ssl_client_cert_auth_connection { verify_mode: OpenSSL::SSL::VERIFY_PEER | OpenSSL::SSL::VERIFY_FAIL_IF_NO_PEER_CERT } ) - temp_ca_cert = File.join(__dir__, "ca_cert.pem") - temp_client_cert = File.join(__dir__, "client.pem") + temp_ca_cert = File.join(certs_dir, "ca_cert.pem") + temp_client_cert = File.join(certs_dir, "client.pem") with_configured_fetcher( ":ssl_ca_cert: #{temp_ca_cert}\n" \ @@ -74,8 +66,8 @@ def test_pqc_ssl_client_cert_auth_connection verify_mode: OpenSSL::SSL::VERIFY_PEER | OpenSSL::SSL::VERIFY_FAIL_IF_NO_PEER_CERT ) - temp_ca_cert = File.join(__dir__, "mldsa65_ca_cert.pem") - temp_client_cert = File.join(__dir__, "mldsa65_client.pem") + temp_ca_cert = File.join(certs_dir, "mldsa65_ca_cert.pem") + temp_client_cert = File.join(certs_dir, "mldsa65_client.pem") with_configured_fetcher( ":ssl_ca_cert: #{temp_ca_cert}\n" \ @@ -90,8 +82,8 
@@ def test_do_not_allow_invalid_client_cert_auth_connection { verify_mode: OpenSSL::SSL::VERIFY_PEER | OpenSSL::SSL::VERIFY_FAIL_IF_NO_PEER_CERT } ) - temp_ca_cert = File.join(__dir__, "ca_cert.pem") - temp_client_cert = File.join(__dir__, "invalid_client.pem") + temp_ca_cert = File.join(certs_dir, "ca_cert.pem") + temp_client_cert = File.join(certs_dir, "invalid_client.pem") with_configured_fetcher( ":ssl_ca_cert: #{temp_ca_cert}\n" \ @@ -122,7 +114,7 @@ def test_ssl_connection_allow_verify_none def test_do_not_follow_insecure_redirect @server_uri = "http://example.com" ssl_server = start_ssl_server - temp_ca_cert = File.join(__dir__, "ca_cert.pem") + temp_ca_cert = File.join(certs_dir, "ca_cert.pem") expected_error_message = "redirecting to non-https resource: #{@server_uri} (https://localhost:#{ssl_server.addr[1]}/insecure_redirect?to=#{@server_uri})" @@ -164,121 +156,9 @@ def with_configured_fetcher(config_str = nil, &block) Gem.configuration = nil end - # mode: - # :non_pqc - Run single server with PQC-unsupported RSA (default) - # :pqc - Run single server with PQC-supported key exchange, - # X25519MLKEM768, and PQC-supported certificate, ML-DSA-65 - def start_ssl_server(config = {}) - mode = config.fetch(:mode, :non_pqc) - server = TCPServer.new(0) - ctx = OpenSSL::SSL::SSLContext.new - - case mode - when :non_pqc - ctx.cert = cert("ssl_cert.pem") - ctx.key = key("ssl_key.pem") - ctx.ca_file = File.join(__dir__, "ca_cert.pem") - when :pqc - ctx.cert = cert("mldsa65_ssl_cert.pem") - ctx.key = key("mldsa65_ssl_key.pem") - ctx.ca_file = File.join(__dir__, "mldsa65_ca_cert.pem") - ctx.groups = "X25519MLKEM768" - end - - ctx.verify_mode = config[:verify_mode] if config[:verify_mode] - @ssl_server = OpenSSL::SSL::SSLServer.new(server, ctx) - @ssl_server_thread = Thread.new do - loop do - ssl_client = @ssl_server.accept - Thread.new(ssl_client) do |client| - handle_request(client) - ensure - client.close - end - rescue OpenSSL::SSL::SSLError - # Ignore SSL errors 
because we're testing them implicitly - end - end - @ssl_server - end - - def handle_request(client) - request = client.gets - if request.start_with?("GET /yaml") - client.print "HTTP/1.1 200 OK\r\nContent-Type: text/yaml\r\n\r\n--- true\n" - elsif request.start_with?("GET /insecure_redirect") - location = request.match(/to=([^ ]+)/)[1] - client.print "HTTP/1.1 301 Moved Permanently\r\nLocation: #{location}\r\n\r\n" - else - client.print "HTTP/1.1 404 Not Found\r\n\r\n" - end - end - - def cert(filename) - OpenSSL::X509::Certificate.new(File.read(File.join(__dir__, filename))) - end - - def key(filename) - OpenSSL::PKey.read(File.read(File.join(__dir__, filename))) - end - def omit_unless_support_pqc - # PQC algorithms ML-KEM and ML-DSA require OpenSSL >= 3.5. - # https://openssl-library.org/post/2025-04-08-openssl-35-final-release/ - omit "PQC algorithms require OpenSSL >= 3.5" unless - OpenSSL::OPENSSL_VERSION_NUMBER >= 0x30500000 - # ctx.groups (OpenSSL::SSL::SSLContext#groups) used in start_ssl_server - # mode :pqc requires Ruby OpenSSL >= 4.0. - omit "PQC test requires Ruby OpenSSL >= 4.0" unless - Gem::Version.new(OpenSSL::VERSION) >= Gem::Version.new("4.0") - # Even with a new enough OpenSSL, the runtime may keep PQC groups and - # signature algorithms out of its default negotiation lists (for example - # RHEL's system-wide crypto policies). The PQC server forces both, while - # the gem fetcher connects with the default client configuration, so a - # real loopback handshake is the only reliable way to tell whether this - # environment can negotiate PQC at all. - omit "PQC handshake is not available in this OpenSSL configuration" unless - self.class.support_pqc_handshake? - end - - # Probe an actual PQC handshake between a forced-PQC server and a - # default-configured client, mirroring what the integration tests exercise. - # Memoized so the probe runs at most once per process. - def self.support_pqc_handshake? 
- return @support_pqc_handshake unless @support_pqc_handshake.nil? - - @support_pqc_handshake = probe_pqc_handshake - end - - def self.probe_pqc_handshake - server = TCPServer.new("127.0.0.1", 0) - ctx = OpenSSL::SSL::SSLContext.new - ctx.cert = OpenSSL::X509::Certificate.new(File.read(File.join(__dir__, "mldsa65_ssl_cert.pem"))) - ctx.key = OpenSSL::PKey.read(File.read(File.join(__dir__, "mldsa65_ssl_key.pem"))) - ctx.groups = "X25519MLKEM768" - ssl_server = OpenSSL::SSL::SSLServer.new(server, ctx) - - port = server.addr[1] - server_thread = Thread.new do - client = ssl_server.accept - client.close - rescue OpenSSL::OpenSSLError - nil + without_pqc_support do |message| + omit message end - - client_ctx = OpenSSL::SSL::SSLContext.new - client_ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE - socket = TCPSocket.new("127.0.0.1", port) - ssl = OpenSSL::SSL::SSLSocket.new(socket, client_ctx) - ssl.connect - ssl.close - true - rescue OpenSSL::OpenSSLError, SystemCallError - false - ensure - server_thread&.join(5) - server_thread&.kill if server_thread&.alive? - ssl_server&.close - server&.close end end if Gem::HAVE_OPENSSL From c1134f871f50e95c84f932015dec92c2d2385551 Mon Sep 17 00:00:00 2001 From: Jun Aruga Date: Mon, 29 Jun 2026 14:43:39 +0100 Subject: [PATCH 20/31] [ruby/rubygems] Fix Artifice.deactivate to properly restore Gem::Net::HTTP The hardcoded Gem::Net::HTTP in Artifice.deactivate, is actually the replaced Artifice::Net::HTTP. This doesn't restore the original Gem::Net::HTTP. Restore the saved original Gem::Net::HTTP in Artifice.deactivate This issue caused the tests sometimes failed in spec/bundler/fetcher/gem_remote_fetcher_local_ssl_server_spec.rb using Artifice::Net::HTTP unintentionally. Because localhost is not included in spec/support/artifice/helpers/endpoint.rb - permitted_hosts caused HTTP status 403 (response.body: "Host not permitted"). But Gem::Net::HTTP should be used in the tests instead of Artifice::Net::HTTP. 
Possibly this issue happens when Artifice.deactivate is called in other tests such as spec/commands/ssl_spec.rb. That's why this issue sometimes happened rather than always. ``` 1) Bundler::Fetcher local SSL server #connection PQC connects Failure/Error: expect(response.code).to eq("200") expected: # "200" got: # "403" (compared using ==) # ./spec/bundler/fetcher/gem_remote_fetcher_local_ssl_server_spec.rb:49:in 'block (4 levels) in ' # ./spec/spec_helper.rb:164:in 'block (4 levels) in ' # ./spec/spec_helper.rb:164:in 'block (3 levels) in ' # ./spec/support/helpers.rb:414:in 'block in Spec::Helpers#with_gem_path_as' # ./spec/support/helpers.rb:428:in 'Spec::Helpers#without_env_side_effects' # ./spec/support/helpers.rb:409:in 'Spec::Helpers#with_gem_path_as' # ./spec/spec_helper.rb:163:in 'block (2 levels) in ' # ./lib/rubygems.rb:306:in 'Kernel#load' # ./lib/rubygems.rb:306:in 'Gem.activate_and_load_bin_path' ``` spec/commands/ssl_spec.rb ``` ... after(:each) do ... Artifice.deactivate ... end ... ``` Assisted-by: Claude:claude-opus-4-6[1m] https://github.com/ruby/rubygems/commit/0ae3860638 --- spec/bundler/support/artifice/helpers/artifice.rb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/spec/bundler/support/artifice/helpers/artifice.rb b/spec/bundler/support/artifice/helpers/artifice.rb index 788268295c7d94..61bac6231be1e7 100644 --- a/spec/bundler/support/artifice/helpers/artifice.rb +++ b/spec/bundler/support/artifice/helpers/artifice.rb @@ -12,13 +12,18 @@ module Artifice def self.activate_with(endpoint) require_relative "rack_request" + # Preserve the original on first activation only. Without ||=, a second + # activate_with call saves the already-replaced Artifice::Net::HTTP, so + # deactivate would fail to restore the real Gem::Net::HTTP. + @original_net_http ||= ::Gem::Net::HTTP Net::HTTP.endpoint = endpoint replace_net_http(Artifice::Net::HTTP) end # Deactivate the Artifice replacement. 
def self.deactivate - replace_net_http(::Gem::Net::HTTP) + replace_net_http(@original_net_http) if @original_net_http + @original_net_http = nil end def self.replace_net_http(value) From b44dae0627b4e2e21f1306d914fc5e68d1d4d7c8 Mon Sep 17 00:00:00 2001 From: Jun Aruga Date: Mon, 29 Jun 2026 18:40:11 +0100 Subject: [PATCH 21/31] [ruby/rubygems] Fix parallel_installer_spec by activating Artifice explicitly. This commit fixes the following failures. The `require "support/artifice/compact_index"` is not enough because the file is not loaded when it is called a second time. ``` 1) Bundler::ParallelInstaller connect to make jobserver takes all available slots Failure/Error: raise HTTPError, e.message Bundler::HTTPError: Could not reach host gem.repo2. Check your network connection and try again. # ./spec/bundler/installer/parallel_installer_spec.rb:151:in 'block (4 levels) in ' # ./spec/bundler/installer/parallel_installer_spec.rb:221:in 'RSpec::ExampleGroups::BundlerParallelInstaller::ConnectToMakeJobserver#redefine_build_jobs' # ./spec/bundler/installer/parallel_installer_spec.rb:150:in 'block (3 levels) in ' # ./spec/spec_helper.rb:164:in 'block (4 levels) in ' # ./spec/spec_helper.rb:164:in 'block (3 levels) in ' # ./spec/support/helpers.rb:414:in 'block in Spec::Helpers#with_gem_path_as' # ./spec/support/helpers.rb:428:in 'Spec::Helpers#without_env_side_effects' # ./spec/support/helpers.rb:409:in 'Spec::Helpers#with_gem_path_as' # ./spec/spec_helper.rb:163:in 'block (2 levels) in ' # ./lib/rubygems.rb:306:in 'Kernel#load' # ./lib/rubygems.rb:306:in 'Gem.activate_and_load_bin_path' # ------------------ # --- Caused by: --- # Bundler::HTTPError: # Could not reach host gem.repo2. Check your network connection and try again. # ./spec/bundler/installer/parallel_installer_spec.rb:151:in 'block (4 levels) in ' ... 
``` Assisted-by: Claude:claude-opus-4-6[1m] https://github.com/ruby/rubygems/commit/62d4fb06fc --- spec/bundler/bundler/installer/parallel_installer_spec.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spec/bundler/bundler/installer/parallel_installer_spec.rb b/spec/bundler/bundler/installer/parallel_installer_spec.rb index 528dc1ae93e525..1f057cc4061145 100644 --- a/spec/bundler/bundler/installer/parallel_installer_spec.rb +++ b/spec/bundler/bundler/installer/parallel_installer_spec.rb @@ -9,6 +9,7 @@ describe "priority queue" do before do require "support/artifice/compact_index" + Artifice.activate_with(CompactIndexAPI) @previous_client = Gem::Request::ConnectionPools.client Gem::Request::ConnectionPools.client = Gem::Net::HTTP @@ -98,6 +99,7 @@ end require "support/artifice/compact_index" + Artifice.activate_with(CompactIndexAPI) @previous_client = Gem::Request::ConnectionPools.client Gem::Request::ConnectionPools.client = Gem::Net::HTTP From e021de15afd6ebf49e69424ce9169fdacf9a90b0 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Wed, 1 Jul 2026 19:19:56 +0900 Subject: [PATCH 22/31] Trivial refactor around rb_ary_subseq_step Check for `step` before the length, although it looks impossible to create 0-step `ArithmeticSequence` currently. 
--- array.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/array.c b/array.c index 600543d7f0bf8e..6514015de21628 100644 --- a/array.c +++ b/array.c @@ -1740,32 +1740,29 @@ rb_ary_entry(VALUE ary, long offset) return rb_ary_entry_internal(ary, offset); } -static VALUE -rb_ary_subseq_step(VALUE ary, long beg, long len, long step) +static long +ary_subseq_len(VALUE ary, long beg, long len) { - VALUE klass; long alen = RARRAY_LEN(ary); - if (beg > alen) return Qnil; - if (beg < 0 || len < 0) return Qnil; + if (beg > alen) return -1; + if (beg < 0 || len < 0) return -1; if (alen < len || alen < beg + len) { len = alen - beg; } - klass = rb_cArray; - if (len == 0) return ary_new(klass, 0); - if (step == 0) - rb_raise(rb_eArgError, "slice step cannot be zero"); - if (step == 1) - return ary_make_partial(ary, klass, beg, len); - else - return ary_make_partial_step(ary, klass, beg, len, step); + ASSUME(len >= 0); + return len; } VALUE rb_ary_subseq(VALUE ary, long beg, long len) { - return rb_ary_subseq_step(ary, beg, len, 1); + const VALUE klass = rb_cArray; + len = ary_subseq_len(ary, beg, len); + if (len < 0) return Qnil; + if (len == 0) return ary_new(klass, 0); + return ary_make_partial(ary, klass, beg, len); } static VALUE rb_ary_aref2(VALUE ary, VALUE b, VALUE e); @@ -1913,6 +1910,7 @@ VALUE rb_ary_aref1(VALUE ary, VALUE arg) { long beg, len, step; + const VALUE klass = rb_cArray; /* special case - speeding up */ if (FIXNUM_P(arg)) { @@ -1925,7 +1923,11 @@ rb_ary_aref1(VALUE ary, VALUE arg) case Qnil: return Qnil; default: - return rb_ary_subseq_step(ary, beg, len, step); + if (step == 0) rb_raise(rb_eArgError, "slice step cannot be zero"); + len = ary_subseq_len(ary, beg, len); + if (len == 0) return ary_new(klass, 0); + if (step == 1) return ary_make_partial(ary, klass, beg, len); + return ary_make_partial_step(ary, klass, beg, len, step); } return rb_ary_entry(ary, NUM2LONG(arg)); From 
8e801930eb02f0378eddf0a51cd060091f9b0cd1 Mon Sep 17 00:00:00 2001 From: Burdette Lamar Date: Wed, 1 Jul 2026 23:18:18 -0500 Subject: [PATCH 23/31] [DOC] Doc for Pathname#opendir --- pathname_builtin.rb | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/pathname_builtin.rb b/pathname_builtin.rb index 7a1c8a06a8c4a9..beaff62de2acfa 100644 --- a/pathname_builtin.rb +++ b/pathname_builtin.rb @@ -2077,7 +2077,32 @@ def mkdir(...) Dir.mkdir(@path, ...) end # See Dir.rmdir. Remove the referenced directory. def rmdir() Dir.rmdir(@path) end - # See Dir.open. + # :markup: markdown + # + # call-seq: + # opendir {|dir| ... } -> object + # opendir -> dir + # + # Creates a Dir object `dir` for the directory at the path represented by `self`; + # opens `dir`. + # + # With a block given, calls the block with `dir`; + # on block exit, closes `dir` and returns the block's return value: + # + # ```ruby + # pn = Pathname('.') + # pn.opendir {|dir| dir.entries.take(3) } + # # => ["README.md", "html", ".git"] + # ``` + # + # With no block given, returns the open directory `dir`: + # + # ```ruby + # dir = pn.opendir # => # + # dir.entries.take(3) # => ["README.md", "html", ".git"] + # dir.close + # ``` + # def opendir(&block) # :yield: dir Dir.open(@path, &block) end From cfb1a231881ef8887df7568c6a96171b34047161 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Tue, 30 Jun 2026 10:44:12 +0900 Subject: [PATCH 24/31] Make rb_shape_get_root_shape static --- shape.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shape.c b/shape.c index 688635971fdc73..e6f4726a1daf73 100644 --- a/shape.c +++ b/shape.c @@ -317,7 +317,7 @@ rb_shape_tree_t rb_shape_tree = { 0 }; // Should be on its own cache line static RUBY_ALIGNAS(128) rb_atomic_t shape_next_id; -rb_shape_t * +static rb_shape_t * rb_shape_get_root_shape(void) { return rb_shape_tree.shape_list; From 439c06b4792e62e0549f15f15de47cd8e0b15c5b Mon Sep 17 00:00:00 2001 From: Sampo Kuokkanen 
Date: Thu, 2 Jul 2026 13:26:34 +0900 Subject: [PATCH 25/31] Annotate Symbol#empty? as leaf (#17464) Move `Symbol#empty?` from the C `sym_empty` into `symbol.rb`, implemented as `RBOOL(RSTRING_LEN(rb_sym2str(self)) == 0)`, mirroring the existing Symbol#to_s and #name leaf builtins. Also added an `annotate_builtin! ` in `zjit/src/cruby_methods.rs` to tell ZJIT that the method does not need GC now. `Symbol#empty?` as a leaf builtin, via `benchmark-driver` (`GC.disable`, `loop_count: 20_000_000`, Apple M1). Speedup vs master (CFUNC `sym_empty`): | benchmark | interpreter | `--yjit` | `--zjit` | |---|---:|---:|---:| | `:"".empty?` | **1.24x** | **1.30x** | **1.19x** | | `:abcdefgh.empty?` | **1.19x** | **1.19x** | **1.17x** | Windows native and WSL2 also get faster: | benchmark | interpreter | |---|---:| | `:"".empty?` | **1.47x** | | `:abcdefgh.empty?` | **1.36x** | ### WSL2 (Ubuntu) `x86_64-linux` | benchmark | interpreter | `--yjit` | `--zjit` | |---|---:|---:|---:| | `:"".empty?` | **1.39x** | **1.33x** | **1.32x** | | `:abcdefgh.empty?` | **1.37x** | **1.39x** | **1.33x** | --- benchmark/symbol_empty.yml | 8 ++++++++ string.c | 15 --------------- symbol.rb | 9 +++++++++ zjit/src/cruby_methods.rs | 1 + 4 files changed, 18 insertions(+), 15 deletions(-) create mode 100644 benchmark/symbol_empty.yml diff --git a/benchmark/symbol_empty.yml b/benchmark/symbol_empty.yml new file mode 100644 index 00000000000000..0bcabf01177d75 --- /dev/null +++ b/benchmark/symbol_empty.yml @@ -0,0 +1,8 @@ +prelude: | + sym0 = :"" + sym8 = :abcdefgh + GC.disable +benchmark: + symbol_empty-0: sym0.empty? + symbol_empty-8: sym8.empty? +loop_count: 20000000 diff --git a/string.c b/string.c index 074a4a514bbfa3..a499c60a817a28 100644 --- a/string.c +++ b/string.c @@ -12622,20 +12622,6 @@ sym_length(VALUE sym) return rb_str_length(rb_sym2str(sym)); } -/* - * call-seq: - * empty? -> true or false - * - * Returns +true+ if +self+ is :'', +false+ otherwise. 
- * - */ - -static VALUE -sym_empty(VALUE sym) -{ - return rb_str_empty(rb_sym2str(sym)); -} - /* * call-seq: * upcase(mapping) -> symbol @@ -13072,7 +13058,6 @@ Init_String(void) rb_define_method(rb_cSymbol, "slice", sym_aref, -1); rb_define_method(rb_cSymbol, "length", sym_length, 0); rb_define_method(rb_cSymbol, "size", sym_length, 0); - rb_define_method(rb_cSymbol, "empty?", sym_empty, 0); rb_define_method(rb_cSymbol, "match", sym_match_m, -1); rb_define_method(rb_cSymbol, "match?", sym_match_m_p, -1); diff --git a/symbol.rb b/symbol.rb index 458d02b177e8f3..089e91f63bf9ed 100644 --- a/symbol.rb +++ b/symbol.rb @@ -28,6 +28,15 @@ def name Primitive.cexpr! 'rb_sym2str(self)' end + # call-seq: + # empty? -> true or false + # + # Returns +true+ if +self+ is :'', +false+ otherwise. + def empty? + Primitive.attr! :leaf + Primitive.cexpr! 'RBOOL(RSTRING_LEN(rb_sym2str(self)) == 0)' + end + # call-seq: # to_sym -> self # diff --git a/zjit/src/cruby_methods.rs b/zjit/src/cruby_methods.rs index 616808f84c88ab..fec3b3db5ec284 100644 --- a/zjit/src/cruby_methods.rs +++ b/zjit/src/cruby_methods.rs @@ -290,6 +290,7 @@ pub fn init() -> Annotations { annotate_builtin!(rb_cSymbol, "to_s", types::StringExact); annotate_builtin!(rb_cString, "ascii_only?", inline_string_ascii_only_p, types::BoolExact, no_gc, leaf); annotate_builtin!(rb_cString, "valid_encoding?", inline_string_valid_encoding_p, types::BoolExact, no_gc, leaf); + annotate_builtin!(rb_cSymbol, "empty?", types::BoolExact); // Array iteration builtins (used in with_jit Array#each, map, select, find) builtin_funcs.insert(rb_jit_fixnum_inc as *mut c_void, FnProperties { inline: inline_fixnum_inc, return_type: types::Fixnum, ..Default::default() }); From 89a5a0a1de8639576dcf2fbc66607cef35851b6a Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Thu, 2 Jul 2026 13:45:02 +0900 Subject: [PATCH 26/31] Update depend for psych https://github.com/ruby/psych/pull/805 added files psych_emitter_fy.c and psych_parser_fy.c so we need to 
update the depend file too. --- ext/psych/depend | 356 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 356 insertions(+) diff --git a/ext/psych/depend b/ext/psych/depend index 95175841a2e541..a361413eccc3c8 100644 --- a/ext/psych/depend +++ b/ext/psych/depend @@ -369,6 +369,184 @@ psych_emitter.o: psych_emitter.h psych_emitter.o: psych_parser.h psych_emitter.o: psych_to_ruby.h psych_emitter.o: psych_yaml_tree.h +psych_emitter_fy.o: $(RUBY_EXTCONF_H) +psych_emitter_fy.o: $(arch_hdrdir)/ruby/config.h +psych_emitter_fy.o: $(hdrdir)/ruby.h +psych_emitter_fy.o: $(hdrdir)/ruby/assert.h +psych_emitter_fy.o: $(hdrdir)/ruby/backward.h +psych_emitter_fy.o: $(hdrdir)/ruby/backward/2/assume.h +psych_emitter_fy.o: $(hdrdir)/ruby/backward/2/attributes.h +psych_emitter_fy.o: $(hdrdir)/ruby/backward/2/bool.h +psych_emitter_fy.o: $(hdrdir)/ruby/backward/2/inttypes.h +psych_emitter_fy.o: $(hdrdir)/ruby/backward/2/limits.h +psych_emitter_fy.o: $(hdrdir)/ruby/backward/2/long_long.h +psych_emitter_fy.o: $(hdrdir)/ruby/backward/2/stdalign.h +psych_emitter_fy.o: $(hdrdir)/ruby/backward/2/stdarg.h +psych_emitter_fy.o: $(hdrdir)/ruby/defines.h +psych_emitter_fy.o: $(hdrdir)/ruby/encoding.h +psych_emitter_fy.o: $(hdrdir)/ruby/intern.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/abi.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/anyargs.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/arithmetic.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/arithmetic/char.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/arithmetic/double.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/arithmetic/fixnum.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/arithmetic/gid_t.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/arithmetic/int.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/arithmetic/intptr_t.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/arithmetic/long.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/arithmetic/long_long.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/arithmetic/mode_t.h 
+psych_emitter_fy.o: $(hdrdir)/ruby/internal/arithmetic/off_t.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/arithmetic/pid_t.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/arithmetic/short.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/arithmetic/size_t.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/arithmetic/st_data_t.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/arithmetic/uid_t.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/assume.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/alloc_size.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/artificial.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/cold.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/const.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/constexpr.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/deprecated.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/diagnose_if.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/enum_extensibility.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/error.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/flag_enum.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/forceinline.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/format.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/maybe_unused.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/noalias.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/nodiscard.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/noexcept.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/noinline.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/nonnull.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/noreturn.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/packed_struct.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/pure.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/restrict.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/returns_nonnull.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/warning.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/attr/weakref.h 
+psych_emitter_fy.o: $(hdrdir)/ruby/internal/cast.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/compiler_is.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/compiler_is/apple.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/compiler_is/clang.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/compiler_is/gcc.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/compiler_is/intel.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/compiler_is/msvc.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/compiler_is/sunpro.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/compiler_since.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/config.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/constant_p.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/core.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/core/rarray.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/core/rbasic.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/core/rbignum.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/core/rclass.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/core/rdata.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/core/rfile.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/core/rhash.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/core/robject.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/core/rregexp.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/core/rstring.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/core/rstruct.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/core/rtypeddata.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/ctype.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/dllexport.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/dosish.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/encoding/coderange.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/encoding/ctype.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/encoding/encoding.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/encoding/pathname.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/encoding/re.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/encoding/sprintf.h 
+psych_emitter_fy.o: $(hdrdir)/ruby/internal/encoding/string.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/encoding/symbol.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/encoding/transcode.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/error.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/eval.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/event.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/fl_type.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/gc.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/glob.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/globals.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/has/attribute.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/has/builtin.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/has/c_attribute.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/has/cpp_attribute.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/has/declspec_attribute.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/has/extension.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/has/feature.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/has/warning.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/array.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/bignum.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/class.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/compar.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/complex.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/cont.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/dir.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/enum.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/enumerator.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/error.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/eval.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/file.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/hash.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/io.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/load.h +psych_emitter_fy.o: 
$(hdrdir)/ruby/internal/intern/marshal.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/numeric.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/object.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/parse.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/proc.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/process.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/random.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/range.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/rational.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/re.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/ruby.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/select.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/select/largesize.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/set.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/signal.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/sprintf.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/string.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/struct.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/thread.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/time.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/variable.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/intern/vm.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/interpreter.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/iterator.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/memory.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/method.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/module.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/newobj.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/scan_args.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/special_consts.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/static_assert.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/stdalign.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/stdbool.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/stdckdint.h 
+psych_emitter_fy.o: $(hdrdir)/ruby/internal/symbol.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/value.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/value_type.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/variable.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/warning_push.h +psych_emitter_fy.o: $(hdrdir)/ruby/internal/xmalloc.h +psych_emitter_fy.o: $(hdrdir)/ruby/missing.h +psych_emitter_fy.o: $(hdrdir)/ruby/onigmo.h +psych_emitter_fy.o: $(hdrdir)/ruby/oniguruma.h +psych_emitter_fy.o: $(hdrdir)/ruby/ruby.h +psych_emitter_fy.o: $(hdrdir)/ruby/st.h +psych_emitter_fy.o: $(hdrdir)/ruby/subst.h +psych_emitter_fy.o: psych.h +psych_emitter_fy.o: psych_emitter.h +psych_emitter_fy.o: psych_emitter_fy.c +psych_emitter_fy.o: psych_parser.h +psych_emitter_fy.o: psych_to_ruby.h +psych_emitter_fy.o: psych_yaml_tree.h psych_parser.o: $(RUBY_EXTCONF_H) psych_parser.o: $(arch_hdrdir)/ruby/config.h psych_parser.o: $(hdrdir)/ruby.h @@ -547,6 +725,184 @@ psych_parser.o: psych_parser.c psych_parser.o: psych_parser.h psych_parser.o: psych_to_ruby.h psych_parser.o: psych_yaml_tree.h +psych_parser_fy.o: $(RUBY_EXTCONF_H) +psych_parser_fy.o: $(arch_hdrdir)/ruby/config.h +psych_parser_fy.o: $(hdrdir)/ruby.h +psych_parser_fy.o: $(hdrdir)/ruby/assert.h +psych_parser_fy.o: $(hdrdir)/ruby/backward.h +psych_parser_fy.o: $(hdrdir)/ruby/backward/2/assume.h +psych_parser_fy.o: $(hdrdir)/ruby/backward/2/attributes.h +psych_parser_fy.o: $(hdrdir)/ruby/backward/2/bool.h +psych_parser_fy.o: $(hdrdir)/ruby/backward/2/inttypes.h +psych_parser_fy.o: $(hdrdir)/ruby/backward/2/limits.h +psych_parser_fy.o: $(hdrdir)/ruby/backward/2/long_long.h +psych_parser_fy.o: $(hdrdir)/ruby/backward/2/stdalign.h +psych_parser_fy.o: $(hdrdir)/ruby/backward/2/stdarg.h +psych_parser_fy.o: $(hdrdir)/ruby/defines.h +psych_parser_fy.o: $(hdrdir)/ruby/encoding.h +psych_parser_fy.o: $(hdrdir)/ruby/intern.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/abi.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/anyargs.h 
+psych_parser_fy.o: $(hdrdir)/ruby/internal/arithmetic.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/arithmetic/char.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/arithmetic/double.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/arithmetic/fixnum.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/arithmetic/gid_t.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/arithmetic/int.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/arithmetic/intptr_t.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/arithmetic/long.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/arithmetic/long_long.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/arithmetic/mode_t.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/arithmetic/off_t.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/arithmetic/pid_t.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/arithmetic/short.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/arithmetic/size_t.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/arithmetic/st_data_t.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/arithmetic/uid_t.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/assume.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/alloc_size.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/artificial.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/cold.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/const.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/constexpr.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/deprecated.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/diagnose_if.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/enum_extensibility.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/error.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/flag_enum.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/forceinline.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/format.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/maybe_unused.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/noalias.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/nodiscard.h 
+psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/noexcept.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/noinline.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/nonnull.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/noreturn.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/packed_struct.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/pure.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/restrict.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/returns_nonnull.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/warning.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/attr/weakref.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/cast.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/compiler_is.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/compiler_is/apple.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/compiler_is/clang.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/compiler_is/gcc.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/compiler_is/intel.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/compiler_is/msvc.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/compiler_is/sunpro.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/compiler_since.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/config.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/constant_p.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/core.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/core/rarray.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/core/rbasic.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/core/rbignum.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/core/rclass.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/core/rdata.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/core/rfile.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/core/rhash.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/core/robject.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/core/rregexp.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/core/rstring.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/core/rstruct.h +psych_parser_fy.o: 
$(hdrdir)/ruby/internal/core/rtypeddata.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/ctype.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/dllexport.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/dosish.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/encoding/coderange.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/encoding/ctype.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/encoding/encoding.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/encoding/pathname.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/encoding/re.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/encoding/sprintf.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/encoding/string.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/encoding/symbol.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/encoding/transcode.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/error.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/eval.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/event.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/fl_type.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/gc.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/glob.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/globals.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/has/attribute.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/has/builtin.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/has/c_attribute.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/has/cpp_attribute.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/has/declspec_attribute.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/has/extension.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/has/feature.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/has/warning.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/array.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/bignum.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/class.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/compar.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/complex.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/cont.h +psych_parser_fy.o: 
$(hdrdir)/ruby/internal/intern/dir.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/enum.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/enumerator.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/error.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/eval.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/file.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/hash.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/io.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/load.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/marshal.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/numeric.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/object.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/parse.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/proc.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/process.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/random.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/range.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/rational.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/re.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/ruby.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/select.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/select/largesize.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/set.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/signal.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/sprintf.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/string.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/struct.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/thread.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/time.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/variable.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/intern/vm.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/interpreter.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/iterator.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/memory.h 
+psych_parser_fy.o: $(hdrdir)/ruby/internal/method.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/module.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/newobj.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/scan_args.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/special_consts.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/static_assert.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/stdalign.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/stdbool.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/stdckdint.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/symbol.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/value.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/value_type.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/variable.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/warning_push.h +psych_parser_fy.o: $(hdrdir)/ruby/internal/xmalloc.h +psych_parser_fy.o: $(hdrdir)/ruby/missing.h +psych_parser_fy.o: $(hdrdir)/ruby/onigmo.h +psych_parser_fy.o: $(hdrdir)/ruby/oniguruma.h +psych_parser_fy.o: $(hdrdir)/ruby/ruby.h +psych_parser_fy.o: $(hdrdir)/ruby/st.h +psych_parser_fy.o: $(hdrdir)/ruby/subst.h +psych_parser_fy.o: psych.h +psych_parser_fy.o: psych_emitter.h +psych_parser_fy.o: psych_parser.h +psych_parser_fy.o: psych_parser_fy.c +psych_parser_fy.o: psych_to_ruby.h +psych_parser_fy.o: psych_yaml_tree.h psych_to_ruby.o: $(RUBY_EXTCONF_H) psych_to_ruby.o: $(arch_hdrdir)/ruby/config.h psych_to_ruby.o: $(hdrdir)/ruby.h From 99ae28f4c06979c327df2096ec11d7a1528f74ac Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Fri, 26 Jun 2026 09:30:58 -0400 Subject: [PATCH 27/31] ZJIT: Merge C function specialization into type_specialize No need for a separate pass. We can handle it with the other specialization. 
--- zjit/src/hir.rs | 474 ++++++++++++++++++-------------------- zjit/src/hir/opt_tests.rs | 194 ++++++++-------- 2 files changed, 315 insertions(+), 353 deletions(-) diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 2ecab51e76c585..d35b00bad53f58 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -3774,7 +3774,7 @@ impl Function { self.try_rewrite_freeze(block, insn_id, recv, state), Insn::Send { recv, block: None, args, state, cd, .. } if ruby_call_method_id(cd) == ID!(minusat) && args.is_empty() => self.try_rewrite_uminus(block, insn_id, recv, state), - Insn::Send { mut recv, cd, state, block: send_block, args, .. } => { + ref send@Insn::Send { mut recv, cd, state, block: send_block, ref args, .. } => { let has_block = send_block.is_some(); let (klass, profiled_type) = match self.resolve_receiver_type(recv, self.type_of(recv), state) { ReceiverTypeResolution::StaticallyKnown { class } => (class, None), @@ -3837,8 +3837,8 @@ impl Function { } // If the call site info indicates that the `Function` has overly complex arguments, then do not optimize into a `SendDirect`. - // Optimized methods(`VM_METHOD_TYPE_OPTIMIZED`) handle their own argument constraints (e.g., kw_splat for Proc call). - if def_type != VM_METHOD_TYPE_OPTIMIZED && unspecializable_call_type(flags) { + // Optimized methods(`VM_METHOD_TYPE_OPTIMIZED`) and C methods handle their own argument constraints (e.g., kw_splat for Proc call). 
+ if def_type != VM_METHOD_TYPE_OPTIMIZED && def_type != VM_METHOD_TYPE_CFUNC && unspecializable_call_type(flags) { self.count_complex_call_features(block, flags); self.set_dynamic_send_reason(insn_id, ComplexArgPass); self.push_insn_id(block, insn_id); continue; @@ -4064,6 +4064,221 @@ impl Function { self.push_insn_id(block, insn_id); continue; }, }; + } else if def_type == VM_METHOD_TYPE_CFUNC && !unsafe { rb_zjit_method_tracing_currently_enabled() } { + // Try to reduce a Send insn to a CCallWithFrame + fn reduce_send_to_ccall( + fun: &mut Function, + block: BlockId, + send: Insn, + send_insn_id: InsnId, + recv_class: VALUE, + profiled_type: Option, + cme: *const rb_callable_method_entry_struct, + ) -> Result<(), ()> { + let Insn::Send { mut recv, cd, block: send_block, args, state, .. } = send else { + return Err(()); + }; + + let call_info = unsafe { (*cd).ci }; + let argc = unsafe { vm_ci_argc(call_info) }; + let method_id = unsafe { rb_vm_ci_mid(call_info) }; + + let ci_flags = unsafe { vm_ci_flag(call_info) }; + // When seeing &block argument, fall back to dynamic dispatch for now + // TODO: Support block forwarding + if unspecializable_c_call_type(ci_flags) { + // Only count features NOT already counted in type_specialize. 
+ if !unspecializable_call_type(ci_flags) { + fun.count_complex_call_features(block, ci_flags); + } + fun.set_dynamic_send_reason(send_insn_id, ComplexArgPass); + return Err(()); + } + + let blockiseq = match send_block { + Some(BlockHandler::BlockArg) => unreachable!("unsupported &block should have been filtered out"), + Some(BlockHandler::BlockIseq(blockiseq)) => Some(blockiseq), + None => None, + }; + + let cfunc = unsafe { get_cme_def_body_cfunc(cme) }; + // Find the `argc` (arity) of the C method, which describes the parameters it expects + let cfunc_argc = unsafe { get_mct_argc(cfunc) }; + let cfunc_ptr = unsafe { get_mct_func(cfunc) }.cast(); + let name = unsafe { (*cme).called_id }; + + // Look up annotations + let props = ZJITState::get_method_annotations().get_cfunc_properties(cme); + if props.is_none() && get_option!(stats) { + fun.count_not_annotated_cfunc(block, cme); + } + let props = props.unwrap_or_default(); + let return_type = props.return_type; + let elidable = match blockiseq { + Some(_) => false, // Don't consider cfuncs with block arguments as elidable for now + None => props.elidable, + }; + + match cfunc_argc { + 0.. => { + // (self, arg0, arg1, ..., argc) form + // + // Bail on argc mismatch + if argc != cfunc_argc as u32 { + fun.set_dynamic_send_reason(send_insn_id, ArgcParamMismatch); + return Err(()); + } + + // TODO: Support passing arguments on the stack in C calls + // +1 for self + if (argc as usize)+1 > C_ARG_OPNDS.len() { + fun.set_dynamic_send_reason(send_insn_id, TooManyArgsForLir); + return Err(()); + } + + // Check singleton class assumption first, before emitting other patchpoints + if !fun.assume_no_singleton_classes(block, recv_class, state) { + fun.set_dynamic_send_reason(send_insn_id, SingletonClassSeen); + return Err(()); + } + + // Commit to the replacement. Put PatchPoint. 
+ fun.gen_patch_points_for_optimized_ccall(block, recv_class, method_id, cme, state); + + if let Some(profiled_type) = profiled_type { + // Guard receiver class + recv = fun.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state, recompile: Some(Recompile) }); + fun.insn_types[recv.0] = fun.infer_type(recv); + } + + // Try inlining the cfunc into HIR. Only inline if we don't have a block argument + if blockiseq.is_none() { + let tmp_block = fun.new_block(u32::MAX); + if let Some(replacement) = (props.inline)(fun, tmp_block, recv, &args, state) { + // Copy contents of tmp_block to block + assert_ne!(block, tmp_block); + let insns = std::mem::take(&mut fun.blocks[tmp_block.0].insns); + fun.blocks[block.0].insns.extend(insns); + fun.count(block, Counter::inline_cfunc_optimized_send_count); + fun.make_equal_to(send_insn_id, replacement); + if fun.type_of(replacement).bit_equal(types::Any) { + // Not set yet; infer type + fun.insn_types[replacement.0] = fun.infer_type(replacement); + } + fun.remove_block(tmp_block); + return Ok(()); + } + + // Only allow leaf calls if we don't have a block argument + if props.leaf && props.no_gc { + fun.count(block, Counter::inline_cfunc_optimized_send_count); + let owner = unsafe { (*cme).owner }; + let ccall = fun.push_insn(block, Insn::CCall { cfunc: cfunc_ptr, recv, args, name, owner, return_type, elidable }); + fun.make_equal_to(send_insn_id, ccall); + return Ok(()); + } + } + + // Emit a call + if get_option!(stats) { + fun.count_not_inlined_cfunc(block, cme); + } + let ccall = fun.push_insn(block, Insn::CCallWithFrame(Box::new(CCallWithFrameData { + cd, + cfunc: cfunc_ptr, + recv, + args, + cme, + name, + state, + return_type, + elidable, + block: blockiseq.map(BlockHandler::BlockIseq), + }))); + fun.make_equal_to(send_insn_id, ccall); + Ok(()) + } + // Variadic method + -1 => { + // The method gets a pointer to the first argument + // func(int argc, VALUE *argv, VALUE recv) + + 
// Check singleton class assumption first, before emitting other patchpoints + if !fun.assume_no_singleton_classes(block, recv_class, state) { + fun.set_dynamic_send_reason(send_insn_id, SingletonClassSeen); + return Err(()); + } + + fun.gen_patch_points_for_optimized_ccall(block, recv_class, method_id, cme, state); + + if let Some(profiled_type) = profiled_type { + // Guard receiver class + recv = fun.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state, recompile: Some(Recompile) }); + fun.insn_types[recv.0] = fun.infer_type(recv); + } + + // Try inlining the cfunc into HIR. Only inline if we don't have a block argument + if blockiseq.is_none() { + let tmp_block = fun.new_block(u32::MAX); + if let Some(replacement) = (props.inline)(fun, tmp_block, recv, &args, state) { + // Copy contents of tmp_block to block + assert_ne!(block, tmp_block); + let insns = std::mem::take(&mut fun.blocks[tmp_block.0].insns); + fun.blocks[block.0].insns.extend(insns); + fun.count(block, Counter::inline_cfunc_optimized_send_count); + fun.make_equal_to(send_insn_id, replacement); + if fun.type_of(replacement).bit_equal(types::Any) { + // Not set yet; infer type + fun.insn_types[replacement.0] = fun.infer_type(replacement); + } + fun.remove_block(tmp_block); + return Ok(()); + } + + // Only allow leaf calls if we don't have a block argument + if props.leaf && props.no_gc { + fun.count(block, Counter::inline_cfunc_optimized_send_count); + let owner = unsafe { (*cme).owner }; + let ccall = fun.push_insn(block, Insn::CCall { cfunc: cfunc_ptr, recv, args, name, owner, return_type, elidable }); + fun.make_equal_to(send_insn_id, ccall); + return Ok(()); + } + } + + // No inlining; emit a call + if get_option!(stats) { + fun.count_not_inlined_cfunc(block, cme); + } + + let ccall = fun.push_insn(block, Insn::CCallVariadic(Box::new(CCallVariadicData { + cfunc: cfunc_ptr, + recv, + args, + cme, + name: method_id, + state, + return_type, + 
elidable, + block: blockiseq.map(BlockHandler::BlockIseq), + }))); + + fun.make_equal_to(send_insn_id, ccall); + Ok(()) + } + -2 => { + // (self, args_ruby_array) + fun.set_dynamic_send_reason(send_insn_id, SendCfuncArrayVariadic); + Err(()) + } + _ => unreachable!("unknown cfunc kind: argc={argc}") + } + } + + if reduce_send_to_ccall(self, block, send.clone(), insn_id, klass, profiled_type, cme).is_ok() { + continue; + } + + self.push_insn_id(block, insn_id); } else { let reason = if has_block { SendNotOptimizedMethodType(MethodType::from(def_type)) } else { SendWithoutBlockNotOptimizedMethodType(MethodType::from(def_type)) }; self.set_dynamic_send_reason(insn_id, reason); @@ -5014,265 +5229,12 @@ impl Function { /// Optimize Send that land in a C method to a direct CCall without /// runtime lookup. fn optimize_c_calls(&mut self) { - if unsafe { rb_zjit_method_tracing_currently_enabled() } { - return; - } - - // Try to reduce a Send insn to a CCallWithFrame - fn reduce_send_to_ccall( - fun: &mut Function, - block: BlockId, - self_type: Type, - send: Insn, - send_insn_id: InsnId, - ) -> Result<(), ()> { - let Insn::Send { mut recv, cd, block: send_block, args, state, .. } = send else { - return Err(()); - }; - - let call_info = unsafe { (*cd).ci }; - let argc = unsafe { vm_ci_argc(call_info) }; - let method_id = unsafe { rb_vm_ci_mid(call_info) }; - - // If we have info about the class of the receiver - let (recv_class, profiled_type) = match fun.resolve_receiver_type(recv, self_type, state) { - ReceiverTypeResolution::StaticallyKnown { class } => (class, None), - ReceiverTypeResolution::Monomorphic { profiled_type } - | ReceiverTypeResolution::SkewedPolymorphic { profiled_type} => (profiled_type.class(), Some(profiled_type)), - ReceiverTypeResolution::SkewedMegamorphic { .. 
} | ReceiverTypeResolution::Polymorphic | ReceiverTypeResolution::Megamorphic | ReceiverTypeResolution::NoProfile => return Err(()), - }; - - // Do method lookup - let mut cme: *const rb_callable_method_entry_struct = unsafe { rb_callable_method_entry(recv_class, method_id) }; - if cme.is_null() { - fun.set_dynamic_send_reason(send_insn_id, SendNotOptimizedMethodType(MethodType::Null)); - return Err(()); - } - - // Filter for C methods - let mut def_type = unsafe { get_cme_def_type(cme) }; - while def_type == VM_METHOD_TYPE_ALIAS { - cme = unsafe { rb_aliased_callable_method_entry(cme) }; - def_type = unsafe { get_cme_def_type(cme) }; - } - if def_type != VM_METHOD_TYPE_CFUNC { - return Err(()); - } - - - let ci_flags = unsafe { vm_ci_flag(call_info) }; - let visibility = unsafe { METHOD_ENTRY_VISI(cme) }; - match (visibility, ci_flags & VM_CALL_FCALL != 0) { - (METHOD_VISI_PUBLIC, _) => {} - (METHOD_VISI_PRIVATE, true) => {} - (METHOD_VISI_PROTECTED, true) => {} - _ => { - fun.set_dynamic_send_reason(send_insn_id, SendNotOptimizedNeedPermission); - return Err(()); - } - } - - // When seeing &block argument, fall back to dynamic dispatch for now - // TODO: Support block forwarding - if unspecializable_c_call_type(ci_flags) { - // Only count features NOT already counted in type_specialize. 
- if !unspecializable_call_type(ci_flags) { - fun.count_complex_call_features(block, ci_flags); - } - fun.set_dynamic_send_reason(send_insn_id, ComplexArgPass); - return Err(()); - } - - let blockiseq = match send_block { - Some(BlockHandler::BlockArg) => unreachable!("unsupported &block should have been filtered out"), - Some(BlockHandler::BlockIseq(blockiseq)) => Some(blockiseq), - None => None, - }; - - let cfunc = unsafe { get_cme_def_body_cfunc(cme) }; - // Find the `argc` (arity) of the C method, which describes the parameters it expects - let cfunc_argc = unsafe { get_mct_argc(cfunc) }; - let cfunc_ptr = unsafe { get_mct_func(cfunc) }.cast(); - let name = unsafe { (*cme).called_id }; - - // Look up annotations - let props = ZJITState::get_method_annotations().get_cfunc_properties(cme); - if props.is_none() && get_option!(stats) { - fun.count_not_annotated_cfunc(block, cme); - } - let props = props.unwrap_or_default(); - let return_type = props.return_type; - let elidable = match blockiseq { - Some(_) => false, // Don't consider cfuncs with block arguments as elidable for now - None => props.elidable, - }; - - match cfunc_argc { - 0.. => { - // (self, arg0, arg1, ..., argc) form - // - // Bail on argc mismatch - if argc != cfunc_argc as u32 { - return Err(()); - } - - // TODO: Support passing arguments on the stack in C calls - // +1 for self - if (argc as usize)+1 > C_ARG_OPNDS.len() { - fun.set_dynamic_send_reason(send_insn_id, TooManyArgsForLir); - return Err(()); - } - - // Check singleton class assumption first, before emitting other patchpoints - if !fun.assume_no_singleton_classes(block, recv_class, state) { - fun.set_dynamic_send_reason(send_insn_id, SingletonClassSeen); - return Err(()); - } - - // Commit to the replacement. Put PatchPoint. 
- fun.gen_patch_points_for_optimized_ccall(block, recv_class, method_id, cme, state); - - if let Some(profiled_type) = profiled_type { - // Guard receiver class - recv = fun.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state, recompile: Some(Recompile) }); - fun.insn_types[recv.0] = fun.infer_type(recv); - } - - // Try inlining the cfunc into HIR. Only inline if we don't have a block argument - if blockiseq.is_none() { - let tmp_block = fun.new_block(u32::MAX); - if let Some(replacement) = (props.inline)(fun, tmp_block, recv, &args, state) { - // Copy contents of tmp_block to block - assert_ne!(block, tmp_block); - let insns = std::mem::take(&mut fun.blocks[tmp_block.0].insns); - fun.blocks[block.0].insns.extend(insns); - fun.count(block, Counter::inline_cfunc_optimized_send_count); - fun.make_equal_to(send_insn_id, replacement); - if fun.type_of(replacement).bit_equal(types::Any) { - // Not set yet; infer type - fun.insn_types[replacement.0] = fun.infer_type(replacement); - } - fun.remove_block(tmp_block); - return Ok(()); - } - - // Only allow leaf calls if we don't have a block argument - if props.leaf && props.no_gc { - fun.count(block, Counter::inline_cfunc_optimized_send_count); - let owner = unsafe { (*cme).owner }; - let ccall = fun.push_insn(block, Insn::CCall { cfunc: cfunc_ptr, recv, args, name, owner, return_type, elidable }); - fun.make_equal_to(send_insn_id, ccall); - return Ok(()); - } - } - - // Emit a call - if get_option!(stats) { - fun.count_not_inlined_cfunc(block, cme); - } - let ccall = fun.push_insn(block, Insn::CCallWithFrame(Box::new(CCallWithFrameData { - cd, - cfunc: cfunc_ptr, - recv, - args, - cme, - name, - state, - return_type, - elidable, - block: blockiseq.map(BlockHandler::BlockIseq), - }))); - fun.make_equal_to(send_insn_id, ccall); - Ok(()) - } - // Variadic method - -1 => { - // The method gets a pointer to the first argument - // func(int argc, VALUE *argv, VALUE recv) - - 
// Check singleton class assumption first, before emitting other patchpoints - if !fun.assume_no_singleton_classes(block, recv_class, state) { - fun.set_dynamic_send_reason(send_insn_id, SingletonClassSeen); - return Err(()); - } - - fun.gen_patch_points_for_optimized_ccall(block, recv_class, method_id, cme, state); - - if let Some(profiled_type) = profiled_type { - // Guard receiver class - recv = fun.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state, recompile: Some(Recompile) }); - fun.insn_types[recv.0] = fun.infer_type(recv); - } - - // Try inlining the cfunc into HIR. Only inline if we don't have a block argument - if blockiseq.is_none() { - let tmp_block = fun.new_block(u32::MAX); - if let Some(replacement) = (props.inline)(fun, tmp_block, recv, &args, state) { - // Copy contents of tmp_block to block - assert_ne!(block, tmp_block); - let insns = std::mem::take(&mut fun.blocks[tmp_block.0].insns); - fun.blocks[block.0].insns.extend(insns); - fun.count(block, Counter::inline_cfunc_optimized_send_count); - fun.make_equal_to(send_insn_id, replacement); - if fun.type_of(replacement).bit_equal(types::Any) { - // Not set yet; infer type - fun.insn_types[replacement.0] = fun.infer_type(replacement); - } - fun.remove_block(tmp_block); - return Ok(()); - } - - // Only allow leaf calls if we don't have a block argument - if props.leaf && props.no_gc { - fun.count(block, Counter::inline_cfunc_optimized_send_count); - let owner = unsafe { (*cme).owner }; - let ccall = fun.push_insn(block, Insn::CCall { cfunc: cfunc_ptr, recv, args, name, owner, return_type, elidable }); - fun.make_equal_to(send_insn_id, ccall); - return Ok(()); - } - } - - // No inlining; emit a call - if get_option!(stats) { - fun.count_not_inlined_cfunc(block, cme); - } - - let ccall = fun.push_insn(block, Insn::CCallVariadic(Box::new(CCallVariadicData { - cfunc: cfunc_ptr, - recv, - args, - cme, - name: method_id, - state, - return_type, - 
elidable, - block: blockiseq.map(BlockHandler::BlockIseq), - }))); - - fun.make_equal_to(send_insn_id, ccall); - Ok(()) - } - -2 => { - // (self, args_ruby_array) - fun.set_dynamic_send_reason(send_insn_id, SendCfuncArrayVariadic); - Err(()) - } - _ => unreachable!("unknown cfunc kind: argc={argc}") - } - } - for block in self.reverse_post_order() { let old_insns = std::mem::take(&mut self.blocks[block.0].insns); assert!(self.blocks[block.0].insns.is_empty()); for insn_id in old_insns { let send = self.find(insn_id); match send { - send @ Insn::Send { recv, .. } => { - let recv_type = self.type_of(recv); - if reduce_send_to_ccall(self, block, recv_type, send, insn_id).is_ok() { - continue; - } - } Insn::InvokeBuiltin { bf, recv, args, state, .. } => { let props = ZJITState::get_method_annotations().get_builtin_properties(bf).unwrap_or_default(); // Try inlining the cfunc into HIR diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index 2b29204f8e9381..c813b6d4dbf8ac 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -1683,7 +1683,7 @@ mod hir_opt_tests { v4:BasicObject = LoadArg :self@0 Jump bb3(v4) bb3(v6:BasicObject): - v11:BasicObject = Send v6, :foo # SendFallbackReason: Send: unsupported method type Null + v11:BasicObject = Send v6, :foo # SendFallbackReason: SendWithoutBlock: unsupported method type Null CheckInterrupts Return v11 "); @@ -3594,7 +3594,7 @@ mod hir_opt_tests { bb3(v6:BasicObject): v10:Fixnum[1] = Const Value(1) v12:Fixnum[0] = Const Value(0) - v14:BasicObject = Send v10, :itself, v12 # SendFallbackReason: SendWithoutBlock: unsupported method type Cfunc + v14:BasicObject = Send v10, :itself, v12 # SendFallbackReason: Argument count does not match parameter count CheckInterrupts Return v14 "); @@ -3922,8 +3922,8 @@ mod hir_opt_tests { v15:Fixnum[1] = Const Value(1) SetLocal :a, l0, EP@3, v15 PatchPoint MethodRedefined(Object@0x1008, lambda@0x1010, cme:0x1018) - 
v46:ObjectSubclass[class_exact*:Object@VALUE(0x1008)] = GuardType v10, ObjectSubclass[class_exact*:Object@VALUE(0x1008)] recompile - v47:BasicObject = CCallWithFrame v46, :Kernel#lambda@0x1040, block=0x1048 + v43:ObjectSubclass[class_exact*:Object@VALUE(0x1008)] = GuardType v10, ObjectSubclass[class_exact*:Object@VALUE(0x1008)] recompile + v44:BasicObject = CCallWithFrame v43, :Kernel#lambda@0x1040, block=0x1048 v22:CPtr = GetEP 0 v23:BasicObject = LoadField v22, :a@0x1000 PatchPoint MethodRedefined(Object@0x1008, foo@0x1050, cme:0x1058) @@ -4736,12 +4736,12 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(C@0x1008) PatchPoint MethodRedefined(C@0x1008, initialize@0x1038, cme:0x1040) PushInlineFrame v48 (0x1068), v17 - v62:CShape = LoadField v48, :shape_id@0x1070 - v63:CShape[0x1071] = GuardBitEquals v62, CShape(0x1071) recompile + v65:CShape = LoadField v48, :shape_id@0x1070 + v66:CShape[0x1071] = GuardBitEquals v65, CShape(0x1071) recompile StoreField v48, :@x@0x1072, v17 WriteBarrier v48, v17 - v66:CShape[0x1073] = Const CShape(0x1073) - StoreField v48, :shape_id@0x1070, v66 + v69:CShape[0x1073] = Const CShape(0x1073) + StoreField v48, :shape_id@0x1070, v69 CheckInterrupts PopInlineFrame Return v48 @@ -4836,22 +4836,22 @@ mod hir_opt_tests { v46:Fixnum[0] = Const Value(0) PatchPoint NoSingletonClass(Hash@0x1008) PatchPoint MethodRedefined(Hash@0x1008, initialize@0x1038, cme:0x1040) - v94:Fixnum[0] = Const Value(0) - v95:NilClass = Const Value(nil) + v97:Fixnum[0] = Const Value(0) + v98:NilClass = Const Value(nil) PushInlineFrame v45 (0x1068), v46 - v61:TrueClass = Const Value(true) - v79:CPtr = GetEP 0 - v80:CUInt64 = LoadField v79, :VM_ENV_DATA_INDEX_FLAGS@0x1070 - v81:CBool = IsBlockParamModified v80 - CondBranch v81, bb10(), bb11() - bb10(): - v83:BasicObject = LoadField v79, :block@0x1071 - Jump bb12(v83) + v64:TrueClass = Const Value(true) + v82:CPtr = GetEP 0 + v83:CUInt64 = LoadField v82, :VM_ENV_DATA_INDEX_FLAGS@0x1070 + v84:CBool = IsBlockParamModified 
v83 + CondBranch v84, bb11(), bb12() bb11(): - v85:BasicObject = GetBlockParam :block, l0, EP@4 - Jump bb12(v85) - bb12(v78:BasicObject): - v88:BasicObject = InvokeBuiltin rb_hash_init, v45, v46, v61, v61, v78 + v86:BasicObject = LoadField v82, :block@0x1071 + Jump bb13(v86) + bb12(): + v88:BasicObject = GetBlockParam :block, l0, EP@4 + Jump bb13(v88) + bb13(v81:BasicObject): + v91:BasicObject = InvokeBuiltin rb_hash_init, v45, v46, v64, v64, v81 CheckInterrupts PopInlineFrame Return v45 @@ -4883,9 +4883,9 @@ mod hir_opt_tests { v17:Fixnum[1] = Const Value(1) PatchPoint MethodRedefined(Array@0x1008, new@0x1009, cme:0x1010) PatchPoint MethodRedefined(Class@0x1038, new@0x1009, cme:0x1010) - v52:BasicObject = CCallVariadic v12, :Array.new@0x1040, v17 + v56:BasicObject = CCallVariadic v12, :Array.new@0x1040, v17 CheckInterrupts - Return v52 + Return v56 "); } @@ -5035,7 +5035,7 @@ mod hir_opt_tests { PatchPoint StableConstantNames(0x1000, C) v12:ClassSubclass[C@0x1008] = Const Value(VALUE(0x1008)) v14:Fixnum[1] = Const Value(1) - v16:BasicObject = Send v12, :allocate, v14 # SendFallbackReason: SendWithoutBlock: unsupported method type Cfunc + v16:BasicObject = Send v12, :allocate, v14 # SendFallbackReason: Argument count does not match parameter count CheckInterrupts Return v16 "); @@ -6680,7 +6680,7 @@ mod hir_opt_tests { bb3(v6:BasicObject): v10:HashExact = NewHash v12:NilClass = Const Value(nil) - v14:BasicObject = Send v10, :freeze, v12 # SendFallbackReason: SendWithoutBlock: unsupported method type Cfunc + v14:BasicObject = Send v10, :freeze, v12 # SendFallbackReason: Argument count does not match parameter count CheckInterrupts Return v14 "); @@ -6776,7 +6776,7 @@ mod hir_opt_tests { bb3(v6:BasicObject): v10:ArrayExact = NewArray v12:NilClass = Const Value(nil) - v14:BasicObject = Send v10, :freeze, v12 # SendFallbackReason: SendWithoutBlock: unsupported method type Cfunc + v14:BasicObject = Send v10, :freeze, v12 # SendFallbackReason: Argument count does not 
match parameter count CheckInterrupts Return v14 "); @@ -6874,7 +6874,7 @@ mod hir_opt_tests { v10:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) v11:StringExact = StringCopy v10 v13:NilClass = Const Value(nil) - v15:BasicObject = Send v11, :freeze, v13 # SendFallbackReason: SendWithoutBlock: unsupported method type Cfunc + v15:BasicObject = Send v11, :freeze, v13 # SendFallbackReason: Argument count does not match parameter count CheckInterrupts Return v15 "); @@ -12061,7 +12061,7 @@ mod hir_opt_tests { bb3(v6:BasicObject): v10:Fixnum[4] = Const Value(4) v12:Fixnum[1] = Const Value(1) - v14:BasicObject = Send v10, :succ, v12 # SendFallbackReason: SendWithoutBlock: unsupported method type Cfunc + v14:BasicObject = Send v10, :succ, v12 # SendFallbackReason: Argument count does not match parameter count CheckInterrupts Return v14 "); @@ -14540,12 +14540,12 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(C@0x1000) PatchPoint MethodRedefined(C@0x1000, class@0x1008, cme:0x1010) v43:ObjectSubclass[class_exact:C] = GuardType v6, ObjectSubclass[class_exact:C] recompile - v45:ClassSubclass[C@0x1000] = Const Value(VALUE(0x1000)) + v51:ClassSubclass[C@0x1000] = Const Value(VALUE(0x1000)) v13:StaticSymbol[:_lex_actions] = Const Value(VALUE(0x1038)) v15:TrueClass = Const Value(true) PatchPoint MethodRedefined(Class@0x1040, respond_to?@0x1048, cme:0x1050) PatchPoint MethodRedefined(Class@0x1040, _lex_actions@0x1078, cme:0x1080) - v51:TrueClass = Const Value(true) + v50:TrueClass = Const Value(true) CheckInterrupts v26:StaticSymbol[:CORRECT] = Const Value(VALUE(0x10a8)) Return v26 @@ -14576,11 +14576,11 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(C@0x1008) PatchPoint MethodRedefined(C@0x1008, class@0x1010, cme:0x1018) v25:ObjectSubclass[class_exact:C] = GuardType v10, ObjectSubclass[class_exact:C] recompile - v27:ClassSubclass[C@0x1008] = Const Value(VALUE(0x1008)) + v31:ClassSubclass[C@0x1008] = Const Value(VALUE(0x1008)) PatchPoint 
MethodRedefined(Class@0x1040, name@0x1048, cme:0x1050) - v31:StringExact|NilClass = CCall v27, :Module#name@0x1078 + v30:StringExact|NilClass = CCall v31, :Module#name@0x1078 CheckInterrupts - Return v31 + Return v30 "); } @@ -15296,7 +15296,7 @@ mod hir_opt_tests { PatchPoint SingleRactorMode PatchPoint StableConstantNames(0x1000, Obj) v12:BasicObjectExact[VALUE(0x1008)] = Const Value(VALUE(0x1008)) - v14:BasicObject = Send v12, :initialize # SendFallbackReason: Send: method private or protected and no FCALL + v14:BasicObject = Send v12, :initialize # SendFallbackReason: SendWithoutBlock: method private or protected and no FCALL CheckInterrupts Return v14 "); @@ -15590,16 +15590,16 @@ mod hir_opt_tests { v31:RubyValue = LoadField v28, :VM_ENV_DATA_INDEX_SPECVAL@0x1050 v32:FalseClass = GuardBitEquals v31, Value(false) PushInlineFrame v9 (0x1058), v10 - v40:Fixnum[2] = Const Value(2) + v44:Fixnum[2] = Const Value(2) PatchPoint MethodRedefined(Integer@0x1060, *@0x1068, cme:0x1070) - v54:Fixnum = GuardType v10, Fixnum recompile - v55:Fixnum = FixnumMult v54, v40 + v58:Fixnum = GuardType v10, Fixnum recompile + v59:Fixnum = FixnumMult v58, v44 CheckInterrupts PopInlineFrame v18:Fixnum[1] = Const Value(1) PatchPoint MethodRedefined(Integer@0x1060, +@0x1098, cme:0x10a0) - v59:Fixnum = FixnumAdd v55, v18 - Return v59 + v37:Fixnum = FixnumAdd v59, v18 + Return v37 "); } @@ -17780,17 +17780,17 @@ mod hir_opt_tests { v138:ObjectSubclass[class_exact:C] = GuardType v12, ObjectSubclass[class_exact:C] recompile v139:BasicObject = LoadField v138, :var@0x1040 PatchPoint MethodRedefined(Integer@0x1048, +@0x1050, cme:0x1058) - v179:Fixnum = GuardType v139, Fixnum + v143:Fixnum = GuardType v139, Fixnum PatchPoint NoEPEscape(test) - v185:Fixnum = FixnumAdd v179, v179 - v190:Fixnum = FixnumAdd v185, v179 - v195:Fixnum = FixnumAdd v190, v179 - v200:Fixnum = FixnumAdd v195, v179 - v205:Fixnum = FixnumAdd v200, v179 - v210:Fixnum = FixnumAdd v205, v179 - v215:Fixnum = FixnumAdd v210, v179 
- v220:Fixnum = FixnumAdd v215, v179 - v225:Fixnum = FixnumAdd v220, v179 + v153:Fixnum = FixnumAdd v143, v143 + v162:Fixnum = FixnumAdd v153, v143 + v171:Fixnum = FixnumAdd v162, v143 + v180:Fixnum = FixnumAdd v171, v143 + v189:Fixnum = FixnumAdd v180, v143 + v198:Fixnum = FixnumAdd v189, v143 + v207:Fixnum = FixnumAdd v198, v143 + v216:Fixnum = FixnumAdd v207, v143 + v225:Fixnum = FixnumAdd v216, v143 CheckInterrupts Return v225 "); @@ -18524,11 +18524,11 @@ mod hir_opt_tests { v23:ObjectSubclass[class_exact:Child] = GuardType v10, ObjectSubclass[class_exact:Child] recompile PushInlineFrame v23 (0x1040) PatchPoint MethodRedefined(Parent@0x1048, greet@0x1010, cme:0x1050) - v51:CPtr = GetEP 0 - v52:RubyValue = LoadField v51, :VM_ENV_DATA_INDEX_ME_CREF@0x1078 - v53:CallableMethodEntry[VALUE(0x1018)] = GuardBitEquals v52, Value(VALUE(0x1018)) - v54:RubyValue = LoadField v51, :VM_ENV_DATA_INDEX_SPECVAL@0x1079 - v55:FalseClass = GuardBitEquals v54, Value(false) + v46:CPtr = GetEP 0 + v47:RubyValue = LoadField v46, :VM_ENV_DATA_INDEX_ME_CREF@0x1078 + v48:CallableMethodEntry[VALUE(0x1018)] = GuardBitEquals v47, Value(VALUE(0x1018)) + v49:RubyValue = LoadField v46, :VM_ENV_DATA_INDEX_SPECVAL@0x1079 + v50:FalseClass = GuardBitEquals v49, Value(false) PushInlineFrame v23 (0x1040) v61:StringExact[VALUE(0x1080)] = Const Value(VALUE(0x1080)) v62:StringExact = StringCopy v61 @@ -18538,10 +18538,10 @@ mod hir_opt_tests { v33:StringExact = StringCopy v32 PatchPoint NoSingletonClass(String@0x1090) PatchPoint MethodRedefined(String@0x1090, +@0x1098, cme:0x10a0) - v49:BasicObject = CCallWithFrame v62, :String#+@0x10c8, v33 + v56:BasicObject = CCallWithFrame v62, :String#+@0x10c8, v33 CheckInterrupts PopInlineFrame - Return v49 + Return v56 "); } @@ -19287,18 +19287,18 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(Point@0x1008) PatchPoint MethodRedefined(Point@0x1008, initialize@0x1038, cme:0x1040) PushInlineFrame v91 (0x1068), v17, v19 - v116:CShape = LoadField v91, 
:shape_id@0x1070 - v117:CShape[0x1071] = GuardBitEquals v116, CShape(0x1071) recompile + v122:CShape = LoadField v91, :shape_id@0x1070 + v123:CShape[0x1071] = GuardBitEquals v122, CShape(0x1071) recompile StoreField v91, :@x@0x1072, v17 WriteBarrier v91, v17 - v120:CShape[0x1073] = Const CShape(0x1073) - StoreField v91, :shape_id@0x1070, v120 + v126:CShape[0x1073] = Const CShape(0x1073) + StoreField v91, :shape_id@0x1070, v126 PatchPoint NoEPEscape(initialize) PatchPoint SingleRactorMode StoreField v91, :@y@0x1074, v19 WriteBarrier v91, v19 - v135:CShape[0x1075] = Const CShape(0x1075) - StoreField v91, :shape_id@0x1070, v135 + v141:CShape[0x1075] = Const CShape(0x1075) + StoreField v91, :shape_id@0x1070, v141 CheckInterrupts PopInlineFrame PatchPoint SingleRactorMode @@ -19308,59 +19308,59 @@ mod hir_opt_tests { v52:Fixnum[1] = Const Value(1) v54:Fixnum[2] = Const Value(2) PatchPoint MethodRedefined(Point@0x1008, new@0x1009, cme:0x1010) - v98:ObjectSubclass[class_exact:Point] = ObjectAllocClass Point:VALUE(0x1008) + v101:ObjectSubclass[class_exact:Point] = ObjectAllocClass Point:VALUE(0x1008) PatchPoint NoSingletonClass(Point@0x1008) PatchPoint MethodRedefined(Point@0x1008, initialize@0x1038, cme:0x1040) - PushInlineFrame v98 (0x1068), v52, v54 - v155:CShape = LoadField v98, :shape_id@0x1070 - v156:CShape[0x1071] = GuardBitEquals v155, CShape(0x1071) recompile - StoreField v98, :@x@0x1072, v52 - WriteBarrier v98, v52 - v159:CShape[0x1073] = Const CShape(0x1073) - StoreField v98, :shape_id@0x1070, v159 + PushInlineFrame v101 (0x1068), v52, v54 + v161:CShape = LoadField v101, :shape_id@0x1070 + v162:CShape[0x1071] = GuardBitEquals v161, CShape(0x1071) recompile + StoreField v101, :@x@0x1072, v52 + WriteBarrier v101, v52 + v165:CShape[0x1073] = Const CShape(0x1073) + StoreField v101, :shape_id@0x1070, v165 PatchPoint NoEPEscape(initialize) PatchPoint SingleRactorMode - StoreField v98, :@y@0x1074, v54 - WriteBarrier v98, v54 - v174:CShape[0x1075] = Const CShape(0x1075) 
- StoreField v98, :shape_id@0x1070, v174 + StoreField v101, :@y@0x1074, v54 + WriteBarrier v101, v54 + v180:CShape[0x1075] = Const CShape(0x1075) + StoreField v101, :shape_id@0x1070, v180 CheckInterrupts PopInlineFrame PatchPoint NoSingletonClass(Point@0x1008) PatchPoint MethodRedefined(Point@0x1008, ==@0x1080, cme:0x1088) - PushInlineFrame v91 (0x1068), v98 + PushInlineFrame v91 (0x1068), v101 PatchPoint SingleRactorMode - v192:CShape = LoadField v91, :shape_id@0x1070 - v193:CShape[0x1075] = GuardBitEquals v192, CShape(0x1075) recompile - v194:BasicObject = LoadField v91, :@x@0x1072 + v198:CShape = LoadField v91, :shape_id@0x1070 + v199:CShape[0x1075] = GuardBitEquals v198, CShape(0x1075) recompile + v200:BasicObject = LoadField v91, :@x@0x1072 PatchPoint NoEPEscape(==) PatchPoint MethodRedefined(Point@0x1008, x@0x10b0, cme:0x10b8) PatchPoint MethodRedefined(Integer@0x10e0, ==@0x1080, cme:0x10e8) - v240:Fixnum = GuardType v194, Fixnum recompile - v242:BoolExact = FixnumEq v240, v52 - v206:CBool = Test v242 - v207:FalseClass = RefineType v242, Falsy - CondBranch v206, bb17(), bb16(v91, v98, v207) - bb17(): + v255:Fixnum = GuardType v200, Fixnum recompile + v257:BoolExact = FixnumEq v255, v52 + v212:CBool = Test v257 + v213:FalseClass = RefineType v257, Falsy + CondBranch v212, bb19(), bb18(v91, v101, v213) + bb19(): PatchPoint SingleRactorMode - v214:CShape = LoadField v91, :shape_id@0x1070 - v215:CShape[0x1075] = GuardBitEquals v214, CShape(0x1075) recompile - v216:BasicObject = LoadField v91, :@y@0x1074 + v220:CShape = LoadField v91, :shape_id@0x1070 + v221:CShape[0x1075] = GuardBitEquals v220, CShape(0x1075) recompile + v222:BasicObject = LoadField v91, :@y@0x1074 PatchPoint NoEPEscape(==) PatchPoint NoSingletonClass(Point@0x1008) PatchPoint MethodRedefined(Point@0x1008, y@0x1110, cme:0x1118) - v261:CShape = LoadField v98, :shape_id@0x1070 - v262:CShape[0x1075] = GuardBitEquals v261, CShape(0x1075) recompile - v263:BasicObject = LoadField v98, :@y@0x1074 + 
v262:CShape = LoadField v101, :shape_id@0x1070 + v263:CShape[0x1075] = GuardBitEquals v262, CShape(0x1075) recompile + v264:BasicObject = LoadField v101, :@y@0x1074 PatchPoint MethodRedefined(Integer@0x10e0, ==@0x1080, cme:0x10e8) - v245:Fixnum = GuardType v216, Fixnum recompile - v246:Fixnum = GuardType v263, Fixnum - v247:BoolExact = FixnumEq v245, v246 - Jump bb16(v91, v98, v247) - bb16(v226:ObjectSubclass[class_exact:Point], v227:ObjectSubclass[class_exact:Point], v228:BoolExact): + v267:Fixnum = GuardType v222, Fixnum recompile + v268:Fixnum = GuardType v264, Fixnum + v269:BoolExact = FixnumEq v267, v268 + Jump bb18(v91, v101, v269) + bb18(v232:ObjectSubclass[class_exact:Point], v233:ObjectSubclass[class_exact:Point], v234:BoolExact): CheckInterrupts PopInlineFrame - Return v228 + Return v234 "); } From 45aef90cd1970483ebeec5cc0ae61b606e053fbb Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Fri, 26 Jun 2026 10:33:18 -0400 Subject: [PATCH 28/31] ZJIT: Specialize InvokeBuiltin in type_specialize --- zjit/src/hir.rs | 71 ++++++++++++++++++++------------------- zjit/src/hir/opt_tests.rs | 70 +++++++++++++++++++------------------- zjit/src/hir/tests.rs | 46 ++++++++++++++----------- 3 files changed, 97 insertions(+), 90 deletions(-) diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index d35b00bad53f58..85fedae00049f2 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -3714,6 +3714,29 @@ impl Function { self.load_field(block, sp, local_id.into(), offset, return_type) } + fn try_inline_invoke_builtin(&mut self, block: BlockId, insn: Insn) -> InsnId { + let Insn::InvokeBuiltin { bf, recv, ref args, state, .. 
} = insn else { + panic!("try_inline_invoke_builtin called with non-InvokeBuiltin instruction"); + }; + let props = ZJITState::get_method_annotations().get_builtin_properties(bf).unwrap_or_default(); + // Try inlining the cfunc into HIR + let tmp_block = self.new_block(u32::MAX); + if let Some(replacement) = (props.inline)(self, tmp_block, recv, &args, state) { + // Copy contents of tmp_block to block + assert_ne!(block, tmp_block); + let insns = std::mem::take(&mut self.blocks[tmp_block.0].insns); + self.blocks[block.0].insns.extend(insns); + self.count(block, Counter::inline_cfunc_optimized_send_count); + if self.type_of(replacement).bit_equal(types::Any) { + // Not set yet; infer type + self.insn_types[replacement.0] = self.infer_type(replacement); + } + self.remove_block(tmp_block); + return replacement; + } + return self.push_insn(block, insn); + } + /// Try trivially inlining the method. If we can't, emit a SendDirect instruction instead and /// leave it to the general-purpose inliner to handle. 
fn try_inline_send_direct(&mut self, block: BlockId, insn: Insn) -> InsnId { @@ -3747,7 +3770,7 @@ impl Function { } IseqReturn::InvokeLeafBuiltin(bf, return_type) => { self.count(block, Counter::inline_iseq_optimized_send_count); - self.push_insn(block, Insn::InvokeBuiltin { + self.try_inline_invoke_builtin(block, Insn::InvokeBuiltin { bf, recv, args: vec![recv], @@ -3871,6 +3894,7 @@ impl Function { // Add GuardType for profiled receiver if let Some(profiled_type) = profiled_type { recv = self.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state, recompile: Some(Recompile) }); + self.insn_types[recv.0] = self.infer_type(recv); } let replacement = self.try_inline_send_direct(block, Insn::SendDirect { recv, cd, cme, iseq, args: processed_args, kw_bits, state: send_state, block: send_block }); @@ -3914,6 +3938,7 @@ impl Function { if let Some(profiled_type) = profiled_type { recv = self.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state, recompile: Some(Recompile) }); + self.insn_types[recv.0] = self.infer_type(recv); } let replacement = self.try_inline_send_direct(block, Insn::SendDirect { recv, cd, cme, iseq, args: processed_args, kw_bits, state: send_state, block: None }); @@ -3936,6 +3961,7 @@ impl Function { let id = unsafe { get_cme_def_body_attr_id(cme) }; if let Some(profiled_type) = profiled_type { recv = self.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state, recompile: Some(Recompile) }); + self.insn_types[recv.0] = self.infer_type(recv); let replacement = self.try_emit_optimized_getivar(block, recv, id, profiled_type, state).unwrap_or_else(|counter| { self.count(block, counter); @@ -3966,6 +3992,7 @@ impl Function { // profiles the receiver operand even after the send insn has finished profiling. 
let recompile = None; recv = self.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state, recompile: Some(Recompile) }); + self.insn_types[recv.0] = self.infer_type(recv); self.try_emit_optimized_setivar(block, recv, id, val, profiled_type, state, recompile).unwrap_or_else(|counter| { self.count(block, counter); self.push_insn(block, Insn::SetIvar { self_val: recv, id, ic: std::ptr::null(), val, state }); @@ -3992,6 +4019,7 @@ impl Function { self.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass, method: mid, cme }, state }); if let Some(profiled_type) = profiled_type { recv = self.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state, recompile: Some(Recompile) }); + self.insn_types[recv.0] = self.infer_type(recv); } let kw_splat = flags & VM_CALL_KW_SPLAT != 0; let invoke_proc = self.push_insn(block, Insn::InvokeProc { recv, args: args.clone(), state, kw_splat }); @@ -4030,6 +4058,7 @@ impl Function { self.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass, method: mid, cme }, state }); if let Some(profiled_type) = profiled_type { recv = self.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state, recompile: Some(Recompile) }); + self.insn_types[recv.0] = self.infer_type(recv); } // All structs from the same Struct class should have the same // length. 
So if our recv is embedded all runtime @@ -4174,6 +4203,7 @@ impl Function { fun.count(block, Counter::inline_cfunc_optimized_send_count); let owner = unsafe { (*cme).owner }; let ccall = fun.push_insn(block, Insn::CCall { cfunc: cfunc_ptr, recv, args, name, owner, return_type, elidable }); + fun.insn_types[ccall.0] = fun.infer_type(ccall); fun.make_equal_to(send_insn_id, ccall); return Ok(()); } @@ -4195,6 +4225,7 @@ impl Function { elidable, block: blockiseq.map(BlockHandler::BlockIseq), }))); + fun.insn_types[ccall.0] = fun.infer_type(ccall); fun.make_equal_to(send_insn_id, ccall); Ok(()) } @@ -4240,6 +4271,7 @@ impl Function { fun.count(block, Counter::inline_cfunc_optimized_send_count); let owner = unsafe { (*cme).owner }; let ccall = fun.push_insn(block, Insn::CCall { cfunc: cfunc_ptr, recv, args, name, owner, return_type, elidable }); + fun.insn_types[ccall.0] = fun.infer_type(ccall); fun.make_equal_to(send_insn_id, ccall); return Ok(()); } @@ -4261,7 +4293,7 @@ impl Function { elidable, block: blockiseq.map(BlockHandler::BlockIseq), }))); - + fun.insn_types[ccall.0] = fun.infer_type(ccall); fun.make_equal_to(send_insn_id, ccall); Ok(()) } @@ -5229,37 +5261,6 @@ impl Function { /// Optimize Send that land in a C method to a direct CCall without /// runtime lookup. fn optimize_c_calls(&mut self) { - for block in self.reverse_post_order() { - let old_insns = std::mem::take(&mut self.blocks[block.0].insns); - assert!(self.blocks[block.0].insns.is_empty()); - for insn_id in old_insns { - let send = self.find(insn_id); - match send { - Insn::InvokeBuiltin { bf, recv, args, state, .. 
} => { - let props = ZJITState::get_method_annotations().get_builtin_properties(bf).unwrap_or_default(); - // Try inlining the cfunc into HIR - let tmp_block = self.new_block(u32::MAX); - if let Some(replacement) = (props.inline)(self, tmp_block, recv, &args, state) { - // Copy contents of tmp_block to block - assert_ne!(block, tmp_block); - let insns = std::mem::take(&mut self.blocks[tmp_block.0].insns); - self.blocks[block.0].insns.extend(insns); - self.count(block, Counter::inline_cfunc_optimized_send_count); - self.make_equal_to(insn_id, replacement); - if self.type_of(replacement).bit_equal(types::Any) { - // Not set yet; infer type - self.insn_types[replacement.0] = self.infer_type(replacement); - } - self.remove_block(tmp_block); - continue; - } - } - _ => {} - } - self.push_insn_id(block, insn_id); - } - } - crate::stats::trace_compile_phase("infer_types", || self.infer_types()); } /// Convert `Send` instructions with no profile data into `SideExit` with recompile info. @@ -9035,7 +9036,7 @@ fn add_iseq_to_hir( let builtin_attrs = unsafe { rb_jit_iseq_builtin_attrs(iseq) }; let leaf = builtin_attrs & BUILTIN_ATTR_LEAF != 0; - let insn_id = fun.push_insn(block, Insn::InvokeBuiltin { + let insn_id = fun.try_inline_invoke_builtin(block, Insn::InvokeBuiltin { bf, recv: self_param, args, @@ -9071,7 +9072,7 @@ fn add_iseq_to_hir( let builtin_attrs = unsafe { rb_jit_iseq_builtin_attrs(iseq) }; let leaf = builtin_attrs & BUILTIN_ATTR_LEAF != 0; - let insn_id = fun.push_insn(block, Insn::InvokeBuiltin { + let insn_id = fun.try_inline_invoke_builtin(block, Insn::InvokeBuiltin { bf, recv: self_param, args, diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index c813b6d4dbf8ac..40fba2aa2ffd8c 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -3764,9 +3764,9 @@ mod hir_opt_tests { v12:ModuleExact[M@0x1008] = Const Value(VALUE(0x1008)) PatchPoint NoSingletonClass(Module@0x1010) PatchPoint MethodRedefined(Module@0x1010, 
class@0x1018, cme:0x1020) - v24:ClassSubclass[Module@0x1010] = Const Value(VALUE(0x1010)) + v23:ClassSubclass[Module@0x1010] = Const Value(VALUE(0x1010)) CheckInterrupts - Return v24 + Return v23 "); } @@ -12006,16 +12006,16 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(String@0x1008) PatchPoint MethodRedefined(String@0x1008, ascii_only?@0x1010, cme:0x1018) v23:StringExact = GuardType v10, StringExact recompile - v25:CUInt64 = LoadField v23, :RBASIC_FLAGS@0x1040 - v26:CUInt64[3145728] = Const CUInt64(3145728) - v27:CInt64 = IntAnd v25, v26 - v28:CInt64[1048576] = Const CInt64(1048576) - v29:CInt64 = GuardGreaterEq v27, v28 - v30:CInt64[1048576] = Const CInt64(1048576) - v31:CBool = IsBitEqual v29, v30 - v32:BoolExact = BoxBool v31 + v24:CUInt64 = LoadField v23, :RBASIC_FLAGS@0x1040 + v25:CUInt64[3145728] = Const CUInt64(3145728) + v26:CInt64 = IntAnd v24, v25 + v27:CInt64[1048576] = Const CInt64(1048576) + v28:CInt64 = GuardGreaterEq v26, v27 + v29:CInt64[1048576] = Const CInt64(1048576) + v30:CBool = IsBitEqual v28, v29 + v31:BoolExact = BoxBool v30 CheckInterrupts - Return v32 + Return v31 "); } @@ -14540,7 +14540,7 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(C@0x1000) PatchPoint MethodRedefined(C@0x1000, class@0x1008, cme:0x1010) v43:ObjectSubclass[class_exact:C] = GuardType v6, ObjectSubclass[class_exact:C] recompile - v51:ClassSubclass[C@0x1000] = Const Value(VALUE(0x1000)) + v44:ClassSubclass[C@0x1000] = Const Value(VALUE(0x1000)) v13:StaticSymbol[:_lex_actions] = Const Value(VALUE(0x1038)) v15:TrueClass = Const Value(true) PatchPoint MethodRedefined(Class@0x1040, respond_to?@0x1048, cme:0x1050) @@ -14576,9 +14576,9 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(C@0x1008) PatchPoint MethodRedefined(C@0x1008, class@0x1010, cme:0x1018) v25:ObjectSubclass[class_exact:C] = GuardType v10, ObjectSubclass[class_exact:C] recompile - v31:ClassSubclass[C@0x1008] = Const Value(VALUE(0x1008)) + v26:ClassSubclass[C@0x1008] = Const Value(VALUE(0x1008)) 
PatchPoint MethodRedefined(Class@0x1040, name@0x1048, cme:0x1050) - v30:StringExact|NilClass = CCall v31, :Module#name@0x1078 + v30:StringExact|NilClass = CCall v26, :Module#name@0x1078 CheckInterrupts Return v30 "); @@ -14608,9 +14608,9 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(C@0x1008) PatchPoint MethodRedefined(C@0x1008, class@0x1010, cme:0x1018) v23:ObjectSubclass[class_exact:C] = GuardType v10, ObjectSubclass[class_exact:C] recompile - v25:ClassSubclass[C@0x1008] = Const Value(VALUE(0x1008)) + v24:ClassSubclass[C@0x1008] = Const Value(VALUE(0x1008)) CheckInterrupts - Return v25 + Return v24 "); } @@ -14633,9 +14633,9 @@ mod hir_opt_tests { bb3(v6:BasicObject): v10:Fixnum[5] = Const Value(5) PatchPoint MethodRedefined(Integer@0x1000, class@0x1008, cme:0x1010) - v21:ClassSubclass[Integer@0x1000] = Const Value(VALUE(0x1000)) + v20:ClassSubclass[Integer@0x1000] = Const Value(VALUE(0x1000)) CheckInterrupts - Return v21 + Return v20 "); } @@ -14658,9 +14658,9 @@ mod hir_opt_tests { bb3(v6:BasicObject): PatchPoint MethodRedefined(Object@0x1000, class@0x1008, cme:0x1010) v18:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v6, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] recompile - v20:ClassSubclass[Object@0x1038] = Const Value(VALUE(0x1038)) + v19:ClassSubclass[Object@0x1038] = Const Value(VALUE(0x1038)) CheckInterrupts - Return v20 + Return v19 "); } @@ -16686,24 +16686,24 @@ mod hir_opt_tests { v35:Fixnum[0] = Const Value(0) Jump bb8(v8, v35) bb8(v48:BasicObject, v49:Fixnum): - v84:Array = RefineType v48, Array - v85:CInt64 = ArrayLength v84 - v86:Fixnum = BoxFixnum v85 - v87:BoolExact = FixnumGe v49, v86 - v54:CBool = Test v87 - CondBranch v54, bb10(), bb7(v48, v49) - bb10(): + v52:Array = RefineType v48, Array + v53:CInt64 = ArrayLength v52 + v54:Fixnum = BoxFixnum v53 + v55:BoolExact = FixnumGe v49, v54 + v57:CBool = Test v55 + CondBranch v57, bb11(), bb7(v48, v49) + bb11(): CheckInterrupts Return v48 - bb7(v67:BasicObject, 
v68:Fixnum): - v88:Array = RefineType v67, Array - v89:CInt64 = UnboxFixnum v68 - v90:BasicObject = ArrayAref v88, v89 - v74:BasicObject = InvokeBlock v90 # SendFallbackReason: InvokeBlock: not yet specialized - v91:Fixnum[1] = Const Value(1) - v92:Fixnum = FixnumAdd v68, v91 + bb7(v70:BasicObject, v71:Fixnum): + v75:Array = RefineType v70, Array + v76:CInt64 = UnboxFixnum v71 + v77:BasicObject = ArrayAref v75, v76 + v79:BasicObject = InvokeBlock v77 # SendFallbackReason: InvokeBlock: not yet specialized + v83:Fixnum[1] = Const Value(1) + v84:Fixnum = FixnumAdd v71, v83 PatchPoint NoEPEscape(each) - Jump bb8(v67, v92) + Jump bb8(v70, v84) bb4(v23:BasicObject, v24:NilClass): v28:BasicObject = InvokeBuiltin , v23 CheckInterrupts diff --git a/zjit/src/hir/tests.rs b/zjit/src/hir/tests.rs index 770dfc271edf5a..c90b05e5f59deb 100644 --- a/zjit/src/hir/tests.rs +++ b/zjit/src/hir/tests.rs @@ -4852,21 +4852,21 @@ pub(crate) mod hir_build_tests { v35:CPtr = GetEP 0 v36:CUInt64 = LoadField v35, :VM_ENV_DATA_INDEX_FLAGS@0x1004 v37:CBool = IsBlockParamModified v36 - CondBranch v37, bb5(), bb6() - bb5(): - v39:BasicObject = LoadField v35, :block@0x1005 - Jump bb7(v39, v39) + CondBranch v37, bb6(), bb7() bb6(): + v39:BasicObject = LoadField v35, :block@0x1005 + Jump bb8(v39, v39) + bb7(): v41:CInt64 = LoadField v35, :VM_ENV_DATA_INDEX_SPECVAL@0x1006 v42:CInt64 = GuardAnyBitSet v41, CUInt64(1) recompile v43:ObjectSubclass[BlockParamProxy] = Const Value(VALUE(0x1008)) - Jump bb7(v43, v22) - bb7(v33:BasicObject, v34:BasicObject): + Jump bb8(v43, v22) + bb8(v33:BasicObject, v34:BasicObject): CheckInterrupts v47:CBool = Test v33 v48:Falsy = RefineType v33, Falsy - CondBranch v47, bb8(), bb4(v18, v19, v20, v21, v34, v27) - bb8(): + CondBranch v47, bb9(), bb4(v18, v19, v20, v21, v34, v27) + bb9(): v50:Truthy = RefineType v33, Truthy v54:BasicObject = InvokeBlock v27 # SendFallbackReason: InvokeBlock: not yet specialized v57:BasicObject = InvokeBuiltin dir_s_close, v18, v27 @@ -5614,21 
+5614,27 @@ pub(crate) mod hir_build_tests { v35:Fixnum[0] = Const Value(0) Jump bb8(v30, v35) bb8(v48:BasicObject, v49:Fixnum): - v52:BoolExact = InvokeBuiltin rb_jit_ary_at_end, v48, v49 - v54:CBool = Test v52 - v55:FalseClass = RefineType v52, Falsy - CondBranch v54, bb10(), bb7(v48, v49) - bb10(): - v57:TrueClass = RefineType v52, Truthy - v59:NilClass = Const Value(nil) + v52:Array = RefineType v48, Array + v53:CInt64 = ArrayLength v52 + v54:Fixnum = BoxFixnum v53 + v55:BoolExact = FixnumGe v49, v54 + v57:CBool = Test v55 + v58:FalseClass = RefineType v55, Falsy + CondBranch v57, bb11(), bb7(v48, v49) + bb11(): + v60:TrueClass = RefineType v55, Truthy + v62:NilClass = Const Value(nil) CheckInterrupts Return v48 - bb7(v67:BasicObject, v68:Fixnum): - v72:BasicObject = InvokeBuiltin rb_jit_ary_at, v67, v68 - v74:BasicObject = InvokeBlock v72 # SendFallbackReason: InvokeBlock: not yet specialized - v78:Fixnum = InvokeBuiltin rb_jit_fixnum_inc, v67, v68 + bb7(v70:BasicObject, v71:Fixnum): + v75:Array = RefineType v70, Array + v76:CInt64 = UnboxFixnum v71 + v77:BasicObject = ArrayAref v75, v76 + v79:BasicObject = InvokeBlock v77 # SendFallbackReason: InvokeBlock: not yet specialized + v83:Fixnum[1] = Const Value(1) + v84:Fixnum = FixnumAdd v71, v83 PatchPoint NoEPEscape(each) - Jump bb8(v67, v78) + Jump bb8(v70, v84) bb4(v23:BasicObject, v24:NilClass): v28:BasicObject = InvokeBuiltin , v23 Jump bb5(v23, v24, v28) From 284ddba734dfa28ceaaa00c49ed506cc768c4567 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Fri, 26 Jun 2026 10:33:51 -0400 Subject: [PATCH 29/31] ZJIT: Remove optimize_c_calls pass --- zjit/src/hir.rs | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 85fedae00049f2..58daf6833c465a 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -3786,7 +3786,6 @@ impl Function { /// opcodes if we know the target ISEQ statically. 
This removes run-time method lookups and /// opens the door for inlining. /// Also try and inline constant caches, specialize object allocations, and more. - /// Calls to C functions are handled separately in optimize_c_calls. fn type_specialize(&mut self) { for block in self.reverse_post_order() { let old_insns = std::mem::take(&mut self.blocks[block.0].insns); @@ -5258,13 +5257,8 @@ impl Function { self.push_insn(block, Insn::IncrCounterPtr { counter_ptr }); } - /// Optimize Send that land in a C method to a direct CCall without - /// runtime lookup. - fn optimize_c_calls(&mut self) { - } - /// Convert `Send` instructions with no profile data into `SideExit` with recompile info. - /// This runs after strength reduction passes (type_specialize, inline, optimize_c_calls) so + /// This runs after strength reduction passes (type_specialize, inline) so /// that sends that can be optimized without profiling (e.g. known CFUNCs) are already handled. /// The remaining no-profile sends are turned into side exits that trigger recompilation with /// fresh profile data. @@ -6096,7 +6090,6 @@ impl Function { macro_rules! 
counter_for { // Bucket all strength reduction together (type_specialize) => { Counter::compile_hir_strength_reduce_time_ns }; - (optimize_c_calls) => { Counter::compile_hir_strength_reduce_time_ns }; (convert_no_profile_sends) => { Counter::compile_hir_strength_reduce_time_ns }; // End strength reduction bucket (inline_methods) => { Counter::compile_hir_inline_methods_time_ns }; @@ -6149,7 +6142,6 @@ impl Function { } else { false }; - run_pass!(optimize_c_calls); run_pass!(convert_no_profile_sends); run_pass!(optimize_load_store); run_pass!(canonicalize); From 80ca0b52ef640b35db517e76ccebc55b91592d02 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Fri, 26 Jun 2026 10:52:39 -0400 Subject: [PATCH 30/31] ZJIT: Add Function::guard_type_recompile --- zjit/src/hir.rs | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 58daf6833c465a..f6d6b60ddf945d 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -3581,6 +3581,12 @@ impl Function { self.push_insn(block, Insn::GuardType { val, guard_type, state, recompile: None }) } + pub fn guard_type_recompile(&mut self, block: BlockId, val: InsnId, guard_type: Type, state: InsnId, recompile: Recompile) -> InsnId { + let result = self.push_insn(block, Insn::GuardType { val, guard_type, state, recompile: Some(recompile) }); + self.insn_types[result.0] = self.infer_type(result); + result + } + fn count_complex_call_features(&mut self, block: BlockId, ci_flags: c_uint) { use Counter::*; if 0 != ci_flags & VM_CALL_ARGS_SPLAT { self.count(block, complex_arg_pass_caller_splat); } @@ -3936,8 +3942,7 @@ impl Function { self.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass, method: mid, cme }, state }); if let Some(profiled_type) = profiled_type { - recv = self.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state, recompile: Some(Recompile) }); - self.insn_types[recv.0] 
= self.infer_type(recv); + recv = self.guard_type_recompile(block, recv, Type::from_profiled_type(profiled_type), state, Recompile); } let replacement = self.try_inline_send_direct(block, Insn::SendDirect { recv, cd, cme, iseq, args: processed_args, kw_bits, state: send_state, block: None }); @@ -3959,8 +3964,7 @@ impl Function { let id = unsafe { get_cme_def_body_attr_id(cme) }; if let Some(profiled_type) = profiled_type { - recv = self.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state, recompile: Some(Recompile) }); - self.insn_types[recv.0] = self.infer_type(recv); + recv = self.guard_type_recompile(block, recv, Type::from_profiled_type(profiled_type), state, Recompile); let replacement = self.try_emit_optimized_getivar(block, recv, id, profiled_type, state).unwrap_or_else(|counter| { self.count(block, counter); @@ -3989,9 +3993,8 @@ impl Function { // TODO: attr_writer SetIvar has a null inline cache and may target a receiver // operand other than CFP self. Support it with a reprofile strategy that // profiles the receiver operand even after the send insn has finished profiling. 
+ recv = self.guard_type_recompile(block, recv, Type::from_profiled_type(profiled_type), state, Recompile); let recompile = None; - recv = self.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state, recompile: Some(Recompile) }); - self.insn_types[recv.0] = self.infer_type(recv); self.try_emit_optimized_setivar(block, recv, id, val, profiled_type, state, recompile).unwrap_or_else(|counter| { self.count(block, counter); self.push_insn(block, Insn::SetIvar { self_val: recv, id, ic: std::ptr::null(), val, state }); @@ -4017,8 +4020,7 @@ impl Function { } self.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass, method: mid, cme }, state }); if let Some(profiled_type) = profiled_type { - recv = self.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state, recompile: Some(Recompile) }); - self.insn_types[recv.0] = self.infer_type(recv); + recv = self.guard_type_recompile(block, recv, Type::from_profiled_type(profiled_type), state, Recompile); } let kw_splat = flags & VM_CALL_KW_SPLAT != 0; let invoke_proc = self.push_insn(block, Insn::InvokeProc { recv, args: args.clone(), state, kw_splat }); @@ -4056,8 +4058,7 @@ impl Function { } self.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass, method: mid, cme }, state }); if let Some(profiled_type) = profiled_type { - recv = self.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state, recompile: Some(Recompile) }); - self.insn_types[recv.0] = self.infer_type(recv); + recv = self.guard_type_recompile(block, recv, Type::from_profiled_type(profiled_type), state, Recompile); } // All structs from the same Struct class should have the same // length. 
So if our recv is embedded all runtime @@ -4175,8 +4176,7 @@ impl Function { if let Some(profiled_type) = profiled_type { // Guard receiver class - recv = fun.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state, recompile: Some(Recompile) }); - fun.insn_types[recv.0] = fun.infer_type(recv); + recv = fun.guard_type_recompile(block, recv, Type::from_profiled_type(profiled_type), state, Recompile); } // Try inlining the cfunc into HIR. Only inline if we don't have a block argument @@ -4243,8 +4243,7 @@ impl Function { if let Some(profiled_type) = profiled_type { // Guard receiver class - recv = fun.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state, recompile: Some(Recompile) }); - fun.insn_types[recv.0] = fun.infer_type(recv); + recv = fun.guard_type_recompile(block, recv, Type::from_profiled_type(profiled_type), state, Recompile); } // Try inlining the cfunc into HIR. Only inline if we don't have a block argument From 33e58bca4daa4552245fdccc93854d255356c70e Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Thu, 2 Jul 2026 00:05:14 -0400 Subject: [PATCH 31/31] ZJIT: Clean up confusing comment Co-authored-by: Takashi Kokubun --- zjit/src/hir.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index f6d6b60ddf945d..dd4bbba8f25a65 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -4264,7 +4264,7 @@ impl Function { return Ok(()); } - // Only allow leaf calls if we don't have a block argument + // Only allow inline calls if they are leaf, don't allocate, and don't have a block argument if props.leaf && props.no_gc { fun.count(block, Counter::inline_cfunc_optimized_send_count); let owner = unsafe { (*cme).owner };