From f486ad8eedd035046f473b0e10380783408e2e35 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 26 Jun 2026 15:29:52 +0900 Subject: [PATCH 01/15] Make rb_newobj_of_with_shape static It's not used outside of gc.c. --- gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gc.c b/gc.c index f7892531dd09d9..1baea0cdbeb679 100644 --- a/gc.c +++ b/gc.c @@ -1077,7 +1077,7 @@ rb_ec_newobj_of(rb_execution_context_t *ec, VALUE klass, VALUE flags, size_t siz return rb_newobj(ec, klass, flags, ROOT_SHAPE_ID | SHAPE_ID_LAYOUT_OTHER, true, size); } -VALUE +static VALUE rb_newobj_of_with_shape(VALUE klass, VALUE flags, shape_id_t shape_id, size_t size) { return rb_newobj(GET_EC(), klass, flags, shape_id, true, size); From feb0b2849fcccdd0b4a175fe6ac67592de865324 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Fri, 26 Jun 2026 16:08:05 +0900 Subject: [PATCH 02/15] [ruby/error_highlight] Bump version to 0.7.2 https://github.com/ruby/error_highlight/commit/d945ffd2aa --- lib/error_highlight/version.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/error_highlight/version.rb b/lib/error_highlight/version.rb index f0a5376b143b75..87c8d91b13fcf1 100644 --- a/lib/error_highlight/version.rb +++ b/lib/error_highlight/version.rb @@ -1,3 +1,3 @@ module ErrorHighlight - VERSION = "0.7.1" + VERSION = "0.7.2" end From aefb6f578c2fe42fe18ce62e05518e4a00dc31db Mon Sep 17 00:00:00 2001 From: git Date: Fri, 26 Jun 2026 07:09:39 +0000 Subject: [PATCH 03/15] Update default gems list at feb0b2849fcccdd0b4a175fe6ac675 [ci skip] --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index cb208d73446799..1ac45e2da77769 100644 --- a/NEWS.md +++ b/NEWS.md @@ -80,6 +80,7 @@ releases. * 4.0.3 to [v4.0.4][bundler-v4.0.4], [v4.0.5][bundler-v4.0.5], [v4.0.6][bundler-v4.0.6], [v4.0.7][bundler-v4.0.7], [v4.0.8][bundler-v4.0.8], [v4.0.9][bundler-v4.0.9], [v4.0.10][bundler-v4.0.10], [v4.0.11][bundler-v4.0.11], [v4.0.12][bundler-v4.0.12], [v4.0.13][bundler-v4.0.13], [v4.0.14][bundler-v4.0.14], [v4.0.15][bundler-v4.0.15] * erb 6.0.4 * 6.0.1 to [v6.0.1.1][erb-v6.0.1.1], [v6.0.2][erb-v6.0.2], [v6.0.3][erb-v6.0.3], [v6.0.4][erb-v6.0.4] +* error_highlight 0.7.2 * ipaddr 1.2.9 * 1.2.8 to [v1.2.9][ipaddr-v1.2.9] * json 2.20.0 From d8bf0a28cceb9fce71455de9f1ed93cb70493b2d Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Mon, 22 Jun 2026 22:14:20 +0100 Subject: [PATCH 04/15] Add monotonic wall-clock times to GC::Profiler --- gc/default/default.c | 73 ++++++++++++++++++++++++++++++++++++++++++-- test/ruby/test_gc.rb | 19 ++++++++++++ 2 files changed, 90 insertions(+), 2 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 5151fe2c85ac1d..b3796b43efff59 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -33,6 +33,7 @@ #include "darray.h" #include "gc/gc.h" #include "gc/gc_impl.h" +#include "hrtime.h" #include "probes.h" @@ -377,6 +378,9 @@ typedef struct gc_profile_record { double gc_time; double gc_invoke_time; + rb_hrtime_t gc_wall_time; + rb_hrtime_t gc_invoke_wall_time; + rb_hrtime_t gc_pause_time; size_t heap_total_objects; size_t heap_use_size; @@ -590,6 +594,7 @@ typedef struct rb_objspace { double prepare_time; #endif double invoke_time; + rb_hrtime_t invoke_wall_time; size_t minor_gc_count; size_t major_gc_count; @@ -615,6 +620,9 @@ typedef struct rb_objspace { /* temporary profiling space */ double gc_sweep_start_time; + rb_hrtime_t gc_wall_start_time; + rb_hrtime_t gc_sweep_wall_start_time; + rb_hrtime_t gc_pause_start_time; size_t total_allocated_objects_at_gc_start; size_t heap_used_at_gc_start; size_t heap_total_slots_at_gc_start; @@ -7028,6 +7036,7 @@ gc_enter_count(enum gc_enter_event event) } static bool current_process_time(struct timespec *ts); +static inline rb_hrtime_t elapsed_hrtime_from(rb_hrtime_t start); static void gc_clock_start(struct timespec *ts) @@ -7058,6 +7067,18 @@ gc_enter(rb_objspace_t *objspace, enum gc_enter_event event, unsigned int *lock_ { *lock_lev = RB_GC_VM_LOCK(); + if (objspace->profile.run) { + switch (event) { + case gc_enter_event_start: + case gc_enter_event_continue: + case gc_enter_event_rest: + objspace->profile.gc_pause_start_time = rb_hrtime_now(); + break; + case gc_enter_event_finalizer: + break; + } + } + switch (event) { case gc_enter_event_rest: case gc_enter_event_start: @@ -7088,6 +7109,15 @@ gc_exit(rb_objspace_t *objspace, enum gc_enter_event event, unsigned int *lock_l rb_gc_event_hook(0, RUBY_INTERNAL_EVENT_GC_EXIT); + if (objspace->profile.gc_pause_start_time) { + if (gc_prof_enabled(objspace)) { + gc_profile_record *record = gc_prof_record(objspace); + record->gc_pause_time = rb_hrtime_add(record->gc_pause_time, + elapsed_hrtime_from(objspace->profile.gc_pause_start_time)); + } + objspace->profile.gc_pause_start_time = 0; + } + gc_record(objspace, 1, gc_enter_event_cstr(event)); RUBY_DEBUG_LOG("%s (%s)", gc_enter_event_cstr(event), gc_current_status(objspace)); gc_report(1, objspace, "gc_exit: %s [%s]\n", gc_enter_event_cstr(event), gc_current_status(objspace)); @@ -8899,6 +8929,18 @@ getrusage_time(void) } } +static inline double +hrtime_to_sec(rb_hrtime_t time) +{ + return (double)time / (double)RB_HRTIME_PER_SEC; +} + +static inline rb_hrtime_t +elapsed_hrtime_from(rb_hrtime_t start) +{ + return rb_hrtime_sub(rb_hrtime_now(), start); +} + static inline void gc_prof_setup_new_record(rb_objspace_t *objspace, unsigned int reason) @@ -8929,6 +8971,8 @@ gc_prof_setup_new_record(rb_objspace_t *objspace, unsigned int reason) /* setup before-GC parameter */ record->flags = reason | (ruby_gc_stressful ? GPR_FLAG_STRESS : 0); + record->gc_invoke_wall_time = rb_hrtime_sub(rb_hrtime_now(), + objspace->profile.invoke_wall_time); #if MALLOC_ALLOCATED_SIZE record->allocated_size = malloc_allocated_size; #endif @@ -8957,6 +9001,7 @@ gc_prof_timer_start(rb_objspace_t *objspace) #endif record->gc_time = 0; record->gc_invoke_time = getrusage_time(); + objspace->profile.gc_wall_start_time = rb_hrtime_now(); } } @@ -8979,6 +9024,7 @@ gc_prof_timer_stop(rb_objspace_t *objspace) gc_profile_record *record = gc_prof_record(objspace); record->gc_time = elapsed_time_from(record->gc_invoke_time); record->gc_invoke_time -= objspace->profile.invoke_time; + record->gc_wall_time = elapsed_hrtime_from(objspace->profile.gc_wall_start_time); } } @@ -9017,6 +9063,7 @@ gc_prof_sweep_timer_start(rb_objspace_t *objspace) if (record->gc_time > 0 || GC_PROFILE_MORE_DETAIL) { objspace->profile.gc_sweep_start_time = getrusage_time(); + objspace->profile.gc_sweep_wall_start_time = rb_hrtime_now(); } } } @@ -9034,6 +9081,8 @@ gc_prof_sweep_timer_stop(rb_objspace_t *objspace) sweep_time = elapsed_time_from(objspace->profile.gc_sweep_start_time); /* need to accumulate GC time for lazy sweep after gc() */ record->gc_time += sweep_time; + record->gc_wall_time = rb_hrtime_add(record->gc_wall_time, + elapsed_hrtime_from(objspace->profile.gc_sweep_wall_start_time)); } else if (GC_PROFILE_MORE_DETAIL) { sweep_time = elapsed_time_from(objspace->profile.gc_sweep_start_time); @@ -9124,6 +9173,9 @@ gc_profile_clear(VALUE _) * { * :GC_TIME=>1.3000000000000858e-05, * :GC_INVOKE_TIME=>0.010634999999999999, + * :GC_WALL_TIME=>1.4000000000000001e-05, + * :GC_INVOKE_WALL_TIME=>0.010640000000000000, + * :GC_PAUSE_TIME=>1.5000000000000000e-05, * :HEAP_USE_SIZE=>289640, * :HEAP_TOTAL_SIZE=>588960, * :HEAP_TOTAL_OBJECTS=>14724, @@ -9135,9 +9187,19 @@ gc_profile_clear(VALUE _) * The keys mean: * * +:GC_TIME+:: - * Time elapsed in seconds for this GC run + * CPU time elapsed in seconds for this GC run. This is process CPU time, + * not elapsed wall-clock time. * +:GC_INVOKE_TIME+:: - * Time elapsed in seconds from startup to when the GC was invoked + * CPU time elapsed in seconds from startup to when the GC was invoked. + * +:GC_WALL_TIME+:: + * Monotonic wall-clock time elapsed in seconds for this GC run. + * +:GC_INVOKE_WALL_TIME+:: + * Monotonic wall-clock time elapsed in seconds from startup to when the GC + * was invoked. + * +:GC_PAUSE_TIME+:: + * Monotonic wall-clock time elapsed in seconds while the VM was in GC, + * including time to stop other ractors. This may include time from + * incremental marking or lazy sweeping continuation charged to this record. * +:HEAP_USE_SIZE+:: * Total bytes of heap used * +:HEAP_TOTAL_SIZE+:: @@ -9180,6 +9242,12 @@ gc_profile_record_get(VALUE _) rb_hash_aset(prof, ID2SYM(rb_intern("GC_FLAGS")), gc_info_decode(objspace, rb_hash_new(), record->flags)); rb_hash_aset(prof, ID2SYM(rb_intern("GC_TIME")), DBL2NUM(record->gc_time)); rb_hash_aset(prof, ID2SYM(rb_intern("GC_INVOKE_TIME")), DBL2NUM(record->gc_invoke_time)); + rb_hash_aset(prof, ID2SYM(rb_intern("GC_WALL_TIME")), + DBL2NUM(hrtime_to_sec(record->gc_wall_time))); + rb_hash_aset(prof, ID2SYM(rb_intern("GC_INVOKE_WALL_TIME")), + DBL2NUM(hrtime_to_sec(record->gc_invoke_wall_time))); + rb_hash_aset(prof, ID2SYM(rb_intern("GC_PAUSE_TIME")), + DBL2NUM(hrtime_to_sec(record->gc_pause_time))); rb_hash_aset(prof, ID2SYM(rb_intern("HEAP_USE_SIZE")), SIZET2NUM(record->heap_use_size)); rb_hash_aset(prof, ID2SYM(rb_intern("HEAP_TOTAL_SIZE")), SIZET2NUM(record->heap_total_size)); rb_hash_aset(prof, ID2SYM(rb_intern("HEAP_TOTAL_OBJECTS")), SIZET2NUM(record->heap_total_objects)); @@ -9965,6 +10033,7 @@ rb_gc_impl_objspace_init(void *objspace_ptr) init_mark_stack(&objspace->mark_stack); objspace->profile.invoke_time = getrusage_time(); + objspace->profile.invoke_wall_time = rb_hrtime_now(); finalizer_table = st_init_numtable(); } diff --git a/test/ruby/test_gc.rb b/test/ruby/test_gc.rb index 21448294c2073c..fcefb81ecc4003 100644 --- a/test/ruby/test_gc.rb +++ b/test/ruby/test_gc.rb @@ -580,6 +580,25 @@ def test_profiler_raw_data GC::Profiler.disable end + def test_profiler_raw_data_includes_wall_time + GC::Profiler.enable + GC::Profiler.clear + + GC.start + record = GC::Profiler.raw_data.last + + assert_kind_of Float, record[:GC_WALL_TIME] + assert_kind_of Float, record[:GC_INVOKE_WALL_TIME] + assert_kind_of Float, record[:GC_PAUSE_TIME] + + assert_operator record[:GC_WALL_TIME], :>=, 0.0 + assert_operator record[:GC_INVOKE_WALL_TIME], :>=, 0.0 + assert_operator record[:GC_PAUSE_TIME], :>=, 0.0 + ensure + GC::Profiler.disable + GC::Profiler.clear + end + def test_profiler_total_time GC::Profiler.enable GC::Profiler.clear From 2da4c0e8210dfa7287f8282a5b66aa1a9e674c29 Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Mon, 22 Jun 2026 22:14:32 +0100 Subject: [PATCH 05/15] [Doc] Document GC::Profiler.raw_data --- gc/default/default.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gc/default/default.c b/gc/default/default.c index b3796b43efff59..b41e6614834efd 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -10103,8 +10103,14 @@ rb_gc_impl_init(void) * * GC::Profiler.report * + * pp GC::Profiler.raw_data + * * GC::Profiler.disable * + * GC::Profiler.raw_data returns one Hash per GC run, including CPU time + * fields such as +:GC_TIME+ and wall-clock fields such as +:GC_WALL_TIME+ + * and +:GC_PAUSE_TIME+. + * * See also GC.count, GC.malloc_allocated_size and GC.malloc_allocations */ VALUE rb_mProfiler = rb_define_module_under(rb_mGC, "Profiler"); From 10857e07d2b2f6663ece30c22163f7ae4edc8fef Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Mon, 22 Jun 2026 22:14:38 +0100 Subject: [PATCH 06/15] Split out STW time from total GC pause time. GC_PAUSE_TIME == GC_STOP_TIME + GC_STW_TIME --- gc/default/default.c | 57 ++++++++++++++++++++++++++++++++++++++------ test/ruby/test_gc.rb | 5 ++++ 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index b41e6614834efd..96e8e8eb5506a2 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -381,6 +381,8 @@ typedef struct gc_profile_record { rb_hrtime_t gc_wall_time; rb_hrtime_t gc_invoke_wall_time; rb_hrtime_t gc_pause_time; + rb_hrtime_t gc_stop_time; + rb_hrtime_t gc_stw_time; size_t heap_total_objects; size_t heap_use_size; @@ -623,6 +625,8 @@ typedef struct rb_objspace { rb_hrtime_t gc_wall_start_time; rb_hrtime_t gc_sweep_wall_start_time; rb_hrtime_t gc_pause_start_time; + rb_hrtime_t gc_stw_start_time; + rb_hrtime_t gc_stop_time; size_t total_allocated_objects_at_gc_start; size_t heap_used_at_gc_start; size_t heap_total_slots_at_gc_start; @@ -7090,6 +7094,13 @@ gc_enter(rb_objspace_t *objspace, enum gc_enter_event event, unsigned int *lock_ break; } + if (objspace->profile.gc_pause_start_time) { + objspace->profile.gc_stw_start_time = rb_hrtime_now(); + objspace->profile.gc_stop_time = rb_hrtime_sub( + objspace->profile.gc_stw_start_time, + objspace->profile.gc_pause_start_time); + } + gc_enter_count(event); if (RB_UNLIKELY(during_gc != 0)) rb_bug("during_gc != 0"); if (RGENGC_CHECK_MODE >= 3) gc_verify_internal_consistency(objspace); @@ -7111,11 +7122,18 @@ gc_exit(rb_objspace_t *objspace, enum gc_enter_event event, unsigned int *lock_l if (objspace->profile.gc_pause_start_time) { if (gc_prof_enabled(objspace)) { + rb_hrtime_t now = rb_hrtime_now(); gc_profile_record *record = gc_prof_record(objspace); record->gc_pause_time = rb_hrtime_add(record->gc_pause_time, - elapsed_hrtime_from(objspace->profile.gc_pause_start_time)); + rb_hrtime_sub(now, objspace->profile.gc_pause_start_time)); + record->gc_stop_time = rb_hrtime_add(record->gc_stop_time, + objspace->profile.gc_stop_time); + record->gc_stw_time = rb_hrtime_add(record->gc_stw_time, + rb_hrtime_sub(now, objspace->profile.gc_stw_start_time)); } objspace->profile.gc_pause_start_time = 0; + objspace->profile.gc_stw_start_time = 0; + objspace->profile.gc_stop_time = 0; } gc_record(objspace, 1, gc_enter_event_cstr(event)); @@ -9176,6 +9194,8 @@ gc_profile_clear(VALUE _) * :GC_WALL_TIME=>1.4000000000000001e-05, * :GC_INVOKE_WALL_TIME=>0.010640000000000000, * :GC_PAUSE_TIME=>1.5000000000000000e-05, + * :GC_STOP_TIME=>1.0000000000000000e-06, + * :GC_STW_TIME=>1.4000000000000001e-05, * :HEAP_USE_SIZE=>289640, * :HEAP_TOTAL_SIZE=>588960, * :HEAP_TOTAL_OBJECTS=>14724, @@ -9192,14 +9212,22 @@ gc_profile_clear(VALUE _) * +:GC_INVOKE_TIME+:: * CPU time elapsed in seconds from startup to when the GC was invoked. * +:GC_WALL_TIME+:: - * Monotonic wall-clock time elapsed in seconds for this GC run. + * Monotonic wall-clock time elapsed in seconds spent doing collector work + * for this GC record. This does not include time spent stopping other + * ractors before the VM enters GC. * +:GC_INVOKE_WALL_TIME+:: * Monotonic wall-clock time elapsed in seconds from startup to when the GC * was invoked. * +:GC_PAUSE_TIME+:: - * Monotonic wall-clock time elapsed in seconds while the VM was in GC, - * including time to stop other ractors. This may include time from - * incremental marking or lazy sweeping continuation charged to this record. + * Monotonic wall-clock time elapsed in seconds while user execution was + * blocked by this GC entry, including time to stop other ractors. This + * may include time from incremental marking or lazy sweeping continuation + * charged to this record. + * +:GC_STOP_TIME+:: + * Monotonic wall-clock time elapsed in seconds stopping other ractors. + * +:GC_STW_TIME+:: + * Monotonic wall-clock time elapsed in seconds after other ractors have + * stopped and before the VM exits GC. * +:HEAP_USE_SIZE+:: * Total bytes of heap used * +:HEAP_TOTAL_SIZE+:: @@ -9209,6 +9237,15 @@ gc_profile_clear(VALUE _) * +:GC_IS_MARKED+:: * Returns +true+ if the GC is in mark phase * + * The wall-clock timing fields relate to each other as follows: + * + * GC_PAUSE_TIME == GC_STOP_TIME + GC_STW_TIME + * + * +:GC_WALL_TIME+ measures collector work and is nested inside + * +:GC_STW_TIME+, so it must not be added to +:GC_STW_TIME+. The difference + * +GC_STW_TIME - GC_WALL_TIME+ is VM overhead inside the stopped interval + * (GC event hooks, bookkeeping, consistency checks, and continuation work). + * * If ruby was built with +GC_PROFILE_MORE_DETAIL+, you will also have access * to the following hash keys: * @@ -9248,6 +9285,10 @@ gc_profile_record_get(VALUE _) DBL2NUM(hrtime_to_sec(record->gc_invoke_wall_time))); rb_hash_aset(prof, ID2SYM(rb_intern("GC_PAUSE_TIME")), DBL2NUM(hrtime_to_sec(record->gc_pause_time))); + rb_hash_aset(prof, ID2SYM(rb_intern("GC_STOP_TIME")), + DBL2NUM(hrtime_to_sec(record->gc_stop_time))); + rb_hash_aset(prof, ID2SYM(rb_intern("GC_STW_TIME")), + DBL2NUM(hrtime_to_sec(record->gc_stw_time))); rb_hash_aset(prof, ID2SYM(rb_intern("HEAP_USE_SIZE")), SIZET2NUM(record->heap_use_size)); rb_hash_aset(prof, ID2SYM(rb_intern("HEAP_TOTAL_SIZE")), SIZET2NUM(record->heap_total_size)); rb_hash_aset(prof, ID2SYM(rb_intern("HEAP_TOTAL_OBJECTS")), SIZET2NUM(record->heap_total_objects)); @@ -10108,8 +10149,10 @@ rb_gc_impl_init(void) * GC::Profiler.disable * * GC::Profiler.raw_data returns one Hash per GC run, including CPU time - * fields such as +:GC_TIME+ and wall-clock fields such as +:GC_WALL_TIME+ - * and +:GC_PAUSE_TIME+. + * fields such as +:GC_TIME+ and wall-clock fields such as +:GC_WALL_TIME+, + * +:GC_PAUSE_TIME+, +:GC_STOP_TIME+, and +:GC_STW_TIME+. +:GC_WALL_TIME+ + * measures collector work for the record, while +:GC_PAUSE_TIME+ measures + * how long user execution was blocked by the GC entry. * * See also GC.count, GC.malloc_allocated_size and GC.malloc_allocations */ diff --git a/test/ruby/test_gc.rb b/test/ruby/test_gc.rb index fcefb81ecc4003..70a1dbc0c53956 100644 --- a/test/ruby/test_gc.rb +++ b/test/ruby/test_gc.rb @@ -590,10 +590,15 @@ def test_profiler_raw_data_includes_wall_time assert_kind_of Float, record[:GC_WALL_TIME] assert_kind_of Float, record[:GC_INVOKE_WALL_TIME] assert_kind_of Float, record[:GC_PAUSE_TIME] + assert_kind_of Float, record[:GC_STOP_TIME] + assert_kind_of Float, record[:GC_STW_TIME] assert_operator record[:GC_WALL_TIME], :>=, 0.0 assert_operator record[:GC_INVOKE_WALL_TIME], :>=, 0.0 assert_operator record[:GC_PAUSE_TIME], :>=, 0.0 + assert_operator record[:GC_STOP_TIME], :>=, 0.0 + assert_operator record[:GC_STW_TIME], :>=, 0.0 + assert_in_delta record[:GC_PAUSE_TIME], record[:GC_STOP_TIME] + record[:GC_STW_TIME], 0.001 ensure GC::Profiler.disable GC::Profiler.clear From e1818cde3de901c2f4e006c4c94c0c1e419b160a Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Mon, 22 Jun 2026 22:14:43 +0100 Subject: [PATCH 07/15] Add mark and sweep wall times to GC::Profiler Assisted-By: devx/46f21e8b-0835-4833-801f-ce5e336dbf61 --- gc/default/default.c | 36 ++++++++++++++++++++++++++++++++++++ test/ruby/test_gc.rb | 5 +++++ 2 files changed, 41 insertions(+) diff --git a/gc/default/default.c b/gc/default/default.c index 96e8e8eb5506a2..9c5095c778e60e 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -383,6 +383,8 @@ typedef struct gc_profile_record { rb_hrtime_t gc_pause_time; rb_hrtime_t gc_stop_time; rb_hrtime_t gc_stw_time; + rb_hrtime_t gc_mark_wall_time; + rb_hrtime_t gc_sweep_wall_time; size_t heap_total_objects; size_t heap_use_size; @@ -627,6 +629,8 @@ typedef struct rb_objspace { rb_hrtime_t gc_pause_start_time; rb_hrtime_t gc_stw_start_time; rb_hrtime_t gc_stop_time; + rb_hrtime_t gc_mark_phase_wall_start_time; + rb_hrtime_t gc_sweep_phase_wall_start_time; size_t total_allocated_objects_at_gc_start; size_t heap_used_at_gc_start; size_t heap_total_slots_at_gc_start; @@ -7155,6 +7159,10 @@ gc_marking_enter(rb_objspace_t *objspace) gc_prof_mark_timer_start(objspace); + if (gc_prof_enabled(objspace)) { + objspace->profile.gc_mark_phase_wall_start_time = rb_hrtime_now(); + } + if (MEASURE_GC) { gc_clock_start(&objspace->profile.marking_start_time); } @@ -7171,6 +7179,12 @@ gc_marking_exit(rb_objspace_t *objspace) objspace->profile.marking_time_ns += gc_clock_end(&objspace->profile.marking_start_time); } + if (gc_prof_enabled(objspace)) { + gc_profile_record *record = gc_prof_record(objspace); + record->gc_mark_wall_time = rb_hrtime_add(record->gc_mark_wall_time, + elapsed_hrtime_from(objspace->profile.gc_mark_phase_wall_start_time)); + } + gc_prof_mark_timer_stop(objspace); } @@ -7179,6 +7193,10 @@ gc_sweeping_enter(rb_objspace_t *objspace) { GC_ASSERT(during_gc != 0); + if (gc_prof_enabled(objspace)) { + objspace->profile.gc_sweep_phase_wall_start_time = rb_hrtime_now(); + } + if (MEASURE_GC) { gc_clock_start(&objspace->profile.sweeping_start_time); } @@ -7192,6 +7210,12 @@ gc_sweeping_exit(rb_objspace_t *objspace) if (MEASURE_GC) { objspace->profile.sweeping_time_ns += gc_clock_end(&objspace->profile.sweeping_start_time); } + + if (gc_prof_enabled(objspace)) { + gc_profile_record *record = gc_prof_record(objspace); + record->gc_sweep_wall_time = rb_hrtime_add(record->gc_sweep_wall_time, + elapsed_hrtime_from(objspace->profile.gc_sweep_phase_wall_start_time)); + } } static void * @@ -9196,6 +9220,8 @@ gc_profile_clear(VALUE _) * :GC_PAUSE_TIME=>1.5000000000000000e-05, * :GC_STOP_TIME=>1.0000000000000000e-06, * :GC_STW_TIME=>1.4000000000000001e-05, + * :GC_MARK_WALL_TIME=>9.0000000000000002e-06, + * :GC_SWEEP_WALL_TIME=>5.0000000000000004e-06, * :HEAP_USE_SIZE=>289640, * :HEAP_TOTAL_SIZE=>588960, * :HEAP_TOTAL_OBJECTS=>14724, @@ -9228,6 +9254,12 @@ gc_profile_clear(VALUE _) * +:GC_STW_TIME+:: * Monotonic wall-clock time elapsed in seconds after other ractors have * stopped and before the VM exits GC. + * +:GC_MARK_WALL_TIME+:: + * Monotonic wall-clock time elapsed in seconds spent marking for this GC + * record, accumulated across incremental marking continuations. + * +:GC_SWEEP_WALL_TIME+:: + * Monotonic wall-clock time elapsed in seconds spent sweeping for this GC + * record, accumulated across lazy sweeping continuations. * +:HEAP_USE_SIZE+:: * Total bytes of heap used * +:HEAP_TOTAL_SIZE+:: @@ -9289,6 +9321,10 @@ gc_profile_record_get(VALUE _) DBL2NUM(hrtime_to_sec(record->gc_stop_time))); rb_hash_aset(prof, ID2SYM(rb_intern("GC_STW_TIME")), DBL2NUM(hrtime_to_sec(record->gc_stw_time))); + rb_hash_aset(prof, ID2SYM(rb_intern("GC_MARK_WALL_TIME")), + DBL2NUM(hrtime_to_sec(record->gc_mark_wall_time))); + rb_hash_aset(prof, ID2SYM(rb_intern("GC_SWEEP_WALL_TIME")), + DBL2NUM(hrtime_to_sec(record->gc_sweep_wall_time))); rb_hash_aset(prof, ID2SYM(rb_intern("HEAP_USE_SIZE")), SIZET2NUM(record->heap_use_size)); rb_hash_aset(prof, ID2SYM(rb_intern("HEAP_TOTAL_SIZE")), SIZET2NUM(record->heap_total_size)); rb_hash_aset(prof, ID2SYM(rb_intern("HEAP_TOTAL_OBJECTS")), SIZET2NUM(record->heap_total_objects)); diff --git a/test/ruby/test_gc.rb b/test/ruby/test_gc.rb index 70a1dbc0c53956..5e15e1d6d30281 100644 --- a/test/ruby/test_gc.rb +++ b/test/ruby/test_gc.rb @@ -592,13 +592,18 @@ def test_profiler_raw_data_includes_wall_time assert_kind_of Float, record[:GC_PAUSE_TIME] assert_kind_of Float, record[:GC_STOP_TIME] assert_kind_of Float, record[:GC_STW_TIME] + assert_kind_of Float, record[:GC_MARK_WALL_TIME] + assert_kind_of Float, record[:GC_SWEEP_WALL_TIME] assert_operator record[:GC_WALL_TIME], :>=, 0.0 assert_operator record[:GC_INVOKE_WALL_TIME], :>=, 0.0 assert_operator record[:GC_PAUSE_TIME], :>=, 0.0 assert_operator record[:GC_STOP_TIME], :>=, 0.0 assert_operator record[:GC_STW_TIME], :>=, 0.0 + assert_operator record[:GC_MARK_WALL_TIME], :>=, 0.0 + assert_operator record[:GC_SWEEP_WALL_TIME], :>=, 0.0 assert_in_delta record[:GC_PAUSE_TIME], record[:GC_STOP_TIME] + record[:GC_STW_TIME], 0.001 + assert_operator record[:GC_MARK_WALL_TIME] + record[:GC_SWEEP_WALL_TIME], :<=, record[:GC_PAUSE_TIME] + 0.001 ensure GC::Profiler.disable GC::Profiler.clear From 48516c7af629861dd3d7c2f8692ee1bc840dc125 Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Mon, 22 Jun 2026 22:14:51 +0100 Subject: [PATCH 08/15] Add compaction wall time to GC::Profiler Assisted-By: devx/46f21e8b-0835-4833-801f-ce5e336dbf61 --- gc/default/default.c | 21 +++++++++++++++++++-- test/ruby/test_gc.rb | 24 ++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 9c5095c778e60e..8e900f11d0c2e4 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -385,6 +385,7 @@ typedef struct gc_profile_record { rb_hrtime_t gc_stw_time; rb_hrtime_t gc_mark_wall_time; rb_hrtime_t gc_sweep_wall_time; + rb_hrtime_t gc_compact_wall_time; size_t heap_total_objects; size_t heap_use_size; @@ -1269,6 +1270,7 @@ NO_SANITIZE("memory", static inline bool is_pointer_to_heap(rb_objspace_t *objsp static void gc_verify_internal_consistency(void *objspace_ptr); static double getrusage_time(void); +static inline rb_hrtime_t elapsed_hrtime_from(rb_hrtime_t start); static inline void gc_prof_setup_new_record(rb_objspace_t *objspace, unsigned int reason); static inline void gc_prof_timer_start(rb_objspace_t *); static inline void gc_prof_timer_stop(rb_objspace_t *); @@ -4521,7 +4523,13 @@ gc_sweep(rb_objspace_t *objspace) gc_sweep_start(objspace); if (objspace->flags.during_compacting) { + rb_hrtime_t compact_start_time = gc_prof_enabled(objspace) ? rb_hrtime_now() : 0; gc_sweep_compact(objspace); + if (gc_prof_enabled(objspace)) { + gc_profile_record *record = gc_prof_record(objspace); + record->gc_compact_wall_time = rb_hrtime_add(record->gc_compact_wall_time, + elapsed_hrtime_from(compact_start_time)); + } } if (immediate_sweep) { @@ -7044,7 +7052,6 @@ gc_enter_count(enum gc_enter_event event) } static bool current_process_time(struct timespec *ts); -static inline rb_hrtime_t elapsed_hrtime_from(rb_hrtime_t start); static void gc_clock_start(struct timespec *ts) @@ -9222,6 +9229,7 @@ gc_profile_clear(VALUE _) * :GC_STW_TIME=>1.4000000000000001e-05, * :GC_MARK_WALL_TIME=>9.0000000000000002e-06, * :GC_SWEEP_WALL_TIME=>5.0000000000000004e-06, + * :GC_COMPACT_WALL_TIME=>0.0000000000000000e+00, * :HEAP_USE_SIZE=>289640, * :HEAP_TOTAL_SIZE=>588960, * :HEAP_TOTAL_OBJECTS=>14724, @@ -9259,7 +9267,14 @@ gc_profile_clear(VALUE _) * record, accumulated across incremental marking continuations. * +:GC_SWEEP_WALL_TIME+:: * Monotonic wall-clock time elapsed in seconds spent sweeping for this GC - * record, accumulated across lazy sweeping continuations. + * record, accumulated across lazy sweeping continuations. This includes any + * compaction time, which is also reported separately as + * +:GC_COMPACT_WALL_TIME+. + * +:GC_COMPACT_WALL_TIME+:: + * Monotonic wall-clock time elapsed in seconds spent compacting for this GC + * record, or +0.0+ if this GC did not compact. Compaction runs within the + * sweep phase, so this time is nested inside +:GC_SWEEP_WALL_TIME+ and must + * not be added to it. * +:HEAP_USE_SIZE+:: * Total bytes of heap used * +:HEAP_TOTAL_SIZE+:: @@ -9325,6 +9340,8 @@ gc_profile_record_get(VALUE _) DBL2NUM(hrtime_to_sec(record->gc_mark_wall_time))); rb_hash_aset(prof, ID2SYM(rb_intern("GC_SWEEP_WALL_TIME")), DBL2NUM(hrtime_to_sec(record->gc_sweep_wall_time))); + rb_hash_aset(prof, ID2SYM(rb_intern("GC_COMPACT_WALL_TIME")), + DBL2NUM(hrtime_to_sec(record->gc_compact_wall_time))); rb_hash_aset(prof, ID2SYM(rb_intern("HEAP_USE_SIZE")), SIZET2NUM(record->heap_use_size)); rb_hash_aset(prof, ID2SYM(rb_intern("HEAP_TOTAL_SIZE")), SIZET2NUM(record->heap_total_size)); rb_hash_aset(prof, ID2SYM(rb_intern("HEAP_TOTAL_OBJECTS")), SIZET2NUM(record->heap_total_objects)); diff --git a/test/ruby/test_gc.rb b/test/ruby/test_gc.rb index 5e15e1d6d30281..8b6c8899c040f5 100644 --- a/test/ruby/test_gc.rb +++ b/test/ruby/test_gc.rb @@ -594,6 +594,7 @@ def test_profiler_raw_data_includes_wall_time assert_kind_of Float, record[:GC_STW_TIME] assert_kind_of Float, record[:GC_MARK_WALL_TIME] assert_kind_of Float, record[:GC_SWEEP_WALL_TIME] + assert_kind_of Float, record[:GC_COMPACT_WALL_TIME] assert_operator record[:GC_WALL_TIME], :>=, 0.0 assert_operator record[:GC_INVOKE_WALL_TIME], :>=, 0.0 @@ -602,8 +603,31 @@ def test_profiler_raw_data_includes_wall_time assert_operator record[:GC_STW_TIME], :>=, 0.0 assert_operator record[:GC_MARK_WALL_TIME], :>=, 0.0 assert_operator record[:GC_SWEEP_WALL_TIME], :>=, 0.0 + assert_operator record[:GC_COMPACT_WALL_TIME], :>=, 0.0 assert_in_delta record[:GC_PAUSE_TIME], record[:GC_STOP_TIME] + record[:GC_STW_TIME], 0.001 assert_operator record[:GC_MARK_WALL_TIME] + record[:GC_SWEEP_WALL_TIME], :<=, record[:GC_PAUSE_TIME] + 0.001 + # No compaction was requested, so compaction time is folded into nothing. + assert_equal 0.0, record[:GC_COMPACT_WALL_TIME] + ensure + GC::Profiler.disable + GC::Profiler.clear + end + + def test_profiler_raw_data_includes_compaction_wall_time + omit "compaction not supported" unless GC.respond_to?(:compact) + + GC::Profiler.enable + GC::Profiler.clear + + GC.compact + record = GC::Profiler.raw_data.last + + assert_kind_of Float, record[:GC_COMPACT_WALL_TIME] + assert_operator record[:GC_COMPACT_WALL_TIME], :>=, 0.0 + # Compaction runs within the sweep phase, so it is bounded by sweep wall time. + assert_operator record[:GC_COMPACT_WALL_TIME], :<=, record[:GC_SWEEP_WALL_TIME] + 0.001 + rescue NotImplementedError + omit "compaction not supported" ensure GC::Profiler.disable GC::Profiler.clear From f214b30d46fbe44fd7dbaf81ed363ff446779e8e Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Tue, 23 Jun 2026 11:37:34 +0100 Subject: [PATCH 09/15] Shim hrtime for modular GC similar to how we shim bits.h already --- gc/default/default.c | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/gc/default/default.c b/gc/default/default.c index 8e900f11d0c2e4..902a7e4010fe64 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -33,7 +33,43 @@ #include "darray.h" #include "gc/gc.h" #include "gc/gc_impl.h" -#include "hrtime.h" + +#ifdef BUILDING_MODULAR_GC +/* hrtime.h transitively includes internal/time.h -> internal/bits.h, which are + * not available to out-of-tree modular GC builds. We only use a monotonic + * clock plus saturating add/sub, so provide that subset locally with the same + * semantics as hrtime.h. */ +# include +typedef uint64_t rb_hrtime_t; +# define RB_HRTIME_PER_SEC ((rb_hrtime_t)1000000000) + +static inline rb_hrtime_t +rb_hrtime_now(void) +{ +# if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC) + struct timespec ts; + if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) { + return (rb_hrtime_t)ts.tv_sec * RB_HRTIME_PER_SEC + (rb_hrtime_t)ts.tv_nsec; + } +# endif + return 0; +} + +static inline rb_hrtime_t +rb_hrtime_add(rb_hrtime_t a, rb_hrtime_t b) +{ + rb_hrtime_t c = a + b; + return c < a ? UINT64_MAX : c; /* saturate on overflow */ +} + +static inline rb_hrtime_t +rb_hrtime_sub(rb_hrtime_t a, rb_hrtime_t b) +{ + return a < b ? 0 : a - b; +} +#else +# include "hrtime.h" +#endif #include "probes.h" From f6ab0a1baed8bfc74f3abf86305e169853764f70 Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Tue, 23 Jun 2026 11:44:02 +0100 Subject: [PATCH 10/15] Fix depends --- depend | 2 ++ 1 file changed, 2 insertions(+) diff --git a/depend b/depend index 95128de92144f0..22ae43b21ad589 100644 --- a/depend +++ b/depend @@ -5965,6 +5965,7 @@ gc.$(OBJEXT): $(top_srcdir)/internal/string.h gc.$(OBJEXT): $(top_srcdir)/internal/struct.h gc.$(OBJEXT): $(top_srcdir)/internal/symbol.h gc.$(OBJEXT): $(top_srcdir)/internal/thread.h +gc.$(OBJEXT): $(top_srcdir)/internal/time.h gc.$(OBJEXT): $(top_srcdir)/internal/variable.h gc.$(OBJEXT): $(top_srcdir)/internal/vm.h gc.$(OBJEXT): $(top_srcdir)/internal/warnings.h @@ -6026,6 +6027,7 @@ gc.$(OBJEXT): {$(VPATH)}encoding.h gc.$(OBJEXT): {$(VPATH)}eval_intern.h gc.$(OBJEXT): {$(VPATH)}gc.c gc.$(OBJEXT): {$(VPATH)}gc.rbinc +gc.$(OBJEXT): {$(VPATH)}hrtime.h gc.$(OBJEXT): {$(VPATH)}id.h gc.$(OBJEXT): {$(VPATH)}id_table.h gc.$(OBJEXT): {$(VPATH)}intern.h From 600acfc91b7dd708e7ba4aa23114f36de6d2f2a9 Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Tue, 23 Jun 2026 12:56:27 +0100 Subject: [PATCH 11/15] MMTk doesn't have GC Profiler --- test/.excludes-mmtk/TestGc.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/.excludes-mmtk/TestGc.rb b/test/.excludes-mmtk/TestGc.rb index cbab458b90ad90..33664807537677 100644 --- a/test/.excludes-mmtk/TestGc.rb +++ b/test/.excludes-mmtk/TestGc.rb @@ -15,6 +15,8 @@ exclude(:test_old_to_young_reference, "testing behaviour specific to default GC") exclude(:test_profiler_enabled, "MMTk does not have GC::Profiler") exclude(:test_profiler_raw_data, "MMTk does not have GC::Profiler") +exclude(:test_profiler_raw_data_includes_compaction_wall_time, "MMTk does not have GC::Profiler") +exclude(:test_profiler_raw_data_includes_wall_time, "MMTk does not have GC::Profiler") exclude(:test_profiler_total_time, "MMTk does not have GC::Profiler") exclude(:test_start_full_mark, "testing behaviour specific to default GC") exclude(:test_start_immediate_sweep, "testing behaviour specific to default GC") From 45e0aaf0e3a81644c768cac3b299b62df8074f52 Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Tue, 23 Jun 2026 14:29:06 +0100 Subject: [PATCH 12/15] Seperate compaction from the sweeping time even when auto-compact is enabled. --- gc/default/default.c | 40 +++++++++++++++++++++++------------ test/.excludes-mmtk/TestGc.rb | 2 +- test/ruby/test_gc.rb | 15 ++++++++----- 3 files changed, 37 insertions(+), 20 deletions(-) diff --git a/gc/default/default.c b/gc/default/default.c index 902a7e4010fe64..bb889fecf873da 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -663,6 +663,7 @@ typedef struct rb_objspace { double gc_sweep_start_time; rb_hrtime_t gc_wall_start_time; rb_hrtime_t gc_sweep_wall_start_time; + rb_hrtime_t gc_sweep_excluded_wall_time; rb_hrtime_t gc_pause_start_time; rb_hrtime_t gc_stw_start_time; rb_hrtime_t gc_stop_time; @@ -4562,9 +4563,13 @@ gc_sweep(rb_objspace_t *objspace) rb_hrtime_t compact_start_time = gc_prof_enabled(objspace) ? rb_hrtime_now() : 0; gc_sweep_compact(objspace); if (gc_prof_enabled(objspace)) { + rb_hrtime_t compact_wall_time = elapsed_hrtime_from(compact_start_time); gc_profile_record *record = gc_prof_record(objspace); record->gc_compact_wall_time = rb_hrtime_add(record->gc_compact_wall_time, - elapsed_hrtime_from(compact_start_time)); + compact_wall_time); + objspace->profile.gc_sweep_excluded_wall_time = rb_hrtime_add( + objspace->profile.gc_sweep_excluded_wall_time, + compact_wall_time); } } @@ -7238,6 +7243,7 @@ gc_sweeping_enter(rb_objspace_t *objspace) if (gc_prof_enabled(objspace)) { objspace->profile.gc_sweep_phase_wall_start_time = rb_hrtime_now(); + objspace->profile.gc_sweep_excluded_wall_time = 0; } if (MEASURE_GC) { @@ -7255,9 +7261,13 @@ gc_sweeping_exit(rb_objspace_t *objspace) } if (gc_prof_enabled(objspace)) { + rb_hrtime_t sweep_wall_time = elapsed_hrtime_from(objspace->profile.gc_sweep_phase_wall_start_time); gc_profile_record *record = gc_prof_record(objspace); + sweep_wall_time = rb_hrtime_sub(sweep_wall_time, + objspace->profile.gc_sweep_excluded_wall_time); record->gc_sweep_wall_time = rb_hrtime_add(record->gc_sweep_wall_time, - elapsed_hrtime_from(objspace->profile.gc_sweep_phase_wall_start_time)); + sweep_wall_time); + objspace->profile.gc_sweep_excluded_wall_time = 0; } } @@ -9282,9 +9292,10 @@ gc_profile_clear(VALUE _) * +:GC_INVOKE_TIME+:: * CPU time elapsed in seconds from startup to when the GC was invoked. * +:GC_WALL_TIME+:: - * Monotonic wall-clock time elapsed in seconds spent doing collector work - * for this GC record. This does not include time spent stopping other - * ractors before the VM enters GC. + * Monotonic wall-clock counterpart to +:GC_TIME+ for this GC record. + * This does not include time spent stopping other ractors before the VM + * enters GC. Use the phase wall-clock fields below for mark, sweep, and + * compaction attribution. * +:GC_INVOKE_WALL_TIME+:: * Monotonic wall-clock time elapsed in seconds from startup to when the GC * was invoked. @@ -9303,14 +9314,12 @@ gc_profile_clear(VALUE _) * record, accumulated across incremental marking continuations. * +:GC_SWEEP_WALL_TIME+:: * Monotonic wall-clock time elapsed in seconds spent sweeping for this GC - * record, accumulated across lazy sweeping continuations. This includes any - * compaction time, which is also reported separately as + * record, accumulated across lazy sweeping continuations. This does not + * include compaction time, which is reported separately as * +:GC_COMPACT_WALL_TIME+. * +:GC_COMPACT_WALL_TIME+:: * Monotonic wall-clock time elapsed in seconds spent compacting for this GC - * record, or +0.0+ if this GC did not compact. Compaction runs within the - * sweep phase, so this time is nested inside +:GC_SWEEP_WALL_TIME+ and must - * not be added to it. + * record, or +0.0+ if this GC did not compact. * +:HEAP_USE_SIZE+:: * Total bytes of heap used * +:HEAP_TOTAL_SIZE+:: @@ -9324,8 +9333,11 @@ gc_profile_clear(VALUE _) * * GC_PAUSE_TIME == GC_STOP_TIME + GC_STW_TIME * - * +:GC_WALL_TIME+ measures collector work and is nested inside - * +:GC_STW_TIME+, so it must not be added to +:GC_STW_TIME+. The difference + * +:GC_MARK_WALL_TIME+, +:GC_SWEEP_WALL_TIME+, and +:GC_COMPACT_WALL_TIME+ + * report separate phase timings and must not be added to +:GC_WALL_TIME+. + * + * +:GC_WALL_TIME+ is the wall-clock counterpart to +:GC_TIME+ and is nested + * inside +:GC_STW_TIME+, so it must not be added to +:GC_STW_TIME+. The difference * +GC_STW_TIME - GC_WALL_TIME+ is VM overhead inside the stopped interval * (GC event hooks, bookkeeping, consistency checks, and continuation work). * @@ -10240,8 +10252,8 @@ rb_gc_impl_init(void) * GC::Profiler.raw_data returns one Hash per GC run, including CPU time * fields such as +:GC_TIME+ and wall-clock fields such as +:GC_WALL_TIME+, * +:GC_PAUSE_TIME+, +:GC_STOP_TIME+, and +:GC_STW_TIME+. +:GC_WALL_TIME+ - * measures collector work for the record, while +:GC_PAUSE_TIME+ measures - * how long user execution was blocked by the GC entry. + * is the wall-clock counterpart to +:GC_TIME+, while +:GC_PAUSE_TIME+ + * measures how long user execution was blocked by the GC entry. * * See also GC.count, GC.malloc_allocated_size and GC.malloc_allocations */ diff --git a/test/.excludes-mmtk/TestGc.rb b/test/.excludes-mmtk/TestGc.rb index 33664807537677..64af24cfa88450 100644 --- a/test/.excludes-mmtk/TestGc.rb +++ b/test/.excludes-mmtk/TestGc.rb @@ -15,7 +15,7 @@ exclude(:test_old_to_young_reference, "testing behaviour specific to default GC") exclude(:test_profiler_enabled, "MMTk does not have GC::Profiler") exclude(:test_profiler_raw_data, "MMTk does not have GC::Profiler") -exclude(:test_profiler_raw_data_includes_compaction_wall_time, "MMTk does not have GC::Profiler") +exclude(:test_profiler_raw_data_reports_compaction_separately_from_sweep_wall_time, "MMTk does not have GC::Profiler") exclude(:test_profiler_raw_data_includes_wall_time, "MMTk does not have GC::Profiler") exclude(:test_profiler_total_time, "MMTk does not have GC::Profiler") exclude(:test_start_full_mark, "testing behaviour specific to default GC") diff --git a/test/ruby/test_gc.rb b/test/ruby/test_gc.rb index 8b6c8899c040f5..dbd9b0edb95398 100644 --- a/test/ruby/test_gc.rb +++ b/test/ruby/test_gc.rb @@ -581,6 +581,9 @@ def test_profiler_raw_data end def test_profiler_raw_data_includes_wall_time + auto_compact = GC.auto_compact if GC.respond_to?(:auto_compact) + GC.auto_compact = false if GC.respond_to?(:auto_compact=) + GC::Profiler.enable GC::Profiler.clear @@ -606,26 +609,28 @@ def test_profiler_raw_data_includes_wall_time assert_operator record[:GC_COMPACT_WALL_TIME], :>=, 0.0 assert_in_delta record[:GC_PAUSE_TIME], record[:GC_STOP_TIME] + record[:GC_STW_TIME], 0.001 assert_operator record[:GC_MARK_WALL_TIME] + record[:GC_SWEEP_WALL_TIME], :<=, record[:GC_PAUSE_TIME] + 0.001 - # No compaction was requested, so compaction time is folded into nothing. assert_equal 0.0, record[:GC_COMPACT_WALL_TIME] ensure GC::Profiler.disable GC::Profiler.clear + GC.auto_compact = auto_compact if GC.respond_to?(:auto_compact=) && defined?(auto_compact) end - def test_profiler_raw_data_includes_compaction_wall_time + def test_profiler_raw_data_reports_compaction_separately_from_sweep_wall_time omit "compaction not supported" unless GC.respond_to?(:compact) GC::Profiler.enable GC::Profiler.clear + objects = 10_000.times.map { Object.new } GC.compact record = GC::Profiler.raw_data.last assert_kind_of Float, record[:GC_COMPACT_WALL_TIME] - assert_operator record[:GC_COMPACT_WALL_TIME], :>=, 0.0 - # Compaction runs within the sweep phase, so it is bounded by sweep wall time. - assert_operator record[:GC_COMPACT_WALL_TIME], :<=, record[:GC_SWEEP_WALL_TIME] + 0.001 + assert_operator record[:GC_COMPACT_WALL_TIME], :>, 0.0 + phase_wall_time = record[:GC_MARK_WALL_TIME] + record[:GC_SWEEP_WALL_TIME] + record[:GC_COMPACT_WALL_TIME] + assert_operator phase_wall_time, :<=, record[:GC_PAUSE_TIME] + 0.001 + objects.clear rescue NotImplementedError omit "compaction not supported" ensure From ac56efb3068a3578757e9f4872981f4bbd9e5be1 Mon Sep 17 00:00:00 2001 From: Matt Valentine-House Date: Tue, 23 Jun 2026 16:16:24 +0100 Subject: [PATCH 13/15] Add an example sampler that uses the GC::Profiler records --- tool/gc_sampler.rb | 372 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 372 insertions(+) create mode 100755 tool/gc_sampler.rb diff --git a/tool/gc_sampler.rb b/tool/gc_sampler.rb new file mode 100755 index 00000000000000..ba4ed4bd768bf4 --- /dev/null +++ b/tool/gc_sampler.rb @@ -0,0 +1,372 @@ +#!/usr/bin/env ruby +# A minimal in-process GC sampler. +# +# A background thread wakes on a fixed wall-clock interval, snapshots the GC +# counters, and records the *delta* since the previous wake. to provide GC +# activity as a rate over time. +# +# When any profiler-backed data is available we tap GC::Profiler, but drain and +# .clear it every interval so its record buffer never grows unbounded. The raw +# per-GC records can also be retained for later dumping. + +class GCSampler + Caps = Struct.new(:rgengc_stat, :malloc_size, :prof_wall, :prof_pause_split, :prof_phase_wall, :prof_detail, :prof_rgengc, keyword_init: true) + + def self.initial_caps + stat = GC.stat + Caps.new( + rgengc_stat: stat.key?(:total_promoted_count), + malloc_size: GC.respond_to?(:malloc_allocated_size), + prof_wall: false, + prof_pause_split: false, + prof_phase_wall: false, + prof_detail: false, + prof_rgengc: false, + ) + end + + Col = Struct.new(:key, :head, :w, :fmt) + + def initialize(interval: 0.02, caps: GCSampler.initial_caps, raw_profile: true) + @interval = interval + @caps = caps + @samples = [] + @profile_records = [] + @buf = {} + @running = false + @raw_profile = raw_profile + @use_profiler = true + end + + attr_reader :samples, :profile_records, :caps + + def start + @running = true + if @use_profiler + @prof_was = GC::Profiler.enabled? + GC::Profiler.enable + GC::Profiler.clear + end + @t0 = mono + @prev = snapshot + @thread = Thread.new do + while @running + sleep @interval + record + end + end + self + end + + def stop + @running = false + @thread&.join + record + if @use_profiler + GC::Profiler.clear + GC::Profiler.disable unless @prof_was + end + self + end + + def run + start + yield + ensure + stop + end + + def banner(io = $stdout) + io.puts "GC sampler — #{RUBY_DESCRIPTION}" + io.puts "optional data sources:" + io.printf(" GC::Profiler raw records : %s\n", yn(@raw_profile)) + io.printf(" GC::Profiler wall time fields : %s\n", yn(@caps.prof_wall)) + io.printf(" GC::Profiler pause split fields : %s\n", yn(@caps.prof_pause_split)) + io.printf(" GC::Profiler phase wall fields : %s\n", yn(@caps.prof_phase_wall)) + io.printf(" RGENGC_PROFILE (stat + profiler) : %s\n", yn(@caps.rgengc_stat || @caps.prof_rgengc)) + io.printf(" GC_PROFILE_MORE_DETAIL (profiler): %s\n", yn(@caps.prof_detail)) + io.printf(" MALLOC_ALLOCATED_SIZE (methods) : %s\n", yn(@caps.malloc_size)) + io.puts " GC_PROFILE_DETAIL_MEMORY : report-only — not samplable" + io.puts + end + + def report(io = $stdout) + cols = build_columns + io.puts(cols.map { |c| c.head.rjust(c.w) }.join(" ")) + @samples.each do |s| + io.puts(cols.map { |c| format_cell(c, s) }.join(" ")) + end + end + + def report_raw_profile(io = $stdout) + return if @profile_records.empty? + + io.puts + io.puts "raw GC::Profiler records:" + @profile_records.each_with_index do |record, index| + io.printf("[%d]\n", index) + record.each do |key, value| + io.printf(" %-26s %s\n", "#{key}:", value.inspect) + end + end + end + + def summary(io = $stdout) + return if @samples.empty? + wall = @samples.last[:t] + wall_total_ms = wall * 1000.0 + gc_ms = @samples.sum { _1[:gc_ms] } + minors = @samples.sum { _1[:minor] } + majors = @samples.sum { _1[:major] } + gc_count = minors + majors + io.puts + io.printf("wall: %.3f s\n", wall) + io.printf("GCs: %d minor + %d major\n", minors, majors) + io.printf("objects: %d allocated, %d freed\n", @samples.sum { _1[:alloc] }, @samples.sum { _1[:freed] }) + io.printf("GC CPU time: %.3f ms (~%.1f%% of wall)\n", gc_ms, pct(gc_ms, wall_total_ms)) + if @caps.prof_wall + wall_ms = @samples.sum { _1[:wall_ms].to_f } + pause_ms = @samples.sum { _1[:pause_ms].to_f } + max_pause_ms = @samples.filter_map { _1[:max_pause_ms] }.max || 0.0 + mean_pause = gc_count.zero? ? 0.0 : pause_ms / gc_count + io.printf("GC wall: %.3f ms (~%.1f%% of wall)\n", wall_ms, pct(wall_ms, wall_total_ms)) + io.printf("GC pause: %.3f ms total (%.3f ms mean, %.3f ms max, per GC)\n", pause_ms, mean_pause, max_pause_ms) + if @caps.prof_pause_split + stop_ms = @samples.sum { _1[:stop_ms].to_f } + stw_ms = @samples.sum { _1[:stw_ms].to_f } + io.printf(" stop/STW: %.3f ms / %.3f ms (ractor-stop ~%.1f%% of pause)\n", + stop_ms, stw_ms, pct(stop_ms, pause_ms)) + end + if @caps.prof_phase_wall + # Phase wall is bounded by pause; report against pause rather than + # GC wall time because incremental-mark continuation lands in mark wall + # but not in GC_WALL_TIME. + mark_ms = @samples.sum { _1[:markw_ms].to_f } + sweep_ms = @samples.sum { _1[:sweepw_ms].to_f } + compact_ms = @samples.sum { _1[:compactw_ms].to_f } + io.printf(" phase wall: mark %.3f ms (~%.1f%%) / sweep %.3f ms (~%.1f%%)", + mark_ms, pct(mark_ms, pause_ms), sweep_ms, pct(sweep_ms, pause_ms)) + io.printf(" / compact %.3f ms (~%.1f%%)", compact_ms, pct(compact_ms, pause_ms)) if compact_ms > 0.0 + io.printf(" (of pause)\n") + end + end + if @caps.prof_detail + io.printf("mark/sweep CPU: %.3f ms / %.3f ms\n", @samples.sum { _1[:mark_ms].to_f }, @samples.sum { _1[:sweep_ms].to_f }) + end + if @caps.rgengc_stat + io.printf("promoted: %d objects to oldgen\n", @samples.sum { _1[:promoted] }) + end + end + + private + + def yn(bool) = bool ? "yes" : "no" + def pct(part, whole) = whole.to_f.zero? ? 0.0 : 100.0 * part / whole + def mono = Process.clock_gettime(Process::CLOCK_MONOTONIC) + + def format_cell(col, row) + value = row[col.key] + return "-".rjust(col.w) if value.nil? + + format("%#{col.w}#{col.fmt}", value) + end + + # Column set adapts to what this build exposes. + def build_columns + cols = [ + Col.new(:t, "t(s)", 8, ".3f"), + Col.new(:gc, "gc", 4, "d"), + Col.new(:minor, "minor", 5, "d"), + Col.new(:major, "major", 5, "d"), + Col.new(:alloc, "alloc", 10, "d"), + Col.new(:freed, "freed", 10, "d"), + Col.new(:live, "live", 9, "d"), + Col.new(:gc_ms, "gc_ms", 7, ".2f"), + ] + if @caps.prof_wall + cols << Col.new(:wall_ms, "wall_ms", 8, ".3f") + cols << Col.new(:pause_ms, "pause_ms", 8, ".3f") + end + if @caps.prof_pause_split + cols << Col.new(:stop_ms, "stop_ms", 8, ".3f") + cols << Col.new(:stw_ms, "stw_ms", 8, ".3f") + end + if @caps.prof_phase_wall + cols << Col.new(:markw_ms, "markW_ms", 8, ".3f") + cols << Col.new(:sweepw_ms, "sweepW_ms", 9, ".3f") + cols << Col.new(:compactw_ms, "compactW_ms", 11, ".3f") + end + cols << Col.new(:mallocmb, "mallocMB", 8, ".1f") # total_malloc_bytes (always present) + if @caps.prof_detail + cols << Col.new(:mark_ms, "mark_ms", 8, ".3f") + cols << Col.new(:sweep_ms, "sweep_ms", 8, ".3f") + cols << Col.new(:empty, "emptyObj", 9, "d") + end + if @caps.rgengc_stat + cols << Col.new(:promoted, "promoted", 9, "d") + cols << Col.new(:shade, "shade", 6, "d") + cols << Col.new(:rem, "remembrd", 9, "d") + end + if @caps.prof_rgengc && !@caps.rgengc_stat + cols << Col.new(:old, "oldObj", 9, "d") + end + if @caps.malloc_size + cols << Col.new(:malloc_live, "mallocLiveMB", 12, ".2f") + cols << Col.new(:malloc_count, "mallocAllocs", 12, "d") + end + cols << Col.new(:gc_by, "gc_by", 8, "s") + cols + end + + def snapshot + GC.stat(@buf) + s = { + t: mono, + count: @buf[:count], + minor: @buf[:minor_gc_count], + major: @buf[:major_gc_count], + alloc: @buf[:total_allocated_objects], + freed: @buf[:total_freed_objects], + live: @buf[:heap_live_slots], + total_time: GC.total_time, + malloc_b: @buf[:total_malloc_bytes] || 0, + } + if @caps.rgengc_stat + s[:promoted] = @buf[:total_promoted_count] + s[:shade] = @buf[:total_shade_operation_count] + s[:rem] = @buf[:total_remembered_normal_object_count] + end + if @caps.malloc_size + s[:malloc_live] = GC.malloc_allocated_size + s[:malloc_count] = GC.malloc_allocations + end + s + end + + def record + cur = snapshot + gc_delta = cur[:count] - @prev[:count] + row = { + t: (cur[:t] - @t0).round(3), + gc: gc_delta, + minor: cur[:minor] - @prev[:minor], + major: cur[:major] - @prev[:major], + alloc: cur[:alloc] - @prev[:alloc], + freed: cur[:freed] - @prev[:freed], + live: cur[:live], + gc_ms: (cur[:total_time] - @prev[:total_time]) / 1_000_000.0, + mallocmb: cur[:malloc_b] / (1024.0 * 1024.0), + gc_by: gc_delta.zero? ? "-" : (GC.latest_gc_info[:gc_by] || "-"), + } + if @caps.rgengc_stat + row[:promoted] = cur[:promoted] - @prev[:promoted] + row[:shade] = cur[:shade] - @prev[:shade] + row[:rem] = cur[:rem] - @prev[:rem] + end + if @caps.malloc_size + row[:malloc_live] = cur[:malloc_live] / (1024.0 * 1024.0) + row[:malloc_count] = cur[:malloc_count] + end + drain_profiler(row) if @use_profiler + @samples << row + @prev = cur + end + + # Aggregate the per-GC profiler records that accrued during this interval, + # then clear so the buffer stays bounded. + def drain_profiler(row) + data = GC::Profiler.raw_data + GC::Profiler.clear + return if data.nil? || data.empty? + @profile_records.concat(data.map(&:dup)) if @raw_profile + @caps.prof_wall ||= data.any? { _1.key?(:GC_WALL_TIME) } + @caps.prof_pause_split ||= data.any? { _1.key?(:GC_STOP_TIME) } + @caps.prof_phase_wall ||= data.any? { _1.key?(:GC_MARK_WALL_TIME) } + @caps.prof_detail ||= data.any? { _1.key?(:GC_MARK_TIME) } + @caps.prof_rgengc ||= data.any? { _1.key?(:OLD_OBJECTS) } + if @caps.prof_wall + pauses = data.map { _1[:GC_PAUSE_TIME] || 0.0 } + row[:wall_ms] = data.sum { _1[:GC_WALL_TIME] || 0.0 } * 1000.0 + row[:pause_ms] = pauses.sum * 1000.0 + row[:max_pause_ms] = (pauses.max || 0.0) * 1000.0 + end + if @caps.prof_pause_split + row[:stop_ms] = data.sum { _1[:GC_STOP_TIME] || 0.0 } * 1000.0 + row[:stw_ms] = data.sum { _1[:GC_STW_TIME] || 0.0 } * 1000.0 + end + if @caps.prof_phase_wall + row[:markw_ms] = data.sum { _1[:GC_MARK_WALL_TIME] || 0.0 } * 1000.0 + row[:sweepw_ms] = data.sum { _1[:GC_SWEEP_WALL_TIME] || 0.0 } * 1000.0 + row[:compactw_ms] = data.sum { _1[:GC_COMPACT_WALL_TIME] || 0.0 } * 1000.0 + end + if @caps.prof_detail + row[:mark_ms] = data.sum { _1[:GC_MARK_TIME] || 0.0 } * 1000.0 + row[:sweep_ms] = data.sum { _1[:GC_SWEEP_TIME] || 0.0 } * 1000.0 + row[:empty] = data.sum { _1[:EMPTY_OBJECTS] || 0 } + end + if @caps.prof_rgengc && !@caps.rgengc_stat + row[:old] = data.last[:OLD_OBJECTS] + end + end +end + +USAGE = <<~USAGE + Usage: + #{$PROGRAM_NAME} SCRIPT [ARGS...] + #{$PROGRAM_NAME} -e CODE [ARGS...] +USAGE + +def parse_target(argv) + case argv.first + when "-e" + abort USAGE if argv.size < 2 + + code = argv[1] + args = argv.drop(2) + -> { + ARGV.replace(args) + $0 = "-e" + eval(code, TOPLEVEL_BINDING, "-e", 1) + } + when "-h", "--help" + puts USAGE + exit true + when nil + abort USAGE + else + script = argv.first + args = argv.drop(1) + abort "#{script}: no such file" unless File.file?(script) + + -> { + ARGV.replace(args) + $0 = script + load File.expand_path(script) + } + end +end + +if __FILE__ == $0 + target = parse_target(ARGV.dup) + GC.measure_total_time = true if GC.respond_to?(:measure_total_time=) + dump_raw = ENV["RAW"] == "1" + sampler = GCSampler.new(interval: 0.02, raw_profile: dump_raw) + failure = nil + exit_status = nil + + sampler.banner + begin + sampler.run { target.call } + rescue SystemExit => error + exit_status = error.status + rescue Exception => error # rubocop:disable Lint/RescueException + failure = error + ensure + sampler.report + sampler.report_raw_profile if dump_raw + sampler.summary + end + + raise failure if failure + exit exit_status unless exit_status.nil? +end From 5b09cd80a121160f3cc661f7926e80449455fdc3 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Fri, 26 Jun 2026 05:06:03 -0700 Subject: [PATCH 14/15] ZJIT: Remove inline_trivial pass (#17472) Collapse trivial inlining into emitting the SendDirect. No need for a separate pass. We are moving toward a strategy where we strength reduce all the way in one go. --- zjit/src/hir.rs | 116 +++++++++++++---------------- zjit/src/hir/opt_tests.rs | 152 +++++++++++++++++++------------------- 2 files changed, 127 insertions(+), 141 deletions(-) diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index cdd9120af85a92..1f9f473c75f3c0 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -3719,6 +3719,51 @@ impl Function { }) } + /// Try trivially inlining the method. If we can't, emit a SendDirect instruction instead and + /// leave it to the general-purpose inliner to handle. + fn try_inline_send_direct(&mut self, block: BlockId, insn: Insn) -> InsnId { + let Insn::SendDirect { recv, iseq, cd, ref args, state, .. } = insn else { + panic!("try_inline_send_direct called with non-SendDirect instruction"); + }; + // The trivial inliner runs first to handle simple cases (constant returns, + // parameter returns, etc.) without frame push/pop overhead. The general + // inliner then handles more complex methods that require full inlining. + let call_info = unsafe { (*cd).ci }; + let ci_flags = unsafe { vm_ci_flag(call_info) }; + // .send call is not currently supported for builtins + if ci_flags & VM_CALL_OPT_SEND != 0 { + return self.push_insn(block, insn); + } + let Some(value) = iseq_get_return_value(iseq, None, ci_flags) else { + return self.push_insn(block, insn); + }; + match value { + IseqReturn::LocalVariable(idx) => { + self.count(block, Counter::inline_iseq_optimized_send_count); + args[idx as usize] + } + IseqReturn::Value(value) => { + self.count(block, Counter::inline_iseq_optimized_send_count); + self.push_insn(block, Insn::Const { val: Const::Value(value) }) + } + IseqReturn::Receiver => { + self.count(block, Counter::inline_iseq_optimized_send_count); + recv + } + IseqReturn::InvokeLeafBuiltin(bf, return_type) => { + self.count(block, Counter::inline_iseq_optimized_send_count); + self.push_insn(block, Insn::InvokeBuiltin { + bf, + recv, + args: vec![recv], + state, + leaf: true, + return_type, + }) + } + } + } + /// Rewrite eligible Send opcodes into SendDirect /// opcodes if we know the target ISEQ statically. This removes run-time method lookups and /// opens the door for inlining. @@ -3834,8 +3879,8 @@ impl Function { recv = self.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state, recompile: Some(Recompile::ProfileSend { argc }) }); } - let send_direct = self.push_insn(block, Insn::SendDirect { recv, cd, cme, iseq, args: processed_args, kw_bits, state: send_state, block: send_block }); - self.make_equal_to(insn_id, send_direct); + let replacement = self.try_inline_send_direct(block, Insn::SendDirect { recv, cd, cme, iseq, args: processed_args, kw_bits, state: send_state, block: send_block }); + self.make_equal_to(insn_id, replacement); } else if !has_block && def_type == VM_METHOD_TYPE_BMETHOD { let procv = unsafe { rb_get_def_bmethod_proc((*cme).def) }; let proc = unsafe { rb_jit_get_proc_ptr(procv) }; @@ -3878,8 +3923,8 @@ impl Function { recv = self.push_insn(block, Insn::GuardType { val: recv, guard_type: Type::from_profiled_type(profiled_type), state, recompile: Some(Recompile::ProfileSend{ argc }) }); } - let send_direct = self.push_insn(block, Insn::SendDirect { recv, cd, cme, iseq, args: processed_args, kw_bits, state: send_state, block: None }); - self.make_equal_to(insn_id, send_direct); + let replacement = self.try_inline_send_direct(block, Insn::SendDirect { recv, cd, cme, iseq, args: processed_args, kw_bits, state: send_state, block: None }); + self.make_equal_to(insn_id, replacement); } else if !has_block && def_type == VM_METHOD_TYPE_IVAR && args.is_empty() { // Check if we're accessing ivars of a Class or Module object as they require single-ractor mode. // We omit gen_prepare_non_leaf_call on gen_getivar, so it's unsafe to raise for multi-ractor mode. @@ -4233,7 +4278,7 @@ impl Function { emit_super_call_guards(self, block, super_cme, current_cme, mid, state, frame_state.iseq); // Use SendDirect with the super method's CME and ISEQ. - let send_direct = self.push_insn(block, Insn::SendDirect { + let replacement = self.try_inline_send_direct(block, Insn::SendDirect { recv, cd, cme: super_cme, @@ -4243,7 +4288,7 @@ impl Function { state: send_state, block: None, }); - self.make_equal_to(insn_id, send_direct); + self.make_equal_to(insn_id, replacement); } else if def_type == VM_METHOD_TYPE_CFUNC { let cfunc = unsafe { get_cme_def_body_cfunc(super_cme) }; @@ -4751,60 +4796,6 @@ impl Function { did_inline } - fn inline_trivial(&mut self) { - for block in self.reverse_post_order() { - let old_insns = std::mem::take(&mut self.blocks[block.0].insns); - assert!(self.blocks[block.0].insns.is_empty()); - for insn_id in old_insns { - match self.find(insn_id) { - // We can inline SendDirect with blockiseq because we are prohibiting `yield` - // and `.call`, which would trigger autosplat. We only inline constants and - // variables and builtin calls. - Insn::SendDirect { recv, iseq, cd, args, state, .. } => { - let call_info = unsafe { (*cd).ci }; - let ci_flags = unsafe { vm_ci_flag(call_info) }; - // .send call is not currently supported for builtins - if ci_flags & VM_CALL_OPT_SEND != 0 { - self.push_insn_id(block, insn_id); continue; - } - let Some(value) = iseq_get_return_value(iseq, None, ci_flags) else { - self.push_insn_id(block, insn_id); continue; - }; - match value { - IseqReturn::LocalVariable(idx) => { - self.count(block, Counter::inline_iseq_optimized_send_count); - self.make_equal_to(insn_id, args[idx as usize]); - } - IseqReturn::Value(value) => { - self.count(block, Counter::inline_iseq_optimized_send_count); - let replacement = self.push_insn(block, Insn::Const { val: Const::Value(value) }); - self.make_equal_to(insn_id, replacement); - } - IseqReturn::Receiver => { - self.count(block, Counter::inline_iseq_optimized_send_count); - self.make_equal_to(insn_id, recv); - } - IseqReturn::InvokeLeafBuiltin(bf, return_type) => { - self.count(block, Counter::inline_iseq_optimized_send_count); - let replacement = self.push_insn(block, Insn::InvokeBuiltin { - bf, - recv, - args: vec![recv], - state, - leaf: true, - return_type, - }); - self.make_equal_to(insn_id, replacement); - } - } - } - _ => { self.push_insn_id(block, insn_id); } - } - } - } - crate::stats::trace_compile_phase("infer_types", || self.infer_types()); - } - fn load_shape(&mut self, block: BlockId, recv: InsnId) -> InsnId { self.push_insn(block, Insn::LoadField { recv, @@ -6184,7 +6175,6 @@ impl Function { macro_rules! counter_for { // Bucket all strength reduction together (type_specialize) => { Counter::compile_hir_strength_reduce_time_ns }; - (inline_trivial) => { Counter::compile_hir_strength_reduce_time_ns }; (optimize_c_calls) => { Counter::compile_hir_strength_reduce_time_ns }; (convert_no_profile_sends) => { Counter::compile_hir_strength_reduce_time_ns }; // End strength reduction bucket @@ -6231,10 +6221,6 @@ impl Function { for iteration in 0..=inline_max_iterations { // Function is assumed to have types inferred already run_pass!(type_specialize); - // The trivial inliner runs first to handle simple cases (constant returns, - // parameter returns, etc.) without frame push/pop overhead. The general - // inliner then handles more complex methods that require full inlining. - run_pass!(inline_trivial); // Cap inlining at inline_max_iterations passes; the trailing iteration (see above) // runs the rest of the pipeline with inlining off. let did_inline = if iteration < inline_max_iterations { diff --git a/zjit/src/hir/opt_tests.rs b/zjit/src/hir/opt_tests.rs index 933763e2b53660..19b7e8c1966034 100644 --- a/zjit/src/hir/opt_tests.rs +++ b/zjit/src/hir/opt_tests.rs @@ -1516,9 +1516,9 @@ mod hir_opt_tests { bb3(v6:BasicObject): PatchPoint MethodRedefined(Object@0x1000, baz@0x1008, cme:0x1010) v18:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v6, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] recompile - v20:Fixnum[1] = Const Value(1) + v19:Fixnum[1] = Const Value(1) CheckInterrupts - Return v20 + Return v19 "); } @@ -2018,9 +2018,9 @@ mod hir_opt_tests { bb3(v11:BasicObject, v12:BasicObject, v13:BasicObject): PatchPoint MethodRedefined(Integer@0x1008, +@0x1010, cme:0x1018) v27:Fixnum = GuardType v12, Fixnum recompile - v29:Fixnum[100] = Const Value(100) + v28:Fixnum[100] = Const Value(100) CheckInterrupts - Return v29 + Return v28 "); } @@ -3733,9 +3733,9 @@ mod hir_opt_tests { bb3(v9:BasicObject, v10:BasicObject): v14:Fixnum[1] = Const Value(1) PatchPoint MethodRedefined(Integer@0x1008, zero?@0x1010, cme:0x1018) - v24:BoolExact = InvokeBuiltin leaf , v14 + v23:BoolExact = InvokeBuiltin leaf , v14 CheckInterrupts - Return v24 + Return v23 "); } @@ -3767,9 +3767,9 @@ mod hir_opt_tests { v18:ArrayExact = ArrayDup v17 PatchPoint NoSingletonClass(Array@0x1010) PatchPoint MethodRedefined(Array@0x1010, first@0x1018, cme:0x1020) - v32:BasicObject = InvokeBuiltin leaf , v18 + v31:BasicObject = InvokeBuiltin leaf , v18 CheckInterrupts - Return v32 + Return v31 "); } @@ -3797,9 +3797,9 @@ mod hir_opt_tests { v12:ModuleExact[M@0x1008] = Const Value(VALUE(0x1008)) PatchPoint NoSingletonClass(Module@0x1010) PatchPoint MethodRedefined(Module@0x1010, class@0x1018, cme:0x1020) - v25:ClassSubclass[Module@0x1010] = Const Value(VALUE(0x1010)) + v24:ClassSubclass[Module@0x1010] = Const Value(VALUE(0x1010)) CheckInterrupts - Return v25 + Return v24 "); } @@ -3897,7 +3897,7 @@ mod hir_opt_tests { v13:Fixnum[1] = Const Value(1) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v34:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v8, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] recompile - v36:Fixnum[1] = Const Value(1) + v35:Fixnum[1] = Const Value(1) PatchPoint NoEPEscape(test) v21:CPtr = LoadSP v22:BasicObject = LoadField v21, :a@0x1038 @@ -3937,8 +3937,8 @@ mod hir_opt_tests { v15:Fixnum[1] = Const Value(1) SetLocal :a, l0, EP@3, v15 PatchPoint MethodRedefined(Object@0x1008, lambda@0x1010, cme:0x1018) - v47:ObjectSubclass[class_exact*:Object@VALUE(0x1008)] = GuardType v10, ObjectSubclass[class_exact*:Object@VALUE(0x1008)] recompile - v48:BasicObject = CCallWithFrame v47, :Kernel#lambda@0x1040, block=0x1048 + v46:ObjectSubclass[class_exact*:Object@VALUE(0x1008)] = GuardType v10, ObjectSubclass[class_exact*:Object@VALUE(0x1008)] recompile + v47:BasicObject = CCallWithFrame v46, :Kernel#lambda@0x1040, block=0x1048 v22:CPtr = GetEP 0 v23:BasicObject = LoadField v22, :a@0x1000 PatchPoint MethodRedefined(Object@0x1008, foo@0x1050, cme:0x1058) @@ -7435,10 +7435,10 @@ mod hir_opt_tests { PatchPoint SingleRactorMode PatchPoint MethodRedefined(Object@0x1000, zero@0x1008, cme:0x1010) v22:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v6, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] recompile - v28:StaticSymbol[:b] = Const Value(VALUE(0x1038)) + v23:StaticSymbol[:b] = Const Value(VALUE(0x1038)) PatchPoint MethodRedefined(Object@0x1000, one@0x1040, cme:0x1048) CheckInterrupts - Return v28 + Return v23 "); } @@ -8159,9 +8159,9 @@ mod hir_opt_tests { bb3(v6:BasicObject): PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) v18:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v6, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] recompile - v20:NilClass = Const Value(nil) + v19:NilClass = Const Value(nil) CheckInterrupts - Return v20 + Return v19 "); } @@ -11911,16 +11911,16 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(String@0x1008) PatchPoint MethodRedefined(String@0x1008, ascii_only?@0x1010, cme:0x1018) v23:StringExact = GuardType v10, StringExact recompile - v26:CUInt64 = LoadField v23, :RBASIC_FLAGS@0x1040 - v27:CUInt64[3145728] = Const CUInt64(3145728) - v28:CInt64 = IntAnd v26, v27 - v29:CInt64[1048576] = Const CInt64(1048576) - v30:CInt64 = GuardGreaterEq v28, v29 - v31:CInt64[1048576] = Const CInt64(1048576) - v32:CBool = IsBitEqual v30, v31 - v33:BoolExact = BoxBool v32 + v25:CUInt64 = LoadField v23, :RBASIC_FLAGS@0x1040 + v26:CUInt64[3145728] = Const CUInt64(3145728) + v27:CInt64 = IntAnd v25, v26 + v28:CInt64[1048576] = Const CInt64(1048576) + v29:CInt64 = GuardGreaterEq v27, v28 + v30:CInt64[1048576] = Const CInt64(1048576) + v31:CBool = IsBitEqual v29, v30 + v32:BoolExact = BoxBool v31 CheckInterrupts - Return v33 + Return v32 "); } @@ -12609,9 +12609,9 @@ mod hir_opt_tests { bb3(v6:BasicObject): PatchPoint MethodRedefined(Object@0x1000, callee@0x1008, cme:0x1010) v18:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v6, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] recompile - v20:StringExact[VALUE(0x1038)] = Const Value(VALUE(0x1038)) + v19:StringExact[VALUE(0x1038)] = Const Value(VALUE(0x1038)) CheckInterrupts - Return v20 + Return v19 "); } @@ -12635,9 +12635,9 @@ mod hir_opt_tests { bb3(v6:BasicObject): PatchPoint MethodRedefined(Object@0x1000, callee@0x1008, cme:0x1010) v18:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v6, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] recompile - v20:NilClass = Const Value(nil) + v19:NilClass = Const Value(nil) CheckInterrupts - Return v20 + Return v19 "); } @@ -12661,9 +12661,9 @@ mod hir_opt_tests { bb3(v6:BasicObject): PatchPoint MethodRedefined(Object@0x1000, callee@0x1008, cme:0x1010) v18:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v6, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] recompile - v20:TrueClass = Const Value(true) + v19:TrueClass = Const Value(true) CheckInterrupts - Return v20 + Return v19 "); } @@ -12687,9 +12687,9 @@ mod hir_opt_tests { bb3(v6:BasicObject): PatchPoint MethodRedefined(Object@0x1000, callee@0x1008, cme:0x1010) v18:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v6, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] recompile - v20:FalseClass = Const Value(false) + v19:FalseClass = Const Value(false) CheckInterrupts - Return v20 + Return v19 "); } @@ -12713,9 +12713,9 @@ mod hir_opt_tests { bb3(v6:BasicObject): PatchPoint MethodRedefined(Object@0x1000, callee@0x1008, cme:0x1010) v18:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v6, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] recompile - v20:Fixnum[0] = Const Value(0) + v19:Fixnum[0] = Const Value(0) CheckInterrupts - Return v20 + Return v19 "); } @@ -12739,9 +12739,9 @@ mod hir_opt_tests { bb3(v6:BasicObject): PatchPoint MethodRedefined(Object@0x1000, callee@0x1008, cme:0x1010) v18:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v6, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] recompile - v20:Fixnum[1] = Const Value(1) + v19:Fixnum[1] = Const Value(1) CheckInterrupts - Return v20 + Return v19 "); } @@ -14445,12 +14445,12 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(C@0x1000) PatchPoint MethodRedefined(C@0x1000, class@0x1008, cme:0x1010) v43:ObjectSubclass[class_exact:C] = GuardType v6, ObjectSubclass[class_exact:C] recompile - v46:ClassSubclass[C@0x1000] = Const Value(VALUE(0x1000)) + v45:ClassSubclass[C@0x1000] = Const Value(VALUE(0x1000)) v13:StaticSymbol[:_lex_actions] = Const Value(VALUE(0x1038)) v15:TrueClass = Const Value(true) PatchPoint MethodRedefined(Class@0x1040, respond_to?@0x1048, cme:0x1050) PatchPoint MethodRedefined(Class@0x1040, _lex_actions@0x1078, cme:0x1080) - v52:TrueClass = Const Value(true) + v51:TrueClass = Const Value(true) CheckInterrupts v26:StaticSymbol[:CORRECT] = Const Value(VALUE(0x10a8)) Return v26 @@ -14481,11 +14481,11 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(C@0x1008) PatchPoint MethodRedefined(C@0x1008, class@0x1010, cme:0x1018) v25:ObjectSubclass[class_exact:C] = GuardType v10, ObjectSubclass[class_exact:C] recompile - v28:ClassSubclass[C@0x1008] = Const Value(VALUE(0x1008)) + v27:ClassSubclass[C@0x1008] = Const Value(VALUE(0x1008)) PatchPoint MethodRedefined(Class@0x1040, name@0x1048, cme:0x1050) - v32:StringExact|NilClass = CCall v28, :Module#name@0x1078 + v31:StringExact|NilClass = CCall v27, :Module#name@0x1078 CheckInterrupts - Return v32 + Return v31 "); } @@ -14513,9 +14513,9 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(C@0x1008) PatchPoint MethodRedefined(C@0x1008, class@0x1010, cme:0x1018) v23:ObjectSubclass[class_exact:C] = GuardType v10, ObjectSubclass[class_exact:C] recompile - v26:ClassSubclass[C@0x1008] = Const Value(VALUE(0x1008)) + v25:ClassSubclass[C@0x1008] = Const Value(VALUE(0x1008)) CheckInterrupts - Return v26 + Return v25 "); } @@ -14538,9 +14538,9 @@ mod hir_opt_tests { bb3(v6:BasicObject): v10:Fixnum[5] = Const Value(5) PatchPoint MethodRedefined(Integer@0x1000, class@0x1008, cme:0x1010) - v22:ClassSubclass[Integer@0x1000] = Const Value(VALUE(0x1000)) + v21:ClassSubclass[Integer@0x1000] = Const Value(VALUE(0x1000)) CheckInterrupts - Return v22 + Return v21 "); } @@ -14563,9 +14563,9 @@ mod hir_opt_tests { bb3(v6:BasicObject): PatchPoint MethodRedefined(Object@0x1000, class@0x1008, cme:0x1010) v18:ObjectSubclass[class_exact*:Object@VALUE(0x1000)] = GuardType v6, ObjectSubclass[class_exact*:Object@VALUE(0x1000)] recompile - v21:ClassSubclass[Object@0x1038] = Const Value(VALUE(0x1038)) + v20:ClassSubclass[Object@0x1038] = Const Value(VALUE(0x1038)) CheckInterrupts - Return v21 + Return v20 "); } @@ -15116,9 +15116,9 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(C@0x1000) PatchPoint MethodRedefined(C@0x1000, secret@0x1008, cme:0x1010) v19:ObjectSubclass[class_exact:C] = GuardType v6, ObjectSubclass[class_exact:C] recompile - v21:Fixnum[42] = Const Value(42) + v20:Fixnum[42] = Const Value(42) CheckInterrupts - Return v21 + Return v20 "); } @@ -15258,9 +15258,9 @@ mod hir_opt_tests { PatchPoint NoSingletonClass(C@0x1000) PatchPoint MethodRedefined(C@0x1000, secret@0x1008, cme:0x1010) v19:ObjectSubclass[class_exact:C] = GuardType v6, ObjectSubclass[class_exact:C] recompile - v21:Fixnum[42] = Const Value(42) + v20:Fixnum[42] = Const Value(42) CheckInterrupts - Return v21 + Return v20 "); } @@ -16086,26 +16086,26 @@ mod hir_opt_tests { bb5(): PatchPoint NoSingletonClass(C@0x1008) PatchPoint MethodRedefined(C@0x1008, foo@0x1010, cme:0x1018) - v46:Fixnum[3] = Const Value(3) - Jump bb4(v46) + v42:Fixnum[3] = Const Value(3) + Jump bb4(v42) bb6(): v22:CBool = HasType v10, ObjectSubclass[class_exact:D] CondBranch v22, bb7(), bb8() bb7(): PatchPoint NoSingletonClass(D@0x1040) PatchPoint MethodRedefined(D@0x1040, foo@0x1010, cme:0x1048) - v47:Fixnum[4] = Const Value(4) - Jump bb4(v47) + v45:Fixnum[4] = Const Value(4) + Jump bb4(v45) bb8(): v28:BasicObject = Send v10, :foo # SendFallbackReason: SendWithoutBlock: polymorphic fallback Jump bb4(v28) bb4(v15:BasicObject): v31:Fixnum[2] = Const Value(2) PatchPoint MethodRedefined(Integer@0x1070, +@0x1078, cme:0x1080) - v50:Fixnum = GuardType v15, Fixnum recompile - v51:Fixnum = FixnumAdd v50, v31 + v48:Fixnum = GuardType v15, Fixnum recompile + v49:Fixnum = FixnumAdd v48, v31 CheckInterrupts - Return v51 + Return v49 "); } @@ -16301,16 +16301,16 @@ mod hir_opt_tests { bb5(): v19:StaticSymbol = RefineType v10, StaticSymbol PatchPoint MethodRedefined(Symbol@0x1008, to_s@0x1010, cme:0x1018) - v39:StringExact = InvokeBuiltin leaf , v19 - Jump bb4(v39) + v36:StringExact = InvokeBuiltin leaf , v19 + Jump bb4(v36) bb6(): v22:CBool = HasType v10, DynamicSymbol CondBranch v22, bb7(), bb8() bb7(): v25:DynamicSymbol = RefineType v10, DynamicSymbol PatchPoint MethodRedefined(Symbol@0x1008, to_s@0x1010, cme:0x1018) - v40:StringExact = InvokeBuiltin leaf , v25 - Jump bb4(v40) + v38:StringExact = InvokeBuiltin leaf , v25 + Jump bb4(v38) bb8(): v28:BasicObject = Send v10, :to_s # SendFallbackReason: SendWithoutBlock: polymorphic fallback Jump bb4(v28) @@ -16358,8 +16358,8 @@ mod hir_opt_tests { bb5(): PatchPoint NoSingletonClass(C@0x1008) PatchPoint MethodRedefined(C@0x1008, foo@0x1010, cme:0x1018) - v32:Fixnum[3] = Const Value(3) - Jump bb4(v32) + v31:Fixnum[3] = Const Value(3) + Jump bb4(v31) bb6(): v22:BasicObject = Send v10, :foo # SendFallbackReason: SendWithoutBlock: polymorphic fallback Jump bb4(v22) @@ -17265,9 +17265,9 @@ mod hir_opt_tests { bb3(v9:BasicObject, v10:BasicObject): PatchPoint MethodRedefined(Integer@0x1008, even?@0x1010, cme:0x1018) v22:Fixnum = GuardType v10, Fixnum recompile - v24:BoolExact = InvokeBuiltin leaf , v22 + v23:BoolExact = InvokeBuiltin leaf , v22 CheckInterrupts - Return v24 + Return v23 "); } @@ -17293,9 +17293,9 @@ mod hir_opt_tests { bb3(v9:BasicObject, v10:BasicObject): PatchPoint MethodRedefined(Integer@0x1008, odd?@0x1010, cme:0x1018) v22:Fixnum = GuardType v10, Fixnum recompile - v24:BoolExact = InvokeBuiltin leaf , v22 + v23:BoolExact = InvokeBuiltin leaf , v22 CheckInterrupts - Return v24 + Return v23 "); } @@ -17321,9 +17321,9 @@ mod hir_opt_tests { bb3(v9:BasicObject, v10:BasicObject): PatchPoint MethodRedefined(Float@0x1008, zero?@0x1010, cme:0x1018) v22:Flonum = GuardType v10, Flonum recompile - v24:BoolExact = InvokeBuiltin leaf , v22 + v23:BoolExact = InvokeBuiltin leaf , v22 CheckInterrupts - Return v24 + Return v23 "); } @@ -17349,9 +17349,9 @@ mod hir_opt_tests { bb3(v9:BasicObject, v10:BasicObject): PatchPoint MethodRedefined(Float@0x1008, positive?@0x1010, cme:0x1018) v22:Flonum = GuardType v10, Flonum recompile - v24:BoolExact = InvokeBuiltin leaf , v22 + v23:BoolExact = InvokeBuiltin leaf , v22 CheckInterrupts - Return v24 + Return v23 "); } @@ -17377,9 +17377,9 @@ mod hir_opt_tests { bb3(v9:BasicObject, v10:BasicObject): PatchPoint MethodRedefined(Float@0x1008, negative?@0x1010, cme:0x1018) v22:Flonum = GuardType v10, Flonum recompile - v24:BoolExact = InvokeBuiltin leaf , v22 + v23:BoolExact = InvokeBuiltin leaf , v22 CheckInterrupts - Return v24 + Return v23 "); } #[test] From 15b8aafa969341abf3a4e0f3dba8297d1d0c24c9 Mon Sep 17 00:00:00 2001 From: Peter Zhu Date: Fri, 26 Jun 2026 20:25:12 +0900 Subject: [PATCH 15/15] Make rb_ary_subseq_step static --- array.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/array.c b/array.c index e760176176ae61..600543d7f0bf8e 100644 --- a/array.c +++ b/array.c @@ -1740,7 +1740,7 @@ rb_ary_entry(VALUE ary, long offset) return rb_ary_entry_internal(ary, offset); } -VALUE +static VALUE rb_ary_subseq_step(VALUE ary, long beg, long len, long step) { VALUE klass;