From 6a3e1474914ecc508e2ca1ecbf9880a36c1be444 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Thu, 6 Feb 2025 17:04:20 +0000 Subject: [PATCH 1/3] [PROF-11311] Remove datadog_profiling_loader **What does this PR do?** This PR removes the `datadog_profiling_loader` native extension, as it's no longer needed. The profiling native loader was added in https://github.com/DataDog/dd-trace-rb/pull/2003 . Quoting from that PR's description: > When Ruby loads a native extension (using `require`), it uses > `dlopen(..., RTLD_LAZY | RTLD_GLOBAL)` > (https://github.com/ruby/ruby/blob/67950a4c0a884bdb78d9beb4405ebf7459229b21/dln.c#L362). > > This means that every symbol exposed directly or indirectly by that > native extension becomes visible to every other extension in the > Ruby process. This can cause issues, see > https://github.com/rubyjs/mini_racer/pull/179 . > > Ruby's extension loading mechanism is not configurable -- there's > no way to tell it to use different flags when calling `dlopen`. > To get around this, this commit introduces introduces another > extension (profiling loader) which has only a single responsibility: > mimic Ruby's extension loading mechanism, but when calling > `dlopen` use a different set of flags. > > This idea was shamelessly stolen from @lloeki's work in > https://github.com/rubyjs/mini_racer/pull/179, big thanks! ...and importantly ends with: > Note that, that we know of, the profiling native extension only > exposes one potentially problematic symbol: > `rust_eh_personality` (coming from libddprof/libdatadog). > > Future versions of Rust have been patched not to expose this > (see https://github.com/rust-lang/rust/pull/95604#issuecomment-1108563434) > so we may want to revisit the need for this loader in the future, > and perhaps delete it if we no longer require its services :) And we have reached the situation predicted in that description: 1. Nowadays libdatadog no longer exposes `rust_eh_personality` 2. We have a test that validates that only expected symbols are exported by the libdatadog library (see https://github.com/DataDog/libdatadog/pull/573 ). Any new symbols that show up would break shipping new libdatadog versions to rubygems.org until we review them. 3. The `libdatadog_api` extension, which we've been shipping for customers since release 2.3.0 back in July 2024 has always been loaded directly without a loader without issues. Thus, I think it's the right time to get rid of the loader. **Motivation:** Having the loader around is not zero cost; we've run into/caused a few issues ( https://github.com/DataDog/dd-trace-rb/pull/2250 and https://github.com/DataDog/dd-trace-rb/pull/3582 come to mind). It also adds overhead to development: every time we need to rebuild the extensions, it's an extra extension that needs to be prepared, rebuilt, copied, etc. I've been meaning to get rid of the loader for some time now, and this came up again this week so I decided it was time to do it. **Additional Notes:** In the future, we can always ressurrect this approach if we figure out we need it again. We've also discussed internally about proposing upstream to the Ruby VM to maybe add an API to do what the loader was doing, so that we wouldn't need the weird workaround. We haven't yet decided if we're going to do that (help welcome!). **How to test the change?** The loader was responsible for loading the rest of profiling. Thus, any test that uses profiling was also validating the loader and now will validate that we're doing fine without it. TL;DR green CI is good. --- .github/labeler.yml | 2 +- Rakefile | 4 - datadog.gemspec | 1 - ext/datadog_profiling_loader/.clang-format | 1 - .../datadog_profiling_loader.c | 142 ------------------ ext/datadog_profiling_loader/extconf.rb | 60 -------- .../profiling/load_native_extension.rb | 34 +---- .../profiling/load_native_extension_spec.rb | 73 --------- 8 files changed, 2 insertions(+), 315 deletions(-) delete mode 100644 ext/datadog_profiling_loader/.clang-format delete mode 100644 ext/datadog_profiling_loader/datadog_profiling_loader.c delete mode 100644 ext/datadog_profiling_loader/extconf.rb delete mode 100644 spec/datadog/profiling/load_native_extension_spec.rb diff --git a/.github/labeler.yml b/.github/labeler.yml index f55ab0f3c3..3ab963386a 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -18,7 +18,7 @@ dev/testing: # Changes to Profiling profiling: - changed-files: - - any-glob-to-any-file: [ '{lib/datadog/profiling/**,ext/datadog_profiling_loader/**,ext/datadog_profiling_native_extension/**}' ] + - any-glob-to-any-file: [ '{lib/datadog/profiling/**,ext/datadog_profiling_native_extension/**}' ] # Changes to CI-App ci-app: diff --git a/Rakefile b/Rakefile index 2dc4b2dbd0..2b2f1bb30f 100644 --- a/Rakefile +++ b/Rakefile @@ -435,10 +435,6 @@ NATIVE_EXTS = [ ext.ext_dir = 'ext/datadog_profiling_native_extension' end, - Rake::ExtensionTask.new("datadog_profiling_loader.#{RUBY_VERSION}_#{RUBY_PLATFORM}") do |ext| - ext.ext_dir = 'ext/datadog_profiling_loader' - end, - Rake::ExtensionTask.new("libdatadog_api.#{RUBY_VERSION[/\d+.\d+/]}_#{RUBY_PLATFORM}") do |ext| ext.ext_dir = 'ext/libdatadog_api' end diff --git a/datadog.gemspec b/datadog.gemspec index 1f121fd072..1d2579ffe2 100644 --- a/datadog.gemspec +++ b/datadog.gemspec @@ -83,7 +83,6 @@ Gem::Specification.new do |spec| spec.extensions = [ 'ext/datadog_profiling_native_extension/extconf.rb', - 'ext/datadog_profiling_loader/extconf.rb', 'ext/libdatadog_api/extconf.rb' ] end diff --git a/ext/datadog_profiling_loader/.clang-format b/ext/datadog_profiling_loader/.clang-format deleted file mode 100644 index e3845288a2..0000000000 --- a/ext/datadog_profiling_loader/.clang-format +++ /dev/null @@ -1 +0,0 @@ -DisableFormat: true diff --git a/ext/datadog_profiling_loader/datadog_profiling_loader.c b/ext/datadog_profiling_loader/datadog_profiling_loader.c deleted file mode 100644 index 2791f6dad0..0000000000 --- a/ext/datadog_profiling_loader/datadog_profiling_loader.c +++ /dev/null @@ -1,142 +0,0 @@ -#include -#include -#include - -// Why this exists: -// -// The Datadog::Profiling::Loader exists because when Ruby loads a native extension (using `require`), it uses -// `dlopen(..., RTLD_LAZY | RTLD_GLOBAL)` (https://github.com/ruby/ruby/blob/67950a4c0a884bdb78d9beb4405ebf7459229b21/dln.c#L362). -// This means that every symbol exposed directly or indirectly by that native extension becomes visible to every other -// extension in the Ruby process. This can cause issues, see https://github.com/rubyjs/mini_racer/pull/179. -// -// Instead of `RTLD_LAZY | RTLD_GLOBAL`, we want to call `dlopen` with `RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND` when -// loading the profiling native extension, to avoid leaking any unintended symbols (`RTLD_LOCAL`) and avoid picking -// up other's symbols (`RTLD_DEEPBIND`). -// -// But Ruby's extension loading mechanism is not configurable -- there's no way to tell it to use different flags when -// calling `dlopen`. To get around this, this file (datadog_profiling_loader.c) introduces another extension -// (profiling loader) which has only a single responsibility: mimic Ruby's extension loading mechanism, but when calling -// `dlopen` use a different set of flags. -// This idea was shamelessly stolen from @lloeki's work in https://github.com/rubyjs/mini_racer/pull/179, big thanks! -// -// Extra note: Currently (May 2022), that we know of, the profiling native extension only exposes one potentially -// problematic symbol: `rust_eh_personality` (coming from libdatadog). -// Future versions of Rust have been patched not to expose this -// (see https://github.com/rust-lang/rust/pull/95604#issuecomment-1108563434) so we may want to revisit the need -// for this loader in the future, and perhaps delete it if we no longer require its services :) - -#ifndef RTLD_DEEPBIND - #define RTLD_DEEPBIND 0 -#endif - -// Used to mark function arguments that are deliberately left unused -#ifdef __GNUC__ - #define DDTRACE_UNUSED __attribute__((unused)) -#else - #define DDTRACE_UNUSED -#endif - -static VALUE ok_symbol = Qnil; // :ok in Ruby -static VALUE error_symbol = Qnil; // :error in Ruby - -static VALUE _native_load(DDTRACE_UNUSED VALUE self, VALUE ruby_path, VALUE ruby_init_name); -static bool failed_to_load(void *handle, VALUE *failure_details); -static bool incompatible_library(void *handle, VALUE *failure_details); -static bool failed_to_initialize(void *handle, char *init_name, VALUE *failure_details); -static void set_failure_from_dlerror(VALUE *failure_details); -static void unload_failed_library(void *handle); - -#define DDTRACE_EXPORT __attribute__ ((visibility ("default"))) - -void DDTRACE_EXPORT Init_datadog_profiling_loader(void) { - VALUE datadog_module = rb_define_module("Datadog"); - VALUE profiling_module = rb_define_module_under(datadog_module, "Profiling"); - VALUE loader_module = rb_define_module_under(profiling_module, "Loader"); - rb_define_singleton_method(loader_module, "_native_load", _native_load, 2); - - ok_symbol = ID2SYM(rb_intern_const("ok")); - error_symbol = ID2SYM(rb_intern_const("error")); -} - -static VALUE _native_load(DDTRACE_UNUSED VALUE self, VALUE ruby_path, VALUE ruby_init_name) { - Check_Type(ruby_path, T_STRING); - Check_Type(ruby_init_name, T_STRING); - - char *path = StringValueCStr(ruby_path); - char *init_name = StringValueCStr(ruby_init_name); - - int dlopen_flags = RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND; - - #if defined(__has_feature) - #if __has_feature(address_sanitizer) - dlopen_flags &= ~RTLD_DEEPBIND; // Not supported by ASAN - #endif - #endif - - void *handle = dlopen(path, dlopen_flags); - - VALUE failure_details = Qnil; - - if ( - failed_to_load(handle, &failure_details) || - incompatible_library(handle, &failure_details) || - failed_to_initialize(handle, init_name, &failure_details) - ) { - return rb_ary_new_from_args(2, error_symbol, failure_details); - } - - return rb_ary_new_from_args(2, ok_symbol, Qnil); -} - -static bool failed_to_load(void *handle, VALUE *failure_details) { - if (handle == NULL) { - set_failure_from_dlerror(failure_details); - return true; - } else { - return false; - } -} - -static bool incompatible_library(void *handle, VALUE *failure_details) { - // The library being loaded may be linked to a different libruby than the current executing Ruby. - // We check if this is the case by checking if a well-known symbol resolves to a common address. - - void *xmalloc_from_library = dlsym(handle, "ruby_xmalloc"); - - if (xmalloc_from_library == NULL) { - // This happens when ruby is built without a `libruby.so` by using `--disable-shared` at compilation time. - // In this situation, no conflict between libruby version is possible. - return false; - } - - if (xmalloc_from_library != &ruby_xmalloc) { - *failure_details = rb_str_new_cstr("library was compiled and linked to a different Ruby version"); - unload_failed_library(handle); - return true; - } else { - return false; - } -} - -static bool failed_to_initialize(void *handle, char *init_name, VALUE *failure_details) { - void (*initialization_function)(void) = dlsym(handle, init_name); - - if (initialization_function == NULL) { - set_failure_from_dlerror(failure_details); - unload_failed_library(handle); - return true; - } else { - (*initialization_function)(); - return false; - } -} - -static void set_failure_from_dlerror(VALUE *failure_details) { - char *failure = dlerror(); - *failure_details = failure == NULL ? Qnil : rb_str_new_cstr(failure); -} - -static void unload_failed_library(void *handle) { - // Note: According to the Ruby VM sources, this may fail with a segfault on really old versions of macOS (< 10.11) - dlclose(handle); -} diff --git a/ext/datadog_profiling_loader/extconf.rb b/ext/datadog_profiling_loader/extconf.rb deleted file mode 100644 index f8e62e4c94..0000000000 --- a/ext/datadog_profiling_loader/extconf.rb +++ /dev/null @@ -1,60 +0,0 @@ -# rubocop:disable Style/StderrPuts - -if RUBY_ENGINE != "ruby" || Gem.win_platform? - $stderr.puts( - "WARN: Skipping build of Datadog profiling loader. See Datadog profiling native extension note for details." - ) - - File.write("Makefile", "all install clean: # dummy makefile that does nothing") - exit -end - -require "mkmf" - -# Because we can't control what compiler versions our customers use, shipping with -Werror by default is a no-go. -# But we can enable it in CI, so that we quickly spot any new warnings that just got introduced. -append_cflags "-Werror" if ENV["DATADOG_GEM_CI"] == "true" - -# Older gcc releases may not default to C99 and we need to ask for this. This is also used: -# * by upstream Ruby -- search for gnu99 in the codebase -# * by msgpack, another datadog gem dependency -# (https://github.com/msgpack/msgpack-ruby/blob/18ce08f6d612fe973843c366ac9a0b74c4e50599/ext/msgpack/extconf.rb#L8) -append_cflags "-std=gnu99" - -# Gets really noisy when we include the MJIT header, let's omit it (TODO: Use #pragma GCC diagnostic instead?) -append_cflags "-Wno-unused-function" - -# Allow defining variables at any point in a function -append_cflags "-Wno-declaration-after-statement" - -# If we forget to include a Ruby header, the function call may still appear to work, but then -# cause a segfault later. Let's ensure that never happens. -append_cflags "-Werror-implicit-function-declaration" - -# Warn on unused parameters to functions. Use `DDTRACE_UNUSED` to mark things as known-to-not-be-used. -append_cflags "-Wunused-parameter" - -# The native extension is not intended to expose any symbols/functions for other native libraries to use; -# the sole exception being `Init_datadog_profiling_loader` which needs to be visible for Ruby to call it when -# it `dlopen`s the library. -# -# By setting this compiler flag, we tell it to assume that everything is private unless explicitly stated. -# For more details see https://gcc.gnu.org/wiki/Visibility -append_cflags "-fvisibility=hidden" - -# Avoid legacy C definitions -append_cflags "-Wold-style-definition" - -# Enable all other compiler warnings -append_cflags "-Wall" -append_cflags "-Wextra" - -# Tag the native extension library with the Ruby version and Ruby platform. -# This makes it easier for development (avoids "oops I forgot to rebuild when I switched my Ruby") and ensures that -# the wrong library is never loaded. -# When requiring, we need to use the exact same string, including the version and the platform. -EXTENSION_NAME = "datadog_profiling_loader.#{RUBY_VERSION}_#{RUBY_PLATFORM}".freeze - -create_makefile(EXTENSION_NAME) - -# rubocop:enable Style/StderrPuts diff --git a/lib/datadog/profiling/load_native_extension.rb b/lib/datadog/profiling/load_native_extension.rb index 88b983792c..b019b19a15 100644 --- a/lib/datadog/profiling/load_native_extension.rb +++ b/lib/datadog/profiling/load_native_extension.rb @@ -1,41 +1,9 @@ # frozen_string_literal: true -# This file is used to load the profiling native extension. It works in two steps: -# -# 1. Load the datadog_profiling_loader extension. This extension will be used to load the actual extension, but in -# a special way that avoids exposing native-level code symbols. See `datadog_profiling_loader.c` for more details. -# -# 2. Use the Datadog::Profiling::Loader exposed by the datadog_profiling_loader extension to load the actual -# profiling native extension. -# -# All code on this file is on-purpose at the top-level; this makes it so this file is executed only once, -# the first time it gets required, to avoid any issues with the native extension being initialized more than once. - begin - require "datadog_profiling_loader.#{RUBY_VERSION}_#{RUBY_PLATFORM}" + require "datadog_profiling_native_extension.#{RUBY_VERSION}_#{RUBY_PLATFORM}" rescue LoadError => e raise LoadError, "Failed to load the profiling loader extension. To fix this, please remove and then reinstall datadog " \ "(Details: #{e.message})" end - -extension_name = "datadog_profiling_native_extension.#{RUBY_VERSION}_#{RUBY_PLATFORM}" -file_name = "#{extension_name}.#{RbConfig::CONFIG["DLEXT"]}" -full_file_path = "#{__dir__}/../../#{file_name}" - -unless File.exist?(full_file_path) - extension_dir = Gem.loaded_specs["datadog"].extension_dir - candidate_path = "#{extension_dir}/#{file_name}" - if File.exist?(candidate_path) - full_file_path = candidate_path - else - # We found none of the files. This is unexpected. Let's go ahead anyway, the error is going to be reported further - # down anyway. - end -end - -init_function_name = "Init_#{extension_name.split(".").first}" - -status, result = Datadog::Profiling::Loader._native_load(full_file_path, init_function_name) - -raise "Failure to load #{extension_name} due to #{result}" if status == :error diff --git a/spec/datadog/profiling/load_native_extension_spec.rb b/spec/datadog/profiling/load_native_extension_spec.rb deleted file mode 100644 index 51873b1241..0000000000 --- a/spec/datadog/profiling/load_native_extension_spec.rb +++ /dev/null @@ -1,73 +0,0 @@ -require "spec_helper" -require "datadog/profiling/spec_helper" - -RSpec.describe "Datadog::Profiling load_native_extension" do - before { skip_if_profiling_not_supported(self) } - - subject(:load_native_extension) do - load "#{__dir__}/../../../lib/datadog/profiling/load_native_extension.rb" - end - - context "when native extension can be found inside lib" do - it "loads the native extension from lib/" do - expect(Datadog::Profiling::Loader).to receive(:_native_load) do |full_file_path| - absolute_path = File.absolute_path(full_file_path) - expect(absolute_path).to include("lib/datadog_profiling_native_extension") - end - - load_native_extension - end - end - - context "when native extension cannot be found inside lib" do - let(:extension_dir) { Gem.loaded_specs["datadog"].extension_dir } - - before do - expect(File).to receive(:exist?) do |full_file_path| - absolute_path = File.absolute_path(full_file_path) - - if absolute_path.include?("lib/datadog_profiling_native_extension") - false - elsif absolute_path.include?(extension_dir) - true - else - raise "Unexpected path in mock: #{full_file_path}" - end - end.twice - end - - it "loads the native extension from the extension dir" do - expect(Datadog::Profiling::Loader).to receive(:_native_load) do |full_file_path| - absolute_path = File.absolute_path(full_file_path) - expect(absolute_path).to include(extension_dir) - end - - load_native_extension - end - end - - context "when native extension cannot be found on either directory" do - before do - expect(File).to receive(:exist?).twice.and_return(false) - end - - it "tries to load the native extension from lib/" do - expect(Datadog::Profiling::Loader).to receive(:_native_load) do |full_file_path| - absolute_path = File.absolute_path(full_file_path) - expect(absolute_path).to include("lib/datadog_profiling_native_extension") - end - - load_native_extension - end - end - - context "when the loader reports an error" do - it "raises an exception" do - expect(Datadog::Profiling::Loader).to receive(:_native_load).and_return([:error, "some error"]) - - expect do - load_native_extension - end.to raise_error(/Failure to load datadog_profiling_native_extension.*due to some error/) - end - end -end From afa07de1386ccd7286bb867ae2ed0bad8e09afe7 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 7 Feb 2025 10:22:46 +0000 Subject: [PATCH 2/3] Set `rb_ext_ractor_safe(true)` to avoid breakages from removing loader This was an interesting one... Once we removed the profiling loader extension, our tests using Ractors started failing with: > Ractor::UnsafeError: ractor unsafe method called from not main ractor When I started investigating with gdb, I discovered that because we were initializing our extension without going through Ruby, we were skipping this part: https://github.com/ruby/ruby/blob/7178593558080ca529abb61ef27038236ab2687d/load.c#L1301-L1302 : ```c ext_config_push(th, &prev_ext_config); handle = rb_vm_call_cfunc(rb_vm_top_self(), load_ext, path, VM_BLOCK_HANDLER_NONE, path); ``` that is, the `ext_config_push` is what's used by Ruby to validate if a gem is safe to use from Ractors or not. (If you're curious, that value then affects function definition, which controls wether Ruby will check or not for being called from a Ractor). Because we were skipping this entire mechanism, we implicitly were getting the same result as `rb_ext_ractor_safe(true)`. Once we removed the loader, this started failing. And I missed it before opening my PR since for reasons documented in the profiler ractor tests in detail, we don't run ractor-related tests by default. So this issue is one more reason why having the loader may create its own set of issues and why I'm happy to get rid of it. --- ext/datadog_profiling_native_extension/profiling.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ext/datadog_profiling_native_extension/profiling.c b/ext/datadog_profiling_native_extension/profiling.c index e26bbf897a..e6143533ee 100644 --- a/ext/datadog_profiling_native_extension/profiling.c +++ b/ext/datadog_profiling_native_extension/profiling.c @@ -41,6 +41,10 @@ static VALUE _native_malloc_stats(DDTRACE_UNUSED VALUE _self); static VALUE _native_safe_object_info(DDTRACE_UNUSED VALUE _self, VALUE obj); void DDTRACE_EXPORT Init_datadog_profiling_native_extension(void) { + // The profiler still has a lot of limitations around being used in Ractors BUT for now we're choosing to take care of those + // on our side, rather than asking Ruby to block calling our APIs from Ractors. + rb_ext_ractor_safe(true); + VALUE datadog_module = rb_define_module("Datadog"); VALUE profiling_module = rb_define_module_under(datadog_module, "Profiling"); VALUE native_extension_module = rb_define_module_under(profiling_module, "NativeExtension"); From db67572243c37792d9500edaf6f4a8076d883f4e Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Fri, 7 Feb 2025 10:50:21 +0000 Subject: [PATCH 3/3] Avoid calling `rb_ext_ractor_safe` on 2.x Rubies Can you tell it's Friday? --- ext/datadog_profiling_native_extension/profiling.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ext/datadog_profiling_native_extension/profiling.c b/ext/datadog_profiling_native_extension/profiling.c index e6143533ee..40b7af5466 100644 --- a/ext/datadog_profiling_native_extension/profiling.c +++ b/ext/datadog_profiling_native_extension/profiling.c @@ -43,7 +43,9 @@ static VALUE _native_safe_object_info(DDTRACE_UNUSED VALUE _self, VALUE obj); void DDTRACE_EXPORT Init_datadog_profiling_native_extension(void) { // The profiler still has a lot of limitations around being used in Ractors BUT for now we're choosing to take care of those // on our side, rather than asking Ruby to block calling our APIs from Ractors. - rb_ext_ractor_safe(true); + #ifdef HAVE_RB_EXT_RACTOR_SAFE + rb_ext_ractor_safe(true); + #endif VALUE datadog_module = rb_define_module("Datadog"); VALUE profiling_module = rb_define_module_under(datadog_module, "Profiling");