From 92e79035e6bbc14da0ed2cd467c2e987f437c1da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eric=20Meadows-J=C3=B6nsson?= Date: Mon, 9 Mar 2026 23:23:22 +0100 Subject: [PATCH 1/2] Stream tarball downloads to disk and add file-based unpack Use hex_core's new request_to_file to stream tarball downloads directly to disk via hackney instead of buffering in memory. Add TmpDir GenServer for process-based temp file cleanup. --- lib/diff/application.ex | 1 + lib/diff/hex/adapter.ex | 40 +++++++++++++++++- lib/diff/hex/hex.ex | 43 +++++++------------ lib/diff/tmp_dir.ex | 85 ++++++++++++++++++++++++++++++++++++++ mix.exs | 2 +- mix.lock | 2 +- test/diff/tmp_dir_test.exs | 79 +++++++++++++++++++++++++++++++++++ 7 files changed, 221 insertions(+), 31 deletions(-) create mode 100644 lib/diff/tmp_dir.ex create mode 100644 test/diff/tmp_dir_test.exs diff --git a/lib/diff/application.ex b/lib/diff/application.ex index 44df1a7..7802519 100644 --- a/lib/diff/application.ex +++ b/lib/diff/application.ex @@ -10,6 +10,7 @@ defmodule Diff.Application do # List all child processes to be supervised children = [ + Diff.TmpDir, goth_spec(), {Task.Supervisor, name: Diff.Tasks}, {Phoenix.PubSub, name: Diff.PubSub}, diff --git a/lib/diff/hex/adapter.ex b/lib/diff/hex/adapter.ex index 3e7c4ea..867c3ee 100644 --- a/lib/diff/hex/adapter.ex +++ b/lib/diff/hex/adapter.ex @@ -11,12 +11,50 @@ defmodule Diff.Hex.Adapter do with {:ok, status, resp_headers, client_ref} <- resp, {:ok, resp_body} <- :hackney.body(client_ref) do - # :hex_core expects headers to be a Map resp_headers = Map.new(resp_headers) {:ok, {status, resp_headers, resp_body}} end end + @impl true + def request_to_file(method, uri, req_headers, req_body, filename, _config) do + {content_type, payload} = deconstruct_body(req_body) + req_headers = prepare_headers(req_headers, content_type) + + case :hackney.request(method, uri, req_headers, payload, @opts) do + {:ok, 200, resp_headers, ref} -> + resp_headers = Map.new(resp_headers) + + case File.open(filename, [:write, :binary], &stream_body_to_file(ref, &1)) do + {:ok, :ok} -> {:ok, {200, resp_headers}} + {:ok, {:error, _} = error} -> error + {:error, reason} -> {:error, reason} + end + + {:ok, status, resp_headers, ref} -> + :hackney.skip_body(ref) + resp_headers = Map.new(resp_headers) + {:ok, {status, resp_headers}} + + {:error, reason} -> + {:error, reason} + end + end + + defp stream_body_to_file(ref, file) do + case :hackney.stream_body(ref) do + {:ok, data} -> + :ok = IO.binwrite(file, data) + stream_body_to_file(ref, file) + + :done -> + :ok + + {:error, reason} -> + {:error, reason} + end + end + defp prepare_headers(req_headers, content_type) do if content_type do req_headers diff --git a/lib/diff/hex/hex.ex b/lib/diff/hex/hex.ex index 6995445..e8dd90d 100644 --- a/lib/diff/hex/hex.ex +++ b/lib/diff/hex/hex.ex @@ -26,14 +26,17 @@ defmodule Diff.Hex do end def get_tarball(package, version) do - with {:ok, {200, _, tarball}} <- :hex_repo.get_tarball(@config, package, version) do - {:ok, tarball} - else - {:ok, {403, _, _}} -> + path = Diff.TmpDir.tmp_file("tarball") + + case :hex_repo.get_tarball_to_file(@config, package, version, to_charlist(path)) do + {:ok, {200, _headers}} -> + {:ok, path} + + {:ok, {403, _}} -> {:error, :not_found} - {:ok, {status, _, _}} -> - Logger.error("Failed to get package versions. Status: #{status}.") + {:ok, {status, _}} -> + Logger.error("Failed to get tarball for package: #{package}. Status: #{status}.") {:error, :not_found} {:error, reason} -> @@ -42,10 +45,9 @@ defmodule Diff.Hex do end end - def unpack_tarball(tarball, path) when is_binary(path) do - path = to_charlist(path) - - with {:ok, _} <- :hex_tarball.unpack(tarball, path) do + def unpack_tarball(tarball_path, output_path) do + with {:ok, _} <- + :hex_tarball.unpack({:file, to_charlist(tarball_path)}, to_charlist(output_path)) do :ok end end @@ -73,12 +75,11 @@ defmodule Diff.Hex do end def diff(package, from, to) do - path_from = tmp_path("package-#{package}-#{from}-") - path_to = tmp_path("package-#{package}-#{to}-") - with {:ok, tarball_from} <- get_tarball(package, from), + path_from = Diff.TmpDir.tmp_dir("package-#{package}-#{from}"), :ok <- unpack_tarball(tarball_from, path_from), {:ok, tarball_to} <- get_tarball(package, to), + path_to = Diff.TmpDir.tmp_dir("package-#{package}-#{to}"), :ok <- unpack_tarball(tarball_to, path_to) do from_files = tree_files(path_from) to_files = tree_files(path_to) @@ -92,8 +93,7 @@ defmodule Diff.Hex do all_files = (from_files ++ to_files) |> Enum.uniq() |> Enum.sort() stream = - all_files - |> Stream.flat_map(fn file -> + Stream.flat_map(all_files, fn file -> {path_old, path_new} = cond do file in new_files -> {"/dev/null", Path.join(path_to, file)} @@ -120,14 +120,6 @@ defmodule Diff.Hex do [{:error, {error, reason}}] end end) - |> Stream.transform( - fn -> :ok end, - fn elem, :ok -> {[elem], :ok} end, - fn :ok -> - File.rm_rf(path_from) - File.rm_rf(path_to) - end - ) {:ok, stream} else @@ -166,9 +158,4 @@ defmodule Diff.Hex do |> Enum.filter(&File.regular?(&1, raw: true)) |> Enum.map(&Path.relative_to(&1, directory)) end - - defp tmp_path(prefix) do - random_string = Base.encode16(:crypto.strong_rand_bytes(4)) - Path.join([System.tmp_dir!(), "diff", prefix <> random_string]) - end end diff --git a/lib/diff/tmp_dir.ex b/lib/diff/tmp_dir.ex new file mode 100644 index 0000000..6b41db5 --- /dev/null +++ b/lib/diff/tmp_dir.ex @@ -0,0 +1,85 @@ +defmodule Diff.TmpDir do + use GenServer + + @table __MODULE__ + + def start_link(opts \\ []) do + GenServer.start_link(__MODULE__, opts, name: __MODULE__) + end + + def tmp_file(prefix) do + path = path(prefix) + File.touch!(path) + track(path) + path + end + + def tmp_dir(prefix) do + path = path(prefix) + File.mkdir_p!(path) + track(path) + path + end + + defp path(prefix) do + random = Base.encode16(:crypto.strong_rand_bytes(4)) + Path.join(base_dir(), prefix <> "-" <> random) + end + + defp base_dir() do + dir = Path.join(System.tmp_dir!(), "diff") + File.mkdir_p!(dir) + dir + end + + defp track(path) do + pid = self() + :ets.insert(@table, {pid, path}) + GenServer.cast(__MODULE__, {:monitor, pid}) + end + + @impl true + def init(_opts) do + Process.flag(:trap_exit, true) + :ets.new(@table, [:named_table, :duplicate_bag, :public]) + {:ok, %{monitors: MapSet.new()}} + end + + @impl true + def handle_cast({:monitor, pid}, state) do + if pid in state.monitors do + {:noreply, state} + else + Process.monitor(pid) + {:noreply, %{state | monitors: MapSet.put(state.monitors, pid)}} + end + end + + @impl true + def handle_info({:DOWN, _ref, :process, pid, _reason}, state) do + cleanup_pid(pid) + {:noreply, %{state | monitors: MapSet.delete(state.monitors, pid)}} + end + + @impl true + def terminate(_reason, _state) do + :ets.foldl( + fn {_pid, path}, :ok -> + File.rm_rf(path) + :ok + end, + :ok, + @table + ) + end + + defp cleanup_pid(pid) do + entries = :ets.lookup(@table, pid) + + Enum.each(entries, fn {_pid, path} -> + File.rm_rf(path) + end) + + :ets.delete(@table, pid) + end +end diff --git a/mix.exs b/mix.exs index 6f97773..8d7ec7d 100644 --- a/mix.exs +++ b/mix.exs @@ -38,7 +38,7 @@ defmodule Diff.MixProject do {:git_diff, github: "ericmj/git_diff", branch: "ericmj/fix-modes"}, {:goth, "~> 1.0"}, {:hackney, "~> 1.15"}, - {:hex_core, "~> 0.11.0"}, + {:hex_core, "~> 0.15.0"}, {:jason, "~> 1.0"}, {:logster, "~> 1.0.0"}, {:mox, "~> 1.0", only: :test}, diff --git a/mix.lock b/mix.lock index be0ad07..d011264 100644 --- a/mix.lock +++ b/mix.lock @@ -14,7 +14,7 @@ "git_diff": {:git, "https://github.com/ericmj/git_diff.git", "e4ee06cfd139b8a911d07e42d0ff3b15eee2b740", [branch: "ericmj/fix-modes"]}, "goth": {:hex, :goth, "1.4.3", "80e86225ae174844e6a77b61982fafadfc715277db465e0956348d8bdd56b231", [:mix], [{:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:jose, "~> 1.11", [hex: :jose, repo: "hexpm", optional: false]}], "hexpm", "34e2012ed1af2fe2446083da60a988fd9829943d30e4447832646c3a6863a7e6"}, "hackney": {:hex, :hackney, "1.25.0", "390e9b83f31e5b325b9f43b76e1a785cbdb69b5b6cd4e079aa67835ded046867", [:rebar3], [{:certifi, "~> 2.15.0", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "~> 6.1.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "~> 1.0.0", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~> 1.4", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.4.1", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "~> 1.1.0", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}, {:unicode_util_compat, "~> 0.7.1", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "7209bfd75fd1f42467211ff8f59ea74d6f2a9e81cbcee95a56711ee79fd6b1d4"}, - "hex_core": {:hex, :hex_core, "0.11.0", "d1c6bbf2a4ee6b5f002bec6fa52b5080c53c8b63b7caf6eb88b943687547bff4", [:rebar3], [], "hexpm", "707893677a425491962a2db522f1d2b1f85f97ea27418b06f7929f1d30cde0b0"}, + "hex_core": {:hex, :hex_core, "0.15.0", "8eadc0ccb08e3742f2313073d04f39eaa7904617329039e9d3c402f5dd227673", [:rebar3], [], "hexpm", "c2093764c7af8ef0818c104fa141eba431e7be93f8374638c45c7037b26a52f8"}, "hpax": {:hex, :hpax, "1.0.1", "c857057f89e8bd71d97d9042e009df2a42705d6d690d54eca84c8b29af0787b0", [:mix], [], "hexpm", "4e2d5a4f76ae1e3048f35ae7adb1641c36265510a2d4638157fbcb53dda38445"}, "idna": {:hex, :idna, "6.1.1", "8a63070e9f7d0c62eb9d9fcb360a7de382448200fbbd1b106cc96d3d8099df8d", [:rebar3], [{:unicode_util_compat, "~> 0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "92376eb7894412ed19ac475e4a86f7b413c1b9fbb5bd16dccd57934157944cea"}, "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, diff --git a/test/diff/tmp_dir_test.exs b/test/diff/tmp_dir_test.exs new file mode 100644 index 0000000..e95dddd --- /dev/null +++ b/test/diff/tmp_dir_test.exs @@ -0,0 +1,79 @@ +defmodule Diff.TmpDirTest do + use ExUnit.Case, async: true + + test "tmp_file/1 creates a file" do + path = Diff.TmpDir.tmp_file("test") + assert File.exists?(path) + assert File.regular?(path) + end + + test "tmp_dir/1 creates a directory" do + path = Diff.TmpDir.tmp_dir("test") + assert File.dir?(path) + end + + test "cleanup on normal process exit" do + test_pid = self() + + Task.start(fn -> + file = Diff.TmpDir.tmp_file("test") + dir = Diff.TmpDir.tmp_dir("test") + send(test_pid, {:paths, file, dir}) + end) + + assert_receive {:paths, file, dir} + Process.sleep(100) + + refute File.exists?(file) + refute File.exists?(dir) + end + + @tag :capture_log + test "cleanup on process crash" do + test_pid = self() + + Task.start(fn -> + file = Diff.TmpDir.tmp_file("test") + dir = Diff.TmpDir.tmp_dir("test") + send(test_pid, {:paths, file, dir}) + raise "crash" + end) + + assert_receive {:paths, file, dir} + Process.sleep(100) + + refute File.exists?(file) + refute File.exists?(dir) + end + + test "multiple paths for one process" do + test_pid = self() + + Task.start(fn -> + paths = + for i <- 1..5 do + file = Diff.TmpDir.tmp_file("test-#{i}") + dir = Diff.TmpDir.tmp_dir("test-#{i}") + {file, dir} + end + + send(test_pid, {:paths, paths}) + end) + + assert_receive {:paths, paths} + Process.sleep(100) + + for {file, dir} <- paths do + refute File.exists?(file) + refute File.exists?(dir) + end + end + + test "paths persist while process is alive" do + file = Diff.TmpDir.tmp_file("test") + dir = Diff.TmpDir.tmp_dir("test") + + assert File.exists?(file) + assert File.dir?(dir) + end +end From 40975b721587bbc8704ebdad0170daf4ea743a0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eric=20Meadows-J=C3=B6nsson?= Date: Tue, 10 Mar 2026 00:00:29 +0100 Subject: [PATCH 2/2] Configure hex_core repo_url and repo_public_key from application config The hex_core config was hardcoded to default_config() which always points to production repo.hex.pm. Read repo_url and repo_public_key from application config so staging uses the correct repo and key. --- config/config.exs | 3 ++- config/runtime.exs | 4 +++- lib/diff/hex/hex.ex | 25 +++++++++++++++++-------- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/config/config.exs b/config/config.exs index ac99389..58170bc 100644 --- a/config/config.exs +++ b/config/config.exs @@ -9,7 +9,8 @@ import Config config :diff, cache_version: 2, - package_store_impl: Diff.Package.DefaultStore + package_store_impl: Diff.Package.DefaultStore, + repo_url: "https://repo.hex.pm" # Configures the endpoint config :diff, DiffWeb.Endpoint, diff --git a/config/runtime.exs b/config/runtime.exs index e39811c..c080318 100644 --- a/config/runtime.exs +++ b/config/runtime.exs @@ -5,7 +5,9 @@ if config_env() == :prod do host: System.fetch_env!("DIFF_HOST"), hexpm_host: System.fetch_env!("DIFF_HEXPM_HOST"), cache_version: String.to_integer(System.fetch_env!("DIFF_CACHE_VERSION")), - bucket: System.fetch_env!("DIFF_BUCKET") + bucket: System.fetch_env!("DIFF_BUCKET"), + repo_url: System.fetch_env!("DIFF_REPO_URL"), + repo_public_key: System.fetch_env!("DIFF_REPO_PUBLIC_KEY") config :diff, DiffWeb.Endpoint, http: [port: String.to_integer(System.fetch_env!("DIFF_PORT"))], diff --git a/lib/diff/hex/hex.ex b/lib/diff/hex/hex.ex index e8dd90d..a861d5d 100644 --- a/lib/diff/hex/hex.ex +++ b/lib/diff/hex/hex.ex @@ -1,18 +1,27 @@ defmodule Diff.Hex do @behaviour Diff.Hex.Behaviour - @config %{ - :hex_core.default_config() - | http_adapter: {Diff.Hex.Adapter, %{}}, - http_user_agent_fragment: "hexpm_diff" - } + defp config() do + config = %{ + :hex_core.default_config() + | http_adapter: {Diff.Hex.Adapter, %{}}, + http_user_agent_fragment: "hexpm_diff", + repo_url: Application.fetch_env!(:diff, :repo_url) + } + + if repo_public_key = Application.get_env(:diff, :repo_public_key) do + %{config | repo_public_key: repo_public_key} + else + %{config | repo_verify: false} + end + end @max_file_size 1024 * 1024 require Logger def get_versions() do - with {:ok, {200, _, results}} <- :hex_repo.get_versions(@config) do + with {:ok, {200, _, results}} <- :hex_repo.get_versions(config()) do {:ok, results} else {:ok, {status, _, _}} -> @@ -28,7 +37,7 @@ defmodule Diff.Hex do def get_tarball(package, version) do path = Diff.TmpDir.tmp_file("tarball") - case :hex_repo.get_tarball_to_file(@config, package, version, to_charlist(path)) do + case :hex_repo.get_tarball_to_file(config(), package, version, to_charlist(path)) do {:ok, {200, _headers}} -> {:ok, path} @@ -53,7 +62,7 @@ defmodule Diff.Hex do end def get_checksums(package, versions) do - with {:ok, {200, _, releases}} <- :hex_repo.get_package(@config, package) do + with {:ok, {200, _, releases}} <- :hex_repo.get_package(config(), package) do checksums = for release <- releases.releases, release.version in versions do release.outer_checksum