From 2ade7c3dd7ab2397a5255b1972b1c955973ce88c Mon Sep 17 00:00:00 2001 From: "Brian \"Moses\" Hall" Date: Mon, 6 May 2024 16:43:46 -0400 Subject: [PATCH] =?UTF-8?q?TTO-214=20Catalog=20should=20use=20the=20most?= =?UTF-8?q?=20recent=20redirects=20file=20instead=20of=20=E2=80=A6=20(#46)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * TTO-214 Catalog should use the most recent redirects file instead of failing when the redirects file for the current month doesn't exist - Default behavior when `REDIRECT_FILE` is not set is to use this month's file and failing that, last month's. - Move logic relating to default location and name of redirect files from Services to Redirects. --- Gemfile.lock | 10 ++++++++++ lib/ht_traject/redirects.rb | 13 +++++++++++++ lib/services.rb | 4 +--- spec/ht_traject/redirects_spec.rb | 31 +++++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 3 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index e1f8d8f..4c027d0 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -38,6 +38,7 @@ GEM httpclient (2.8.3) httpx (0.23.3) http-2-next (>= 0.4.1) + jdbc-mysql (8.0.30) json (2.6.3) json (2.6.3-java) language_server-protocol (3.17.0.3) @@ -52,11 +53,14 @@ GEM unf marc-fastxmlwriter (1.1.0) marc (~> 1.0) + marc-marc4j (1.0.0-java) + marc (~> 1) marc_alephsequential (2.0.0) marc (~> 1) yell (~> 2) match_map (3.0.0) method_source (1.0.0) + naconormalizer (1.0.1-java) nokogiri (1.16.2-arm64-darwin) racc (~> 1.4) nokogiri (1.16.2-java) @@ -150,6 +154,9 @@ GEM nokogiri (~> 1.9) slop (~> 4.0) yell + traject-marc4j_reader (1.1.0-java) + marc (~> 1.0) + marc-marc4j (~> 1.0) traject_alephsequential_reader (1.1.2) marc (~> 1) marc_alephsequential (~> 2) @@ -176,9 +183,11 @@ DEPENDENCIES http (~> 5.0) httpclient httpx + jdbc-mysql library_stdnums marc-fastxmlwriter (~> 1.1) match_map + naconormalizer pry rake (~> 13.0) rsolr @@ -190,6 +199,7 @@ DEPENDENCIES standard thor traject (~> 3.0) + traject-marc4j_reader traject_alephsequential_reader traject_umich_format yell diff --git a/lib/ht_traject/redirects.rb b/lib/ht_traject/redirects.rb index 77b7144..4c8c59b 100644 --- a/lib/ht_traject/redirects.rb +++ b/lib/ht_traject/redirects.rb @@ -11,6 +11,19 @@ module HathiTrust class Redirects + def self.redirects_file_name(date: Date.today) + "redirects_#{date.strftime "%Y%m"}.txt.gz" + end + + def self.default_redirects_file(directory: "/htapps/babel/hathifiles/catalog_redirects/redirects") + default_file = File.join(directory, redirects_file_name) + if !File.exist?(default_file) + # Fall back to previous month's (that's what the << method does) file + default_file = File.join(directory, redirects_file_name(date: Date.today << 1)) + end + default_file + end + def old_ids_for(id) redirects[id] || [] end diff --git a/lib/services.rb b/lib/services.rb index eb7cbc5..2fc62de 100644 --- a/lib/services.rb +++ b/lib/services.rb @@ -64,10 +64,8 @@ def env_local_file end Services.register(:redirect_file) do - yyyymm = DateTime.now.strftime "%Y%m" - default_file = "/htapps/babel/hathifiles/catalog_redirects/redirects/redirects_#{yyyymm}.txt.gz" # Start migrating from redirect_file to REDIRECT_FILE on principle of least surprise - ENV["redirect_file"] || ENV["REDIRECT_FILE"] || default_file + ENV["redirect_file"] || ENV["REDIRECT_FILE"] || Redirects.default_redirects_file end Services.register(:db) do diff --git a/spec/ht_traject/redirects_spec.rb b/spec/ht_traject/redirects_spec.rb index ee5687a..d477222 100644 --- a/spec/ht_traject/redirects_spec.rb +++ b/spec/ht_traject/redirects_spec.rb @@ -8,6 +8,37 @@ let(:sample_old_cid) { "000004165" } let(:sample_new_cid) { "006215998" } + describe ".redirects_file_name" do + it "generates the appropriately dated redirects file" do + expect(described_class.redirects_file_name(date: Date.parse("2024-01-01"))) + .to eq("redirects_202401.txt.gz") + end + end + + describe ".default_redirects_file" do + context "with current month's file" do + it "uses the existing file" do + Dir.mktmpdir do |tmpdir| + current_file = File.join(tmpdir, described_class.redirects_file_name) + FileUtils.touch current_file + expect(described_class.default_redirects_file(directory: tmpdir)) + .to eq(current_file) + end + end + end + + context "without current month's file" do + it "uses last month's file" do + Dir.mktmpdir do |tmpdir| + last_file = File.join(tmpdir, described_class.redirects_file_name(date: Date.today << 1)) + FileUtils.touch last_file + expect(described_class.default_redirects_file(directory: tmpdir)) + .to eq(last_file) + end + end + end + end + describe "#old_ids_for" do context "with a real file" do override_service(:redirect_file) do