From 5d1919354619b1197900a4c23c68c78c6b468a72 Mon Sep 17 00:00:00 2001 From: Sean Linsley Date: Mon, 25 Sep 2023 13:48:17 -0500 Subject: [PATCH] Postgres: use ANY instead of IN for array inclusion queries --- activerecord/CHANGELOG.md | 15 +++++ .../connection_adapters/abstract/quoting.rb | 2 +- .../postgresql/oid/array.rb | 6 +- .../connection_adapters/postgresql/quoting.rb | 17 ++++-- .../extended_deterministic_queries.rb | 15 ----- activerecord/lib/arel/nodes/homogeneous_in.rb | 2 +- activerecord/lib/arel/visitors/postgresql.rb | 16 +++++ .../test/cases/arel/visitors/postgres_test.rb | 34 ++++++++++- .../test/cases/associations/eager_test.rb | 6 +- activerecord/test/cases/batches_test.rb | 6 +- .../test/cases/bind_parameter_test.rb | 60 +++++++------------ .../test/cases/log_subscriber_test.rb | 20 ++++++- .../test/cases/relation/merging_test.rb | 13 +++- .../test/cases/relation/where_test.rb | 2 + activerecord/test/schema/schema.rb | 6 +- 15 files changed, 149 insertions(+), 71 deletions(-) diff --git a/activerecord/CHANGELOG.md b/activerecord/CHANGELOG.md index c2e060c7da4fc..a39affbcab9c9 100644 --- a/activerecord/CHANGELOG.md +++ b/activerecord/CHANGELOG.md @@ -1,5 +1,20 @@ +* Postgres: use ANY instead of IN for array inclusion queries + + Previously, queries like `where(id: [1, 2])` generated the SQL `id IN (1, 2)`. + Now `id = ANY ('{1,2}')` is generated instead, and `where.not` generates `id != ALL ('{1,2}')`. + + This brings several advantages: + + * the query can now be a prepared statement + * query parsing is faster + * duplicate entries in `pg_stat_statements` can be avoided + * queries are less likely to be truncated in `pg_stat_activity` + + *Sean Linsley* + * Ensure `#signed_id` outputs `url_safe` strings. *Jason Meller* + Please check [7-1-stable](https://github.com/rails/rails/blob/7-1-stable/activerecord/CHANGELOG.md) for previous changes. diff --git a/activerecord/lib/active_record/connection_adapters/abstract/quoting.rb b/activerecord/lib/active_record/connection_adapters/abstract/quoting.rb index 80b04475cdff5..11a545e146eba 100644 --- a/activerecord/lib/active_record/connection_adapters/abstract/quoting.rb +++ b/activerecord/lib/active_record/connection_adapters/abstract/quoting.rb @@ -33,7 +33,7 @@ def quote(value) # Cast a +value+ to a type that the database understands. For example, # SQLite does not understand dates, so this method will convert a Date # to a String. - def type_cast(value) + def type_cast(value, **kwargs) case value when Symbol, ActiveSupport::Multibyte::Chars, Type::Binary::Data value.to_s diff --git a/activerecord/lib/active_record/connection_adapters/postgresql/oid/array.rb b/activerecord/lib/active_record/connection_adapters/postgresql/oid/array.rb index e46e47102b33f..2a651598925d0 100644 --- a/activerecord/lib/active_record/connection_adapters/postgresql/oid/array.rb +++ b/activerecord/lib/active_record/connection_adapters/postgresql/oid/array.rb @@ -7,7 +7,11 @@ module OID # :nodoc: class Array < Type::Value # :nodoc: include ActiveModel::Type::Helpers::Mutable - Data = Struct.new(:encoder, :values) # :nodoc: + Data = Struct.new(:encoder, :values) do # :nodoc: + def hash + [encoder.name, encoder.delimiter, values].hash + end + end attr_reader :subtype, :delimiter delegate :type, :user_input_in_time_zone, :limit, :precision, :scale, to: :subtype diff --git a/activerecord/lib/active_record/connection_adapters/postgresql/quoting.rb b/activerecord/lib/active_record/connection_adapters/postgresql/quoting.rb index 44243ed50e61e..ee775cc4273b5 100644 --- a/activerecord/lib/active_record/connection_adapters/postgresql/quoting.rb +++ b/activerecord/lib/active_record/connection_adapters/postgresql/quoting.rb @@ -130,13 +130,18 @@ def quote_default_expression(value, column) # :nodoc: end end - def type_cast(value) # :nodoc: + def type_cast(value, in_array: false) # :nodoc: case value when Type::Binary::Data - # Return a bind param hash with format as binary. - # See https://deveiate.org/code/pg/PG/Connection.html#method-i-exec_prepared-doc - # for more information - { value: value.to_s, format: 1 } + if in_array + # Arrays are string-encoded, so an included binary value must also be string-encoded. + escape_bytea(value.to_s) + else + # Return a bind param hash with format as binary. + # See https://deveiate.org/code/pg/PG/Connection.html#method-i-exec_prepared-doc + # for more information + { value: value.to_s, format: 1 } + end when OID::Xml::Data, OID::Bit::Data value.to_s when OID::Array::Data @@ -221,7 +226,7 @@ def determine_encoding_of_strings_in_array(value) def type_cast_array(values) case values when ::Array then values.map { |item| type_cast_array(item) } - else type_cast(values) + else type_cast(values, in_array: true) end end diff --git a/activerecord/lib/active_record/encryption/extended_deterministic_queries.rb b/activerecord/lib/active_record/encryption/extended_deterministic_queries.rb index bf2fef53bd654..e99ca94462906 100644 --- a/activerecord/lib/active_record/encryption/extended_deterministic_queries.rb +++ b/activerecord/lib/active_record/encryption/extended_deterministic_queries.rb @@ -28,7 +28,6 @@ def self.install_support ActiveRecord::Relation.prepend(RelationQueries) ActiveRecord::Base.include(CoreQueries) ActiveRecord::Encryption::EncryptedAttributeType.prepend(ExtendedEncryptableType) - Arel::Nodes::HomogeneousIn.prepend(InWithAdditionalValues) end # When modifying this file run performance tests in @@ -153,20 +152,6 @@ def serialize(data) end end end - - module InWithAdditionalValues - def proc_for_binds - -> value { ActiveModel::Attribute.with_cast_value(attribute.name, value, encryption_aware_type_caster) } - end - - def encryption_aware_type_caster - if attribute.type_caster.is_a?(ActiveRecord::Encryption::EncryptedAttributeType) - attribute.type_caster.cast_type - else - attribute.type_caster - end - end - end end end end diff --git a/activerecord/lib/arel/nodes/homogeneous_in.rb b/activerecord/lib/arel/nodes/homogeneous_in.rb index 08f5088d9b85a..26cfbeb9845ba 100644 --- a/activerecord/lib/arel/nodes/homogeneous_in.rb +++ b/activerecord/lib/arel/nodes/homogeneous_in.rb @@ -48,7 +48,7 @@ def casted_values end def proc_for_binds - -> value { ActiveModel::Attribute.with_cast_value(attribute.name, value, attribute.type_caster) } + -> value { ActiveModel::Attribute.from_database(attribute.name, value, ActiveModel::Type.default_value) } end def fetch_attribute(&block) diff --git a/activerecord/lib/arel/visitors/postgresql.rb b/activerecord/lib/arel/visitors/postgresql.rb index 8aa57880d8f77..67dfa972d2713 100644 --- a/activerecord/lib/arel/visitors/postgresql.rb +++ b/activerecord/lib/arel/visitors/postgresql.rb @@ -78,6 +78,22 @@ def visit_Arel_Nodes_IsDistinctFrom(o, collector) visit o.right, collector end + # Postgres-specific implementation that uses `col = ANY ('{1,2}')` instead of `col IN (1,2)` + def visit_Arel_Nodes_HomogeneousIn(o, collector) + visit o.left, collector + collector << (o.type == :in ? " = ANY (" : " != ALL (") + type_caster = ActiveRecord::ConnectionAdapters::PostgreSQL::OID::Array.new(o.attribute.type_caster, ",") + casted_values = o.values.map do |raw_value| + o.attribute.type_caster.serialize(raw_value) if o.attribute.type_caster.serializable?(raw_value) + end + casted_values.compact! + pg_encoder = PG::TextEncoder::Array.new(name: "#{type_caster.type}[]", delimiter: ",") + values = [ActiveRecord::ConnectionAdapters::PostgreSQL::OID::Array::Data.new(pg_encoder, casted_values)] + proc_for_binds = -> value { ActiveModel::Attribute.from_database(o.attribute.name, value, ActiveModel::Type.default_value) } + collector.add_binds(values, proc_for_binds, &bind_block) + collector << ")" + end + BIND_BLOCK = proc { |i| "$#{i}" } private_constant :BIND_BLOCK diff --git a/activerecord/test/cases/arel/visitors/postgres_test.rb b/activerecord/test/cases/arel/visitors/postgres_test.rb index 2795540669fe9..221c740239c3d 100644 --- a/activerecord/test/cases/arel/visitors/postgres_test.rb +++ b/activerecord/test/cases/arel/visitors/postgres_test.rb @@ -1,5 +1,8 @@ # frozen_string_literal: true +require "active_model/attribute" +require "active_record" +require "active_record/connection_adapters/postgresql_adapter" require_relative "../helper" module Arel @@ -7,10 +10,23 @@ module Visitors class PostgresTest < Arel::Spec before do @visitor = PostgreSQL.new Table.engine.connection - @table = Table.new(:users) + @table = Table.new(:users, type_caster: fake_pg_caster) @attr = @table[:id] end + # map that converts attribute names to a caster + def fake_pg_caster + Object.new.tap do |caster| + def caster.type_for_attribute(attr_name) + ActiveModel::Type::String.new + end + def caster.type_cast_for_database(attr_name, value) + type = type_for_attribute(attr_name) + type.serialize(value) + end + end + end + def compile(node) @visitor.accept(node, Collectors::SQLString.new).value end @@ -331,6 +347,22 @@ def compile(node) _(sql).must_be_like %{ "products"."tags" && '{foo,bar,baz}' } end end + + describe "Nodes::HomogeneousIn" do + it "should use = ANY" do + test = Nodes::HomogeneousIn.new([1, 2], @table[:id], :in) + _(compile(test)).must_be_like %{ + "users"."id" = ANY ($1) + } + end + + it "should use != ALL for negation" do + test = Nodes::HomogeneousIn.new([1, 2], @table[:id], :notin) + _(compile(test)).must_be_like %{ + "users"."id" != ALL ($1) + } + end + end end end end diff --git a/activerecord/test/cases/associations/eager_test.rb b/activerecord/test/cases/associations/eager_test.rb index fe5fe6debed9b..7c93c3aeb2942 100644 --- a/activerecord/test/cases/associations/eager_test.rb +++ b/activerecord/test/cases/associations/eager_test.rb @@ -1700,7 +1700,11 @@ def test_preloading_has_many_through_with_custom_scope c = Sharded::BlogPost.connection quoted_blog_id = Regexp.escape(c.quote_table_name("sharded_comments.blog_id")) quoted_blog_post_id = Regexp.escape(c.quote_table_name("sharded_comments.blog_post_id")) - assert_match(/WHERE #{quoted_blog_id} IN \(.+\) AND #{quoted_blog_post_id} IN \(.+\)/, sql) + if current_adapter?(:PostgreSQLAdapter) + assert_match(/WHERE #{quoted_blog_id} = ANY \(\$1\) AND #{quoted_blog_post_id} = ANY \(\$2\)/, sql) + else + assert_match(/WHERE #{quoted_blog_id} IN \(.+\) AND #{quoted_blog_post_id} IN \(.+\)/, sql) + end end test "preloading has_many association associated by a composite query_constraints" do diff --git a/activerecord/test/cases/batches_test.rb b/activerecord/test/cases/batches_test.rb index ca5f1e7b09c21..5f715b5eac154 100644 --- a/activerecord/test/cases/batches_test.rb +++ b/activerecord/test/cases/batches_test.rb @@ -524,7 +524,8 @@ def test_in_batches_executes_range_queries_when_unconstrained def test_in_batches_executes_in_queries_when_unconstrained_and_opted_out_of_ranges c = Post.connection quoted_posts_id = Regexp.escape(c.quote_table_name("posts.id")) - assert_sql(/#{quoted_posts_id} IN \(.+\)/i) do + regex = current_adapter?(:PostgreSQLAdapter) ? /#{quoted_posts_id} = ANY \(\$1\)/i : /#{quoted_posts_id} IN \(.+\)/i + assert_sql(regex) do Post.in_batches(of: 2, use_ranges: false) { |relation| assert_kind_of Post, relation.first } end end @@ -532,7 +533,8 @@ def test_in_batches_executes_in_queries_when_unconstrained_and_opted_out_of_rang def test_in_batches_executes_in_queries_when_constrained c = Post.connection quoted_posts_id = Regexp.escape(c.quote_table_name("posts.id")) - assert_sql(/#{quoted_posts_id} IN \(.+\)/i) do + regex = current_adapter?(:PostgreSQLAdapter) ? /#{quoted_posts_id} = ANY \(\$1\)/i : /#{quoted_posts_id} IN \(.+\)/i + assert_sql(regex) do Post.where("id < ?", 5).in_batches(of: 2) { |relation| assert_kind_of Post, relation.first } end end diff --git a/activerecord/test/cases/bind_parameter_test.rb b/activerecord/test/cases/bind_parameter_test.rb index 91573ef510a20..5ac1bab782dda 100644 --- a/activerecord/test/cases/bind_parameter_test.rb +++ b/activerecord/test/cases/bind_parameter_test.rb @@ -94,7 +94,11 @@ def test_statement_cache_with_in_clause topics = Topic.where(id: [1, 3]).order(:id) assert_equal [1, 3], topics.map(&:id) - assert_not_includes statement_cache, to_sql_key(topics.arel) + if current_adapter?(:PostgreSQLAdapter) + assert_includes statement_cache, to_sql_key(topics.arel) + else + assert_not_includes statement_cache, to_sql_key(topics.arel) + end end def test_statement_cache_with_sql_string_literal @@ -162,12 +166,12 @@ def test_logs_unnamed_binds end def test_bind_params_to_sql_with_prepared_statements - assert_bind_params_to_sql + assert_bind_params_to_sql(true) end def test_bind_params_to_sql_with_unprepared_statements @connection.unprepared_statement do - assert_bind_params_to_sql + assert_bind_params_to_sql(false) end end @@ -198,60 +202,40 @@ def test_binds_with_filtered_attributes end private - def assert_bind_params_to_sql + def assert_bind_params_to_sql(prepared) table = Author.quoted_table_name pk = "#{table}.#{Author.quoted_primary_key}" - # prepared_statements: true - # - # SELECT `authors`.* FROM `authors` WHERE (`authors`.`id` IN (?, ?, ?) OR `authors`.`id` IS NULL) - # - # prepared_statements: false - # - # SELECT `authors`.* FROM `authors` WHERE (`authors`.`id` IN (1, 2, 3) OR `authors`.`id` IS NULL) - # - sql = "SELECT #{table}.* FROM #{table} WHERE (#{pk} IN (#{bind_params(1..3)}) OR #{pk} IS NULL)" + condition = if current_adapter?(:PostgreSQLAdapter) + prepared ? "= ANY ($1)" : "= ANY ('{1,2,3}')" + else + prepared ? "IN (?, ?, ?)" : "IN (1, 2, 3)" + end + + sql = "SELECT #{table}.* FROM #{table} WHERE (#{pk} #{condition} OR #{pk} IS NULL)" authors = Author.where(id: [1, 2, 3, nil]) assert_equal sql, @connection.to_sql(authors.arel) assert_sql(sql) { assert_equal 3, authors.length } - # prepared_statements: true - # - # SELECT `authors`.* FROM `authors` WHERE `authors`.`id` IN (?, ?, ?) - # - # prepared_statements: false - # - # SELECT `authors`.* FROM `authors` WHERE `authors`.`id` IN (1, 2, 3) - # - sql = "SELECT #{table}.* FROM #{table} WHERE #{pk} IN (#{bind_params(1..3)})" + sql = "SELECT #{table}.* FROM #{table} WHERE #{pk} #{condition}" authors = Author.where(id: [1, 2, 3, 9223372036854775808]) assert_equal sql, @connection.to_sql(authors.arel) assert_sql(sql) { assert_equal 3, authors.length } - # prepared_statements: true - # - # SELECT `authors`.* FROM `authors` WHERE `authors`.`id` IN (?, ?, ?) - # - # prepared_statements: false - # - # SELECT `authors`.* FROM `authors` WHERE `authors`.`id` IN (1, 2, 3) - # - sql = "SELECT #{table}.* FROM #{table} WHERE #{pk} IN (#{bind_params(1..3)})" + condition = if current_adapter?(:PostgreSQLAdapter) + prepared ? "IN ($1, $2, $3)" : "IN (1, 2, 3)" + else + prepared ? "IN (?, ?, ?)" : "IN (1, 2, 3)" + end + sql = "SELECT #{table}.* FROM #{table} WHERE #{pk} #{condition}" arel_node = Arel.sql("SELECT #{table}.* FROM #{table} WHERE #{pk} IN (?)", [1, 2, 3]) assert_equal sql, @connection.to_sql(arel_node) assert_sql(sql) { assert_equal 3, @connection.select_all(arel_node).length } end - def bind_params(ids) - collector = @connection.send(:collector) - bind_params = ids.map { |i| Arel::Nodes::BindParam.new(i) } - sql, _ = @connection.visitor.compile(bind_params, collector) - sql - end - def to_sql_key(arel) sql = @connection.to_sql(arel) @connection.respond_to?(:sql_key, true) ? @connection.send(:sql_key, sql) : sql diff --git a/activerecord/test/cases/log_subscriber_test.rb b/activerecord/test/cases/log_subscriber_test.rb index 70523cd725bf3..8af44a19b64d8 100644 --- a/activerecord/test/cases/log_subscriber_test.rb +++ b/activerecord/test/cases/log_subscriber_test.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true require "cases/helper" +require "models/admin" +require "models/admin/user" require "models/binary" require "models/developer" require "models/post" @@ -251,7 +253,23 @@ def test_cached_queries_doesnt_log_when_level_is_not_debug def test_where_in_binds_logging_include_attribute_names Developer.where(id: [1, 2, 3, 4, 5]).load wait - assert_match(%{["id", 1], ["id", 2], ["id", 3], ["id", 4], ["id", 5]}, @logger.logged(:debug).last) + if current_adapter?(:PostgreSQLAdapter) + assert_match(%{["id", "{1,2,3,4,5}"]}, @logger.logged(:debug).last) + else + assert_match(%{["id", 1], ["id", 2], ["id", 3], ["id", 4], ["id", 5]}, @logger.logged(:debug).last) + end + end + + def test_where_clause_with_json_attribute + Admin::User.create(json_options: { a: 1 }) + count = Admin::User.where(json_options: [{ a: 1 }, { b: 2 }]).count + assert_equal 1, count + wait + if current_adapter?(:PostgreSQLAdapter) + assert_match(%{[["json_options", "{\\"{\\\\\\"a\\\\\\":1}\\",\\"{\\\\\\"b\\\\\\":2}\\"}"]]}, @logger.logged(:debug).last) + else + assert_match(%{[["json_options", "{\\"a\\":1}"], ["json_options", "{\\"b\\":2}"]]}, @logger.logged(:debug).last) + end end def test_binary_data_is_not_logged diff --git a/activerecord/test/cases/relation/merging_test.rb b/activerecord/test/cases/relation/merging_test.rb index eb34cff077a68..f86b44c5c9e60 100644 --- a/activerecord/test/cases/relation/merging_test.rb +++ b/activerecord/test/cases/relation/merging_test.rb @@ -205,9 +205,16 @@ def test_merge_doesnt_duplicate_same_clauses non_mary_and_bob = Author.where.not(id: [mary, bob]) - author_id = Author.connection.quote_table_name("authors.id") - assert_sql(/WHERE #{Regexp.escape(author_id)} NOT IN \((\?|\W?\w?\d), \g<1>\)\z/) do - assert_equal [david], non_mary_and_bob.merge(non_mary_and_bob) + if current_adapter?(:PostgreSQLAdapter) + author_id = Author.connection.quote_table_name("authors.id") + assert_sql(/WHERE #{Regexp.escape(author_id)} != ALL \(\$1\)\z/) do + assert_equal [david], non_mary_and_bob.merge(non_mary_and_bob) + end + else + author_id = Author.connection.quote_table_name("authors.id") + assert_sql(/WHERE #{Regexp.escape(author_id)} NOT IN \((\?|\W?\w?\d), \g<1>\)\z/) do + assert_equal [david], non_mary_and_bob.merge(non_mary_and_bob) + end end only_david = Author.where("#{author_id} IN (?)", david) diff --git a/activerecord/test/cases/relation/where_test.rb b/activerecord/test/cases/relation/where_test.rb index 1c4883465c1b7..2c5901a141441 100644 --- a/activerecord/test/cases/relation/where_test.rb +++ b/activerecord/test/cases/relation/where_test.rb @@ -397,6 +397,8 @@ def test_where_with_integer_for_binary_column def test_where_with_emoji_for_binary_column Binary.create!(data: "🥦") + count = Binary.where(data: ["🥦", "🍦"]).count + assert_equal 1, count assert Binary.where(data: ["🥦", "🍦"]).to_sql.include?("f09fa5a6") assert Binary.where(data: ["🥦", "🍦"]).to_sql.include?("f09f8da6") end diff --git a/activerecord/test/schema/schema.rb b/activerecord/test/schema/schema.rb index b1e2d7dea37ff..06a101b50cc82 100644 --- a/activerecord/test/schema/schema.rb +++ b/activerecord/test/schema/schema.rb @@ -38,7 +38,11 @@ t.string :json_data_empty, null: true, default: "", limit: 1024 t.text :params t.references :account - t.json :json_options + if ActiveRecord::TestCase.current_adapter?(:PostgreSQLAdapter) + t.jsonb :json_options + else + t.json :json_options + end end create_table :admin_user_jsons, force: true do |t|