Skip to content

Commit

Permalink
Postgres: use ANY instead of IN for array inclusion queries
Browse files Browse the repository at this point in the history
  • Loading branch information
seanlinsley committed Oct 11, 2023
1 parent 7228da3 commit 5d19193
Show file tree
Hide file tree
Showing 15 changed files with 149 additions and 71 deletions.
15 changes: 15 additions & 0 deletions activerecord/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
* Postgres: use ANY instead of IN for array inclusion queries

Previously, queries like `where(id: [1, 2])` generated the SQL `id IN (1, 2)`.
Now `id = ANY ('{1,2}')` is generated instead, and `where.not` generates `id != ALL ('{1,2}')`.

This brings several advantages:

* the query can now be a prepared statement
* query parsing is faster
* duplicate entries in `pg_stat_statements` can be avoided
* queries are less likely to be truncated in `pg_stat_activity`

*Sean Linsley*

* Ensure `#signed_id` outputs `url_safe` strings.

*Jason Meller*


Please check [7-1-stable](https://github.com/rails/rails/blob/7-1-stable/activerecord/CHANGELOG.md) for previous changes.
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def quote(value)
# Cast a +value+ to a type that the database understands. For example,
# SQLite does not understand dates, so this method will convert a Date
# to a String.
def type_cast(value)
def type_cast(value, **kwargs)
case value
when Symbol, ActiveSupport::Multibyte::Chars, Type::Binary::Data
value.to_s
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@ module OID # :nodoc:
class Array < Type::Value # :nodoc:
include ActiveModel::Type::Helpers::Mutable

Data = Struct.new(:encoder, :values) # :nodoc:
Data = Struct.new(:encoder, :values) do # :nodoc:
def hash
[encoder.name, encoder.delimiter, values].hash
end
end

attr_reader :subtype, :delimiter
delegate :type, :user_input_in_time_zone, :limit, :precision, :scale, to: :subtype
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,13 +130,18 @@ def quote_default_expression(value, column) # :nodoc:
end
end

def type_cast(value) # :nodoc:
def type_cast(value, in_array: false) # :nodoc:
case value
when Type::Binary::Data
# Return a bind param hash with format as binary.
# See https://deveiate.org/code/pg/PG/Connection.html#method-i-exec_prepared-doc
# for more information
{ value: value.to_s, format: 1 }
if in_array
# Arrays are string-encoded, so an included binary value must also be string-encoded.
escape_bytea(value.to_s)
else
# Return a bind param hash with format as binary.
# See https://deveiate.org/code/pg/PG/Connection.html#method-i-exec_prepared-doc
# for more information
{ value: value.to_s, format: 1 }
end
when OID::Xml::Data, OID::Bit::Data
value.to_s
when OID::Array::Data
Expand Down Expand Up @@ -221,7 +226,7 @@ def determine_encoding_of_strings_in_array(value)
def type_cast_array(values)
case values
when ::Array then values.map { |item| type_cast_array(item) }
else type_cast(values)
else type_cast(values, in_array: true)
end
end

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ def self.install_support
ActiveRecord::Relation.prepend(RelationQueries)
ActiveRecord::Base.include(CoreQueries)
ActiveRecord::Encryption::EncryptedAttributeType.prepend(ExtendedEncryptableType)
Arel::Nodes::HomogeneousIn.prepend(InWithAdditionalValues)
end

# When modifying this file run performance tests in
Expand Down Expand Up @@ -153,20 +152,6 @@ def serialize(data)
end
end
end

module InWithAdditionalValues
def proc_for_binds
-> value { ActiveModel::Attribute.with_cast_value(attribute.name, value, encryption_aware_type_caster) }
end

def encryption_aware_type_caster
if attribute.type_caster.is_a?(ActiveRecord::Encryption::EncryptedAttributeType)
attribute.type_caster.cast_type
else
attribute.type_caster
end
end
end
end
end
end
2 changes: 1 addition & 1 deletion activerecord/lib/arel/nodes/homogeneous_in.rb
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def casted_values
end

def proc_for_binds
-> value { ActiveModel::Attribute.with_cast_value(attribute.name, value, attribute.type_caster) }
-> value { ActiveModel::Attribute.from_database(attribute.name, value, ActiveModel::Type.default_value) }
end

def fetch_attribute(&block)
Expand Down
16 changes: 16 additions & 0 deletions activerecord/lib/arel/visitors/postgresql.rb
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,22 @@ def visit_Arel_Nodes_IsDistinctFrom(o, collector)
visit o.right, collector
end

# Postgres-specific implementation that uses `col = ANY ('{1,2}')` instead of `col IN (1,2)`
def visit_Arel_Nodes_HomogeneousIn(o, collector)
visit o.left, collector
collector << (o.type == :in ? " = ANY (" : " != ALL (")
type_caster = ActiveRecord::ConnectionAdapters::PostgreSQL::OID::Array.new(o.attribute.type_caster, ",")
casted_values = o.values.map do |raw_value|
o.attribute.type_caster.serialize(raw_value) if o.attribute.type_caster.serializable?(raw_value)
end
casted_values.compact!
pg_encoder = PG::TextEncoder::Array.new(name: "#{type_caster.type}[]", delimiter: ",")
values = [ActiveRecord::ConnectionAdapters::PostgreSQL::OID::Array::Data.new(pg_encoder, casted_values)]
proc_for_binds = -> value { ActiveModel::Attribute.from_database(o.attribute.name, value, ActiveModel::Type.default_value) }
collector.add_binds(values, proc_for_binds, &bind_block)
collector << ")"
end

BIND_BLOCK = proc { |i| "$#{i}" }
private_constant :BIND_BLOCK

Expand Down
34 changes: 33 additions & 1 deletion activerecord/test/cases/arel/visitors/postgres_test.rb
Original file line number Diff line number Diff line change
@@ -1,16 +1,32 @@
# frozen_string_literal: true

require "active_model/attribute"
require "active_record"
require "active_record/connection_adapters/postgresql_adapter"
require_relative "../helper"

module Arel
module Visitors
class PostgresTest < Arel::Spec
before do
@visitor = PostgreSQL.new Table.engine.connection
@table = Table.new(:users)
@table = Table.new(:users, type_caster: fake_pg_caster)
@attr = @table[:id]
end

# map that converts attribute names to a caster
def fake_pg_caster
Object.new.tap do |caster|
def caster.type_for_attribute(attr_name)
ActiveModel::Type::String.new
end
def caster.type_cast_for_database(attr_name, value)
type = type_for_attribute(attr_name)
type.serialize(value)
end
end
end

def compile(node)
@visitor.accept(node, Collectors::SQLString.new).value
end
Expand Down Expand Up @@ -331,6 +347,22 @@ def compile(node)
_(sql).must_be_like %{ "products"."tags" && '{foo,bar,baz}' }
end
end

describe "Nodes::HomogeneousIn" do
it "should use = ANY" do
test = Nodes::HomogeneousIn.new([1, 2], @table[:id], :in)
_(compile(test)).must_be_like %{
"users"."id" = ANY ($1)
}
end

it "should use != ALL for negation" do
test = Nodes::HomogeneousIn.new([1, 2], @table[:id], :notin)
_(compile(test)).must_be_like %{
"users"."id" != ALL ($1)
}
end
end
end
end
end
6 changes: 5 additions & 1 deletion activerecord/test/cases/associations/eager_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1700,7 +1700,11 @@ def test_preloading_has_many_through_with_custom_scope
c = Sharded::BlogPost.connection
quoted_blog_id = Regexp.escape(c.quote_table_name("sharded_comments.blog_id"))
quoted_blog_post_id = Regexp.escape(c.quote_table_name("sharded_comments.blog_post_id"))
assert_match(/WHERE #{quoted_blog_id} IN \(.+\) AND #{quoted_blog_post_id} IN \(.+\)/, sql)
if current_adapter?(:PostgreSQLAdapter)
assert_match(/WHERE #{quoted_blog_id} = ANY \(\$1\) AND #{quoted_blog_post_id} = ANY \(\$2\)/, sql)
else
assert_match(/WHERE #{quoted_blog_id} IN \(.+\) AND #{quoted_blog_post_id} IN \(.+\)/, sql)
end
end

test "preloading has_many association associated by a composite query_constraints" do
Expand Down
6 changes: 4 additions & 2 deletions activerecord/test/cases/batches_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -524,15 +524,17 @@ def test_in_batches_executes_range_queries_when_unconstrained
def test_in_batches_executes_in_queries_when_unconstrained_and_opted_out_of_ranges
c = Post.connection
quoted_posts_id = Regexp.escape(c.quote_table_name("posts.id"))
assert_sql(/#{quoted_posts_id} IN \(.+\)/i) do
regex = current_adapter?(:PostgreSQLAdapter) ? /#{quoted_posts_id} = ANY \(\$1\)/i : /#{quoted_posts_id} IN \(.+\)/i
assert_sql(regex) do
Post.in_batches(of: 2, use_ranges: false) { |relation| assert_kind_of Post, relation.first }
end
end

def test_in_batches_executes_in_queries_when_constrained
c = Post.connection
quoted_posts_id = Regexp.escape(c.quote_table_name("posts.id"))
assert_sql(/#{quoted_posts_id} IN \(.+\)/i) do
regex = current_adapter?(:PostgreSQLAdapter) ? /#{quoted_posts_id} = ANY \(\$1\)/i : /#{quoted_posts_id} IN \(.+\)/i
assert_sql(regex) do
Post.where("id < ?", 5).in_batches(of: 2) { |relation| assert_kind_of Post, relation.first }
end
end
Expand Down
60 changes: 22 additions & 38 deletions activerecord/test/cases/bind_parameter_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,11 @@ def test_statement_cache_with_in_clause

topics = Topic.where(id: [1, 3]).order(:id)
assert_equal [1, 3], topics.map(&:id)
assert_not_includes statement_cache, to_sql_key(topics.arel)
if current_adapter?(:PostgreSQLAdapter)
assert_includes statement_cache, to_sql_key(topics.arel)
else
assert_not_includes statement_cache, to_sql_key(topics.arel)
end
end

def test_statement_cache_with_sql_string_literal
Expand Down Expand Up @@ -162,12 +166,12 @@ def test_logs_unnamed_binds
end

def test_bind_params_to_sql_with_prepared_statements
assert_bind_params_to_sql
assert_bind_params_to_sql(true)
end

def test_bind_params_to_sql_with_unprepared_statements
@connection.unprepared_statement do
assert_bind_params_to_sql
assert_bind_params_to_sql(false)
end
end

Expand Down Expand Up @@ -198,60 +202,40 @@ def test_binds_with_filtered_attributes
end

private
def assert_bind_params_to_sql
def assert_bind_params_to_sql(prepared)
table = Author.quoted_table_name
pk = "#{table}.#{Author.quoted_primary_key}"

# prepared_statements: true
#
# SELECT `authors`.* FROM `authors` WHERE (`authors`.`id` IN (?, ?, ?) OR `authors`.`id` IS NULL)
#
# prepared_statements: false
#
# SELECT `authors`.* FROM `authors` WHERE (`authors`.`id` IN (1, 2, 3) OR `authors`.`id` IS NULL)
#
sql = "SELECT #{table}.* FROM #{table} WHERE (#{pk} IN (#{bind_params(1..3)}) OR #{pk} IS NULL)"
condition = if current_adapter?(:PostgreSQLAdapter)
prepared ? "= ANY ($1)" : "= ANY ('{1,2,3}')"
else
prepared ? "IN (?, ?, ?)" : "IN (1, 2, 3)"
end

sql = "SELECT #{table}.* FROM #{table} WHERE (#{pk} #{condition} OR #{pk} IS NULL)"

authors = Author.where(id: [1, 2, 3, nil])
assert_equal sql, @connection.to_sql(authors.arel)
assert_sql(sql) { assert_equal 3, authors.length }

# prepared_statements: true
#
# SELECT `authors`.* FROM `authors` WHERE `authors`.`id` IN (?, ?, ?)
#
# prepared_statements: false
#
# SELECT `authors`.* FROM `authors` WHERE `authors`.`id` IN (1, 2, 3)
#
sql = "SELECT #{table}.* FROM #{table} WHERE #{pk} IN (#{bind_params(1..3)})"
sql = "SELECT #{table}.* FROM #{table} WHERE #{pk} #{condition}"

authors = Author.where(id: [1, 2, 3, 9223372036854775808])
assert_equal sql, @connection.to_sql(authors.arel)
assert_sql(sql) { assert_equal 3, authors.length }

# prepared_statements: true
#
# SELECT `authors`.* FROM `authors` WHERE `authors`.`id` IN (?, ?, ?)
#
# prepared_statements: false
#
# SELECT `authors`.* FROM `authors` WHERE `authors`.`id` IN (1, 2, 3)
#
sql = "SELECT #{table}.* FROM #{table} WHERE #{pk} IN (#{bind_params(1..3)})"
condition = if current_adapter?(:PostgreSQLAdapter)
prepared ? "IN ($1, $2, $3)" : "IN (1, 2, 3)"
else
prepared ? "IN (?, ?, ?)" : "IN (1, 2, 3)"
end
sql = "SELECT #{table}.* FROM #{table} WHERE #{pk} #{condition}"

arel_node = Arel.sql("SELECT #{table}.* FROM #{table} WHERE #{pk} IN (?)", [1, 2, 3])
assert_equal sql, @connection.to_sql(arel_node)
assert_sql(sql) { assert_equal 3, @connection.select_all(arel_node).length }
end

def bind_params(ids)
collector = @connection.send(:collector)
bind_params = ids.map { |i| Arel::Nodes::BindParam.new(i) }
sql, _ = @connection.visitor.compile(bind_params, collector)
sql
end

def to_sql_key(arel)
sql = @connection.to_sql(arel)
@connection.respond_to?(:sql_key, true) ? @connection.send(:sql_key, sql) : sql
Expand Down
20 changes: 19 additions & 1 deletion activerecord/test/cases/log_subscriber_test.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# frozen_string_literal: true

require "cases/helper"
require "models/admin"
require "models/admin/user"
require "models/binary"
require "models/developer"
require "models/post"
Expand Down Expand Up @@ -251,7 +253,23 @@ def test_cached_queries_doesnt_log_when_level_is_not_debug
def test_where_in_binds_logging_include_attribute_names
Developer.where(id: [1, 2, 3, 4, 5]).load
wait
assert_match(%{["id", 1], ["id", 2], ["id", 3], ["id", 4], ["id", 5]}, @logger.logged(:debug).last)
if current_adapter?(:PostgreSQLAdapter)
assert_match(%{["id", "{1,2,3,4,5}"]}, @logger.logged(:debug).last)
else
assert_match(%{["id", 1], ["id", 2], ["id", 3], ["id", 4], ["id", 5]}, @logger.logged(:debug).last)
end
end

def test_where_clause_with_json_attribute
Admin::User.create(json_options: { a: 1 })
count = Admin::User.where(json_options: [{ a: 1 }, { b: 2 }]).count
assert_equal 1, count
wait
if current_adapter?(:PostgreSQLAdapter)
assert_match(%{[["json_options", "{\\"{\\\\\\"a\\\\\\":1}\\",\\"{\\\\\\"b\\\\\\":2}\\"}"]]}, @logger.logged(:debug).last)
else
assert_match(%{[["json_options", "{\\"a\\":1}"], ["json_options", "{\\"b\\":2}"]]}, @logger.logged(:debug).last)
end
end

def test_binary_data_is_not_logged
Expand Down
13 changes: 10 additions & 3 deletions activerecord/test/cases/relation/merging_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,16 @@ def test_merge_doesnt_duplicate_same_clauses

non_mary_and_bob = Author.where.not(id: [mary, bob])

author_id = Author.connection.quote_table_name("authors.id")
assert_sql(/WHERE #{Regexp.escape(author_id)} NOT IN \((\?|\W?\w?\d), \g<1>\)\z/) do
assert_equal [david], non_mary_and_bob.merge(non_mary_and_bob)
if current_adapter?(:PostgreSQLAdapter)
author_id = Author.connection.quote_table_name("authors.id")
assert_sql(/WHERE #{Regexp.escape(author_id)} != ALL \(\$1\)\z/) do
assert_equal [david], non_mary_and_bob.merge(non_mary_and_bob)
end
else
author_id = Author.connection.quote_table_name("authors.id")
assert_sql(/WHERE #{Regexp.escape(author_id)} NOT IN \((\?|\W?\w?\d), \g<1>\)\z/) do
assert_equal [david], non_mary_and_bob.merge(non_mary_and_bob)
end
end

only_david = Author.where("#{author_id} IN (?)", david)
Expand Down
2 changes: 2 additions & 0 deletions activerecord/test/cases/relation/where_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,8 @@ def test_where_with_integer_for_binary_column

def test_where_with_emoji_for_binary_column
Binary.create!(data: "🥦")
count = Binary.where(data: ["🥦", "🍦"]).count
assert_equal 1, count
assert Binary.where(data: ["🥦", "🍦"]).to_sql.include?("f09fa5a6")
assert Binary.where(data: ["🥦", "🍦"]).to_sql.include?("f09f8da6")
end
Expand Down
Loading

0 comments on commit 5d19193

Please sign in to comment.