Skip to content

Commit

Permalink
Add --generate-stats-helper-sql command to generate helpers psql inst…
Browse files Browse the repository at this point in the history
…all script (#535)

Passing the new "--generate-stats-helper-sql" command to the collector will
generate a SQL script that can be passed to "psql" to install stats helpers
(e.g. for collecting column stats) on all configured databases for the
specified server.

The command takes the name of the configuration section of a specific
server in the collector configuration (e.g. "server1"), or "default" when
using environment variable-based configuration.

It can be used manually by saving the output to a file, or by piping
it to psql like this:

pganalyze-collector --generate-stats-helper-sql=server1 | psql -f -

Make sure to execute the generated SQL as an administrative user or
database owner (not the pganalyze user!) in order for the
SECURITY DEFINER helper functions to have sufficient privileges assigned.
  • Loading branch information
lfittl authored Apr 19, 2024
1 parent 41afc29 commit e523b7d
Show file tree
Hide file tree
Showing 5 changed files with 119 additions and 19 deletions.
2 changes: 1 addition & 1 deletion input/full.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ func CollectFull(ctx context.Context, server *state.Server, connection *sql.DB,
return
}

ps, ts, err = postgres.CollectAllSchemas(ctx, server, globalCollectionOpts, logger, ps, ts, systemType)
ps, ts, err = postgres.CollectAllSchemas(ctx, server, globalCollectionOpts, logger, ps, ts)
if err != nil {
logger.PrintError("Error collecting schema information: %s", err)
return
Expand Down
20 changes: 11 additions & 9 deletions input/postgres/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,37 +18,39 @@ const defaultSchemaTableLimit = 5000
// timeout than a full collection interval (10 minutes)
const schemaCollectionTimeout = 8 * time.Minute

func CollectAllSchemas(ctx context.Context, server *state.Server, collectionOpts state.CollectionOpts, logger *util.Logger, ps state.PersistedState, ts state.TransientState, systemType string) (state.PersistedState, state.TransientState, error) {
func GetDatabasesToCollect(server *state.Server, databases []state.PostgresDatabase) []string {
schemaDbNames := []string{}

ctxSchema, cancel := context.WithTimeout(ctx, schemaCollectionTimeout)
defer cancel()

if server.Config.DbAllNames {
for _, database := range ts.Databases {
if !database.IsTemplate && database.AllowConnections && !isCloudInternalDatabase(systemType, database.Name) {
for _, database := range databases {
if !database.IsTemplate && database.AllowConnections && !isCloudInternalDatabase(server.Config.SystemType, database.Name) {
schemaDbNames = append(schemaDbNames, database.Name)
}
}
} else {
schemaDbNames = append(schemaDbNames, server.Config.DbName)
schemaDbNames = append(schemaDbNames, server.Config.DbExtraNames...)
}
return schemaDbNames
}

func CollectAllSchemas(ctx context.Context, server *state.Server, collectionOpts state.CollectionOpts, logger *util.Logger, ps state.PersistedState, ts state.TransientState) (state.PersistedState, state.TransientState, error) {
ctxSchema, cancel := context.WithTimeout(ctx, schemaCollectionTimeout)
defer cancel()

ps.Relations = []state.PostgresRelation{}

ps.SchemaStats = make(map[state.Oid]*state.SchemaStats)
ps.Functions = []state.PostgresFunction{}

collected := make(map[string]bool)
for _, dbName := range schemaDbNames {
for _, dbName := range GetDatabasesToCollect(server, ts.Databases) {
if _, ok := collected[dbName]; ok {
continue
}
server.SelfTest.MarkMonitoredDb(dbName)

collected[dbName] = true
psNext, tsNext, databaseOid, err := collectOneSchema(ctxSchema, server, collectionOpts, logger, ps, ts, ts.Version, systemType, dbName)
psNext, tsNext, databaseOid, err := collectOneSchema(ctxSchema, server, collectionOpts, logger, ps, ts, ts.Version, server.Config.SystemType, dbName)
if err != nil {
// If the outer context failed, return an error to the caller
if ctx.Err() != nil {
Expand Down
33 changes: 32 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,34 @@ func run(ctx context.Context, wg *sync.WaitGroup, globalCollectionOpts state.Col
}
}

if globalCollectionOpts.GenerateStatsHelperSql != "" {
wg.Add(1)
testRunSuccess = make(chan bool)
go func() {
var matchingServer *state.Server
for _, server := range servers {
if globalCollectionOpts.GenerateStatsHelperSql == server.Config.SectionName {
matchingServer = server
}
}
if matchingServer == nil {
fmt.Fprintf(os.Stderr, "ERROR - Specified configuration section name '%s' not known\n", globalCollectionOpts.GenerateStatsHelperSql)
testRunSuccess <- false
} else {
output, err := runner.GenerateStatsHelperSql(ctx, matchingServer, globalCollectionOpts, logger.WithPrefix(matchingServer.Config.SectionName))
if err != nil {
fmt.Fprintf(os.Stderr, "ERROR - %s\n", err)
testRunSuccess <- false
} else {
fmt.Print(output)
testRunSuccess <- true
}
}
wg.Done()
}()
return
}

state.ReadStateFile(servers, globalCollectionOpts, logger)

writeStateFile = func() {
Expand Down Expand Up @@ -260,6 +288,7 @@ func main() {
var testRunLogs bool
var testExplain bool
var testSection string
var generateStatsHelperSql string
var forceStateUpdate bool
var configFilename string
var stateFilename string
Expand All @@ -284,6 +313,7 @@ func main() {
flag.BoolVar(&testRunLogs, "test-logs", false, "Tests whether log collection works (does not test privilege dropping for local log collection, use --test for that)")
flag.BoolVar(&testExplain, "test-explain", false, "Tests whether EXPLAIN collection works by issuing a dummy query (ensure log collection works first)")
flag.StringVar(&testSection, "test-section", "", "Tests a particular section of the config file, i.e. a specific server, and ignores all other config sections")
flag.StringVar(&generateStatsHelperSql, "generate-stats-helper-sql", "", "Generates a SQL script for the given server (name of section in the config file, or \"default\" for env variables), that can be run with \"psql -f\" for installing the collector stats helpers on all configured databases")
flag.BoolVar(&reloadRun, "reload", false, "Reloads the collector daemon thats running on the host")
flag.BoolVar(&noReload, "no-reload", false, "Disables automatic config reloading during a test run")
flag.BoolVarP(&logger.Verbose, "verbose", "v", false, "Outputs additional debugging information, use this if you're encountering errors or other problems")
Expand Down Expand Up @@ -353,7 +383,7 @@ func main() {
}
}

if testReport != "" || testRunLogs || testRunAndTrace || testExplain {
if testReport != "" || testRunLogs || testRunAndTrace || testExplain || generateStatsHelperSql != "" {
testRun = true
}

Expand All @@ -365,6 +395,7 @@ func main() {
TestRunLogs: testRunLogs || dryRunLogs,
TestExplain: testExplain,
TestSection: testSection,
GenerateStatsHelperSql: generateStatsHelperSql,
DebugLogs: debugLogs,
DiscoverLogLocation: discoverLogLocation,
CollectPostgresRelations: !noPostgresRelations,
Expand Down
66 changes: 66 additions & 0 deletions runner/generate_helper_sql.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package runner

import (
"context"
"fmt"
"strings"

"github.com/lib/pq"
"github.com/pganalyze/collector/input/postgres"
"github.com/pganalyze/collector/state"
"github.com/pganalyze/collector/util"
)

// statsHelpers holds the SQL statements that define the pganalyze stats helper
// functions. GenerateStatsHelperSql emits every entry, in order, for each
// database it generates a script section for. Each statement uses
// CREATE OR REPLACE and declares the function as SECURITY DEFINER.
var statsHelpers = []string{
// Column stats: pganalyze.get_column_stats() returns rows shaped like
// pg_stats, with the three anyarray columns replaced by NULL::anyarray.
// NOTE(review): presumably this keeps actual column values from being
// exposed to the monitoring user - confirm.
`CREATE OR REPLACE FUNCTION pganalyze.get_column_stats() RETURNS SETOF pg_stats AS
$$
/* pganalyze-collector */ SELECT schemaname, tablename, attname, inherited, null_frac, avg_width,
n_distinct, NULL::anyarray, most_common_freqs, NULL::anyarray, correlation, NULL::anyarray,
most_common_elem_freqs, elem_count_histogram
FROM pg_catalog.pg_stats;
$$ LANGUAGE sql VOLATILE SECURITY DEFINER;`,

// Extended stats: pganalyze.get_relation_stats_ext() returns a subset of
// pg_stats_ext. "inherited" is read through row_to_json rather than as a
// direct column reference.
// NOTE(review): presumably so the function still works on Postgres
// versions where pg_stats_ext has no "inherited" column - confirm.
`CREATE OR REPLACE FUNCTION pganalyze.get_relation_stats_ext() RETURNS TABLE(
statistics_schemaname text, statistics_name text,
inherited boolean, n_distinct pg_ndistinct, dependencies pg_dependencies,
most_common_val_nulls boolean[], most_common_freqs float8[], most_common_base_freqs float8[]
) AS
$$
/* pganalyze-collector */ SELECT statistics_schemaname::text, statistics_name::text,
(row_to_json(se.*)::jsonb ->> 'inherited')::boolean AS inherited, n_distinct, dependencies,
most_common_val_nulls, most_common_freqs, most_common_base_freqs
FROM pg_catalog.pg_stats_ext se;
$$ LANGUAGE sql VOLATILE SECURITY DEFINER;`}

// GenerateStatsHelperSql builds a psql script that installs the stats helper
// functions (see statsHelpers) on every database selected by
// postgres.GetDatabasesToCollect for the given server.
//
// It connects to the server to read the Postgres version and the list of
// databases, then emits, per database: a "\c" meta-command to switch to it,
// creation of the "pganalyze" schema, a USAGE grant on that schema to the
// configured collector user, and all helper function definitions.
//
// The script is returned as a string; nothing is executed against the
// databases beyond the version and database list lookups. On failure an empty
// string is returned together with the error, which wraps the underlying cause.
func GenerateStatsHelperSql(ctx context.Context, server *state.Server, globalCollectionOpts state.CollectionOpts, logger *util.Logger) (string, error) {
	db, err := postgres.EstablishConnection(ctx, server, logger, globalCollectionOpts, "")
	if err != nil {
		return "", err
	}
	defer db.Close()

	version, err := postgres.GetPostgresVersion(ctx, logger, db)
	if err != nil {
		return "", fmt.Errorf("error collecting Postgres version: %w", err)
	}

	databases, _, err := postgres.GetDatabases(ctx, logger, db, version)
	if err != nil {
		return "", fmt.Errorf("error collecting pg_databases: %w", err)
	}

	// Quote the role name just like the database name below: the script is
	// meant to be run by an administrative user, so an unquoted name containing
	// uppercase letters, dashes or spaces must not produce broken (or
	// unintended) SQL.
	quotedUsername := pq.QuoteIdentifier(server.Config.GetDbUsername())

	var output strings.Builder
	// The configured database list may name the same database more than once;
	// emit each database only once, mirroring what CollectAllSchemas does.
	seen := make(map[string]struct{})
	for _, dbName := range postgres.GetDatabasesToCollect(server, databases) {
		if _, ok := seen[dbName]; ok {
			continue
		}
		seen[dbName] = struct{}{}

		fmt.Fprintf(&output, "\\c %s\n", pq.QuoteIdentifier(dbName))
		output.WriteString("CREATE SCHEMA IF NOT EXISTS pganalyze;\n")
		fmt.Fprintf(&output, "GRANT USAGE ON SCHEMA pganalyze TO %s;\n", quotedUsername)
		for _, helper := range statsHelpers {
			output.WriteString(helper)
			output.WriteString("\n")
		}
		output.WriteString("\n")
	}

	return output.String(), nil
}
17 changes: 9 additions & 8 deletions state/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,14 +197,15 @@ type CollectionOpts struct {

DiffStatements bool

SubmitCollectedData bool
TestRun bool
TestReport string
TestRunLogs bool
TestExplain bool
TestSection string
DebugLogs bool
DiscoverLogLocation bool
SubmitCollectedData bool
TestRun bool
TestReport string
TestRunLogs bool
TestExplain bool
TestSection string
GenerateStatsHelperSql string
DebugLogs bool
DiscoverLogLocation bool

StateFilename string
WriteStateUpdate bool
Expand Down

0 comments on commit e523b7d

Please sign in to comment.