From 5878138e17c2dbba747f1046679ff20b9c993632 Mon Sep 17 00:00:00 2001 From: Mario Ranftl Date: Fri, 4 Aug 2023 15:49:41 +0200 Subject: [PATCH 01/10] review only, reactivate print debugging --- pkg/manager/manager.go | 1 - pkg/pool/pool.go | 16 ++++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index f46282c..36c3ccc 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -280,7 +280,6 @@ func (m Manager) FinalizeTemplateDatabase(ctx context.Context, hash string) (db. } // GetTestDatabase tries to get a ready test DB from an existing pool. -// If no DB is ready after the preconfigured timeout, ErrTimeout is returned. func (m Manager) GetTestDatabase(ctx context.Context, hash string) (db.TestDatabase, error) { ctx, task := trace.NewTask(ctx, "get_test_db") defer task.End() diff --git a/pkg/pool/pool.go b/pkg/pool/pool.go index f8f8271..ebef62c 100644 --- a/pkg/pool/pool.go +++ b/pkg/pool/pool.go @@ -118,7 +118,7 @@ func (pool *HashPool) Stop() { func (pool *HashPool) GetTestDatabase(ctx context.Context, hash string, timeout time.Duration) (db db.TestDatabase, err error) { var index int - // fmt.Printf("pool#%s: waiting for ready ID...\n", hash) + fmt.Printf("pool#%s: waiting for ready ID...\n", hash) select { case <-time.After(timeout): @@ -130,7 +130,7 @@ func (pool *HashPool) GetTestDatabase(ctx context.Context, hash string, timeout case index = <-pool.ready: } - // fmt.Printf("pool#%s: got ready ID=%v\n", hash, index) + fmt.Printf("pool#%s: got ready ID=%v\n", hash, index) reg := trace.StartRegion(ctx, "wait_for_lock_hash_pool") pool.Lock() @@ -147,7 +147,7 @@ func (pool *HashPool) GetTestDatabase(ctx context.Context, hash string, timeout // sanity check, should never happen - we got this index from 'ready' channel if testDB.state != dbStateReady { - // fmt.Printf("pool#%s: GetTestDatabase ErrInvalidState ID=%v\n", hash, index) + fmt.Printf("pool#%s: GetTestDatabase ErrInvalidState ID=%v\n", hash, index) err = ErrInvalidState return @@ -167,7 +167,7 @@ func (pool *HashPool) GetTestDatabase(ctx context.Context, hash string, timeout pool.tasksChan <- workerTaskCleanDirty } - // fmt.Printf("pool#%s: ready=%d, dirty=%d, waitingForCleaning=%d, dbs=%d initial=%d max=%d (GetTestDatabase)\n", hash, len(pool.ready), len(pool.dirty), len(pool.waitingForCleaning), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize) + fmt.Printf("pool#%s: ready=%d, dirty=%d, tasksChan=%d, dbs=%d initial=%d max=%d (GetTestDatabase)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize) return testDB.TestDatabase, nil } @@ -178,6 +178,8 @@ func (pool *HashPool) AddTestDatabase(ctx context.Context, templateDB db.Databas func (pool *HashPool) workerTaskLoop(ctx context.Context, taskChan <-chan string, poolMaxParallelTasks int) { + fmt.Println("workerTaskLoop") + handlers := map[string]func(ctx context.Context) error{ workerTaskExtend: ignoreErrs(pool.extend, ErrPoolFull, context.Canceled), workerTaskCleanDirty: ignoreErrs(pool.cleanDirty, context.Canceled), @@ -218,6 +220,8 @@ func (pool *HashPool) workerTaskLoop(ctx context.Context, taskChan <-chan string func (pool *HashPool) controlLoop() { + fmt.Println("controlLoop") + ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -340,6 +344,7 @@ func (pool *HashPool) cleanDirty(ctx context.Context) error { pool.ready <- testDB.ID + 
fmt.Printf("pool#%s: ready=%d, dirty=%d, tasksChan=%d, dbs=%d initial=%d max=%d (cleanDirty)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize) return nil } @@ -362,6 +367,9 @@ func (pool *HashPool) extend(ctx context.Context) error { reg := trace.StartRegion(ctx, "worker_wait_for_lock_hash_pool") pool.Lock() + + fmt.Printf("pool#%s: ready=%d, dirty=%d, tasksChan=%d, dbs=%d initial=%d max=%d (extend)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize) + defer pool.Unlock() reg.End() From c7342a640309424b4aaaee2ae42e1616765cf1f8 Mon Sep 17 00:00:00 2001 From: Mario Ranftl Date: Fri, 25 Aug 2023 16:45:18 +0200 Subject: [PATCH 02/10] close, range and refill dirty channel on unlock and recreate, debug statements, use 1000 instead of 10e3 --- .drone.yml | 2 +- pkg/manager/manager_config.go | 4 ++-- pkg/pool/pool.go | 43 +++++++++++++++++++++++++++++++++-- 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/.drone.yml b/.drone.yml index 20a5c9a..0a8deee 100644 --- a/.drone.yml +++ b/.drone.yml @@ -30,7 +30,7 @@ alias: - &IMAGE_DEPLOY_ID ${DRONE_REPO,,}:${DRONE_COMMIT_SHA} # Defines which branches will trigger a docker image push our Google Cloud Registry (tags are always published) - - &GCR_PUBLISH_BRANCHES [dev, master, aj/pooling-improvements] + - &GCR_PUBLISH_BRANCHES [dev, master, aj/pooling-improvements, mr/aj-review] # Docker registry publish default settings - &GCR_REGISTRY_SETTINGS diff --git a/pkg/manager/manager_config.go b/pkg/manager/manager_config.go index 3269375..435ec11 100644 --- a/pkg/manager/manager_config.go +++ b/pkg/manager/manager_config.go @@ -60,8 +60,8 @@ func DefaultManagerConfigFromEnv() ManagerConfig { TestDatabaseInitialPoolSize: util.GetEnvAsInt("INTEGRESQL_TEST_INITIAL_POOL_SIZE", runtime.NumCPU()), // TestDatabaseMaxPoolSize: util.GetEnvAsInt("INTEGRESQL_TEST_MAX_POOL_SIZE", 500), TestDatabaseMaxPoolSize: util.GetEnvAsInt("INTEGRESQL_TEST_MAX_POOL_SIZE", runtime.NumCPU()*4), - TemplateFinalizeTimeout: time.Millisecond * time.Duration(util.GetEnvAsInt("INTEGRESQL_TEMPLATE_FINALIZE_TIMEOUT_MS", 5*60*10e3 /*5 min*/)), - TestDatabaseGetTimeout: time.Millisecond * time.Duration(util.GetEnvAsInt("INTEGRESQL_TEST_DB_GET_TIMEOUT_MS", 1*60*10e3 /*1 min, timeout hardcoded also in GET request handler*/)), + TemplateFinalizeTimeout: time.Millisecond * time.Duration(util.GetEnvAsInt("INTEGRESQL_TEMPLATE_FINALIZE_TIMEOUT_MS", 5*60*1000 /*5 min*/)), + TestDatabaseGetTimeout: time.Millisecond * time.Duration(util.GetEnvAsInt("INTEGRESQL_TEST_DB_GET_TIMEOUT_MS", 1*60*1000 /*1 min, timeout hardcoded also in GET request handler*/)), PoolMaxParallelTasks: util.GetEnvAsInt("INTEGRESQL_POOL_MAX_PARALLEL_TASKS", runtime.NumCPU()), } } diff --git a/pkg/pool/pool.go b/pkg/pool/pool.go index 562d5f4..7980b08 100644 --- a/pkg/pool/pool.go +++ b/pkg/pool/pool.go @@ -162,7 +162,7 @@ func (pool *HashPool) GetTestDatabase(ctx context.Context, hash string, timeout // we try to ensure that InitialPoolSize count is staying ready // thus, we try to move the oldest dirty dbs into cleaning - if len(pool.dbs) >= pool.PoolConfig.MaxPoolSize { + if len(pool.dbs) >= pool.PoolConfig.MaxPoolSize && len(pool.ready) < pool.InitialPoolSize { pool.tasksChan <- workerTaskCleanDirty } @@ -266,10 +266,29 @@ func (pool *HashPool) ReturnTestDatabase(ctx context.Context, hash 
string, id in testDB.state = dbStateReady pool.dbs[id] = testDB + // fmt.Printf("ReturnTestDatabase %v: close channel\n", id) + + // The testDB is still in the dirty channel. + // We need to explicitly remove it from there by force closing the channel so we can range over it and thus recreate a new dirty channel without the returned ID. + newDirty := make(chan int, pool.MaxPoolSize) + close(pool.dirty) + + for dirtyID := range pool.dirty { + if dirtyID != id { + newDirty <- dirtyID + } + } + + // fmt.Printf("ReturnTestDatabase %v: reset channel\n", id) + pool.dirty = newDirty + + // fmt.Printf("ReturnTestDatabase %v: ready\n", id) + // id to ready again. pool.ready <- id - return nil + fmt.Printf("pool#%s: ready=%d, dirty=%d, tasksChan=%d, dbs=%d initial=%d max=%d (ReturnTestDatabase)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize) + return nil } // RecreateTestDatabase recreates the test DB according to the template and returns it back to the pool. @@ -301,8 +320,28 @@ func (pool *HashPool) RecreateTestDatabase(ctx context.Context, hash string, id testDB.state = dbStateReady pool.dbs[id] = testDB + // fmt.Printf("RecreateTestDatabase %v: close channel\n", id) + + // The testDB is still in the dirty channel. + // We need to explicitly remove it from there by force closing the channel so we can range over it and thus recreate a new dirty channel without the returned ID. + newDirty := make(chan int, pool.MaxPoolSize) + close(pool.dirty) + + for dirtyID := range pool.dirty { + if dirtyID != id { + newDirty <- dirtyID + } + } + + // fmt.Printf("RecreateTestDatabase %v: reset channel\n", id) + pool.dirty = newDirty + + // fmt.Printf("RecreateTestDatabase %v: ready\n", id) + // id to ready again. pool.ready <- id + fmt.Printf("pool#%s: ready=%d, dirty=%d, tasksChan=%d, dbs=%d initial=%d max=%d (RecreateTestDatabase)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize) + return nil } From 8e64b273ae131793bb4a44dba64a4583d05d72db Mon Sep 17 00:00:00 2001 From: Mario Ranftl Date: Mon, 28 Aug 2023 18:21:50 +0200 Subject: [PATCH 03/10] filter via tmp channel without closing the initial, fix tests / stabilize via disabling worker autostart --- Makefile | 5 +- docker-compose.yml | 4 +- pkg/manager/manager_test.go | 8 +-- pkg/pool/pool.go | 90 ++++++++++++++++---------------- pkg/pool/pool_collection.go | 25 ++++++--- pkg/pool/pool_collection_test.go | 31 ++++++----- pkg/templates/template_test.go | 6 +-- 7 files changed, 93 insertions(+), 76 deletions(-) diff --git a/Makefile b/Makefile index 284edf2..8ce960f 100644 --- a/Makefile +++ b/Makefile @@ -45,7 +45,10 @@ test: ##- Run tests, output by package, print coverage. # note that we explicitly don't want to use a -coverpkg=./... option, per pkg coverage take precedence go-test-by-pkg: ##- (opt) Run tests, output by package. - gotestsum --format pkgname-and-test-fails --jsonfile /tmp/test.log -- -race -cover -count=1 -coverprofile=/tmp/coverage.out ./... + gotestsum --format pkgname-and-test-fails --format-hide-empty-pkg --jsonfile /tmp/test.log -- -race -cover -count=1 -coverprofile=/tmp/coverage.out ./... + +go-test-by-name: ##- (opt) Run tests, output by testname. + gotestsum --format testname --jsonfile /tmp/test.log -- -race -cover -count=1 -coverprofile=/tmp/coverage.out ./... 
go-test-print-coverage: ##- (opt) Print overall test coverage (must be done after running tests). @printf "coverage " diff --git a/docker-compose.yml b/docker-compose.yml index 75c3afd..9f2de21 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,8 +5,8 @@ services: build: context: . target: development - ports: - - "5000:5000" + # ports: + # - "5000:5000" working_dir: /app volumes: - .:/app #:delegated diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index 8709cb8..9763cc6 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -648,9 +648,9 @@ func TestManagerGetAndRecreateTestDatabase(t *testing.T) { ctx := context.Background() cfg := manager.DefaultManagerConfigFromEnv() - cfg.TestDatabaseInitialPoolSize = 10 - cfg.TestDatabaseMaxPoolSize = 15 - cfg.TestDatabaseGetTimeout = 200 * time.Millisecond + cfg.TestDatabaseInitialPoolSize = 4 + cfg.TestDatabaseMaxPoolSize = 8 + cfg.TestDatabaseGetTimeout = 250 * time.Millisecond m, _ := testManagerWithConfig(cfg) if err := m.Initialize(ctx); err != nil { @@ -673,7 +673,7 @@ func TestManagerGetAndRecreateTestDatabase(t *testing.T) { } // request many more databases than initally added - for i := 0; i <= cfg.TestDatabaseMaxPoolSize*3; i++ { + for i := 0; i <= cfg.TestDatabaseMaxPoolSize*2; i++ { test, err := m.GetTestDatabase(ctx, hash) assert.NoError(t, err) assert.NotEmpty(t, test) diff --git a/pkg/pool/pool.go b/pkg/pool/pool.go index 7980b08..99f1549 100644 --- a/pkg/pool/pool.go +++ b/pkg/pool/pool.go @@ -157,6 +157,7 @@ func (pool *HashPool) GetTestDatabase(ctx context.Context, hash string, timeout pool.dirty <- index if len(pool.dbs) < pool.PoolConfig.MaxPoolSize { + fmt.Printf("pool#%s: Conditional extend\n", hash) pool.tasksChan <- workerTaskExtend } @@ -177,7 +178,7 @@ func (pool *HashPool) AddTestDatabase(ctx context.Context, templateDB db.Databas func (pool *HashPool) workerTaskLoop(ctx context.Context, taskChan <-chan string, poolMaxParallelTasks int) { - fmt.Println("workerTaskLoop") + fmt.Printf("pool#%s: workerTaskLoop\n", pool.templateDB.TemplateHash) handlers := map[string]func(ctx context.Context) error{ workerTaskExtend: ignoreErrs(pool.extend, ErrPoolFull, context.Canceled), @@ -208,9 +209,10 @@ func (pool *HashPool) workerTaskLoop(ctx context.Context, taskChan <-chan string <-semaphore }() - // fmt.Println("task", task) + fmt.Printf("pool#%s: workerTaskLoop task=%v\n", pool.templateDB.TemplateHash, task) + if err := handler(ctx); err != nil { - fmt.Println("task", task, "failed:", err.Error()) + fmt.Printf("pool#%s: workerTaskLoop task=%v FAILED! err=%v\n", pool.templateDB.TemplateHash, task, err.Error()) } }(task) @@ -219,7 +221,7 @@ func (pool *HashPool) workerTaskLoop(ctx context.Context, taskChan <-chan string func (pool *HashPool) controlLoop() { - fmt.Println("controlLoop") + fmt.Printf("pool#%s: controlLoop\n", pool.templateDB.TemplateHash) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -266,29 +268,43 @@ func (pool *HashPool) ReturnTestDatabase(ctx context.Context, hash string, id in testDB.state = dbStateReady pool.dbs[id] = testDB - // fmt.Printf("ReturnTestDatabase %v: close channel\n", id) + // remove id from dirty and add it to ready channel + pool.excludeIDFromDirtyChannel(id) + pool.ready <- id - // The testDB is still in the dirty channel. - // We need to explicitly remove it from there by force closing the channel so we can range over it and thus recreate a new dirty channel without the returned ID. 
- newDirty := make(chan int, pool.MaxPoolSize) - close(pool.dirty) + fmt.Printf("pool#%s: ready=%d, dirty=%d, tasksChan=%d, dbs=%d initial=%d max=%d (ReturnTestDatabase)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize) - for dirtyID := range pool.dirty { - if dirtyID != id { - newDirty <- dirtyID - } - } + return nil +} - // fmt.Printf("ReturnTestDatabase %v: reset channel\n", id) - pool.dirty = newDirty +func (pool *HashPool) excludeIDFromDirtyChannel(id int) { - // fmt.Printf("ReturnTestDatabase %v: ready\n", id) - // id to ready again. - pool.ready <- id + // The testDB identified by overgiven id may still in the dirty channel. We want to exclude it. + // We need to explicitly remove it from there by filtering the current channel to a tmp channel. + // We finally close the tmp channel and flush it onto the dirty channel again. + // The db is ready again. + filteredDirty := make(chan int, pool.MaxPoolSize) - fmt.Printf("pool#%s: ready=%d, dirty=%d, tasksChan=%d, dbs=%d initial=%d max=%d (ReturnTestDatabase)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize) + var dirtyID int + for loop := true; loop; { + select { + case dirtyID = <-pool.dirty: + if dirtyID != id { + filteredDirty <- dirtyID + } + default: + loop = false + break + } + } - return nil + // filteredDirty now has all filtered values without the returned id, redirect the other back to the dirty channel. + // close so we can range over it... + close(filteredDirty) + + for dirtyID := range filteredDirty { + pool.dirty <- dirtyID + } } // RecreateTestDatabase recreates the test DB according to the template and returns it back to the pool. @@ -320,24 +336,8 @@ func (pool *HashPool) RecreateTestDatabase(ctx context.Context, hash string, id testDB.state = dbStateReady pool.dbs[id] = testDB - // fmt.Printf("RecreateTestDatabase %v: close channel\n", id) - - // The testDB is still in the dirty channel. - // We need to explicitly remove it from there by force closing the channel so we can range over it and thus recreate a new dirty channel without the returned ID. - newDirty := make(chan int, pool.MaxPoolSize) - close(pool.dirty) - - for dirtyID := range pool.dirty { - if dirtyID != id { - newDirty <- dirtyID - } - } - - // fmt.Printf("RecreateTestDatabase %v: reset channel\n", id) - pool.dirty = newDirty - - // fmt.Printf("RecreateTestDatabase %v: ready\n", id) - // id to ready again. 
+ // remove id from dirty and add it to ready channel + pool.excludeIDFromDirtyChannel(id) pool.ready <- id fmt.Printf("pool#%s: ready=%d, dirty=%d, tasksChan=%d, dbs=%d initial=%d max=%d (RecreateTestDatabase)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize) @@ -361,20 +361,23 @@ func (pool *HashPool) cleanDirty(ctx context.Context) error { return ctx.Err() default: // nothing to do + fmt.Println("cleanDirty noop", id) return nil } + fmt.Printf("pool#%s: cleanDirty %v\n", pool.templateDB.TemplateHash, id) + regLock := trace.StartRegion(ctx, "worker_wait_for_rlock_hash_pool") - pool.RLock() + pool.Lock() regLock.End() if id < 0 || id >= len(pool.dbs) { // sanity check, should never happen - pool.RUnlock() + pool.Unlock() return ErrInvalidIndex } testDB := pool.dbs[id] - pool.RUnlock() + pool.Unlock() if testDB.state == dbStateReady { // nothing to do @@ -441,10 +444,9 @@ func (pool *HashPool) extend(ctx context.Context) error { reg := trace.StartRegion(ctx, "worker_wait_for_lock_hash_pool") pool.Lock() + defer pool.Unlock() fmt.Printf("pool#%s: ready=%d, dirty=%d, tasksChan=%d, dbs=%d initial=%d max=%d (extend)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize) - - defer pool.Unlock() reg.End() // get index of a next test DB - its ID diff --git a/pkg/pool/pool_collection.go b/pkg/pool/pool_collection.go index 6aad7e2..b509e9a 100644 --- a/pkg/pool/pool_collection.go +++ b/pkg/pool/pool_collection.go @@ -14,10 +14,11 @@ import ( var ErrUnknownHash = errors.New("no database pool exists for this hash") type PoolConfig struct { - MaxPoolSize int - InitialPoolSize int - TestDBNamePrefix string - PoolMaxParallelTasks int + MaxPoolSize int + InitialPoolSize int + TestDBNamePrefix string + PoolMaxParallelTasks int + DisableWorkerAutostart bool } type PoolCollection struct { @@ -59,12 +60,25 @@ func (p *PoolCollection) InitHashPool(ctx context.Context, templateDB db.Databas // Create a new HashPool pool := NewHashPool(cfg, templateDB, initDBFunc) - pool.Start() + + if !cfg.DisableWorkerAutostart { + pool.Start() + } // pool is ready p.pools[pool.templateDB.TemplateHash] = pool } +// Start is used to start all background workers +func (p *PoolCollection) Start() { + p.mutex.RLock() + defer p.mutex.RUnlock() + + for _, pool := range p.pools { + pool.Start() + } +} + // Stop is used to stop all background workers func (p *PoolCollection) Stop() { p.mutex.RLock() @@ -73,7 +87,6 @@ func (p *PoolCollection) Stop() { for _, pool := range p.pools { pool.Stop() } - } // GetTestDatabase picks up a ready to use test DB. It waits the given timeout until a DB is available. diff --git a/pkg/pool/pool_collection_test.go b/pkg/pool/pool_collection_test.go index c89b42f..465c515 100644 --- a/pkg/pool/pool_collection_test.go +++ b/pkg/pool/pool_collection_test.go @@ -17,9 +17,10 @@ func TestPoolAddGet(t *testing.T) { ctx := context.Background() cfg := pool.PoolConfig{ - MaxPoolSize: 2, - PoolMaxParallelTasks: 4, - TestDBNamePrefix: "prefix_", + MaxPoolSize: 2, + PoolMaxParallelTasks: 4, + TestDBNamePrefix: "prefix_", + DisableWorkerAutostart: true, // no extend / cleanDirty tasks should run automatically! 
} p := pool.NewPoolCollection(cfg) @@ -37,16 +38,17 @@ func TestPoolAddGet(t *testing.T) { return nil } p.InitHashPool(ctx, templateDB, initFunc) + t.Cleanup(func() { p.Stop() }) - // get from empty + // get from empty (just initialized) _, err := p.GetTestDatabase(ctx, hash1, 0) assert.Error(t, err, pool.ErrTimeout) // add a new one assert.NoError(t, p.AddTestDatabase(ctx, templateDB)) // get it - testDB, err := p.GetTestDatabase(ctx, hash1, 100*time.Millisecond) + testDB, err := p.GetTestDatabase(ctx, hash1, 1*time.Second) assert.NoError(t, err) assert.Equal(t, "prefix_h1_000", testDB.Database.Config.Database) assert.Equal(t, "ich", testDB.Database.Config.Username) @@ -60,14 +62,14 @@ func TestPoolAddGet(t *testing.T) { assert.ErrorIs(t, p.AddTestDatabase(ctx, templateDB2), pool.ErrPoolFull) // get from empty h1 - _, err = p.GetTestDatabase(ctx, hash1, 0) - assert.Error(t, err, pool.ErrTimeout) + _, err = p.GetTestDatabase(ctx, hash1, 100*time.Millisecond) + assert.ErrorIs(t, err, pool.ErrTimeout) // get from h2 - testDB1, err := p.GetTestDatabase(ctx, hash2, 0) + testDB1, err := p.GetTestDatabase(ctx, hash2, 1*time.Second) assert.NoError(t, err) assert.Equal(t, hash2, testDB1.TemplateHash) - testDB2, err := p.GetTestDatabase(ctx, hash2, 0) + testDB2, err := p.GetTestDatabase(ctx, hash2, 1*time.Second) assert.NoError(t, err) assert.Equal(t, hash2, testDB2.TemplateHash) assert.NotEqual(t, testDB1.ID, testDB2.ID) @@ -237,7 +239,7 @@ func TestPoolRemoveAll(t *testing.T) { // start using pool again p.InitHashPool(ctx, templateDB1, initFunc) assert.NoError(t, p.AddTestDatabase(ctx, templateDB1)) - testDB, err := p.GetTestDatabase(ctx, hash1, 0) + testDB, err := p.GetTestDatabase(ctx, hash1, 1*time.Second) assert.NoError(t, err) assert.Equal(t, 0, testDB.ID) } @@ -320,19 +322,16 @@ func TestPoolReturnTestDatabase(t *testing.T) { } cfg := pool.PoolConfig{ - MaxPoolSize: 10, - PoolMaxParallelTasks: 3, + MaxPoolSize: 10, + PoolMaxParallelTasks: 3, + DisableWorkerAutostart: true, // no extend / cleanDirty tasks should run automatically! 
} p := pool.NewPoolCollection(cfg) - t.Cleanup(func() { p.Stop() }) p.InitHashPool(ctx, templateDB1, initFunc) // add just one test DB require.NoError(t, p.AddTestDatabase(ctx, templateDB1)) - // stop the workers to prevent auto cleaning in background - p.Stop() - testDB1, err := p.GetTestDatabase(ctx, templateDB1.TemplateHash, time.Millisecond) assert.NoError(t, err) diff --git a/pkg/templates/template_test.go b/pkg/templates/template_test.go index ef46708..3415383 100644 --- a/pkg/templates/template_test.go +++ b/pkg/templates/template_test.go @@ -28,7 +28,7 @@ func TestTemplateGetSetState(t *testing.T) { assert.Equal(t, templates.TemplateStateDiscarded, state) } -func TestTemplateWaitForReady(t *testing.T) { +func TestForReady(t *testing.T) { ctx := context.Background() goroutineNum := 10 @@ -58,7 +58,7 @@ func TestTemplateWaitForReady(t *testing.T) { wg.Add(1) go func() { defer wg.Done() - timeout := 3 * time.Millisecond + timeout := 30 * time.Millisecond state := t1.WaitUntilFinalized(ctx, timeout) if state != templates.TemplateStateInit { errsChan <- errors.New(fmt.Sprintf("expected state %v (init), but is %v", templates.TemplateStateInit, state)) @@ -67,7 +67,7 @@ func TestTemplateWaitForReady(t *testing.T) { } // now set state - time.Sleep(5 * time.Millisecond) + time.Sleep(50 * time.Millisecond) t1.SetState(ctx, templates.TemplateStateFinalized) wg.Wait() From 1283351b91a7907792c7f27f3af37195665c522f Mon Sep 17 00:00:00 2001 From: Mario Ranftl Date: Tue, 29 Aug 2023 16:15:22 +0200 Subject: [PATCH 04/10] redesign recreate handling --- pkg/manager/manager.go | 28 ++---- pkg/manager/manager_test.go | 9 +- pkg/pool/pool.go | 188 +++++++++++++++++++----------------- 3 files changed, 117 insertions(+), 108 deletions(-) diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 0e37bb8..1586123 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -331,7 +331,7 @@ func (m Manager) ReturnTestDatabase(ctx context.Context, hash string, id int) er // check if the template exists and is finalized template, found := m.templates.Get(ctx, hash) if !found { - return m.dropDatabaseWithID(ctx, hash, id) + return ErrTemplateNotFound } if template.WaitUntilFinalized(ctx, m.config.TemplateFinalizeTimeout) != @@ -342,15 +342,10 @@ func (m Manager) ReturnTestDatabase(ctx context.Context, hash string, id int) er // template is ready, we can return unchanged testDB to the pool if err := m.pool.ReturnTestDatabase(ctx, hash, id); err != nil { - if !(errors.Is(err, pool.ErrInvalidIndex) || - errors.Is(err, pool.ErrUnknownHash)) { - // other error is an internal error - return err - } - // db is not tracked in the pool - // try to drop it if exists - return m.dropDatabaseWithID(ctx, hash, id) + fmt.Printf("manager.ReturnTestDatabase error: %v\n", err) + + return err } return nil @@ -368,7 +363,7 @@ func (m *Manager) RecreateTestDatabase(ctx context.Context, hash string, id int) // check if the template exists and is finalized template, found := m.templates.Get(ctx, hash) if !found { - return m.dropDatabaseWithID(ctx, hash, id) + return ErrTemplateNotFound } if template.WaitUntilFinalized(ctx, m.config.TemplateFinalizeTimeout) != @@ -376,17 +371,12 @@ func (m *Manager) RecreateTestDatabase(ctx context.Context, hash string, id int) return ErrInvalidTemplateState } - // template is ready, we can returb the testDB to the pool and have it cleaned up + // template is ready, we can return the testDB to the pool and have it cleaned up if err := m.pool.RecreateTestDatabase(ctx, hash, id); err != 
nil { - if !(errors.Is(err, pool.ErrInvalidIndex) || - errors.Is(err, pool.ErrUnknownHash)) { - // other error is an internal error - return err - } - // db is not tracked in the pool - // try to drop it if exists - return m.dropDatabaseWithID(ctx, hash, id) + fmt.Printf("manager.RecreateTestDatabase error: %v\n", err) + + return err } return nil diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index 9763cc6..4d2a614 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -675,6 +675,9 @@ func TestManagerGetAndRecreateTestDatabase(t *testing.T) { // request many more databases than initally added for i := 0; i <= cfg.TestDatabaseMaxPoolSize*2; i++ { test, err := m.GetTestDatabase(ctx, hash) + + t.Logf("open %v", test.ID) + assert.NoError(t, err) assert.NotEmpty(t, test) @@ -692,6 +695,8 @@ func TestManagerGetAndRecreateTestDatabase(t *testing.T) { require.NoError(t, err) assert.NoError(t, db.QueryRowContext(ctx, "SELECT COUNT(*) FROM pilots WHERE name = 'Anna'").Scan(&res)) assert.Equal(t, 1, res) + + t.Logf("close %v", test.ID) db.Close() // recreate testDB after usage @@ -882,8 +887,8 @@ func TestManagerReturnUntrackedTemplateDatabase(t *testing.T) { t.Fatalf("failed to manually create template database %q: %v", dbName, err) } - if err := m.ReturnTestDatabase(ctx, hash, id); err != nil { - t.Fatalf("failed to return manually created test database: %v", err) + if err := m.ReturnTestDatabase(ctx, hash, id); err == nil { + t.Fatalf("succeeded to return manually created test database: %v", err) // this should not work! } } diff --git a/pkg/pool/pool.go b/pkg/pool/pool.go index 99f1549..76696a5 100644 --- a/pkg/pool/pool.go +++ b/pkg/pool/pool.go @@ -43,9 +43,10 @@ const ( // HashPool holds a test DB pool for a certain hash. Each HashPool is running cleanup workers in background. type HashPool struct { - dbs []existingDB - ready chan int // ID of initalized DBs according to a template, ready to pick them up - dirty chan int // ID of DBs that were given away and need to be recreated to reuse them + dbs []existingDB + ready chan int // ID of initalized DBs according to a template, ready to pick them up + dirty chan int // ID of DBs that were given away and need to be recreated to reuse them + recreating chan struct{} // tracks currently running recreating ops recreateDB recreateTestDBFunc templateDB db.Database @@ -54,8 +55,9 @@ type HashPool struct { sync.RWMutex wg sync.WaitGroup - tasksChan chan string - running bool + tasksChan chan string + running bool + workerContext context.Context // the ctx all background workers will receive (nil if not yet started) } // NewHashPool creates new hash pool with the given config. 
@@ -67,9 +69,10 @@ func NewHashPool(cfg PoolConfig, templateDB db.Database, initDBFunc RecreateDBFu } pool := &HashPool{ - dbs: make([]existingDB, 0, cfg.MaxPoolSize), - ready: make(chan int, cfg.MaxPoolSize), - dirty: make(chan int, cfg.MaxPoolSize), + dbs: make([]existingDB, 0, cfg.MaxPoolSize), + ready: make(chan int, cfg.MaxPoolSize), + dirty: make(chan int, cfg.MaxPoolSize), + recreating: make(chan struct{}, cfg.MaxPoolSize), recreateDB: makeActualRecreateTestDBFunc(templateDB.Config.Database, initDBFunc), templateDB: templateDB, @@ -91,6 +94,10 @@ func (pool *HashPool) Start() { } pool.running = true + + ctx, cancel := context.WithCancel(context.Background()) + pool.workerContext = ctx + for i := 0; i < pool.InitialPoolSize; i++ { pool.tasksChan <- workerTaskExtend } @@ -98,7 +105,7 @@ func (pool *HashPool) Start() { pool.wg.Add(1) go func() { defer pool.wg.Done() - pool.controlLoop() + pool.controlLoop(ctx, cancel) }() } @@ -112,6 +119,7 @@ func (pool *HashPool) Stop() { pool.tasksChan <- workerTaskStop pool.wg.Wait() + pool.workerContext = nil } func (pool *HashPool) GetTestDatabase(ctx context.Context, hash string, timeout time.Duration) (db db.TestDatabase, err error) { @@ -162,12 +170,12 @@ func (pool *HashPool) GetTestDatabase(ctx context.Context, hash string, timeout } // we try to ensure that InitialPoolSize count is staying ready - // thus, we try to move the oldest dirty dbs into cleaning - if len(pool.dbs) >= pool.PoolConfig.MaxPoolSize && len(pool.ready) < pool.InitialPoolSize { + // thus, we try to move the oldest dirty dbs into recreating with the workerTaskCleanDirty + if len(pool.dbs) >= pool.PoolConfig.MaxPoolSize && (len(pool.ready)+len(pool.recreating)) < pool.InitialPoolSize { pool.tasksChan <- workerTaskCleanDirty } - fmt.Printf("pool#%s: ready=%d, dirty=%d, tasksChan=%d, dbs=%d initial=%d max=%d (GetTestDatabase)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize) + fmt.Printf("pool#%s: ready=%d, dirty=%d, recreating=%d, tasksChan=%d, dbs=%d initial=%d max=%d (GetTestDatabase)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.recreating), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize) return testDB.TestDatabase, nil } @@ -219,11 +227,11 @@ func (pool *HashPool) workerTaskLoop(ctx context.Context, taskChan <-chan string } } -func (pool *HashPool) controlLoop() { +func (pool *HashPool) controlLoop(ctx context.Context, cancel context.CancelFunc) { fmt.Printf("pool#%s: controlLoop\n", pool.templateDB.TemplateHash) - ctx, cancel := context.WithCancel(context.Background()) + // ctx, cancel := context.WithCancel(context.Background()) defer cancel() workerTasksChan := make(chan string, len(pool.tasksChan)) @@ -272,7 +280,7 @@ func (pool *HashPool) ReturnTestDatabase(ctx context.Context, hash string, id in pool.excludeIDFromDirtyChannel(id) pool.ready <- id - fmt.Printf("pool#%s: ready=%d, dirty=%d, tasksChan=%d, dbs=%d initial=%d max=%d (ReturnTestDatabase)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize) + fmt.Printf("pool#%s: ready=%d, dirty=%d, recreating=%d, tasksChan=%d, dbs=%d initial=%d max=%d (ReturnTestDatabase)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.recreating), len(pool.tasksChan), len(pool.dbs), 
pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize) return nil } @@ -282,7 +290,7 @@ func (pool *HashPool) excludeIDFromDirtyChannel(id int) { // The testDB identified by overgiven id may still in the dirty channel. We want to exclude it. // We need to explicitly remove it from there by filtering the current channel to a tmp channel. // We finally close the tmp channel and flush it onto the dirty channel again. - // The db is ready again. + // The id is now no longer in the channel. filteredDirty := make(chan int, pool.MaxPoolSize) var dirtyID int @@ -298,7 +306,7 @@ func (pool *HashPool) excludeIDFromDirtyChannel(id int) { } } - // filteredDirty now has all filtered values without the returned id, redirect the other back to the dirty channel. + // filteredDirty now has all filtered values without the above id, redirect the other ids back to the dirty channel. // close so we can range over it... close(filteredDirty) @@ -307,7 +315,7 @@ func (pool *HashPool) excludeIDFromDirtyChannel(id int) { } } -// RecreateTestDatabase recreates the test DB according to the template and returns it back to the pool. +// RecreateTestDatabase prioritizes the test DB to be recreated next via the dirty worker. func (pool *HashPool) RecreateTestDatabase(ctx context.Context, hash string, id int) error { pool.RLock() @@ -316,39 +324,92 @@ func (pool *HashPool) RecreateTestDatabase(ctx context.Context, hash string, id return ErrInvalidIndex } - // check if db is in the correct state - testDB := pool.dbs[id] + fmt.Printf("pool#%s: ready=%d, dirty=%d, recreating=%d, tasksChan=%d, dbs=%d initial=%d max=%d (RecreateTestDatabase %v)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.recreating), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize, id) pool.RUnlock() - if testDB.state == dbStateReady { + // exclude from the normal dirty channel, force recreation in a background worker... + pool.excludeIDFromDirtyChannel(id) + + // directly spawn a new worker in the bg (with the same ctx as the typical workers) + go pool.recreateDatabaseGracefully(pool.workerContext, id) + + return nil +} + +// recreateDatabaseGracefully continuosly tries to recreate the testdatabase and will retry/block until it succeeds +func (pool *HashPool) recreateDatabaseGracefully(ctx context.Context, id int) error { + + if ctx.Err() != nil { + // pool closed in the meantime. + return ctx.Err() + } + + pool.RLock() + + if pool.dbs[id].state == dbStateReady { + // nothing to do + pool.RUnlock() return nil } - // state is dirty -> we will now recreate it - if err := pool.recreateDB(ctx, &testDB); err != nil { - return err + testDB := pool.dbs[id] + pool.RUnlock() + + pool.recreating <- struct{}{} + + defer func() { + <-pool.recreating + }() + + for { + select { + case <-ctx.Done(): + return ctx.Err() + default: + fmt.Printf("recreateDatabaseGracefully: recreating ID='%v'...\n", id) + err := pool.recreateDB(ctx, &testDB) + if err != nil { + fmt.Println(err) + if errors.Is(err, ErrTestDBInUse) { + fmt.Printf("recreateDatabaseGracefully: DB is still in use, will retry ID='%v'.\n", id) + time.Sleep(250 * time.Millisecond) // TODO make configurable and/or exponential retry backoff... 
+ } else { + fmt.Printf("recreateDatabaseGracefully: db error while cleanup ID='%v' err=%v\n", id, err) + return nil // noop + } + } else { + goto MoveToReady + } + } } +MoveToReady: pool.Lock() defer pool.Unlock() - // change the state to 'ready' - testDB.state = dbStateReady - pool.dbs[id] = testDB + if ctx.Err() != nil { + // pool closed in the meantime. + return ctx.Err() + } - // remove id from dirty and add it to ready channel - pool.excludeIDFromDirtyChannel(id) - pool.ready <- id + fmt.Printf("pool#%s: ready=%d, dirty=%d, recreating=%d, tasksChan=%d, dbs=%d initial=%d max=%d (recreateDatabaseGracefully %v)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.recreating), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize, id) - fmt.Printf("pool#%s: ready=%d, dirty=%d, tasksChan=%d, dbs=%d initial=%d max=%d (RecreateTestDatabase)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize) + if pool.dbs[id].state == dbStateReady { + // oups, it has been cleaned by another worker already + // we won't add it to the 'ready' channel to avoid duplication + return nil + } - return nil + pool.dbs[id].state = dbStateReady + // pool.dbs[id] = testDB + pool.ready <- pool.dbs[id].ID + + return nil } // cleanDirty reads 'dirty' channel and cleans up a test DB with the received index. // When the DB is recreated according to a template, its index goes to the 'ready' channel. -// The function waits until there is a dirty DB... func (pool *HashPool) cleanDirty(ctx context.Context) error { ctx, task := trace.NewTask(ctx, "worker_clean_dirty") @@ -361,68 +422,25 @@ func (pool *HashPool) cleanDirty(ctx context.Context) error { return ctx.Err() default: // nothing to do - fmt.Println("cleanDirty noop", id) + fmt.Println("cleanDirty noop") return nil } fmt.Printf("pool#%s: cleanDirty %v\n", pool.templateDB.TemplateHash, id) regLock := trace.StartRegion(ctx, "worker_wait_for_rlock_hash_pool") - pool.Lock() + pool.RLock() regLock.End() if id < 0 || id >= len(pool.dbs) { // sanity check, should never happen - pool.Unlock() + pool.RUnlock() return ErrInvalidIndex } - testDB := pool.dbs[id] - pool.Unlock() - - if testDB.state == dbStateReady { - // nothing to do - return nil - } - - reg := trace.StartRegion(ctx, "worker_db_operation") - err := pool.recreateDB(ctx, &testDB) - reg.End() - - if err != nil { - // fmt.Printf("worker_clean_dirty: failed to clean up DB ID='%v': %v\n", id, err) - - // we guarantee FIFO, we must keeping trying to clean up **exactly this** test database! 
- if errors.Is(err, ErrTestDBInUse) { - - fmt.Printf("worker_clean_dirty: scheduling retry cleanup for ID='%v'...\n", id) - time.Sleep(250 * time.Millisecond) - fmt.Printf("integworker_clean_dirtyresql: push DB ID='%v' into retry.", id) - pool.dirty <- id - pool.tasksChan <- workerTaskCleanDirty - return nil - } - - return err - } - - regLock = trace.StartRegion(ctx, "worker_wait_for_lock_hash_pool") - pool.Lock() - defer pool.Unlock() - regLock.End() - - if testDB.state == dbStateReady { - // oups, it has been cleaned by another worker already - // we won't add it to the 'ready' channel to avoid duplication - return nil - } - - testDB.state = dbStateReady - pool.dbs[id] = testDB - - pool.ready <- testDB.ID + // testDB := pool.dbs[id] + pool.RUnlock() - fmt.Printf("pool#%s: ready=%d, dirty=%d, tasksChan=%d, dbs=%d initial=%d max=%d (cleanDirty)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize) - return nil + return pool.recreateDatabaseGracefully(ctx, id) } func ignoreErrs(f func(ctx context.Context) error, errs ...error) func(context.Context) error { @@ -446,7 +464,7 @@ func (pool *HashPool) extend(ctx context.Context) error { pool.Lock() defer pool.Unlock() - fmt.Printf("pool#%s: ready=%d, dirty=%d, tasksChan=%d, dbs=%d initial=%d max=%d (extend)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize) + fmt.Printf("pool#%s: ready=%d, dirty=%d, recreating=%d, tasksChan=%d, dbs=%d initial=%d max=%d (extend)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.recreating), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize) reg.End() // get index of a next test DB - its ID @@ -490,8 +508,6 @@ func (pool *HashPool) RemoveAll(ctx context.Context, removeFunc RemoveDBFunc) er // stop all workers pool.Stop() - // ! - // HashPool locked pool.Lock() defer pool.Unlock() @@ -517,6 +533,4 @@ func (pool *HashPool) RemoveAll(ctx context.Context, removeFunc RemoveDBFunc) er close(pool.tasksChan) return nil - // HashPool unlocked - // ! 
} From a3b5be0a4700797206ad23dcf2a8fd910c23bf1a Mon Sep 17 00:00:00 2001 From: Mario Ranftl Date: Wed, 30 Aug 2023 17:37:59 +0200 Subject: [PATCH 05/10] adds backoff handling for recreateDatabaseGracefully --- pkg/manager/manager.go | 29 +++++++++------------- pkg/manager/manager_config.go | 42 ++++++++++++++++---------------- pkg/manager/manager_test.go | 34 +++++++++++++------------- pkg/pool/pool.go | 32 +++++++++++++++--------- pkg/pool/pool_collection.go | 13 ++++++---- pkg/pool/pool_collection_test.go | 30 +++++++++++------------ 6 files changed, 94 insertions(+), 86 deletions(-) diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 1586123..355fd1e 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -37,10 +37,12 @@ func New(config ManagerConfig) (*Manager, ManagerConfig) { if config.DatabasePrefix != "" { testDBPrefix = testDBPrefix + fmt.Sprintf("%s_", config.DatabasePrefix) } - if config.TestDatabasePrefix != "" { - testDBPrefix = testDBPrefix + fmt.Sprintf("%s_", config.TestDatabasePrefix) + if config.PoolConfig.TestDBNamePrefix != "" { + testDBPrefix = testDBPrefix + fmt.Sprintf("%s_", config.PoolConfig.TestDBNamePrefix) } + config.PoolConfig.TestDBNamePrefix = testDBPrefix + if len(config.TestDatabaseOwner) == 0 { config.TestDatabaseOwner = config.ManagerDatabaseConfig.Username } @@ -50,30 +52,23 @@ func New(config ManagerConfig) (*Manager, ManagerConfig) { } // at least one test database needs to be present initially - if config.TestDatabaseInitialPoolSize == 0 { - config.TestDatabaseInitialPoolSize = 1 + if config.PoolConfig.InitialPoolSize == 0 { + config.PoolConfig.InitialPoolSize = 1 } - if config.TestDatabaseInitialPoolSize > config.TestDatabaseMaxPoolSize && config.TestDatabaseMaxPoolSize > 0 { - config.TestDatabaseInitialPoolSize = config.TestDatabaseMaxPoolSize + if config.PoolConfig.InitialPoolSize > config.PoolConfig.MaxPoolSize && config.PoolConfig.MaxPoolSize > 0 { + config.PoolConfig.InitialPoolSize = config.PoolConfig.MaxPoolSize } - if config.PoolMaxParallelTasks < 1 { - config.PoolMaxParallelTasks = 1 + if config.PoolConfig.MaxParallelTasks < 1 { + config.PoolConfig.MaxParallelTasks = 1 } m := &Manager{ config: config, db: nil, templates: templates.NewCollection(), - pool: pool.NewPoolCollection( - pool.PoolConfig{ - MaxPoolSize: config.TestDatabaseMaxPoolSize, - InitialPoolSize: config.TestDatabaseInitialPoolSize, - TestDBNamePrefix: testDBPrefix, - PoolMaxParallelTasks: config.PoolMaxParallelTasks, - }, - ), + pool: pool.NewPoolCollection(config.PoolConfig), } return m, m.config @@ -139,7 +134,7 @@ func (m *Manager) Initialize(ctx context.Context) error { } } - rows, err := m.db.QueryContext(ctx, "SELECT datname FROM pg_database WHERE datname LIKE $1", fmt.Sprintf("%s_%s_%%", m.config.DatabasePrefix, m.config.TestDatabasePrefix)) + rows, err := m.db.QueryContext(ctx, "SELECT datname FROM pg_database WHERE datname LIKE $1", fmt.Sprintf("%s_%s_%%", m.config.DatabasePrefix, m.config.PoolConfig.TestDBNamePrefix)) if err != nil { return err } diff --git a/pkg/manager/manager_config.go b/pkg/manager/manager_config.go index 435ec11..55b36b0 100644 --- a/pkg/manager/manager_config.go +++ b/pkg/manager/manager_config.go @@ -5,6 +5,7 @@ import ( "time" "github.com/allaboutapps/integresql/pkg/db" + "github.com/allaboutapps/integresql/pkg/pool" "github.com/allaboutapps/integresql/pkg/util" ) @@ -12,16 +13,14 @@ type ManagerConfig struct { ManagerDatabaseConfig db.DatabaseConfig TemplateDatabaseTemplate string - DatabasePrefix string - 
TemplateDatabasePrefix string - TestDatabasePrefix string - TestDatabaseOwner string - TestDatabaseOwnerPassword string - TestDatabaseInitialPoolSize int // Initial number of ready DBs prepared in background - TestDatabaseMaxPoolSize int // Maximal pool size that won't be exceeded - TemplateFinalizeTimeout time.Duration // Time to wait for a template to transition into the 'finalized' state - TestDatabaseGetTimeout time.Duration // Time to wait for a ready database - PoolMaxParallelTasks int // Maximal number of pool tasks running in parallel. Must be a number greater or equal 1. + DatabasePrefix string + TemplateDatabasePrefix string + TestDatabaseOwner string + TestDatabaseOwnerPassword string + TemplateFinalizeTimeout time.Duration // Time to wait for a template to transition into the 'finalized' state + TestDatabaseGetTimeout time.Duration // Time to wait for a ready database + + PoolConfig pool.PoolConfig } func DefaultManagerConfigFromEnv() ManagerConfig { @@ -50,18 +49,19 @@ func DefaultManagerConfigFromEnv() ManagerConfig { // DatabasePrefix_TemplateDatabasePrefix_HASH TemplateDatabasePrefix: util.GetEnv("INTEGRESQL_TEMPLATE_DB_PREFIX", "template"), - // DatabasePrefix_TestDatabasePrefix_HASH_ID - TestDatabasePrefix: util.GetEnv("INTEGRESQL_TEST_DB_PREFIX", "test"), - - // reuse the same user (PGUSER) and passwort (PGPASSWORT) for the test / template databases by default + // we reuse the same user (PGUSER) and passwort (PGPASSWORT) for the test / template databases by default TestDatabaseOwner: util.GetEnv("INTEGRESQL_TEST_PGUSER", util.GetEnv("INTEGRESQL_PGUSER", util.GetEnv("PGUSER", "postgres"))), TestDatabaseOwnerPassword: util.GetEnv("INTEGRESQL_TEST_PGPASSWORD", util.GetEnv("INTEGRESQL_PGPASSWORD", util.GetEnv("PGPASSWORD", ""))), - // TestDatabaseInitialPoolSize: util.GetEnvAsInt("INTEGRESQL_TEST_INITIAL_POOL_SIZE", 10), - TestDatabaseInitialPoolSize: util.GetEnvAsInt("INTEGRESQL_TEST_INITIAL_POOL_SIZE", runtime.NumCPU()), - // TestDatabaseMaxPoolSize: util.GetEnvAsInt("INTEGRESQL_TEST_MAX_POOL_SIZE", 500), - TestDatabaseMaxPoolSize: util.GetEnvAsInt("INTEGRESQL_TEST_MAX_POOL_SIZE", runtime.NumCPU()*4), - TemplateFinalizeTimeout: time.Millisecond * time.Duration(util.GetEnvAsInt("INTEGRESQL_TEMPLATE_FINALIZE_TIMEOUT_MS", 5*60*1000 /*5 min*/)), - TestDatabaseGetTimeout: time.Millisecond * time.Duration(util.GetEnvAsInt("INTEGRESQL_TEST_DB_GET_TIMEOUT_MS", 1*60*1000 /*1 min, timeout hardcoded also in GET request handler*/)), - PoolMaxParallelTasks: util.GetEnvAsInt("INTEGRESQL_POOL_MAX_PARALLEL_TASKS", runtime.NumCPU()), + TemplateFinalizeTimeout: time.Millisecond * time.Duration(util.GetEnvAsInt("INTEGRESQL_TEMPLATE_FINALIZE_TIMEOUT_MS", 5*60*1000 /*5 min*/)), + TestDatabaseGetTimeout: time.Millisecond * time.Duration(util.GetEnvAsInt("INTEGRESQL_TEST_DB_GET_TIMEOUT_MS", 1*60*1000 /*1 min, timeout hardcoded also in GET request handler*/)), + + PoolConfig: pool.PoolConfig{ + InitialPoolSize: util.GetEnvAsInt("INTEGRESQL_TEST_INITIAL_POOL_SIZE", runtime.NumCPU()), // previously default 10 + MaxPoolSize: util.GetEnvAsInt("INTEGRESQL_TEST_MAX_POOL_SIZE", runtime.NumCPU()*4), // previously default 500 + TestDBNamePrefix: util.GetEnv("INTEGRESQL_TEST_DB_PREFIX", "test"), // DatabasePrefix_TestDBNamePrefix_HASH_ID + MaxParallelTasks: util.GetEnvAsInt("INTEGRESQL_POOL_MAX_PARALLEL_TASKS", runtime.NumCPU()), + TestDatabaseRetryRecreateSleepMin: time.Millisecond * time.Duration(util.GetEnvAsInt("INTEGRESQL_TEST_DB_RETRY_RECREATE_SLEEP_MIN_MS", 250 /*250 ms*/)), + 
TestDatabaseRetryRecreateSleepMax: time.Millisecond * time.Duration(util.GetEnvAsInt("INTEGRESQL_TEST_DB_RETRY_RECREATE_SLEEP_MAX_MS", 1000*3 /*3 sec*/)), + }, } } diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index 4d2a614..045a5ca 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -310,8 +310,8 @@ func TestManagerGetTestDatabaseExtendPool(t *testing.T) { cfg := manager.DefaultManagerConfigFromEnv() cfg.TestDatabaseGetTimeout = 300 * time.Millisecond - cfg.TestDatabaseInitialPoolSize = 0 // this will be autotransformed to 1 during init - cfg.TestDatabaseMaxPoolSize = 10 + cfg.PoolConfig.InitialPoolSize = 0 // this will be autotransformed to 1 during init + cfg.PoolConfig.MaxPoolSize = 10 m, _ := testManagerWithConfig(cfg) if err := m.Initialize(ctx); err != nil { @@ -335,7 +335,7 @@ func TestManagerGetTestDatabaseExtendPool(t *testing.T) { previousID := -1 // assert than one by one pool will be extended - for i := 0; i < cfg.TestDatabaseMaxPoolSize; i++ { + for i := 0; i < cfg.PoolConfig.MaxPoolSize; i++ { testDB, err := m.GetTestDatabase(ctx, hash) assert.NoError(t, err) assert.Equal(t, previousID+1, testDB.ID) @@ -606,8 +606,8 @@ func TestManagerGetAndReturnTestDatabase(t *testing.T) { ctx := context.Background() cfg := manager.DefaultManagerConfigFromEnv() - cfg.TestDatabaseInitialPoolSize = 3 - cfg.TestDatabaseMaxPoolSize = 3 + cfg.PoolConfig.InitialPoolSize = 3 + cfg.PoolConfig.MaxPoolSize = 3 cfg.TestDatabaseGetTimeout = 200 * time.Millisecond m, _ := testManagerWithConfig(cfg) @@ -631,7 +631,7 @@ func TestManagerGetAndReturnTestDatabase(t *testing.T) { } // request many more databases than initally added - for i := 0; i <= cfg.TestDatabaseMaxPoolSize*3; i++ { + for i := 0; i <= cfg.PoolConfig.MaxPoolSize*3; i++ { test, err := m.GetTestDatabase(ctx, hash) assert.NoError(t, err) assert.NotEmpty(t, test) @@ -648,9 +648,9 @@ func TestManagerGetAndRecreateTestDatabase(t *testing.T) { ctx := context.Background() cfg := manager.DefaultManagerConfigFromEnv() - cfg.TestDatabaseInitialPoolSize = 4 - cfg.TestDatabaseMaxPoolSize = 8 - cfg.TestDatabaseGetTimeout = 250 * time.Millisecond + cfg.PoolConfig.InitialPoolSize = 4 + cfg.PoolConfig.MaxPoolSize = 8 + cfg.TestDatabaseGetTimeout = 500 * time.Millisecond m, _ := testManagerWithConfig(cfg) if err := m.Initialize(ctx); err != nil { @@ -673,7 +673,7 @@ func TestManagerGetAndRecreateTestDatabase(t *testing.T) { } // request many more databases than initally added - for i := 0; i <= cfg.TestDatabaseMaxPoolSize*2; i++ { + for i := 0; i <= cfg.PoolConfig.MaxPoolSize*2; i++ { test, err := m.GetTestDatabase(ctx, hash) t.Logf("open %v", test.ID) @@ -712,8 +712,8 @@ func TestManagerGetTestDatabaseDontReturn(t *testing.T) { ctx := context.Background() cfg := manager.DefaultManagerConfigFromEnv() - cfg.TestDatabaseInitialPoolSize = 5 - cfg.TestDatabaseMaxPoolSize = 5 + cfg.PoolConfig.InitialPoolSize = 5 + cfg.PoolConfig.MaxPoolSize = 5 cfg.TestDatabaseGetTimeout = time.Second m, _ := testManagerWithConfig(cfg) @@ -737,7 +737,7 @@ func TestManagerGetTestDatabaseDontReturn(t *testing.T) { } var wg sync.WaitGroup - for i := 0; i < cfg.TestDatabaseMaxPoolSize*5; i++ { + for i := 0; i < cfg.PoolConfig.MaxPoolSize*5; i++ { wg.Add(1) go func(i int) { defer wg.Done() @@ -782,8 +782,8 @@ func TestManagerReturnTestDatabase(t *testing.T) { ctx := context.Background() cfg := manager.DefaultManagerConfigFromEnv() - cfg.TestDatabaseInitialPoolSize = 1 - cfg.TestDatabaseMaxPoolSize = 10 + 
cfg.PoolConfig.InitialPoolSize = 1 + cfg.PoolConfig.MaxPoolSize = 10 cfg.TestDatabaseGetTimeout = 200 * time.Millisecond m, _ := testManagerWithConfig(cfg) @@ -878,7 +878,7 @@ func TestManagerReturnUntrackedTemplateDatabase(t *testing.T) { } id := 321 - dbName := fmt.Sprintf("%s_%s_%s_%d", config.DatabasePrefix, config.TestDatabasePrefix, hash, id) + dbName := fmt.Sprintf("%s_%s_%s_%d", config.DatabasePrefix, config.PoolConfig.TestDBNamePrefix, hash, id) if _, err := db.ExecContext(ctx, fmt.Sprintf("DROP DATABASE IF EXISTS %s", pq.QuoteIdentifier(dbName))); err != nil { t.Fatalf("failed to manually drop template database %q: %v", dbName, err) @@ -981,7 +981,7 @@ func TestManagerClearTrackedTestDatabases(t *testing.T) { cfg := manager.DefaultManagerConfigFromEnv() // there are no db added in background - cfg.TestDatabaseInitialPoolSize = 0 + cfg.PoolConfig.InitialPoolSize = 0 m, _ := testManagerWithConfig(cfg) if err := m.Initialize(ctx); err != nil { diff --git a/pkg/pool/pool.go b/pkg/pool/pool.go index 76696a5..8054bde 100644 --- a/pkg/pool/pool.go +++ b/pkg/pool/pool.go @@ -64,8 +64,8 @@ type HashPool struct { // Starts the workers to extend the pool in background up to requested inital number. func NewHashPool(cfg PoolConfig, templateDB db.Database, initDBFunc RecreateDBFunc) *HashPool { - if cfg.PoolMaxParallelTasks < minConcurrentTasksNum { - cfg.PoolMaxParallelTasks = minConcurrentTasksNum + if cfg.MaxParallelTasks < minConcurrentTasksNum { + cfg.MaxParallelTasks = minConcurrentTasksNum } pool := &HashPool{ @@ -184,7 +184,7 @@ func (pool *HashPool) AddTestDatabase(ctx context.Context, templateDB db.Databas return pool.extend(ctx) } -func (pool *HashPool) workerTaskLoop(ctx context.Context, taskChan <-chan string, poolMaxParallelTasks int) { +func (pool *HashPool) workerTaskLoop(ctx context.Context, taskChan <-chan string, MaxParallelTasks int) { fmt.Printf("pool#%s: workerTaskLoop\n", pool.templateDB.TemplateHash) @@ -194,7 +194,7 @@ func (pool *HashPool) workerTaskLoop(ctx context.Context, taskChan <-chan string } // to limit the number of running goroutines. - var semaphore = make(chan struct{}, poolMaxParallelTasks) + var semaphore = make(chan struct{}, MaxParallelTasks) for task := range taskChan { handler, ok := handlers[task] @@ -238,7 +238,7 @@ func (pool *HashPool) controlLoop(ctx context.Context, cancel context.CancelFunc pool.wg.Add(1) go func() { defer pool.wg.Done() - pool.workerTaskLoop(ctx, workerTasksChan, pool.PoolMaxParallelTasks) + pool.workerTaskLoop(ctx, workerTasksChan, pool.MaxParallelTasks) }() for task := range pool.tasksChan { @@ -361,21 +361,31 @@ func (pool *HashPool) recreateDatabaseGracefully(ctx context.Context, id int) er <-pool.recreating }() + try := 0 + for { select { case <-ctx.Done(): return ctx.Err() default: - fmt.Printf("recreateDatabaseGracefully: recreating ID='%v'...\n", id) + try++ + + fmt.Printf("recreateDatabaseGracefully: recreating ID='%v' try=%v...\n", id, try) err := pool.recreateDB(ctx, &testDB) if err != nil { - fmt.Println(err) + // only still connected errors are worthy a retry if errors.Is(err, ErrTestDBInUse) { - fmt.Printf("recreateDatabaseGracefully: DB is still in use, will retry ID='%v'.\n", id) - time.Sleep(250 * time.Millisecond) // TODO make configurable and/or exponential retry backoff... 
+ + backoff := time.Duration(try) * pool.PoolConfig.TestDatabaseRetryRecreateSleepMin + if backoff > pool.PoolConfig.TestDatabaseRetryRecreateSleepMax { + backoff = pool.PoolConfig.TestDatabaseRetryRecreateSleepMax + } + + fmt.Printf("recreateDatabaseGracefully: DB is still in use, will retry ID='%v' try=%v in backoff=%v.\n", id, try, backoff) + time.Sleep(backoff) } else { - fmt.Printf("recreateDatabaseGracefully: db error while cleanup ID='%v' err=%v\n", id, err) - return nil // noop + fmt.Printf("recreateDatabaseGracefully: bailout worker task DB error while cleanup ID='%v' try=%v err=%v\n", id, try, err) + return err } } else { goto MoveToReady diff --git a/pkg/pool/pool_collection.go b/pkg/pool/pool_collection.go index b509e9a..b8e35fd 100644 --- a/pkg/pool/pool_collection.go +++ b/pkg/pool/pool_collection.go @@ -14,11 +14,14 @@ import ( var ErrUnknownHash = errors.New("no database pool exists for this hash") type PoolConfig struct { - MaxPoolSize int - InitialPoolSize int - TestDBNamePrefix string - PoolMaxParallelTasks int - DisableWorkerAutostart bool + InitialPoolSize int // Initial number of ready DBs prepared in background + MaxPoolSize int // Maximal pool size that won't be exceeded + TestDBNamePrefix string // Test-Database prefix: DatabasePrefix_TestDBNamePrefix_HASH_ID + MaxParallelTasks int // Maximal number of pool tasks running in parallel. Must be a number greater or equal 1. + TestDatabaseRetryRecreateSleepMin time.Duration // Minimal time to wait after a test db recreate has failed (e.g. as client is still connected). Subsequent retries multiply this values until... + TestDatabaseRetryRecreateSleepMax time.Duration // ... the maximum possible sleep time between retries (e.g. 3 seconds) is reached. + + DisableWorkerAutostart bool // test only flag for starting without background worker task system } type PoolCollection struct { diff --git a/pkg/pool/pool_collection_test.go b/pkg/pool/pool_collection_test.go index 465c515..00eafca 100644 --- a/pkg/pool/pool_collection_test.go +++ b/pkg/pool/pool_collection_test.go @@ -18,7 +18,7 @@ func TestPoolAddGet(t *testing.T) { ctx := context.Background() cfg := pool.PoolConfig{ MaxPoolSize: 2, - PoolMaxParallelTasks: 4, + MaxParallelTasks: 4, TestDBNamePrefix: "prefix_", DisableWorkerAutostart: true, // no extend / cleanDirty tasks should run automatically! 
} @@ -94,10 +94,10 @@ func TestPoolAddGetConcurrent(t *testing.T) { maxPoolSize := 15 cfg := pool.PoolConfig{ - MaxPoolSize: maxPoolSize, - InitialPoolSize: maxPoolSize, - PoolMaxParallelTasks: 4, - TestDBNamePrefix: "", + MaxPoolSize: maxPoolSize, + InitialPoolSize: maxPoolSize, + MaxParallelTasks: 4, + TestDBNamePrefix: "", } p := pool.NewPoolCollection(cfg) t.Cleanup(func() { p.Stop() }) @@ -149,9 +149,9 @@ func TestPoolAddGetReturnConcurrent(t *testing.T) { } cfg := pool.PoolConfig{ - MaxPoolSize: 40, - PoolMaxParallelTasks: 4, - TestDBNamePrefix: "", + MaxPoolSize: 40, + MaxParallelTasks: 4, + TestDBNamePrefix: "", } p := pool.NewPoolCollection(cfg) t.Cleanup(func() { p.Stop() }) @@ -212,8 +212,8 @@ func TestPoolRemoveAll(t *testing.T) { } cfg := pool.PoolConfig{ - MaxPoolSize: 6, - PoolMaxParallelTasks: 4, + MaxPoolSize: 6, + MaxParallelTasks: 4, } p := pool.NewPoolCollection(cfg) t.Cleanup(func() { p.Stop() }) @@ -263,10 +263,10 @@ func TestPoolReuseDirty(t *testing.T) { maxPoolSize := 40 cfg := pool.PoolConfig{ - MaxPoolSize: maxPoolSize, - InitialPoolSize: maxPoolSize, - PoolMaxParallelTasks: 1, - TestDBNamePrefix: "test_", + MaxPoolSize: maxPoolSize, + InitialPoolSize: maxPoolSize, + MaxParallelTasks: 1, + TestDBNamePrefix: "test_", } p := pool.NewPoolCollection(cfg) @@ -323,7 +323,7 @@ func TestPoolReturnTestDatabase(t *testing.T) { cfg := pool.PoolConfig{ MaxPoolSize: 10, - PoolMaxParallelTasks: 3, + MaxParallelTasks: 3, DisableWorkerAutostart: true, // no extend / cleanDirty tasks should run automatically! } p := pool.NewPoolCollection(cfg) From 5bcf409de0f413072346500d5921248b07fadef3 Mon Sep 17 00:00:00 2001 From: Mario Ranftl Date: Wed, 30 Aug 2023 19:44:35 +0200 Subject: [PATCH 06/10] adds test database generations and TestDatabaseMinimalLifetime (blocks auto clean for specific time on issued databases) to properly deal with pressure caused by fast test db issuance and out-of-order recreates which might cause interferances --- pkg/manager/manager_config.go | 1 + pkg/manager/manager_test.go | 8 +-- pkg/pool/pool.go | 86 ++++++++++++++++++++++++-------- pkg/pool/pool_collection.go | 1 + pkg/pool/pool_collection_test.go | 2 +- 5 files changed, 73 insertions(+), 25 deletions(-) diff --git a/pkg/manager/manager_config.go b/pkg/manager/manager_config.go index 55b36b0..4053003 100644 --- a/pkg/manager/manager_config.go +++ b/pkg/manager/manager_config.go @@ -62,6 +62,7 @@ func DefaultManagerConfigFromEnv() ManagerConfig { MaxParallelTasks: util.GetEnvAsInt("INTEGRESQL_POOL_MAX_PARALLEL_TASKS", runtime.NumCPU()), TestDatabaseRetryRecreateSleepMin: time.Millisecond * time.Duration(util.GetEnvAsInt("INTEGRESQL_TEST_DB_RETRY_RECREATE_SLEEP_MIN_MS", 250 /*250 ms*/)), TestDatabaseRetryRecreateSleepMax: time.Millisecond * time.Duration(util.GetEnvAsInt("INTEGRESQL_TEST_DB_RETRY_RECREATE_SLEEP_MAX_MS", 1000*3 /*3 sec*/)), + TestDatabaseMinimalLifetime: time.Millisecond * time.Duration(util.GetEnvAsInt("INTEGRESQL_TEST_DB_MINIMAL_LIFETIME_MS", 250 /*250 ms*/)), }, } } diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index 045a5ca..676bb43 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -648,9 +648,9 @@ func TestManagerGetAndRecreateTestDatabase(t *testing.T) { ctx := context.Background() cfg := manager.DefaultManagerConfigFromEnv() - cfg.PoolConfig.InitialPoolSize = 4 + cfg.PoolConfig.InitialPoolSize = 8 cfg.PoolConfig.MaxPoolSize = 8 - cfg.TestDatabaseGetTimeout = 500 * time.Millisecond + cfg.TestDatabaseGetTimeout = 1000 * 
time.Millisecond m, _ := testManagerWithConfig(cfg) if err := m.Initialize(ctx); err != nil { @@ -673,7 +673,7 @@ func TestManagerGetAndRecreateTestDatabase(t *testing.T) { } // request many more databases than initally added - for i := 0; i <= cfg.PoolConfig.MaxPoolSize*2; i++ { + for i := 0; i <= cfg.PoolConfig.MaxPoolSize*5; i++ { test, err := m.GetTestDatabase(ctx, hash) t.Logf("open %v", test.ID) @@ -714,7 +714,7 @@ func TestManagerGetTestDatabaseDontReturn(t *testing.T) { cfg := manager.DefaultManagerConfigFromEnv() cfg.PoolConfig.InitialPoolSize = 5 cfg.PoolConfig.MaxPoolSize = 5 - cfg.TestDatabaseGetTimeout = time.Second + cfg.TestDatabaseGetTimeout = time.Second * 5 m, _ := testManagerWithConfig(cfg) if err := m.Initialize(ctx); err != nil { diff --git a/pkg/pool/pool.go b/pkg/pool/pool.go index 8054bde..6b82a00 100644 --- a/pkg/pool/pool.go +++ b/pkg/pool/pool.go @@ -22,8 +22,9 @@ var ( type dbState int // Indicates a current DB state. const ( - dbStateReady dbState = iota // Initialized according to a template and ready to be picked up. - dbStateDirty // Taken by a client and potentially currently in use. + dbStateReady dbState = iota // Initialized according to a template and ready to be picked up. + dbStateDirty // Taken by a client and potentially currently in use. + dbStateRecreating // In the process of being recreated (to prevent concurrent cleans) ) const minConcurrentTasksNum = 1 @@ -31,14 +32,24 @@ const minConcurrentTasksNum = 1 type existingDB struct { state dbState db.TestDatabase + + // To prevent auto-cleans of a testdatabase on the dirty channel directly after it was issued as ready, + // each testdatabase gets a timestamp assigned after which auto-cleaning it generally allowed (unlock + // and recreate do not respect this). This timeout is typically very low and should only be neccessary + // to be tweaked in scenarios in which the pool is overloaded by requests. + // Prefer to tweak InitialPoolSize (the always ready dbs) and MaxPoolSize instead if you have issues here. + blockAutoCleanDirtyUntil time.Time + + // increased after each recreation, useful for sleepy recreating workers to check if we still operate on the same gen. + generation uint } type workerTask string const ( - workerTaskStop = "STOP" - workerTaskExtend = "EXTEND" - workerTaskCleanDirty = "CLEAN_DIRTY" + workerTaskStop = "STOP" + workerTaskExtend = "EXTEND" + workerTaskAutoCleanDirty = "CLEAN_DIRTY" ) // HashPool holds a test DB pool for a certain hash. Each HashPool is running cleanup workers in background. 
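To make the intent of the two new existingDB fields easier to follow, here is a condensed sketch written as if inside pkg/pool, with the real locking and channel plumbing left out; markDirty and markRecreated are invented helper names, the field semantics follow the struct comments above.

// Issuing a ready DB to a client: flag it dirty and block auto-cleaning
// until the configured minimal lifetime has passed.
func markDirty(db *existingDB, minimalLifetime time.Duration) {
	db.state = dbStateDirty
	db.blockAutoCleanDirtyUntil = time.Now().Add(minimalLifetime)
}

// A successful recreate: bump the generation so that a worker which slept on
// a stale snapshot of this DB can detect that it was already handled by
// someone else; the real code then pushes the ID back onto the ready channel.
func markRecreated(db *existingDB) {
	db.generation++
	db.state = dbStateReady
}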
@@ -160,7 +171,10 @@ func (pool *HashPool) GetTestDatabase(ctx context.Context, hash string, timeout return } + // flag as dirty and block auto clean until testDB.state = dbStateDirty + testDB.blockAutoCleanDirtyUntil = time.Now().Add(pool.TestDatabaseMinimalLifetime) + pool.dbs[index] = testDB pool.dirty <- index @@ -170,9 +184,9 @@ func (pool *HashPool) GetTestDatabase(ctx context.Context, hash string, timeout } // we try to ensure that InitialPoolSize count is staying ready - // thus, we try to move the oldest dirty dbs into recreating with the workerTaskCleanDirty + // thus, we try to move the oldest dirty dbs into recreating with the workerTaskAutoCleanDirty if len(pool.dbs) >= pool.PoolConfig.MaxPoolSize && (len(pool.ready)+len(pool.recreating)) < pool.InitialPoolSize { - pool.tasksChan <- workerTaskCleanDirty + pool.tasksChan <- workerTaskAutoCleanDirty } fmt.Printf("pool#%s: ready=%d, dirty=%d, recreating=%d, tasksChan=%d, dbs=%d initial=%d max=%d (GetTestDatabase)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.recreating), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize) @@ -189,8 +203,8 @@ func (pool *HashPool) workerTaskLoop(ctx context.Context, taskChan <-chan string fmt.Printf("pool#%s: workerTaskLoop\n", pool.templateDB.TemplateHash) handlers := map[string]func(ctx context.Context) error{ - workerTaskExtend: ignoreErrs(pool.extend, ErrPoolFull, context.Canceled), - workerTaskCleanDirty: ignoreErrs(pool.cleanDirty, context.Canceled), + workerTaskExtend: ignoreErrs(pool.extend, ErrPoolFull, context.Canceled), + workerTaskAutoCleanDirty: ignoreErrs(pool.autoCleanDirty, context.Canceled), } // to limit the number of running goroutines. @@ -268,7 +282,7 @@ func (pool *HashPool) ReturnTestDatabase(ctx context.Context, hash string, id in // check if db is in the correct state testDB := pool.dbs[id] - if testDB.state == dbStateReady { + if testDB.state != dbStateDirty { return nil } @@ -344,16 +358,21 @@ func (pool *HashPool) recreateDatabaseGracefully(ctx context.Context, id int) er return ctx.Err() } - pool.RLock() + pool.Lock() - if pool.dbs[id].state == dbStateReady { + if pool.dbs[id].state != dbStateDirty { // nothing to do - pool.RUnlock() + pool.Unlock() return nil } testDB := pool.dbs[id] - pool.RUnlock() + + // set state recreating... + pool.dbs[id].state = dbStateRecreating + pool.dbs[id] = testDB + + pool.Unlock() pool.recreating <- struct{}{} @@ -410,17 +429,19 @@ MoveToReady: return nil } + // increase the generation of the testdb (as we just recreated it) and move into ready! + pool.dbs[id].generation++ pool.dbs[id].state = dbStateReady - // pool.dbs[id] = testDB pool.ready <- pool.dbs[id].ID return nil } -// cleanDirty reads 'dirty' channel and cleans up a test DB with the received index. +// autoCleanDirty reads 'dirty' channel and cleans up a test DB with the received index. // When the DB is recreated according to a template, its index goes to the 'ready' channel. -func (pool *HashPool) cleanDirty(ctx context.Context) error { +// Note that we generally gurantee FIFO when it comes to auto-cleaning as long as no manual unlock/recreates happen. 
+func (pool *HashPool) autoCleanDirty(ctx context.Context) error { ctx, task := trace.NewTask(ctx, "worker_clean_dirty") defer task.End() @@ -432,11 +453,11 @@ func (pool *HashPool) cleanDirty(ctx context.Context) error { return ctx.Err() default: // nothing to do - fmt.Println("cleanDirty noop") + fmt.Println("autoCleanDirty noop") return nil } - fmt.Printf("pool#%s: cleanDirty %v\n", pool.templateDB.TemplateHash, id) + fmt.Printf("pool#%s: autoCleanDirty id=%v\n", pool.templateDB.TemplateHash, id) regLock := trace.StartRegion(ctx, "worker_wait_for_rlock_hash_pool") pool.RLock() @@ -447,7 +468,32 @@ func (pool *HashPool) cleanDirty(ctx context.Context) error { pool.RUnlock() return ErrInvalidIndex } - // testDB := pool.dbs[id] + + blockedUntil := time.Until(pool.dbs[id].blockAutoCleanDirtyUntil) + generation := pool.dbs[id].generation + + pool.RUnlock() + + // immediately pass to pool recreate + if blockedUntil <= 0 { + return pool.recreateDatabaseGracefully(ctx, id) + } + + // else we need to wait until we are allowed to work with it! + // we block auto-cleaning until we are allowed to... + fmt.Printf("pool#%s: autoCleanDirty id=%v sleep for blockedUntil=%v...\n", pool.templateDB.TemplateHash, id, blockedUntil) + time.Sleep(blockedUntil) + + // we need to check that the testDB.generation did not change since we slept + // (which would indicate that the database was already unlocked/recreated by someone else in the meantime) + pool.RLock() + + if pool.dbs[id].generation != generation || pool.dbs[id].state != dbStateDirty { + fmt.Printf("pool#%s: autoCleanDirty id=%v bailout old generation=%v vs new generation=%v state=%v...\n", pool.templateDB.TemplateHash, id, generation, pool.dbs[id].generation, pool.dbs[id].state) + pool.RUnlock() + return nil + } + pool.RUnlock() return pool.recreateDatabaseGracefully(ctx, id) diff --git a/pkg/pool/pool_collection.go b/pkg/pool/pool_collection.go index b8e35fd..1a91a98 100644 --- a/pkg/pool/pool_collection.go +++ b/pkg/pool/pool_collection.go @@ -20,6 +20,7 @@ type PoolConfig struct { MaxParallelTasks int // Maximal number of pool tasks running in parallel. Must be a number greater or equal 1. TestDatabaseRetryRecreateSleepMin time.Duration // Minimal time to wait after a test db recreate has failed (e.g. as client is still connected). Subsequent retries multiply this values until... TestDatabaseRetryRecreateSleepMax time.Duration // ... the maximum possible sleep time between retries (e.g. 3 seconds) is reached. + TestDatabaseMinimalLifetime time.Duration // After a testdatabase transitions from ready to dirty, always block auto-recreation for this duration (except manual recreate). 
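// Illustrative only: with the env defaults wired up in DefaultManagerConfigFromEnv
// above, a fully populated PoolConfig ends up roughly like this (the pool sizes are
// arbitrary example values; assumes the time and runtime imports):
var examplePoolConfig = PoolConfig{
	InitialPoolSize:                   10,
	MaxPoolSize:                       50,
	TestDBNamePrefix:                  "test_",
	MaxParallelTasks:                  runtime.NumCPU(),       // INTEGRESQL_POOL_MAX_PARALLEL_TASKS
	TestDatabaseRetryRecreateSleepMin: 250 * time.Millisecond, // INTEGRESQL_TEST_DB_RETRY_RECREATE_SLEEP_MIN_MS
	TestDatabaseRetryRecreateSleepMax: 3 * time.Second,        // INTEGRESQL_TEST_DB_RETRY_RECREATE_SLEEP_MAX_MS
	TestDatabaseMinimalLifetime:       250 * time.Millisecond, // INTEGRESQL_TEST_DB_MINIMAL_LIFETIME_MS
}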
DisableWorkerAutostart bool // test only flag for starting without background worker task system } diff --git a/pkg/pool/pool_collection_test.go b/pkg/pool/pool_collection_test.go index 00eafca..90c0c05 100644 --- a/pkg/pool/pool_collection_test.go +++ b/pkg/pool/pool_collection_test.go @@ -274,7 +274,7 @@ func TestPoolReuseDirty(t *testing.T) { t.Cleanup(func() { p.Stop() }) getDirty := func(seenIDMap *sync.Map) { - newTestDB1, err := p.GetTestDatabase(ctx, templateDB1.TemplateHash, 1*time.Second) + newTestDB1, err := p.GetTestDatabase(ctx, templateDB1.TemplateHash, 3*time.Second) assert.NoError(t, err) seenIDMap.Store(newTestDB1.ID, true) } From 96dd1c3f9c7fe98cea427a8260e0c87d015195c7 Mon Sep 17 00:00:00 2001 From: Mario Ranftl Date: Mon, 4 Sep 2023 15:41:29 +0200 Subject: [PATCH 07/10] fix toolchain, reenable pipefails, fix linting, cleanup --- .devcontainer/devcontainer.json | 39 +++------- .drone.yml | 2 +- .golangci.yml | 29 +++++++ Dockerfile | 77 ++++++++++++++---- Makefile | 28 ++++--- cmd/server/main.go | 8 +- docker-compose.yml | 34 +++++++- internal/api/server.go | 1 + internal/api/templates/templates.go | 78 ++++++++++--------- internal/config/build_args.go | 18 +++++ ...st.go => database_config_internal_test.go} | 0 pkg/manager/helpers_test.go | 4 +- pkg/manager/manager.go | 22 +----- pkg/manager/manager_config.go | 3 +- pkg/manager/manager_test.go | 9 ++- pkg/pool/pool.go | 32 +++++--- pkg/pool/pool_collection.go | 41 +++++----- ...st.go => pool_collection_internal_test.go} | 57 +++++++------- pkg/templates/template.go | 14 ++-- pkg/templates/template_collection.go | 2 +- pkg/templates/template_test.go | 5 +- pkg/util/retry.go | 2 +- tests/testclient/client.go | 5 +- 23 files changed, 308 insertions(+), 202 deletions(-) create mode 100644 .golangci.yml create mode 100644 internal/config/build_args.go rename pkg/db/{database_config_test.go => database_config_internal_test.go} (100%) rename pkg/pool/{pool_collection_test.go => pool_collection_internal_test.go} (86%) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index a8f6bd0..7e268fd 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -14,9 +14,11 @@ // The optional 'workspaceFolder' property is the path VS Code should open by default when // connected. This is typically a file mount in .devcontainer/docker-compose.yml "workspaceFolder": "/app", - // Set *default* container specific settings.json values on container create. + // All containers should stop if we close / reload the VSCode window. + "shutdownAction": "stopCompose", "customizations": { "vscode": { + // Set *default* container specific settings.json values on container create. 
"settings": { // https://github.com/golang/tools/blob/master/gopls/doc/vscode.md#vscode "go.useLanguageServer": true, @@ -45,6 +47,8 @@ // DISABLED, done via "staticcheck": false, }, + // https://code.visualstudio.com/docs/languages/go#_intellisense + "go.autocompleteUnimportedPackages": true, // https://github.com/golangci/golangci-lint#editor-integration "go.lintTool": "golangci-lint", "go.lintFlags": [ @@ -69,30 +73,6 @@ }, // ensure that the pgFormatter VSCode extension uses the pgFormatter that comes preinstalled in the Dockerfile "pgFormatter.pgFormatterPath": "/usr/local/bin/pg_format" - // "go.lintOnSave": "workspace" - // general build settings in sync with our makefile - // "go.buildFlags": [ - // "-o", - // "bin/app" - // ] - // "sqltools.connections": [ - // { - // "database": "sample", - // "dialect": "PostgreSQL", - // "name": "postgres", - // "password": "9bed16f749d74a3c8bfbced18a7647f5", - // "port": 5432, - // "server": "postgres", - // "username": "dbuser" - // } - // ], - // "sqltools.autoConnectTo": [ - // "postgres" - // ], - // // only use pg_format to actually format! - // "sqltools.formatLanguages": [], - // "sqltools.telemetry": false, - // "sqltools.autoOpenSessionFiles": false }, // Add the IDs of extensions you want installed when the container is created. "extensions": [ @@ -100,12 +80,12 @@ "golang.go", "bradymholt.pgformatter", // optional: - // "766b.go-outliner", + "42crunch.vscode-openapi", "heaths.vscode-guid", "bungcip.better-toml", "eamodio.gitlens", - "casualjim.gotemplate" - // "mtxr.sqltools", + "casualjim.gotemplate", + "yzhang.markdown-all-in-one" ] } }, @@ -115,6 +95,7 @@ // "shutdownAction": "none", // Uncomment the next line to run commands after the container is created - for example installing git. "postCreateCommand": "go version", + // "postCreateCommand": "apt-get update && apt-get install -y git", // Uncomment to connect as a non-root user. See https://aka.ms/vscode-remote/containers/non-root. - "remoteUser": "development" + // "remoteUser": "" } \ No newline at end of file diff --git a/.drone.yml b/.drone.yml index 0a8deee..ec24175 100644 --- a/.drone.yml +++ b/.drone.yml @@ -133,7 +133,7 @@ pipeline: environment: IMAGE_TAG: *IMAGE_BUILDER_ID commands: - - "docker build --target builder-integresql --compress -t $${IMAGE_TAG} ." + - "docker build --target builder --compress -t $${IMAGE_TAG} ." <<: *WHEN_BUILD_EVENT "docker build (target integresql)": diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..af28163 --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,29 @@ +linters: + enable: + # https://github.com/golangci/golangci-lint#enabled-by-default-linters + # Additional linters you want to activate may be specified here... + + # --- + # https://github.com/mgechev/revive + # replacement for the now deprecated official golint linter, see https://github.com/golang/go/issues/38968 + - revive + + # --- + # https://github.com/maratori/testpackage + # used to enforce blackbox testing + - testpackage + + # --- + # https://github.com/securego/gosec + # inspects source code for security problems by scanning the Go AST. + - gosec + + # --- + # https://github.com/sivchari/tenv + # prefer t.Setenv instead of os.Setenv within test code. + - tenv + + # --- + # https://github.com/polyfloyd/go-errorlint + # ensure we are comparing errors via errors.Is, types/values via errors.As and wrap errors with %w. 
+ - errorlint diff --git a/Dockerfile b/Dockerfile index 6d23916..b7d3416 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,23 +18,55 @@ ENV MAKEFLAGS "-j 8 --no-print-directory" # e.g. stretch=>stretch-pgdg, buster=>buster-pgdg, bullseye=>bullseye-pgdg RUN echo "deb http://apt.postgresql.org/pub/repos/apt/ bullseye-pgdg main" \ | tee /etc/apt/sources.list.d/pgdg.list \ - && apt install curl ca-certificates gnupg \ - && curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | gpg --dearmor | tee /etc/apt/trusted.gpg.d/apt.postgresql.org.gpg >/dev/null - + && wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc \ + | apt-key add - # Install required system dependencies RUN apt-get update \ && apt-get install -y \ + # + # Mandadory minimal linux packages + # Installed at development stage and app stage + # Do not forget to add mandadory linux packages to the final app Dockerfile stage below! + # + # -- START MANDADORY -- + ca-certificates \ + # --- END MANDADORY --- + # + # Development specific packages + # Only installed at development stage and NOT available in the final Docker stage + # based upon + # https://github.com/microsoft/vscode-remote-try-go/blob/master/.devcontainer/Dockerfile + # https://raw.githubusercontent.com/microsoft/vscode-dev-containers/master/script-library/common-debian.sh + # + # icu-devtools: https://stackoverflow.com/questions/58736399/how-to-get-vscode-liveshare-extension-working-when-running-inside-vscode-remote + # graphviz: https://github.com/google/pprof#building-pprof + # -- START DEVELOPMENT -- + apt-utils \ + dialog \ + openssh-client \ + less \ + iproute2 \ + procps \ + lsb-release \ locales \ sudo \ bash-completion \ bsdmainutils \ + graphviz \ + xz-utils \ postgresql-client-12 \ + icu-devtools \ + tmux \ + rsync \ + # --- END DEVELOPMENT --- + # && apt-get clean \ && rm -rf /var/lib/apt/lists/* -# vscode support: LANG must be supported, requires installing the locale package first -# see https://github.com/Microsoft/vscode/issues/58015 +# env/vscode support: LANG must be supported, requires installing the locale package first +# https://github.com/Microsoft/vscode/issues/58015 +# https://stackoverflow.com/questions/28405902/how-to-set-the-locale-inside-a-debian-ubuntu-docker-container RUN sed -i -e 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && \ dpkg-reconfigure --frontend=noninteractive locales && \ update-locale LANG=en_US.UTF-8 @@ -82,6 +114,25 @@ RUN ARCH="$(arch | sed s/aarch64/arm64/ | sed s/x86_64/amd64/)" \ # https://github.com/uw-labs/lichen/tags RUN go install github.com/uw-labs/lichen@v0.1.7 +# watchexec +# https://github.com/watchexec/watchexec/releases +RUN mkdir -p /tmp/watchexec \ + && cd /tmp/watchexec \ + && wget https://github.com/watchexec/watchexec/releases/download/v1.20.6/watchexec-1.20.6-$(arch)-unknown-linux-musl.tar.xz \ + && tar xf watchexec-1.20.6-$(arch)-unknown-linux-musl.tar.xz \ + && cp watchexec-1.20.6-$(arch)-unknown-linux-musl/watchexec /usr/local/bin/watchexec \ + && rm -rf /tmp/watchexec + +# yq +# https://github.com/mikefarah/yq/releases +RUN mkdir -p /tmp/yq \ + && cd /tmp/yq \ + && ARCH="$(arch | sed s/aarch64/arm64/ | sed s/x86_64/amd64/)" \ + && wget "https://github.com/mikefarah/yq/releases/download/v4.30.5/yq_linux_${ARCH}.tar.gz" \ + && tar xzf "yq_linux_${ARCH}.tar.gz" \ + && cp "yq_linux_${ARCH}" /usr/local/bin/yq \ + && rm -rf /tmp/yq + # linux permissions / vscode support: Add user to avoid linux file permission issues # Detail: Inside the container, any mounted files/folders 
will have the exact same permissions # as outside the container - including the owner user ID (UID) and group ID (GID). @@ -100,7 +151,6 @@ RUN groupadd --gid $USER_GID $USERNAME \ && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \ && chmod 0440 /etc/sudoers.d/$USERNAME - # vscode support: cached extensions install directory # https://code.visualstudio.com/docs/remote/containers-advanced#_avoiding-extension-reinstalls-on-container-rebuild RUN mkdir -p /home/$USERNAME/.vscode-server/extensions \ @@ -113,7 +163,6 @@ RUN mkdir -p /home/$USERNAME/.vscode-server/extensions \ # Note that this should be the final step after installing all build deps RUN mkdir -p /$GOPATH/pkg && chown -R $USERNAME /$GOPATH - # $GOBIN is where our own compiled binaries will live and other go.mod / VSCode binaries will be installed. # It should always come AFTER our other $PATH segments and should be earliest targeted in stage "builder", # as /app/bin will the shadowed by a volume mount via docker-compose! @@ -133,15 +182,11 @@ COPY Makefile /app/Makefile COPY go.mod /app/go.mod COPY go.sum /app/go.sum COPY tools.go /app/tools.go -RUN make modules && make tools +RUN make modules +COPY tools.go /app/tools.go +RUN make tools COPY . /app/ - -### ----------------------- -# --- Stage: builder-integresql -### ----------------------- - -FROM builder as builder-integresql -RUN make build +RUN make go-build ### ----------------------- # --- Stage: integresql @@ -152,7 +197,7 @@ RUN make build # The :debug image provides a busybox shell to enter. # https://github.com/GoogleContainerTools/distroless#debug-images FROM gcr.io/distroless/base-debian11:debug as integresql -COPY --from=builder-integresql /app/bin/integresql / +COPY --from=builder /app/bin/integresql / # Note that cmd is not supported with these kind of images, no shell included # see https://github.com/GoogleContainerTools/distroless/issues/62 # and https://github.com/GoogleContainerTools/distroless#entrypoints diff --git a/Makefile b/Makefile index 8ce960f..4e4701c 100644 --- a/Makefile +++ b/Makefile @@ -4,8 +4,8 @@ # first is default target when running "make" without args build: ##- Default 'make' target: go-format, go-build and lint. - @$(MAKE) format - @$(MAKE) gobuild + @$(MAKE) go-format + @$(MAKE) go-build @$(MAKE) lint # useful to ensure that everything gets resetuped from scratch @@ -22,14 +22,16 @@ info-go: ##- (opt) Prints go.mod updates, module-name and current go version. @go version >> tmp/.info-go @cat tmp/.info-go -format: - go fmt +lint: go-lint ##- Runs golangci-lint and make check-*. -gobuild: - go build -o bin/integresql ./cmd/server +go-format: ##- (opt) Runs go format. + go fmt ./... -lint: - golangci-lint run --fast +go-build: ##- (opt) Runs go build. + go build -ldflags $(LDFLAGS) -o bin/integresql ./cmd/server + +go-lint: ##- (opt) Runs golangci-lint. + golangci-lint run --timeout 5m bench: ##- Run tests, output by package, print coverage. @go test -benchmem=false -run=./... -bench . 
github.com/allaboutapps/integresql/tests -race -count=4 -v @@ -179,10 +181,6 @@ LDFLAGS = $(eval LDFLAGS := "\ # required to ensure make fails if one recipe fails (even on parallel jobs) and on pipefails .ONESHELL: -# # normal POSIX bash shell mode -# SHELL = /bin/bash -# .SHELLFLAGS = -cEeuo pipefail - -# wrapped make time tracing shell, use it via MAKE_TRACE_TIME=true make -# SHELL = /bin/rksh -# .SHELLFLAGS = $@ \ No newline at end of file +# normal POSIX bash shell mode +SHELL = /bin/bash +.SHELLFLAGS = -cEeuo pipefail diff --git a/cmd/server/main.go b/cmd/server/main.go index f23b3aa..3399fd5 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -2,6 +2,8 @@ package main import ( "context" + "errors" + "fmt" "log" "net/http" "os" @@ -10,10 +12,14 @@ import ( "time" "github.com/allaboutapps/integresql/internal/api" + "github.com/allaboutapps/integresql/internal/config" "github.com/allaboutapps/integresql/internal/router" ) func main() { + + fmt.Println(config.GetFormattedBuildArgs()) + s := api.DefaultServerFromEnv() if err := s.InitManager(context.Background()); err != nil { @@ -35,7 +41,7 @@ func main() { ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) defer cancel() - if err := s.Shutdown(ctx); err != nil && err != http.ErrServerClosed { + if err := s.Shutdown(ctx); err != nil && !errors.Is(err, http.ErrServerClosed) { log.Fatalf("Failed to gracefully shut down server: %v", err) } } diff --git a/docker-compose.yml b/docker-compose.yml index 9f2de21..70acc3e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -8,9 +8,23 @@ services: # ports: # - "5000:5000" working_dir: /app + # linux permissions / vscode support: we must explicitly run as the development user + user: development volumes: - - .:/app #:delegated - # - ./.pkg:/go/pkg # enable this to reuse the pkg cache + # mount working directory + # https://code.visualstudio.com/docs/remote/containers-advanced#_update-the-mount-consistency-to-delegated-for-macos + # https://docs.docker.com/docker-for-mac/osxfs-caching/#delegated + # the container’s view is authoritative (permit delays before updates on the container appear in the host) + - .:/app:delegated + + # mount cached go pkg downloads + - go-pkg:/go/pkg + + # mount cached vscode container extensions + # https://code.visualstudio.com/docs/remote/containers-advanced#_avoiding-extension-reinstalls-on-container-rebuild + - vscode-extensions:/home/development/.vscode-server/extensions + - vscode-extensions-insiders:/home/development/.vscode-server-insiders/extensions + depends_on: - postgres environment: &SERVICE_ENV @@ -28,7 +42,13 @@ services: - seccomp:unconfined # Overrides default command so things don't shut down after the process ends. - command: /bin/sh -c "while sleep 1000; do :; done" + # Overrides default command so things don't shut down after the process ends. 
+ command: + - /bin/sh + - -c + - | + git config --global --add safe.directory /app + while sleep 1000; do :; done postgres: image: postgres:12.4-alpine # should be the same version as used in .drone.yml, Dockerfile and live @@ -46,3 +66,11 @@ services: volumes: pgvolume: # declare a named volume to persist DB data + + # go: go mod cached downloads + go-pkg: + + # vscode: Avoiding extension reinstalls on container rebuild + # https://code.visualstudio.com/docs/remote/containers-advanced#_avoiding-extension-reinstalls-on-container-rebuild + vscode-extensions: + vscode-extensions-insiders: diff --git a/internal/api/server.go b/internal/api/server.go index 23f6450..e4a4689 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -8,6 +8,7 @@ import ( "net" "time" + // #nosec G108 - pprof handlers (conditionally made available via http.DefaultServeMux within router) _ "net/http/pprof" "github.com/allaboutapps/integresql/pkg/manager" diff --git a/internal/api/templates/templates.go b/internal/api/templates/templates.go index f32446a..a81d1db 100644 --- a/internal/api/templates/templates.go +++ b/internal/api/templates/templates.go @@ -2,6 +2,7 @@ package templates import ( "context" + "errors" "net/http" "strconv" "time" @@ -33,14 +34,14 @@ func postInitializeTemplate(s *api.Server) echo.HandlerFunc { template, err := s.Manager.InitializeTemplateDatabase(ctx, payload.Hash) if err != nil { - switch err { - case manager.ErrManagerNotReady: + if errors.Is(err, manager.ErrManagerNotReady) { return echo.ErrServiceUnavailable - case manager.ErrTemplateAlreadyInitialized: + } else if errors.Is(err, manager.ErrTemplateAlreadyInitialized) { return echo.NewHTTPError(http.StatusLocked, "template is already initialized") - default: - return echo.NewHTTPError(http.StatusInternalServerError, err.Error()) } + + // default 500 + return echo.NewHTTPError(http.StatusInternalServerError, err.Error()) } return c.JSON(http.StatusOK, &template) @@ -55,17 +56,17 @@ func putFinalizeTemplate(s *api.Server) echo.HandlerFunc { defer cancel() if _, err := s.Manager.FinalizeTemplateDatabase(ctx, hash); err != nil { - switch err { - case manager.ErrTemplateAlreadyInitialized: + if errors.Is(err, manager.ErrTemplateAlreadyInitialized) { // template is initialized, we ignore this error return c.NoContent(http.StatusNoContent) - case manager.ErrManagerNotReady: + } else if errors.Is(err, manager.ErrManagerNotReady) { return echo.ErrServiceUnavailable - case manager.ErrTemplateNotFound: + } else if errors.Is(err, manager.ErrTemplateNotFound) { return echo.NewHTTPError(http.StatusNotFound, "template not found") - default: - return echo.NewHTTPError(http.StatusInternalServerError, err.Error()) } + + // default 500 + return echo.NewHTTPError(http.StatusInternalServerError, err.Error()) } return c.NoContent(http.StatusNoContent) @@ -80,14 +81,14 @@ func deleteDiscardTemplate(s *api.Server) echo.HandlerFunc { defer cancel() if err := s.Manager.DiscardTemplateDatabase(ctx, hash); err != nil { - switch err { - case manager.ErrManagerNotReady: + if errors.Is(err, manager.ErrManagerNotReady) { return echo.ErrServiceUnavailable - case manager.ErrTemplateNotFound: + } else if errors.Is(err, manager.ErrTemplateNotFound) { return echo.NewHTTPError(http.StatusNotFound, "template not found") - default: - return echo.NewHTTPError(http.StatusInternalServerError, err.Error()) } + + // default 500 + return echo.NewHTTPError(http.StatusInternalServerError, err.Error()) } return c.NoContent(http.StatusNoContent) @@ -103,24 +104,24 @@ 
func getTestDatabase(s *api.Server) echo.HandlerFunc { test, err := s.Manager.GetTestDatabase(ctx, hash) if err != nil { - switch err { - case manager.ErrManagerNotReady: + + if errors.Is(err, manager.ErrManagerNotReady) { return echo.ErrServiceUnavailable - case manager.ErrTemplateNotFound: + } else if errors.Is(err, manager.ErrTemplateNotFound) { return echo.NewHTTPError(http.StatusNotFound, "template not found") - case manager.ErrTemplateDiscarded: + } else if errors.Is(err, manager.ErrTemplateDiscarded) { return echo.NewHTTPError(http.StatusGone, "template was just discarded") - case pool.ErrPoolFull: - return echo.NewHTTPError(http.StatusInsufficientStorage, "pool is full and can't be extended") - default: - return echo.NewHTTPError(http.StatusInternalServerError, err.Error()) } + + // default 500 + return echo.NewHTTPError(http.StatusInternalServerError, err.Error()) } return c.JSON(http.StatusOK, &test) } } +// deprecated func deleteReturnTestDatabase(s *api.Server) echo.HandlerFunc { return postUnlockTestDatabase(s) } @@ -137,18 +138,18 @@ func postUnlockTestDatabase(s *api.Server) echo.HandlerFunc { defer cancel() if err := s.Manager.ReturnTestDatabase(ctx, hash, id); err != nil { - switch err { - case manager.ErrManagerNotReady: + if errors.Is(err, manager.ErrManagerNotReady) { return echo.ErrServiceUnavailable - case manager.ErrTemplateNotFound: + } else if errors.Is(err, manager.ErrTemplateNotFound) { return echo.NewHTTPError(http.StatusNotFound, "template not found") - case manager.ErrTestNotFound: + } else if errors.Is(err, manager.ErrTestNotFound) { return echo.NewHTTPError(http.StatusNotFound, "test database not found") - case pool.ErrTestDBInUse: + } else if errors.Is(err, pool.ErrTestDBInUse) { return echo.NewHTTPError(http.StatusLocked, pool.ErrTestDBInUse.Error()) - default: - return echo.NewHTTPError(http.StatusInternalServerError, err.Error()) } + + // default 500 + return echo.NewHTTPError(http.StatusInternalServerError, err.Error()) } return c.NoContent(http.StatusNoContent) @@ -164,18 +165,19 @@ func postRecreateTestDatabase(s *api.Server) echo.HandlerFunc { } if err := s.Manager.RecreateTestDatabase(c.Request().Context(), hash, id); err != nil { - switch err { - case manager.ErrManagerNotReady: + + if errors.Is(err, manager.ErrManagerNotReady) { return echo.ErrServiceUnavailable - case manager.ErrTemplateNotFound: + } else if errors.Is(err, manager.ErrTemplateNotFound) { return echo.NewHTTPError(http.StatusNotFound, "template not found") - case manager.ErrTestNotFound: + } else if errors.Is(err, manager.ErrTestNotFound) { return echo.NewHTTPError(http.StatusNotFound, "test database not found") - case pool.ErrTestDBInUse: + } else if errors.Is(err, pool.ErrTestDBInUse) { return echo.NewHTTPError(http.StatusLocked, pool.ErrTestDBInUse.Error()) - default: - return echo.NewHTTPError(http.StatusInternalServerError, err.Error()) } + + // default 500 + return echo.NewHTTPError(http.StatusInternalServerError, err.Error()) } return c.NoContent(http.StatusNoContent) diff --git a/internal/config/build_args.go b/internal/config/build_args.go new file mode 100644 index 0000000..7972bc2 --- /dev/null +++ b/internal/config/build_args.go @@ -0,0 +1,18 @@ +package config + +import "fmt" + +// The following vars are automatically injected via -ldflags. +// See Makefile target "make go-build" and make var $(LDFLAGS). +// No need to change them here. 
+// https://www.digitalocean.com/community/tutorials/using-ldflags-to-set-version-information-for-go-applications +var ( + ModuleName = "build.local/misses/ldflags" // e.g. "allaboutapps.dev/aw/go-starter" + Commit = "< 40 chars git commit hash via ldflags >" // e.g. "59cb7684dd0b0f38d68cd7db657cb614feba8f7e" + BuildDate = "1970-01-01T00:00:00+00:00" // e.g. "1970-01-01T00:00:00+00:00" +) + +// GetFormattedBuildArgs returns string representation of buildsargs set via ldflags " @ ()" +func GetFormattedBuildArgs() string { + return fmt.Sprintf("%v @ %v (%v)", ModuleName, Commit, BuildDate) +} diff --git a/pkg/db/database_config_test.go b/pkg/db/database_config_internal_test.go similarity index 100% rename from pkg/db/database_config_test.go rename to pkg/db/database_config_internal_test.go diff --git a/pkg/manager/helpers_test.go b/pkg/manager/helpers_test.go index 27c5048..00baa06 100644 --- a/pkg/manager/helpers_test.go +++ b/pkg/manager/helpers_test.go @@ -51,7 +51,7 @@ func disconnectManager(t *testing.T, m *manager.Manager) { } -func initTemplateDB(ctx context.Context, errs chan<- error, m *manager.Manager) { +func initTemplateDB(_ context.Context, errs chan<- error, m *manager.Manager) { template, err := m.InitializeTemplateDatabase(context.Background(), "hashinghash") if err != nil { @@ -159,7 +159,7 @@ func verifyTestDB(t *testing.T, test db.TestDatabase) { } } -func getTestDB(ctx context.Context, errs chan<- error, m *manager.Manager) { +func getTestDB(_ context.Context, errs chan<- error, m *manager.Manager) { _, err := m.GetTestDatabase(context.Background(), "hashinghash") errs <- err diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 355fd1e..24f6218 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -98,7 +98,7 @@ func (m *Manager) Connect(ctx context.Context) error { return nil } -func (m *Manager) Disconnect(ctx context.Context, ignoreCloseError bool) error { +func (m *Manager) Disconnect(_ context.Context, ignoreCloseError bool) error { if m.db == nil { return errors.New("manager is not connected") } @@ -398,25 +398,7 @@ func (m Manager) ResetAllTracking(ctx context.Context) error { // remove all templates to disallow any new test DB creation from existing templates m.templates.RemoveAll(ctx) - if err := m.pool.RemoveAll(ctx, m.dropTestPoolDB); err != nil { - return err - } - - return nil -} - -func (m Manager) dropDatabaseWithID(ctx context.Context, hash string, id int) error { - dbName := m.pool.MakeDBName(hash, id) - exists, err := m.checkDatabaseExists(ctx, dbName) - if err != nil { - return err - } - - if !exists { - return ErrTestNotFound - } - - return m.dropDatabase(ctx, dbName) + return m.pool.RemoveAll(ctx, m.dropTestPoolDB) } func (m Manager) checkDatabaseExists(ctx context.Context, dbName string) (bool, error) { diff --git a/pkg/manager/manager_config.go b/pkg/manager/manager_config.go index 4053003..24c56e7 100644 --- a/pkg/manager/manager_config.go +++ b/pkg/manager/manager_config.go @@ -9,7 +9,8 @@ import ( "github.com/allaboutapps/integresql/pkg/util" ) -type ManagerConfig struct { +// we explicitly want to access this struct via manager.ManagerConfig, thus we disable revive for the next line +type ManagerConfig struct { //nolint:revive ManagerDatabaseConfig db.DatabaseConfig TemplateDatabaseTemplate string diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index 676bb43..a2488cf 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -129,7 +129,7 @@ func 
TestManagerInitializeTemplateDatabaseTimeout(t *testing.T) { defer cancel() _, err := m.InitializeTemplateDatabase(ctxt, hash) - if err != context.DeadlineExceeded { + if !errors.Is(err, context.DeadlineExceeded) { t.Fatalf("received unexpected error, got %v, want %v", err, context.DeadlineExceeded) } } @@ -173,7 +173,7 @@ func TestManagerInitializeTemplateDatabaseConcurrently(t *testing.T) { if err == nil { success++ } else { - if err == manager.ErrTemplateAlreadyInitialized { + if errors.Is(err, manager.ErrTemplateAlreadyInitialized) { failed++ } else { errored++ @@ -384,7 +384,10 @@ func TestManagerFinalizeTemplateAndGetTestDatabaseConcurrently(t *testing.T) { return nil }) - g.Wait() + if err := g.Wait(); err != nil { + t.Fatal(err) + } + first := <-testCh assert.Equal(t, "FINALIZE", first) } diff --git a/pkg/pool/pool.go b/pkg/pool/pool.go index 6b82a00..a7a16c7 100644 --- a/pkg/pool/pool.go +++ b/pkg/pool/pool.go @@ -66,7 +66,7 @@ type HashPool struct { sync.RWMutex wg sync.WaitGroup - tasksChan chan string + tasksChan chan workerTask running bool workerContext context.Context // the ctx all background workers will receive (nil if not yet started) } @@ -89,7 +89,7 @@ func NewHashPool(cfg PoolConfig, templateDB db.Database, initDBFunc RecreateDBFu templateDB: templateDB, PoolConfig: cfg, - tasksChan: make(chan string, cfg.MaxPoolSize+1), + tasksChan: make(chan workerTask, cfg.MaxPoolSize+1), running: false, } @@ -194,15 +194,11 @@ func (pool *HashPool) GetTestDatabase(ctx context.Context, hash string, timeout return testDB.TestDatabase, nil } -func (pool *HashPool) AddTestDatabase(ctx context.Context, templateDB db.Database) error { - return pool.extend(ctx) -} - -func (pool *HashPool) workerTaskLoop(ctx context.Context, taskChan <-chan string, MaxParallelTasks int) { +func (pool *HashPool) workerTaskLoop(ctx context.Context, taskChan <-chan workerTask, MaxParallelTasks int) { fmt.Printf("pool#%s: workerTaskLoop\n", pool.templateDB.TemplateHash) - handlers := map[string]func(ctx context.Context) error{ + handlers := map[workerTask]func(ctx context.Context) error{ workerTaskExtend: ignoreErrs(pool.extend, ErrPoolFull, context.Canceled), workerTaskAutoCleanDirty: ignoreErrs(pool.autoCleanDirty, context.Canceled), } @@ -224,7 +220,7 @@ func (pool *HashPool) workerTaskLoop(ctx context.Context, taskChan <-chan string } pool.wg.Add(1) - go func(task string) { + go func(task workerTask) { defer func() { pool.wg.Done() @@ -248,7 +244,7 @@ func (pool *HashPool) controlLoop(ctx context.Context, cancel context.CancelFunc // ctx, cancel := context.WithCancel(context.Background()) defer cancel() - workerTasksChan := make(chan string, len(pool.tasksChan)) + workerTasksChan := make(chan workerTask, len(pool.tasksChan)) pool.wg.Add(1) go func() { defer pool.wg.Done() @@ -272,10 +268,15 @@ func (pool *HashPool) controlLoop(ctx context.Context, cancel context.CancelFunc } // ReturnTestDatabase returns the given test DB directly to the pool, without cleaning (recreating it). -func (pool *HashPool) ReturnTestDatabase(ctx context.Context, hash string, id int) error { +func (pool *HashPool) ReturnTestDatabase(ctx context.Context, id int) error { pool.Lock() defer pool.Unlock() + if ctx.Err() != nil { + // client vanished + return ctx.Err() + } + if id < 0 || id >= len(pool.dbs) { return ErrInvalidIndex } @@ -330,7 +331,7 @@ func (pool *HashPool) excludeIDFromDirtyChannel(id int) { } // RecreateTestDatabase prioritizes the test DB to be recreated next via the dirty worker. 
-func (pool *HashPool) RecreateTestDatabase(ctx context.Context, hash string, id int) error { +func (pool *HashPool) RecreateTestDatabase(ctx context.Context, id int) error { pool.RLock() if id < 0 || id >= len(pool.dbs) { @@ -341,10 +342,17 @@ func (pool *HashPool) RecreateTestDatabase(ctx context.Context, hash string, id fmt.Printf("pool#%s: ready=%d, dirty=%d, recreating=%d, tasksChan=%d, dbs=%d initial=%d max=%d (RecreateTestDatabase %v)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.recreating), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize, id) pool.RUnlock() + if ctx.Err() != nil { + // client vanished + return ctx.Err() + } + // exclude from the normal dirty channel, force recreation in a background worker... pool.excludeIDFromDirtyChannel(id) // directly spawn a new worker in the bg (with the same ctx as the typical workers) + // note that this runs unchained, meaning we do not care about errors that may happen via this bg task + //nolint:errcheck go pool.recreateDatabaseGracefully(pool.workerContext, id) return nil diff --git a/pkg/pool/pool_collection.go b/pkg/pool/pool_collection.go index 1a91a98..3f68770 100644 --- a/pkg/pool/pool_collection.go +++ b/pkg/pool/pool_collection.go @@ -13,7 +13,8 @@ import ( var ErrUnknownHash = errors.New("no database pool exists for this hash") -type PoolConfig struct { +// we explicitly want to access this struct via pool.PoolConfig, thus we disable revive for the next line +type PoolConfig struct { //nolint:revive InitialPoolSize int // Initial number of ready DBs prepared in background MaxPoolSize int // Maximal pool size that won't be exceeded TestDBNamePrefix string // Test-Database prefix: DatabasePrefix_TestDBNamePrefix_HASH_ID @@ -25,7 +26,8 @@ type PoolConfig struct { DisableWorkerAutostart bool // test only flag for starting without background worker task system } -type PoolCollection struct { +// we explicitly want to access this struct via pool.PoolCollection, thus we disable revive for the next line +type PoolCollection struct { //nolint:revive PoolConfig pools map[string]*HashPool // map[hash] @@ -56,7 +58,7 @@ func makeActualRecreateTestDBFunc(templateName string, userRecreateFunc Recreate type recreateTestDBFunc func(context.Context, *existingDB) error // InitHashPool creates a new pool with a given template hash and starts the cleanup workers. -func (p *PoolCollection) InitHashPool(ctx context.Context, templateDB db.Database, initDBFunc RecreateDBFunc) { +func (p *PoolCollection) InitHashPool(_ context.Context, templateDB db.Database, initDBFunc RecreateDBFunc) { p.mutex.Lock() defer p.mutex.Unlock() @@ -106,20 +108,6 @@ func (p *PoolCollection) GetTestDatabase(ctx context.Context, hash string, timeo return pool.GetTestDatabase(ctx, hash, timeout) } -// AddTestDatabase adds a new test DB to the pool and creates it according to the template. -// The new test DB is marked as 'Ready' and can be picked up with GetTestDatabase. -// If the pool size has already reached MAX, ErrPoolFull is returned. -func (p *PoolCollection) AddTestDatabase(ctx context.Context, templateDB db.Database) error { - hash := templateDB.TemplateHash - - pool, err := p.getPool(ctx, hash) - if err != nil { - return err - } - - return pool.AddTestDatabase(ctx, templateDB) -} - // ReturnTestDatabase returns the given test DB directly to the pool, without cleaning (recreating it). 
func (p *PoolCollection) ReturnTestDatabase(ctx context.Context, hash string, id int) error { pool, err := p.getPool(ctx, hash) @@ -127,7 +115,7 @@ func (p *PoolCollection) ReturnTestDatabase(ctx context.Context, hash string, id return err } - return pool.ReturnTestDatabase(ctx, hash, id) + return pool.ReturnTestDatabase(ctx, id) } // RecreateTestDatabase recreates the test DB according to the template and returns it back to the pool. @@ -137,7 +125,7 @@ func (p *PoolCollection) RecreateTestDatabase(ctx context.Context, hash string, return err } - return pool.RecreateTestDatabase(ctx, hash, id) + return pool.RecreateTestDatabase(ctx, id) } // RemoveAllWithHash removes a pool with a given template hash. @@ -218,3 +206,18 @@ func (p *PoolCollection) getPoolLockCollection(ctx context.Context, hash string) return pool, unlock, err } + +// extend is only used for internal testing! +// it adds a new test DB to the pool and creates it according to the template. +// The new test DB is marked as 'Ready' and can be picked up with GetTestDatabase. +// If the pool size has already reached MAX, ErrPoolFull is returned. +func (p *PoolCollection) extend(ctx context.Context, templateDB db.Database) error { + hash := templateDB.TemplateHash + + pool, err := p.getPool(ctx, hash) + if err != nil { + return err + } + + return pool.extend(ctx) +} diff --git a/pkg/pool/pool_collection_test.go b/pkg/pool/pool_collection_internal_test.go similarity index 86% rename from pkg/pool/pool_collection_test.go rename to pkg/pool/pool_collection_internal_test.go index 90c0c05..cb396f8 100644 --- a/pkg/pool/pool_collection_test.go +++ b/pkg/pool/pool_collection_internal_test.go @@ -1,4 +1,4 @@ -package pool_test +package pool import ( "context" @@ -7,7 +7,6 @@ import ( "time" "github.com/allaboutapps/integresql/pkg/db" - "github.com/allaboutapps/integresql/pkg/pool" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -16,13 +15,13 @@ func TestPoolAddGet(t *testing.T) { t.Parallel() ctx := context.Background() - cfg := pool.PoolConfig{ + cfg := PoolConfig{ MaxPoolSize: 2, MaxParallelTasks: 4, TestDBNamePrefix: "prefix_", DisableWorkerAutostart: true, // no extend / cleanDirty tasks should run automatically! 
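// For orientation, the two return-style entry points on PoolCollection differ only
// in what happens to the database's contents; a minimal caller-side sketch (the
// hash and ID are hypothetical, error handling elided):

// unlock: hand the DB back to the pool without recreating it
_ = p.ReturnTestDatabase(ctx, "2f3e1d4c", 4)

// recreate: restore the DB from its template before it is handed out again
_ = p.RecreateTestDatabase(ctx, "2f3e1d4c", 4)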
} - p := pool.NewPoolCollection(cfg) + p := NewPoolCollection(cfg) hash1 := "h1" hash2 := "h2" @@ -43,10 +42,10 @@ func TestPoolAddGet(t *testing.T) { // get from empty (just initialized) _, err := p.GetTestDatabase(ctx, hash1, 0) - assert.Error(t, err, pool.ErrTimeout) + assert.Error(t, err, ErrTimeout) // add a new one - assert.NoError(t, p.AddTestDatabase(ctx, templateDB)) + assert.NoError(t, p.extend(ctx, templateDB)) // get it testDB, err := p.GetTestDatabase(ctx, hash1, 1*time.Second) assert.NoError(t, err) @@ -57,13 +56,13 @@ func TestPoolAddGet(t *testing.T) { templateDB2 := templateDB templateDB2.TemplateHash = hash2 p.InitHashPool(ctx, templateDB2, initFunc) - assert.NoError(t, p.AddTestDatabase(ctx, templateDB2)) - assert.NoError(t, p.AddTestDatabase(ctx, templateDB2)) - assert.ErrorIs(t, p.AddTestDatabase(ctx, templateDB2), pool.ErrPoolFull) + assert.NoError(t, p.extend(ctx, templateDB2)) + assert.NoError(t, p.extend(ctx, templateDB2)) + assert.ErrorIs(t, p.extend(ctx, templateDB2), ErrPoolFull) // get from empty h1 _, err = p.GetTestDatabase(ctx, hash1, 100*time.Millisecond) - assert.ErrorIs(t, err, pool.ErrTimeout) + assert.ErrorIs(t, err, ErrTimeout) // get from h2 testDB1, err := p.GetTestDatabase(ctx, hash2, 1*time.Second) @@ -93,13 +92,13 @@ func TestPoolAddGetConcurrent(t *testing.T) { } maxPoolSize := 15 - cfg := pool.PoolConfig{ + cfg := PoolConfig{ MaxPoolSize: maxPoolSize, InitialPoolSize: maxPoolSize, MaxParallelTasks: 4, TestDBNamePrefix: "", } - p := pool.NewPoolCollection(cfg) + p := NewPoolCollection(cfg) t.Cleanup(func() { p.Stop() }) var wg sync.WaitGroup @@ -148,12 +147,12 @@ func TestPoolAddGetReturnConcurrent(t *testing.T) { return nil } - cfg := pool.PoolConfig{ + cfg := PoolConfig{ MaxPoolSize: 40, MaxParallelTasks: 4, TestDBNamePrefix: "", } - p := pool.NewPoolCollection(cfg) + p := NewPoolCollection(cfg) t.Cleanup(func() { p.Stop() }) p.InitHashPool(ctx, templateDB1, initFunc) @@ -163,8 +162,8 @@ func TestPoolAddGetReturnConcurrent(t *testing.T) { // add DBs sequentially for i := 0; i < cfg.MaxPoolSize/4; i++ { - assert.NoError(t, p.AddTestDatabase(ctx, templateDB1)) - assert.NoError(t, p.AddTestDatabase(ctx, templateDB2)) + assert.NoError(t, p.extend(ctx, templateDB1)) + assert.NoError(t, p.extend(ctx, templateDB2)) } // stop the workers to prevent auto cleaning in background @@ -211,11 +210,11 @@ func TestPoolRemoveAll(t *testing.T) { return nil } - cfg := pool.PoolConfig{ + cfg := PoolConfig{ MaxPoolSize: 6, MaxParallelTasks: 4, } - p := pool.NewPoolCollection(cfg) + p := NewPoolCollection(cfg) t.Cleanup(func() { p.Stop() }) p.InitHashPool(ctx, templateDB1, initFunc) @@ -223,8 +222,8 @@ func TestPoolRemoveAll(t *testing.T) { // add DBs sequentially for i := 0; i < cfg.MaxPoolSize; i++ { - assert.NoError(t, p.AddTestDatabase(ctx, templateDB1)) - assert.NoError(t, p.AddTestDatabase(ctx, templateDB2)) + assert.NoError(t, p.extend(ctx, templateDB1)) + assert.NoError(t, p.extend(ctx, templateDB2)) } // remove all @@ -232,13 +231,13 @@ func TestPoolRemoveAll(t *testing.T) { // try to get _, err := p.GetTestDatabase(ctx, hash1, 0) - assert.Error(t, err, pool.ErrTimeout) + assert.Error(t, err, ErrTimeout) _, err = p.GetTestDatabase(ctx, hash2, 0) - assert.Error(t, err, pool.ErrTimeout) + assert.Error(t, err, ErrTimeout) // start using pool again p.InitHashPool(ctx, templateDB1, initFunc) - assert.NoError(t, p.AddTestDatabase(ctx, templateDB1)) + assert.NoError(t, p.extend(ctx, templateDB1)) testDB, err := p.GetTestDatabase(ctx, hash1, 1*time.Second) 
assert.NoError(t, err) assert.Equal(t, 0, testDB.ID) @@ -262,13 +261,13 @@ func TestPoolReuseDirty(t *testing.T) { } maxPoolSize := 40 - cfg := pool.PoolConfig{ + cfg := PoolConfig{ MaxPoolSize: maxPoolSize, InitialPoolSize: maxPoolSize, MaxParallelTasks: 1, TestDBNamePrefix: "test_", } - p := pool.NewPoolCollection(cfg) + p := NewPoolCollection(cfg) p.InitHashPool(ctx, templateDB1, initFunc) t.Cleanup(func() { p.Stop() }) @@ -321,23 +320,23 @@ func TestPoolReturnTestDatabase(t *testing.T) { return nil } - cfg := pool.PoolConfig{ + cfg := PoolConfig{ MaxPoolSize: 10, MaxParallelTasks: 3, DisableWorkerAutostart: true, // no extend / cleanDirty tasks should run automatically! } - p := pool.NewPoolCollection(cfg) + p := NewPoolCollection(cfg) p.InitHashPool(ctx, templateDB1, initFunc) // add just one test DB - require.NoError(t, p.AddTestDatabase(ctx, templateDB1)) + require.NoError(t, p.extend(ctx, templateDB1)) testDB1, err := p.GetTestDatabase(ctx, templateDB1.TemplateHash, time.Millisecond) assert.NoError(t, err) // assert that workers are stopped and no new DB showed up _, err = p.GetTestDatabase(ctx, templateDB1.TemplateHash, time.Millisecond) - assert.ErrorIs(t, err, pool.ErrTimeout) + assert.ErrorIs(t, err, ErrTimeout) // return and get the same one assert.NoError(t, p.ReturnTestDatabase(ctx, hash1, testDB1.ID)) diff --git a/pkg/templates/template.go b/pkg/templates/template.go index 1911234..79a3e97 100644 --- a/pkg/templates/template.go +++ b/pkg/templates/template.go @@ -41,7 +41,7 @@ func NewTemplate(hash string, config TemplateConfig) *Template { return t } -func (t *Template) GetConfig(ctx context.Context) TemplateConfig { +func (t *Template) GetConfig(_ context.Context) TemplateConfig { t.mutex.RLock() defer t.mutex.RUnlock() @@ -49,7 +49,7 @@ func (t *Template) GetConfig(ctx context.Context) TemplateConfig { } // GetState locks the template and checks its state. -func (t *Template) GetState(ctx context.Context) TemplateState { +func (t *Template) GetState(_ context.Context) TemplateState { t.mutex.RLock() defer t.mutex.RUnlock() @@ -94,18 +94,18 @@ func (t *Template) WaitUntilFinalized(ctx context.Context, timeout time.Duration // GetStateWithLock gets the current state leaving the template locked. // REMEMBER to unlock it when you no longer need it locked. -func (t *Template) GetStateWithLock(ctx context.Context) (TemplateState, lockedTemplate) { +func (t *Template) GetStateWithLock(_ context.Context) (TemplateState, LockedTemplate) { t.mutex.Lock() - return t.state, lockedTemplate{t: t} + return t.state, LockedTemplate{t: t} } -type lockedTemplate struct { +type LockedTemplate struct { t *Template } // Unlock releases the locked template. -func (l *lockedTemplate) Unlock() { +func (l *LockedTemplate) Unlock() { if l.t != nil { l.t.mutex.Unlock() l.t = nil @@ -113,7 +113,7 @@ func (l *lockedTemplate) Unlock() { } // SetState sets a new state of the locked template (without acquiring the lock again). 
-func (l lockedTemplate) SetState(ctx context.Context, newState TemplateState) { +func (l LockedTemplate) SetState(_ context.Context, newState TemplateState) { if l.t.state == newState { return } diff --git a/pkg/templates/template_collection.go b/pkg/templates/template_collection.go index 1d4f31f..4769a91 100644 --- a/pkg/templates/template_collection.go +++ b/pkg/templates/template_collection.go @@ -81,7 +81,7 @@ func (tc *Collection) Get(ctx context.Context, hash string) (template *Template, } // RemoveUnsafe removes the template and can be called ONLY IF THE COLLECTION IS LOCKED. -func (tc *Collection) RemoveUnsafe(ctx context.Context, hash string) { +func (tc *Collection) RemoveUnsafe(_ context.Context, hash string) { delete(tc.templates, hash) } diff --git a/pkg/templates/template_test.go b/pkg/templates/template_test.go index 3415383..128c5dd 100644 --- a/pkg/templates/template_test.go +++ b/pkg/templates/template_test.go @@ -2,7 +2,6 @@ package templates_test import ( "context" - "errors" "fmt" "sync" "testing" @@ -48,7 +47,7 @@ func TestForReady(t *testing.T) { timeout := 1 * time.Second state := t1.WaitUntilFinalized(ctx, timeout) if state != templates.TemplateStateFinalized { - errsChan <- errors.New(fmt.Sprintf("expected state %v (finalized), but is %v", templates.TemplateStateFinalized, state)) + errsChan <- fmt.Errorf("expected state %v (finalized), but is %v", templates.TemplateStateFinalized, state) } }() } @@ -61,7 +60,7 @@ func TestForReady(t *testing.T) { timeout := 30 * time.Millisecond state := t1.WaitUntilFinalized(ctx, timeout) if state != templates.TemplateStateInit { - errsChan <- errors.New(fmt.Sprintf("expected state %v (init), but is %v", templates.TemplateStateInit, state)) + errsChan <- fmt.Errorf("expected state %v (init), but is %v", templates.TemplateStateInit, state) } }() } diff --git a/pkg/util/retry.go b/pkg/util/retry.go index 1f629c7..ac5fb18 100644 --- a/pkg/util/retry.go +++ b/pkg/util/retry.go @@ -17,5 +17,5 @@ func Retry(attempts int, sleep time.Duration, f func() error) error { time.Sleep(sleep) } - return fmt.Errorf("failing after %d attempts, lat error: %v", attempts, err) + return fmt.Errorf("failing after %d attempts, lat error: %w", attempts, err) } diff --git a/tests/testclient/client.go b/tests/testclient/client.go index 686c204..503a241 100644 --- a/tests/testclient/client.go +++ b/tests/testclient/client.go @@ -8,6 +8,7 @@ import ( "context" "database/sql" "encoding/json" + "errors" "fmt" "io" "net/http" @@ -16,6 +17,8 @@ import ( "github.com/allaboutapps/integresql/pkg/manager" "github.com/allaboutapps/integresql/pkg/util" + + // Import postgres driver for database/sql package _ "github.com/lib/pq" ) @@ -124,7 +127,7 @@ func (c *Client) SetupTemplate(ctx context.Context, hash string, init func(conn } return c.FinalizeTemplate(ctx, hash) - } else if err == manager.ErrTemplateAlreadyInitialized { + } else if errors.Is(err, manager.ErrTemplateAlreadyInitialized) { return nil } else { return err From 1e64a0c859408333c0ef049938e352ab599c2119 Mon Sep 17 00:00:00 2001 From: Mario Ranftl Date: Mon, 4 Sep 2023 17:44:07 +0200 Subject: [PATCH 08/10] private disableWorkerAutostart --- pkg/pool/pool_collection.go | 4 ++-- pkg/pool/pool_collection_internal_test.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/pool/pool_collection.go b/pkg/pool/pool_collection.go index 3f68770..d4ccd46 100644 --- a/pkg/pool/pool_collection.go +++ b/pkg/pool/pool_collection.go @@ -23,7 +23,7 @@ type PoolConfig struct { //nolint:revive 
TestDatabaseRetryRecreateSleepMax time.Duration // ... the maximum possible sleep time between retries (e.g. 3 seconds) is reached. TestDatabaseMinimalLifetime time.Duration // After a testdatabase transitions from ready to dirty, always block auto-recreation for this duration (except manual recreate). - DisableWorkerAutostart bool // test only flag for starting without background worker task system + disableWorkerAutostart bool // test only private flag for starting without background worker task system } // we explicitly want to access this struct via pool.PoolCollection, thus we disable revive for the next line @@ -67,7 +67,7 @@ func (p *PoolCollection) InitHashPool(_ context.Context, templateDB db.Database, // Create a new HashPool pool := NewHashPool(cfg, templateDB, initDBFunc) - if !cfg.DisableWorkerAutostart { + if !cfg.disableWorkerAutostart { pool.Start() } diff --git a/pkg/pool/pool_collection_internal_test.go b/pkg/pool/pool_collection_internal_test.go index cb396f8..a843d0c 100644 --- a/pkg/pool/pool_collection_internal_test.go +++ b/pkg/pool/pool_collection_internal_test.go @@ -19,7 +19,7 @@ func TestPoolAddGet(t *testing.T) { MaxPoolSize: 2, MaxParallelTasks: 4, TestDBNamePrefix: "prefix_", - DisableWorkerAutostart: true, // no extend / cleanDirty tasks should run automatically! + disableWorkerAutostart: true, // no extend / cleanDirty tasks should run automatically! } p := NewPoolCollection(cfg) @@ -323,7 +323,7 @@ func TestPoolReturnTestDatabase(t *testing.T) { cfg := PoolConfig{ MaxPoolSize: 10, MaxParallelTasks: 3, - DisableWorkerAutostart: true, // no extend / cleanDirty tasks should run automatically! + disableWorkerAutostart: true, // no extend / cleanDirty tasks should run automatically! } p := NewPoolCollection(cfg) From 34fdd8e7d4a791322e227e16d08eee433700dbc5 Mon Sep 17 00:00:00 2001 From: Mario Ranftl Date: Tue, 5 Sep 2023 15:35:01 +0200 Subject: [PATCH 09/10] intro zerolog and add proper log statements in pool and manager, into typical go-starter logging middleware and additional env vars for controlling them --- cmd/server/main.go | 28 ++- go.mod | 1 + go.sum | 7 + internal/api/middleware/logger.go | 307 ++++++++++++++++++++++++++++++ internal/api/server_config.go | 48 ++++- internal/router/echo_logger.go | 13 ++ internal/router/router.go | 53 +++++- internal/router/router_test.go | 35 ++++ internal/test/helper_request.go | 132 +++++++++++++ internal/test/test_server.go | 62 ++++++ pkg/manager/manager.go | 122 +++++++++--- pkg/manager/manager_config.go | 4 +- pkg/manager/manager_test.go | 4 +- pkg/pool/pool.go | 181 ++++++++++++------ pkg/pool/pool_collection.go | 2 +- pkg/util/context.go | 58 ++++++ pkg/util/log.go | 42 ++++ pkg/util/log_test.go | 20 ++ 18 files changed, 1022 insertions(+), 97 deletions(-) create mode 100644 internal/api/middleware/logger.go create mode 100644 internal/router/echo_logger.go create mode 100644 internal/router/router_test.go create mode 100644 internal/test/helper_request.go create mode 100644 internal/test/test_server.go create mode 100644 pkg/util/context.go create mode 100644 pkg/util/log.go create mode 100644 pkg/util/log_test.go diff --git a/cmd/server/main.go b/cmd/server/main.go index 3399fd5..9ab58f2 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -3,8 +3,6 @@ package main import ( "context" "errors" - "fmt" - "log" "net/http" "os" "os/signal" @@ -14,23 +12,39 @@ import ( "github.com/allaboutapps/integresql/internal/api" "github.com/allaboutapps/integresql/internal/config" 
"github.com/allaboutapps/integresql/internal/router" + "github.com/rs/zerolog" + "github.com/rs/zerolog/log" ) func main() { - fmt.Println(config.GetFormattedBuildArgs()) + cfg := api.DefaultServerConfigFromEnv() - s := api.DefaultServerFromEnv() + zerolog.TimeFieldFormat = time.RFC3339Nano + zerolog.SetGlobalLevel(cfg.Logger.Level) + if cfg.Logger.PrettyPrintConsole { + log.Logger = log.Output(zerolog.NewConsoleWriter(func(w *zerolog.ConsoleWriter) { + w.TimeFormat = "15:04:05" + })) + } + + log.Info().Str("version", config.GetFormattedBuildArgs()).Msg("starting...") + + s := api.NewServer(cfg) if err := s.InitManager(context.Background()); err != nil { - log.Fatalf("Failed to initialize manager: %v", err) + log.Fatal().Err(err).Msg("Failed to initialize manager") } router.Init(s) go func() { if err := s.Start(); err != nil { - log.Fatalf("Failed to start server: %v", err) + if errors.Is(err, http.ErrServerClosed) { + log.Info().Msg("Server closed") + } else { + log.Fatal().Err(err).Msg("Failed to start server") + } } }() @@ -42,6 +56,6 @@ func main() { defer cancel() if err := s.Shutdown(ctx); err != nil && !errors.Is(err, http.ErrServerClosed) { - log.Fatalf("Failed to gracefully shut down server: %v", err) + log.Fatal().Err(err).Msg("Failed to gracefully shut down server") } } diff --git a/go.mod b/go.mod index 4f748c6..b4e6479 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/google/uuid v1.3.0 github.com/labstack/echo/v4 v4.10.2 github.com/lib/pq v1.10.9 + github.com/rs/zerolog v1.28.0 github.com/stretchr/testify v1.8.4 golang.org/x/sync v0.3.0 ) diff --git a/go.sum b/go.sum index 5a270e2..3701fcb 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,8 @@ +github.com/coreos/go-systemd/v22 v22.3.3-0.20220203105225-a9a7ef127534/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/golang-jwt/jwt v3.2.2+incompatible h1:IfV12K8xAKAnZqdXVzCZ+TOjboZ2keLg81eXfW3O+oY= github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= @@ -17,14 +19,19 @@ github.com/labstack/gommon v0.4.0/go.mod h1:uW6kP17uPlLJsD3ijUYn3/M5bAxtlZhMI6m3 github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/mattn/go-colorable v0.1.11/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= +github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng= github.com/mattn/go-isatty v0.0.17/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 
h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rs/xid v1.4.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= +github.com/rs/zerolog v1.28.0 h1:MirSo27VyNi7RJYP3078AA1+Cyzd2GB66qy3aUHvsWY= +github.com/rs/zerolog v1.28.0/go.mod h1:NILgTygv/Uej1ra5XxGf82ZFSLk58MFGAUS2o6usyD0= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= diff --git a/internal/api/middleware/logger.go b/internal/api/middleware/logger.go new file mode 100644 index 0000000..7135b5b --- /dev/null +++ b/internal/api/middleware/logger.go @@ -0,0 +1,307 @@ +package middleware + +import ( + "bufio" + "bytes" + "context" + "io" + "net" + "net/http" + "net/url" + "strings" + "time" + + "github.com/allaboutapps/integresql/pkg/util" + "github.com/labstack/echo/v4" + "github.com/labstack/echo/v4/middleware" + "github.com/rs/zerolog" + "github.com/rs/zerolog/log" +) + +// RequestBodyLogSkipper defines a function to skip logging certain request bodies. +// Returning true skips logging the payload of the request. +type RequestBodyLogSkipper func(req *http.Request) bool + +// DefaultRequestBodyLogSkipper returns true for all requests with Content-Type +// application/x-www-form-urlencoded or multipart/form-data as those might contain +// binary or URL-encoded file uploads unfit for logging purposes. +func DefaultRequestBodyLogSkipper(req *http.Request) bool { + contentType := req.Header.Get(echo.HeaderContentType) + switch { + case strings.HasPrefix(contentType, echo.MIMEApplicationForm), + strings.HasPrefix(contentType, echo.MIMEMultipartForm): + return true + default: + return false + } +} + +// ResponseBodyLogSkipper defines a function to skip logging certain response bodies. +// Returning true skips logging the payload of the response. +type ResponseBodyLogSkipper func(req *http.Request, res *echo.Response) bool + +// DefaultResponseBodyLogSkipper returns false for all responses with Content-Type +// application/json, preventing logging for all other types of payloads as those +// might contain binary or URL-encoded data unfit for logging purposes. +func DefaultResponseBodyLogSkipper(_ *http.Request, res *echo.Response) bool { + contentType := res.Header().Get(echo.HeaderContentType) + switch { + case strings.HasPrefix(contentType, echo.MIMEApplicationJSON): + return false + default: + return true + } +} + +// BodyLogReplacer defines a function to replace certain parts of a body before logging it, +// mainly used to strip sensitive information from a request or response payload. +// The []byte returned should contain a sanitized payload ready for logging. +type BodyLogReplacer func(body []byte) []byte + +// DefaultBodyLogReplacer returns the body received without any modifications. +func DefaultBodyLogReplacer(body []byte) []byte { + return body +} + +// HeaderLogReplacer defines a function to replace certain parts of a header before logging it, +// mainly used to strip sensitive information from a request or response header. +// The http.Header returned should be a sanitized copy of the original header as not to modify +// the request or response while logging. 
+type HeaderLogReplacer func(header http.Header) http.Header + +// DefaultHeaderLogReplacer replaces all Authorization, X-CSRF-Token and Proxy-Authorization +// header entries with a redacted string, indicating their presence without revealing actual, +// potentially sensitive values in the logs. +func DefaultHeaderLogReplacer(header http.Header) http.Header { + sanitizedHeader := http.Header{} + + for k, vv := range header { + shouldRedact := strings.EqualFold(k, echo.HeaderAuthorization) || + strings.EqualFold(k, echo.HeaderXCSRFToken) || + strings.EqualFold(k, "Proxy-Authorization") + + for _, v := range vv { + if shouldRedact { + sanitizedHeader.Add(k, "*****REDACTED*****") + } else { + sanitizedHeader.Add(k, v) + } + } + } + + return sanitizedHeader +} + +// QueryLogReplacer defines a function to replace certain parts of a URL query before logging it, +// mainly used to strip sensitive information from a request query. +// The url.Values returned should be a sanitized copy of the original query as not to modify the +// request while logging. +type QueryLogReplacer func(query url.Values) url.Values + +// DefaultQueryLogReplacer returns the query received without any modifications. +func DefaultQueryLogReplacer(query url.Values) url.Values { + return query +} + +var ( + DefaultLoggerConfig = LoggerConfig{ + Skipper: middleware.DefaultSkipper, + Level: zerolog.DebugLevel, + LogRequestBody: false, + LogRequestHeader: false, + LogRequestQuery: false, + RequestBodyLogSkipper: DefaultRequestBodyLogSkipper, + RequestBodyLogReplacer: DefaultBodyLogReplacer, + RequestHeaderLogReplacer: DefaultHeaderLogReplacer, + RequestQueryLogReplacer: DefaultQueryLogReplacer, + LogResponseBody: false, + LogResponseHeader: false, + ResponseBodyLogSkipper: DefaultResponseBodyLogSkipper, + ResponseBodyLogReplacer: DefaultBodyLogReplacer, + } +) + +type LoggerConfig struct { + Skipper middleware.Skipper + Level zerolog.Level + LogRequestBody bool + LogRequestHeader bool + LogRequestQuery bool + RequestBodyLogSkipper RequestBodyLogSkipper + RequestBodyLogReplacer BodyLogReplacer + RequestHeaderLogReplacer HeaderLogReplacer + RequestQueryLogReplacer QueryLogReplacer + LogResponseBody bool + LogResponseHeader bool + ResponseBodyLogSkipper ResponseBodyLogSkipper + ResponseBodyLogReplacer BodyLogReplacer + ResponseHeaderLogReplacer HeaderLogReplacer +} + +// Logger with default logger output and configuration +func Logger() echo.MiddlewareFunc { + return LoggerWithConfig(DefaultLoggerConfig, nil) +} + +// LoggerWithConfig returns a new MiddlewareFunc which creates a logger with the desired configuration. +// If output is set to nil, the default output is used. If more output params are provided, the first is being used. 
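Note (illustrative sketch, not part of the diff): the skipper/replacer hooks above are plain function types, so a service can plug in its own sanitizers before bodies or headers hit the log. A minimal example of a custom RequestBodyLogReplacer wired into LoggerWithConfig; the "password" field name and the standalone main package are assumptions for illustration only.

    package main

    import (
        "encoding/json"

        "github.com/allaboutapps/integresql/internal/api/middleware"
        "github.com/labstack/echo/v4"
        "github.com/rs/zerolog"
    )

    // redactPassword is a hypothetical BodyLogReplacer that blanks a "password"
    // field before the request body is logged. Non-JSON bodies pass through unchanged.
    func redactPassword(body []byte) []byte {
        var payload map[string]interface{}
        if err := json.Unmarshal(body, &payload); err != nil {
            return body
        }
        if _, ok := payload["password"]; ok {
            payload["password"] = "*****REDACTED*****"
        }
        sanitized, err := json.Marshal(payload)
        if err != nil {
            return body
        }
        return sanitized
    }

    func newEcho() *echo.Echo {
        e := echo.New()
        e.Use(middleware.LoggerWithConfig(middleware.LoggerConfig{
            Level:                  zerolog.DebugLevel,
            LogRequestBody:         true,
            RequestBodyLogReplacer: redactPassword,
        }))
        return e
    }
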
+func LoggerWithConfig(config LoggerConfig, output ...io.Writer) echo.MiddlewareFunc { + if config.Skipper == nil { + config.Skipper = DefaultLoggerConfig.Skipper + } + if config.RequestBodyLogSkipper == nil { + config.RequestBodyLogSkipper = DefaultRequestBodyLogSkipper + } + if config.RequestBodyLogReplacer == nil { + config.RequestBodyLogReplacer = DefaultBodyLogReplacer + } + if config.RequestHeaderLogReplacer == nil { + config.RequestHeaderLogReplacer = DefaultHeaderLogReplacer + } + if config.RequestQueryLogReplacer == nil { + config.RequestQueryLogReplacer = DefaultQueryLogReplacer + } + if config.ResponseBodyLogSkipper == nil { + config.ResponseBodyLogSkipper = DefaultResponseBodyLogSkipper + } + if config.ResponseBodyLogReplacer == nil { + config.ResponseBodyLogReplacer = DefaultBodyLogReplacer + } + if config.ResponseHeaderLogReplacer == nil { + config.ResponseHeaderLogReplacer = DefaultHeaderLogReplacer + } + + return func(next echo.HandlerFunc) echo.HandlerFunc { + return func(c echo.Context) error { + if config.Skipper(c) { + return next(c) + } + + req := c.Request() + res := c.Response() + + id := req.Header.Get(echo.HeaderXRequestID) + if len(id) == 0 { + id = res.Header().Get(echo.HeaderXRequestID) + } + + in := req.Header.Get(echo.HeaderContentLength) + if len(in) == 0 { + in = "0" + } + + l := log.With(). + Dict("req", zerolog.Dict(). + Str("id", id). + Str("host", req.Host). + Str("method", req.Method). + Str("url", req.URL.String()). + Str("bytes_in", in), + ).Logger() + + if len(output) > 0 { + l = l.Output(output[0]) + } + + le := l.WithLevel(config.Level) + req = req.WithContext(l.WithContext(context.WithValue(req.Context(), util.CTXKeyRequestID, id))) + + if config.LogRequestBody && !config.RequestBodyLogSkipper(req) { + var reqBody []byte + var err error + if req.Body != nil { + reqBody, err = io.ReadAll(req.Body) + if err != nil { + l.Error().Err(err).Msg("Failed to read body while logging request") + return err + } + + req.Body = io.NopCloser(bytes.NewBuffer(reqBody)) + } + + le = le.Bytes("req_body", config.RequestBodyLogReplacer(reqBody)) + } + if config.LogRequestHeader { + header := zerolog.Dict() + for k, v := range config.RequestHeaderLogReplacer(req.Header) { + header.Strs(k, v) + } + + le = le.Dict("req_header", header) + } + if config.LogRequestQuery { + query := zerolog.Dict() + for k, v := range req.URL.Query() { + query.Strs(k, v) + } + + le = le.Dict("req_query", query) + } + + le.Msg("Request received") + + c.SetRequest(req) + + var resBody bytes.Buffer + if config.LogResponseBody { + mw := io.MultiWriter(res.Writer, &resBody) + writer := &bodyDumpResponseWriter{Writer: mw, ResponseWriter: res.Writer} + res.Writer = writer + } + + start := time.Now() + err := next(c) + if err != nil { + c.Error(err) + } + stop := time.Now() + + // Retrieve logger from context again since other middlewares might have enhanced it + ll := util.LogFromEchoContext(c) + lle := ll.WithLevel(config.Level). + Dict("res", zerolog.Dict(). + Int("status", res.Status). + Int64("bytes_out", res.Size). + TimeDiff("duration_ms", stop, start). 
+ Err(err), + ) + + if config.LogResponseBody && !config.ResponseBodyLogSkipper(req, res) { + lle = lle.Bytes("res_body", config.ResponseBodyLogReplacer(resBody.Bytes())) + } + if config.LogResponseHeader { + header := zerolog.Dict() + for k, v := range config.ResponseHeaderLogReplacer(res.Header()) { + header.Strs(k, v) + } + + lle = lle.Dict("res_header", header) + } + + lle.Msg("Response sent") + + return nil + } + } +} + +type bodyDumpResponseWriter struct { + io.Writer + http.ResponseWriter +} + +func (w *bodyDumpResponseWriter) WriteHeader(code int) { + w.ResponseWriter.WriteHeader(code) +} + +func (w *bodyDumpResponseWriter) Write(b []byte) (int, error) { + return w.Writer.Write(b) +} + +func (w *bodyDumpResponseWriter) Flush() { + w.ResponseWriter.(http.Flusher).Flush() +} + +func (w *bodyDumpResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) { + return w.ResponseWriter.(http.Hijacker).Hijack() +} diff --git a/internal/api/server_config.go b/internal/api/server_config.go index 41194f9..316cace 100644 --- a/internal/api/server_config.go +++ b/internal/api/server_config.go @@ -1,17 +1,61 @@ package api -import "github.com/allaboutapps/integresql/pkg/util" +import ( + "github.com/allaboutapps/integresql/pkg/util" + "github.com/rs/zerolog" +) type ServerConfig struct { Address string Port int DebugEndpoints bool + Logger LoggerConfig + Echo EchoConfig +} + +type EchoConfig struct { + Debug bool + ListenAddress string + EnableCORSMiddleware bool + EnableLoggerMiddleware bool + EnableRecoverMiddleware bool + EnableRequestIDMiddleware bool + EnableTrailingSlashMiddleware bool +} + +type LoggerConfig struct { + Level zerolog.Level + RequestLevel zerolog.Level + LogRequestBody bool + LogRequestHeader bool + LogRequestQuery bool + LogResponseBody bool + LogResponseHeader bool + PrettyPrintConsole bool } func DefaultServerConfigFromEnv() ServerConfig { return ServerConfig{ Address: util.GetEnv("INTEGRESQL_ADDRESS", ""), Port: util.GetEnvAsInt("INTEGRESQL_PORT", 5000), - DebugEndpoints: util.GetEnvAsBool("INTEGRESQL_DEBUG_ENDPOINTS", true), + DebugEndpoints: util.GetEnvAsBool("INTEGRESQL_DEBUG_ENDPOINTS", true), // https://golang.org/pkg/net/http/pprof/ + Echo: EchoConfig{ + Debug: util.GetEnvAsBool("INTEGRESQL_ECHO_DEBUG", false), + EnableCORSMiddleware: util.GetEnvAsBool("INTEGRESQL_ECHO_ENABLE_CORS_MIDDLEWARE", true), + EnableLoggerMiddleware: util.GetEnvAsBool("INTEGRESQL_ECHO_ENABLE_LOGGER_MIDDLEWARE", true), + EnableRecoverMiddleware: util.GetEnvAsBool("INTEGRESQL_ECHO_ENABLE_RECOVER_MIDDLEWARE", true), + EnableRequestIDMiddleware: util.GetEnvAsBool("INTEGRESQL_ECHO_ENABLE_REQUEST_ID_MIDDLEWARE", true), + EnableTrailingSlashMiddleware: util.GetEnvAsBool("INTEGRESQL_ECHO_ENABLE_TRAILING_SLASH_MIDDLEWARE", true), + }, + Logger: LoggerConfig{ + Level: util.LogLevelFromString(util.GetEnv("INTEGRESQL_LOGGER_LEVEL", zerolog.InfoLevel.String())), + RequestLevel: util.LogLevelFromString(util.GetEnv("INTEGRESQL_LOGGER_REQUEST_LEVEL", zerolog.DebugLevel.String())), + LogRequestBody: util.GetEnvAsBool("INTEGRESQL_LOGGER_LOG_REQUEST_BODY", false), + LogRequestHeader: util.GetEnvAsBool("INTEGRESQL_LOGGER_LOG_REQUEST_HEADER", false), + LogRequestQuery: util.GetEnvAsBool("INTEGRESQL_LOGGER_LOG_REQUEST_QUERY", false), + LogResponseBody: util.GetEnvAsBool("INTEGRESQL_LOGGER_LOG_RESPONSE_BODY", false), + LogResponseHeader: util.GetEnvAsBool("INTEGRESQL_LOGGER_LOG_RESPONSE_HEADER", false), + PrettyPrintConsole: util.GetEnvAsBool("INTEGRESQL_LOGGER_PRETTY_PRINT_CONSOLE", false), + }, } } diff 
--git a/internal/router/echo_logger.go b/internal/router/echo_logger.go new file mode 100644 index 0000000..0931855 --- /dev/null +++ b/internal/router/echo_logger.go @@ -0,0 +1,13 @@ +package router + +import "github.com/rs/zerolog" + +type echoLogger struct { + level zerolog.Level + log zerolog.Logger +} + +func (l *echoLogger) Write(p []byte) (n int, err error) { + l.log.WithLevel(l.level).Msgf("%s", p) + return len(p), nil +} diff --git a/internal/router/router.go b/internal/router/router.go index 35d5505..4fac61d 100644 --- a/internal/router/router.go +++ b/internal/router/router.go @@ -5,28 +5,67 @@ import ( "github.com/allaboutapps/integresql/internal/api" "github.com/allaboutapps/integresql/internal/api/admin" + "github.com/allaboutapps/integresql/internal/api/middleware" "github.com/allaboutapps/integresql/internal/api/templates" "github.com/labstack/echo/v4" echoMiddleware "github.com/labstack/echo/v4/middleware" + "github.com/rs/zerolog/log" ) func Init(s *api.Server) { s.Echo = echo.New() - s.Echo.Debug = false + s.Echo.Debug = s.Config.Echo.Debug s.Echo.HideBanner = true + s.Echo.Logger.SetOutput(&echoLogger{level: s.Config.Logger.RequestLevel, log: log.With().Str("component", "echo").Logger()}) - s.Echo.Pre(echoMiddleware.RemoveTrailingSlash()) + // --- + // General middleware + if s.Config.Echo.EnableTrailingSlashMiddleware { + s.Echo.Pre(echoMiddleware.RemoveTrailingSlash()) + } else { + log.Warn().Msg("Disabling trailing slash middleware due to environment config") + } - s.Echo.Use(echoMiddleware.Recover()) - s.Echo.Use(echoMiddleware.RequestID()) - s.Echo.Use(echoMiddleware.Logger()) + if s.Config.Echo.EnableRecoverMiddleware { + s.Echo.Use(echoMiddleware.Recover()) + } else { + log.Warn().Msg("Disabling recover middleware due to environment config") + } - admin.InitRoutes(s) - templates.InitRoutes(s) + if s.Config.Echo.EnableRequestIDMiddleware { + s.Echo.Use(echoMiddleware.RequestID()) + } else { + log.Warn().Msg("Disabling request ID middleware due to environment config") + } + + if s.Config.Echo.EnableLoggerMiddleware { + s.Echo.Use(middleware.LoggerWithConfig(middleware.LoggerConfig{ + Level: s.Config.Logger.RequestLevel, + LogRequestBody: s.Config.Logger.LogRequestBody, + LogRequestHeader: s.Config.Logger.LogRequestHeader, + LogRequestQuery: s.Config.Logger.LogRequestQuery, + LogResponseBody: s.Config.Logger.LogResponseBody, + LogResponseHeader: s.Config.Logger.LogResponseHeader, + RequestBodyLogSkipper: func(req *http.Request) bool { + return middleware.DefaultRequestBodyLogSkipper(req) + }, + ResponseBodyLogSkipper: func(req *http.Request, res *echo.Response) bool { + return middleware.DefaultResponseBodyLogSkipper(req, res) + }, + Skipper: func(c echo.Context) bool { + return false + }, + })) + } else { + log.Warn().Msg("Disabling logger middleware due to environment config") + } // enable debug endpoints only if requested if s.Config.DebugEndpoints { s.Echo.GET("/debug/*", echo.WrapHandler(http.DefaultServeMux)) } + + admin.InitRoutes(s) + templates.InitRoutes(s) } diff --git a/internal/router/router_test.go b/internal/router/router_test.go new file mode 100644 index 0000000..c393096 --- /dev/null +++ b/internal/router/router_test.go @@ -0,0 +1,35 @@ +package router_test + +import ( + "testing" + + "github.com/allaboutapps/integresql/internal/api" + "github.com/allaboutapps/integresql/internal/test" + "github.com/stretchr/testify/require" +) + +func TestPprofEnabledNoAuth(t *testing.T) { + config := api.DefaultServerConfigFromEnv() + + // these are typically our 
default values, however we force set them here to ensure those are set while test execution. + config.DebugEndpoints = true + + test.WithTestServerConfigurable(t, config, func(s *api.Server) { + res := test.PerformRequest(t, s, "GET", "/debug/pprof/heap/", nil, nil) + require.Equal(t, 200, res.Result().StatusCode) + + // index + res = test.PerformRequest(t, s, "GET", "/debug/pprof/", nil, nil) + require.Equal(t, 301, res.Result().StatusCode) + }) +} + +func TestPprofDisabled(t *testing.T) { + config := api.DefaultServerConfigFromEnv() + config.DebugEndpoints = false + + test.WithTestServerConfigurable(t, config, func(s *api.Server) { + res := test.PerformRequest(t, s, "GET", "/debug/pprof/heap", nil, nil) + require.Equal(t, 404, res.Result().StatusCode) + }) +} diff --git a/internal/test/helper_request.go b/internal/test/helper_request.go new file mode 100644 index 0000000..7693358 --- /dev/null +++ b/internal/test/helper_request.go @@ -0,0 +1,132 @@ +package test + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "net/http" + "net/http/httptest" + "testing" + + "github.com/allaboutapps/integresql/internal/api" + "github.com/labstack/echo/v4" +) + +type GenericPayload map[string]interface{} +type GenericArrayPayload []interface{} + +func (g GenericPayload) Reader(t *testing.T) *bytes.Reader { + t.Helper() + + b, err := json.Marshal(g) + if err != nil { + t.Fatalf("failed to serialize payload: %v", err) + } + + return bytes.NewReader(b) +} + +func (g GenericArrayPayload) Reader(t *testing.T) *bytes.Reader { + t.Helper() + + b, err := json.Marshal(g) + if err != nil { + t.Fatalf("failed to serialize payload: %v", err) + } + + return bytes.NewReader(b) +} + +func PerformRequestWithParams(t *testing.T, s *api.Server, method string, path string, body GenericPayload, headers http.Header, queryParams map[string]string) *httptest.ResponseRecorder { + t.Helper() + + if body == nil { + return PerformRequestWithRawBody(t, s, method, path, nil, headers, queryParams) + } + + return PerformRequestWithRawBody(t, s, method, path, body.Reader(t), headers, queryParams) +} + +func PerformRequestWithArrayAndParams(t *testing.T, s *api.Server, method string, path string, body GenericArrayPayload, headers http.Header, queryParams map[string]string) *httptest.ResponseRecorder { + t.Helper() + + if body == nil { + return PerformRequestWithRawBody(t, s, method, path, nil, headers, queryParams) + } + + return PerformRequestWithRawBody(t, s, method, path, body.Reader(t), headers, queryParams) +} + +func PerformRequestWithRawBody(t *testing.T, s *api.Server, method string, path string, body io.Reader, headers http.Header, queryParams map[string]string) *httptest.ResponseRecorder { + t.Helper() + + req := httptest.NewRequest(method, path, body) + + if headers != nil { + req.Header = headers + } + if body != nil && len(req.Header.Get(echo.HeaderContentType)) == 0 { + req.Header.Set(echo.HeaderContentType, echo.MIMEApplicationJSON) + } + + if queryParams != nil { + q := req.URL.Query() + for k, v := range queryParams { + q.Add(k, v) + } + + req.URL.RawQuery = q.Encode() + } + + res := httptest.NewRecorder() + + s.Echo.ServeHTTP(res, req) + + return res +} + +func PerformRequest(t *testing.T, s *api.Server, method string, path string, body GenericPayload, headers http.Header) *httptest.ResponseRecorder { + t.Helper() + + return PerformRequestWithParams(t, s, method, path, body, headers, nil) +} + +func PerformRequestWithArray(t *testing.T, s *api.Server, method string, path string, body GenericArrayPayload, headers 
http.Header) *httptest.ResponseRecorder { + t.Helper() + + return PerformRequestWithArrayAndParams(t, s, method, path, body, headers, nil) +} + +func ParseResponseBody(t *testing.T, res *httptest.ResponseRecorder, v interface{}) { + t.Helper() + + if err := json.NewDecoder(res.Result().Body).Decode(&v); err != nil { + t.Fatalf("Failed to parse response body: %v", err) + } +} + +// func ParseResponseAndValidate(t *testing.T, res *httptest.ResponseRecorder, v runtime.Validatable) { +// t.Helper() + +// ParseResponseBody(t, res, &v) + +// if err := v.Validate(strfmt.Default); err != nil { +// t.Fatalf("Failed to validate response: %v", err) +// } +// } + +func HeadersWithAuth(t *testing.T, token string) http.Header { + t.Helper() + + return HeadersWithConfigurableAuth(t, "Bearer", token) +} + +func HeadersWithConfigurableAuth(t *testing.T, scheme string, token string) http.Header { + t.Helper() + + headers := http.Header{} + headers.Set(echo.HeaderAuthorization, fmt.Sprintf("%s %s", scheme, token)) + + return headers +} diff --git a/internal/test/test_server.go b/internal/test/test_server.go new file mode 100644 index 0000000..45c108c --- /dev/null +++ b/internal/test/test_server.go @@ -0,0 +1,62 @@ +package test + +import ( + "context" + "testing" + + "github.com/allaboutapps/integresql/internal/api" + "github.com/allaboutapps/integresql/internal/router" +) + +// WithTestServer returns a fully configured server (using the default server config). +func WithTestServer(t *testing.T, closure func(s *api.Server)) { + t.Helper() + defaultConfig := api.DefaultServerConfigFromEnv() + WithTestServerConfigurable(t, defaultConfig, closure) +} + +// WithTestServerConfigurable returns a fully configured server, allowing for configuration using the provided server config. +func WithTestServerConfigurable(t *testing.T, config api.ServerConfig, closure func(s *api.Server)) { + t.Helper() + ctx := context.Background() + WithTestServerConfigurableContext(ctx, t, config, closure) +} + +// WithTestServerConfigurableContext returns a fully configured server, allowing for configuration using the provided server config. +// The provided context will be used during setup (instead of the default background context). 
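Note (illustrative sketch, not part of the diff): the request helpers and the test server wrapper are meant to be combined, as router_test.go does for the pprof routes. A minimal sketch of a JSON round trip; the package name, the "/api/v1/templates" route, the payload and the expected status are assumptions for illustration, not assertions about the actual API.

    package example_test

    import (
        "net/http"
        "testing"

        "github.com/allaboutapps/integresql/internal/api"
        "github.com/allaboutapps/integresql/internal/test"
        "github.com/stretchr/testify/require"
    )

    func TestTemplatesEndpointSketch(t *testing.T) {
        test.WithTestServer(t, func(s *api.Server) {
            // hypothetical payload and route, for demonstrating the helpers only
            payload := test.GenericPayload{"hash": "deadbeef"}

            res := test.PerformRequest(t, s, "POST", "/api/v1/templates", payload, nil)
            require.Equal(t, http.StatusOK, res.Result().StatusCode)

            var body map[string]interface{}
            test.ParseResponseBody(t, res, &body)
        })
    }
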
+func WithTestServerConfigurableContext(ctx context.Context, t *testing.T, config api.ServerConfig, closure func(s *api.Server)) { + t.Helper() + execClosureNewTestServer(ctx, t, config, closure) + +} + +// Executes closure on a new test server +func execClosureNewTestServer(ctx context.Context, t *testing.T, config api.ServerConfig, closure func(s *api.Server)) { + t.Helper() + + // https://stackoverflow.com/questions/43424787/how-to-use-next-available-port-in-http-listenandserve + // You may use port 0 to indicate you're not specifying an exact port but you want a free, available port selected by the system + config.Address = ":0" + + s := api.NewServer(config) + + if err := s.InitManager(ctx); err != nil { + t.Fatalf("failed to start manager: %v", err) + } + + router.Init(s) + + closure(s) + + // echo is managed and should close automatically after running the test + if err := s.Echo.Shutdown(ctx); err != nil { + t.Fatalf("failed to shutdown server: %v", err) + } + + if err := s.Manager.Disconnect(ctx, true); err != nil { + t.Fatalf("failed to shutdown manager: %v", err) + } + + // disallow any further refs to managed object after running the test + s = nil +} diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 24f6218..67f13f0 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -3,6 +3,7 @@ package manager import ( "context" "database/sql" + "encoding/json" "errors" "fmt" "runtime/trace" @@ -11,7 +12,10 @@ import ( "github.com/allaboutapps/integresql/pkg/db" "github.com/allaboutapps/integresql/pkg/pool" "github.com/allaboutapps/integresql/pkg/templates" + "github.com/allaboutapps/integresql/pkg/util" "github.com/lib/pq" + "github.com/rs/zerolog" + "github.com/rs/zerolog/log" ) var ( @@ -64,6 +68,15 @@ func New(config ManagerConfig) (*Manager, ManagerConfig) { config.PoolConfig.MaxParallelTasks = 1 } + // debug log final derived config + c, err := json.Marshal(config) + + if err != nil { + log.Fatal().Err(err).Msg("Failed to marshal the env") + } + + log.Debug().RawJSON("config", c).Msg("manager.New") + m := &Manager{ config: config, db: nil, @@ -80,38 +93,55 @@ func DefaultFromEnv() *Manager { } func (m *Manager) Connect(ctx context.Context) error { + + log := m.getManagerLogger(ctx, "Connect") + if m.db != nil { - return errors.New("manager is already connected") + err := errors.New("manager is already connected") + log.Error().Err(err) + return err } db, err := sql.Open("postgres", m.config.ManagerDatabaseConfig.ConnectionString()) if err != nil { + log.Error().Err(err).Msg("unable to connect") return err } if err := db.PingContext(ctx); err != nil { + log.Error().Err(err).Msg("unable to ping") return err } m.db = db + log.Debug().Msg("connected.") + return nil } -func (m *Manager) Disconnect(_ context.Context, ignoreCloseError bool) error { +func (m *Manager) Disconnect(ctx context.Context, ignoreCloseError bool) error { + + log := m.getManagerLogger(ctx, "Disconnect").With().Bool("ignoreCloseError", ignoreCloseError).Logger() + if m.db == nil { - return errors.New("manager is not connected") + err := errors.New("manager is not connected") + log.Error().Err(err) + return err } // stop the pool before closing DB connection m.pool.Stop() if err := m.db.Close(); err != nil && !ignoreCloseError { + log.Error().Err(err) return err } m.db = nil + log.Warn().Msg("disconnected.") + return nil } @@ -128,37 +158,53 @@ func (m Manager) Ready() bool { } func (m *Manager) Initialize(ctx context.Context) error { + + log := m.getManagerLogger(ctx, "Initialize") + if 
!m.Ready() { if err := m.Connect(ctx); err != nil { + log.Error().Err(err) return err } } rows, err := m.db.QueryContext(ctx, "SELECT datname FROM pg_database WHERE datname LIKE $1", fmt.Sprintf("%s_%s_%%", m.config.DatabasePrefix, m.config.PoolConfig.TestDBNamePrefix)) if err != nil { + log.Error().Err(err) return err } defer rows.Close() + log.Debug().Msg("Dropping unmanaged dbs...") + for rows.Next() { var dbName string if err := rows.Scan(&dbName); err != nil { return err } + log.Warn().Str("dbName", dbName).Msg("Dropping...") + if _, err := m.db.Exec(fmt.Sprintf("DROP DATABASE %s", pq.QuoteIdentifier(dbName))); err != nil { + log.Error().Str("dbName", dbName).Err(err) return err } } + log.Info().Msg("initialized.") + return nil } func (m Manager) InitializeTemplateDatabase(ctx context.Context, hash string) (db.TemplateDatabase, error) { ctx, task := trace.NewTask(ctx, "initialize_template_db") + + log := m.getManagerLogger(ctx, "InitializeTemplateDatabase").With().Str("hash", hash).Logger() + defer task.End() if !m.Ready() { + log.Error().Msg("not ready") return db.TemplateDatabase{}, ErrManagerNotReady } @@ -183,6 +229,8 @@ func (m Manager) InitializeTemplateDatabase(ctx context.Context, hash string) (d reg := trace.StartRegion(ctx, "drop_and_create_db") if err := m.dropAndCreateDatabase(ctx, dbName, m.config.ManagerDatabaseConfig.Username, m.config.TemplateDatabaseTemplate); err != nil { + + log.Error().Err(err).Msg("triggering unsafe remove after dropAndCreateDatabase failed...") m.templates.RemoveUnsafe(ctx, hash) return db.TemplateDatabase{}, err @@ -192,6 +240,8 @@ func (m Manager) InitializeTemplateDatabase(ctx context.Context, hash string) (d // if template config has been overwritten, the existing pool needs to be removed err := m.pool.RemoveAllWithHash(ctx, hash, m.dropTestPoolDB) if err != nil && !errors.Is(err, pool.ErrUnknownHash) { + + log.Error().Err(err).Msg("triggering unsafe remove after RemoveAllWithHash failed...") m.templates.RemoveUnsafe(ctx, hash) return db.TemplateDatabase{}, err @@ -208,14 +258,18 @@ func (m Manager) InitializeTemplateDatabase(ctx context.Context, hash string) (d func (m Manager) DiscardTemplateDatabase(ctx context.Context, hash string) error { ctx, task := trace.NewTask(ctx, "discard_template_db") + log := m.getManagerLogger(ctx, "DiscardTemplateDatabase").With().Str("hash", hash).Logger() + defer task.End() if !m.Ready() { + log.Error().Msg("not ready") return ErrManagerNotReady } // first remove all DB with this hash if err := m.pool.RemoveAllWithHash(ctx, hash, m.dropTestPoolDB); err != nil && !errors.Is(err, pool.ErrUnknownHash) { + log.Error().Err(err).Msg("remove all err") return err } @@ -224,6 +278,9 @@ func (m Manager) DiscardTemplateDatabase(ctx context.Context, hash string) error if !found { // even if a template is not found in the collection, it might still exist in the DB + + log.Warn().Msg("template not found, checking for existance...") + dbName = m.makeTemplateDatabaseName(hash) exists, err := m.checkDatabaseExists(ctx, dbName) if err != nil { @@ -237,19 +294,26 @@ func (m Manager) DiscardTemplateDatabase(ctx context.Context, hash string) error template.SetState(ctx, templates.TemplateStateDiscarded) } + log.Debug().Msg("found template database, dropping...") + return m.dropDatabase(ctx, dbName) } func (m Manager) FinalizeTemplateDatabase(ctx context.Context, hash string) (db.TemplateDatabase, error) { ctx, task := trace.NewTask(ctx, "finalize_template_db") + + log := m.getManagerLogger(ctx, 
"FinalizeTemplateDatabase").With().Str("hash", hash).Logger() + defer task.End() if !m.Ready() { + log.Error().Msg("not ready") return db.TemplateDatabase{}, ErrManagerNotReady } template, found := m.templates.Get(ctx, hash) if !found { + log.Error().Msg("bailout: template not found") return db.TemplateDatabase{}, ErrTemplateNotFound } @@ -258,28 +322,36 @@ func (m Manager) FinalizeTemplateDatabase(ctx context.Context, hash string) (db. // early bailout if we are already ready (multiple calls) if state == templates.TemplateStateFinalized { + log.Warn().Msg("bailout: template already finalized") return db.TemplateDatabase{Database: template.Database}, ErrTemplateAlreadyInitialized } // Disallow transition from discarded to ready if state == templates.TemplateStateDiscarded { + log.Error().Msg("bailout: template discarded!") return db.TemplateDatabase{}, ErrTemplateDiscarded } // Init a pool with this hash + log.Trace().Msg("init hash pool...") m.pool.InitHashPool(ctx, template.Database, m.recreateTestPoolDB) lockedTemplate.SetState(ctx, templates.TemplateStateFinalized) + log.Debug().Msg("Template database finalized successfully.") return db.TemplateDatabase{Database: template.Database}, nil } // GetTestDatabase tries to get a ready test DB from an existing pool. func (m Manager) GetTestDatabase(ctx context.Context, hash string) (db.TestDatabase, error) { ctx, task := trace.NewTask(ctx, "get_test_db") + + log := m.getManagerLogger(ctx, "GetTestDatabase").With().Str("hash", hash).Logger() + defer task.End() if !m.Ready() { + log.Error().Msg("not ready") return db.TestDatabase{}, ErrManagerNotReady } @@ -302,6 +374,7 @@ func (m Manager) GetTestDatabase(ctx context.Context, hash string) (db.TestDatab // Template exists, but the pool is not there - // it must have been removed. // It needs to be reinitialized. + log.Warn().Err(err).Msg("ErrUnknownHash, going to InitHashPool and recursively calling us again...") m.pool.InitHashPool(ctx, template.Database, m.recreateTestPoolDB) testDB, err = m.pool.GetTestDatabase(ctx, template.TemplateHash, m.config.TestDatabaseGetTimeout) @@ -336,14 +409,7 @@ func (m Manager) ReturnTestDatabase(ctx context.Context, hash string, id int) er } // template is ready, we can return unchanged testDB to the pool - if err := m.pool.ReturnTestDatabase(ctx, hash, id); err != nil { - - fmt.Printf("manager.ReturnTestDatabase error: %v\n", err) - - return err - } - - return nil + return m.pool.ReturnTestDatabase(ctx, hash, id) } // RecreateTestDatabase recreates the test DB according to the template and returns it back to the pool. 
@@ -367,21 +433,20 @@ func (m *Manager) RecreateTestDatabase(ctx context.Context, hash string, id int) } // template is ready, we can return the testDB to the pool and have it cleaned up - if err := m.pool.RecreateTestDatabase(ctx, hash, id); err != nil { - - fmt.Printf("manager.RecreateTestDatabase error: %v\n", err) - - return err - } - - return nil + return m.pool.RecreateTestDatabase(ctx, hash, id) } func (m Manager) ClearTrackedTestDatabases(ctx context.Context, hash string) error { + + log := m.getManagerLogger(ctx, "ClearTrackedTestDatabases").With().Str("hash", hash).Logger() + if !m.Ready() { + log.Error().Msg("not ready") return ErrManagerNotReady } + log.Warn().Msg("clearing...") + err := m.pool.RemoveAllWithHash(ctx, hash, m.dropTestPoolDB) if errors.Is(err, pool.ErrUnknownHash) { return ErrTemplateNotFound @@ -391,10 +456,16 @@ func (m Manager) ClearTrackedTestDatabases(ctx context.Context, hash string) err } func (m Manager) ResetAllTracking(ctx context.Context) error { + + log := m.getManagerLogger(ctx, "ResetAllTracking") + if !m.Ready() { + log.Error().Msg("not ready") return ErrManagerNotReady } + log.Warn().Msg("resetting...") + // remove all templates to disallow any new test DB creation from existing templates m.templates.RemoveAll(ctx) @@ -404,7 +475,8 @@ func (m Manager) ResetAllTracking(ctx context.Context) error { func (m Manager) checkDatabaseExists(ctx context.Context, dbName string) (bool, error) { var exists bool - // fmt.Printf("SELECT 1 AS exists FROM pg_database WHERE datname = %s\n", dbName) + log := m.getManagerLogger(ctx, "checkDatabaseExists") + log.Trace().Msgf("SELECT 1 AS exists FROM pg_database WHERE datname = %s\n", dbName) if err := m.db.QueryRowContext(ctx, "SELECT 1 AS exists FROM pg_database WHERE datname = $1", dbName).Scan(&exists); err != nil { if err == sql.ErrNoRows { @@ -440,7 +512,8 @@ func (m Manager) createDatabase(ctx context.Context, dbName string, owner string defer trace.StartRegion(ctx, "create_db").End() - // fmt.Printf("CREATE DATABASE %s WITH OWNER %s TEMPLATE %s\n", pq.QuoteIdentifier(dbName), pq.QuoteIdentifier(owner), pq.QuoteIdentifier(template)) + log := m.getManagerLogger(ctx, "createDatabase") + log.Trace().Msgf("CREATE DATABASE %s WITH OWNER %s TEMPLATE %s\n", pq.QuoteIdentifier(dbName), pq.QuoteIdentifier(owner), pq.QuoteIdentifier(template)) if _, err := m.db.ExecContext(ctx, fmt.Sprintf("CREATE DATABASE %s WITH OWNER %s TEMPLATE %s", pq.QuoteIdentifier(dbName), pq.QuoteIdentifier(owner), pq.QuoteIdentifier(template))); err != nil { return err @@ -472,7 +545,8 @@ func (m Manager) dropDatabase(ctx context.Context, dbName string) error { defer trace.StartRegion(ctx, "drop_db").End() - // fmt.Printf("DROP DATABASE IF EXISTS %s\n", pq.QuoteIdentifier(dbName)) + log := m.getManagerLogger(ctx, "dropDatabase") + log.Trace().Msgf("DROP DATABASE IF EXISTS %s\n", pq.QuoteIdentifier(dbName)) if _, err := m.db.ExecContext(ctx, fmt.Sprintf("DROP DATABASE IF EXISTS %s", pq.QuoteIdentifier(dbName))); err != nil { if strings.Contains(err.Error(), "is being accessed by other users") { @@ -500,3 +574,7 @@ func (m Manager) dropAndCreateDatabase(ctx context.Context, dbName string, owner func (m Manager) makeTemplateDatabaseName(hash string) string { return fmt.Sprintf("%s_%s_%s", m.config.DatabasePrefix, m.config.TemplateDatabasePrefix, hash) } + +func (m Manager) getManagerLogger(ctx context.Context, managerFunction string) zerolog.Logger { + return util.LogFromContext(ctx).With().Str("managerFn", managerFunction).Logger() +} diff --git 
a/pkg/manager/manager_config.go b/pkg/manager/manager_config.go index 24c56e7..d59f000 100644 --- a/pkg/manager/manager_config.go +++ b/pkg/manager/manager_config.go @@ -11,13 +11,13 @@ import ( // we explicitly want to access this struct via manager.ManagerConfig, thus we disable revive for the next line type ManagerConfig struct { //nolint:revive - ManagerDatabaseConfig db.DatabaseConfig + ManagerDatabaseConfig db.DatabaseConfig `json:"-"` // sensitive TemplateDatabaseTemplate string DatabasePrefix string TemplateDatabasePrefix string TestDatabaseOwner string - TestDatabaseOwnerPassword string + TestDatabaseOwnerPassword string `json:"-"` // sensitive TemplateFinalizeTimeout time.Duration // Time to wait for a template to transition into the 'finalized' state TestDatabaseGetTimeout time.Duration // Time to wait for a ready database diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index a2488cf..45f2a44 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -509,7 +509,7 @@ func TestManagerDiscardTemplateDatabase(t *testing.T) { if err == nil { success++ } else { - // fmt.Println(err) + // t.Log(err) errored++ } } @@ -577,7 +577,7 @@ func TestManagerDiscardThenReinitializeTemplateDatabase(t *testing.T) { if err == nil { success++ } else { - // fmt.Println(err) + t.Log(err) errored++ } } diff --git a/pkg/pool/pool.go b/pkg/pool/pool.go index a7a16c7..869e581 100644 --- a/pkg/pool/pool.go +++ b/pkg/pool/pool.go @@ -3,12 +3,13 @@ package pool import ( "context" "errors" - "fmt" "runtime/trace" "sync" "time" "github.com/allaboutapps/integresql/pkg/db" + "github.com/allaboutapps/integresql/pkg/util" + "github.com/rs/zerolog" ) var ( @@ -27,8 +28,6 @@ const ( dbStateRecreating // In the process of being recreated (to prevent concurrent cleans) ) -const minConcurrentTasksNum = 1 - type existingDB struct { state dbState db.TestDatabase @@ -75,10 +74,6 @@ type HashPool struct { // Starts the workers to extend the pool in background up to requested inital number. 
func NewHashPool(cfg PoolConfig, templateDB db.Database, initDBFunc RecreateDBFunc) *HashPool { - if cfg.MaxParallelTasks < minConcurrentTasksNum { - cfg.MaxParallelTasks = minConcurrentTasksNum - } - pool := &HashPool{ dbs: make([]existingDB, 0, cfg.MaxPoolSize), ready: make(chan int, cfg.MaxPoolSize), @@ -97,10 +92,15 @@ func NewHashPool(cfg PoolConfig, templateDB db.Database, initDBFunc RecreateDBFu } func (pool *HashPool) Start() { + + log := pool.getPoolLogger(context.Background(), "Start") pool.Lock() + log.Debug().Msg("starting...") + defer pool.Unlock() if pool.running { + log.Warn().Msg("bailout already running!") return } @@ -118,11 +118,18 @@ func (pool *HashPool) Start() { defer pool.wg.Done() pool.controlLoop(ctx, cancel) }() + + log.Info().Msg("started!") } func (pool *HashPool) Stop() { + + log := pool.getPoolLogger(context.Background(), "Stop") + log.Debug().Msg("stopping...") + pool.Lock() if !pool.running { + log.Warn().Msg("bailout already stopped!") return } pool.running = false @@ -131,24 +138,29 @@ func (pool *HashPool) Stop() { pool.tasksChan <- workerTaskStop pool.wg.Wait() pool.workerContext = nil + log.Warn().Msg("stopped!") } -func (pool *HashPool) GetTestDatabase(ctx context.Context, hash string, timeout time.Duration) (db db.TestDatabase, err error) { +func (pool *HashPool) GetTestDatabase(ctx context.Context, timeout time.Duration) (db db.TestDatabase, err error) { var index int - fmt.Printf("pool#%s: waiting for ready ID...\n", hash) + log := pool.getPoolLogger(ctx, "GetTestDatabase") + log.Trace().Msg("waiting for ready ID...") select { case <-time.After(timeout): err = ErrTimeout + log.Error().Err(err).Dur("timeout", timeout).Msg("timeout") return case <-ctx.Done(): err = ctx.Err() + log.Warn().Err(err).Msg("ctx done") return case index = <-pool.ready: } - fmt.Printf("pool#%s: got ready ID=%v\n", hash, index) + log = log.With().Int("id", index).Logger() + log.Trace().Msg("got ready testdatabase!") reg := trace.StartRegion(ctx, "wait_for_lock_hash_pool") pool.Lock() @@ -158,16 +170,15 @@ func (pool *HashPool) GetTestDatabase(ctx context.Context, hash string, timeout // sanity check, should never happen if index < 0 || index >= len(pool.dbs) { err = ErrInvalidIndex + log.Error().Err(err).Int("dbs", len(pool.dbs)).Msg("index out of bounds!") return } testDB := pool.dbs[index] // sanity check, should never happen - we got this index from 'ready' channel if testDB.state != dbStateReady { - - fmt.Printf("pool#%s: GetTestDatabase ErrInvalidState ID=%v\n", hash, index) - err = ErrInvalidState + log.Error().Err(err).Msgf("testdatabase is not in ready state=%v!", testDB.state) return } @@ -179,24 +190,26 @@ func (pool *HashPool) GetTestDatabase(ctx context.Context, hash string, timeout pool.dirty <- index if len(pool.dbs) < pool.PoolConfig.MaxPoolSize { - fmt.Printf("pool#%s: Conditional extend\n", hash) + log.Trace().Msg("push workerTaskExtend") pool.tasksChan <- workerTaskExtend } // we try to ensure that InitialPoolSize count is staying ready // thus, we try to move the oldest dirty dbs into recreating with the workerTaskAutoCleanDirty if len(pool.dbs) >= pool.PoolConfig.MaxPoolSize && (len(pool.ready)+len(pool.recreating)) < pool.InitialPoolSize { + log.Trace().Msg("push workerTaskAutoCleanDirty") pool.tasksChan <- workerTaskAutoCleanDirty } - fmt.Printf("pool#%s: ready=%d, dirty=%d, recreating=%d, tasksChan=%d, dbs=%d initial=%d max=%d (GetTestDatabase)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.recreating), len(pool.tasksChan), 
len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize) + pool.unsafeTraceLogStats(log) return testDB.TestDatabase, nil } func (pool *HashPool) workerTaskLoop(ctx context.Context, taskChan <-chan workerTask, MaxParallelTasks int) { - fmt.Printf("pool#%s: workerTaskLoop\n", pool.templateDB.TemplateHash) + log := pool.getPoolLogger(ctx, "workerTaskLoop") + log.Debug().Msg("starting...") handlers := map[workerTask]func(ctx context.Context) error{ workerTaskExtend: ignoreErrs(pool.extend, ErrPoolFull, context.Canceled), @@ -209,12 +222,13 @@ func (pool *HashPool) workerTaskLoop(ctx context.Context, taskChan <-chan worker for task := range taskChan { handler, ok := handlers[task] if !ok { - fmt.Printf("invalid task: %s", task) + log.Error().Msgf("invalid task: %s", task) continue } select { case <-ctx.Done(): + log.Warn().Err(ctx.Err()).Msg("ctx done!") return case semaphore <- struct{}{}: } @@ -227,10 +241,10 @@ func (pool *HashPool) workerTaskLoop(ctx context.Context, taskChan <-chan worker <-semaphore }() - fmt.Printf("pool#%s: workerTaskLoop task=%v\n", pool.templateDB.TemplateHash, task) + log.Debug().Msgf("task=%v", task) if err := handler(ctx); err != nil { - fmt.Printf("pool#%s: workerTaskLoop task=%v FAILED! err=%v\n", pool.templateDB.TemplateHash, task, err.Error()) + log.Error().Err(err).Msgf("task=%v FAILED!", task) } }(task) @@ -239,9 +253,9 @@ func (pool *HashPool) workerTaskLoop(ctx context.Context, taskChan <-chan worker func (pool *HashPool) controlLoop(ctx context.Context, cancel context.CancelFunc) { - fmt.Printf("pool#%s: controlLoop\n", pool.templateDB.TemplateHash) + log := pool.getPoolLogger(ctx, "controlLoop") + log.Debug().Msg("starting...") - // ctx, cancel := context.WithCancel(context.Background()) defer cancel() workerTasksChan := make(chan workerTask, len(pool.tasksChan)) @@ -253,6 +267,7 @@ func (pool *HashPool) controlLoop(ctx context.Context, cancel context.CancelFunc for task := range pool.tasksChan { if task == workerTaskStop { + log.Debug().Msg("stopping...") close(workerTasksChan) cancel() return @@ -269,21 +284,28 @@ func (pool *HashPool) controlLoop(ctx context.Context, cancel context.CancelFunc // ReturnTestDatabase returns the given test DB directly to the pool, without cleaning (recreating it). 
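Note (illustrative sketch, not part of the diff): workerTaskLoop above bounds concurrency with a buffered channel used as a semaphore plus a WaitGroup. A self-contained sketch of that pattern; runTasks, the task strings and the sleep are stand-ins, not the pool's actual extend/cleanDirty handlers.

    package main

    import (
        "context"
        "fmt"
        "sync"
        "time"
    )

    // runTasks lets at most maxParallel handlers run at once: acquiring a slot
    // blocks on the buffered semaphore channel, releasing happens in the deferred
    // cleanup of each goroutine, and the WaitGroup drains everything at the end.
    func runTasks(ctx context.Context, tasks <-chan string, maxParallel int) {
        var wg sync.WaitGroup
        semaphore := make(chan struct{}, maxParallel)

        for task := range tasks {
            select {
            case <-ctx.Done():
                return
            case semaphore <- struct{}{}: // acquire a slot
            }

            wg.Add(1)
            go func(task string) {
                defer func() {
                    wg.Done()
                    <-semaphore // release the slot
                }()
                time.Sleep(50 * time.Millisecond) // stand-in for real work
                fmt.Println("handled", task)
            }(task)
        }

        wg.Wait()
    }

    func main() {
        tasks := make(chan string, 8)
        for i := 0; i < 8; i++ {
            tasks <- fmt.Sprintf("task-%d", i)
        }
        close(tasks)

        runTasks(context.Background(), tasks, 2)
    }
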
func (pool *HashPool) ReturnTestDatabase(ctx context.Context, id int) error { + + log := pool.getPoolLogger(ctx, "ReturnTestDatabase").With().Int("id", id).Logger() + log.Debug().Msg("returning...") + pool.Lock() defer pool.Unlock() - if ctx.Err() != nil { + if err := ctx.Err(); err != nil { // client vanished - return ctx.Err() + log.Warn().Err(err).Msg("bailout client vanished!") + return err } if id < 0 || id >= len(pool.dbs) { + log.Warn().Int("dbs", len(pool.dbs)).Msg("bailout invalid index!") return ErrInvalidIndex } // check if db is in the correct state testDB := pool.dbs[id] if testDB.state != dbStateDirty { + log.Warn().Int("dbs", len(pool.dbs)).Msgf("bailout invalid state=%v.", testDB.state) return nil } @@ -292,28 +314,28 @@ func (pool *HashPool) ReturnTestDatabase(ctx context.Context, id int) error { pool.dbs[id] = testDB // remove id from dirty and add it to ready channel - pool.excludeIDFromDirtyChannel(id) + pool.excludeIDFromChannel(pool.dirty, id) pool.ready <- id - fmt.Printf("pool#%s: ready=%d, dirty=%d, recreating=%d, tasksChan=%d, dbs=%d initial=%d max=%d (ReturnTestDatabase)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.recreating), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize) + pool.unsafeTraceLogStats(log) return nil } -func (pool *HashPool) excludeIDFromDirtyChannel(id int) { +func (pool *HashPool) excludeIDFromChannel(ch chan int, excludeID int) { - // The testDB identified by overgiven id may still in the dirty channel. We want to exclude it. + // The testDB identified by overgiven id may still in a specific channel (typically dirty). We want to exclude it. // We need to explicitly remove it from there by filtering the current channel to a tmp channel. - // We finally close the tmp channel and flush it onto the dirty channel again. + // We finally close the tmp channel and flush it onto the specific channel again. // The id is now no longer in the channel. - filteredDirty := make(chan int, pool.MaxPoolSize) + filtered := make(chan int, pool.MaxPoolSize) - var dirtyID int + var id int for loop := true; loop; { select { - case dirtyID = <-pool.dirty: - if dirtyID != id { - filteredDirty <- dirtyID + case id = <-ch: + if id != excludeID { + filtered <- id } default: loop = false @@ -321,55 +343,66 @@ func (pool *HashPool) excludeIDFromDirtyChannel(id int) { } } - // filteredDirty now has all filtered values without the above id, redirect the other ids back to the dirty channel. + // filtered now has all filtered values without the above id, redirect the other ids back to the specific channel. // close so we can range over it... - close(filteredDirty) + close(filtered) - for dirtyID := range filteredDirty { - pool.dirty <- dirtyID + for id := range filtered { + ch <- id } } // RecreateTestDatabase prioritizes the test DB to be recreated next via the dirty worker. 
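Note (illustrative sketch, not part of the diff): excludeIDFromChannel above removes one ID from a buffered channel by draining it non-blockingly into a temporary channel and refilling. A self-contained sketch of the same idea; filterOutID and the example IDs are illustrative only.

    package main

    import "fmt"

    // filterOutID drains a buffered int channel into a temporary channel while
    // dropping every occurrence of exclude, then pushes the survivors back.
    // The drain uses a non-blocking select (default case), so only IDs that are
    // currently buffered are touched.
    func filterOutID(ch chan int, capacity int, exclude int) {
        filtered := make(chan int, capacity)

        for loop := true; loop; {
            select {
            case id := <-ch:
                if id != exclude {
                    filtered <- id
                }
            default:
                loop = false // channel is empty right now
            }
        }

        // close the temporary channel so it can be ranged over, then refill the original
        close(filtered)
        for id := range filtered {
            ch <- id
        }
    }

    func main() {
        dirty := make(chan int, 8)
        for _, id := range []int{0, 1, 2, 3} {
            dirty <- id
        }

        filterOutID(dirty, 8, 2)

        close(dirty)
        for id := range dirty {
            fmt.Println(id) // 0, 1, 3
        }
    }
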
func (pool *HashPool) RecreateTestDatabase(ctx context.Context, id int) error { + log := pool.getPoolLogger(ctx, "RecreateTestDatabase").With().Int("id", id).Logger() + log.Debug().Msg("flag testdatabase for recreation...") + pool.RLock() + if id < 0 || id >= len(pool.dbs) { + log.Warn().Int("dbs", len(pool.dbs)).Msg("bailout invalid index!") pool.RUnlock() return ErrInvalidIndex } - fmt.Printf("pool#%s: ready=%d, dirty=%d, recreating=%d, tasksChan=%d, dbs=%d initial=%d max=%d (RecreateTestDatabase %v)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.recreating), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize, id) pool.RUnlock() - if ctx.Err() != nil { + if err := ctx.Err(); err != nil { // client vanished - return ctx.Err() + log.Warn().Err(err).Msg("bailout client vanished!") + return err } // exclude from the normal dirty channel, force recreation in a background worker... - pool.excludeIDFromDirtyChannel(id) + pool.excludeIDFromChannel(pool.dirty, id) // directly spawn a new worker in the bg (with the same ctx as the typical workers) // note that this runs unchained, meaning we do not care about errors that may happen via this bg task //nolint:errcheck go pool.recreateDatabaseGracefully(pool.workerContext, id) + pool.unsafeTraceLogStats(log) return nil } // recreateDatabaseGracefully continuosly tries to recreate the testdatabase and will retry/block until it succeeds func (pool *HashPool) recreateDatabaseGracefully(ctx context.Context, id int) error { - if ctx.Err() != nil { + log := pool.getPoolLogger(ctx, "recreateDatabaseGracefully").With().Int("id", id).Logger() + log.Debug().Msg("recreating...") + + if err := ctx.Err(); err != nil { // pool closed in the meantime. 
- return ctx.Err() + log.Error().Err(err).Msg("bailout pre locking ctx err") + return err } pool.Lock() - if pool.dbs[id].state != dbStateDirty { + if state := pool.dbs[id].state; state != dbStateDirty { // nothing to do + log.Error().Msgf("bailout not dbStateDirty state=%v", state) pool.Unlock() return nil } @@ -397,7 +430,7 @@ func (pool *HashPool) recreateDatabaseGracefully(ctx context.Context, id int) er default: try++ - fmt.Printf("recreateDatabaseGracefully: recreating ID='%v' try=%v...\n", id, try) + log.Trace().Int("try", try).Msg("trying to recreate...") err := pool.recreateDB(ctx, &testDB) if err != nil { // only still connected errors are worthy a retry @@ -408,10 +441,11 @@ func (pool *HashPool) recreateDatabaseGracefully(ctx context.Context, id int) er backoff = pool.PoolConfig.TestDatabaseRetryRecreateSleepMax } - fmt.Printf("recreateDatabaseGracefully: DB is still in use, will retry ID='%v' try=%v in backoff=%v.\n", id, try, backoff) + log.Warn().Int("try", try).Dur("backoff", backoff).Msg("DB is still in use, will retry...") time.Sleep(backoff) } else { - fmt.Printf("recreateDatabaseGracefully: bailout worker task DB error while cleanup ID='%v' try=%v err=%v\n", id, try, err) + + log.Error().Int("try", try).Err(err).Msg("bailout worker task DB error while cleanup!") return err } } else { @@ -429,11 +463,10 @@ MoveToReady: return ctx.Err() } - fmt.Printf("pool#%s: ready=%d, dirty=%d, recreating=%d, tasksChan=%d, dbs=%d initial=%d max=%d (recreateDatabaseGracefully %v)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.recreating), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize, id) - if pool.dbs[id].state == dbStateReady { // oups, it has been cleaned by another worker already // we won't add it to the 'ready' channel to avoid duplication + log.Warn().Msg("bailout DB has be cleaned by another worker as its already ready, skipping readd to ready channel!") return nil } @@ -443,6 +476,8 @@ MoveToReady: pool.ready <- pool.dbs[id].ID + log.Debug().Uint("generation", pool.dbs[id].generation).Msg("ready") + pool.unsafeTraceLogStats(log) return nil } @@ -451,6 +486,9 @@ MoveToReady: // Note that we generally gurantee FIFO when it comes to auto-cleaning as long as no manual unlock/recreates happen. func (pool *HashPool) autoCleanDirty(ctx context.Context) error { + log := pool.getPoolLogger(ctx, "autoCleanDirty") + log.Trace().Msg("autocleaning...") + ctx, task := trace.NewTask(ctx, "worker_clean_dirty") defer task.End() @@ -461,11 +499,13 @@ func (pool *HashPool) autoCleanDirty(ctx context.Context) error { return ctx.Err() default: // nothing to do - fmt.Println("autoCleanDirty noop") + log.Trace().Msg("noop") return nil } - fmt.Printf("pool#%s: autoCleanDirty id=%v\n", pool.templateDB.TemplateHash, id) + // got id... 
+ log = log.With().Int("id", id).Logger() + log.Trace().Msg("checking cleaning prerequisites...") regLock := trace.StartRegion(ctx, "worker_wait_for_rlock_hash_pool") pool.RLock() @@ -473,6 +513,7 @@ func (pool *HashPool) autoCleanDirty(ctx context.Context) error { if id < 0 || id >= len(pool.dbs) { // sanity check, should never happen + log.Warn().Int("dbs", len(pool.dbs)).Msg("bailout invalid index!") pool.RUnlock() return ErrInvalidIndex } @@ -480,16 +521,19 @@ func (pool *HashPool) autoCleanDirty(ctx context.Context) error { blockedUntil := time.Until(pool.dbs[id].blockAutoCleanDirtyUntil) generation := pool.dbs[id].generation + log = log.With().Dur("blockedUntil", blockedUntil).Uint("generation", generation).Logger() + pool.RUnlock() // immediately pass to pool recreate if blockedUntil <= 0 { + log.Trace().Msg("clean now (immediate)!") return pool.recreateDatabaseGracefully(ctx, id) } // else we need to wait until we are allowed to work with it! // we block auto-cleaning until we are allowed to... - fmt.Printf("pool#%s: autoCleanDirty id=%v sleep for blockedUntil=%v...\n", pool.templateDB.TemplateHash, id, blockedUntil) + log.Warn().Msg("sleeping before being allowed to clean...") time.Sleep(blockedUntil) // we need to check that the testDB.generation did not change since we slept @@ -497,13 +541,14 @@ func (pool *HashPool) autoCleanDirty(ctx context.Context) error { pool.RLock() if pool.dbs[id].generation != generation || pool.dbs[id].state != dbStateDirty { - fmt.Printf("pool#%s: autoCleanDirty id=%v bailout old generation=%v vs new generation=%v state=%v...\n", pool.templateDB.TemplateHash, id, generation, pool.dbs[id].generation, pool.dbs[id].state) + log.Error().Msgf("bailout old generation=%v vs new generation=%v state=%v", generation, pool.dbs[id].generation, pool.dbs[id].state) pool.RUnlock() return nil } pool.RUnlock() + log.Trace().Msg("clean now (after sleep has happenend)!") return pool.recreateDatabaseGracefully(ctx, id) } @@ -521,6 +566,9 @@ func ignoreErrs(f func(ctx context.Context) error, errs ...error) func(context.C func (pool *HashPool) extend(ctx context.Context) error { + log := pool.getPoolLogger(ctx, "extend") + log.Trace().Msg("extending...") + ctx, task := trace.NewTask(ctx, "worker_extend") defer task.End() @@ -528,12 +576,12 @@ func (pool *HashPool) extend(ctx context.Context) error { pool.Lock() defer pool.Unlock() - fmt.Printf("pool#%s: ready=%d, dirty=%d, recreating=%d, tasksChan=%d, dbs=%d initial=%d max=%d (extend)\n", pool.templateDB.TemplateHash, len(pool.ready), len(pool.dirty), len(pool.recreating), len(pool.tasksChan), len(pool.dbs), pool.PoolConfig.InitialPoolSize, pool.PoolConfig.MaxPoolSize) reg.End() // get index of a next test DB - its ID index := len(pool.dbs) if index == cap(pool.dbs) { + log.Error().Int("dbs", len(pool.dbs)).Int("cap", cap(pool.dbs)).Err(ErrPoolFull).Msg("pool is full") return ErrPoolFull } @@ -551,6 +599,8 @@ func (pool *HashPool) extend(ctx context.Context) error { // set DB name newTestDB.Database.Config.Database = makeDBName(pool.TestDBNamePrefix, pool.templateDB.TemplateHash, index) + log.Trace().Int("id", index).Msg("adding...") + reg = trace.StartRegion(ctx, "worker_db_operation") err := pool.recreateDB(ctx, &newTestDB) reg.End() @@ -564,18 +614,25 @@ func (pool *HashPool) extend(ctx context.Context) error { pool.ready <- newTestDB.ID + pool.unsafeTraceLogStats(log) + return nil } func (pool *HashPool) RemoveAll(ctx context.Context, removeFunc RemoveDBFunc) error { + log := pool.getPoolLogger(ctx, "RemoveAll") + // 
stop all workers pool.Stop() + // wait until all current "recreating" tasks are finished... + pool.Lock() defer pool.Unlock() if len(pool.dbs) == 0 { + log.Error().Msg("bailout no dbs.") return nil } @@ -584,17 +641,33 @@ func (pool *HashPool) RemoveAll(ctx context.Context, removeFunc RemoveDBFunc) er testDB := pool.dbs[id].TestDatabase if err := removeFunc(ctx, testDB); err != nil { + log.Error().Int("id", id).Err(err).Msg("removeFunc testdatabase err") return err } if len(pool.dbs) > 1 { pool.dbs = pool.dbs[:len(pool.dbs)-1] } + + pool.excludeIDFromChannel(pool.dirty, id) + pool.excludeIDFromChannel(pool.ready, id) + log.Debug().Int("id", id).Msg("testdatabase removed!") } // close all only if removal of all succeeded pool.dbs = nil close(pool.tasksChan) + pool.unsafeTraceLogStats(log) + return nil } + +func (pool *HashPool) getPoolLogger(ctx context.Context, poolFunction string) zerolog.Logger { + return util.LogFromContext(ctx).With().Str("poolHash", pool.templateDB.TemplateHash).Str("poolFn", poolFunction).Logger() +} + +// unsafeTraceLogStats logs stats of this pool. Attention: pool should be read or write locked! +func (pool *HashPool) unsafeTraceLogStats(log zerolog.Logger) { + log.Trace().Int("ready", len(pool.ready)).Int("dirty", len(pool.dirty)).Int("recreating", len(pool.recreating)).Int("tasksChan", len(pool.tasksChan)).Int("dbs", len(pool.dbs)).Int("initial", pool.PoolConfig.InitialPoolSize).Int("max", pool.PoolConfig.MaxPoolSize).Msg("pool stats") +} diff --git a/pkg/pool/pool_collection.go b/pkg/pool/pool_collection.go index d4ccd46..82db735 100644 --- a/pkg/pool/pool_collection.go +++ b/pkg/pool/pool_collection.go @@ -105,7 +105,7 @@ func (p *PoolCollection) GetTestDatabase(ctx context.Context, hash string, timeo return db, err } - return pool.GetTestDatabase(ctx, hash, timeout) + return pool.GetTestDatabase(ctx, timeout) } // ReturnTestDatabase returns the given test DB directly to the pool, without cleaning (recreating it). diff --git a/pkg/util/context.go b/pkg/util/context.go new file mode 100644 index 0000000..00ce454 --- /dev/null +++ b/pkg/util/context.go @@ -0,0 +1,58 @@ +package util + +import ( + "context" + "errors" +) + +type contextKey string + +const ( + CTXKeyUser contextKey = "user" + CTXKeyAccessToken contextKey = "access_token" + CTXKeyRequestID contextKey = "request_id" + CTXKeyDisableLogger contextKey = "disable_logger" + CTXKeyCacheControl contextKey = "cache_control" +) + +// RequestIDFromContext returns the ID of the (HTTP) request, returning an error if it is not present. +func RequestIDFromContext(ctx context.Context) (string, error) { + val := ctx.Value(CTXKeyRequestID) + if val == nil { + return "", errors.New("No request ID present in context") + } + + id, ok := val.(string) + if !ok { + return "", errors.New("Request ID in context is not a string") + } + + return id, nil +} + +// ShouldDisableLogger checks whether the logger instance should be disabled for the provided context. +// `util.LogFromContext` will use this function to check whether it should return a default logger if +// none has been set by our logging middleware before, or fall back to the disabled logger, suppressing +// all output. Use `ctx = util.DisableLogger(ctx, true)` to disable logging for the given context. 
+func ShouldDisableLogger(ctx context.Context) bool {
+	s := ctx.Value(CTXKeyDisableLogger)
+	if s == nil {
+		return false
+	}
+
+	shouldDisable, ok := s.(bool)
+	if !ok {
+		return false
+	}
+
+	return shouldDisable
+}
+
+// DisableLogger toggles the indication whether `util.LogFromContext` should return a disabled logger
+// for a context if none has been set by our logging middleware before. Whilst the use cases for a disabled
+// logger are relatively limited (we almost always want to have some log output, even if the context
+// was not directly derived from an HTTP request), this functionality was provided so you can switch back
+// to the old zerolog behavior if so desired.
+func DisableLogger(ctx context.Context, shouldDisable bool) context.Context {
+	return context.WithValue(ctx, CTXKeyDisableLogger, shouldDisable)
+}
diff --git a/pkg/util/log.go b/pkg/util/log.go
new file mode 100644
index 0000000..c1f4ceb
--- /dev/null
+++ b/pkg/util/log.go
@@ -0,0 +1,42 @@
+package util
+
+import (
+	"context"
+
+	"github.com/labstack/echo/v4"
+	"github.com/rs/zerolog"
+	"github.com/rs/zerolog/log"
+)
+
+// LogFromContext returns a request-specific zerolog instance using the provided context.
+// The returned logger will have the request ID as well as some other values predefined.
+// If no logger is associated with the context provided, the global zerolog instance
+// will be returned instead - this function will _always_ return a valid (enabled) logger.
+// Should you ever need to force a disabled logger for a context, use `util.DisableLogger(ctx, true)`
+// and pass the context returned to other code/`LogFromContext`.
+func LogFromContext(ctx context.Context) *zerolog.Logger {
+	l := log.Ctx(ctx)
+	if l.GetLevel() == zerolog.Disabled {
+		if ShouldDisableLogger(ctx) {
+			return l
+		}
+		l = &log.Logger
+	}
+	return l
+}
+
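For illustration only, not part of this patch: a minimal sketch of how the new helpers are intended to be used together. Only util.LogFromContext and util.DisableLogger come from the patch above; the request-ID field name and the wiring are assumptions made for the example, not code from this repository.

package main

import (
	"context"
	"os"

	"github.com/allaboutapps/integresql/pkg/util"
	"github.com/rs/zerolog"
)

func main() {
	// Logging middleware would normally do this: attach a request-scoped logger to the context.
	logger := zerolog.New(os.Stdout).With().Timestamp().Str("requestID", "req-123").Logger()
	ctx := logger.WithContext(context.Background())

	// Further down the call chain, the same logger is recovered from the context.
	util.LogFromContext(ctx).Info().Msg("uses the request-scoped logger")

	// A context without an attached logger falls back to the global zerolog instance...
	util.LogFromContext(context.Background()).Info().Msg("falls back to the global logger")

	// ...unless logging was explicitly disabled for that context.
	silenced := util.DisableLogger(context.Background(), true)
	util.LogFromContext(silenced).Info().Msg("never printed")
}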
+// LogFromEchoContext returns a request-specific zerolog instance using the echo.Context of the request.
+// The returned logger will have the request ID as well as some other values predefined.
+func LogFromEchoContext(c echo.Context) *zerolog.Logger {
+	return LogFromContext(c.Request().Context())
+}
+
+func LogLevelFromString(s string) zerolog.Level {
+	l, err := zerolog.ParseLevel(s)
+	if err != nil {
+		log.Error().Err(err).Msgf("Failed to parse log level, defaulting to %s", zerolog.DebugLevel)
+		return zerolog.DebugLevel
+	}
+
+	return l
+}
diff --git a/pkg/util/log_test.go b/pkg/util/log_test.go
new file mode 100644
index 0000000..c318b74
--- /dev/null
+++ b/pkg/util/log_test.go
@@ -0,0 +1,20 @@
+package util_test
+
+import (
+	"testing"
+
+	"github.com/allaboutapps/integresql/pkg/util"
+	"github.com/rs/zerolog"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestLogLevelFromString(t *testing.T) {
+	res := util.LogLevelFromString("panic")
+	assert.Equal(t, zerolog.PanicLevel, res)
+
+	res = util.LogLevelFromString("warn")
+	assert.Equal(t, zerolog.WarnLevel, res)
+
+	res = util.LogLevelFromString("foo")
+	assert.Equal(t, zerolog.DebugLevel, res)
+}

From 7b99232ec350445edf06f1b85dc650f32b32616d Mon Sep 17 00:00:00 2001
From: Mario Ranftl
Date: Tue, 5 Sep 2023 17:15:34 +0200
Subject: [PATCH 10/10] don't lock while extending, reuse recreateDatabaseGracefully

---
 pkg/manager/manager_test.go | 15 ++++++++++++---
 pkg/pool/pool.go            | 26 ++++++++------------------
 2 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go
index 45f2a44..8765fd2 100644
--- a/pkg/manager/manager_test.go
+++ b/pkg/manager/manager_test.go
@@ -822,19 +822,28 @@ func TestManagerReturnTestDatabase(t *testing.T) {
 	// finally return it
 	assert.NoError(t, m.ReturnTestDatabase(ctx, hash, testDB1.ID))
 
+	// re-getting these databases is quite random. Let's try to get the same ID again...
 	// on first GET call the pool has been extended
 	// we will get the newly created DB
 	testDB2, err := m.GetTestDatabase(ctx, hash)
 	assert.NoError(t, err)
-	assert.NotEqual(t, testDB1.ID, testDB2.ID)
 
 	// next in 'ready' channel should be the returned DB
 	testDB3, err := m.GetTestDatabase(ctx, hash)
 	assert.NoError(t, err)
-	assert.Equal(t, testDB1.ID, testDB3.ID)
+
+	// restored db
+	var targetConnectionString string
+	if testDB2.ID == testDB1.ID {
+		targetConnectionString = testDB2.Config.ConnectionString()
+	} else if testDB3.ID == testDB1.ID {
+		targetConnectionString = testDB3.Config.ConnectionString()
+	} else {
+		t.Fatal("We should have been able to get the previously returned database.")
+	}
 
 	// assert that it hasn't been cleaned but just reused directly
-	db, err = sql.Open("postgres", testDB3.Config.ConnectionString())
+	db, err = sql.Open("postgres", targetConnectionString)
 	require.NoError(t, err)
 	require.NoError(t, db.PingContext(ctx))
 
diff --git a/pkg/pool/pool.go b/pkg/pool/pool.go
index 869e581..b34cae8 100644
--- a/pkg/pool/pool.go
+++ b/pkg/pool/pool.go
@@ -574,20 +574,19 @@ func (pool *HashPool) extend(ctx context.Context) error {
 
 	reg := trace.StartRegion(ctx, "worker_wait_for_lock_hash_pool")
 	pool.Lock()
-	defer pool.Unlock()
-
 	reg.End()
 
 	// get index of a next test DB - its ID
 	index := len(pool.dbs)
 	if index == cap(pool.dbs) {
 		log.Error().Int("dbs", len(pool.dbs)).Int("cap", cap(pool.dbs)).Err(ErrPoolFull).Msg("pool is full")
+		pool.Unlock()
 		return ErrPoolFull
 	}
 
-	// initalization of a new DB using template config
+	// initialization of a new DB using template config; it must start in state dirty!
newTestDB := existingDB{ - state: dbStateReady, + state: dbStateDirty, TestDatabase: db.TestDatabase{ Database: db.Database{ TemplateHash: pool.templateDB.TemplateHash, @@ -599,24 +598,15 @@ func (pool *HashPool) extend(ctx context.Context) error { // set DB name newTestDB.Database.Config.Database = makeDBName(pool.TestDBNamePrefix, pool.templateDB.TemplateHash, index) - log.Trace().Int("id", index).Msg("adding...") - - reg = trace.StartRegion(ctx, "worker_db_operation") - err := pool.recreateDB(ctx, &newTestDB) - reg.End() - - if err != nil { - return err - } - - // add new test DB to the pool + // add new test DB to the pool (currently it's dirty!) pool.dbs = append(pool.dbs, newTestDB) - pool.ready <- newTestDB.ID - + log.Trace().Int("id", index).Msg("appended as dirty, recreating...") pool.unsafeTraceLogStats(log) + pool.Unlock() - return nil + // forced recreate... + return pool.recreateDatabaseGracefully(ctx, index) } func (pool *HashPool) RemoveAll(ctx context.Context, removeFunc RemoveDBFunc) error {
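For illustration only, not part of the patch series: a heavily simplified, self-contained sketch of the locking pattern this last hunk switches extend to. The miniPool type and the recreate callback are illustrative stand-ins, not code from this repository; in the real patch the expensive work happens in recreateDatabaseGracefully after the pool mutex has been released, so other goroutines are never blocked on the pool lock while a database is being (re)created.

package main

import (
	"fmt"
	"sync"
	"time"
)

type dbState int

const (
	dbStateDirty dbState = iota
	dbStateReady
)

// miniPool stands in for HashPool: just a mutex guarding a slice of slot states.
type miniPool struct {
	sync.Mutex
	dbs []dbState
}

// extend mirrors the reworked flow: register the new slot as dirty while holding
// the lock, release the lock, and only then run the slow recreate step.
func (p *miniPool) extend(recreate func(id int)) int {
	p.Lock()
	id := len(p.dbs)
	p.dbs = append(p.dbs, dbStateDirty) // visible to other goroutines, but not usable yet
	p.Unlock()

	// the expensive part (think CREATE DATABASE ... TEMPLATE ...) no longer blocks the pool
	recreate(id)

	p.Lock()
	p.dbs[id] = dbStateReady
	p.Unlock()

	return id
}

func main() {
	p := &miniPool{}
	id := p.extend(func(id int) { time.Sleep(10 * time.Millisecond) })
	fmt.Printf("db %d ready: %v\n", id, p.dbs[id] == dbStateReady)
}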