Skip to content

Commit

Permalink
Merge pull request #58 from m-lab/sandbox-sidestream-hack
Browse files Browse the repository at this point in the history
Temporary gardener config for reprocessing sidestream
  • Loading branch information
gfr10598 authored Jun 11, 2018
2 parents c67bf44 + c961c76 commit 88d3bdd
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 7 deletions.
3 changes: 3 additions & 0 deletions cloud/tq/queuehandler.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,9 @@ func (qh *ChannelQueueHandler) handleLoop(next api.TaskPipe, bucketOpts ...optio
}
} else {
log.Println("No task files")
+task.Queue = ""
+task.Update(state.Done)
+task.Delete()
}
}
log.Println(qh.Queue, "waiting for deduper to close")
Expand Down
6 changes: 3 additions & 3 deletions dispatch/deduphandler.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,12 +222,13 @@ func (dh *DedupHandler) handleLoop(opts ...option.ClientOption) {
// feeding channel is closed, and processing is complete.
func NewDedupHandler(opts ...option.ClientOption) *DedupHandler {
project := os.Getenv("PROJECT")
+dataset := os.Getenv("DATASET")
 // When running in prod, the task files and queues are in mlab-oti, but the destination
 // BigQuery tables are in measurement-lab.
-if project == "mlab-oti" {
+// However, for sidestream private tables, we leave them in mlab-oti
+if project == "mlab-oti" && dataset != "private" {
 project = "measurement-lab" // destination for production tables.
 }
-dataset := os.Getenv("DATASET")
msg := make(chan state.Task)
rsp := make(chan error)
dh := DedupHandler{project, dataset, msg, rsp}
Expand Down Expand Up @@ -312,4 +313,3 @@ func Dedup(dsExt *bqext.Dataset, src string, destTable *bigquery.Table) (*bigque
}
return job, nil
}

8 changes: 4 additions & 4 deletions k8s/data-processing-cluster/deployments/etl-gardener.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,13 @@ spec:
- name: PROJECT
value: {{GCLOUD_PROJECT}}
- name: TASKFILE_BUCKET
-value: "archive-{{GCLOUD_PROJECT}}"
+value: "scraper-{{GCLOUD_PROJECT}}" # NOTE: if we start deleting unembargoed files from scraper, this will no longer work.
 - name: START_DATE
-value: "20180301"
+value: "20170601"
 - name: EXPERIMENTS
-value: "sidestream,ndt" # For example "ndt,sidestream,switch"
+value: "sidestream" # For example "ndt,sidestream,switch"
 - name: DATASET
-value: batch
+value: private
- name: FINAL_DATASET
value: "" # e.g. base_tables
- name: QUEUE_BASE
Expand Down

0 comments on commit 88d3bdd

Please sign in to comment.