Skip to content

Commit

Permalink
Fix venv guidance
Browse files Browse the repository at this point in the history
  • Loading branch information
alex-jw-brooks committed Apr 11, 2024
1 parent 0a158f9 commit e54e53f
Showing 1 changed file with 73 additions and 0 deletions.
73 changes: 73 additions & 0 deletions scripts/cluster_k8s.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# ConfigMap holding the SFT trainer configuration as a single JSON file.
# The Pod in this manifest expects that file at /config/config.json (see its
# SFT_TRAINER_CONFIG_JSON_PATH env var and the /config volume mount).
apiVersion: v1
kind: ConfigMap
metadata:
  name: sft-trainer-config-alex
data:
  # Literal block scalar (|): the JSON is stored verbatim, so the "\n" escape
  # in response_template reaches the JSON parser unchanged. The body must stay
  # indented deeper than the `config.json` key or the scalar ends up empty.
  config.json: |
    {
      "model_name_or_path": "/granite/granite-13b-base-v2/step_300000_ckpt",
      "training_data_path": "/data/anhuong/twitter_complaints.json",
      "output_dir": "/data/anhuong/tuning_output/some-name",
      "num_train_epochs": 20.0,
      "per_device_train_batch_size": 2,
      "gradient_accumulation_steps": 1,
      "evaluation_strategy": "no",
      "save_strategy": "epoch",
      "learning_rate": 1e-5,
      "response_template": "\n### Label:",
      "dataset_text_field": "output",
      "tokenizer_name_or_path": "/granite/granite-13b-base-v2/step_300000_ckpt"
    }
---
# Test Pod for the SFT trainer image. It mounts the training data, the trainer
# config and two model PVCs, then idles on `sleep` rather than running the
# image's default entrypoint.
apiVersion: v1
kind: Pod
metadata:
  name: sft-trainer-test-alex-granite-13b
spec:
  securityContext:
    runAsUser: 1000850000
    runAsGroup: 0
  containers:
    - name: train-conductor-training
      image: docker-na-public.artifactory.swg-devops.com/wcp-ai-foundation-team-docker-virtual/sft-trainer:7f32893_ubi9_py311
      imagePullPolicy: IfNotPresent
      # Overrides the image entrypoint so the container stays up for 99999 s.
      # NOTE(review): presumably so training is launched by hand via
      # `kubectl exec` -- confirm this is intended outside of debugging.
      command: ["sleep", "99999"]
      env:
        # Path of the JSON config inside the container; it must line up with
        # the /config mount below and the `config.json` key of the ConfigMap.
        - name: SFT_TRAINER_CONFIG_JSON_PATH
          value: /config/config.json
        - name: LOG_LEVEL
          value: info
      resources:
        limits:
          nvidia.com/gpu: "2"
          memory: 200Gi
          cpu: "10"
        requests:
          memory: 80Gi
          cpu: "5"
      volumeMounts:
        - mountPath: /data
          name: input-data
        - mountPath: /config
          name: sft-trainer-config
        - mountPath: /llama
          name: llama-eval-pvc
        - mountPath: /granite
          name: ibm-granite-pvc
  imagePullSecrets:
    - name: artifactory-docker-anh
  restartPolicy: Never
  terminationGracePeriodSeconds: 30
  volumes:
    - name: input-data
      persistentVolumeClaim:
        claimName: fms-tuning-pvc
    - name: sft-trainer-config
      configMap:
        # Must equal metadata.name of the ConfigMap defined in this manifest.
        # This was `sft-trainer-config`, which this manifest never creates, so
        # the volume could not be mounted from the ConfigMap declared here.
        name: sft-trainer-config-alex
    - name: ibm-granite-pvc
      persistentVolumeClaim:
        claimName: ibm-granite-pvc
    - name: llama-eval-pvc
      persistentVolumeClaim:
        claimName: llama-eval-pvc

0 comments on commit e54e53f

Please sign in to comment.