added smac_v2 support

Denys88 · Oct 12, 2023 · 5bfde80 · 5bfde80
1 parent 3ad7c3f
commit 5bfde80
Show file tree

Hide file tree

Showing 9 changed files with 588 additions and 22 deletions.
diff --git a/rl_games/common/env_configurations.py b/rl_games/common/env_configurations.py
@@ -161,6 +161,21 @@ def create_smac(name, **kwargs):
     env = SMACEnv(name, **kwargs)
 
 
+    if frames > 1:
+        if has_cv:
+            env = wrappers.BatchedFrameStackWithStates(env, frames, transpose=False, flatten=flatten)
+        else:
+            env = wrappers.BatchedFrameStack(env, frames, transpose=False, flatten=flatten)
+    return env
+
+def create_smac_v2(name, **kwargs):
+    from rl_games.envs.smac_v2_env import SMACEnvV2
+    frames = kwargs.pop('frames', 1)
+    transpose = kwargs.pop('transpose', False)
+    flatten = kwargs.pop('flatten', True)
+    has_cv = kwargs.get('central_value', False)
+    env = SMACEnvV2(name, **kwargs)
+
     if frames > 1:
         if has_cv:
             env = wrappers.BatchedFrameStackWithStates(env, frames, transpose=False, flatten=flatten)
@@ -349,6 +364,10 @@ def create_env(name, **kwargs):
         'env_creator' : lambda **kwargs : create_smac(**kwargs),
         'vecenv_type' : 'RAY'
     },
+    'smac_v2' : {
+        'env_creator' : lambda **kwargs : create_smac_v2(**kwargs),
+        'vecenv_type' : 'RAY'
+    },
     'smac_cnn' : {
         'env_creator' : lambda **kwargs : create_smac_cnn(**kwargs),
         'vecenv_type' : 'RAY'

diff --git a/rl_games/configs/smac/v2/env_configs/sc2_gen_protoss.yaml b/rl_games/configs/smac/v2/env_configs/sc2_gen_protoss.yaml
@@ -0,0 +1,69 @@
+env: sc2wrapped
+
+env_args:
+  continuing_episode: False
+  difficulty: "7"
+  game_version: null
+  map_name: "10gen_protoss"
+  move_amount: 2
+  obs_all_health: True
+  obs_instead_of_state: False
+  obs_last_action: False
+  obs_own_health: True
+  obs_pathing_grid: False
+  obs_terrain_height: False
+  obs_timestep_number: False
+  reward_death_value: 10
+  reward_defeat: 0
+  reward_negative_scale: 0.5
+  reward_only_positive: True
+  reward_scale: True
+  reward_scale_rate: 20
+  reward_sparse: False
+  reward_win: 200
+  replay_dir: ""
+  replay_prefix: ""
+  conic_fov: False
+  use_unit_ranges: True
+  min_attack_range: 2
+  obs_own_pos: True
+  num_fov_actions: 12
+  capability_config:
+    n_units: 5
+    n_enemies: 5
+    team_gen:
+      dist_type: "weighted_teams"
+      unit_types: 
+        - "stalker"
+        - "zealot"
+        - "colossus"
+      weights:
+        - 0.45
+        - 0.45
+        - 0.1
+      observe: True
+    start_positions:
+      dist_type: "surrounded_and_reflect"
+      p: 0.5
+      map_x: 32
+      map_y: 32
+
+    # enemy_mask:
+    #   dist_type: "mask"
+    #   mask_probability: 0.5
+    #   n_enemies: 5
+  state_last_action: True
+  state_timestep_number: False
+  step_mul: 8
+  heuristic_ai: False
+  # heuristic_rest: False
+  debug: False
+  prob_obs_enemy: 1.0
+  action_mask: True
+
+test_nepisode: 32
+test_interval: 10000
+log_interval: 2000
+runner_log_interval: 2000
+learner_log_interval: 2000
+t_max: 10050000
diff --git a/rl_games/configs/smac/v2/env_configs/sc2_gen_protoss_epo.yaml b/rl_games/configs/smac/v2/env_configs/sc2_gen_protoss_epo.yaml
@@ -0,0 +1,70 @@
+env: sc2wrapped
+
+env_args:
+  continuing_episode: False
+  difficulty: "7"
+  game_version: null
+  map_name: "10gen_protoss"
+  move_amount: 2
+  obs_all_health: True
+  obs_instead_of_state: False
+  obs_last_action: False
+  obs_own_health: True
+  obs_pathing_grid: False
+  obs_terrain_height: False
+  obs_timestep_number: False
+  reward_death_value: 10
+  reward_defeat: 0
+  reward_negative_scale: 0.5
+  reward_only_positive: True
+  reward_scale: True
+  reward_scale_rate: 20
+  reward_sparse: False
+  reward_win: 200
+  replay_dir: ""
+  replay_prefix: ""
+  conic_fov: False
+  use_unit_ranges: True
+  min_attack_range: 2
+  obs_own_pos: True
+  num_fov_actions: 12
+  capability_config:
+    n_units: 5
+    n_enemies: 5
+    team_gen:
+      dist_type: "weighted_teams"
+      unit_types:
+        - "stalker"
+        - "zealot"
+        - "colossus"
+      weights:
+        - 0.45
+        - 0.45
+        - 0.1
+      observe: True
+    start_positions:
+      dist_type: "surrounded_and_reflect"
+      p: 0.5
+      map_x: 32
+      map_y: 32
+
+    # enemy_mask:
+    #   dist_type: "mask"
+    #   mask_probability: 0.5
+    #   n_enemies: 5
+  state_last_action: True
+  state_timestep_number: False
+  step_mul: 8
+  heuristic_ai: False
+  # heuristic_rest: False
+  debug: False
+  # Most severe partial-observability setting:
+  prob_obs_enemy: 0.0
+  action_mask: False
+
+test_nepisode: 32
+test_interval: 10000
+log_interval: 2000
+runner_log_interval: 2000
+learner_log_interval: 2000
+t_max: 10050000
diff --git a/rl_games/configs/smac/v2/env_configs/sc2_gen_terran.yaml b/rl_games/configs/smac/v2/env_configs/sc2_gen_terran.yaml
@@ -0,0 +1,71 @@
+env: sc2wrapped
+
+env_args:
+  continuing_episode: False
+  difficulty: "7"
+  game_version: null
+  map_name: "10gen_terran"
+  move_amount: 2
+  obs_all_health: True
+  obs_instead_of_state: False
+  obs_last_action: False
+  obs_own_health: True
+  obs_pathing_grid: False
+  obs_terrain_height: False
+  obs_timestep_number: False
+  reward_death_value: 10
+  reward_defeat: 0
+  reward_negative_scale: 0.5
+  reward_only_positive: True
+  reward_scale: True
+  reward_scale_rate: 20
+  reward_sparse: False
+  reward_win: 200
+  replay_dir: ""
+  replay_prefix: ""
+  conic_fov: False
+  obs_own_pos: True
+  use_unit_ranges: True
+  min_attack_range: 2
+  num_fov_actions: 12
+  capability_config:
+    n_units: 5
+    n_enemies: 5
+    team_gen:
+      dist_type: "weighted_teams"
+      unit_types: 
+        - "marine"
+        - "marauder"
+        - "medivac"
+      weights:
+        - 0.45
+        - 0.45
+        - 0.1
+      exception_unit_types:
+        - "medivac"
+      observe: True
+
+    start_positions:
+      dist_type: "surrounded_and_reflect"
+      p: 0.5
+      map_x: 32
+      map_y: 32
+    # enemy_mask:
+    #   dist_type: "mask"
+    #   mask_probability: 0.5
+    #   n_enemies: 5
+  state_last_action: True
+  state_timestep_number: False
+  step_mul: 8
+  heuristic_ai: False
+  # heuristic_rest: False
+  debug: False
+  prob_obs_enemy: 1.0
+  action_mask: True
+
+test_nepisode: 32
+test_interval: 10000
+log_interval: 2000
+runner_log_interval: 2000
+learner_log_interval: 2000
+t_max: 10050000
diff --git a/rl_games/configs/smac/v2/env_configs/sc2_gen_terran_epo.yaml b/rl_games/configs/smac/v2/env_configs/sc2_gen_terran_epo.yaml
@@ -0,0 +1,72 @@
+env: sc2wrapped
+
+env_args:
+  continuing_episode: False
+  difficulty: "7"
+  game_version: null
+  map_name: "10gen_terran"
+  move_amount: 2
+  obs_all_health: True
+  obs_instead_of_state: False
+  obs_last_action: False
+  obs_own_health: True
+  obs_pathing_grid: False
+  obs_terrain_height: False
+  obs_timestep_number: False
+  reward_death_value: 10
+  reward_defeat: 0
+  reward_negative_scale: 0.5
+  reward_only_positive: True
+  reward_scale: True
+  reward_scale_rate: 20
+  reward_sparse: False
+  reward_win: 200
+  replay_dir: ""
+  replay_prefix: ""
+  conic_fov: False
+  obs_own_pos: True
+  use_unit_ranges: True
+  min_attack_range: 2
+  num_fov_actions: 12
+  capability_config:
+    n_units: 5
+    n_enemies: 5
+    team_gen:
+      dist_type: "weighted_teams"
+      unit_types:
+        - "marine"
+        - "marauder"
+        - "medivac"
+      weights:
+        - 0.45
+        - 0.45
+        - 0.1
+      exception_unit_types:
+        - "medivac"
+      observe: True
+
+    start_positions:
+      dist_type: "surrounded_and_reflect"
+      p: 0.5
+      map_x: 32
+      map_y: 32
+    # enemy_mask:
+    #   dist_type: "mask"
+    #   mask_probability: 0.5
+    #   n_enemies: 5
+  state_last_action: True
+  state_timestep_number: False
+  step_mul: 8
+  heuristic_ai: False
+  # heuristic_rest: False
+  debug: False
+  # Most severe partial-observability setting:
+  prob_obs_enemy: 0.0
+  action_mask: False
+
+test_nepisode: 32
+test_interval: 10000
+log_interval: 2000
+runner_log_interval: 2000
+learner_log_interval: 2000
+t_max: 10050000
diff --git a/rl_games/configs/smac/v2/env_configs/sc2_gen_zerg.yaml b/rl_games/configs/smac/v2/env_configs/sc2_gen_zerg.yaml
@@ -0,0 +1,71 @@
+env: sc2wrapped
+
+env_args:
+  continuing_episode: False
+  difficulty: "7"
+  game_version: null
+  map_name: "10gen_zerg"
+  move_amount: 2
+  obs_all_health: True
+  obs_instead_of_state: False
+  obs_last_action: False
+  obs_own_health: True
+  obs_pathing_grid: False
+  obs_terrain_height: False
+  obs_timestep_number: False
+  reward_death_value: 10
+  reward_defeat: 0
+  reward_negative_scale: 0.5
+  reward_only_positive: True
+  reward_scale: True
+  reward_scale_rate: 20
+  reward_sparse: False
+  reward_win: 200
+  replay_dir: ""
+  replay_prefix: ""
+  conic_fov: False
+  use_unit_ranges: True
+  min_attack_range: 2
+  num_fov_actions: 12
+  obs_own_pos: True
+  capability_config:
+    n_units: 5
+    n_enemies: 5
+    team_gen:
+      dist_type: "weighted_teams"
+      unit_types: 
+        - "zergling"
+        - "baneling"
+        - "hydralisk"
+      weights:
+        - 0.45
+        - 0.1
+        - 0.45
+      exception_unit_types:
+        - "baneling"
+      observe: True
+
+    start_positions:
+      dist_type: "surrounded_and_reflect"
+      p: 0.5
+      map_x: 32
+      map_y: 32
+    # enemy_mask:
+    #   dist_type: "mask"
+    #   mask_probability: 0.5
+    #   n_enemies: 5
+  state_last_action: True
+  state_timestep_number: False
+  step_mul: 8
+  heuristic_ai: False
+  # heuristic_rest: False
+  debug: False
+  prob_obs_enemy: 1.0
+  action_mask: True
+
+test_nepisode: 32
+test_interval: 10000
+log_interval: 2000
+runner_log_interval: 2000
+learner_log_interval: 2000
+t_max: 10050000