diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
index 0a6b1de527e..d15b4415fd9 100644
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -10,9 +10,6 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v2
-      with:
-        token: ${{ secrets.GHE_ACCESS_TOKEN }}
-        submodules: recursive
     - uses: actions/setup-python@v2
     - uses: actions/setup-node@v2
       with:
diff --git a/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelCarousel.cs b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelCarousel.cs
new file mode 100644
index 00000000000..891fc4bb0b1
--- /dev/null
+++ b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelCarousel.cs
@@ -0,0 +1,271 @@
+using System;
+using System.Collections.Generic;
+using System.Text.RegularExpressions;
+using TMPro;
+using Unity.Barracuda;
+using Unity.MLAgents;
+using Unity.MLAgents.Policies;
+#if UNITY_EDITOR
+using UnityEditor;
+using UnityEditor.Recorder;
+#endif
+using UnityEngine;
+
+/**
+ * Usage Notes:
+ *
+ * Add onnx models to the list and they will be played sequentially.
+ * Create a recorder and a video of the sequence will be captured automatically if "Auto Record" is selected.
+ * Recording only works in the Editor (not in standalone build)
+ * Create a TextMeshPro Text GameObject and attach it to have the number of training steps of the current model shown.
+ * To manually control transition between models choose a very large time, or "Pause" the system,
+ * then use "Force Next" to advance.
+ * "Reset" will start the sequence from the beginning again, use "Start" to proceed after resetting.
+ * "Time Scale Override" can be set, "Seconds Between Switches" will decrease proportionally if you increase this
+ * (i.e. it represents simulated seconds between switches, not real time)
+ */
+
+
+public class ModelCarousel : MonoBehaviour
+{
+    // Inspector controls. m_Start and m_Reset are one-shot triggers cleared once FixedUpdate has handled
+    // them; m_ForceNext is cleared when the forced switch happens.
+    public bool m_Start = true;
+    public bool m_Reset = false;
+    public bool m_Pause = true;
+    public bool m_ForceNext = false;
+    public bool m_Loop = false;
+    public bool m_AutoRecord = true;
+    public bool m_ResetAgentOnModelChange = false;
+    public int m_SecondsBetweenSwitches = 10;
+    public float m_TimeScaleOverride = 0.0f;
+    public List<NNModel> m_Models = new List<NNModel>();
+    public bool m_ShowStepNumber = true;
+    public int m_StepNumberRounding = 10000;
+
+    // Captures the digits of a trailing "-<digits>" in a model name; shown as the training step count.
+    private static readonly Regex k_StepSuffix = new Regex(@".*-(\d+)$");
+
+    private int m_StepsSinceLastSwitch = 0;
+    private int m_CurrentModelIndex = 0;
+    private int m_CurrentlySetModelIndex = -1;
+
+    private NNModel m_OriginalModel = null;
+
+    // FixedUpdate calls per simulated second, derived from Time.fixedDeltaTime in OnEnable.
+    private int m_FixedUpdatesPerSecond;
+
+    // The attached Agent
+    Agent m_Agent;
+
+    public TextMeshProUGUI textMeshComponent;
+
+#if UNITY_EDITOR
+    private RecorderWindow GetRecorderWindow()
+    {
+        return (RecorderWindow)EditorWindow.GetWindow(typeof(RecorderWindow));
+    }
+#endif
+
+    // Writes text to the optional label. UnityEngine.Object overloads == so that destroyed objects
+    // compare equal to null; the ?. operator bypasses that check, hence the explicit comparison.
+    void SetStatusText(string text)
+    {
+        if (textMeshComponent != null)
+        {
+            textMeshComponent.SetText(text);
+        }
+    }
+
+    // Restores the agent's original model and rewinds the sequence to the first model.
+    // Deliberately not named Reset(): Unity sends a Reset() message to MonoBehaviours in the editor
+    // when the component is added or reset, at which point m_Agent has not been assigned.
+    private void ResetCarousel()
+    {
+        m_Reset = false;
+        m_StepsSinceLastSwitch = 0;
+        m_CurrentModelIndex = 0;
+        // Forget which model is applied so that the first model is re-applied when the sequence resumes.
+        m_CurrentlySetModelIndex = -1;
+        if (m_OriginalModel != null)
+        {
+            m_Agent.SetModel(m_OriginalModel.name, m_OriginalModel);
+        }
+        SetStatusText("Ready to Start");
+    }
+
+    // Caches the agent and its original model, then puts the carousel into its initial state.
+    private void OnEnable()
+    {
+        m_Agent = GetComponent<Agent>();
+        if (m_Agent == null)
+        {
+            Debug.LogError("ModelCarousel requires an Agent component on the same GameObject.");
+            enabled = false;
+            return;
+        }
+        m_OriginalModel = m_Agent.GetComponent<BehaviorParameters>().Model;
+
+        ResetCarousel();
+
+        // Round instead of truncating: 1 / fixedDeltaTime can land just below a whole number.
+        m_FixedUpdatesPerSecond = Mathf.RoundToInt(1.0f / Time.fixedDeltaTime);
+
+        if (m_TimeScaleOverride > 0.0f)
+        {
+            Time.timeScale = m_TimeScaleOverride;
+        }
+    }
+
+    // Starts the Recorder window's recording if "Auto Record" is on. No-op outside the editor.
+    void StartRecording()
+    {
+#if UNITY_EDITOR
+        if (!m_AutoRecord)
+        {
+            return;
+        }
+
+        Debug.Log("Starting Recording");
+        RecorderWindow recorderWindow = GetRecorderWindow();
+        if (!recorderWindow.IsRecording())
+        {
+            recorderWindow.StartRecording();
+        }
+#endif
+    }
+
+    // Stops the Recorder window's recording if "Auto Record" is on. No-op outside the editor.
+    void StopRecording()
+    {
+#if UNITY_EDITOR
+        if (!m_AutoRecord)
+        {
+            return;
+        }
+
+        Debug.Log("Stopping Recording");
+        RecorderWindow recorderWindow = GetRecorderWindow();
+        if (recorderWindow.IsRecording())
+        {
+            recorderWindow.StopRecording();
+        }
+#endif
+    }
+
+    // Shows the current model's step count (parsed from its name and rounded to the nearest
+    // m_StepNumberRounding), or clears the label when the name carries no step suffix.
+    void UpdateStepNumberText()
+    {
+        if (!m_ShowStepNumber)
+        {
+            return;
+        }
+
+        var result = k_StepSuffix.Match(m_Models[m_CurrentModelIndex].name);
+
+        string newText = "";
+        // TryParse also rejects digit runs too long for an int instead of throwing.
+        if (result.Success && int.TryParse(result.Groups[1].Value, out var steps))
+        {
+            int round = m_StepNumberRounding;
+            // A non-positive rounding value means "show the exact number" (and avoids dividing by zero).
+            if (round > 0)
+            {
+                steps += round / 2;
+                steps /= round;
+                steps *= round;
+            }
+
+            newText = $"After {steps:n0} steps";
+        }
+
+        SetStatusText(newText);
+    }
+
+    // Applies the model at m_CurrentModelIndex to the agent; does nothing if the index is out of range.
+    void SetModel()
+    {
+        if (m_CurrentModelIndex < 0 || m_CurrentModelIndex >= m_Models.Count)
+        {
+            return;
+        }
+
+        m_CurrentlySetModelIndex = m_CurrentModelIndex;
+
+        var model = m_Models[m_CurrentModelIndex];
+        if (model == null)
+        {
+            // An empty slot in the list: keep the previous model for this interval rather than throwing.
+            Debug.LogWarning($"ModelCarousel: model at index {m_CurrentModelIndex} is not assigned; skipping it.");
+            return;
+        }
+
+        m_Agent.SetModel(model.name, model);
+
+        UpdateStepNumberText();
+
+        if (m_ResetAgentOnModelChange)
+        {
+            m_Agent.EndEpisode();
+        }
+    }
+
+    // Processes the Start/Reset/Pause/Force Next controls and advances to the next model once
+    // m_SecondsBetweenSwitches simulated seconds' worth of fixed updates have elapsed.
+    void FixedUpdate()
+    {
+        if (m_Start)
+        {
+            m_Start = false;
+            m_Pause = false;
+            StartRecording();
+        }
+
+        if (m_Reset)
+        {
+            StopRecording();
+            ResetCarousel();
+            m_Pause = true;
+            m_Start = false;
+        }
+
+        if (m_Pause && !m_ForceNext)
+        {
+            return;
+        }
+
+        if (m_CurrentlySetModelIndex != m_CurrentModelIndex)
+        {
+            SetModel();
+        }
+
+        m_StepsSinceLastSwitch++;
+
+        if (m_StepsSinceLastSwitch >= m_SecondsBetweenSwitches * m_FixedUpdatesPerSecond || m_ForceNext)
+        {
+            m_ForceNext = false;
+            m_StepsSinceLastSwitch = 0;
+            m_CurrentModelIndex++;
+
+            // >= rather than == so an empty list, or an index already past the end, still ends the sequence.
+            if (m_CurrentModelIndex >= m_Models.Count)
+            {
+                if (m_Loop)
+                {
+                    m_CurrentModelIndex = 0;
+                }
+                else
+                {
+                    Application.Quit(0);
+#if UNITY_EDITOR
+                    EditorApplication.isPlaying = false;
+#endif
+                    return;
+                }
+            }
+
+            SetModel();
+        }
+    }
+}
diff --git a/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelCarousel.cs.meta b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelCarousel.cs.meta
new file mode 100644
index 00000000000..affb030bc91
--- /dev/null
+++ 
b/Project/Assets/ML-Agents/Examples/SharedAssets/Scripts/ModelCarousel.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: b941e667fa5163242bb52545d80e4059 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/Project/Packages/manifest.json b/Project/Packages/manifest.json index 7395e3d6c9e..b352f407828 100644 --- a/Project/Packages/manifest.json +++ b/Project/Packages/manifest.json @@ -5,7 +5,9 @@ "com.unity.ml-agents": "file:../../com.unity.ml-agents", "com.unity.ml-agents.extensions": "file:../../com.unity.ml-agents.extensions", "com.unity.nuget.newtonsoft-json": "2.0.0", + "com.unity.recorder": "3.0.3", "com.unity.test-framework": "1.1.29", + "com.unity.textmeshpro": "3.0.6", "com.unity.toolchain.macos-x86_64-linux-x86_64": "2.0.3", "com.unity.ugui": "1.0.0", "com.unity.modules.imageconversion": "1.0.0", diff --git a/Project/Packages/packages-lock.json b/Project/Packages/packages-lock.json index 44f8979b913..4c897ed4ed6 100644 --- a/Project/Packages/packages-lock.json +++ b/Project/Packages/packages-lock.json @@ -76,6 +76,15 @@ "dependencies": {}, "url": "https://packages.unity.com" }, + "com.unity.recorder": { + "version": "3.0.3", + "depth": 0, + "source": "registry", + "dependencies": { + "com.unity.timeline": "1.0.0" + }, + "url": "https://packages.unity.com" + }, "com.unity.sysroot": { "version": "2.0.4", "depth": 1, @@ -103,6 +112,27 @@ }, "url": "https://packages.unity.com" }, + "com.unity.textmeshpro": { + "version": "3.0.6", + "depth": 0, + "source": "registry", + "dependencies": { + "com.unity.ugui": "1.0.0" + }, + "url": "https://packages.unity.com" + }, + "com.unity.timeline": { + "version": "1.6.4", + "depth": 1, + "source": "registry", + "dependencies": { + "com.unity.modules.director": "1.0.0", + "com.unity.modules.animation": "1.0.0", + "com.unity.modules.audio": "1.0.0", + 
"com.unity.modules.particlesystem": "1.0.0" + }, + "url": "https://packages.unity.com" + }, "com.unity.toolchain.macos-x86_64-linux-x86_64": { "version": "2.0.3", "depth": 0, @@ -122,6 +152,27 @@ "com.unity.modules.imgui": "1.0.0" } }, + "com.unity.modules.animation": { + "version": "1.0.0", + "depth": 2, + "source": "builtin", + "dependencies": {} + }, + "com.unity.modules.audio": { + "version": "1.0.0", + "depth": 2, + "source": "builtin", + "dependencies": {} + }, + "com.unity.modules.director": { + "version": "1.0.0", + "depth": 2, + "source": "builtin", + "dependencies": { + "com.unity.modules.audio": "1.0.0", + "com.unity.modules.animation": "1.0.0" + } + }, "com.unity.modules.imageconversion": { "version": "1.0.0", "depth": 0, @@ -140,6 +191,12 @@ "source": "builtin", "dependencies": {} }, + "com.unity.modules.particlesystem": { + "version": "1.0.0", + "depth": 2, + "source": "builtin", + "dependencies": {} + }, "com.unity.modules.physics": { "version": "1.0.0", "depth": 0, diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index 5403f05c050..7f7eff3a959 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -16,6 +16,8 @@ and this project adheres to #### com.unity.ml-agents / com.unity.ml-agents.extensions (C#) #### ml-agents / ml-agents-envs +- Added training config feature to evenly distribute checkpoints throughout training. (#5842) +- Updated training area replicator to add a condition to only replicate training areas when running a build. 
(#5842) ### Bug Fixes #### com.unity.ml-agents / com.unity.ml-agents.extensions (C#) diff --git a/com.unity.ml-agents/Runtime/Areas/TrainingAreaReplicator.cs b/com.unity.ml-agents/Runtime/Areas/TrainingAreaReplicator.cs index ef4a9d06334..c47383b4a30 100644 --- a/com.unity.ml-agents/Runtime/Areas/TrainingAreaReplicator.cs +++ b/com.unity.ml-agents/Runtime/Areas/TrainingAreaReplicator.cs @@ -25,8 +25,13 @@ public class TrainingAreaReplicator : MonoBehaviour /// </summary> public float separation = 10f; - int3 m_GridSize = new int3(1, 1, 1); - int m_areaCount = 0; + /// <summary> + /// Whether to replicate only when running in a build (true) or also in the editor (false). Default = true + /// </summary> + public bool buildOnly = true; + + int3 m_GridSize = new(1, 1, 1); + int m_AreaCount; string m_TrainingAreaName; /// <summary> @@ -57,7 +62,14 @@ public void Awake() /// </summary> public void OnEnable() { - // Adds the training are replicas during OnEnable to ensure they are added before the Academy begins its work. + // Adds the training area replicas during OnEnable to ensure they are added before the Academy begins its work. + if (buildOnly) + { +#if !UNITY_EDITOR + AddEnvironments(); +#endif + return; + } AddEnvironments(); } @@ -95,14 +107,14 @@ void AddEnvironments() { for (int x = 0; x < m_GridSize.x; x++) { - if (m_areaCount == 0) + if (m_AreaCount == 0) { // Skip this first area since it already exists. 
- m_areaCount = 1; + m_AreaCount = 1; } - else if (m_areaCount < numAreas) + else if (m_AreaCount < numAreas) { - m_areaCount++; + m_AreaCount++; var area = Instantiate(baseArea, new Vector3(x * separation, y * separation, z * separation), Quaternion.identity); area.name = m_TrainingAreaName; } diff --git a/com.unity.ml-agents/Runtime/Sensors/GridSensorComponent.cs b/com.unity.ml-agents/Runtime/Sensors/GridSensorComponent.cs index f381dbd039f..004a04f3963 100644 --- a/com.unity.ml-agents/Runtime/Sensors/GridSensorComponent.cs +++ b/com.unity.ml-agents/Runtime/Sensors/GridSensorComponent.cs @@ -285,7 +285,7 @@ void OnDrawGizmos() var cellColors = m_DebugSensor.PerceptionBuffer; var rotation = m_GridPerception.GetGridRotation(); - var scale = new Vector3(m_CellScale.x, 1, m_CellScale.z); + var scale = new Vector3(m_CellScale.x, m_CellScale.y, m_CellScale.z); var gizmoYOffset = new Vector3(0, m_GizmoYOffset, 0); var oldGizmoMatrix = Gizmos.matrix; for (var i = 0; i < m_DebugSensor.PerceptionBuffer.Length; i++) diff --git a/com.unity.ml-agents/Tests/Editor/Areas/TrainingAreaReplicatorTests.cs b/com.unity.ml-agents/Tests/Editor/Areas/TrainingAreaReplicatorTests.cs index f1046ebf682..5e5b54dc239 100644 --- a/com.unity.ml-agents/Tests/Editor/Areas/TrainingAreaReplicatorTests.cs +++ b/com.unity.ml-agents/Tests/Editor/Areas/TrainingAreaReplicatorTests.cs @@ -9,7 +9,7 @@ namespace Unity.MLAgents.Tests.Areas [TestFixture] public class TrainingAreaReplicatorTests { - private TrainingAreaReplicator m_Replicator; + TrainingAreaReplicator m_Replicator; [SetUp] public void Setup() @@ -21,6 +21,17 @@ public void Setup() m_Replicator.baseArea = trainingArea; } + [TearDown] + public void TearDown() + { + var trainingAreas = Resources.FindObjectsOfTypeAll<GameObject>().Where(obj => obj.name == m_Replicator.TrainingAreaName); + foreach (var trainingArea in trainingAreas) + { + Object.DestroyImmediate(trainingArea); + } + m_Replicator = null; + } + private static object[] NumAreasCases = { new 
object[] {1}, @@ -51,11 +62,24 @@ public void TestComputeGridSize(int numAreas) public void TestAddEnvironments() { m_Replicator.numAreas = 10; + m_Replicator.buildOnly = false; m_Replicator.Awake(); m_Replicator.OnEnable(); var trainingAreas = Resources.FindObjectsOfTypeAll<GameObject>().Where(obj => obj.name == m_Replicator.TrainingAreaName); Assert.AreEqual(10, trainingAreas.Count()); } + + [Test] + public void TestAddEnvironmentsBuildOnly() + { + m_Replicator.numAreas = 10; + m_Replicator.buildOnly = true; + m_Replicator.Awake(); + m_Replicator.OnEnable(); + var trainingAreas = Resources.FindObjectsOfTypeAll<GameObject>().Where(obj => obj.name == m_Replicator.TrainingAreaName); + Assert.AreEqual(1, trainingAreas.Count()); + + } } } diff --git a/docs/Training-Configuration-File.md b/docs/Training-Configuration-File.md index 1f4cce5f4db..d47b6ab103c 100644 --- a/docs/Training-Configuration-File.md +++ b/docs/Training-Configuration-File.md @@ -32,6 +32,7 @@ choice of the trainer (which we review on subsequent sections). | `time_horizon` | (default = `64`) How many steps of experience to collect per-agent before adding it to the experience buffer. When this limit is reached before the end of an episode, a value estimate is used to predict the overall expected reward from the agent's current state. As such, this parameter trades off between a less biased, but higher variance estimate (long time horizon) and more biased, but less varied estimate (short time horizon). In cases where there are frequent rewards within an episode, or episodes are prohibitively large, a smaller number can be more ideal. This number should be large enough to capture all the important behavior within a sequence of an agent's actions.

Typical range: `32` - `2048` | | `max_steps` | (default = `500000`) Total number of steps (i.e., observation collected and action taken) that must be taken in the environment (or across all environments if using multiple in parallel) before ending the training process. If you have multiple agents with the same behavior name within your environment, all steps taken by those agents will contribute to the same `max_steps` count.

Typical range: `5e5` - `1e7` | | `keep_checkpoints` | (default = `5`) The maximum number of model checkpoints to keep. Checkpoints are saved after the number of steps specified by the checkpoint_interval option. Once the maximum number of checkpoints has been reached, the oldest checkpoint is deleted when saving a new checkpoint. | +| `even_checkpoints` | (default = `false`) If set to true, ignores `checkpoint_interval` and evenly distributes checkpoints throughout training based on `keep_checkpoints` and `max_steps`, i.e. `checkpoint_interval = max_steps / keep_checkpoints`. Useful for cataloging agent behavior throughout training. | | `checkpoint_interval` | (default = `500000`) The number of experiences collected between each checkpoint by the trainer. A maximum of `keep_checkpoints` checkpoints are saved before old ones are deleted. Each checkpoint saves the `.onnx` files in `results/` folder.| | `init_path` | (default = None) Initialize trainer from a previously saved model. Note that the prior run should have used the same trainer configurations as the current run, and have been saved with the same version of ML-Agents.

You can provide either the file name or the full path to the checkpoint, e.g. `{checkpoint_name.pt}` or `./models/{run-id}/{behavior_name}/{checkpoint_name.pt}`. This option is provided in case you want to initialize different behaviors from different runs or initialize from an older checkpoint; in most cases, it is sufficient to use the `--initialize-from` CLI parameter to initialize all models from the same run. | | `threaded` | (default = `false`) Allow environments to step while updating the model. This might result in a training speedup, especially when using SAC. For best performance, leave setting to `false` when using self-play. | diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 7cff991ba22..2aca4fdb13a 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -616,18 +616,23 @@ class TrainerSettings(ExportableSettings): default_override: ClassVar[Optional["TrainerSettings"]] = None trainer_type: str = "ppo" hyperparameters: HyperparamSettings = attr.ib() + checkpoint_interval: int = attr.ib() @hyperparameters.default def _set_default_hyperparameters(self): return all_trainer_settings[self.trainer_type]() + @checkpoint_interval.default + def _set_default_checkpoint_interval(self): + return 500000 + network_settings: NetworkSettings = attr.ib(factory=NetworkSettings) reward_signals: Dict[RewardSignalType, RewardSignalSettings] = attr.ib( factory=lambda: {RewardSignalType.EXTRINSIC: RewardSignalSettings()} ) init_path: Optional[str] = None keep_checkpoints: int = 5 - checkpoint_interval: int = 500000 + even_checkpoints: bool = False max_steps: int = 500000 time_horizon: int = 64 summary_freq: int = 50000 @@ -651,6 +656,11 @@ def _check_batch_size_seq_length(self, attribute, value): "When using memory, sequence length must be less than or equal to batch size. 
" ) + @checkpoint_interval.validator + def _set_checkpoint_interval(self, attribute, value): + if self.even_checkpoints: + self.checkpoint_interval = int(self.max_steps / max(self.keep_checkpoints, 1)) + @staticmethod def dict_to_trainerdict(d: Dict, t: type) -> "TrainerSettings.DefaultTrainerDict": return TrainerSettings.DefaultTrainerDict( diff --git a/ml-agents/mlagents/trainers/tests/test_settings.py b/ml-agents/mlagents/trainers/tests/test_settings.py index b7001478794..568dd4c2d32 100644 --- a/ml-agents/mlagents/trainers/tests/test_settings.py +++ b/ml-agents/mlagents/trainers/tests/test_settings.py @@ -186,6 +186,37 @@ def test_trainersettingsschedules_structure(): assert trainer_settings.hyperparameters.epsilon_schedule == ScheduleType.LINEAR +def test_even_checkpoints_structure(): + """ + Test structuring for even checkpoints + """ + trainersettings_dict = { + "trainer_type": PPO_TRAINER_NAME, + "keep_checkpoints": 2, + "even_checkpoints": True, + "max_steps": 100.0, + } + + trainer_settings = TrainerSettings.structure(trainersettings_dict, TrainerSettings) + assert isinstance(trainer_settings.hyperparameters, PPOSettings) + assert trainer_settings.checkpoint_interval == 50 + + +def test_default_checkpoint_interval_structure(): + """ + Test that checkpoint_interval keeps its default when even_checkpoints is not set + """ + trainersettings_dict = { + "trainer_type": PPO_TRAINER_NAME, + "keep_checkpoints": 2, + "max_steps": 100.0, + } + + trainer_settings = TrainerSettings.structure(trainersettings_dict, TrainerSettings) + assert 
isinstance(trainer_settings.hyperparameters, PPOSettings) + assert trainer_settings.checkpoint_interval == 500000 + + def test_reward_signal_structure(): """ Tests the RewardSignalSettings structure method. This one is special b/c