Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
2d93b0b
test lcb early stop
kywch Nov 18, 2025
f6ddd6c
log early stop
kywch Nov 18, 2025
323dc05
log early stop
kywch Nov 18, 2025
eb196ca
check threshold curve
kywch Nov 18, 2025
f32bb96
rename model, fn
kywch Nov 19, 2025
eb999c0
tweak logging min steps
kywch Nov 19, 2025
4caddc3
relax min cost
kywch Nov 19, 2025
fe26e66
try robust quantile regression
kywch Nov 19, 2025
3e972f1
get threshold from pareto fit
kywch Nov 20, 2025
bbc477b
relax breakout min cost
kywch Nov 20, 2025
b3c7495
try dynamic threshold
kywch Nov 20, 2025
f166182
tweak max fraction
kywch Nov 20, 2025
562a80a
revert back to quantile regression
kywch Nov 20, 2025
4b5be10
better handle early stop data
kywch Nov 20, 2025
17ee3a5
tweak threshold
kywch Nov 21, 2025
4fe88e4
check and stop long runs first
kywch Nov 21, 2025
553b26d
more samples for new best runs, tweak stopping rule
kywch Nov 21, 2025
0125f00
tweak pareto prune, random sample cost
kywch Nov 21, 2025
a352834
undo oversample
kywch Nov 21, 2025
7fdf027
lower min sample for threshold model
kywch Nov 21, 2025
4451efe
add diversity to search centers
kywch Nov 22, 2025
deb7b5e
add cost diversity
kywch Nov 22, 2025
72cbede
small tweak
kywch Nov 22, 2025
686a843
small tweak
kywch Nov 22, 2025
23624ff
tweak upper cost threshold
kywch Nov 23, 2025
4d6ff59
tweak cost sampling
kywch Nov 23, 2025
ccce5b0
slowly inc upper cost
kywch Nov 24, 2025
42fe9d2
fix upper cost threshold to use pruned pareto
kywch Nov 24, 2025
75862d9
lower hard stop threshold, use target running mean
kywch Nov 24, 2025
b903a21
make expansion slower, max cost less sensitive
kywch Nov 24, 2025
d93213d
compare thresh vs. max of current/running mean
kywch Nov 24, 2025
e161ca1
remove min allowed cost as it varies a lot from env to env
kywch Nov 25, 2025
a919a1d
try percentile score in sweep
kywch Nov 25, 2025
451ebbf
clip logit
kywch Nov 25, 2025
3233aaf
tweak climb sweep
kywch Nov 25, 2025
109c1b5
tower climb new params from sweep
kywch Nov 25, 2025
f4514a7
inc num maps
kywch Nov 25, 2025
8c417b4
dec num maps, setup too long
kywch Nov 25, 2025
630663b
add min_lr_ratio to sweep
kywch Nov 27, 2025
12b284d
fix typo
kywch Nov 28, 2025
33661e7
Merge pull request #425 from kywch/stop-train
jsuarez5341 Nov 28, 2025
07af00f
continuous sampling sanitisation
FinlaySanders Dec 29, 2025
971344b
Merge pull request #444 from FinlaySanders/3.0
jsuarez5341 Dec 29, 2025
d59bdb3
simplified 2048
kywch Jan 24, 2026
82dabc9
tweak rew
kywch Jan 24, 2026
6e85b66
simplify and edits for 132k
kywch Jan 24, 2026
ef81be6
match c network to torch net
kywch Jan 24, 2026
7cd6a23
simplified scaffolding
kywch Jan 24, 2026
1359de8
g2048 changes only
kywch Jan 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions pufferlib/config/ocean/g2048.ini
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,7 @@ num_envs = 4
[env]
num_envs = 4096
num_agents = 1
can_go_over_65536 = False
reward_scaler = 0.67
endgame_env_prob = 0.05
scaffolding_ratio = 0.67
use_heuristic_rewards = True
snake_reward_weight = 0.0005
use_sparse_reward = False

[sweep.policy.hidden_size]
distribution = uniform_pow2
Expand Down Expand Up @@ -62,7 +56,6 @@ max = 2
mean = 1
scale = auto


[train]
# 512 hidden: https://wandb.ai/kywch/pufferlib/runs/5thsjr61?nw=nwuserkywch
total_timesteps = 6_767_676_767
Expand Down
10 changes: 1 addition & 9 deletions pufferlib/ocean/g2048/binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,7 @@
#include "../env_binding.h"

// Binding init hook for the g2048 env: fills Env configuration fields from
// kwargs, then calls init(env).
//   env    - environment struct whose fields are written below
//   args   - not referenced in this function
//   kwargs - source of the configuration values; each field is read via
//            unpack() using a key with the same name as the field
// Returns 0 on every path visible here.
// NOTE(review): unpack()'s behavior on a missing key is not visible in this
// view; no result is checked before init() runs — confirm.
// NOTE(review): this listing is a flattened diff; the unpack() lines may be
// the ones removed by the change — confirm against the actual file.
static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
env->can_go_over_65536 = unpack(kwargs, "can_go_over_65536");
env->reward_scaler = unpack(kwargs, "reward_scaler");
env->endgame_env_prob = unpack(kwargs, "endgame_env_prob");
env->scaffolding_ratio = unpack(kwargs, "scaffolding_ratio");
env->use_heuristic_rewards = unpack(kwargs, "use_heuristic_rewards");
env->snake_reward_weight = unpack(kwargs, "snake_reward_weight");
env->use_sparse_reward = unpack(kwargs, "use_sparse_reward");
// All configuration fields are assigned before init() is invoked.
init(env);
return 0;
}
Expand All @@ -24,8 +18,6 @@ static int my_log(PyObject* dict, Log* log) {
assign_to_dict(dict, "lifetime_max_tile", log->lifetime_max_tile);
assign_to_dict(dict, "reached_32768", log->reached_32768);
assign_to_dict(dict, "reached_65536", log->reached_65536);
assign_to_dict(dict, "monotonicity_reward", log->monotonicity_reward);
assign_to_dict(dict, "snake_state", log->snake_state);
assign_to_dict(dict, "snake_reward", log->snake_reward);
assign_to_dict(dict, "reached_131072", log->reached_131072);
return 0;
}
53 changes: 26 additions & 27 deletions pufferlib/ocean/g2048/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@ def evaluate(env_name, load_model_path):
args['env']['num_envs'] = 4096
args['load_model_path'] = load_model_path
# Turn off endgame_envs and scaffolding episodes, which do not report results
args['env']['endgame_env_prob'] = 0
args['env']['scaffolding_ratio'] = 0
args['env']['can_go_over_65536'] = True

vecenv = pufferl.load_env(env_name, args)
policy = pufferl.load_policy(args, vecenv, env_name)
Expand All @@ -33,6 +31,7 @@ def evaluate(env_name, load_model_path):
merge_scores = sum(n * s for n, s in zip(stats['n'], stats['merge_score'])) / num_episodes
reached_32768 = sum(n * s for n, s in zip(stats['n'], stats['reached_32768'])) / num_episodes
reached_65536 = sum(n * s for n, s in zip(stats['n'], stats['reached_65536'])) / num_episodes
reached_131072 = sum(n * s for n, s in zip(stats['n'], stats['reached_131072'])) / num_episodes

print(f"Num episodes: {int(num_episodes)}")
print(f"Max tile avg: {max_tiles:.1f}")
Expand All @@ -41,43 +40,40 @@ def evaluate(env_name, load_model_path):
print(f"Merge score -- Avg: {merge_scores:.1f}, Max: {max(stats['merge_score']):.1f}")
print(f"Reached 32768 prob: {reached_32768*100:.2f} %")
print(f"Reached 65536 prob: {reached_65536*100:.2f} %")
print(f"Reached 131072 prob: {reached_131072*100:.2f} %")

"""
# hidden 256: https://wandb.ai/kywch/pufferlib/runs/nvd0pfuj?nw=nwuserkywch
Num episodes: 154406
Max tile avg: 22532.9
Episode length -- Avg: 16667.2, Max: 26659.1
Merge score -- Avg: 462797.9, Max: 744224.9
Reached 32768 prob: 46.08 %
Reached 65536 prob: 3.53 %

# hidden 512: https://wandb.ai/kywch/pufferlib/runs/2ch3my60?nw=nwuserkywch
Num episodes: 119243
Max tile avg: 30662.2
Episode length -- Avg: 21539.7, Max: 29680.3
Merge score -- Avg: 618011.8, Max: 918755.8
Reached 32768 prob: 68.25 %
Reached 65536 prob: 13.09 %

# hidden 512 (replication): https://wandb.ai/kywch/pufferlib/runs/5thsjr61?nw=nwuserkywch
Num episodes: 115652
Max tile avg: 31773.2
Episode length -- Avg: 22196.4, Max: 30316.5
Merge score -- Avg: 639395.6, Max: 909969.8
Reached 32768 prob: 71.22 %
Reached 65536 prob: 14.75 %

# embeddings: https://wandb.ai/thatguy11325/pufferlib/runs/g2f00pcm?nw=nwuserthatguy11325
Num episodes: 192276
Max tile avg: 33166.4
Episode length -- Avg: 26950.7, Max: 44906.1
Merge score -- Avg: 770645.8, Max: 1040367.2
Reached 32768 prob: 85.32 %
Reached 65536 prob: 10.15 %

# embeddings + new reward: https://wandb.ai/kywch/pufferlib/runs/1v5kls7l?nw=nwuserkywch
Num episodes: 95611
Max tile avg: 40980.9
Episode length -- Avg: 26792.1, Max: 37442.2
Merge score -- Avg: 779238.6, Max: 997571.8
Reached 32768 prob: 84.88 %
Reached 65536 prob: 33.96 %
Reached 131072 prob: 0.00 %
"""

def finetune(env_name, load_model_path):
args = pufferl.load_config(env_name)
args['load_model_path'] = load_model_path
# args['env']['use_sparse_reward'] = True
args['env']['scaffolding_ratio'] = 0.85

# args['policy']['hidden_size'] = 512
# args['rnn']['input_size'] = 512
# args['rnn']['hidden_size'] = 512

args['train']['total_timesteps'] = 1_000_000_000
args['train']['learning_rate'] = 0.00005
args['train']['anneal_lr'] = False
Expand All @@ -90,12 +86,15 @@ def finetune(env_name, load_model_path):
if __name__ == '__main__':
import os
import wandb
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--entity', type=str, default='kywch')
parser.add_argument('--run-id', type=str, default='1v5kls7l')

# https://wandb.ai/kywch/pufferlib/runs/5thsjr61?nw=nwuserkywch
wandb_run_id = '5thsjr61'
wandb.init(id=wandb_run_id, project='pufferlib', entity='kywch')
args = parser.parse_args()

artifact = wandb.use_artifact(f'{wandb_run_id}:latest')
wandb.init(id=args.run_id, project='pufferlib', entity=args.entity)
artifact = wandb.use_artifact(f'{args.run_id}:latest')
data_dir = artifact.download()
model_file = max(os.listdir(data_dir))
model_path = f'{data_dir}/{model_file}'
Expand Down
11 changes: 3 additions & 8 deletions pufferlib/ocean/g2048/g2048.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#include "g2048.h"
#include "g2048_net.h"

#define OBS_DIM 289
#define OBS_DIM 16
#define HIDDEN_DIM 512

// Set NO_RENDER to true to run evals without rendering
Expand All @@ -11,12 +11,7 @@
int main() {
srand(time(NULL));
Game env = {
.can_go_over_65536 = true,
.reward_scaler = 0.0,
.endgame_env_prob = 0.0,
.scaffolding_ratio = 0.0,
.use_heuristic_rewards = false,
.snake_reward_weight = 0.0,
};
init(&env);

Expand All @@ -30,8 +25,8 @@ int main() {
env.actions = actions;
env.rewards = rewards;

Weights* weights = load_weights("resources/g2048/g2048_weights.bin", 3713541);
G2048Net* net = make_g2048net(weights, OBS_DIM, HIDDEN_DIM);
Weights* weights = load_weights("resources/g2048/g2048_weights.bin", 3466859);
G2048Net* net = make_g2048net(weights, HIDDEN_DIM);
c_reset(&env);
if (!NO_RENDER) c_render(&env);
printf("Starting...\n");
Expand Down
Loading
Loading